问题
I wrote the below program to iterate over all images in memory and dump their string tables.
#include <mach-o/dyld.h>
#include <stdio.h>
#include <string.h>
// Enumerates every image currently loaded by dyld and, for each image with a
// 64-bit Mach-O header, prints the string table described by its LC_SYMTAB
// load command. argc/argv are unused. Always returns 0.
int main(int argc, char** argv) {
uint32_t count = _dyld_image_count();
for (uint32_t i = 0 ; i < count ; i++) {
const char* imageName = _dyld_get_image_name(i);
printf("IMAGE[%u]=%s\n", i, imageName);
const struct mach_header* header = _dyld_get_image_header(i);
// Only images whose header carries the 64-bit magic are processed.
if (header->magic != MH_MAGIC_64)
continue;
struct mach_header_64* header64 = (struct mach_header_64*)header;
// The load commands are read starting immediately after the 64-bit header.
char *ptr = ((void*)header64) + sizeof(struct mach_header_64);
for (uint32_t j = 0; j < header64->ncmds; j++) {
struct load_command *lc = (struct load_command *)ptr;
// Advance to the next command up front so the `continue` statements below
// cannot skip the increment.
ptr += lc->cmdsize;
if (lc->cmd != LC_SYMTAB)
continue;
struct symtab_command* symtab = (struct symtab_command*)lc;
printf("\t\tLC_SYMTAB.stroff=%u\n", symtab->stroff);
printf("\t\tLC_SYMTAB.strsize=%u\n", symtab->strsize);
// Sanity guard: refuse to walk a string table reported as larger than 100 MiB.
if (symtab->strsize > 100*1024*1024) {
printf("\t\tHUH? Don't believe string table is over 100MiB in size!\n");
continue;
}
// Treats stroff as an offset from the image's load address.
// NOTE(review): stroff is a file offset, which need not match the in-memory
// layout (notably for images in the dyld shared cache), so this address can
// be wrong for such images.
char *strtab = (((void*)header64) + symtab->stroff);
uint32_t off = 0;
// Walk the table as consecutive NUL-terminated strings; empty strings are
// skipped in the output but still advance the offset by one byte.
while (off < symtab->strsize) {
char *e = &(strtab[off]);
if (e[0] != 0)
printf("\t\tSTR[%u]=\"%s\"\n", off, e);
off += strlen(e) + 1;
}
}
}
return 0;
}
It seems to randomly work for some images, but for others the stroff/strsize have nonsensical values:
LC_SYMTAB.stroff=1266154560
LC_SYMTAB.strsize=143767728
It seems to always be the same two magic values, but I'm not sure if this is system-dependent in some way or if other people will get the same specific values.
If I comment out the check for strsize being over 100MiB, then printing the string table segfaults.
Most images seem to have this problem, but some don't. When I run it, I get the issue for 29 images out of 38.
I can't observe any pattern as to which do and which won't. What is going on here?
If it is relevant, I am testing on macOS 10.14.6 and compiling with Apple LLVM version 10.0.1 (clang-1001.0.46.4).
回答1:
As you already worked out, those are from the `dyld_shared_cache`. And the `0x80000000` flag is indeed documented, in the headers shipped with Xcode or any semi-recent XNU source:
#define MH_DYLIB_IN_CACHE 0x80000000 /* Only for use on dylibs. When this bit
is set, the dylib is part of the dyld
shared cache, rather than loose in
the filesystem. */
As you've also discovered, the `stroff`/`strsize` values do not yield usable results when added to the `dyld_shared_cache` base. That is because those are not memory offsets, but file offsets. This is true for all Mach-O files; it's just often the case that the segments of non-cached binaries have the same relative position in file and memory offsets. But this is definitely not true for the shared cache.
To translate the file offset into a memory address, you'll have to parse the segments in the shared cache header. You can find struct definitions in the dyld source.
回答2:
Here's a program which prints out the contents of the string table of the `dyld` shared cache.
My original program in the question can be enhanced to skip dumping the string table of images with `MH_DYLIB_IN_CACHE` set, and combined with this program to dump the shared cache string table. (All images in the shared cache share the same string table.)
#include <mach-o/dyld.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
// Hand-written declaration of a dyld function; presumably it is not declared by
// <mach-o/dyld.h> — TODO confirm. main() treats the returned pointer as the
// start of the shared cache (a struct dyld_cache_header) and passes the address
// of a size_t for cacheLen, presumably to receive the cache length.
const void* _dyld_get_shared_cache_range(size_t* cacheLen);
// Leading fields of the header that main() expects at the start of the dyld
// shared cache. Only the fields needed to locate the mapping table are declared.
struct dyld_cache_header {
// presumably an identifying magic string; never read by this program
char magic[16];
// byte offset from the start of the cache to the dyld_cache_mapping_info array
uint32_t mappingOffset;
// number of entries in that array
uint32_t mappingCount;
// Omitted remaining fields, not relevant to this task
};
// One entry of the shared cache's mapping table. translateOffset() uses these
// entries to relate a range of file offsets to a range of addresses.
struct dyld_cache_mapping_info {
// address of the mapped range; used relative to the first entry's address
uint64_t address;
// length of the range in bytes
uint64_t size;
// offset of the start of the range within the cache file
uint64_t fileOffset;
// presumably the maximum VM protection; unused here — TODO confirm
uint32_t maxProt;
// presumably the initial VM protection; unused here — TODO confirm
uint32_t initProt;
};
// Flag bit tested against mach_header_64.flags by findSharedCacheDyldImage().
// Defined here only when the included headers do not already provide it.
#ifndef MH_DYLIB_IN_CACHE
# define MH_DYLIB_IN_CACHE 0x80000000
#endif
// Scans the loaded images in index order and returns the header of the first
// 64-bit image whose flags include MH_DYLIB_IN_CACHE. Which cached image is
// returned does not matter to the caller. Returns NULL if no such image exists.
const struct mach_header_64* findSharedCacheDyldImage(void) {
    const uint32_t imageTotal = _dyld_image_count();
    uint32_t idx = 0;
    while (idx < imageTotal) {
        const struct mach_header* candidate = _dyld_get_image_header(idx);
        idx++;
        if (candidate->magic == MH_MAGIC_64) {
            // The magic confirms the header really is the 64-bit layout.
            const struct mach_header_64* wide = (const struct mach_header_64*)candidate;
            if ((wide->flags & MH_DYLIB_IN_CACHE) != 0)
                return wide;
        }
    }
    return NULL;
}
// Find first instance of given load command in image.
//
// header64: 64-bit Mach-O header whose load commands follow it in memory.
//           May be NULL, in which case NULL is returned.
// cmd:      load command type to search for (e.g. LC_SYMTAB).
// Returns the first load command whose cmd field equals `cmd`, or NULL if
// there is none or the command list is malformed.
const struct load_command* findFirstLoadCommand(const struct mach_header_64* header64, uint32_t cmd) {
    if (header64 == NULL)
        return NULL;
    // Use a byte-typed pointer: arithmetic on void* is a compiler extension,
    // and this cast keeps the header's const qualifier.
    const char *ptr = (const char *)header64 + sizeof(struct mach_header_64);
    // Bytes of load-command data not yet walked, per the header's own total.
    uint32_t remaining = header64->sizeofcmds;
    for (uint32_t j = 0; j < header64->ncmds; j++) {
        if (remaining < sizeof(struct load_command))
            break;
        const struct load_command *lc = (const struct load_command *)ptr;
        if (lc->cmd == cmd)
            return lc;
        // A command smaller than its own header, or larger than what is left,
        // means the list is corrupt; stop instead of walking off the end.
        if (lc->cmdsize < sizeof(struct load_command) || lc->cmdsize > remaining)
            break;
        ptr += lc->cmdsize;
        remaining -= lc->cmdsize;
    }
    return NULL;
}
// Translates a shared cache file offset to a memory address.
//
// cache:  pointer to the start of the shared cache as mapped in this process
//         (its header). May be NULL, in which case NULL is returned.
// offset: offset into the shared cache file.
// Returns the address corresponding to `offset`, or NULL if no mapping entry
// contains it.
void *translateOffset(const struct dyld_cache_header *cache, uint64_t offset) {
    if (cache == NULL)
        return NULL;
    // Byte-typed, const-preserving pointer arithmetic (void* arithmetic is a
    // compiler extension).
    const struct dyld_cache_mapping_info *mappings =
        (const struct dyld_cache_mapping_info *)((const char *)cache + cache->mappingOffset);
    for (uint32_t i = 0; i < cache->mappingCount; i++) {
        if (offset < mappings[i].fileOffset)
            continue;
        // Compare the distance into the mapping against its size rather than
        // computing fileOffset + size, which could overflow.
        const uint64_t delta = offset - mappings[i].fileOffset;
        if (delta >= mappings[i].size)
            continue;
        // The mapping's address relative to the first mapping's address is
        // taken as its distance from the start of the cache in memory.
        // NOTE(review): this assumes the first mapping begins at `cache`.
        const uint64_t fromBase = mappings[i].address - mappings[0].address + delta;
        return (void *)((uintptr_t)cache + (uintptr_t)fromBase);
    }
    return NULL;
}
// Prints every non-empty string in the dyld shared cache's string table, one
// per line, as STR[<offset>]="<string>".
// argc/argv are unused. Returns 0 on success, or 1 if the shared cache, a
// cached image, its LC_SYMTAB command, or the string table cannot be located.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;
    size_t cacheLen = 0;
    const struct dyld_cache_header *cache = _dyld_get_shared_cache_range(&cacheLen);
    if (cache == NULL) {
        fprintf(stderr, "no dyld shared cache found in this process\n");
        return 1;
    }
    const struct mach_header_64* sharedCacheDyldImage = findSharedCacheDyldImage();
    if (sharedCacheDyldImage == NULL) {
        fprintf(stderr, "no loaded image is flagged as part of the dyld shared cache\n");
        return 1;
    }
    const struct symtab_command* symtab = (const struct symtab_command*)findFirstLoadCommand(sharedCacheDyldImage,LC_SYMTAB);
    if (symtab == NULL) {
        fprintf(stderr, "shared cache image has no LC_SYMTAB load command\n");
        return 1;
    }
    // const char*, not const void*: subscripting a void pointer is not valid
    // standard C.
    const char *stringTbl = translateOffset(cache, symtab->stroff);
    if (stringTbl == NULL) {
        fprintf(stderr, "string table offset %u is not covered by any cache mapping\n", symtab->stroff);
        return 1;
    }
    uint32_t off = 0;
    while (off < symtab->strsize) {
        const char *e = &stringTbl[off];
        // Search for the terminator only within the table, so an unterminated
        // final string cannot cause a read past strsize.
        const uint32_t room = symtab->strsize - off;
        const char *nul = memchr(e, 0, room);
        const uint32_t len = (nul != NULL) ? (uint32_t)(nul - e) : room;
        if (len != 0)
            printf("STR[%u]=\"%.*s\"\n", off, (int)len, e);
        if (nul == NULL)
            break;
        off += len + 1;
    }
    return 0;
}
来源:https://stackoverflow.com/questions/65248584/why-does-lc-symtab-have-invalid-stroff-strsize-but-only-for-some-loaded-images