Overload symbols of running process (LD_PRELOAD attachment)

前端 未结 2 1240
粉色の甜心
粉色の甜心 2021-02-02 00:47

I'm working on a heap profiler for Linux, called heaptrack. Currently, I rely on LD_PRELOAD to overload various (de-)allocation functions, and that works extremely

相关标签:
2条回答
  • 2021-02-02 01:33

    This cannot be done without tinkering with assembly a bit. Basically, you will have to do what gdb and ltrace do: find the virtual addresses of malloc and friends in the process image and put breakpoints at their entry points. This process usually involves temporarily rewriting the executable code, as you need to replace normal instructions with "trap" ones (such as int 3 on x86).

    If you want to avoid doing this yourself, there exists a linkable wrapper around gdb (libgdb), or you can build ltrace as a library (libltrace). As ltrace is much smaller, and the library variant of it is available out of the box, it will probably allow you to do what you want with less effort.

    For example, here's the best part of the "main.c" file from the ltrace package:

    /*
     * Initializes ltrace with the command-line arguments, then hands control
     * to ltrace_main(). Always returns 0.
     */
    int
    main(int argc, char *argv[])
    {
        ltrace_init(argc, argv);

        /*
         * Callbacks could be registered at this point, for example:
         *
         *     ltrace_add_callback(callback_call, EVENT_SYSCALL);
         *     ltrace_add_callback(callback_ret, EVENT_SYSRET);
         *     ltrace_add_callback(endcallback, EVENT_EXIT);
         *
         * But you would probably need EVENT_LIBCALL and EVENT_LIBRET.
         */

        ltrace_main();

        return 0;
    }
    

    http://anonscm.debian.org/cgit/collab-maint/ltrace.git/tree/?id=0.7.3

    0 讨论(0)
  • 2021-02-02 01:34

    Just for the lulz, another solution without ptracing your own process or touching a single line of assembly or playing around with /proc. You only have to load the library in the context of the process and let the magic happen.

    The solution I propose is to use the constructor feature (brought from C++ to C by gcc) to run some code when a library is loaded. This library then just patches the GOT (Global Offset Table) entry for malloc. The GOT stores the real addresses of the library functions so that name resolution happens only once. To patch the GOT you have to play around with the ELF structures (see man 5 elf). And Linux is kind enough to give you the aux vector (see man 3 getauxval), which tells you where to find the program headers of the current program in memory. However, a better interface is provided by dl_iterate_phdr, which is used below.

    Here is example code for a library that does exactly this when the init function is called, although the same could probably be achieved with a gdb script.

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <dlfcn.h>
    #include <sys/auxv.h>
    #include <elf.h>
    #include <link.h>
    #include <sys/mman.h>
    
    
    /* A string table as described by the dynamic section (see get_strtab). */
    struct strtab {
        char *tab;          /* taken from the DT_STRTAB entry; NULL if absent */
        ElfW(Xword) size;   /* value of the DT_STRSZ entry; 0 if absent */
    };
    
    
    /* The relocation table referenced by DT_JMPREL (see get_jmprel). */
    struct jmpreltab {
        ElfW(Rela) *tab;    /* taken from the DT_JMPREL entry; NULL if absent */
        ElfW(Xword) size;   /* value of DT_PLTRELSZ, used as a byte count; 0 if absent */
    };
    
    
    /* The dynamic symbol table (see get_symtab). */
    struct symtab {
        ElfW(Sym) *tab;     /* taken from the DT_SYMTAB entry; NULL if absent */
        ElfW(Xword) entsz;  /* value of the DT_SYMENT entry; 0 if absent */
    };
    
    
    
    /*
     * Backup of the real malloc function.
     * Assigned in init() before any GOT entry is patched; mymalloc() forwards
     * every request to it.
     */
    static void *(*realmalloc)(size_t) = NULL;
    
    
    /*
     * My local versions of the malloc functions.
     * Forward declaration: patch_got() stores this function's address before
     * the definition appears further down.
     */
    static void *mymalloc(size_t size);
    
    
    /*************/
    /* ELF stuff */
    /*************/
    /*
     * Scan a program header table for its PT_DYNAMIC entry.
     *
     * phdr      - first entry of the table
     * phnum     - number of entries in the table
     * phentsize - distance in bytes from one entry to the next
     *
     * Returns the first entry whose p_type is PT_DYNAMIC, or NULL when the
     * table contains none.
     */
    static const ElfW(Phdr) *get_phdr_dynamic(const ElfW(Phdr) *phdr,
            uint16_t phnum, uint16_t phentsize) {
        const char *cursor = (const char *)phdr;
        uint16_t remaining;

        /* Step by phentsize rather than sizeof(ElfW(Phdr)), as the caller asks. */
        for (remaining = phnum; remaining > 0; remaining--) {
            const ElfW(Phdr) *entry = (const ElfW(Phdr) *)cursor;

            if (entry->p_type == PT_DYNAMIC)
                return entry;
            cursor += phentsize;
        }

        return NULL;
    }
    
    
    
    /*
     * Find an entry of the dynamic section by its tag.
     *
     * base - load address of the object; added to pdyn->p_vaddr to locate the
     *        dynamic section in memory
     * pdyn - the object's PT_DYNAMIC program header, or NULL if it has none
     * type - the d_tag value to look for (DT_STRTAB, DT_JMPREL, ...)
     *
     * Returns the first matching entry, or NULL when pdyn is NULL or when the
     * terminating DT_NULL entry is reached without a match.
     */
    static const ElfW(Dyn) *get_dynentry(ElfW(Addr) base, const ElfW(Phdr) *pdyn,
            uint32_t type) {
        const ElfW(Dyn) *dyn;

        /* An object without a PT_DYNAMIC segment has nothing to search. */
        if (pdyn == NULL)
            return NULL;

        for (dyn = (const ElfW(Dyn) *)(base + pdyn->p_vaddr);
                dyn->d_tag != DT_NULL; dyn++) {
            if (dyn->d_tag == type)
                return dyn;
        }

        return NULL;
    }
    
    
    
    static struct jmpreltab get_jmprel(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
        struct jmpreltab table;
        const ElfW(Dyn) *dyn;
    
        dyn = get_dynentry(base, pdyn, DT_JMPREL);
        table.tab = (dyn == NULL) ? NULL : (ElfW(Rela) *)dyn->d_un.d_ptr;
    
        dyn = get_dynentry(base, pdyn, DT_PLTRELSZ);
        table.size = (dyn == NULL) ? 0 : dyn->d_un.d_val;
        return table;
    }
    
    
    
    static struct symtab get_symtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
        struct symtab table;
        const ElfW(Dyn) *dyn;
    
        dyn = get_dynentry(base, pdyn, DT_SYMTAB);
        table.tab = (dyn == NULL) ? NULL : (ElfW(Sym) *)dyn->d_un.d_ptr;
        dyn = get_dynentry(base, pdyn, DT_SYMENT);
        table.entsz = (dyn == NULL) ? 0 : dyn->d_un.d_val;
        return table;
    }
    
    
    
    static struct strtab get_strtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
        struct strtab table;
        const ElfW(Dyn) *dyn;
    
        dyn = get_dynentry(base, pdyn, DT_STRTAB);
        table.tab = (dyn == NULL) ? NULL : (char *)dyn->d_un.d_ptr;
        dyn = get_dynentry(base, pdyn, DT_STRSZ);
        table.size = (dyn == NULL) ? 0 : dyn->d_un.d_val;
        return table;
    }
    
    
    
    /*
     * Locate the GOT slot that a DT_JMPREL relocation fills in for a symbol.
     *
     * base    - load address of the object; added to the relocation's r_offset
     * jmprel  - relocation table to scan (jmprel.size is a byte count)
     * symtab  - symbol table the relocations index into
     * strtab  - string table holding the symbol names
     * symname - name of the symbol to look for
     *
     * Returns the address of the matching slot, or NULL when no relocation
     * refers to symname or when one of the tables (or symname) is missing.
     *
     * NOTE(review): the entries are read as ElfW(Rela) and decoded with
     * ELF64_R_SYM, so this presumably only suits 64-bit targets that use
     * RELA-style PLT relocations - confirm before porting.
     */
    static void *get_got_entry(ElfW(Addr) base, struct jmpreltab jmprel,
            struct symtab symtab, struct strtab strtab, const char *symname) {
        const ElfW(Rela) *rela;
        const ElfW(Rela) *relaend;

        /* The table getters hand back NULL pointers for absent entries. */
        if (jmprel.tab == NULL || symtab.tab == NULL || strtab.tab == NULL
                || symname == NULL)
            return NULL;

        relaend = (const ElfW(Rela) *)((const char *)jmprel.tab + jmprel.size);
        for (rela = jmprel.tab; rela < relaend; rela++) {
            uint32_t relsymidx = ELF64_R_SYM(rela->r_info);
            ElfW(Word) nameoff = symtab.tab[relsymidx].st_name;

            /*
             * Do not read a name that starts beyond the string table. A size
             * of 0 means DT_STRSZ was not found, so the bound is unknown and
             * the check is skipped.
             */
            if (strtab.size != 0 && nameoff >= strtab.size)
                continue;

            if (strcmp(symname, strtab.tab + nameoff) == 0)
                return (void *)(base + rela->r_offset);
        }

        return NULL;
    }
    
    
    
    /*
     * Redirect one object's GOT entry for "malloc" to mymalloc().
     *
     * base      - load address of the object
     * phdr      - the object's program header table
     * phnum     - number of entries in that table
     * phentsize - size in bytes of one entry
     *
     * Does nothing when the object has no PT_DYNAMIC segment, when it has no
     * DT_JMPREL relocation for "malloc", or when the page holding the entry
     * cannot be made writable (the failure is reported on stderr).
     * The page is left readable and writable afterwards.
     */
    static void patch_got(ElfW(Addr) base, const ElfW(Phdr) *phdr, int16_t phnum,
            int16_t phentsize) {

        const ElfW(Phdr) *dphdr;
        struct jmpreltab jmprel;
        struct symtab symtab;
        struct strtab strtab;
        void *(**mallocgot)(size_t);
        unsigned long pagesize;
        void *page;

        dphdr = get_phdr_dynamic(phdr, phnum, phentsize);
        if (dphdr == NULL)
            return;

        jmprel = get_jmprel(base, dphdr);
        symtab = get_symtab(base, dphdr);
        strtab = get_strtab(base, dphdr);
        mallocgot = get_got_entry(base, jmprel, symtab, strtab, "malloc");
        if (mallocgot == NULL)
            return;

        /* Ask the kernel for the page size; fall back to the old constant. */
        pagesize = getauxval(AT_PAGESZ);
        if (pagesize == 0)
            pagesize = 0x1000;

        /*
         * Some programs keep this entry in a read-only page, so make the page
         * writable first. Writing after a failed mprotect could fault, so the
         * entry is left untouched in that case.
         */
        page = (void *)((uintptr_t)mallocgot & ~((uintptr_t)pagesize - 1));
        if (mprotect(page, pagesize, PROT_READ | PROT_WRITE) != 0) {
            perror("mprotect");
            return;
        }

        /* Replace the pointer with our version. */
        *mallocgot = mymalloc;
    }
    
    
    
    /*
     * dl_iterate_phdr() callback: announce the object on stdout and patch its
     * GOT entry for malloc.
     *
     * info - description of the object being visited
     * size - unused
     * data - unused
     *
     * Always returns 0.
     */
    static int callback(struct dl_phdr_info *info, size_t size, void *data) {
        uint16_t entry_size;

        /* Neither of these arguments is needed here. */
        (void)size;
        (void)data;

        printf("Patching GOT entry of \"%s\"\n", info->dlpi_name);

        /* The program header entry size is read from the auxiliary vector. */
        entry_size = getauxval(AT_PHENT);
        patch_got(info->dlpi_addr, info->dlpi_phdr, info->dlpi_phnum, entry_size);

        return 0;
    }
    
    
    
    /*****************/
    /* Init function */
    /*****************/
    /*
     * Library constructor: saves the real malloc and then runs callback() on
     * the objects reported by dl_iterate_phdr().
     */
    __attribute__((constructor)) static void init(void) {
        /* Must come first: mymalloc() calls through realmalloc once a GOT is patched. */
        realmalloc = malloc;
        dl_iterate_phdr(callback, NULL);
    }
    
    
    
    /*********************************************/
    /* Here come the malloc function and sisters */
    /*********************************************/
    /*
     * Replacement installed in the patched GOT entries: prints a greeting on
     * stdout, then forwards the request to the saved real malloc.
     *
     * size - number of bytes requested
     *
     * Returns whatever realmalloc returns.
     */
    static void *mymalloc(size_t size) {
        void *block;

        printf("hello from my malloc\n");
        block = realmalloc(size);

        return block;
    }
    

    And an example program that just loads the library between two malloc calls.

    #include <stdio.h>
    #include <stdlib.h>
    #include <dlfcn.h>
    
    
    
    /*
     * Load ./mymalloc.so into the running process.
     *
     * A failed load is reported on stderr and the program carries on.
     * The handle is deliberately not kept or closed.
     */
    void loadmymalloc(void) {
        if (dlopen("./mymalloc.so", RTLD_LAZY) == NULL) {
            const char *reason = dlerror();

            fprintf(stderr, "dlopen(\"./mymalloc.so\") failed: %s\n",
                    reason != NULL ? reason : "unknown error");
        }
    }
    
    
    
    /*
     * Demo: allocate once, load the library, allocate again, and print both
     * pointers.
     *
     * Both blocks are kept until the end and then freed, so neither leaks
     * and the second allocation is still made while the first is live.
     */
    int main(void) {
        void *before;
        void *after;

        before = malloc(42);
        printf("malloc returned: %p\n", before);

        loadmymalloc();

        after = malloc(42);
        printf("malloc returned: %p\n", after);

        free(after);
        free(before);

        return EXIT_SUCCESS;
    }
    

    The call to mprotect is usually unnecessary. However, I found that gvim (which is compiled as a shared object) needs it. If you also want to catch references to malloc taken as pointers (which may allow the program to call the real function later and bypass yours), you can apply the very same process to the relocation table pointed to by the DT_RELA dynamic entry.

    If the constructor feature is not available for you, all you have to do is resolve the init symbol from the newly loaded library and call it.

    Note that you may also want to replace dlopen so that libraries loaded after yours get patched as well. This may happen if you load your library quite early or if the application has dynamically loaded plugins.

    0 讨论(0)
提交回复
热议问题