File indexing completed on 2024-05-12 17:15:54

0001 /*
0002     SPDX-FileCopyrightText: 2014-2017 Milian Wolff <mail@milianw.de>
0003 
0004     SPDX-License-Identifier: LGPL-2.1-or-later
0005 */
0006 
0007 #include "libheaptrack.h"
0008 #include "util/config.h"
0009 #include "util/linewriter.h"
0010 
0011 #include <tsl/robin_map.h>
0012 
0013 #include <cstdlib>
0014 #include <cstring>
0015 
0016 #include <dlfcn.h>
0017 #include <errno.h>
0018 #include <fcntl.h>
0019 #include <link.h>
0020 #include <unistd.h>
0021 
0022 #include <limits.h>
0023 #include <sys/mman.h>
0024 #include <sys/stat.h>
0025 
0026 #include <type_traits>
0027 
0028 /**
0029  * @file heaptrack_inject.cpp
0030  *
0031  * @brief Experimental support for symbol overloading after runtime injection.
0032  */
0033 
// Derive the native word size from the range of unsigned long. When neither
// value matches, WORDSIZE stays undefined and the fallbacks below hit #error.
#if ULONG_MAX == 0xffffffffffffffff
#define WORDSIZE 64
#elif ULONG_MAX == 0xffffffff
#define WORDSIZE 32
#endif

// Fallback for ELF_R_SYM when the included headers did not provide one:
// forward to the 32 or 64 bit variant depending on WORDSIZE.
#ifndef ELF_R_SYM
#if WORDSIZE == 64
#define ELF_R_SYM(i) ELF64_R_SYM(i)
#elif WORDSIZE == 32
#define ELF_R_SYM(i) ELF32_R_SYM(i)
#else
#error unsupported word size
#endif
#endif

// Fallback for ElfW when the included headers did not provide one:
// ElfW(type) expands to Elf64_type or Elf32_type depending on WORDSIZE.
#ifndef ElfW
#if WORDSIZE == 64
#define ElfW(type) Elf64_##type
#elif WORDSIZE == 32
#define ElfW(type) Elf32_##type
#else
#error unsupported word size
#endif
#endif

// NOTE: adding noexcept to C functions is a hard error in clang++
//       (but not even a warning in GCC, even with -Wall)
// LIBC_FUN_ATTRS therefore expands to noexcept for GCC only and to nothing otherwise.
#if defined(__GNUC__) && !defined(__clang__)
#define LIBC_FUN_ATTRS noexcept
#else
#define LIBC_FUN_ATTRS
#endif
0067 
extern "C" {

// Forward declare mimalloc (https://github.com/microsoft/mimalloc) functions so we don't need to include its .h.
// The declarations are weak; presumably so that this library can still be loaded
// into processes that do not provide mimalloc (TODO confirm).
__attribute__((weak)) void* mi_malloc(size_t size) LIBC_FUN_ATTRS;
__attribute__((weak)) void* mi_calloc(size_t count, size_t size) LIBC_FUN_ATTRS;
__attribute__((weak)) void* mi_realloc(void* p, size_t newsize) LIBC_FUN_ATTRS;
__attribute__((weak)) void mi_free(void* p) LIBC_FUN_ATTRS;
}
0076 
0077 namespace {
0078 
// Word-size independent aliases for the ELF types used in this file.
namespace Elf {
using Ehdr = ElfW(Ehdr);
using Shdr = ElfW(Shdr);
using Half = ElfW(Half);
using Addr = ElfW(Addr);
using Dyn = ElfW(Dyn);
using Rel = ElfW(Rel);
using Rela = ElfW(Rela);
using Sym = ElfW(Sym);
#if WORDSIZE == 64
using Sxword = ElfW(Sxword);
using Xword = ElfW(Xword);
#else
// FreeBSD elf32.h doesn't define Elf32_Sxword or _Xword. This is used in struct
// elftable, where it's used as a tag value. Our Elf32_Dyn uses Elf32_Sword there,
// as does the Linux definition (and the standard); the Elf64_Dyn uses Sxword.
//
// Linux elf.h defines Elf32_Sxword as a 64-bit quantity, so let's do that
using Sxword = int64_t;
using Xword = uint64_t;
#endif
}
0101 
// Forward declaration: hooks::dlopen::hook calls this function, which is only
// defined further down in this file.
void overwrite_symbols() noexcept;
0103 
0104 namespace hooks {
0105 
0106 struct malloc
0107 {
0108     static constexpr auto name = "malloc";
0109     static constexpr auto original = &::malloc;
0110 
0111     static void* hook(size_t size) noexcept
0112     {
0113         auto ptr = original(size);
0114         heaptrack_malloc(ptr, size);
0115         return ptr;
0116     }
0117 };
0118 
0119 struct free
0120 {
0121     static constexpr auto name = "free";
0122     static constexpr auto original = &::free;
0123 
0124     static void hook(void* ptr) noexcept
0125     {
0126         heaptrack_free(ptr);
0127         original(ptr);
0128     }
0129 };
0130 
0131 struct realloc
0132 {
0133     static constexpr auto name = "realloc";
0134     static constexpr auto original = &::realloc;
0135 
0136     static void* hook(void* ptr, size_t size) noexcept
0137     {
0138         auto inPtr = reinterpret_cast<uintptr_t>(ptr);
0139         auto ret = original(ptr, size);
0140         heaptrack_realloc2(inPtr, size, reinterpret_cast<uintptr_t>(ret));
0141 
0142         return ret;
0143     }
0144 };
0145 
0146 struct calloc
0147 {
0148     static constexpr auto name = "calloc";
0149     static constexpr auto original = &::calloc;
0150 
0151     static void* hook(size_t num, size_t size) noexcept
0152     {
0153         auto ptr = original(num, size);
0154         heaptrack_malloc(ptr, num * size);
0155         return ptr;
0156     }
0157 };
0158 
0159 #if HAVE_CFREE
0160 struct cfree
0161 {
0162     static constexpr auto name = "cfree";
0163     static constexpr auto original = &::cfree;
0164 
0165     static void hook(void* ptr) noexcept
0166     {
0167         heaptrack_free(ptr);
0168         original(ptr);
0169     }
0170 };
0171 #endif
0172 
0173 struct dlopen
0174 {
0175     static constexpr auto name = "dlopen";
0176     static constexpr auto original = &::dlopen;
0177 
0178     static void* hook(const char* filename, int flag) noexcept
0179     {
0180         auto ret = original(filename, flag);
0181         if (ret) {
0182             heaptrack_invalidate_module_cache();
0183             overwrite_symbols();
0184         }
0185         return ret;
0186     }
0187 };
0188 
0189 struct dlclose
0190 {
0191     static constexpr auto name = "dlclose";
0192     static constexpr auto original = &::dlclose;
0193 
0194     static int hook(void* handle) noexcept
0195     {
0196         auto ret = original(handle);
0197         if (!ret) {
0198             heaptrack_invalidate_module_cache();
0199         }
0200         return ret;
0201     }
0202 };
0203 
0204 struct posix_memalign
0205 {
0206     static constexpr auto name = "posix_memalign";
0207     static constexpr auto original = &::posix_memalign;
0208 
0209     static int hook(void** memptr, size_t alignment, size_t size) noexcept
0210     {
0211         auto ret = original(memptr, alignment, size);
0212         if (!ret) {
0213             heaptrack_malloc(*memptr, size);
0214         }
0215         return ret;
0216     }
0217 };
0218 
0219 // mimalloc functions
0220 struct mi_malloc
0221 {
0222     static constexpr auto name = "mi_malloc";
0223     static constexpr auto original = &::mi_malloc;
0224 
0225     static void* hook(size_t size) noexcept
0226     {
0227         auto ptr = original(size);
0228         heaptrack_malloc(ptr, size);
0229         return ptr;
0230     }
0231 };
0232 
0233 struct mi_free
0234 {
0235     static constexpr auto name = "mi_free";
0236     static constexpr auto original = &::mi_free;
0237 
0238     static void hook(void* ptr) noexcept
0239     {
0240         heaptrack_free(ptr);
0241         original(ptr);
0242     }
0243 };
0244 
0245 struct mi_realloc
0246 {
0247     static constexpr auto name = "mi_realloc";
0248     static constexpr auto original = &::mi_realloc;
0249 
0250     static void* hook(void* ptr, size_t size) noexcept
0251     {
0252         auto ret = original(ptr, size);
0253         heaptrack_realloc(ptr, size, ret);
0254         return ret;
0255     }
0256 };
0257 
0258 struct mi_calloc
0259 {
0260     static constexpr auto name = "mi_calloc";
0261     static constexpr auto original = &::mi_calloc;
0262 
0263     static void* hook(size_t num, size_t size) noexcept
0264     {
0265         auto ptr = original(num, size);
0266         heaptrack_malloc(ptr, num * size);
0267         return ptr;
0268     }
0269 };
0270 
0271 template <typename Hook>
0272 bool hook(const char* symname, Elf::Addr addr, bool restore)
0273 {
0274     static_assert(std::is_convertible<decltype(&Hook::hook), decltype(Hook::original)>::value,
0275                   "hook is not compatible to original function");
0276 
0277     if (strcmp(Hook::name, symname) != 0) {
0278         return false;
0279     }
0280 
0281     // try to make the page read/write accessible, which is hackish
0282     // but apparently required for some shared libraries
0283     auto page = reinterpret_cast<void*>(addr & ~(0x1000 - 1));
0284     mprotect(page, 0x1000, PROT_READ | PROT_WRITE);
0285 
0286     // now write to the address
0287     auto typedAddr = reinterpret_cast<typename std::remove_const<decltype(Hook::original)>::type*>(addr);
0288     if (restore) {
0289         // restore the original address on shutdown
0290         *typedAddr = Hook::original;
0291     } else {
0292         // now actually inject our hook
0293         *typedAddr = &Hook::hook;
0294     }
0295 
0296     return true;
0297 }
0298 
0299 void apply(const char* symname, Elf::Addr addr, bool restore)
0300 {
0301     // TODO: use std::apply once we can rely on C++17
0302     hook<malloc>(symname, addr, restore) || hook<free>(symname, addr, restore) || hook<realloc>(symname, addr, restore)
0303         || hook<calloc>(symname, addr, restore)
0304 #if HAVE_CFREE
0305         || hook<cfree>(symname, addr, restore)
0306 #endif
0307         || hook<posix_memalign>(symname, addr, restore) || hook<dlopen>(symname, addr, restore)
0308         || hook<dlclose>(symname, addr, restore)
0309         // mimalloc functions
0310         || hook<mi_malloc>(symname, addr, restore) || hook<mi_free>(symname, addr, restore)
0311         || hook<mi_realloc>(symname, addr, restore) || hook<mi_calloc>(symname, addr, restore);
0312 }
0313 }
0314 
0315 template <typename T, Elf::Sxword AddrTag, Elf::Sxword SizeTag>
0316 struct elftable
0317 {
0318     using type = T;
0319     Elf::Addr table = 0;
0320     Elf::Xword size = 0;
0321 
0322     bool consume(const Elf::Dyn* dyn) noexcept
0323     {
0324         if (dyn->d_tag == AddrTag) {
0325             table = dyn->d_un.d_ptr;
0326             return true;
0327         } else if (dyn->d_tag == SizeTag) {
0328             size = dyn->d_un.d_val;
0329             return true;
0330         }
0331         return false;
0332     }
0333 
0334     explicit operator bool() const noexcept
0335     {
0336         return table && size;
0337     }
0338 
0339     T* start(Elf::Addr tableOffset) const noexcept
0340     {
0341         return reinterpret_cast<T*>(table + tableOffset);
0342     }
0343 
0344     T* end(Elf::Addr tableOffset) const noexcept
0345     {
0346         return reinterpret_cast<T*>(table + tableOffset + size);
0347     }
0348 };
0349 
// Concrete tables collected from the dynamic section in try_overwrite_symbols.
using elf_string_table = elftable<const char, DT_STRTAB, DT_STRSZ>;
using elf_rel_table = elftable<Elf::Rel, DT_REL, DT_RELSZ>;
using elf_rela_table = elftable<Elf::Rela, DT_RELA, DT_RELASZ>;
// NOTE(review): the PLT relocations are always read as Rela entries here;
// confirm this holds for targets whose PLT uses Rel entries.
using elf_jmprel_table = elftable<Elf::Rela, DT_JMPREL, DT_PLTRELSZ>;
// NOTE(review): DT_SYMENT presumably is the size of one symbol entry rather than
// of the whole table; try_overwrite_elftable does not call end() on this table
// and bounds the symbols with a separately supplied symtabSize instead.
using elf_symbol_table = elftable<const Elf::Sym, DT_SYMTAB, DT_SYMENT>;
0355 
0356 template <typename Table>
0357 void try_overwrite_elftable(const Table& jumps, const elf_string_table& strings, const elf_symbol_table& symbols,
0358                             const Elf::Addr base, const bool restore, const Elf::Xword symtabSize) noexcept
0359 {
0360     Elf::Addr tableOffset =
0361 #ifdef __linux__
0362         0; // Already has memory addresses
0363 #elif defined(__FreeBSD__)
0364         base; // Only has ELF offsets
0365 #else
0366 #error port me
0367 #endif
0368 
0369     const auto rela_start = jumps.start(tableOffset);
0370     const auto rela_end = jumps.end(tableOffset);
0371 
0372     const auto sym_start = symbols.start(tableOffset);
0373     const auto sym_end = symbols.start(tableOffset + symtabSize);
0374     const auto num_syms = static_cast<uintptr_t>(sym_end - sym_start);
0375 
0376     const auto str_start = strings.start(tableOffset);
0377     const auto str_end = strings.end(tableOffset);
0378     const auto num_str = static_cast<uintptr_t>(str_end - str_start);
0379 
0380     for (auto rela = rela_start; rela < rela_end; rela++) {
0381         const auto sym_index = ELF_R_SYM(rela->r_info);
0382         if (sym_index < 0 || sym_index >= num_syms) {
0383             continue;
0384         }
0385 
0386         const auto str_index = sym_start[sym_index].st_name;
0387         if (str_index < 0 || str_index >= num_str) {
0388             continue;
0389         }
0390 
0391         const char* symname = str_start + str_index;
0392 
0393         auto addr = rela->r_offset + base;
0394         hooks::apply(symname, addr, restore);
0395     }
0396 }
0397 
0398 void try_overwrite_symbols(const Elf::Dyn* dyn, const Elf::Addr base, const bool restore,
0399                            const Elf::Xword symtabSize) noexcept
0400 {
0401     elf_symbol_table symbols;
0402     elf_rel_table rels;
0403     elf_rela_table relas;
0404     elf_jmprel_table jmprels;
0405     elf_string_table strings;
0406 
0407     // initialize the elf tables
0408     for (; dyn->d_tag != DT_NULL; ++dyn) {
0409         symbols.consume(dyn) || strings.consume(dyn) || rels.consume(dyn) || relas.consume(dyn) || jmprels.consume(dyn);
0410     }
0411 
0412     if (!symbols || !strings) {
0413         return;
0414     }
0415 
0416     // find symbols to overwrite
0417     if (rels) {
0418         try_overwrite_elftable(rels, strings, symbols, base, restore, symtabSize);
0419     }
0420 
0421     if (relas) {
0422         try_overwrite_elftable(relas, strings, symbols, base, restore, symtabSize);
0423     }
0424 
0425     if (jmprels) {
0426         try_overwrite_elftable(jmprels, strings, symbols, base, restore, symtabSize);
0427     }
0428 }
0429 
/**
 * Runs a callable when the guard goes out of scope.
 *
 * The guard is move-only. Moving transfers the duty to run the cleanup to the
 * new object and disarms the source, so the cleanup runs exactly once even
 * when the compiler does not elide the move out of a factory function.
 *
 * @tparam Cleanup  callable type invoked without arguments by the destructor
 */
template <typename Cleanup>
struct ScopeGuard
{
    explicit ScopeGuard(Cleanup cleanup)
        : cleanup(std::move(cleanup))
    {
    }

    ScopeGuard(ScopeGuard&& other)
        : cleanup(std::move(other.cleanup))
        , active(other.active)
    {
        // the moved-from guard must not run the cleanup a second time
        other.active = false;
    }

    // copying would run the cleanup once per copy
    ScopeGuard(const ScopeGuard&) = delete;
    ScopeGuard& operator=(const ScopeGuard&) = delete;
    ScopeGuard& operator=(ScopeGuard&&) = delete;

    ~ScopeGuard()
    {
        if (active) {
            cleanup();
        }
    }

    Cleanup cleanup;
    // false once ownership of the cleanup was moved to another guard
    bool active = true;
};
0445 
0446 template <typename Cleanup>
0447 auto scopeGuard(Cleanup cleanup)
0448 {
0449     return ScopeGuard<Cleanup>(std::move(cleanup));
0450 }
0451 
0452 Elf::Xword symtabSize(const char* path)
0453 {
0454     auto fd = open(path, O_RDONLY);
0455     if (fd == -1) {
0456         fprintf(stderr, "open failed: %s %s\n", path, strerror(errno));
0457         return 0;
0458     }
0459     auto closeOnExit = scopeGuard([fd]() { close(fd); });
0460 
0461     struct stat stat_info;
0462     if (fstat(fd, &stat_info) != 0) {
0463         fprintf(stderr, "stat failed: %s %s\n", path, strerror(errno));
0464         return 0;
0465     }
0466 
0467     auto mapping = mmap(nullptr, stat_info.st_size, PROT_READ, MAP_SHARED, fd, 0);
0468     auto unmapOnExit = scopeGuard([&]() { munmap(mapping, stat_info.st_size); });
0469 
0470     const auto base = reinterpret_cast<ElfW(Addr)>(mapping);
0471     const auto ehdr = reinterpret_cast<const ElfW(Ehdr)*>(base);
0472     const auto shdr = reinterpret_cast<const ElfW(Shdr)*>(base + ehdr->e_shoff);
0473 
0474     for (ElfW(Half) i = 0; i < ehdr->e_shnum; ++i) {
0475         if (shdr[i].sh_type == SHT_DYNSYM) {
0476             return shdr[i].sh_size;
0477         }
0478     }
0479 
0480     fprintf(stderr, "failed to query symtab size: %s\n", path);
0481     return 0;
0482 }
0483 
0484 Elf::Xword cachedSymtabSize(const char* path)
0485 {
0486     if (!strlen(path)) {
0487         path = "/proc/self/exe";
0488     }
0489 
0490     static tsl::robin_map<std::string, Elf::Xword> cache;
0491 
0492     auto key = std::string(path);
0493     auto it = cache.find(path);
0494     if (it == cache.end()) {
0495         it = cache.insert(it, {std::move(key), symtabSize(path)});
0496     }
0497     return it->second;
0498 }
0499 
0500 int iterate_phdrs(dl_phdr_info* info, size_t /*size*/, void* data) noexcept
0501 {
0502     if (strstr(info->dlpi_name, "/libheaptrack_inject.so")) {
0503         // prevent infinite recursion: do not overwrite our own symbols
0504         return 0;
0505     } else if (strstr(info->dlpi_name, "/ld-linux")) {
0506         // prevent strange crashes due to overwriting the free symbol in ld-linux
0507         // (doesn't seem to be necessary in FreeBSD's ld-elf)
0508         return 0;
0509     } else if (strstr(info->dlpi_name, "linux-vdso.so")) {
0510         // don't overwrite anything within linux-vdso
0511         return 0;
0512     }
0513 
0514     const auto symtabSize = cachedSymtabSize(info->dlpi_name);
0515     for (auto phdr = info->dlpi_phdr, end = phdr + info->dlpi_phnum; phdr != end; ++phdr) {
0516         if (phdr->p_type == PT_DYNAMIC) {
0517             try_overwrite_symbols(reinterpret_cast<const Elf::Dyn*>(phdr->p_vaddr + info->dlpi_addr), info->dlpi_addr,
0518                                   data != nullptr, symtabSize);
0519         }
0520     }
0521     return 0;
0522 }
0523 
0524 void overwrite_symbols() noexcept
0525 {
0526     dl_iterate_phdr(&iterate_phdrs, nullptr);
0527 }
0528 
0529 void restore_symbols() noexcept
0530 {
0531     bool do_shutdown = true;
0532     dl_iterate_phdr(&iterate_phdrs, &do_shutdown);
0533 }
0534 }
0535 
0536 extern "C" {
0537 // this function is called when heaptrack_inject is runtime injected via GDB
0538 void heaptrack_inject(const char* outputFileName) noexcept
0539 {
0540     heaptrack_init(
0541         outputFileName, &overwrite_symbols, [](LineWriter& out) { out.write("A\n"); }, &restore_symbols);
0542 }
0543 }
0544 
0545 // alternatively, the code below may initialize heaptrack when we use
0546 // heaptrack_inject via LD_PRELOAD and have the right environment variables setup
0547 struct HeaptrackInjectPreloadInitialization
0548 {
0549     HeaptrackInjectPreloadInitialization()
0550     {
0551         const auto outputFileName = getenv("DUMP_HEAPTRACK_OUTPUT");
0552         if (!outputFileName) {
0553             // when the env var wasn't set, then this means we got runtime injected, don't do anything here
0554             return;
0555         }
0556         heaptrack_init(outputFileName, &overwrite_symbols, nullptr, &restore_symbols);
0557     }
0558 };
0559 
0560 static HeaptrackInjectPreloadInitialization heaptrackInjectPreloadInitialization;