File indexing completed on 2024-05-12 17:15:54
0001 /* 0002 SPDX-FileCopyrightText: 2014-2017 Milian Wolff <mail@milianw.de> 0003 0004 SPDX-License-Identifier: LGPL-2.1-or-later 0005 */ 0006 0007 #include "libheaptrack.h" 0008 #include "util/config.h" 0009 #include "util/linewriter.h" 0010 0011 #include <tsl/robin_map.h> 0012 0013 #include <cstdlib> 0014 #include <cstring> 0015 0016 #include <dlfcn.h> 0017 #include <errno.h> 0018 #include <fcntl.h> 0019 #include <link.h> 0020 #include <unistd.h> 0021 0022 #include <limits.h> 0023 #include <sys/mman.h> 0024 #include <sys/stat.h> 0025 0026 #include <type_traits> 0027 0028 /** 0029 * @file heaptrack_inject.cpp 0030 * 0031 * @brief Experimental support for symbol overloading after runtime injection. 0032 */ 0033 0034 #if ULONG_MAX == 0xffffffffffffffff 0035 #define WORDSIZE 64 0036 #elif ULONG_MAX == 0xffffffff 0037 #define WORDSIZE 32 0038 #endif 0039 0040 #ifndef ELF_R_SYM 0041 #if WORDSIZE == 64 0042 #define ELF_R_SYM(i) ELF64_R_SYM(i) 0043 #elif WORDSIZE == 32 0044 #define ELF_R_SYM(i) ELF32_R_SYM(i) 0045 #else 0046 #error unsupported word size 0047 #endif 0048 #endif 0049 0050 #ifndef ElfW 0051 #if WORDSIZE == 64 0052 #define ElfW(type) Elf64_##type 0053 #elif WORDSIZE == 32 0054 #define ElfW(type) Elf32_##type 0055 #else 0056 #error unsupported word size 0057 #endif 0058 #endif 0059 0060 // NOTE: adding noexcept to C functions is a hard error in clang++ 0061 // (but not even a warning in GCC, even with -Wall) 0062 #if defined(__GNUC__) && !defined(__clang__) 0063 #define LIBC_FUN_ATTRS noexcept 0064 #else 0065 #define LIBC_FUN_ATTRS 0066 #endif 0067 0068 extern "C" { 0069 0070 // Foward declare mimalloc (https://github.com/microsoft/mimalloc) functions so we don't need to include its .h. 0071 __attribute__((weak)) void* mi_malloc(size_t size) LIBC_FUN_ATTRS; 0072 __attribute__((weak)) void* mi_calloc(size_t count, size_t size) LIBC_FUN_ATTRS; 0073 __attribute__((weak)) void* mi_realloc(void* p, size_t newsize) LIBC_FUN_ATTRS; 0074 __attribute__((weak)) void mi_free(void* p) LIBC_FUN_ATTRS; 0075 } 0076 0077 namespace { 0078 0079 namespace Elf { 0080 using Ehdr = ElfW(Ehdr); 0081 using Shdr = ElfW(Shdr); 0082 using Half = ElfW(Half); 0083 using Addr = ElfW(Addr); 0084 using Dyn = ElfW(Dyn); 0085 using Rel = ElfW(Rel); 0086 using Rela = ElfW(Rela); 0087 using Sym = ElfW(Sym); 0088 #if WORDSIZE == 64 0089 using Sxword = ElfW(Sxword); 0090 using Xword = ElfW(Xword); 0091 #else 0092 // FreeBSD elf32.h doesn't define Elf32_Sxword or _Xword. This is used in struct 0093 // elftable, where it's used as a tag value. Our Elf32_Dyn uses Elf32_Sword there, 0094 // as does the Linux definition (and the standard); the El64_Dyn uses Sxword. 0095 // 0096 // Linux elf.h defines Elf32_Sxword as a 64-bit quantity, so let's do that 0097 using Sxword = int64_t; 0098 using Xword = uint64_t; 0099 #endif 0100 } 0101 0102 void overwrite_symbols() noexcept; 0103 0104 namespace hooks { 0105 0106 struct malloc 0107 { 0108 static constexpr auto name = "malloc"; 0109 static constexpr auto original = &::malloc; 0110 0111 static void* hook(size_t size) noexcept 0112 { 0113 auto ptr = original(size); 0114 heaptrack_malloc(ptr, size); 0115 return ptr; 0116 } 0117 }; 0118 0119 struct free 0120 { 0121 static constexpr auto name = "free"; 0122 static constexpr auto original = &::free; 0123 0124 static void hook(void* ptr) noexcept 0125 { 0126 heaptrack_free(ptr); 0127 original(ptr); 0128 } 0129 }; 0130 0131 struct realloc 0132 { 0133 static constexpr auto name = "realloc"; 0134 static constexpr auto original = &::realloc; 0135 0136 static void* hook(void* ptr, size_t size) noexcept 0137 { 0138 auto inPtr = reinterpret_cast<uintptr_t>(ptr); 0139 auto ret = original(ptr, size); 0140 heaptrack_realloc2(inPtr, size, reinterpret_cast<uintptr_t>(ret)); 0141 0142 return ret; 0143 } 0144 }; 0145 0146 struct calloc 0147 { 0148 static constexpr auto name = "calloc"; 0149 static constexpr auto original = &::calloc; 0150 0151 static void* hook(size_t num, size_t size) noexcept 0152 { 0153 auto ptr = original(num, size); 0154 heaptrack_malloc(ptr, num * size); 0155 return ptr; 0156 } 0157 }; 0158 0159 #if HAVE_CFREE 0160 struct cfree 0161 { 0162 static constexpr auto name = "cfree"; 0163 static constexpr auto original = &::cfree; 0164 0165 static void hook(void* ptr) noexcept 0166 { 0167 heaptrack_free(ptr); 0168 original(ptr); 0169 } 0170 }; 0171 #endif 0172 0173 struct dlopen 0174 { 0175 static constexpr auto name = "dlopen"; 0176 static constexpr auto original = &::dlopen; 0177 0178 static void* hook(const char* filename, int flag) noexcept 0179 { 0180 auto ret = original(filename, flag); 0181 if (ret) { 0182 heaptrack_invalidate_module_cache(); 0183 overwrite_symbols(); 0184 } 0185 return ret; 0186 } 0187 }; 0188 0189 struct dlclose 0190 { 0191 static constexpr auto name = "dlclose"; 0192 static constexpr auto original = &::dlclose; 0193 0194 static int hook(void* handle) noexcept 0195 { 0196 auto ret = original(handle); 0197 if (!ret) { 0198 heaptrack_invalidate_module_cache(); 0199 } 0200 return ret; 0201 } 0202 }; 0203 0204 struct posix_memalign 0205 { 0206 static constexpr auto name = "posix_memalign"; 0207 static constexpr auto original = &::posix_memalign; 0208 0209 static int hook(void** memptr, size_t alignment, size_t size) noexcept 0210 { 0211 auto ret = original(memptr, alignment, size); 0212 if (!ret) { 0213 heaptrack_malloc(*memptr, size); 0214 } 0215 return ret; 0216 } 0217 }; 0218 0219 // mimalloc functions 0220 struct mi_malloc 0221 { 0222 static constexpr auto name = "mi_malloc"; 0223 static constexpr auto original = &::mi_malloc; 0224 0225 static void* hook(size_t size) noexcept 0226 { 0227 auto ptr = original(size); 0228 heaptrack_malloc(ptr, size); 0229 return ptr; 0230 } 0231 }; 0232 0233 struct mi_free 0234 { 0235 static constexpr auto name = "mi_free"; 0236 static constexpr auto original = &::mi_free; 0237 0238 static void hook(void* ptr) noexcept 0239 { 0240 heaptrack_free(ptr); 0241 original(ptr); 0242 } 0243 }; 0244 0245 struct mi_realloc 0246 { 0247 static constexpr auto name = "mi_realloc"; 0248 static constexpr auto original = &::mi_realloc; 0249 0250 static void* hook(void* ptr, size_t size) noexcept 0251 { 0252 auto ret = original(ptr, size); 0253 heaptrack_realloc(ptr, size, ret); 0254 return ret; 0255 } 0256 }; 0257 0258 struct mi_calloc 0259 { 0260 static constexpr auto name = "mi_calloc"; 0261 static constexpr auto original = &::mi_calloc; 0262 0263 static void* hook(size_t num, size_t size) noexcept 0264 { 0265 auto ptr = original(num, size); 0266 heaptrack_malloc(ptr, num * size); 0267 return ptr; 0268 } 0269 }; 0270 0271 template <typename Hook> 0272 bool hook(const char* symname, Elf::Addr addr, bool restore) 0273 { 0274 static_assert(std::is_convertible<decltype(&Hook::hook), decltype(Hook::original)>::value, 0275 "hook is not compatible to original function"); 0276 0277 if (strcmp(Hook::name, symname) != 0) { 0278 return false; 0279 } 0280 0281 // try to make the page read/write accessible, which is hackish 0282 // but apparently required for some shared libraries 0283 auto page = reinterpret_cast<void*>(addr & ~(0x1000 - 1)); 0284 mprotect(page, 0x1000, PROT_READ | PROT_WRITE); 0285 0286 // now write to the address 0287 auto typedAddr = reinterpret_cast<typename std::remove_const<decltype(Hook::original)>::type*>(addr); 0288 if (restore) { 0289 // restore the original address on shutdown 0290 *typedAddr = Hook::original; 0291 } else { 0292 // now actually inject our hook 0293 *typedAddr = &Hook::hook; 0294 } 0295 0296 return true; 0297 } 0298 0299 void apply(const char* symname, Elf::Addr addr, bool restore) 0300 { 0301 // TODO: use std::apply once we can rely on C++17 0302 hook<malloc>(symname, addr, restore) || hook<free>(symname, addr, restore) || hook<realloc>(symname, addr, restore) 0303 || hook<calloc>(symname, addr, restore) 0304 #if HAVE_CFREE 0305 || hook<cfree>(symname, addr, restore) 0306 #endif 0307 || hook<posix_memalign>(symname, addr, restore) || hook<dlopen>(symname, addr, restore) 0308 || hook<dlclose>(symname, addr, restore) 0309 // mimalloc functions 0310 || hook<mi_malloc>(symname, addr, restore) || hook<mi_free>(symname, addr, restore) 0311 || hook<mi_realloc>(symname, addr, restore) || hook<mi_calloc>(symname, addr, restore); 0312 } 0313 } 0314 0315 template <typename T, Elf::Sxword AddrTag, Elf::Sxword SizeTag> 0316 struct elftable 0317 { 0318 using type = T; 0319 Elf::Addr table = 0; 0320 Elf::Xword size = 0; 0321 0322 bool consume(const Elf::Dyn* dyn) noexcept 0323 { 0324 if (dyn->d_tag == AddrTag) { 0325 table = dyn->d_un.d_ptr; 0326 return true; 0327 } else if (dyn->d_tag == SizeTag) { 0328 size = dyn->d_un.d_val; 0329 return true; 0330 } 0331 return false; 0332 } 0333 0334 explicit operator bool() const noexcept 0335 { 0336 return table && size; 0337 } 0338 0339 T* start(Elf::Addr tableOffset) const noexcept 0340 { 0341 return reinterpret_cast<T*>(table + tableOffset); 0342 } 0343 0344 T* end(Elf::Addr tableOffset) const noexcept 0345 { 0346 return reinterpret_cast<T*>(table + tableOffset + size); 0347 } 0348 }; 0349 0350 using elf_string_table = elftable<const char, DT_STRTAB, DT_STRSZ>; 0351 using elf_rel_table = elftable<Elf::Rel, DT_REL, DT_RELSZ>; 0352 using elf_rela_table = elftable<Elf::Rela, DT_RELA, DT_RELASZ>; 0353 using elf_jmprel_table = elftable<Elf::Rela, DT_JMPREL, DT_PLTRELSZ>; 0354 using elf_symbol_table = elftable<const Elf::Sym, DT_SYMTAB, DT_SYMENT>; 0355 0356 template <typename Table> 0357 void try_overwrite_elftable(const Table& jumps, const elf_string_table& strings, const elf_symbol_table& symbols, 0358 const Elf::Addr base, const bool restore, const Elf::Xword symtabSize) noexcept 0359 { 0360 Elf::Addr tableOffset = 0361 #ifdef __linux__ 0362 0; // Already has memory addresses 0363 #elif defined(__FreeBSD__) 0364 base; // Only has ELF offsets 0365 #else 0366 #error port me 0367 #endif 0368 0369 const auto rela_start = jumps.start(tableOffset); 0370 const auto rela_end = jumps.end(tableOffset); 0371 0372 const auto sym_start = symbols.start(tableOffset); 0373 const auto sym_end = symbols.start(tableOffset + symtabSize); 0374 const auto num_syms = static_cast<uintptr_t>(sym_end - sym_start); 0375 0376 const auto str_start = strings.start(tableOffset); 0377 const auto str_end = strings.end(tableOffset); 0378 const auto num_str = static_cast<uintptr_t>(str_end - str_start); 0379 0380 for (auto rela = rela_start; rela < rela_end; rela++) { 0381 const auto sym_index = ELF_R_SYM(rela->r_info); 0382 if (sym_index < 0 || sym_index >= num_syms) { 0383 continue; 0384 } 0385 0386 const auto str_index = sym_start[sym_index].st_name; 0387 if (str_index < 0 || str_index >= num_str) { 0388 continue; 0389 } 0390 0391 const char* symname = str_start + str_index; 0392 0393 auto addr = rela->r_offset + base; 0394 hooks::apply(symname, addr, restore); 0395 } 0396 } 0397 0398 void try_overwrite_symbols(const Elf::Dyn* dyn, const Elf::Addr base, const bool restore, 0399 const Elf::Xword symtabSize) noexcept 0400 { 0401 elf_symbol_table symbols; 0402 elf_rel_table rels; 0403 elf_rela_table relas; 0404 elf_jmprel_table jmprels; 0405 elf_string_table strings; 0406 0407 // initialize the elf tables 0408 for (; dyn->d_tag != DT_NULL; ++dyn) { 0409 symbols.consume(dyn) || strings.consume(dyn) || rels.consume(dyn) || relas.consume(dyn) || jmprels.consume(dyn); 0410 } 0411 0412 if (!symbols || !strings) { 0413 return; 0414 } 0415 0416 // find symbols to overwrite 0417 if (rels) { 0418 try_overwrite_elftable(rels, strings, symbols, base, restore, symtabSize); 0419 } 0420 0421 if (relas) { 0422 try_overwrite_elftable(relas, strings, symbols, base, restore, symtabSize); 0423 } 0424 0425 if (jmprels) { 0426 try_overwrite_elftable(jmprels, strings, symbols, base, restore, symtabSize); 0427 } 0428 } 0429 0430 template <typename Cleanup> 0431 struct ScopeGuard 0432 { 0433 ScopeGuard(Cleanup cleanup) 0434 : cleanup(std::move(cleanup)) 0435 { 0436 } 0437 0438 ~ScopeGuard() 0439 { 0440 cleanup(); 0441 } 0442 0443 Cleanup cleanup; 0444 }; 0445 0446 template <typename Cleanup> 0447 auto scopeGuard(Cleanup cleanup) 0448 { 0449 return ScopeGuard<Cleanup>(std::move(cleanup)); 0450 } 0451 0452 Elf::Xword symtabSize(const char* path) 0453 { 0454 auto fd = open(path, O_RDONLY); 0455 if (fd == -1) { 0456 fprintf(stderr, "open failed: %s %s\n", path, strerror(errno)); 0457 return 0; 0458 } 0459 auto closeOnExit = scopeGuard([fd]() { close(fd); }); 0460 0461 struct stat stat_info; 0462 if (fstat(fd, &stat_info) != 0) { 0463 fprintf(stderr, "stat failed: %s %s\n", path, strerror(errno)); 0464 return 0; 0465 } 0466 0467 auto mapping = mmap(nullptr, stat_info.st_size, PROT_READ, MAP_SHARED, fd, 0); 0468 auto unmapOnExit = scopeGuard([&]() { munmap(mapping, stat_info.st_size); }); 0469 0470 const auto base = reinterpret_cast<ElfW(Addr)>(mapping); 0471 const auto ehdr = reinterpret_cast<const ElfW(Ehdr)*>(base); 0472 const auto shdr = reinterpret_cast<const ElfW(Shdr)*>(base + ehdr->e_shoff); 0473 0474 for (ElfW(Half) i = 0; i < ehdr->e_shnum; ++i) { 0475 if (shdr[i].sh_type == SHT_DYNSYM) { 0476 return shdr[i].sh_size; 0477 } 0478 } 0479 0480 fprintf(stderr, "failed to query symtab size: %s\n", path); 0481 return 0; 0482 } 0483 0484 Elf::Xword cachedSymtabSize(const char* path) 0485 { 0486 if (!strlen(path)) { 0487 path = "/proc/self/exe"; 0488 } 0489 0490 static tsl::robin_map<std::string, Elf::Xword> cache; 0491 0492 auto key = std::string(path); 0493 auto it = cache.find(path); 0494 if (it == cache.end()) { 0495 it = cache.insert(it, {std::move(key), symtabSize(path)}); 0496 } 0497 return it->second; 0498 } 0499 0500 int iterate_phdrs(dl_phdr_info* info, size_t /*size*/, void* data) noexcept 0501 { 0502 if (strstr(info->dlpi_name, "/libheaptrack_inject.so")) { 0503 // prevent infinite recursion: do not overwrite our own symbols 0504 return 0; 0505 } else if (strstr(info->dlpi_name, "/ld-linux")) { 0506 // prevent strange crashes due to overwriting the free symbol in ld-linux 0507 // (doesn't seem to be necessary in FreeBSD's ld-elf) 0508 return 0; 0509 } else if (strstr(info->dlpi_name, "linux-vdso.so")) { 0510 // don't overwrite anything within linux-vdso 0511 return 0; 0512 } 0513 0514 const auto symtabSize = cachedSymtabSize(info->dlpi_name); 0515 for (auto phdr = info->dlpi_phdr, end = phdr + info->dlpi_phnum; phdr != end; ++phdr) { 0516 if (phdr->p_type == PT_DYNAMIC) { 0517 try_overwrite_symbols(reinterpret_cast<const Elf::Dyn*>(phdr->p_vaddr + info->dlpi_addr), info->dlpi_addr, 0518 data != nullptr, symtabSize); 0519 } 0520 } 0521 return 0; 0522 } 0523 0524 void overwrite_symbols() noexcept 0525 { 0526 dl_iterate_phdr(&iterate_phdrs, nullptr); 0527 } 0528 0529 void restore_symbols() noexcept 0530 { 0531 bool do_shutdown = true; 0532 dl_iterate_phdr(&iterate_phdrs, &do_shutdown); 0533 } 0534 } 0535 0536 extern "C" { 0537 // this function is called when heaptrack_inject is runtime injected via GDB 0538 void heaptrack_inject(const char* outputFileName) noexcept 0539 { 0540 heaptrack_init( 0541 outputFileName, &overwrite_symbols, [](LineWriter& out) { out.write("A\n"); }, &restore_symbols); 0542 } 0543 } 0544 0545 // alternatively, the code below may initialize heaptrack when we use 0546 // heaptrack_inject via LD_PRELOAD and have the right environment variables setup 0547 struct HeaptrackInjectPreloadInitialization 0548 { 0549 HeaptrackInjectPreloadInitialization() 0550 { 0551 const auto outputFileName = getenv("DUMP_HEAPTRACK_OUTPUT"); 0552 if (!outputFileName) { 0553 // when the env var wasn't set, then this means we got runtime injected, don't do anything here 0554 return; 0555 } 0556 heaptrack_init(outputFileName, &overwrite_symbols, nullptr, &restore_symbols); 0557 } 0558 }; 0559 0560 static HeaptrackInjectPreloadInitialization heaptrackInjectPreloadInitialization;