// File indexing completed on 2024-05-12 05:43:26
0001 /* 0002 Copyright (C) 2013-2014 Volker Krause <vkrause@kde.org> 0003 0004 This program is free software; you can redistribute it and/or modify it 0005 under the terms of the GNU Library General Public License as published by 0006 the Free Software Foundation; either version 2 of the License, or (at your 0007 option) any later version. 0008 0009 This program is distributed in the hope that it will be useful, but WITHOUT 0010 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 0011 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 0012 License for more details. 0013 0014 You should have received a copy of the GNU General Public License 0015 along with this program. If not, see <https://www.gnu.org/licenses/>. 0016 */ 0017 0018 #include "disassembler.h" 0019 #include "config-elf-dissector.h" 0020 0021 #include <elf/elfsymboltableentry.h> 0022 #include <elf/elfsymboltablesection.h> 0023 #include <elf/elffile.h> 0024 #include <elf/elfheader.h> 0025 #include <elf/elfpltentry.h> 0026 #include <elf/elfpltsection.h> 0027 #include <elf/elfgotsection.h> 0028 #include <elf/elfrelocationentry.h> 0029 #if HAVE_DWARF 0030 #include <dwarf/dwarfinfo.h> 0031 #include <dwarf/dwarfaddressranges.h> 0032 #include <dwarf/dwarfcudie.h> 0033 #include <dwarf/dwarfline.h> 0034 #endif 0035 0036 #include <QDebug> 0037 #include <QString> 0038 #include <QUrl> 0039 0040 #include <cassert> 0041 #include <cstdarg> 0042 0043 #include <ansidecl.h> 0044 0045 #define PACKAGE "elf-dissector" 0046 #define PACKAGE_VERSION "0.0.1" 0047 #include <dis-asm.h> 0048 #include <elf.h> 0049 #include <stdio.h> 0050 0051 #if BINUTILS_VERSION >= BINUTILS_VERSION_CHECK(2, 29) 0052 // in binutils 2.29 print_insn_i386 disappeared from the dis-asm.h header, 0053 // not sure what the proper replacement for it is, so define it here 0054 // See commit 88c1242dc0a1e1ab582a65ea8bd05eb5f244c59b in binutils. 
0055 extern "C" int print_insn_i386 (bfd_vma, disassemble_info *); 0056 extern "C" int print_insn_big_arm(bfd_vma, disassemble_info *); 0057 extern "C" int print_insn_little_arm(bfd_vma, disassemble_info *); 0058 #endif 0059 0060 #ifdef HAVE_CAPSTONE 0061 #include <capstone.h> 0062 #endif 0063 0064 static int qstring_printf(void *data, const char *format, ...) 0065 { 0066 QString buffer; 0067 va_list args; 0068 va_start(args, format); 0069 buffer.vsprintf(format, args); 0070 va_end(args); 0071 0072 QString *s = static_cast<QString*>(data); 0073 s->append(buffer); 0074 return buffer.size(); 0075 } 0076 0077 static void print_address(bfd_vma addr, struct disassemble_info *info) 0078 { 0079 const auto disasm = static_cast<Disassembler*>(info->application_data); 0080 assert(disasm); 0081 0082 // TODO handle relocations/PLT/etc 0083 0084 (*info->fprintf_func) (info->stream, "0x%lx", addr); 0085 auto s = static_cast<QString*>(info->stream); 0086 0087 const uint64_t targetAddr = disasm->baseAddress() + addr; 0088 disasm->printAddress(targetAddr, s); 0089 } 0090 0091 Disassembler::Disassembler() = default; 0092 0093 Disassembler::~Disassembler() = default; 0094 0095 QString Disassembler::disassemble(ElfSection* section) 0096 { 0097 m_file = section->file(); 0098 m_baseAddress = section->header()->virtualAddress(); 0099 return disassemble(section->rawData(), section->size()); 0100 } 0101 0102 QString Disassembler::disassemble(ElfSymbolTableEntry* entry) 0103 { 0104 m_file = entry->symbolTable()->file(); 0105 m_baseAddress = entry->value(); 0106 return disassemble(entry->data(), entry->size()); 0107 0108 } 0109 0110 QString Disassembler::disassemble(ElfPltEntry* entry) 0111 { 0112 m_file = entry->section()->file(); 0113 m_baseAddress = entry->section()->header()->virtualAddress() + entry->index() * entry->size(); 0114 return disassemble(entry->rawData(), entry->size()); 0115 } 0116 0117 QString Disassembler::disassemble(const unsigned char* data, uint64_t size) 0118 { 0119 
#if defined(__x86_64__) || defined(__i386__) 0120 if (file()->header()->machine() == EM_386 || file()->header()->machine() == EM_X86_64) { 0121 return disassembleBinutils(data, size); 0122 } 0123 #endif 0124 0125 return disassembleCapstone(data, size); 0126 } 0127 0128 #if BINUTILS_VERSION >= BINUTILS_VERSION_CHECK(2, 39) 0129 static int fprintf_styled(void *, enum disassembler_style, const char* fmt, ...) 0130 { 0131 va_list args; 0132 int r; 0133 0134 va_start(args, fmt); 0135 r = vprintf(fmt, args); 0136 va_end(args); 0137 0138 return r; 0139 } 0140 #endif 0141 0142 QString Disassembler::disassembleBinutils(const unsigned char* data, uint64_t size) 0143 { 0144 QString result; 0145 disassembler_ftype disassemble_fn; 0146 disassemble_info info; 0147 #if BINUTILS_VERSION >= BINUTILS_VERSION_CHECK(2, 39) 0148 INIT_DISASSEMBLE_INFO(info, &result, qstring_printf, fprintf_styled); 0149 #else 0150 INIT_DISASSEMBLE_INFO(info, &result, qstring_printf); 0151 #endif 0152 0153 info.application_data = this; 0154 info.flavour = bfd_target_elf_flavour; 0155 info.endian = m_file->byteOrder() == ELFDATA2LSB ? 
BFD_ENDIAN_LITTLE : BFD_ENDIAN_BIG; 0156 switch (m_file->header()->machine()) { 0157 #if defined(__x86_64__) || defined(__i386__) 0158 case EM_386: 0159 info.arch = bfd_arch_i386; 0160 info.mach = bfd_mach_i386_i386; 0161 disassemble_fn = print_insn_i386; 0162 break; 0163 case EM_X86_64: 0164 info.arch = bfd_arch_i386; 0165 info.mach = bfd_mach_x86_64; 0166 disassemble_fn = print_insn_i386; 0167 break; 0168 #endif 0169 #if defined(__arm__) 0170 case EM_ARM: 0171 info.arch = bfd_arch_arm; 0172 info.mach = bfd_mach_arm_unknown; 0173 if (info.endian == BFD_ENDIAN_LITTLE) 0174 disassemble_fn = print_insn_little_arm; 0175 else 0176 disassemble_fn = print_insn_big_arm; 0177 break; 0178 #endif 0179 default: 0180 qWarning() << "Unsupported architecture!"; 0181 return {}; 0182 } 0183 0184 info.buffer = const_cast<bfd_byte*>(data); 0185 info.buffer_length = size; 0186 info.buffer_vma = 0; 0187 info.print_address_func = print_address; 0188 0189 uint32_t bytes = 0; 0190 while (bytes < size) { 0191 #if HAVE_DWARF 0192 auto line = lineForAddress(baseAddress() + bytes); 0193 if (!line.isNull()) 0194 result += printSourceLine(line) + "<br/>"; 0195 #endif 0196 result += QStringLiteral("%1: ").arg(bytes, 8, 10); 0197 bytes += (*disassemble_fn)(bytes, &info); 0198 result += QLatin1String("<br/>"); 0199 } 0200 0201 return result; 0202 } 0203 0204 #ifdef HAVE_CAPSTONE 0205 static bool isInsnGroup(cs_insn *insn, uint8_t group) 0206 { 0207 for (uint8_t i = 0; i < insn->detail->groups_count; ++i) { 0208 if (insn->detail->groups[i] == group) 0209 return true; 0210 } 0211 return false; 0212 } 0213 #endif 0214 0215 QString Disassembler::disassembleCapstone(const unsigned char* data, uint64_t size) 0216 { 0217 #ifdef HAVE_CAPSTONE 0218 csh handle; 0219 cs_err err; 0220 switch (file()->header()->machine()) { 0221 case EM_386: 0222 err = cs_open(CS_ARCH_X86, CS_MODE_32, &handle); 0223 break; 0224 case EM_X86_64: 0225 err = cs_open(CS_ARCH_X86, CS_MODE_64, &handle); 0226 break; 0227 case EM_ARM: 
0228 err = cs_open(CS_ARCH_ARM, CS_MODE_LITTLE_ENDIAN, &handle); 0229 break; 0230 case EM_AARCH64: 0231 err = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &handle); 0232 break; 0233 default: 0234 qWarning() << "Unsupported architecture!"; 0235 return {}; 0236 } 0237 if (err != CS_ERR_OK) { 0238 qWarning() << "Error opening Capstone handle:" << err; 0239 return {}; 0240 } 0241 std::unique_ptr<csh, decltype(&cs_close)> handleGuard(&handle, &cs_close); 0242 cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); 0243 cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON); 0244 0245 cs_insn *insn = cs_malloc(handle); 0246 const auto insnFree = [](cs_insn *insn) { cs_free(insn, 1); }; 0247 std::unique_ptr<cs_insn, decltype(insnFree)> insnGuard(insn, insnFree); 0248 0249 auto address = baseAddress(); 0250 QString result; 0251 0252 size_t cs_size = size; // force to size_t for 32bit host support 0253 while (cs_size > 0) { 0254 if (!cs_disasm_iter(handle, &data, &cs_size, &address, insn)) { 0255 return result; 0256 } 0257 0258 #if HAVE_DWARF 0259 const auto line = lineForAddress(insn->address); 0260 if (!line.isNull()) 0261 result += printSourceLine(line) + "<br/>"; 0262 #endif 0263 0264 result += QString::number(insn->address - baseAddress()) + ": " + insn->mnemonic + QLatin1Char(' ') + insn->op_str; 0265 switch (file()->header()->machine()) { 0266 case EM_386: 0267 case EM_X86_64: 0268 for (int i = 0; i < insn->detail->x86.op_count; ++i) { 0269 const auto op = insn->detail->x86.operands[i]; 0270 if (op.type == X86_OP_MEM) { 0271 result += QLatin1String(" # 0x") + QString::number(op.mem.disp + address, 16); 0272 printAddress(op.mem.disp + address, &result); 0273 } else if (op.type == X86_OP_IMM) { 0274 result += QLatin1String(" # 0x") + QString::number(op.imm, 16); 0275 printAddress(op.imm, &result); 0276 } 0277 } 0278 break; 0279 case EM_AARCH64: 0280 for (int i = 0; i < insn->detail->arm64.op_count; ++i) { 0281 const auto op = insn->detail->arm64.operands[i]; 0282 if (op.type == 
ARM64_OP_MEM && (isInsnGroup(insn, CS_GRP_CALL) || isInsnGroup(insn, CS_GRP_JUMP))) { 0283 result += QLatin1String(" # 0x") + QString::number(op.mem.disp + address, 16); 0284 printAddress(op.mem.disp + address, &result); 0285 } else if (op.type == ARM64_OP_IMM && (isInsnGroup(insn, CS_GRP_CALL) || isInsnGroup(insn, CS_GRP_JUMP) || insn->id == ARM64_INS_ADRP)) { 0286 result += QLatin1String(" # 0x") + QString::number(op.imm, 16); 0287 printAddress(op.imm, &result); 0288 } 0289 } 0290 break; 0291 default: 0292 break; 0293 } 0294 result += "<br/>"; 0295 } 0296 0297 return result; 0298 #else 0299 return {}; 0300 #endif 0301 } 0302 0303 ElfFile* Disassembler::file() const 0304 { 0305 return m_file; 0306 } 0307 0308 uint64_t Disassembler::baseAddress() const 0309 { 0310 return m_baseAddress; 0311 } 0312 0313 void Disassembler::printAddress(uint64_t addr, QString *s) const 0314 { 0315 if (auto symbolTable = file()->symbolTable()) { 0316 const auto target = symbolTable->entryContainingValue(addr); 0317 if (target) { 0318 s->append(" ("); 0319 s->append(printSymbol(target)); 0320 if (target->value() < addr) { 0321 s->append(QLatin1String("+0x") + QString::number(addr - target->value(), 16)); 0322 } 0323 s->append(')'); 0324 return; 0325 } 0326 } 0327 0328 const auto secIdx = file()->indexOfSectionWithVirtualAddress(addr); 0329 if (secIdx < 0) 0330 return; 0331 0332 const auto section = file()->section<ElfSection>(secIdx); 0333 assert(section); 0334 0335 const auto pltSection = dynamic_cast<ElfPltSection*>(section); 0336 if (pltSection) { 0337 const auto pltEntry = pltSection->entry((addr - section->header()->virtualAddress()) / section->header()->entrySize()); 0338 assert(pltEntry); 0339 s->append(" ("); 0340 s->append(printPltEntry(pltEntry)); 0341 s->append(')'); 0342 return; 0343 } 0344 0345 const auto gotSection = dynamic_cast<ElfGotSection*>(section); 0346 if (gotSection) { 0347 const auto gotEntry = gotSection->entry((addr - section->header()->virtualAddress()) / 
file()->addressSize()); 0348 assert(gotEntry); 0349 s->append(" ("); 0350 s->append(printGotEntry(gotEntry)); 0351 s->append(')'); 0352 return; 0353 } 0354 0355 s->append(QLatin1String(" (") + section->header()->name() + QLatin1String(" + 0x") + QString::number(addr - section->header()->virtualAddress(), 16) + QLatin1Char(')')); 0356 } 0357 0358 QString Disassembler::printSymbol(ElfSymbolTableEntry* entry) const 0359 { 0360 return QLatin1String(entry->name()); 0361 } 0362 0363 QString Disassembler::printGotEntry(ElfGotEntry* entry) const 0364 { 0365 const auto reloc = entry->relocation(); 0366 const auto sym = reloc ? reloc->symbol() : nullptr; 0367 if (sym) 0368 return sym->name() + QStringLiteral("@got"); 0369 return entry->section()->header()->name() + QStringLiteral(" + 0x") + QString::number(entry->index() * entry->section()->file()->addressSize()); 0370 } 0371 0372 QString Disassembler::printPltEntry(ElfPltEntry* entry) const 0373 { 0374 const auto gotEntry = entry->gotEntry(); 0375 const auto reloc = gotEntry ? gotEntry->relocation() : nullptr; 0376 const auto sym = reloc ? 
reloc->symbol() : nullptr; 0377 if (sym) 0378 return sym->name() + QStringLiteral("@plt"); 0379 return entry->section()->header()->name() + QStringLiteral(" + 0x") + QString::number(entry->index() * entry->section()->header()->entrySize()); 0380 } 0381 0382 #if HAVE_DWARF 0383 DwarfLine Disassembler::lineForAddress(uint64_t addr) const 0384 { 0385 if (!file()->dwarfInfo()) 0386 return {}; 0387 0388 auto cu = file()->dwarfInfo()->compilationUnitForAddress(addr); 0389 if (!cu) 0390 return {}; 0391 return cu->lineForAddress(addr); 0392 } 0393 0394 QString Disassembler::printSourceLine(DwarfLine line) const 0395 { 0396 assert(!line.isNull()); 0397 auto cu = file()->dwarfInfo()->compilationUnitForAddress(line.address()); 0398 assert(cu); 0399 0400 QUrl url; 0401 url.setScheme(QStringLiteral("code")); 0402 url.setPath(cu->sourceFileForLine(line)); 0403 url.setFragment(QString::number(line.line())); 0404 0405 QString s; 0406 s += "<i>Source: <a href=\"" + url.toEncoded() + "\">" + cu->sourceFileForLine(line); 0407 s += ':' + QString::number(line.line()) + "</a></i>"; 0408 return s; 0409 } 0410 #endif