File indexing completed on 2024-05-12 05:43:26

0001 /*
0002     Copyright (C) 2013-2014 Volker Krause <vkrause@kde.org>
0003 
0004     This program is free software; you can redistribute it and/or modify it
0005     under the terms of the GNU Library General Public License as published by
0006     the Free Software Foundation; either version 2 of the License, or (at your
0007     option) any later version.
0008 
0009     This program is distributed in the hope that it will be useful, but WITHOUT
0010     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0011     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
0012     License for more details.
0013 
0014     You should have received a copy of the GNU General Public License
0015     along with this program.  If not, see <https://www.gnu.org/licenses/>.
0016 */
0017 
0018 #include "disassembler.h"
0019 #include "config-elf-dissector.h"
0020 
0021 #include <elf/elfsymboltableentry.h>
0022 #include <elf/elfsymboltablesection.h>
0023 #include <elf/elffile.h>
0024 #include <elf/elfheader.h>
0025 #include <elf/elfpltentry.h>
0026 #include <elf/elfpltsection.h>
0027 #include <elf/elfgotsection.h>
0028 #include <elf/elfrelocationentry.h>
0029 #if HAVE_DWARF
0030 #include <dwarf/dwarfinfo.h>
0031 #include <dwarf/dwarfaddressranges.h>
0032 #include <dwarf/dwarfcudie.h>
0033 #include <dwarf/dwarfline.h>
0034 #endif
0035 
0036 #include <QDebug>
0037 #include <QString>
0038 #include <QUrl>
0039 
0040 #include <cassert>
0041 #include <cstdarg>
0042 
0043 #include <ansidecl.h>
0044 
0045 #define PACKAGE "elf-dissector"
0046 #define PACKAGE_VERSION "0.0.1"
0047 #include <dis-asm.h>
0048 #include <elf.h>
0049 #include <stdio.h>
0050 
0051 #if BINUTILS_VERSION >= BINUTILS_VERSION_CHECK(2, 29)
0052     // in binutils 2.29 print_insn_i386 disappeared from the dis-asm.h header,
0053     // not sure what the proper replacement for it is, so define it here
0054     // See commit 88c1242dc0a1e1ab582a65ea8bd05eb5f244c59b in binutils.
0055     extern "C" int print_insn_i386 (bfd_vma, disassemble_info *);
0056     extern "C" int print_insn_big_arm(bfd_vma, disassemble_info *);
0057     extern "C" int print_insn_little_arm(bfd_vma, disassemble_info *);
0058 #endif
0059 
0060 #ifdef HAVE_CAPSTONE
0061 #include <capstone.h>
0062 #endif
0063 
0064 static int qstring_printf(void *data, const char *format, ...)
0065 {
0066     QString buffer;
0067     va_list args;
0068     va_start(args, format);
0069     buffer.vsprintf(format, args);
0070     va_end(args);
0071 
0072     QString *s = static_cast<QString*>(data);
0073     s->append(buffer);
0074     return buffer.size();
0075 }
0076 
0077 static void print_address(bfd_vma addr, struct disassemble_info *info)
0078 {
0079     const auto disasm = static_cast<Disassembler*>(info->application_data);
0080     assert(disasm);
0081 
0082     // TODO handle relocations/PLT/etc
0083 
0084     (*info->fprintf_func) (info->stream, "0x%lx", addr);
0085     auto s = static_cast<QString*>(info->stream);
0086 
0087     const uint64_t targetAddr = disasm->baseAddress() + addr;
0088     disasm->printAddress(targetAddr, s);
0089 }
0090 
0091 Disassembler::Disassembler() = default;
0092 
0093 Disassembler::~Disassembler() = default;
0094 
0095 QString Disassembler::disassemble(ElfSection* section)
0096 {
0097     m_file = section->file();
0098     m_baseAddress = section->header()->virtualAddress();
0099     return disassemble(section->rawData(), section->size());
0100 }
0101 
0102 QString Disassembler::disassemble(ElfSymbolTableEntry* entry)
0103 {
0104     m_file = entry->symbolTable()->file();
0105     m_baseAddress = entry->value();
0106     return disassemble(entry->data(), entry->size());
0107 
0108 }
0109 
0110 QString Disassembler::disassemble(ElfPltEntry* entry)
0111 {
0112     m_file = entry->section()->file();
0113     m_baseAddress = entry->section()->header()->virtualAddress() + entry->index() * entry->size();
0114     return disassemble(entry->rawData(), entry->size());
0115 }
0116 
0117 QString Disassembler::disassemble(const unsigned char* data, uint64_t size)
0118 {
0119 #if defined(__x86_64__) || defined(__i386__)
0120     if (file()->header()->machine() == EM_386 || file()->header()->machine() == EM_X86_64) {
0121         return disassembleBinutils(data, size);
0122     }
0123 #endif
0124 
0125     return disassembleCapstone(data, size);
0126 }
0127 
0128 #if BINUTILS_VERSION >= BINUTILS_VERSION_CHECK(2, 39)
0129 static int fprintf_styled(void *, enum disassembler_style, const char* fmt, ...)
0130 {
0131     va_list args;
0132     int r;
0133 
0134     va_start(args, fmt);
0135     r = vprintf(fmt, args);
0136     va_end(args);
0137 
0138     return r;
0139 }
0140 #endif
0141 
0142 QString Disassembler::disassembleBinutils(const unsigned char* data, uint64_t size)
0143 {
0144     QString result;
0145     disassembler_ftype disassemble_fn;
0146     disassemble_info info;
0147 #if BINUTILS_VERSION >= BINUTILS_VERSION_CHECK(2, 39)
0148     INIT_DISASSEMBLE_INFO(info, &result, qstring_printf, fprintf_styled);
0149 #else
0150     INIT_DISASSEMBLE_INFO(info, &result, qstring_printf);
0151 #endif
0152 
0153     info.application_data = this;
0154     info.flavour = bfd_target_elf_flavour;
0155     info.endian = m_file->byteOrder() == ELFDATA2LSB ? BFD_ENDIAN_LITTLE : BFD_ENDIAN_BIG;
0156     switch (m_file->header()->machine()) {
0157 #if defined(__x86_64__) || defined(__i386__)
0158         case EM_386:
0159             info.arch = bfd_arch_i386;
0160             info.mach = bfd_mach_i386_i386;
0161             disassemble_fn = print_insn_i386;
0162             break;
0163         case EM_X86_64:
0164             info.arch = bfd_arch_i386;
0165             info.mach = bfd_mach_x86_64;
0166             disassemble_fn = print_insn_i386;
0167             break;
0168 #endif
0169 #if defined(__arm__)
0170         case EM_ARM:
0171             info.arch = bfd_arch_arm;
0172             info.mach = bfd_mach_arm_unknown;
0173             if (info.endian == BFD_ENDIAN_LITTLE)
0174                 disassemble_fn = print_insn_little_arm;
0175             else
0176                 disassemble_fn = print_insn_big_arm;
0177             break;
0178 #endif
0179         default:
0180             qWarning() << "Unsupported architecture!";
0181             return {};
0182     }
0183 
0184     info.buffer = const_cast<bfd_byte*>(data);
0185     info.buffer_length = size;
0186     info.buffer_vma = 0;
0187     info.print_address_func = print_address;
0188 
0189     uint32_t bytes = 0;
0190     while (bytes < size) {
0191 #if HAVE_DWARF
0192         auto line = lineForAddress(baseAddress() + bytes);
0193         if (!line.isNull())
0194             result += printSourceLine(line) + "<br/>";
0195 #endif
0196         result += QStringLiteral("%1: ").arg(bytes, 8, 10);
0197         bytes += (*disassemble_fn)(bytes, &info);
0198         result += QLatin1String("<br/>");
0199     }
0200 
0201     return result;
0202 }
0203 
#ifdef HAVE_CAPSTONE
// Checks whether the detail information of insn lists the given group id.
static bool isInsnGroup(cs_insn *insn, uint8_t group)
{
    const auto *current = insn->detail->groups;
    const auto *const end = current + insn->detail->groups_count;
    while (current != end) {
        if (*current == group)
            return true;
        ++current;
    }
    return false;
}
#endif
0214 
0215 QString Disassembler::disassembleCapstone(const unsigned char* data, uint64_t size)
0216 {
0217 #ifdef HAVE_CAPSTONE
0218     csh handle;
0219     cs_err err;
0220     switch (file()->header()->machine()) {
0221         case EM_386:
0222             err = cs_open(CS_ARCH_X86, CS_MODE_32, &handle);
0223             break;
0224         case EM_X86_64:
0225             err = cs_open(CS_ARCH_X86, CS_MODE_64, &handle);
0226             break;
0227         case EM_ARM:
0228             err = cs_open(CS_ARCH_ARM, CS_MODE_LITTLE_ENDIAN, &handle);
0229             break;
0230         case EM_AARCH64:
0231             err = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &handle);
0232             break;
0233         default:
0234             qWarning() << "Unsupported architecture!";
0235             return {};
0236     }
0237     if (err != CS_ERR_OK) {
0238         qWarning() << "Error opening Capstone handle:" << err;
0239         return {};
0240     }
0241     std::unique_ptr<csh, decltype(&cs_close)> handleGuard(&handle, &cs_close);
0242     cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
0243     cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON);
0244 
0245     cs_insn *insn = cs_malloc(handle);
0246     const auto insnFree = [](cs_insn *insn) { cs_free(insn, 1); };
0247     std::unique_ptr<cs_insn, decltype(insnFree)> insnGuard(insn, insnFree);
0248 
0249     auto address = baseAddress();
0250     QString result;
0251 
0252     size_t cs_size = size; // force to size_t for 32bit host support
0253     while (cs_size > 0) {
0254         if (!cs_disasm_iter(handle, &data, &cs_size, &address, insn)) {
0255             return result;
0256         }
0257 
0258 #if HAVE_DWARF
0259         const auto line = lineForAddress(insn->address);
0260         if (!line.isNull())
0261             result += printSourceLine(line) + "<br/>";
0262 #endif
0263 
0264         result += QString::number(insn->address - baseAddress()) + ": " + insn->mnemonic + QLatin1Char(' ') + insn->op_str;
0265         switch (file()->header()->machine()) {
0266             case EM_386:
0267             case EM_X86_64:
0268                 for (int i = 0; i < insn->detail->x86.op_count; ++i) {
0269                     const auto op = insn->detail->x86.operands[i];
0270                     if (op.type == X86_OP_MEM) {
0271                         result += QLatin1String(" # 0x") + QString::number(op.mem.disp + address, 16);
0272                         printAddress(op.mem.disp + address, &result);
0273                     } else if (op.type == X86_OP_IMM) {
0274                         result += QLatin1String(" # 0x") + QString::number(op.imm, 16);
0275                         printAddress(op.imm, &result);
0276                     }
0277                 }
0278                 break;
0279             case EM_AARCH64:
0280                 for (int i = 0; i < insn->detail->arm64.op_count; ++i) {
0281                     const auto op = insn->detail->arm64.operands[i];
0282                     if (op.type == ARM64_OP_MEM && (isInsnGroup(insn, CS_GRP_CALL) || isInsnGroup(insn, CS_GRP_JUMP))) {
0283                         result += QLatin1String(" # 0x") + QString::number(op.mem.disp + address, 16);
0284                         printAddress(op.mem.disp + address, &result);
0285                     } else if (op.type == ARM64_OP_IMM && (isInsnGroup(insn, CS_GRP_CALL) || isInsnGroup(insn, CS_GRP_JUMP) || insn->id == ARM64_INS_ADRP)) {
0286                         result += QLatin1String(" # 0x") + QString::number(op.imm, 16);
0287                         printAddress(op.imm, &result);
0288                     }
0289                 }
0290                 break;
0291             default:
0292                 break;
0293         }
0294         result += "<br/>";
0295     }
0296 
0297     return result;
0298 #else
0299     return {};
0300 #endif
0301 }
0302 
0303 ElfFile* Disassembler::file() const
0304 {
0305     return m_file;
0306 }
0307 
0308 uint64_t Disassembler::baseAddress() const
0309 {
0310     return m_baseAddress;
0311 }
0312 
0313 void Disassembler::printAddress(uint64_t addr, QString *s) const
0314 {
0315     if (auto symbolTable = file()->symbolTable()) {
0316         const auto target = symbolTable->entryContainingValue(addr);
0317         if (target) {
0318             s->append(" (");
0319             s->append(printSymbol(target));
0320             if (target->value() < addr) {
0321                 s->append(QLatin1String("+0x") + QString::number(addr - target->value(), 16));
0322             }
0323             s->append(')');
0324             return;
0325         }
0326     }
0327 
0328     const auto secIdx = file()->indexOfSectionWithVirtualAddress(addr);
0329     if (secIdx < 0)
0330         return;
0331 
0332     const auto section = file()->section<ElfSection>(secIdx);
0333     assert(section);
0334 
0335     const auto pltSection = dynamic_cast<ElfPltSection*>(section);
0336     if (pltSection) {
0337         const auto pltEntry = pltSection->entry((addr - section->header()->virtualAddress()) / section->header()->entrySize());
0338         assert(pltEntry);
0339         s->append(" (");
0340         s->append(printPltEntry(pltEntry));
0341         s->append(')');
0342         return;
0343     }
0344 
0345     const auto gotSection = dynamic_cast<ElfGotSection*>(section);
0346     if (gotSection) {
0347         const auto gotEntry = gotSection->entry((addr - section->header()->virtualAddress()) / file()->addressSize());
0348         assert(gotEntry);
0349         s->append(" (");
0350         s->append(printGotEntry(gotEntry));
0351         s->append(')');
0352         return;
0353     }
0354 
0355     s->append(QLatin1String(" (") + section->header()->name() + QLatin1String(" + 0x") + QString::number(addr - section->header()->virtualAddress(), 16) + QLatin1Char(')'));
0356 }
0357 
0358 QString Disassembler::printSymbol(ElfSymbolTableEntry* entry) const
0359 {
0360     return QLatin1String(entry->name());
0361 }
0362 
0363 QString Disassembler::printGotEntry(ElfGotEntry* entry) const
0364 {
0365     const auto reloc = entry->relocation();
0366     const auto sym = reloc ? reloc->symbol() : nullptr;
0367     if (sym)
0368         return sym->name() + QStringLiteral("@got");
0369     return entry->section()->header()->name() + QStringLiteral(" + 0x") + QString::number(entry->index() * entry->section()->file()->addressSize());
0370 }
0371 
0372 QString Disassembler::printPltEntry(ElfPltEntry* entry) const
0373 {
0374     const auto gotEntry = entry->gotEntry();
0375     const auto reloc = gotEntry ? gotEntry->relocation() : nullptr;
0376     const auto sym = reloc ? reloc->symbol() : nullptr;
0377     if (sym)
0378         return sym->name() + QStringLiteral("@plt");
0379     return entry->section()->header()->name() + QStringLiteral(" + 0x") + QString::number(entry->index() * entry->section()->header()->entrySize());
0380 }
0381 
0382 #if HAVE_DWARF
0383 DwarfLine Disassembler::lineForAddress(uint64_t addr) const
0384 {
0385     if (!file()->dwarfInfo())
0386         return {};
0387 
0388     auto cu = file()->dwarfInfo()->compilationUnitForAddress(addr);
0389     if (!cu)
0390         return {};
0391     return cu->lineForAddress(addr);
0392 }
0393 
0394 QString Disassembler::printSourceLine(DwarfLine line) const
0395 {
0396     assert(!line.isNull());
0397     auto cu = file()->dwarfInfo()->compilationUnitForAddress(line.address());
0398     assert(cu);
0399 
0400     QUrl url;
0401     url.setScheme(QStringLiteral("code"));
0402     url.setPath(cu->sourceFileForLine(line));
0403     url.setFragment(QString::number(line.line()));
0404 
0405     QString s;
0406     s += "<i>Source: <a href=\"" + url.toEncoded() + "\">" + cu->sourceFileForLine(line);
0407     s += ':' + QString::number(line.line()) + "</a></i>";
0408     return s;
0409 }
0410 #endif