File indexing completed on 2024-05-12 05:43:26

0001 /*
0002     Copyright (C) 2015 Volker Krause <vkrause@kde.org>
0003 
0004     This program is free software; you can redistribute it and/or modify it
0005     under the terms of the GNU Library General Public License as published by
0006     the Free Software Foundation; either version 2 of the License, or (at your
0007     option) any later version.
0008 
0009     This program is distributed in the hope that it will be useful, but WITHOUT
0010     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0011     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
0012     License for more details.
0013 
0014     You should have received a copy of the GNU General Public License
0015     along with this program.  If not, see <https://www.gnu.org/licenses/>.
0016 */
0017 
0018 #include "config-elf-dissector.h"
0019 #include "structurepackingcheck.h"
0020 
0021 #include <elf/elffileset.h>
0022 #if HAVE_DWARF
0023 #include <dwarf/dwarfinfo.h>
0024 #include <dwarf/dwarfdie.h>
0025 #include <dwarf/dwarfcudie.h>
0026 #include <dwarf/dwarfexpression.h>
0027 
0028 #include <dwarf.h>
0029 #endif
0030 
0031 #include <QBitArray>
0032 #include <QDebug>
0033 #include <QString>
0034 #include <QTextStream>
0035 
0036 #include <cassert>
0037 #include <iostream>
0038 
0039 void StructurePackingCheck::setElfFileSet(ElfFileSet* fileSet)
0040 {
0041     m_fileSet = fileSet;
0042 }
0043 
0044 void StructurePackingCheck::checkAll(DwarfInfo* info)
0045 {
0046 #if HAVE_DWARF
0047     assert(m_fileSet);
0048     if (!info)
0049         return;
0050 
0051     foreach (auto die, info->compilationUnits())
0052         checkDie(die);
0053 #endif
0054 }
0055 
0056 static QString printSummary(int structSize, int usedBytes, int usedBits, int optimalSize)
0057 {
0058     QString s;
0059     s += QLatin1String("Used bytes: ") + QString::number(usedBytes) + QLatin1Char('/') + QString::number(structSize);
0060     s += " (" + QString::number(double(usedBytes*100) / double(structSize), 'g', 4) + "%)\n";
0061     s += QLatin1String("Used bits: ") + QString::number(usedBits) + QLatin1Char('/') + QString::number(structSize*8);
0062     s += " (" + QString::number(double(usedBits*100) / double(structSize*8), 'g', 4) + "%)\n";
0063     if (optimalSize < structSize) {
0064         s += "Optimal size: " + QString::number(optimalSize) + " bytes (";
0065         const int saving = structSize - optimalSize;
0066         s += QString::number(-saving) + " bytes, " + QString::number(double(saving*100) / double(structSize), 'g', 4) + "%)\n";
0067     }
0068     return s;
0069 }
0070 
0071 #if HAVE_DWARF
0072 static int dataMemberLocation(DwarfDie *die)
0073 {
0074     const auto attr = die->attribute(DW_AT_data_member_location);
0075     if (attr.canConvert(QVariant::Int))
0076         return attr.toInt();
0077     qWarning() << "Cannot convert location of" << die->displayName() << ":" << attr.value<DwarfExpression>().displayString();
0078     return 0;
0079 }
0080 
0081 static bool compareMemberDiesByLocation(DwarfDie *lhs, DwarfDie *rhs)
0082 {
0083     const auto lhsLoc = dataMemberLocation(lhs);
0084     const auto rhsLoc = dataMemberLocation(rhs);
0085     if (lhsLoc == rhsLoc) {
0086         return lhs->attribute(DW_AT_bit_offset) > rhs->attribute(DW_AT_bit_offset);
0087     }
0088     return lhsLoc < rhsLoc;
0089 }
0090 #endif
0091 
0092 QString StructurePackingCheck::checkOneStructure(DwarfDie* structDie) const
0093 {
0094 #if HAVE_DWARF
0095     assert(structDie->tag() == DW_TAG_class_type || structDie->tag() == DW_TAG_structure_type);
0096 
0097     QVector<DwarfDie*> members;
0098     foreach (auto child, structDie->children()) {
0099         if (child->tag() == DW_TAG_member && !child->isStaticMember())
0100             members.push_back(child);
0101         else if (child->tag() == DW_TAG_inheritance)
0102             members.push_back(child);
0103     }
0104     std::sort(members.begin(), members.end(), compareMemberDiesByLocation);
0105 
0106     const int structSize = structDie->typeSize();
0107     int usedBytes;
0108     int usedBits;
0109     std::tie(usedBytes, usedBits) = computeStructureMemoryUsage(structDie, members);
0110     const int optimalSize = optimalStructureSize(structDie, members);
0111 
0112     QString s = printSummary(structSize, usedBytes, usedBits, optimalSize);
0113     s += '\n';
0114     s += printStructure(structDie, members);
0115     return s;
0116 #else
0117     return {};
0118 #endif
0119 }
0120 
0121 void StructurePackingCheck::checkDie(DwarfDie* die)
0122 {
0123 #if HAVE_DWARF
0124     if (die->tag() == DW_TAG_structure_type || die->tag() == DW_TAG_class_type) {
0125         QVector<DwarfDie*> members;
0126         foreach (auto child, die->children()) {
0127             if (child->tag() == DW_TAG_member && !child->isStaticMember())
0128                 members.push_back(child);
0129             else if (child->tag() == DW_TAG_inheritance)
0130                 members.push_back(child);
0131             else
0132                 checkDie(child);
0133         }
0134         std::sort(members.begin(), members.end(), compareMemberDiesByLocation);
0135 
0136         const int structSize = die->typeSize();
0137         if (structSize <= 0)
0138             return;
0139 
0140         int usedBytes;
0141         int usedBits;
0142         std::tie(usedBytes, usedBits) = computeStructureMemoryUsage(die, members);
0143         const int optimalSize = optimalStructureSize(die, members);
0144 
0145         if ((usedBytes != structSize || usedBits != structSize * 8) && optimalSize != structSize) {
0146             const QString loc = die->sourceLocation();
0147             if (m_duplicateCheck.contains(loc))
0148                 return;
0149             std::cout << printSummary(structSize, usedBytes, usedBits, optimalSize).toLocal8Bit().constData();
0150             std::cout << printStructure(die, members).toLocal8Bit().constData();
0151             std::cout << std::endl;
0152             m_duplicateCheck.insert(loc);
0153         }
0154 
0155     } else {
0156         foreach (auto child, die->children())
0157             checkDie(child);
0158     }
0159 #endif
0160 }
0161 
0162 static int countBytes(const QBitArray &bits)
0163 {
0164     int count = 0;
0165     for (int byteIndex = 0; byteIndex < bits.size() / 8; ++byteIndex) {
0166         for (int bitIndex = 0; bitIndex < 8; ++bitIndex) {
0167             if (bits[byteIndex * 8 + bitIndex]) {
0168                 ++count;
0169                 break;
0170             }
0171         }
0172     }
0173     return count;
0174 }
0175 
0176 static int countBits(const QBitArray &bits)
0177 {
0178     int count = 0;
0179     for (int i = 0; i < bits.size(); ++i) {
0180         if (bits[i])
0181             ++count;
0182     }
0183     return count;
0184 }
0185 
0186 #if HAVE_DWARF
0187 static int bitsForEnum(DwarfDie *die)
0188 {
0189     assert(die->tag() == DW_TAG_enumeration_type);
0190 
0191     // approach 1: count all covered bits
0192     QBitArray bits(die->typeSize() * 8, false);
0193     // approach 2: count number of enum values
0194     int enumCount = 0;
0195 
0196     foreach (auto child, die->children()) {
0197         if (child->tag() != DW_TAG_enumerator)
0198             continue;
0199         ++enumCount;
0200         const auto enumValue = child->attribute(DW_AT_const_value).toInt();
0201         for (int i = 0; i < bits.size(); ++i) {
0202             if ((1 << i) & enumValue)
0203                 bits[i] = true;
0204         }
0205     }
0206     if (enumCount == 0) {
0207         return die->typeSize() * 8; // incomplete information or something went wrong here...
0208     }
0209 
0210     // minimum of the above is our best guess
0211     return std::min(enumCount - 1, countBits(bits));
0212 }
0213 
0214 static int actualTypeSize(DwarfDie *die)
0215 {
0216     switch (die->tag()) {
0217         case DW_TAG_base_type:
0218             if (die->name() == "bool")
0219                 return 1;
0220             return die->typeSize() * 8;
0221         case DW_TAG_enumeration_type:
0222             return bitsForEnum(die);
0223         case DW_TAG_pointer_type:
0224             return die->typeSize() * 8; // TODO pointer alignment can save a few bits
0225         case DW_TAG_const_type:
0226         case DW_TAG_restrict_type:
0227         case DW_TAG_typedef:
0228         case DW_TAG_volatile_type:
0229         {
0230             const auto typeDie = die->attribute(DW_AT_type).value<DwarfDie*>();
0231             assert(typeDie);
0232             return actualTypeSize(typeDie);
0233         }
0234         case DW_TAG_array_type:
0235         {
0236             return die->typeSize() * 8; // TODO the below is correct, but we need to distribute that over the memory bit array below, otherwise usedBytes is wrong
0237 /*            const auto typeDie = die->attribute(DW_AT_type).value<DwarfDie*>();
0238             assert(typeDie);
0239             return die->typeSize() / typeDie->typeSize() * actualTypeSize(typeDie);*/
0240         }
0241     }
0242     return die->typeSize() * 8;
0243 }
0244 #endif
0245 
0246 std::tuple<int, int> StructurePackingCheck::computeStructureMemoryUsage(DwarfDie* structDie, const QVector< DwarfDie* >& memberDies) const
0247 {
0248 #if HAVE_DWARF
0249     const auto structSize = structDie->typeSize();
0250     if (structSize <= 0)
0251         return {};
0252 
0253     assert(structSize > 0);
0254     QBitArray memUsage(structSize * 8);
0255 
0256     for (DwarfDie *memberDie : memberDies) {
0257         const auto memberTypeDie = findTypeDefinition(memberDie->attribute(DW_AT_type).value<DwarfDie*>());
0258         assert(memberTypeDie);
0259 
0260         const auto memberLocation = dataMemberLocation(memberDie);
0261         const auto bitSize = memberDie->attribute(DW_AT_bit_size).toInt();
0262         const auto bitOffset = memberDie->attribute(DW_AT_bit_offset).toInt();
0263 
0264         if (bitSize <= 0) {
0265             assert((structSize * 8) >= (memberLocation * 8 + memberTypeDie->typeSize() * 8));
0266             memUsage.fill(true, memberLocation * 8, memberLocation * 8 + actualTypeSize(memberTypeDie));
0267         } else {
0268             assert((structSize * 8) >= (memberLocation * 8 + bitOffset + bitSize));
0269             memUsage.fill(true, memberLocation * 8 + bitOffset, memberLocation * 8 + bitOffset + bitSize);
0270         }
0271     }
0272 
0273     const auto usedBytes = countBytes(memUsage);
0274     const auto usedBits = countBits(memUsage);
0275     return std::make_tuple(usedBytes, usedBits);
0276 #else
0277     return std::make_tuple(0, 0);
0278 #endif
0279 }
0280 
0281 #if HAVE_DWARF
0282 static bool hasUnknownSize(DwarfDie *typeDie)
0283 {
0284     // 0-size elements can exist, see e.g. __flexarr in inotify.h
0285     return typeDie->typeSize() == 0 && (typeDie->tag() == DW_TAG_class_type || typeDie->tag() == DW_TAG_structure_type);
0286 }
0287 #endif
0288 
0289 QString StructurePackingCheck::printStructure(DwarfDie* structDie, const QVector<DwarfDie*>& memberDies) const
0290 {
0291     QString str;
0292 #if HAVE_DWARF
0293     QTextStream s(&str);
0294 
0295     s << (structDie->tag() == DW_TAG_class_type ? "class " : "struct ");
0296     s << structDie->fullyQualifiedName();
0297     s << " // location: " << structDie->sourceLocation();
0298     s << "\n{\n";
0299 
0300     bool skipPadding = false; // TODO this should not be needed, look outside of the current CU for the real DIE?
0301     int nextMemberLocation = 0;
0302     for (DwarfDie *memberDie : memberDies) {
0303         s << "    ";
0304 
0305         if (memberDie->tag() == DW_TAG_inheritance)
0306             s << "inherits ";
0307 
0308         DwarfDie *unresolvedTypeDie = memberDie->attribute(DW_AT_type).value<DwarfDie*>();
0309         const auto memberTypeDie = findTypeDefinition(unresolvedTypeDie);
0310         assert(memberTypeDie);
0311 
0312         const auto memberLocation = dataMemberLocation(memberDie);
0313         if (memberLocation > nextMemberLocation && !skipPadding) {
0314             s << "// " << (memberLocation - nextMemberLocation) << " byte(s) padding\n";
0315             s << "    ";
0316         }
0317 
0318         // we use the unresolved DIE here to have the user-visible type name, e.g. of a typedef
0319         s << unresolvedTypeDie->fullyQualifiedName();
0320         s << " ";
0321         s << memberDie->name();
0322 
0323         const auto bitSize = memberDie->attribute(DW_AT_bit_size).toInt();
0324         if (bitSize > 0) {
0325             s << ':' << bitSize;
0326         }
0327 
0328         s << "; // member offset: " << memberLocation;
0329 
0330         if (hasUnknownSize(memberTypeDie)) {
0331             s << ", unknown size";
0332             skipPadding = true;
0333         } else {
0334             s << ", size: " << memberTypeDie->typeSize();
0335             skipPadding = false;
0336 
0337             const auto actualSize = actualTypeSize(memberTypeDie);
0338             if (actualSize != memberTypeDie->typeSize() * 8)
0339                 s << " (needed: " << actualSize << " bits)";
0340         }
0341         s << ", alignment: " << memberTypeDie->typeAlignment();
0342 
0343         if (bitSize > 0) {
0344             const auto bitOffset = memberDie->attribute(DW_AT_bit_offset).toInt();
0345             s << ", bit offset: " << bitOffset;
0346         }
0347 
0348         s << "\n";
0349 
0350         nextMemberLocation = memberLocation + memberTypeDie->typeSize();
0351     }
0352 
0353     if (nextMemberLocation < structDie->typeSize() && !skipPadding)
0354         s << "    // " << (structDie->typeSize() - nextMemberLocation) << " byte(s) padding\n";
0355 
0356     s << "}; // size: " << structDie->typeSize();
0357     s << ", alignment: " << structDie->typeAlignment();
0358     s << "\n";
0359 #endif
0360     return str;
0361 }
0362 
0363 static bool isEmptyBaseClass(DwarfDie* inheritanceDie)
0364 {
0365 #if HAVE_DWARF
0366     assert(inheritanceDie->tag() == DW_TAG_inheritance);
0367     const auto baseTypeDie = inheritanceDie->attribute(DW_AT_type).value<DwarfDie*>();
0368     if (baseTypeDie->typeSize() != 1)
0369         return false;
0370 
0371     foreach (auto d, baseTypeDie->children()) {
0372         if (d->tag() == DW_TAG_member)
0373             return false;
0374         if (d->tag() != DW_TAG_inheritance)
0375             continue;
0376         if (!isEmptyBaseClass(d))
0377             return false;
0378     }
0379 #endif
0380     return true;
0381 }
0382 
0383 int StructurePackingCheck::optimalStructureSize(DwarfDie* structDie, const QVector< DwarfDie* >& memberDies) const
0384 {
0385     int size = 0;
0386 #if HAVE_DWARF
0387     int alignment = 1;
0388     QVector<int> sizes;
0389 
0390     // TODO: lots of stuff missing to compute optimal bit field layout
0391     int prevMemberLocation = -1;
0392     bool guessSize = false; // TODO see above, this probably needs better lookup for external types
0393     for (DwarfDie* memberDie : memberDies) {
0394         // consider empty base class optimization, they don't count, unless we are entirely empty, which is handled at the very end
0395         if (memberDie->tag() == DW_TAG_inheritance && isEmptyBaseClass(memberDie))
0396             continue;
0397 
0398         if (prevMemberLocation == dataMemberLocation(memberDie))
0399             continue; // skip bit fields for now
0400 
0401         const auto memberTypeDie = findTypeDefinition(memberDie->attribute(DW_AT_type).value<DwarfDie*>());
0402         assert(memberTypeDie);
0403 
0404         const auto memberLocation = dataMemberLocation(memberDie);
0405         if (guessSize)
0406             sizes.push_back(memberLocation - prevMemberLocation);
0407 
0408         guessSize = hasUnknownSize(memberTypeDie);
0409         sizes.push_back(memberTypeDie->typeSize());
0410         alignment = std::max(alignment, memberTypeDie->typeAlignment());
0411 
0412         prevMemberLocation = memberLocation;
0413     }
0414 
0415     if (guessSize)
0416         sizes.push_back(structDie->typeSize() - prevMemberLocation);
0417 
0418     // TODO: sort by alignment and add padding
0419     foreach (const auto s, sizes)
0420         size += s;
0421 
0422     // align the entire struct to maximum member alignment
0423     if (size % alignment)
0424         size += alignment - (size % alignment);
0425 #endif
0426 
0427     // structs are always at least 1 byte
0428     return std::max(1, size);
0429 }
0430 
0431 static DwarfDie* findTypeDefinitionRecursive(DwarfDie *die, const QVector<QByteArray> &fullId)
0432 {
0433 #if HAVE_DWARF
0434     // TODO filter to namespace/class/struct tags?
0435     if (die->name() != fullId.first())
0436         return nullptr;
0437     if (fullId.size() == 1)
0438         return die;
0439 
0440     QVector<QByteArray> partialId = fullId;
0441     partialId.pop_front();
0442     foreach (auto child, die->children()) {
0443         DwarfDie *found = findTypeDefinitionRecursive(child, partialId);
0444         if (found)
0445             return found;
0446     }
0447 #endif
0448     return nullptr;
0449 }
0450 
0451 DwarfDie* StructurePackingCheck::findTypeDefinition(DwarfDie* typeDie) const
0452 {
0453 #if HAVE_DWARF
0454     // recurse into typedefs
0455     if (typeDie->tag() == DW_TAG_typedef)
0456         return findTypeDefinition(typeDie->attribute(DW_AT_type).value<DwarfDie*>());
0457 
0458     if (!hasUnknownSize(typeDie))
0459         return typeDie;
0460 
0461     // determine the full identifier of the type
0462     QVector<QByteArray> fullId;
0463     DwarfDie *parentDie = typeDie;
0464     do {
0465         fullId.prepend(parentDie->name());
0466         parentDie = parentDie->parentDie();
0467     } while (parentDie && parentDie->tag() != DW_TAG_compile_unit);
0468 
0469     // sequential search in all CUs for a DIE with the same full id containing the full definition
0470     for (int i = 0; i < m_fileSet->size(); ++i) {
0471         const auto file = m_fileSet->file(i);
0472         if (!file->dwarfInfo())
0473             continue;
0474 
0475         foreach (auto cuDie, file->dwarfInfo()->compilationUnits()) {
0476             foreach (auto topDie, cuDie->children()) {
0477                 DwarfDie *die = findTypeDefinitionRecursive(topDie, fullId);
0478                 if (die && die->typeSize() > 0) {
0479                     //qDebug() << "replacing" << typeDie->displayName() << "with" << die->displayName();
0480                     return die;
0481                 }
0482             }
0483         }
0484     }
0485 
0486     // no luck
0487     qDebug() << "didn't fine a full definition for" << typeDie->displayName();
0488     return typeDie;
0489 #else
0490     return nullptr;
0491 #endif
0492 }