src/analyze/accumulatedtracedata.cpp

0001 /*
0002     SPDX-FileCopyrightText: 2015-2020 Milian Wolff <mail@milianw.de>
0003
0004     SPDX-License-Identifier: LGPL-2.1-or-later
0005 */
0006
0007 #include "accumulatedtracedata.h"
0008 #include "analyze_config.h"
0009
0010 #include <algorithm>
0011 #include <cassert>
0012 #include <iostream>
0013 #include <memory>
0014
0015 #include <boost/algorithm/string/predicate.hpp>
0016 #include <boost/iostreams/filter/gzip.hpp>
0017 #if ZSTD_FOUND
0018 #if BOOST_IOSTREAMS_HAS_ZSTD
0019 #include <boost/iostreams/filter/zstd.hpp>
0020 #else
0021 #include <boost-zstd/zstd.hpp>
0022 #endif
0023 #endif
0024 #include <boost/iostreams/filtering_stream.hpp>
0025
0026 #include <boost/filesystem.hpp>
0027
0028 #include "util/config.h"
0029 #include "util/linereader.h"
0030 #include "util/macroutils.h"
0031 #include "util/pointermap.h"
0032
0033 #include "suppressions.h"
0034
0035 using namespace std;
0036
0037 namespace {
0038
0039 template <typename Base>
0040 bool operator>>(LineReader& reader, Index<Base>& index)
0041 {
0042     return reader.readHex(index.index);
0043 }
0044
0045 template <typename Base>
0046 ostream& operator<<(ostream& out, const Index<Base> index)
0047 {
0048     out << index.index;
0049     return out;
0050 }
0051
0052 // boost's counter filter uses an int for the count which overflows for large streams; so replace it with a work alike.
0053 class byte_counter
0054 {
0055 public:
0056     using char_type = char;
0057     using category = boost::iostreams::multichar_input_filter_tag;
0058
0059     uint64_t bytes() const
0060     {
0061         return m_bytes;
0062     }
0063
0064     template <typename Source>
0065     std::streamsize read(Source& src, char* str, std::streamsize size)
0066     {
0067         auto const readsize = boost::iostreams::read(src, str, size);
0068         if (readsize == -1)
0069             return -1;
0070         m_bytes += readsize;
0071         return readsize;
0072     }
0073
0074 private:
0075     uint64_t m_bytes = 0;
0076 };
0077 }
0078
0079 AccumulatedTraceData::AccumulatedTraceData()
0080 {
0081     instructionPointers.reserve(16384);
0082     traces.reserve(65536);
0083     strings.reserve(4096);
0084     allocations.reserve(16384);
0085     traceIndexToAllocationIndex.reserve(16384);
0086     stopIndices.reserve(4);
0087     opNewIpIndices.reserve(16);
0088 }
0089
0090 AccumulatedTraceData::~AccumulatedTraceData() = default;
0091
0092 const string& AccumulatedTraceData::stringify(const StringIndex stringId) const
0093 {
0094     if (!stringId || stringId.index > strings.size()) {
0095         static const string empty;
0096         return empty;
0097     } else {
0098         return strings.at(stringId.index - 1);
0099     }
0100 }
0101
0102 string AccumulatedTraceData::prettyFunction(const string& function) const
0103 {
0104     if (!shortenTemplates) {
0105         return function;
0106     }
0107     string ret;
0108     ret.reserve(function.size());
0109     int depth = 0;
0110     for (size_t i = 0; i < function.size(); ++i) {
0111         const auto c = function[i];
0112         if ((c == '<' || c == '>') && ret.size() >= 8) {
0113             // don't get confused by C++ operators
0114             const char* cmp = "operator";
0115             if (ret.back() == c) {
0116                 // skip second angle bracket for operator<< or operator>>
0117                 if (c == '<') {
0118                     cmp = "operator<";
0119                 } else {
0120                     cmp = "operator>";
0121                 }
0122             }
0123             if (boost::algorithm::ends_with(ret, cmp)) {
0124                 ret.push_back(c);
0125                 continue;
0126             }
0127         }
0128         if (c == '<') {
0129             ++depth;
0130             if (depth == 1) {
0131                 ret.push_back(c);
0132             }
0133         } else if (c == '>') {
0134             --depth;
0135         }
0136         if (depth) {
0137             continue;
0138         }
0139         ret.push_back(c);
0140     }
0141     return ret;
0142 }
0143
0144 bool AccumulatedTraceData::read(const string& inputFile, bool isReparsing)
0145 {
0146     return read(inputFile, FirstPass, isReparsing) && read(inputFile, SecondPass, isReparsing);
0147 }
0148
0149 bool AccumulatedTraceData::read(const string& inputFile, const ParsePass pass, bool isReparsing)
0150 {
0151     const bool isGzCompressed = boost::algorithm::ends_with(inputFile, ".gz");
0152     const bool isZstdCompressed = boost::algorithm::ends_with(inputFile, ".zst");
0153     const bool isCompressed = isGzCompressed || isZstdCompressed;
0154     ifstream file(inputFile, isCompressed ? ios_base::in | ios_base::binary : ios_base::in);
0155
0156     if (!file.is_open()) {
0157         cerr << "Failed to open heaptrack log file: " << inputFile << endl;
0158         return false;
0159     }
0160
0161     boost::iostreams::filtering_istream in;
0162     in.push(byte_counter()); // caution, ::read dependant on filter order
0163     if (isGzCompressed) {
0164         in.push(boost::iostreams::gzip_decompressor());
0165     } else if (isZstdCompressed) {
0166 #if ZSTD_FOUND
0167         in.push(boost::iostreams::zstd_decompressor());
0168 #else
0169         cerr << "Heaptrack was built without zstd support, cannot decompressed data file: " << inputFile << endl;
0170         return false;
0171 #endif
0172     }
0173     in.push(byte_counter()); // caution, ::read dependant on filter order
0174     in.push(file);
0175
0176     parsingState.fileSize = boost::filesystem::file_size(inputFile);
0177
0178     return read(in, pass, isReparsing);
0179 }
0180
0181 bool AccumulatedTraceData::read(boost::iostreams::filtering_istream& in, const ParsePass pass, bool isReparsing)
0182 {
0183     LineReader reader;
0184     int64_t timeStamp = 0;
0185
0186     vector<string> opNewStrings = {
0187         // 64 bit
0188         "operator new(unsigned long)",
0189         "operator new[](unsigned long)",
0190         // 32 bit
0191         "operator new(unsigned int)",
0192         "operator new[](unsigned int)",
0193     };
0194     vector<StringIndex> opNewStrIndices;
0195     opNewStrIndices.reserve(opNewStrings.size());
0196
0197     vector<string> stopStrings = {"main", "__libc_start_main", "__static_initialization_and_destruction_0"};
0198
0199     const auto lastPeakCost = pass != FirstPass ? totalCost.peak : 0;
0200     const auto lastPeakTime = pass != FirstPass ? peakTime : 0;
0201
0202     totalCost = {};
0203     peakTime = 0;
0204     if (pass == FirstPass) {
0205         if (!filterParameters.disableBuiltinSuppressions) {
0206             suppressions = builtinSuppressions();
0207         }
0208
0209         suppressions.resize(suppressions.size() + filterParameters.suppressions.size());
0210         std::transform(filterParameters.suppressions.begin(), filterParameters.suppressions.end(), suppressions.begin(),
0211                        [](const std::string& pattern) {
0212                            return Suppression {pattern, 0, 0};
0213                        });
0214     }
0215     peakRSS = 0;
0216     for (auto& allocation : allocations) {
0217         allocation.clearCost();
0218     }
0219     unsigned int fileVersion = 0;
0220     bool debuggeeEncountered = false;
0221     bool inFilteredTime = !filterParameters.minTime;
0222
0223     // required for backwards compatibility
0224     // newer versions handle this in heaptrack_interpret already
0225     AllocationInfoSet allocationInfoSet;
0226     PointerMap pointers;
0227     // in older files, this contains the pointer address, in newer formats
0228     // it holds the allocation info index. both can be used to find temporary
0229     // allocations, i.e. when a deallocation follows with the same data
0230     uint64_t lastAllocationPtr = 0;
0231
0232     const auto uncompressedCount = in.component<byte_counter>(0);
0233     const auto compressedCount = in.component<byte_counter>(in.size() - 2);
0234
0235     parsingState.pass = pass;
0236     parsingState.reparsing = isReparsing;
0237
0238     while (timeStamp < filterParameters.maxTime && reader.getLine(in)) {
0239         parsingState.readCompressedByte = compressedCount->bytes();
0240         parsingState.readUncompressedByte = uncompressedCount->bytes();
0241         parsingState.timestamp = timeStamp;
0242
0243         if (reader.mode() == 's') {
0244             if (pass != FirstPass || isReparsing) {
0245                 continue;
0246             }
0247             if (fileVersion >= 3) {
0248                 // read sized string directly
0249                 std::string string;
0250                 reader >> string;
0251                 strings.push_back(std::move(string));
0252             } else {
0253                 // read remaining line as string, possibly including white spaces
0254                 strings.push_back(reader.line().substr(2));
0255             }
0256
0257             StringIndex index;
0258             index.index = strings.size();
0259
0260             auto opNewIt = find(opNewStrings.begin(), opNewStrings.end(), strings.back());
0261             if (opNewIt != opNewStrings.end()) {
0262                 opNewStrIndices.push_back(index);
0263                 opNewStrings.erase(opNewIt);
0264             } else {
0265                 auto stopIt = find(stopStrings.begin(), stopStrings.end(), strings.back());
0266                 if (stopIt != stopStrings.end()) {
0267                     stopIndices.push_back(index);
0268                     stopStrings.erase(stopIt);
0269                 }
0270             }
0271         } else if (reader.mode() == 't') {
0272             if (pass != FirstPass || isReparsing) {
0273                 continue;
0274             }
0275             TraceNode node;
0276             reader >> node.ipIndex;
0277             reader >> node.parentIndex;
0278             // skip operator new and operator new[] at the beginning of traces
0279             while (find(opNewIpIndices.begin(), opNewIpIndices.end(), node.ipIndex) != opNewIpIndices.end()) {
0280                 node = findTrace(node.parentIndex);
0281             }
0282             traces.push_back(node);
0283         } else if (reader.mode() == 'i') {
0284             if (pass != FirstPass || isReparsing) {
0285                 continue;
0286             }
0287             InstructionPointer ip;
0288             reader >> ip.instructionPointer;
0289             reader >> ip.moduleIndex;
0290             auto readFrame = [&reader](Frame* frame) {
0291                 return (reader >> frame->functionIndex) && (reader >> frame->fileIndex) && (reader >> frame->line);
0292             };
0293             if (readFrame(&ip.frame)) {
0294                 Frame inlinedFrame;
0295                 while (readFrame(&inlinedFrame)) {
0296                     ip.inlined.push_back(inlinedFrame);
0297                 }
0298             }
0299
0300             instructionPointers.push_back(ip);
0301             if (find(opNewStrIndices.begin(), opNewStrIndices.end(), ip.frame.functionIndex) != opNewStrIndices.end()) {
0302                 IpIndex index;
0303                 index.index = instructionPointers.size();
0304                 opNewIpIndices.push_back(index);
0305             }
0306         } else if (reader.mode() == '+') {
0307             if (!inFilteredTime) {
0308                 continue;
0309             }
0310             AllocationInfo info;
0311             AllocationInfoIndex allocationIndex;
0312             if (fileVersion >= 1) {
0313                 if (!(reader >> allocationIndex)) {
0314                     cerr << "failed to parse line: " << reader.line() << ' ' << __LINE__ << endl;
0315                     continue;
0316                 } else if (allocationIndex.index >= allocationInfos.size()) {
0317                     cerr << "allocation index out of bounds: " << allocationIndex
0318                          << ", maximum is: " << allocationInfos.size() << endl;
0319                     continue;
0320                 }
0321                 info = allocationInfos[allocationIndex.index];
0322                 lastAllocationPtr = allocationIndex.index;
0323             } else { // backwards compatibility
0324                 uint64_t ptr = 0;
0325                 TraceIndex traceIndex;
0326                 if (!(reader >> info.size) || !(reader >> traceIndex) || !(reader >> ptr)) {
0327                     cerr << "failed to parse line: " << reader.line() << ' ' << __LINE__ << endl;
0328                     continue;
0329                 }
0330                 info.allocationIndex = mapToAllocationIndex(traceIndex);
0331                 if (allocationInfoSet.add(info.size, traceIndex, &allocationIndex)) {
0332                     allocationInfos.push_back(info);
0333                 }
0334                 pointers.addPointer(ptr, allocationIndex);
0335                 lastAllocationPtr = ptr;
0336             }
0337
0338             if (pass != FirstPass) {
0339                 auto& allocation = allocations[info.allocationIndex.index];
0340                 allocation.leaked += info.size;
0341                 ++allocation.allocations;
0342
0343                 handleAllocation(info, allocationIndex);
0344             }
0345
0346             ++totalCost.allocations;
0347             totalCost.leaked += info.size;
0348             if (totalCost.leaked > totalCost.peak) {
0349                 totalCost.peak = totalCost.leaked;
0350                 peakTime = timeStamp;
0351
0352                 if (pass == SecondPass && totalCost.peak == lastPeakCost && peakTime == lastPeakTime) {
0353                     for (auto& allocation : allocations) {
0354                         allocation.peak = allocation.leaked;
0355                     }
0356                 }
0357             }
0358         } else if (reader.mode() == '-') {
0359             if (!inFilteredTime) {
0360                 continue;
0361             }
0362             AllocationInfoIndex allocationInfoIndex;
0363             bool temporary = false;
0364             if (fileVersion >= 1) {
0365                 if (!(reader >> allocationInfoIndex)) {
0366                     cerr << "failed to parse line: " << reader.line() << endl;
0367                     continue;
0368                 }
0369                 temporary = lastAllocationPtr == allocationInfoIndex.index;
0370             } else { // backwards compatibility
0371                 uint64_t ptr = 0;
0372                 if (!(reader >> ptr)) {
0373                     cerr << "failed to parse line: " << reader.line() << endl;
0374                     continue;
0375                 }
0376                 auto taken = pointers.takePointer(ptr);
0377                 if (!taken.second) {
0378                     // happens when we attached to a running application
0379                     continue;
0380                 }
0381                 allocationInfoIndex = taken.first;
0382                 temporary = lastAllocationPtr == ptr;
0383             }
0384             lastAllocationPtr = 0;
0385
0386             const auto& info = allocationInfos[allocationInfoIndex.index];
0387             totalCost.leaked -= info.size;
0388             if (temporary) {
0389                 ++totalCost.temporary;
0390             }
0391
0392             if (pass != FirstPass) {
0393                 auto& allocation = allocations[info.allocationIndex.index];
0394                 allocation.leaked -= info.size;
0395                 if (temporary) {
0396                     ++allocation.temporary;
0397                 }
0398             }
0399         } else if (reader.mode() == 'a') {
0400             if (pass != FirstPass || isReparsing) {
0401                 continue;
0402             }
0403             AllocationInfo info;
0404             TraceIndex traceIndex;
0405             if (!(reader >> info.size) || !(reader >> traceIndex)) {
0406                 cerr << "failed to parse line: " << reader.line() << endl;
0407                 continue;
0408             }
0409             info.allocationIndex = mapToAllocationIndex(traceIndex);
0410             allocationInfos.push_back(info);
0411
0412         } else if (reader.mode() == '#') {
0413             // comment or empty line
0414             continue;
0415         } else if (reader.mode() == 'c') {
0416             int64_t newStamp = 0;
0417             if (!(reader >> newStamp)) {
0418                 cerr << "Failed to read time stamp: " << reader.line() << endl;
0419                 continue;
0420             }
0421             inFilteredTime = newStamp >= filterParameters.minTime && newStamp <= filterParameters.maxTime;
0422             if (inFilteredTime) {
0423                 handleTimeStamp(timeStamp, newStamp, false, pass);
0424             }
0425             timeStamp = newStamp;
0426         } else if (reader.mode() == 'R') { // RSS timestamp
0427             if (!inFilteredTime) {
0428                 continue;
0429             }
0430             int64_t rss = 0;
0431             reader >> rss;
0432             if (rss > peakRSS) {
0433                 peakRSS = rss;
0434             }
0435         } else if (reader.mode() == 'X') {
0436             if (debuggeeEncountered) {
0437                 cerr << "Duplicated debuggee entry - corrupt data file?" << endl;
0438                 return false;
0439             }
0440             debuggeeEncountered = true;
0441             if (pass != FirstPass && !isReparsing) {
0442                 handleDebuggee(reader.line().c_str() + 2);
0443             }
0444         } else if (reader.mode() == 'A') {
0445             if (pass != FirstPass || isReparsing)
0446                 continue;
0447             totalCost = {};
0448             fromAttached = true;
0449         } else if (reader.mode() == 'v') {
0450             unsigned int heaptrackVersion = 0;
0451             reader >> heaptrackVersion;
0452             if (!(reader >> fileVersion) && heaptrackVersion == 0x010200) {
0453                 // backwards compatibility: before the 1.0.0, I actually
0454                 // bumped the version to 0x010200 already and used that
0455                 // as file version. This is what we now consider v1 of the
0456                 // file format
0457                 fileVersion = 1;
0458             }
0459             if (fileVersion > HEAPTRACK_FILE_FORMAT_VERSION) {
0460                 cerr << "The data file has version " << hex << fileVersion << " and was written by heaptrack version "
0461                      << hex << heaptrackVersion << ")\n"
0462                      << "This is not compatible with this build of heaptrack (version " << hex << HEAPTRACK_VERSION
0463                      << "), which can read file format version " << hex << HEAPTRACK_FILE_FORMAT_VERSION << " and below"
0464                      << endl;
0465                 return false;
0466             }
0467             if (fileVersion >= 3) {
0468                 reader.setExpectedSizedStrings(true);
0469             }
0470         } else if (reader.mode() == 'I') { // system information
0471             reader >> systemInfo.pageSize;
0472             reader >> systemInfo.pages;
0473         } else if (reader.mode() == 'S') { // embedded suppression
0474             if (pass != FirstPass || filterParameters.disableEmbeddedSuppressions) {
0475                 continue;
0476             }
0477             auto suppression = parseSuppression(reader.line().substr(2));
0478             if (!suppression.empty()) {
0479                 suppressions.push_back({std::move(suppression), 0, 0});
0480             }
0481         } else {
0482             cerr << "failed to parse line: " << reader.line() << endl;
0483         }
0484     }
0485
0486     if (pass == FirstPass && !isReparsing) {
0487         totalTime = timeStamp + 1;
0488         filterParameters.maxTime = totalTime;
0489     }
0490
0491     handleTimeStamp(timeStamp, timeStamp + 1, true, pass);
0492
0493     return true;
0494 }
0495
0496 namespace { // helpers for diffing
0497
0498 template <typename IndexT, typename SortF>
0499 vector<IndexT> sortedIndices(size_t numIndices, SortF sorter)
0500 {
0501     vector<IndexT> indices;
0502     indices.resize(numIndices);
0503     for (size_t i = 0; i < numIndices; ++i) {
0504         indices[i].index = (i + 1);
0505     }
0506     sort(indices.begin(), indices.end(), sorter);
0507     return indices;
0508 }
0509
0510 vector<StringIndex> remapStrings(vector<string>& lhs, const vector<string>& rhs)
0511 {
0512     tsl::robin_map<string, StringIndex> stringRemapping;
0513
0514     // insert known strings in lhs into the map for lookup below
0515     StringIndex stringIndex;
0516     {
0517         stringRemapping.reserve(lhs.size());
0518         for (const auto& string : lhs) {
0519             ++stringIndex.index;
0520             stringRemapping.insert(make_pair(string, stringIndex));
0521         }
0522     }
0523
0524     // now insert the missing strings form rhs into lhs
0525     // and create a remapped string vector, keeping the order
0526     // of the strings in rhs, but mapping into the string vector from lhs
0527     vector<StringIndex> map;
0528     {
0529         map.reserve(rhs.size() + 1);
0530         map.push_back({});
0531         for (const auto& string : rhs) {
0532             auto it = stringRemapping.find(string);
0533             if (it == stringRemapping.end()) {
0534                 // a string that only occurs in rhs, but not lhs
0535                 // add it to lhs to make sure we can find it again later on
0536                 ++stringIndex.index;
0537                 lhs.push_back(string);
0538                 map.push_back(stringIndex);
0539             } else {
0540                 map.push_back(it->second);
0541             }
0542         }
0543     }
0544     return map;
0545 }
0546
0547 // replace by std::identity once we can leverage C++20
0548 struct identity
0549 {
0550     template <typename T>
0551     const T& operator()(const T& t) const
0552     {
0553         return t;
0554     }
0555
0556     template <typename T>
0557     T operator()(T&& t) const
0558     {
0559         return std::move(t);
0560     }
0561 };
0562
0563 template <typename IpMapper>
0564 int compareTraceIndices(const TraceIndex& lhs, const AccumulatedTraceData& lhsData, const TraceIndex& rhs,
0565                         const AccumulatedTraceData& rhsData, IpMapper ipMapper)
0566 {
0567     if (!lhs && !rhs) {
0568         return 0;
0569     } else if (lhs && !rhs) {
0570         return 1;
0571     } else if (rhs && !lhs) {
0572         return -1;
0573     } else if (&lhsData == &rhsData && lhs == rhs) {
0574         // fast-path if both indices are equal and we compare the same data
0575         return 0;
0576     }
0577
0578     const auto& lhsTrace = lhsData.findTrace(lhs);
0579     const auto& rhsTrace = rhsData.findTrace(rhs);
0580
0581     const int parentComparsion =
0582         compareTraceIndices(lhsTrace.parentIndex, lhsData, rhsTrace.parentIndex, rhsData, ipMapper);
0583     if (parentComparsion != 0) {
0584         return parentComparsion;
0585     } // else fall-through to below, parents are equal
0586
0587     const auto& lhsIp = lhsData.findIp(lhsTrace.ipIndex);
0588     const auto& rhsIp = ipMapper(rhsData.findIp(rhsTrace.ipIndex));
0589     if (lhsIp.equalWithoutAddress(rhsIp)) {
0590         return 0;
0591     }
0592     return lhsIp.compareWithoutAddress(rhsIp) ? -1 : 1;
0593 }
0594
0595 POTENTIALLY_UNUSED void printCost(const AllocationData& data)
0596 {
0597     cerr << data.allocations << " (" << data.temporary << "), " << data.peak << " (" << data.leaked << ")\n";
0598 }
0599
0600 POTENTIALLY_UNUSED void printTrace(const AccumulatedTraceData& data, TraceIndex index)
0601 {
0602     do {
0603         const auto trace = data.findTrace(index);
0604         const auto& ip = data.findIp(trace.ipIndex);
0605         cerr << index << " (" << trace.ipIndex << ", " << trace.parentIndex << ")" << '\t'
0606              << data.stringify(ip.frame.functionIndex) << " in " << data.stringify(ip.moduleIndex) << " at "
0607              << data.stringify(ip.frame.fileIndex) << ':' << ip.frame.line << '\n';
0608         for (const auto& inlined : ip.inlined) {
0609             cerr << '\t' << data.stringify(inlined.functionIndex) << " at " << data.stringify(inlined.fileIndex) << ':'
0610                  << inlined.line << '\n';
0611         }
0612         index = trace.parentIndex;
0613     } while (index);
0614     cerr << "---\n";
0615 }
0616
0617 template <class ForwardIt, class BinaryPredicateCompare, class BinaryOpReduce>
0618 ForwardIt inplace_unique_reduce(ForwardIt first, ForwardIt last, BinaryPredicateCompare cmp, BinaryOpReduce reduce)
0619 {
0620     if (first == last)
0621         return last;
0622
0623     ForwardIt result = first;
0624     while (++first != last) {
0625         if (cmp(*result, *first)) {
0626             reduce(*result, *first);
0627         } else if (++result != first) {
0628             *result = std::move(*first);
0629         }
0630     }
0631     return ++result;
0632 }
0633 }
0634
0635 void AccumulatedTraceData::diff(const AccumulatedTraceData& base)
0636 {
0637     totalCost -= base.totalCost;
0638     totalTime -= base.totalTime;
0639     peakRSS -= base.peakRSS;
0640     systemInfo.pages -= base.systemInfo.pages;
0641     systemInfo.pageSize -= base.systemInfo.pageSize;
0642
0643     // step 1: sort allocations for efficient lookup and to prepare for merging equal allocations
0644
0645     std::sort(allocations.begin(), allocations.end(), [this](const Allocation& lhs, const Allocation& rhs) {
0646         return compareTraceIndices(lhs.traceIndex, *this, rhs.traceIndex, *this, identity {}) < 0;
0647     });
0648
0649     // step 2: now merge equal allocations
0650
0651     allocations.erase(inplace_unique_reduce(
0652                           allocations.begin(), allocations.end(),
0653                           [this](const Allocation& lhs, const Allocation& rhs) {
0654                               return compareTraceIndices(lhs.traceIndex, *this, rhs.traceIndex, *this, identity {})
0655                                   == 0;
0656                           },
0657                           [](Allocation& lhs, const Allocation& rhs) { lhs += rhs; }),
0658                       allocations.end());
0659
0660     // step 3: map string indices from rhs to lhs data
0661
0662     const auto& stringMap = remapStrings(strings, base.strings);
0663     auto remapString = [&stringMap](StringIndex& index) {
0664         if (index) {
0665             index.index = stringMap[index.index].index;
0666         }
0667     };
0668     auto remapFrame = [&remapString](Frame frame) -> Frame {
0669         remapString(frame.functionIndex);
0670         remapString(frame.fileIndex);
0671         return frame;
0672     };
0673     auto remapIp = [&remapString, &remapFrame](InstructionPointer ip) -> InstructionPointer {
0674         remapString(ip.moduleIndex);
0675         ip.frame = remapFrame(ip.frame);
0676         for (auto& inlined : ip.inlined) {
0677             inlined = remapFrame(inlined);
0678         }
0679         return ip;
0680     };
0681
0682     // step 4: iterate over rhs data and find matching traces
0683     //         if no match is found, copy the data over
0684
0685     auto sortedIps = sortedIndices<IpIndex>(instructionPointers.size(), [this](const IpIndex& lhs, const IpIndex& rhs) {
0686         return findIp(lhs).compareWithoutAddress(findIp(rhs));
0687     });
0688
0689     // map an IpIndex from the rhs data into the lhs data space, or copy the data
0690     // if it does not exist yet
0691     auto remapIpIndex = [&sortedIps, this, &base, &remapIp](IpIndex rhsIndex) -> IpIndex {
0692         if (!rhsIndex) {
0693             return rhsIndex;
0694         }
0695
0696         const auto& rhsIp = base.findIp(rhsIndex);
0697         const auto& lhsIp = remapIp(rhsIp);
0698
0699         auto it = lower_bound(sortedIps.begin(), sortedIps.end(), lhsIp,
0700                               [this](const IpIndex& lhs, const InstructionPointer& rhs) {
0701                                   return findIp(lhs).compareWithoutAddress(rhs);
0702                               });
0703         if (it != sortedIps.end() && findIp(*it).equalWithoutAddress(lhsIp)) {
0704             return *it;
0705         }
0706
0707         instructionPointers.push_back(lhsIp);
0708
0709         IpIndex ret;
0710         ret.index = instructionPointers.size();
0711         sortedIps.insert(it, ret);
0712
0713         return ret;
0714     };
0715
0716     // copy the rhs trace index and the data it references into the lhs data,
0717     // recursively
0718     function<TraceIndex(TraceIndex)> copyTrace = [this, &base, remapIpIndex,
0719                                                   &copyTrace](TraceIndex rhsIndex) -> TraceIndex {
0720         if (!rhsIndex) {
0721             return rhsIndex;
0722         }
0723
0724         // new location, add it
0725         const auto& rhsTrace = base.findTrace(rhsIndex);
0726
0727         TraceNode node;
0728         node.parentIndex = copyTrace(rhsTrace.parentIndex);
0729         node.ipIndex = remapIpIndex(rhsTrace.ipIndex);
0730
0731         traces.push_back(node);
0732         TraceIndex ret;
0733         ret.index = traces.size();
0734
0735         return ret;
0736     };
0737
0738     // find an equivalent trace or copy the data over if it does not exist yet
0739     // a trace is equivalent if the complete backtrace has equal InstructionPointer
0740     // data while ignoring the actual pointer address
0741     for (const auto& rhsAllocation : base.allocations) {
0742
0743         assert(rhsAllocation.traceIndex);
0744         auto it = lower_bound(allocations.begin(), allocations.end(), rhsAllocation.traceIndex,
0745                               [&base, this, remapIp](const Allocation& lhs, const TraceIndex& rhs) -> bool {
0746                                   return compareTraceIndices(lhs.traceIndex, *this, rhs, base, remapIp) < 0;
0747                               });
0748
0749         if (it == allocations.end()
0750             || compareTraceIndices(it->traceIndex, *this, rhsAllocation.traceIndex, base, remapIp) != 0) {
0751             Allocation lhsAllocation;
0752             lhsAllocation.traceIndex = copyTrace(rhsAllocation.traceIndex);
0753             it = allocations.insert(it, lhsAllocation);
0754         }
0755
0756         (*it) -= rhsAllocation;
0757     }
0758
0759     // step 5: remove allocations that don't show any differences
0760     //         note that when there are differences in the backtraces,
0761     //         we can still end up with merged backtraces that have a total
0762     //         of 0, but different "tails" of different origin with non-zero cost
0763     allocations.erase(remove_if(allocations.begin(), allocations.end(),
0764                                 [&](const Allocation& allocation) -> bool { return allocation == AllocationData(); }),
0765                       allocations.end());
0766 }
0767
0768 AllocationIndex AccumulatedTraceData::mapToAllocationIndex(const TraceIndex traceIndex)
0769 {
0770     AllocationIndex allocationIndex;
0771     if (traceIndex < m_maxAllocationTraceIndex) {
0772         // only need to search when the trace index is previously known
0773         auto it = lower_bound(traceIndexToAllocationIndex.begin(), traceIndexToAllocationIndex.end(), traceIndex,
0774                               [](const pair<TraceIndex, AllocationIndex>& indexMap,
0775                                  const TraceIndex traceIndex) -> bool { return indexMap.first < traceIndex; });
0776         if (it != traceIndexToAllocationIndex.end() && it->first == traceIndex) {
0777             return it->second;
0778         }
0779         // new allocation
0780         allocationIndex.index = allocations.size();
0781         traceIndexToAllocationIndex.insert(it, make_pair(traceIndex, allocationIndex));
0782         Allocation allocation;
0783         allocation.traceIndex = traceIndex;
0784         allocations.push_back(allocation);
0785     } else if (traceIndex == m_maxAllocationTraceIndex && !allocations.empty()) {
0786         // reuse the last allocation
0787         assert(allocations[m_maxAllocationIndex.index].traceIndex == traceIndex);
0788         allocationIndex = m_maxAllocationIndex;
0789     } else {
0790         // new allocation
0791         allocationIndex.index = allocations.size();
0792         traceIndexToAllocationIndex.push_back(make_pair(traceIndex, allocationIndex));
0793         Allocation allocation;
0794         allocation.traceIndex = traceIndex;
0795         m_maxAllocationIndex.index = allocations.size();
0796         allocations.push_back(allocation);
0797         m_maxAllocationTraceIndex = traceIndex;
0798     }
0799     return allocationIndex;
0800 }
0801
0802 const InstructionPointer& AccumulatedTraceData::findIp(const IpIndex ipIndex) const
0803 {
0804     static const InstructionPointer invalid;
0805     if (!ipIndex || ipIndex.index > instructionPointers.size()) {
0806         return invalid;
0807     } else {
0808         return instructionPointers[ipIndex.index - 1];
0809     }
0810 }
0811
0812 TraceNode AccumulatedTraceData::findTrace(const TraceIndex traceIndex) const
0813 {
0814     if (!traceIndex || traceIndex.index > traces.size()) {
0815         return {};
0816     } else {
0817         return traces[traceIndex.index - 1];
0818     }
0819 }
0820
0821 bool AccumulatedTraceData::isStopIndex(const StringIndex index) const
0822 {
0823     return find(stopIndices.begin(), stopIndices.end(), index) != stopIndices.end();
0824 }
0825
0826 struct SuppressionStringMatch
0827 {
0828     static constexpr auto NO_MATCH = std::numeric_limits<std::size_t>::max();
0829
0830     SuppressionStringMatch(std::size_t index = NO_MATCH)
0831         : suppressionIndex(index)
0832     {
0833     }
0834
0835     explicit operator bool() const
0836     {
0837         return suppressionIndex != NO_MATCH;
0838     }
0839
0840     std::size_t suppressionIndex;
0841 };
0842
0843 void AccumulatedTraceData::applyLeakSuppressions()
0844 {
0845     totalLeakedSuppressed = 0;
0846
0847     if (suppressions.empty()) {
0848         return;
0849     }
0850
0851     // match all strings once against all suppression rules
0852     bool hasAnyMatch = false;
0853     std::vector<SuppressionStringMatch> suppressedStrings(strings.size());
0854     std::transform(strings.begin(), strings.end(), suppressedStrings.begin(), [&](const auto& string) {
0855         auto it = std::find_if(suppressions.begin(), suppressions.end(), [&string](const Suppression& suppression) {
0856             return matchesSuppression(suppression.pattern, string);
0857         });
0858         if (it == suppressions.end()) {
0859             return SuppressionStringMatch();
0860         } else {
0861             hasAnyMatch = true;
0862             return SuppressionStringMatch(static_cast<std::size_t>(std::distance(suppressions.begin(), it)));
0863         }
0864     });
0865     if (!hasAnyMatch) {
0866         // nothing matched the suppressions, we can return early
0867         return;
0868     }
0869
0870     auto isSuppressedString = [&suppressedStrings](StringIndex index) {
0871         if (index && index.index <= suppressedStrings.size()) {
0872             return suppressedStrings[index.index - 1];
0873         } else {
0874             return SuppressionStringMatch();
0875         }
0876     };
0877     auto isSuppressedFrame = [&isSuppressedString](Frame frame) {
0878         auto match = isSuppressedString(frame.functionIndex);
0879         if (match) {
0880             return match;
0881         }
0882         return isSuppressedString(frame.fileIndex);
0883     };
0884
0885     // now match all instruction pointers against the suppressed strings
0886     std::vector<SuppressionStringMatch> suppressedIps(instructionPointers.size());
0887     std::transform(instructionPointers.begin(), instructionPointers.end(), suppressedIps.begin(), [&](const auto& ip) {
0888         auto match = isSuppressedString(ip.moduleIndex);
0889         if (match) {
0890             return match;
0891         }
0892         match = isSuppressedFrame(ip.frame);
0893         if (match) {
0894             return match;
0895         }
0896         for (const auto& inlined : ip.inlined) {
0897             match = isSuppressedFrame(inlined);
0898             if (match) {
0899                 return match;
0900             }
0901         }
0902         return SuppressionStringMatch();
0903     });
0904     suppressedStrings = {};
0905     auto isSuppressedIp = [&suppressedIps](IpIndex index) {
0906         if (index && index.index <= suppressedIps.size()) {
0907             return suppressedIps[index.index - 1];
0908         }
0909         return SuppressionStringMatch();
0910     };
0911
0912     // now match all trace indices against the suppressed instruction pointers
0913     std::vector<SuppressionStringMatch> suppressedTraces(traces.size());
0914     auto isSuppressedTrace = [&suppressedTraces](TraceIndex index) {
0915         if (index && index.index <= suppressedTraces.size()) {
0916             return suppressedTraces[index.index - 1];
0917         }
0918         return SuppressionStringMatch();
0919     };
0920     std::transform(traces.begin(), traces.end(), suppressedTraces.begin(), [&](const auto& trace) {
0921         auto match = isSuppressedTrace(trace.parentIndex);
0922         if (match) {
0923             return match;
0924         }
0925         return isSuppressedIp(trace.ipIndex);
0926     });
0927     suppressedIps = {};
0928
0929     // now finally zero all the matching allocations
0930     for (auto& allocation : allocations) {
0931         auto match = isSuppressedTrace(allocation.traceIndex);
0932         if (match) {
0933             totalLeakedSuppressed += allocation.leaked;
0934
0935             auto& suppression = suppressions[match.suppressionIndex];
0936             ++suppression.matches;
0937             suppression.leaked += allocation.leaked;
0938
0939             totalCost.leaked -= allocation.leaked;
0940             allocation.leaked = 0;
0941         }
0942     }
0943 }