File indexing completed on 2024-04-28 04:37:46
0001 /* 0002 SPDX-FileCopyrightText: 2007 Andreas Pakulat <apaku@gmx.de> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "vcsdiff.h" 0008 0009 #include "debug.h" 0010 0011 #include <QString> 0012 #include <QUrl> 0013 #include <QSharedData> 0014 #include <QRegularExpression> 0015 #include <QtGlobal> 0016 0017 #include <vector> 0018 0019 namespace KDevelop 0020 { 0021 namespace 0022 { 0023 /* A class representing a diff hunk (a collection of localized changes) */ 0024 class DiffHunk 0025 { 0026 public: 0027 /* Metadata for the hunk */ 0028 uint srcStart /**< the 1-based (!) start line number of the range in the source file where the hunk applies */ 0029 , 0030 srcCount /**< the size of the range (in # of lines) in the source where the hunk applies (i.e. ctx lines + 0031 deleted lines)*/ 0032 , 0033 tgtStart /**< the 1-based (!) start line number of the range in the target file where the hunk applies */ 0034 , 0035 tgtCount /**< the size of the range (in # of lines) in the target where the hunk applies (i.e. ctx lines + 0036 deleted lines)*/ 0037 , 0038 headingLineIdx /**< The 0-based line number (in the whole diff) of the hunk header line (the one starting 0039 with `@@`) */ 0040 ; 0041 QString srcFile /**< The source filename */ 0042 , 0043 tgtFile /**< The target filename */ 0044 , 0045 heading /**< The heading of the hunk (the stuff in the header line after the position spec, i.e. after the 0046 second `@@`) */ 0047 ; 0048 QStringList lines; /**< The lines comprising the hunk (excluding the header) */ 0049 0050 /** 0051 * @returns the 0-based line number (in the whole diff) of the last line contained in the hunk. 0052 */ 0053 uint lastLineIdx() const { return headingLineIdx + lines.size(); } 0054 0055 /** 0056 * @param lineIdx the 0-based line number of the tested line in the whole diff 0057 * @returns true if the line is part of the diff and false otherwise 0058 * @note: Returns true also for the header line (the one starting with `@@`) 0059 */ 0060 bool containsDiffLine(uint lineIdx) const { return headingLineIdx <= lineIdx && lineIdx <= lastLineIdx(); } 0061 0062 /** 0063 * Returns the index of the line within the hunk 0064 * 0065 * @param diffLineIdx the 0-based index of the line in the diff 0066 * 0067 * @note assumes that the line is contained within the hunk 0068 * @note if the line is a header line, -1 is returned; otherwise the returned 0069 * number is the index of the line in the `lines` list 0070 */ 0071 int diffLineToHunkLine(uint diffLineIdx) const { return diffLineIdx - (headingLineIdx + 1); } 0072 0073 /** 0074 * A helper method to construct a hunk header from the provided info 0075 * 0076 * A hunk header has the following form: 0077 * 0078 * @@ oldStart,oldCount newStart,newCount @@ heading 0079 * e.g. 0080 * @@ -36,14 +36,28 @@ public: 0081 * 0082 * @returns the hunk header 0083 */ 0084 static QString formatHeader(uint oldStart, uint oldCount, uint newStart, uint newCount, QString head); 0085 0086 /** 0087 * The following operators define a PARTIAL order on the hunks list. 0088 * A hunk H is strictly below a hunk K iff the endline of H is strictly below 0089 * the start line of K. In particular, the only non-overlapping hunks are 0090 * ordered. 0091 */ 0092 bool operator<(const DiffHunk& b) const { return lastLineIdx() < b.headingLineIdx; } 0093 bool operator<(uint line) const { return lastLineIdx() < line; } 0094 bool operator<=(const KDevelop::DiffHunk& b) const { return lastLineIdx() <= b.headingLineIdx; } 0095 bool operator<=(uint line) const { return lastLineIdx() <= line; } 0096 bool operator>(const KDevelop::DiffHunk& b) const { return headingLineIdx > b.lastLineIdx(); } 0097 bool operator>(uint line) const { return headingLineIdx > line; } 0098 bool operator>=(const KDevelop::DiffHunk& b) { return headingLineIdx >= b.lastLineIdx(); } 0099 bool operator>=(uint line) const { return headingLineIdx >= line; } 0100 }; 0101 0102 /* RegExp matching a hunk header line */ 0103 Q_GLOBAL_STATIC_WITH_ARGS(const QRegularExpression, HUNK_HEADER_RE, 0104 (QLatin1String("^@@ -([0-9,]+) \\+([0-9,]+) @@(.*)"))) 0105 // static const auto HUNK_HEADER_RE = QRegularExpression(QStringLiteral("^@@ -([0-9,]+) \\+([0-9,]+) @@(.*)")); 0106 0107 /* RegExp matching a meta line containing a source of target filename */ 0108 Q_GLOBAL_STATIC_WITH_ARGS(const QRegularExpression, DIFF_FILENAME_RE, (QLatin1String("^[-+]{3} [ab]/(.*)"))) 0109 0110 /* RegExp matching a meta line (hunk header, filename, other info) */ 0111 Q_GLOBAL_STATIC_WITH_ARGS(const QRegularExpression, META_LINE_RE, (QLatin1String("(^[-+]{3} )|^[^-+ ]"))) 0112 0113 /* RegExps matching conflict delimiting lines */ 0114 Q_GLOBAL_STATIC_WITH_ARGS(const QRegularExpression, CONFLICT_START_RE, (QLatin1String("^<<<<<<<"))) 0115 Q_GLOBAL_STATIC_WITH_ARGS(const QRegularExpression, CONFLICT_MID_RE, (QLatin1String("^======="))) 0116 Q_GLOBAL_STATIC_WITH_ARGS(const QRegularExpression, CONFLICT_END_RE, (QLatin1String("^>>>>>>>"))) 0117 Q_GLOBAL_STATIC_WITH_ARGS(const QRegularExpression, CONFLICT_RE, 0118 (QLatin1String("(^>>>>>>>)|(^=======)|(^<<<<<<<)"))) 0119 0120 QString formatRange(uint start, uint count) 0121 { 0122 if (count == 1) 0123 return QString().setNum(start); 0124 return QString().setNum(start) + QLatin1Char(',') + QString().setNum(count); 0125 } 0126 std::pair<uint, uint> parseRange(const QString& range) 0127 { 0128 int commaPos = range.indexOf(QLatin1Char(',')); 0129 if (commaPos > -1) { 0130 return { range.midRef(0, commaPos).toInt(), range.midRef(commaPos + 1).toInt() }; 0131 } 0132 return { range.toInt(), 1 }; 0133 } 0134 0135 /* Creates a hunk header line (starting with @@) 0136 * 0137 * Note: The line will not end with a newline */ 0138 QString DiffHunk::formatHeader(uint oldStart, uint oldCount, uint newStart, uint newCount, QString head) 0139 { 0140 return QLatin1String("@@ -") + formatRange(oldStart, oldCount) + QLatin1String(" +") 0141 + formatRange(newStart, newCount) + QLatin1String(" @@") + head; 0142 } 0143 0144 /** 0145 * Parses a unified diff into a list of "diff hunks" (each hunk starts with a 0146 * line starting with @@ and represents a collection of localized changes). 0147 * 0148 * @param diff a diff in git's unified diff format 0149 * @returns a list of hunk structures 0150 * 0151 * The diff is assumed to be a collection of hunks, where each hunk has the 0152 * following structure: 0153 * 0154 * METADATA 0155 * --- a/SOURCE_PATH 0156 * +++ b/TARGET_PATH 0157 * HUNK HEADER 0158 * HUNK CONTENT 0159 * 0160 * All metadata lines match the @ref:META_LINE_RE regexp (starts with anything 0161 * except for a '+', '-' and ' ') and these are discarded except for the hunk 0162 * header and source/target path specifications. The path specifications 0163 * are assumed to apply to all following hunks until a new path specification 0164 * is found and are stored in the srcFileName and tgtFileName attributes of 0165 * the hunk structure. 0166 * 0167 * 0168 * Hunk Header 0169 * ----------- 0170 * 0171 * The hunk header has the following form 0172 * 0173 * @@ -SRC_OFFSET[, SRC_CHANGES_COUNT] +TGT_OFFSET[, TGT_CHANGES_COUNT] @@ Heading 0174 * 0175 * where the SRC_OFFSET is a 1-based line index pointing to the source file where 0176 * the hunk applies and TGT_OFFSET is a 1-based line index pointing to the target 0177 * file where the hunk applies. These are parsed into the srcStart and tgtStart 0178 * attributes of the hunk structure. 0179 * 0180 * The optional SRC_CHANGES_COUNTS (assumed to be 1 if not present) specifies the 0181 * number of context lines (starting with ' ') plus the number of deleted lines 0182 * (starting with '-'). Similarly, the optional TGT_CHANGES_COUNT specifies the 0183 * number of context lines plus the number of added lines (starting with '+'). 0184 * These are parsed and stored in the srcCount and tgtCount attributes of the hunk 0185 * structure, but not checked (!). I.e. if the diff hunk has more/less changes then 0186 * specified, the returned hunk structure will have invalid src & tgt counts. 0187 * 0188 * Finally the Heading, used as a visual aid for users, is supposed to show the line 0189 * where the nearest enclosing function scope of the hunk starts. It is parsed and 0190 * stored in the heading attribute. 0191 * 0192 * Hunk Content 0193 * ------------ 0194 * 0195 * The hunk content is a collection of lines which 0196 * 0197 * 1) start with '+' (additions); or 0198 * 2) start with '-' (deletions); or 0199 * 3) start with ' ' (context lines); or 0200 * 4) are empty (context lines); or 0201 * 5) are within conflict markers 0202 * 0203 * These lines are parsed and stored in the lines attribute of the hunk structure. 0204 * The parsing of the hunk stops when a METADATA line (outside of conflict markers) 0205 * is encountered or the end of the file is reached. 0206 * 0207 * Conflict Markers 0208 * ---------------- 0209 * 0210 * Conflict markers are collections of lines of the form: 0211 * 0212 * >>>>>>> our ref 0213 * our content 0214 * ... 0215 * ======= 0216 * their content 0217 * ... 0218 * <<<<<<< their ref 0219 * 0220 * And show content which a merge was not able to merge automatically. 0221 * Strictly speaking, these should not appear in diffs, but git diff 0222 * generates them anyway for files with unresolved conflicts. 0223 */ 0224 std::vector<DiffHunk> parseHunks(VcsDiff& diff) 0225 { 0226 std::vector<DiffHunk> ret; 0227 int lineNo = -1; 0228 QString curSrcFileName, curTgtFileName; 0229 QStringListIterator lines(diff.diff().split(QLatin1Char('\n'))); 0230 while (lines.hasNext()) { 0231 lineNo++; 0232 auto curln = lines.next(); 0233 auto m = DIFF_FILENAME_RE->match(curln); 0234 if (m.hasMatch()) { 0235 if (curln.startsWith(QLatin1Char('-'))) 0236 curSrcFileName = m.captured(1); 0237 else if (curln.startsWith(QLatin1Char('+'))) 0238 curTgtFileName = m.captured(1); 0239 continue; 0240 } 0241 m = HUNK_HEADER_RE->match(curln); 0242 if (!m.hasMatch()) 0243 continue; 0244 auto [oldStart, oldCount] = parseRange(m.captured(1)); 0245 auto [newStart, newCount] = parseRange(m.captured(2)); 0246 auto heading = m.captured(3); 0247 uint firstLineIdx = lineNo; 0248 QStringList hunkLines; 0249 while (lines.hasNext() 0250 && (CONFLICT_START_RE->match(lines.peekNext()).hasMatch() 0251 || !META_LINE_RE->match(lines.peekNext()).hasMatch())) { 0252 // Consume the conflict 0253 if (CONFLICT_START_RE->match(lines.peekNext()).hasMatch()) { 0254 lineNo++; 0255 hunkLines << lines.next(); 0256 while (lines.hasNext() && !CONFLICT_END_RE->match(lines.peekNext()).hasMatch()) { 0257 lineNo++; 0258 hunkLines << lines.next(); 0259 } 0260 if (!CONFLICT_END_RE->match(lines.peekNext()).hasMatch()) { 0261 qCWarning(VCS) << "Invalid diff format, end of file reached before conflict finished"; 0262 qCDebug(VCS) << diff.diff(); 0263 break; 0264 } 0265 } 0266 lineNo++; 0267 hunkLines << lines.next(); 0268 } 0269 0270 // The number of filenames present in the diff should match the number 0271 // of hunks 0272 ret.push_back({ oldStart, oldCount, newStart, newCount, firstLineIdx, curSrcFileName, curTgtFileName, 0273 heading, hunkLines }); 0274 } 0275 0276 // If the diff ends with a newline, for the last hunk, when splitting into lines above 0277 // we will always get an empty string at the end, which we now remove 0278 if (diff.diff().endsWith(QLatin1Char('\n'))) { 0279 if (ret.size() > 0 && ret.back().lines.size() > 0) { 0280 ret.back().lines.pop_back(); 0281 } else { 0282 qCWarning(VCS) << "Failed to parse a diff, produced no hunks"; 0283 qCDebug(VCS) << "Failed diff:" << diff.diff(); 0284 } 0285 } 0286 0287 return ret; 0288 } 0289 } 0290 0291 class VcsDiffPrivate : public QSharedData 0292 { 0293 public: 0294 QUrl baseDiff; 0295 QString diff; 0296 uint depth = 0; 0297 std::vector<DiffHunk> hunks; 0298 0299 enum Dest { 0300 SRC = '-', 0301 TGT = '+', 0302 }; 0303 0304 /** 0305 * Maps a line position in the diff to a corresponding line position in the destination file. 0306 * 0307 * @param line a 0-based line position in the diff 0308 * @param dest specifies the destination file to map to: 0309 * either SRC (the source file, '-') or TGT (the target file, '+') 0310 * @returns a @ref VcsDiff::SourceLocation whose path is the (relative to diff root) 0311 * destination file path and line the 0-based line position in the 0312 * destination file or {"", -1} if no such position exists. 0313 */ 0314 VcsDiff::SourceLocation mapDiffLine ( const uint line, const Dest dest ) const 0315 { 0316 const QLatin1Char skipChar = (dest == SRC) ? QLatin1Char(TGT) : QLatin1Char(SRC); 0317 for (const auto& h : hunks) { 0318 if (h.containsDiffLine(line)) { 0319 int hunkPos = h.diffLineToHunkLine(line); 0320 0321 // The line refers to the heading line 0322 if (hunkPos < 0) 0323 return {}; 0324 0325 // Any lines in the diff hunk which come before line and come from the opposite 0326 // of dest should not be counted (they are not present in the dest) 0327 int skipCount = 0; 0328 for(int i=0; i<hunkPos; i++) { 0329 if (h.lines.at(i).startsWith(skipChar)) 0330 skipCount++; 0331 } 0332 0333 // Any lines in the diff hunk which come from the second part (src)/ first part (tgt) 0334 // of a conflict should not be counted either 0335 bool inConflict = false; // This is set so that a line inside a conflict is recognized as a valid line 0336 for(int i=0; i<hunkPos; i++) { 0337 if (CONFLICT_START_RE->match(h.lines.at(i)).hasMatch()) { 0338 skipCount++; // skip the conflict marker line 0339 if (dest == TGT) { 0340 while ((++i) < hunkPos && !CONFLICT_MID_RE->match(h.lines.at(i)).hasMatch()) { 0341 skipCount++; 0342 } 0343 } else { 0344 inConflict = true; 0345 } 0346 } 0347 if (CONFLICT_MID_RE->match(h.lines.at(i)).hasMatch()) { 0348 skipCount++; // skip the conflict marker line 0349 if (dest == SRC) { 0350 while ((++i) < hunkPos && !CONFLICT_END_RE->match(h.lines.at(i)).hasMatch()) 0351 skipCount++; 0352 } else { 0353 inConflict = true; 0354 } 0355 } 0356 if (CONFLICT_END_RE->match(h.lines.at(i)).hasMatch()) { 0357 skipCount++; // skip the conflict marker line 0358 inConflict = false; 0359 } 0360 } 0361 0362 auto ln = h.lines[hunkPos]; 0363 0364 // This works around the fact that inConflict is set even if hunkPos 0365 // ends up hitting a conflict marker 0366 if (CONFLICT_RE->match(ln).hasMatch()) 0367 return {}; 0368 0369 if (ln.startsWith(dest) || ln.startsWith(QLatin1Char(' ')) || ln.isEmpty() || inConflict) { 0370 if (dest == SRC) 0371 // The -1 accounts for the fact that srcStart is 1-based 0372 // but we need to return 0-based line numbers 0373 return { h.srcFile, static_cast<int>(h.srcStart - 1 + hunkPos - skipCount) }; 0374 else 0375 // The -1 accounts for the fact that srcStart is 1-based 0376 // but we need to return 0-based line numbers 0377 return { h.tgtFile, static_cast<int>(h.tgtStart - 1 + hunkPos - skipCount) }; 0378 } else return {}; 0379 } 0380 } 0381 return {}; 0382 } 0383 }; 0384 0385 KDevelop::VcsDiff VcsDiff::subDiffHunk(const uint line, DiffDirection dir) const 0386 { 0387 auto hunks = d->hunks; 0388 for (const auto& hunk : hunks) { 0389 if (hunk.containsDiffLine(line)) { 0390 return subDiff(hunk.headingLineIdx, hunk.lastLineIdx(), dir); 0391 } 0392 } 0393 0394 VcsDiff emptyDiff; 0395 emptyDiff.setBaseDiff(d->baseDiff); 0396 emptyDiff.setDepth(d->depth); 0397 emptyDiff.setDiff(d->diff.mid(0,d->diff.indexOf(QStringLiteral("@@")))); 0398 return emptyDiff; 0399 } 0400 0401 KDevelop::VcsDiff VcsDiff::subDiff(const uint startLine, const uint endLine, DiffDirection dir) const 0402 { 0403 // Code adapted from cola/diffparse.py 0404 enum LineType { 0405 ADD = '+', 0406 DEL = '-', 0407 CTX = ' ', 0408 NO_NEWLINE = '\\' 0409 }; 0410 0411 VcsDiff ret; 0412 ret.setBaseDiff(baseDiff()); 0413 ret.setDepth(depth()); 0414 0415 0416 auto hunks = d->hunks; 0417 QStringList lines; 0418 for (const auto& hunk : hunks) { 0419 // Skip hunks before the first line 0420 if (hunk < startLine) 0421 continue; 0422 0423 // Skip hunks after the last line 0424 if (hunk > endLine) 0425 break; 0426 0427 std::map<LineType, int> counts = { 0428 {ADD, 0}, 0429 {DEL, 0}, 0430 {CTX, 0}, 0431 {NO_NEWLINE, 0} 0432 }; 0433 QStringList filteredLines; 0434 0435 // Will be set if the previous line in a hunk was 0436 // skipped because it was not in the selected range 0437 bool prevSkipped = false; 0438 0439 uint lnIdx = hunk.headingLineIdx; 0440 0441 // Store the number of skipped lines which start the hunk 0442 // (i.e. lines before a first deletion (addition in case of reverse) 0443 // so that we can adjust the start appropriately 0444 int startOffset = 0; 0445 const auto _lines = QStringList(hunk.lines.constBegin(), hunk.lines.constEnd()); 0446 for(const auto& line: _lines) { 0447 lnIdx++; 0448 LineType tp = line.length() > 0 ? (LineType) line[0].toLatin1() : (LineType) 0; 0449 QString content = line.mid(1); 0450 0451 if (dir == Reverse) { 0452 if (tp == ADD) tp = DEL; 0453 else if (tp == DEL) tp = ADD; 0454 } 0455 0456 if (lnIdx < startLine || endLine < lnIdx) { 0457 // skip additions (or deletions if reverse) that are not in range 0458 if (tp == ADD) { 0459 prevSkipped=true; 0460 // If we are before the range and 0461 // so far we only encountered ADD (or DEL, in case of reverse) lines 0462 // these will not be included in the subdiff hunk so we increase the startOffset 0463 if (lnIdx < startLine && counts[CTX] == 0) startOffset++; 0464 continue; 0465 } 0466 // change deletions (or additions if reverse) that are not in range into context 0467 if (tp == DEL) tp=CTX; 0468 } 0469 0470 // If the line immediately before a "No newline" line was 0471 // skipped (because it was an unselected addition) skip 0472 // the "No newline" line as well. 0473 if (tp == NO_NEWLINE && prevSkipped ) { 0474 if (lnIdx <= endLine ) startOffset++; 0475 continue; 0476 } 0477 0478 // Empty lines are context lines and we 0479 // preserve them 0480 if ((int)tp == 0) { 0481 filteredLines << content; 0482 tp = CTX; 0483 } else { 0484 filteredLines << QLatin1Char(tp)+content; 0485 } 0486 counts[tp]++; 0487 prevSkipped = false; 0488 } 0489 0490 // Skip hunks which have only context lines 0491 if (counts[ADD] + counts[DEL] == 0) 0492 continue; 0493 0494 0495 // Compute the start & counts of the hunks 0496 uint subSrcStart, subTgtStart; 0497 if (dir == Reverse) { 0498 subSrcStart = hunk.tgtStart + startOffset; 0499 subTgtStart = hunk.srcStart + startOffset; 0500 } else { 0501 subSrcStart = hunk.srcStart + startOffset; 0502 subTgtStart = hunk.tgtStart + startOffset; 0503 } 0504 uint subSrcCount = counts[CTX] + counts[DEL]; 0505 uint subTgtCount = counts[CTX] + counts[ADD]; 0506 0507 // Prepend lines identifying the source files 0508 lines << QStringLiteral("--- a/") + ((dir == Reverse) ? hunk.tgtFile : hunk.srcFile); 0509 lines << QStringLiteral("+++ b/") + ((dir == Reverse) ? hunk.srcFile : hunk.tgtFile); 0510 0511 lines << DiffHunk::formatHeader(subSrcStart, subSrcCount, subTgtStart, subTgtCount, hunk.heading); 0512 lines += filteredLines; 0513 } 0514 if (lines.size() > 2) 0515 ret.setDiff(lines.join(QLatin1Char('\n'))+QLatin1Char('\n')); 0516 return ret; 0517 } 0518 0519 0520 const QVector<VcsDiff::FilePair> VcsDiff::fileNames() const 0521 { 0522 QVector<VcsDiff::FilePair> ret; 0523 VcsDiff::FilePair current; 0524 for (const auto& h : d->hunks) { 0525 // List each pair only once 0526 if (h.srcFile == current.source && h.tgtFile == current.target) 0527 continue; 0528 current = { h.srcFile, h.tgtFile }; 0529 ret.push_back(current); 0530 } 0531 return ret; 0532 } 0533 0534 0535 VcsDiff::SourceLocation VcsDiff::diffLineToSource ( const uint line ) const 0536 { 0537 return d->mapDiffLine(line, VcsDiffPrivate::SRC); 0538 } 0539 0540 VcsDiff::SourceLocation VcsDiff::diffLineToTarget ( const uint line ) const 0541 { 0542 return d->mapDiffLine(line, VcsDiffPrivate::TGT); 0543 } 0544 0545 VcsDiff::VcsDiff() 0546 : d(new VcsDiffPrivate) 0547 { 0548 } 0549 0550 VcsDiff::~VcsDiff() = default; 0551 0552 VcsDiff::VcsDiff( const VcsDiff& rhs ) 0553 : d(rhs.d) 0554 { 0555 } 0556 0557 bool VcsDiff::isEmpty() const 0558 { 0559 return d->diff.isEmpty(); 0560 } 0561 0562 QString VcsDiff::diff() const 0563 { 0564 return d->diff; 0565 } 0566 0567 0568 void VcsDiff::setDiff( const QString& s ) 0569 { 0570 d->diff = s; 0571 d->hunks = parseHunks(*this); 0572 } 0573 0574 VcsDiff& VcsDiff::operator=( const VcsDiff& rhs) 0575 { 0576 d = rhs.d; 0577 return *this; 0578 } 0579 0580 QUrl VcsDiff::baseDiff() const 0581 { 0582 return d->baseDiff; 0583 } 0584 0585 uint VcsDiff::depth() const 0586 { 0587 return d->depth; 0588 } 0589 0590 void VcsDiff::setBaseDiff(const QUrl& url) 0591 { 0592 d->baseDiff=url; 0593 } 0594 0595 void VcsDiff::setDepth(const uint depth) 0596 { 0597 d->depth = depth; 0598 } 0599 0600 0601 } 0602