File indexing completed on 2024-05-12 04:38:04
0001 /* 0002 SPDX-FileCopyrightText: 2007 David Nolden <david.nolden.kdevelop@art-master.de> 0003 0004 SPDX-License-Identifier: LGPL-2.0-only 0005 */ 0006 0007 #include "stringhelpers.h" 0008 #include <debug.h> 0009 0010 #include <QString> 0011 0012 #include <algorithm> 0013 0014 namespace { 0015 bool endsWithWordBoundary(QStringView str) 0016 { 0017 if (str.isEmpty()) { 0018 return true; 0019 } 0020 const auto boundary = str.last(); 0021 return !boundary.isLetterOrNumber() && boundary != QLatin1Char('_'); 0022 } 0023 0024 /// libclang surrounds binary operators but not angle brackets with spaces. 0025 bool isOperatorSurroundedWithSpaces(QStringView str, int pos) 0026 { 0027 Q_ASSERT(pos >= 0 && pos < str.size()); 0028 0029 if (pos == 0 || pos == str.size() - 1) { 0030 return false; // there is no place for surrounding spaces 0031 } 0032 0033 constexpr QLatin1Char lt{'<'}, gt{'>'}, eq{'='}, space{' '}; 0034 0035 const auto c = str[pos]; 0036 Q_ASSERT(c == lt || c == gt); 0037 0038 // Note: due to the `pos == 0 || pos == str.size() - 1` check above, 0039 // most conditionals below don't need to check boundaries. 0040 int operatorEnd = pos + 1; 0041 if (str[pos + 1] == c) 0042 ++operatorEnd; // << or >> 0043 else if (str[pos - 1] == c) { 0044 --pos; // << or >> 0045 } else { 0046 // <=> 0047 if (c == lt && str[pos + 1] == eq && pos + 2 < str.size() && str[pos + 2] == gt) { 0048 operatorEnd += 2; 0049 } else if (c == gt && str[pos - 1] == eq && pos >= 2 && str[pos - 2] == lt) { 0050 pos -= 2; 0051 } 0052 } 0053 0054 if (operatorEnd - pos < 3 && operatorEnd < str.size() && str[operatorEnd] == eq) { 0055 ++operatorEnd; // <= or >= or <<= or >>= 0056 } 0057 0058 return pos > 0 && str[pos - 1] == space && operatorEnd < str.size() && str[operatorEnd] == space; 0059 } 0060 0061 bool isOperator(QStringView str, int pos) 0062 { 0063 Q_ASSERT(pos >= 0 && pos < str.size()); 0064 0065 if (isOperatorSurroundedWithSpaces(str, pos)) { 0066 return true; 0067 } 0068 0069 const auto op = QLatin1String("operator"); 0070 if (pos < op.size()) { 0071 return false; 0072 } 0073 0074 const auto c = str[pos]; 0075 Q_ASSERT(c == QLatin1Char('<') || c == QLatin1Char('>')); 0076 0077 --pos; 0078 0079 // note: due to the `pos < op.size()` check above, the below conditionals don't need to check boundaries 0080 if (str[pos] == c) { 0081 // handle `operator<<` and `operator>>` 0082 --pos; 0083 } else if (c == QLatin1Char('>') && str[pos] == QLatin1Char('=') && str[pos - 1] == QLatin1Char('<')) { 0084 // handle `operator<=>` 0085 pos -= 2; 0086 } 0087 0088 // skip spaces, e.g. `operator <` 0089 while (pos > 0 && str[pos].isSpace()) { 0090 --pos; 0091 } 0092 0093 auto prefix = str.left(pos + 1); 0094 if (!prefix.endsWith(op)) { 0095 return false; 0096 } 0097 0098 prefix.chop(op.size()); 0099 return endsWithWordBoundary(prefix); 0100 } 0101 0102 // check for operator-> but don't get confused by operator--> 0103 bool isArrowOperator(QStringView str, int pos) 0104 { 0105 Q_ASSERT(pos >= 0 && pos < str.size()); 0106 0107 Q_ASSERT(str[pos] == QLatin1Char('>')); 0108 return pos > 0 && str[pos - 1] == QLatin1Char('-') && (pos == 1 || str[pos - 2] != QLatin1Char('-')); 0109 } 0110 0111 bool isOperatorOrArrowOperator(QStringView str, int pos) 0112 { 0113 return isOperator(str, pos) || isArrowOperator(str, pos); 0114 } 0115 0116 /// Skips literals enclosed in single or double quotes. 0117 /// No need to support raw string literals, because they cannot appear within a macro parameter list; 0118 /// in other contexts libclang converts them into non-raw string literals in each string that ends up here. 0119 int skipStringOrCharLiteral(QStringView str, int pos) 0120 { 0121 Q_ASSERT(pos >= 0 && pos < str.size()); 0122 0123 const auto quote = str[pos]; 0124 Q_ASSERT(quote == QLatin1Char('\'') || quote == QLatin1Char('"')); 0125 0126 const auto end = str.size(); 0127 pos++; 0128 while (pos < end && (str[pos] != quote || str[pos - 1] == QLatin1Char('\\'))) { 0129 pos++; 0130 } 0131 return pos; 0132 } 0133 0134 /// Skips multi-line comments. 0135 /// No need to support single-line comments, because they cannot appear within a macro parameter list; 0136 /// in other contexts libclang removes comments from each string that ends up here. 0137 int skipComment(QStringView str, int pos) 0138 { 0139 Q_ASSERT(pos >= 0 && pos < str.size()); 0140 Q_ASSERT(str[pos] == QLatin1Char{'/'}); 0141 0142 if (pos + 1 == str.size() || str[pos + 1] != QLatin1Char{'*'}) 0143 return pos; // not a comment 0144 pos += 2; 0145 0146 while (pos < str.size() && (str[pos] != QLatin1Char{'/'} || str[pos - 1] != QLatin1Char{'*'})) { 0147 ++pos; 0148 } 0149 0150 return pos; 0151 } 0152 0153 int trySkipStringOrCharLiteralOrComment(QStringView str, int pos) 0154 { 0155 Q_ASSERT(pos >= 0 && pos < str.size()); 0156 0157 switch (str[pos].unicode()) { 0158 case '"': 0159 case '\'': 0160 return skipStringOrCharLiteral(str, pos); 0161 case '/': 0162 return skipComment(str, pos); 0163 } 0164 return pos; 0165 } 0166 } // unnamed namespace 0167 0168 namespace KDevelop { 0169 bool consistsOfWhitespace(QStringView str) 0170 { 0171 return std::all_of(str.cbegin(), str.cend(), [](QChar c) { 0172 return c.isSpace(); 0173 }); 0174 } 0175 0176 class ParamIteratorPrivate 0177 { 0178 Q_DISABLE_COPY_MOVE(ParamIteratorPrivate) 0179 public: 0180 explicit ParamIteratorPrivate(QStringView parens, QStringView source) 0181 : m_parens(parens) 0182 , m_source(source) 0183 { 0184 } 0185 0186 const QStringView m_parens; 0187 const QStringView m_source; 0188 QStringView m_prefix; 0189 int m_cur; 0190 int m_curEnd; 0191 int m_end; 0192 0193 QStringView sourceRange(int first, int last) const 0194 { 0195 return m_source.mid(first, last - first); 0196 } 0197 0198 int next() const 0199 { 0200 return findCommaOrEnd(m_source, m_cur, m_parens[1]); 0201 } 0202 }; 0203 0204 namespace { 0205 QChar fittingClosingNonAngleBracket(QChar openingBracket) 0206 { 0207 switch (openingBracket.unicode()) { 0208 case '(': 0209 return QLatin1Char(')'); 0210 case '[': 0211 return QLatin1Char(']'); 0212 case '{': 0213 return QLatin1Char('}'); 0214 default: 0215 Q_UNREACHABLE(); 0216 } 0217 } 0218 0219 // findClosingNonAngleBracket() and findClosingAngleBracket() have different implementations for the following reason. 0220 // Taking all bracket types into account while looking for a closing angle bracket may improve correctness, because the 0221 // characters of other bracket types are always brackets, not [parts of] operators; distinguishing between angle 0222 // brackets and operators is heuristic and unreliable. For example, in `Foo<(A>B)>` the round brackets help to recognize 0223 // the first '>' character as an operator rather than a closing angle bracket. Conversely, taking all bracket types into 0224 // account while looking for a closing non-angle bracket may adversely affect correctness. For example, in `Foo<(A<B)>` 0225 // the second '<' character would be regarded as an opening angle bracket, which would prevent recognizing the closing 0226 // round bracket. 0227 0228 /// Finds in @p str the position of a fitting closing bracket for the opening bracket @p str[@p pos], e.g. ')' for '('. 0229 /// @return the position of a fitting closing bracket or str.size() if not found. 0230 /// @warning This function does not support angle brackets. Use findClosingAngleBracket() for that. 0231 int findClosingNonAngleBracket(QStringView str, int pos) 0232 { 0233 Q_ASSERT(pos >= 0 && pos < str.size()); 0234 Q_ASSERT(str[pos] == QLatin1Char{'('} || str[pos] == QLatin1Char{'['} || str[pos] == QLatin1Char{'{'}); 0235 0236 const auto openingBracket = str[pos]; 0237 const auto closingBracket = fittingClosingNonAngleBracket(openingBracket); 0238 0239 int depth = 1; 0240 0241 for (++pos; pos < str.size(); ++pos) { 0242 if (str[pos] == openingBracket) { 0243 ++depth; 0244 } else if (str[pos] == closingBracket) { 0245 if (--depth == 0) { 0246 return pos; 0247 } 0248 } else { 0249 pos = trySkipStringOrCharLiteralOrComment(str, pos); 0250 } 0251 } 0252 0253 Q_ASSERT(depth > 0); 0254 return str.size(); 0255 } 0256 0257 /// Finds in @p str the position of a fitting closing angle bracket for the opening angle bracket @p str[@p pos] == '<'. 0258 /// @return the position of a fitting closing bracket or str.size() if not found. 0259 int findClosingAngleBracket(QStringView str, int pos) 0260 { 0261 Q_ASSERT(pos >= 0 && pos < str.size()); 0262 Q_ASSERT(str[pos] == QLatin1Char{'<'}); 0263 0264 int depth = 1; 0265 0266 for (++pos; pos < str.size(); ++pos) { 0267 switch (str[pos].unicode()) { 0268 case '<': 0269 if (!isOperator(str, pos)) { 0270 ++depth; 0271 } 0272 break; 0273 case '>': 0274 if (!isOperatorOrArrowOperator(str, pos)) { 0275 if (--depth == 0) { 0276 return pos; 0277 } 0278 } 0279 break; 0280 case '(': 0281 case '[': 0282 case '{': 0283 pos = findClosingNonAngleBracket(str, pos); 0284 break; 0285 default: 0286 pos = trySkipStringOrCharLiteralOrComment(str, pos); 0287 } 0288 } 0289 0290 Q_ASSERT(depth > 0); 0291 return str.size(); 0292 } 0293 0294 /// Finds in @p str the position of @p parens[0] or @p parens[2] starting from @p pos at the top level. 0295 /// @return the position of the found symbol or str.size() if not found. 0296 /// @param parens see ParamIterator(). 0297 int findOpeningBracketOrEnd(QStringView parens, QStringView str, int pos) 0298 { 0299 Q_ASSERT(pos >= 0 && pos <= str.size()); 0300 0301 Q_ASSERT(parens.size() == 2 || parens.size() == 3); 0302 0303 Q_ASSERT(QStringView(u"<([{").contains(parens[0])); 0304 Q_ASSERT(parens.left(2) == u"<>" || parens[1] == fittingClosingNonAngleBracket(parens[0])); 0305 0306 Q_ASSERT(parens.size() == 2 || !QStringView(u"<>()[]{}").contains(parens[2])); 0307 0308 for (; pos < str.size(); ++pos) { 0309 switch (str[pos].unicode()) { 0310 // Take into account brackets of all types to skip searched-for symbols within them (i.e. not at the top level). 0311 case '<': 0312 if (!isOperator(str, pos)) { 0313 if (str[pos] == parens[0]) { 0314 return pos; 0315 } 0316 pos = findClosingAngleBracket(str, pos); 0317 } 0318 break; 0319 case '(': 0320 case '[': 0321 case '{': 0322 if (str[pos] == parens[0]) { 0323 return pos; 0324 } 0325 pos = findClosingNonAngleBracket(str, pos); 0326 break; 0327 default: 0328 if (parens.size() > 2 && str[pos] == parens[2]) { 0329 return pos; 0330 } 0331 pos = trySkipStringOrCharLiteralOrComment(str, pos); 0332 } 0333 } 0334 0335 return str.size(); 0336 } 0337 } // unnamed namespace 0338 0339 int findCommaOrEnd(QStringView str, int pos, QChar validEnd) 0340 { 0341 const auto size = str.size(); 0342 Q_ASSERT(pos >= 0 && pos <= size); 0343 0344 for (; pos < size; ++pos) { 0345 switch (str[pos].unicode()) { 0346 // Take into account brackets of all types, not just the validEnd type, to skip ',' within them. 0347 case '<': 0348 if (!isOperator(str, pos)) { 0349 pos = findClosingAngleBracket(str, pos); 0350 } 0351 break; 0352 case '(': 0353 case '[': 0354 case '{': 0355 pos = findClosingNonAngleBracket(str, pos); 0356 break; 0357 case ',': 0358 return pos; 0359 default: 0360 if (str[pos] == validEnd && !(str[pos] == QLatin1Char('>') && isOperatorOrArrowOperator(str, pos))) { 0361 return pos; 0362 } 0363 pos = trySkipStringOrCharLiteralOrComment(str, pos); 0364 } 0365 } 0366 0367 return size; 0368 } 0369 0370 // NOTE: keep in sync with QString overload below 0371 QByteArray formatComment(const QByteArray& comment) 0372 { 0373 if (comment.isEmpty()) 0374 return comment; 0375 0376 auto lines = comment.split('\n'); 0377 // remove common leading & trailing chars from the lines 0378 for (auto& l : lines) { 0379 // don't trigger repeated temporary allocations here 0380 0381 // possible comment starts, sorted from longest to shortest 0382 static const QByteArray startMatches[] = { 0383 QByteArrayLiteral("//!<"), QByteArrayLiteral("/*!<"), QByteArrayLiteral("/**<"), QByteArrayLiteral("///<"), 0384 QByteArrayLiteral("///"), QByteArrayLiteral("//!"), QByteArrayLiteral("/**"), QByteArrayLiteral("/*!"), 0385 QByteArrayLiteral("//"), QByteArrayLiteral("/*"), QByteArrayLiteral("/"), QByteArrayLiteral("*")}; 0386 0387 // possible comment ends, sorted from longest to shortest 0388 static const QByteArray endMatches[] = {QByteArrayLiteral("**/"), QByteArrayLiteral("*/")}; 0389 0390 l = l.trimmed(); 0391 0392 // check for ends first, as the starting pattern "*" might interfere with the ending pattern 0393 for (const auto& m : endMatches) { 0394 if (l.endsWith(m)) { 0395 l.chop(m.length()); 0396 break; 0397 } 0398 } 0399 0400 for (const auto& m : startMatches) { 0401 if (l.startsWith(m)) { 0402 l.remove(0, m.length()); 0403 break; 0404 } 0405 } 0406 } 0407 0408 QByteArray ret; 0409 for (const auto& line : qAsConst(lines)) { 0410 if (!ret.isEmpty()) 0411 ret += '\n'; 0412 ret += line; 0413 } 0414 return ret.trimmed(); 0415 } 0416 0417 // NOTE: keep in sync with QByteArray overload above 0418 QString formatComment(const QString& comment) 0419 { 0420 if (comment.isEmpty()) 0421 return comment; 0422 0423 auto lines = comment.splitRef(QLatin1Char('\n')); 0424 0425 // remove common leading & trailing chars from the lines 0426 for (auto& l : lines) { 0427 // don't trigger repeated temporary allocations here 0428 0429 // possible comment starts, sorted from longest to shortest 0430 static const QString startMatches[] = {QStringLiteral("//!<"), QStringLiteral("/*!<"), QStringLiteral("/**<"), 0431 QStringLiteral("///<"), QStringLiteral("///"), QStringLiteral("//!"), 0432 QStringLiteral("/**"), QStringLiteral("/*!"), QStringLiteral("//"), 0433 QStringLiteral("/*"), QStringLiteral("/"), QStringLiteral("*")}; 0434 0435 // possible comment ends, sorted from longest to shortest 0436 static const QString endMatches[] = {QStringLiteral("**/"), QStringLiteral("*/")}; 0437 0438 l = l.trimmed(); 0439 0440 // check for ends first, as the starting pattern "*" might interfere with the ending pattern 0441 for (const auto& m : endMatches) { 0442 if (l.endsWith(m)) { 0443 l.chop(m.length()); 0444 break; 0445 } 0446 } 0447 0448 for (const auto& m : startMatches) { 0449 if (l.startsWith(m)) { 0450 l = l.mid(m.length()); 0451 break; 0452 } 0453 } 0454 } 0455 0456 QString ret; 0457 for (const auto& line : qAsConst(lines)) { 0458 if (!ret.isEmpty()) 0459 ret += QLatin1Char('\n'); 0460 ret += line; 0461 } 0462 0463 return ret.trimmed(); 0464 } 0465 0466 QString removeWhitespace(const QString& str) 0467 { 0468 return str.simplified().remove(QLatin1Char(' ')); 0469 } 0470 0471 ParamIterator::~ParamIterator() = default; 0472 0473 ParamIterator::ParamIterator(QStringView parens, QStringView source, int offset) 0474 : d_ptr(new ParamIteratorPrivate{parens, source}) 0475 { 0476 Q_D(ParamIterator); 0477 0478 const auto foundPos = findOpeningBracketOrEnd(parens, source, offset); 0479 if (foundPos != source.size()) { 0480 if (parens.size() > 2 && source[foundPos] == parens[2]) { 0481 //We have to stop the search, because we found an interrupting end-sign before the opening-paren 0482 d->m_prefix = d->sourceRange(offset, foundPos); 0483 d->m_curEnd = d->m_end = d->m_cur = foundPos; 0484 return; 0485 } 0486 0487 Q_ASSERT(source[foundPos] == parens[0]); 0488 //We have a valid prefix before an opening-paren. Take the prefix, and start iterating parameters. 0489 d->m_cur = foundPos + 1; 0490 d->m_curEnd = d->next(); 0491 if (d->m_curEnd != d->m_source.length()) { 0492 d->m_prefix = d->sourceRange(offset, foundPos); 0493 d->m_end = d->m_source.size(); 0494 0495 if (d->m_source[d->m_curEnd] == d->m_parens[1]) { 0496 const auto singleParam = d->sourceRange(d->m_cur, d->m_curEnd); 0497 if (consistsOfWhitespace(singleParam)) { 0498 // Only whitespace characters are present between parentheses => assume that 0499 // there are zero parameters, not a single empty parameter, and stop iterating. 0500 d->m_cur = d->m_end = d->m_curEnd + 1; 0501 } 0502 } 0503 0504 return; 0505 } // else: the paren was not closed. It might be an identifier like "operator<", so count everything as prefix. 0506 } // else: we have neither found an ending-character, nor an opening-paren, so take the whole input and end. 0507 0508 d->m_prefix = d->m_source.mid(offset); 0509 d->m_curEnd = d->m_end = d->m_cur = d->m_source.length(); 0510 } 0511 0512 ParamIterator& ParamIterator::operator ++() 0513 { 0514 Q_D(ParamIterator); 0515 0516 Q_ASSERT(*this); 0517 0518 if (d->m_curEnd >= d->m_source.size()) { 0519 //We have reached the end-paren. Stop iterating. 0520 d->m_cur = d->m_end = d->m_curEnd; 0521 } else if (d->m_source[d->m_curEnd] == d->m_parens[1]) { 0522 //We have reached the end-paren. Stop iterating. 0523 d->m_cur = d->m_end = d->m_curEnd + 1; 0524 } else { 0525 //Iterate on through parameters 0526 d->m_cur = d->m_curEnd + 1; 0527 if (d->m_cur < d->m_source.length()) { 0528 d->m_curEnd = d->next(); 0529 } 0530 } 0531 return *this; 0532 } 0533 0534 QStringView ParamIterator::operator*() const 0535 { 0536 Q_D(const ParamIterator); 0537 0538 Q_ASSERT(*this); 0539 0540 return d->sourceRange(d->m_cur, d->m_curEnd).trimmed(); 0541 } 0542 0543 ParamIterator::operator bool() const 0544 { 0545 Q_D(const ParamIterator); 0546 0547 return d->m_cur < d->m_end; 0548 } 0549 0550 QStringView ParamIterator::prefix() const 0551 { 0552 Q_D(const ParamIterator); 0553 0554 return d->m_prefix; 0555 } 0556 0557 uint ParamIterator::position() const 0558 { 0559 Q_D(const ParamIterator); 0560 0561 return ( uint )d->m_cur; 0562 } 0563 }