File indexing completed on 2024-05-12 04:38:04

0001 /*
0002     SPDX-FileCopyrightText: 2007 David Nolden <david.nolden.kdevelop@art-master.de>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-only
0005 */
0006 
0007 #include "stringhelpers.h"
0008 #include <debug.h>
0009 
0010 #include <QString>
0011 
0012 #include <algorithm>
0013 
0014 namespace {
0015 bool endsWithWordBoundary(QStringView str)
0016 {
0017     if (str.isEmpty()) {
0018         return true;
0019     }
0020     const auto boundary = str.last();
0021     return !boundary.isLetterOrNumber() && boundary != QLatin1Char('_');
0022 }
0023 
0024 /// libclang surrounds binary operators but not angle brackets with spaces.
0025 bool isOperatorSurroundedWithSpaces(QStringView str, int pos)
0026 {
0027     Q_ASSERT(pos >= 0 && pos < str.size());
0028 
0029     if (pos == 0 || pos == str.size() - 1) {
0030         return false; // there is no place for surrounding spaces
0031     }
0032 
0033     constexpr QLatin1Char lt{'<'}, gt{'>'}, eq{'='}, space{' '};
0034 
0035     const auto c = str[pos];
0036     Q_ASSERT(c == lt || c == gt);
0037 
0038     // Note: due to the `pos == 0 || pos == str.size() - 1` check above,
0039     // most conditionals below don't need to check boundaries.
0040     int operatorEnd = pos + 1;
0041     if (str[pos + 1] == c)
0042         ++operatorEnd; // << or >>
0043     else if (str[pos - 1] == c) {
0044         --pos; // << or >>
0045     } else {
0046         // <=>
0047         if (c == lt && str[pos + 1] == eq && pos + 2 < str.size() && str[pos + 2] == gt) {
0048             operatorEnd += 2;
0049         } else if (c == gt && str[pos - 1] == eq && pos >= 2 && str[pos - 2] == lt) {
0050             pos -= 2;
0051         }
0052     }
0053 
0054     if (operatorEnd - pos < 3 && operatorEnd < str.size() && str[operatorEnd] == eq) {
0055         ++operatorEnd; // <= or >= or <<= or >>=
0056     }
0057 
0058     return pos > 0 && str[pos - 1] == space && operatorEnd < str.size() && str[operatorEnd] == space;
0059 }
0060 
0061 bool isOperator(QStringView str, int pos)
0062 {
0063     Q_ASSERT(pos >= 0 && pos < str.size());
0064 
0065     if (isOperatorSurroundedWithSpaces(str, pos)) {
0066         return true;
0067     }
0068 
0069     const auto op = QLatin1String("operator");
0070     if (pos < op.size()) {
0071         return false;
0072     }
0073 
0074     const auto c = str[pos];
0075     Q_ASSERT(c == QLatin1Char('<') || c == QLatin1Char('>'));
0076 
0077     --pos;
0078 
0079     // note: due to the `pos < op.size()` check above, the below conditionals don't need to check boundaries
0080     if (str[pos] == c) {
0081         // handle `operator<<` and `operator>>`
0082         --pos;
0083     } else if (c == QLatin1Char('>') && str[pos] == QLatin1Char('=') && str[pos - 1] == QLatin1Char('<')) {
0084         // handle `operator<=>`
0085         pos -= 2;
0086     }
0087 
0088     // skip spaces, e.g. `operator <`
0089     while (pos > 0 && str[pos].isSpace()) {
0090         --pos;
0091     }
0092 
0093     auto prefix = str.left(pos + 1);
0094     if (!prefix.endsWith(op)) {
0095         return false;
0096     }
0097 
0098     prefix.chop(op.size());
0099     return endsWithWordBoundary(prefix);
0100 }
0101 
0102 // check for operator-> but don't get confused by operator-->
0103 bool isArrowOperator(QStringView str, int pos)
0104 {
0105     Q_ASSERT(pos >= 0 && pos < str.size());
0106 
0107     Q_ASSERT(str[pos] == QLatin1Char('>'));
0108     return pos > 0 && str[pos - 1] == QLatin1Char('-') && (pos == 1 || str[pos - 2] != QLatin1Char('-'));
0109 }
0110 
0111 bool isOperatorOrArrowOperator(QStringView str, int pos)
0112 {
0113     return isOperator(str, pos) || isArrowOperator(str, pos);
0114 }
0115 
0116 /// Skips literals enclosed in single or double quotes.
0117 /// No need to support raw string literals, because they cannot appear within a macro parameter list;
0118 /// in other contexts libclang converts them into non-raw string literals in each string that ends up here.
0119 int skipStringOrCharLiteral(QStringView str, int pos)
0120 {
0121     Q_ASSERT(pos >= 0 && pos < str.size());
0122 
0123     const auto quote = str[pos];
0124     Q_ASSERT(quote == QLatin1Char('\'') || quote == QLatin1Char('"'));
0125 
0126     const auto end = str.size();
0127     pos++;
0128     while (pos < end && (str[pos] != quote || str[pos - 1] == QLatin1Char('\\'))) {
0129         pos++;
0130     }
0131     return pos;
0132 }
0133 
0134 /// Skips multi-line comments.
0135 /// No need to support single-line comments, because they cannot appear within a macro parameter list;
0136 /// in other contexts libclang removes comments from each string that ends up here.
0137 int skipComment(QStringView str, int pos)
0138 {
0139     Q_ASSERT(pos >= 0 && pos < str.size());
0140     Q_ASSERT(str[pos] == QLatin1Char{'/'});
0141 
0142     if (pos + 1 == str.size() || str[pos + 1] != QLatin1Char{'*'})
0143         return pos; // not a comment
0144     pos += 2;
0145 
0146     while (pos < str.size() && (str[pos] != QLatin1Char{'/'} || str[pos - 1] != QLatin1Char{'*'})) {
0147         ++pos;
0148     }
0149 
0150     return pos;
0151 }
0152 
0153 int trySkipStringOrCharLiteralOrComment(QStringView str, int pos)
0154 {
0155     Q_ASSERT(pos >= 0 && pos < str.size());
0156 
0157     switch (str[pos].unicode()) {
0158     case '"':
0159     case '\'':
0160         return skipStringOrCharLiteral(str, pos);
0161     case '/':
0162         return skipComment(str, pos);
0163     }
0164     return pos;
0165 }
0166 } // unnamed namespace
0167 
0168 namespace KDevelop {
0169 bool consistsOfWhitespace(QStringView str)
0170 {
0171     return std::all_of(str.cbegin(), str.cend(), [](QChar c) {
0172         return c.isSpace();
0173     });
0174 }
0175 
0176 class ParamIteratorPrivate
0177 {
0178     Q_DISABLE_COPY_MOVE(ParamIteratorPrivate)
0179 public:
0180     explicit ParamIteratorPrivate(QStringView parens, QStringView source)
0181         : m_parens(parens)
0182         , m_source(source)
0183     {
0184     }
0185 
0186     const QStringView m_parens;
0187     const QStringView m_source;
0188     QStringView m_prefix;
0189     int m_cur;
0190     int m_curEnd;
0191     int m_end;
0192 
0193     QStringView sourceRange(int first, int last) const
0194     {
0195         return m_source.mid(first, last - first);
0196     }
0197 
0198     int next() const
0199     {
0200         return findCommaOrEnd(m_source, m_cur, m_parens[1]);
0201     }
0202 };
0203 
0204 namespace {
0205 QChar fittingClosingNonAngleBracket(QChar openingBracket)
0206 {
0207     switch (openingBracket.unicode()) {
0208     case '(':
0209         return QLatin1Char(')');
0210     case '[':
0211         return QLatin1Char(']');
0212     case '{':
0213         return QLatin1Char('}');
0214     default:
0215         Q_UNREACHABLE();
0216     }
0217 }
0218 
0219 // findClosingNonAngleBracket() and findClosingAngleBracket() have different implementations for the following reason.
0220 // Taking all bracket types into account while looking for a closing angle bracket may improve correctness, because the
0221 // characters of other bracket types are always brackets, not [parts of] operators; distinguishing between angle
0222 // brackets and operators is heuristic and unreliable. For example, in `Foo<(A>B)>` the round brackets help to recognize
0223 // the first '>' character as an operator rather than a closing angle bracket. Conversely, taking all bracket types into
0224 // account while looking for a closing non-angle bracket may adversely affect correctness. For example, in `Foo<(A<B)>`
0225 // the second '<' character would be regarded as an opening angle bracket, which would prevent recognizing the closing
0226 // round bracket.
0227 
0228 /// Finds in @p str the position of a fitting closing bracket for the opening bracket @p str[@p pos], e.g. ')' for '('.
0229 /// @return the position of a fitting closing bracket or str.size() if not found.
0230 /// @warning This function does not support angle brackets. Use findClosingAngleBracket() for that.
0231 int findClosingNonAngleBracket(QStringView str, int pos)
0232 {
0233     Q_ASSERT(pos >= 0 && pos < str.size());
0234     Q_ASSERT(str[pos] == QLatin1Char{'('} || str[pos] == QLatin1Char{'['} || str[pos] == QLatin1Char{'{'});
0235 
0236     const auto openingBracket = str[pos];
0237     const auto closingBracket = fittingClosingNonAngleBracket(openingBracket);
0238 
0239     int depth = 1;
0240 
0241     for (++pos; pos < str.size(); ++pos) {
0242         if (str[pos] == openingBracket) {
0243             ++depth;
0244         } else if (str[pos] == closingBracket) {
0245             if (--depth == 0) {
0246                 return pos;
0247             }
0248         } else {
0249             pos = trySkipStringOrCharLiteralOrComment(str, pos);
0250         }
0251     }
0252 
0253     Q_ASSERT(depth > 0);
0254     return str.size();
0255 }
0256 
0257 /// Finds in @p str the position of a fitting closing angle bracket for the opening angle bracket @p str[@p pos] == '<'.
0258 /// @return the position of a fitting closing bracket or str.size() if not found.
0259 int findClosingAngleBracket(QStringView str, int pos)
0260 {
0261     Q_ASSERT(pos >= 0 && pos < str.size());
0262     Q_ASSERT(str[pos] == QLatin1Char{'<'});
0263 
0264     int depth = 1;
0265 
0266     for (++pos; pos < str.size(); ++pos) {
0267         switch (str[pos].unicode()) {
0268         case '<':
0269             if (!isOperator(str, pos)) {
0270                 ++depth;
0271             }
0272             break;
0273         case '>':
0274             if (!isOperatorOrArrowOperator(str, pos)) {
0275                 if (--depth == 0) {
0276                     return pos;
0277                 }
0278             }
0279             break;
0280         case '(':
0281         case '[':
0282         case '{':
0283             pos = findClosingNonAngleBracket(str, pos);
0284             break;
0285         default:
0286             pos = trySkipStringOrCharLiteralOrComment(str, pos);
0287         }
0288     }
0289 
0290     Q_ASSERT(depth > 0);
0291     return str.size();
0292 }
0293 
0294 /// Finds in @p str the position of @p parens[0] or @p parens[2] starting from @p pos at the top level.
0295 /// @return the position of the found symbol or str.size() if not found.
0296 /// @param parens see ParamIterator().
0297 int findOpeningBracketOrEnd(QStringView parens, QStringView str, int pos)
0298 {
0299     Q_ASSERT(pos >= 0 && pos <= str.size());
0300 
0301     Q_ASSERT(parens.size() == 2 || parens.size() == 3);
0302 
0303     Q_ASSERT(QStringView(u"<([{").contains(parens[0]));
0304     Q_ASSERT(parens.left(2) == u"<>" || parens[1] == fittingClosingNonAngleBracket(parens[0]));
0305 
0306     Q_ASSERT(parens.size() == 2 || !QStringView(u"<>()[]{}").contains(parens[2]));
0307 
0308     for (; pos < str.size(); ++pos) {
0309         switch (str[pos].unicode()) {
0310         // Take into account brackets of all types to skip searched-for symbols within them (i.e. not at the top level).
0311         case '<':
0312             if (!isOperator(str, pos)) {
0313                 if (str[pos] == parens[0]) {
0314                     return pos;
0315                 }
0316                 pos = findClosingAngleBracket(str, pos);
0317             }
0318             break;
0319         case '(':
0320         case '[':
0321         case '{':
0322             if (str[pos] == parens[0]) {
0323                 return pos;
0324             }
0325             pos = findClosingNonAngleBracket(str, pos);
0326             break;
0327         default:
0328             if (parens.size() > 2 && str[pos] == parens[2]) {
0329                 return pos;
0330             }
0331             pos = trySkipStringOrCharLiteralOrComment(str, pos);
0332         }
0333     }
0334 
0335     return str.size();
0336 }
0337 } // unnamed namespace
0338 
0339 int findCommaOrEnd(QStringView str, int pos, QChar validEnd)
0340 {
0341     const auto size = str.size();
0342     Q_ASSERT(pos >= 0 && pos <= size);
0343 
0344     for (; pos < size; ++pos) {
0345         switch (str[pos].unicode()) {
0346         // Take into account brackets of all types, not just the validEnd type, to skip ',' within them.
0347         case '<':
0348             if (!isOperator(str, pos)) {
0349                 pos = findClosingAngleBracket(str, pos);
0350             }
0351             break;
0352         case '(':
0353         case '[':
0354         case '{':
0355             pos = findClosingNonAngleBracket(str, pos);
0356             break;
0357         case ',':
0358             return pos;
0359         default:
0360             if (str[pos] == validEnd && !(str[pos] == QLatin1Char('>') && isOperatorOrArrowOperator(str, pos))) {
0361                 return pos;
0362             }
0363             pos = trySkipStringOrCharLiteralOrComment(str, pos);
0364         }
0365     }
0366 
0367     return size;
0368 }
0369 
0370 // NOTE: keep in sync with QString overload below
0371 QByteArray formatComment(const QByteArray& comment)
0372 {
0373     if (comment.isEmpty())
0374         return comment;
0375 
0376     auto lines = comment.split('\n');
0377     // remove common leading & trailing chars from the lines
0378     for (auto& l : lines) {
0379         // don't trigger repeated temporary allocations here
0380 
0381         // possible comment starts, sorted from longest to shortest
0382         static const QByteArray startMatches[] = {
0383             QByteArrayLiteral("//!<"), QByteArrayLiteral("/*!<"), QByteArrayLiteral("/**<"), QByteArrayLiteral("///<"),
0384             QByteArrayLiteral("///"),  QByteArrayLiteral("//!"),  QByteArrayLiteral("/**"),  QByteArrayLiteral("/*!"),
0385             QByteArrayLiteral("//"),   QByteArrayLiteral("/*"),   QByteArrayLiteral("/"),    QByteArrayLiteral("*")};
0386 
0387         // possible comment ends, sorted from longest to shortest
0388         static const QByteArray endMatches[] = {QByteArrayLiteral("**/"), QByteArrayLiteral("*/")};
0389 
0390         l = l.trimmed();
0391 
0392         // check for ends first, as the starting pattern "*" might interfere with the ending pattern
0393         for (const auto& m : endMatches) {
0394             if (l.endsWith(m)) {
0395                 l.chop(m.length());
0396                 break;
0397             }
0398         }
0399 
0400         for (const auto& m : startMatches) {
0401             if (l.startsWith(m)) {
0402                 l.remove(0, m.length());
0403                 break;
0404             }
0405         }
0406     }
0407 
0408     QByteArray ret;
0409     for (const auto& line : qAsConst(lines)) {
0410         if (!ret.isEmpty())
0411             ret += '\n';
0412         ret += line;
0413     }
0414     return ret.trimmed();
0415 }
0416 
0417 // NOTE: keep in sync with QByteArray overload above
0418 QString formatComment(const QString& comment)
0419 {
0420     if (comment.isEmpty())
0421         return comment;
0422 
0423     auto lines = comment.splitRef(QLatin1Char('\n'));
0424 
0425     // remove common leading & trailing chars from the lines
0426     for (auto& l : lines) {
0427         // don't trigger repeated temporary allocations here
0428 
0429         // possible comment starts, sorted from longest to shortest
0430         static const QString startMatches[] = {QStringLiteral("//!<"), QStringLiteral("/*!<"), QStringLiteral("/**<"),
0431                                                QStringLiteral("///<"), QStringLiteral("///"),  QStringLiteral("//!"),
0432                                                QStringLiteral("/**"),  QStringLiteral("/*!"),  QStringLiteral("//"),
0433                                                QStringLiteral("/*"),   QStringLiteral("/"),    QStringLiteral("*")};
0434 
0435         // possible comment ends, sorted from longest to shortest
0436         static const QString endMatches[] = {QStringLiteral("**/"), QStringLiteral("*/")};
0437 
0438         l = l.trimmed();
0439 
0440         // check for ends first, as the starting pattern "*" might interfere with the ending pattern
0441         for (const auto& m : endMatches) {
0442             if (l.endsWith(m)) {
0443                 l.chop(m.length());
0444                 break;
0445             }
0446         }
0447 
0448         for (const auto& m : startMatches) {
0449             if (l.startsWith(m)) {
0450                 l = l.mid(m.length());
0451                 break;
0452             }
0453         }
0454     }
0455 
0456     QString ret;
0457     for (const auto& line : qAsConst(lines)) {
0458         if (!ret.isEmpty())
0459             ret += QLatin1Char('\n');
0460         ret += line;
0461     }
0462 
0463     return ret.trimmed();
0464 }
0465 
0466 QString removeWhitespace(const QString& str)
0467 {
0468     return str.simplified().remove(QLatin1Char(' '));
0469 }
0470 
0471 ParamIterator::~ParamIterator() = default;
0472 
0473 ParamIterator::ParamIterator(QStringView parens, QStringView source, int offset)
0474     : d_ptr(new ParamIteratorPrivate{parens, source})
0475 {
0476     Q_D(ParamIterator);
0477 
0478     const auto foundPos = findOpeningBracketOrEnd(parens, source, offset);
0479     if (foundPos != source.size()) {
0480         if (parens.size() > 2 && source[foundPos] == parens[2]) {
0481             //We have to stop the search, because we found an interrupting end-sign before the opening-paren
0482             d->m_prefix = d->sourceRange(offset, foundPos);
0483             d->m_curEnd = d->m_end = d->m_cur = foundPos;
0484             return;
0485         }
0486 
0487         Q_ASSERT(source[foundPos] == parens[0]);
0488         //We have a valid prefix before an opening-paren. Take the prefix, and start iterating parameters.
0489         d->m_cur = foundPos + 1;
0490         d->m_curEnd = d->next();
0491         if (d->m_curEnd != d->m_source.length()) {
0492             d->m_prefix = d->sourceRange(offset, foundPos);
0493             d->m_end = d->m_source.size();
0494 
0495             if (d->m_source[d->m_curEnd] == d->m_parens[1]) {
0496                 const auto singleParam = d->sourceRange(d->m_cur, d->m_curEnd);
0497                 if (consistsOfWhitespace(singleParam)) {
0498                     // Only whitespace characters are present between parentheses => assume that
0499                     // there are zero parameters, not a single empty parameter, and stop iterating.
0500                     d->m_cur = d->m_end = d->m_curEnd + 1;
0501                 }
0502             }
0503 
0504             return;
0505         } // else: the paren was not closed. It might be an identifier like "operator<", so count everything as prefix.
0506     } // else: we have neither found an ending-character, nor an opening-paren, so take the whole input and end.
0507 
0508     d->m_prefix = d->m_source.mid(offset);
0509     d->m_curEnd = d->m_end = d->m_cur = d->m_source.length();
0510 }
0511 
0512 ParamIterator& ParamIterator::operator ++()
0513 {
0514     Q_D(ParamIterator);
0515 
0516     Q_ASSERT(*this);
0517 
0518     if (d->m_curEnd >= d->m_source.size()) {
0519         //We have reached the end-paren. Stop iterating.
0520         d->m_cur = d->m_end = d->m_curEnd;
0521     } else if (d->m_source[d->m_curEnd] == d->m_parens[1]) {
0522         //We have reached the end-paren. Stop iterating.
0523         d->m_cur = d->m_end = d->m_curEnd + 1;
0524     } else {
0525         //Iterate on through parameters
0526         d->m_cur = d->m_curEnd + 1;
0527         if (d->m_cur < d->m_source.length()) {
0528             d->m_curEnd = d->next();
0529         }
0530     }
0531     return *this;
0532 }
0533 
0534 QStringView ParamIterator::operator*() const
0535 {
0536     Q_D(const ParamIterator);
0537 
0538     Q_ASSERT(*this);
0539 
0540     return d->sourceRange(d->m_cur, d->m_curEnd).trimmed();
0541 }
0542 
0543 ParamIterator::operator bool() const
0544 {
0545     Q_D(const ParamIterator);
0546 
0547     return d->m_cur < d->m_end;
0548 }
0549 
0550 QStringView ParamIterator::prefix() const
0551 {
0552     Q_D(const ParamIterator);
0553 
0554     return d->m_prefix;
0555 }
0556 
0557 uint ParamIterator::position() const
0558 {
0559     Q_D(const ParamIterator);
0560 
0561     return ( uint )d->m_cur;
0562 }
0563 }