File indexing completed on 2024-05-05 04:38:46

0001 /*
0002     SPDX-FileCopyrightText: 2009 Andreas Pakulat <apaku@gmx.de>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 
0006     This file mostly contains code taken from Qt's QSettings class; the copyright
0007     header from that file follows:
0008 
0009     SPDX-FileCopyrightText: 2009 Nokia Corporation and /or its subsidiary(-ies). <qt-info@nokia.com>
0010 
0011     This file is part of the QtCore module of the Qt Toolkit.
0012     SPDX-License-Identifier: LGPL-2.1-only WITH LicenseRef-Qt-LGPL-exception-1.0 OR GPL-3.0-only OR LicenseRef-Qt-Commercial
0013 */
0014 
0015 #include "kdevstringhandler.h"
0016 
0017 #include <QStringList>
0018 #include <QString>
0019 #include <QStringRef>
0020 #include <QByteArray>
0021 #include <QChar>
0022 #include <QDataStream>
0023 #include <QVariant>
0024 #include <QRegExp>
0025 #include <QTextDocument>
0026 
0027 #include <algorithm>
0028 #include <cctype>
0029 
0030 namespace KDevelop {
0031 QString joinWithEscaping(const QStringList& input, QChar joinchar, QChar escapechar)
0032 {
0033     QStringList tmp = input;
0034     return tmp.replaceInStrings(joinchar, QString(joinchar) + QString(escapechar)).join(joinchar);
0035 }
0036 
0037 QStringList splitWithEscaping(const QString& input, QChar splitchar, QChar escapechar)
0038 {
0039     enum State { Normal, SeenEscape } state;
0040 
0041     state = Normal;
0042 
0043     QStringList result;
0044     QString currentstring;
0045     for (const QChar c : input) {
0046         switch (state) {
0047         case Normal:
0048             if (c == escapechar) {
0049                 state = SeenEscape;
0050             } else if (c == splitchar) {
0051                 result << currentstring;
0052                 currentstring.clear();
0053             } else {
0054                 currentstring += c;
0055             }
0056             break;
0057         case SeenEscape:
0058             currentstring += c;
0059             state = Normal;
0060             break;
0061         }
0062     }
0063 
0064     if (!currentstring.isEmpty()) {
0065         result << currentstring;
0066     }
0067     return result;
0068 }
0069 
0070 QVariant stringToQVariant(const QString& s)
0071 {
0072     // Taken from qsettings.cpp, stringToVariant()
0073     if (s.startsWith(QLatin1Char('@'))) {
0074         if (s.endsWith(QLatin1Char(')'))) {
0075             if (s.startsWith(QLatin1String("@Variant("))) {
0076                 QByteArray a(s.toLatin1().mid(9));
0077                 QDataStream stream(&a, QIODevice::ReadOnly);
0078                 stream.setVersion(QDataStream::Qt_4_4);
0079                 QVariant result;
0080                 stream >> result;
0081                 return result;
0082             }
0083         }
0084     }
0085     return QVariant();
0086 
0087 }
0088 
0089 QString qvariantToString(const QVariant& variant)
0090 {
0091     // Taken from qsettings.cpp, variantToString()
0092     QByteArray a;
0093     {
0094         QDataStream s(&a, QIODevice::WriteOnly);
0095         s.setVersion(QDataStream::Qt_4_4);
0096         s << variant;
0097     }
0098 
0099     QString result = QLatin1String("@Variant(") + QString::fromLatin1(a.constData(), a.size()) + QLatin1Char(')');
0100     return result;
0101 
0102 }
0103 
0104 QString htmlToPlainText(const QString& s, HtmlToPlainTextMode mode)
0105 {
0106     switch (mode) {
0107     case FastMode: {
0108         QString result(s);
0109         result.remove(QRegExp(QStringLiteral("<[^>]+>")));
0110         return result;
0111     }
0112     case CompleteMode: {
0113         QTextDocument doc;
0114         doc.setHtml(s);
0115         return doc.toPlainText();
0116     }
0117     }
0118     return QString();     // never reached
0119 }
0120 
0121 QByteArray escapeJavaScriptString(const QByteArray& str)
0122 {
0123     // The special symbols that have to be escaped are listed e.g. here:
0124     // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String#escape_sequences
0125     QByteArray result;
0126     result.reserve(str.size());
0127 
0128     for (char ch : str) {
0129         switch (ch) {
0130         case '\n':
0131             result += "\\n";
0132             break;
0133         case '\r':
0134             result += "\\r";
0135             break;
0136         case '\t':
0137             result += "\\t";
0138             break;
0139         case '\b':
0140             result += "\\b";
0141             break;
0142         case '\f':
0143             result += "\\f";
0144             break;
0145         case '\v':
0146             result += "\\v";
0147             break;
0148         case '\0':
0149             result += "\\0";
0150             break;
0151         case '\'':
0152         case '"':
0153         case '\\':
0154             result += '\\';
0155             [[fallthrough]];
0156         default:
0157             result += ch;
0158         }
0159     }
0160 
0161     return result;
0162 }
0163 }
0164 
0165 int KDevelop::findAsciiIdentifierLength(const QStringRef& str)
0166 {
0167     if (str.isEmpty()) {
0168         return 0;
0169     }
0170 
0171     constexpr ushort maxAscii{127};
0172     const auto firstChar = str[0].unicode();
0173     const bool isIdentifier = firstChar <= maxAscii
0174                                 && (std::isalpha(firstChar) || firstChar == '_');
0175     if (!isIdentifier) {
0176         return 0;
0177     }
0178 
0179     const auto partOfIdentifier = [=](QChar character) {
0180         const auto u = character.unicode();
0181         return u <= maxAscii && (std::isalnum(u) || u == '_');
0182     };
0183     return std::find_if_not(str.cbegin() + 1, str.cend(), partOfIdentifier) - str.cbegin();
0184 }
0185 
0186 KDevelop::VariableMatch KDevelop::matchPossiblyBracedAsciiVariable(const QStringRef& str)
0187 {
0188     if (str.isEmpty()) {
0189         return {};
0190     }
0191 
0192     if (str[0].unicode() == '{') {
0193         const auto nameLength = findAsciiIdentifierLength(str.mid(1));
0194         if (nameLength == 0) {
0195             return {};
0196         }
0197         const auto closingBraceIndex = 1 + nameLength;
0198         if (closingBraceIndex < str.size() && str[closingBraceIndex].unicode() == '}') {
0199             return {nameLength + 2, str.mid(1, nameLength).toString()};
0200         }
0201     } else {
0202         const auto nameLength = findAsciiIdentifierLength(str);
0203         if (nameLength != 0) {
0204             return {nameLength, str.left(nameLength).toString()};
0205         }
0206     }
0207 
0208     return {};
0209 }
0210 
0211 QString KDevelop::stripAnsiSequences(const QString& str)
0212 {
0213     if (str.isEmpty()) {
0214         return QString(); // fast path
0215     }
0216 
0217     enum {
0218         PLAIN,
0219         ANSI_START,
0220         ANSI_CSI,
0221         ANSI_SEQUENCE,
0222         ANSI_WAITING_FOR_ST,
0223         ANSI_ST_STARTED
0224     } state = PLAIN;
0225 
0226     QString result;
0227     result.reserve(str.count());
0228 
0229     for (const QChar c : str) {
0230         const auto val = c.unicode();
0231         switch (state) {
0232         case PLAIN:
0233             if (val == 27) // 'ESC'
0234                 state = ANSI_START;
0235             else if (val == 155) // equivalent to 'ESC'-'['
0236                 state = ANSI_CSI;
0237             else
0238                 result.append(c);
0239             break;
0240         case ANSI_START:
0241             if (val == 91) // [
0242                 state = ANSI_CSI;
0243             else if (val == 80 || val == 93 || val == 94 || val == 95) // 'P', ']', '^' and '_'
0244                 state = ANSI_WAITING_FOR_ST;
0245             else if (val >= 64 && val <= 95)
0246                 state = PLAIN;
0247             else
0248                 state = ANSI_SEQUENCE;
0249             break;
0250         case ANSI_CSI:
0251             if (val >= 64 && val <= 126) // Anything between '@' and '~'
0252                 state = PLAIN;
0253             break;
0254         case ANSI_SEQUENCE:
0255             if (val >= 64 && val <= 95) // Anything between '@' and '_'
0256                 state = PLAIN;
0257             break;
0258         case ANSI_WAITING_FOR_ST:
0259             if (val == 7) // 'BEL'
0260                 state = PLAIN;
0261             else if (val == 27) // 'ESC'
0262                 state = ANSI_ST_STARTED;
0263             break;
0264         case ANSI_ST_STARTED:
0265             if (val == 92) // '\'
0266                 state = PLAIN;
0267             else
0268                 state = ANSI_WAITING_FOR_ST;
0269             break;
0270         }
0271     }
0272 
0273     return result;
0274 }
0275 
0276 void KDevelop::normalizeLineEndings(QByteArray& text)
0277 {
0278     for (int i = 0, s = text.size(); i < s; ++i) {
0279         if (text[i] != '\r') {
0280             continue;
0281         }
0282         if (i + 1 < s && text[i + 1] == '\n') {
0283             text.remove(i, 1);
0284         } else {
0285             text[i] = '\n';
0286         }
0287     }
0288 }