File indexing completed on 2024-04-28 03:53:48

0001 /*
0002     This file is part of the KDE libraries
0003 
0004     SPDX-FileCopyrightText: 1999 Ian Zepp <icszepp@islc.net>
0005     SPDX-FileCopyrightText: 2006 Dominic Battre <dominic@battre.de>
0006     SPDX-FileCopyrightText: 2006 Martin Pool <mbp@canonical.com>
0007 
0008     SPDX-License-Identifier: LGPL-2.0-or-later
0009 */
0010 
0011 #include "kstringhandler.h"
0012 
0013 #include <stdlib.h> // random()
0014 
0015 #include <QList>
0016 #include <QRegularExpression>
0017 
0018 //
0019 // Capitalization routines
0020 //
0021 QString KStringHandler::capwords(const QString &text)
0022 {
0023     if (text.isEmpty()) {
0024         return text;
0025     }
0026 
0027     const QString strippedText = text.trimmed();
0028     const QString space = QString(QLatin1Char(' '));
0029     const QStringList words = capwords(strippedText.split(space));
0030 
0031     QString result = text;
0032     result.replace(strippedText, words.join(space));
0033     return result;
0034 }
0035 
0036 QStringList KStringHandler::capwords(const QStringList &list)
0037 {
0038     QStringList tmp = list;
0039     for (auto &str : tmp) {
0040         str[0] = str.at(0).toUpper();
0041     }
0042     return tmp;
0043 }
0044 
0045 QString KStringHandler::lsqueeze(const QString &str, const int maxlen)
0046 {
0047     if (str.length() > maxlen) {
0048         const int part = maxlen - 3;
0049         return QLatin1String("...") + QStringView(str).right(part);
0050     } else {
0051         return str;
0052     }
0053 }
0054 
0055 QString KStringHandler::csqueeze(const QString &str, const int maxlen)
0056 {
0057     if (str.length() > maxlen && maxlen > 3) {
0058         const int part = (maxlen - 3) / 2;
0059         const QStringView strView{str};
0060         return strView.left(part) + QLatin1String("...") + strView.right(part);
0061     } else {
0062         return str;
0063     }
0064 }
0065 
0066 QString KStringHandler::rsqueeze(const QString &str, const int maxlen)
0067 {
0068     if (str.length() > maxlen) {
0069         const int part = maxlen - 3;
0070         return QStringView(str).left(part) + QLatin1String("...");
0071     } else {
0072         return str;
0073     }
0074 }
0075 
0076 QStringList KStringHandler::perlSplit(const QStringView sep, const QStringView str, int max)
0077 {
0078     const bool ignoreMax = max == 0;
0079 
0080     const int sepLength = sep.size();
0081 
0082     QStringList list;
0083     int searchStart = 0;
0084     int sepIndex = str.indexOf(sep, searchStart);
0085 
0086     while (sepIndex != -1 && (ignoreMax || list.count() < max - 1)) {
0087         const auto chunk = str.mid(searchStart, sepIndex - searchStart);
0088         if (!chunk.isEmpty()) {
0089             list.append(chunk.toString());
0090         }
0091 
0092         searchStart = sepIndex + sepLength;
0093         sepIndex = str.indexOf(sep, searchStart);
0094     }
0095 
0096     const auto lastChunk = str.mid(searchStart, str.length() - searchStart);
0097     if (!lastChunk.isEmpty()) {
0098         list.append(lastChunk.toString());
0099     }
0100 
0101     return list;
0102 }
0103 
0104 QStringList KStringHandler::perlSplit(const QString &sep, const QString &s, int max)
0105 {
0106     return perlSplit(QStringView(sep), QStringView(s), max);
0107 }
0108 
0109 QStringList KStringHandler::perlSplit(const QChar &sep, const QString &str, int max)
0110 {
0111     return perlSplit(QStringView(&sep, 1), QStringView(str), max);
0112 }
0113 
0114 QStringList KStringHandler::perlSplit(const QRegularExpression &sep, const QString &str, int max)
0115 {
0116     // nothing to split
0117     if (str.isEmpty()) {
0118         return QStringList();
0119     }
0120 
0121     const bool ignoreMax = max == 0;
0122 
0123     QStringList list;
0124 
0125     int start = 0;
0126 
0127     const QStringView strView(str);
0128 
0129     QRegularExpression separator(sep);
0130     separator.setPatternOptions(QRegularExpression::UseUnicodePropertiesOption);
0131 
0132     QRegularExpressionMatchIterator iter = separator.globalMatch(strView);
0133     QRegularExpressionMatch match;
0134     while (iter.hasNext() && (ignoreMax || list.count() < max - 1)) {
0135         match = iter.next();
0136         const QStringView chunk = strView.mid(start, match.capturedStart() - start);
0137         if (!chunk.isEmpty()) {
0138             list.append(chunk.toString());
0139         }
0140 
0141         start = match.capturedEnd();
0142     }
0143 
0144     // catch the remainder
0145     const QStringView lastChunk = strView.mid(start, strView.size() - start);
0146     if (!lastChunk.isEmpty()) {
0147         list.append(lastChunk.toString());
0148     }
0149 
0150     return list;
0151 }
0152 
0153 QString KStringHandler::tagUrls(const QString &text)
0154 {
0155     QString richText(text);
0156 
0157     static const QRegularExpression urlEx(QStringLiteral(R"((www\.(?!\.)|(fish|ftp|http|https)://[\d\w./,:_~?=&;#@\-+%$()]+))"),
0158                                           QRegularExpression::UseUnicodePropertiesOption);
0159     // The reference \1 is going to be replaced by the matched url
0160     richText.replace(urlEx, QStringLiteral("<a href=\"\\1\">\\1</a>"));
0161     return richText;
0162 }
0163 
0164 QString KStringHandler::obscure(const QString &str)
0165 {
0166     QString result;
0167     for (const QChar ch : str) {
0168         // yes, no typo. can't encode ' ' or '!' because
0169         // they're the unicode BOM. stupid scrambling. stupid.
0170         const ushort uc = ch.unicode();
0171         result += (uc <= 0x21) ? ch : QChar(0x1001F - uc);
0172     }
0173 
0174     return result;
0175 }
0176 
0177 static inline bool containsSpaces(const QString &text)
0178 {
0179     for (int i = 0; i < text.length(); i++) {
0180         const QChar c = text[i];
0181         if (c.isSpace()) {
0182             return true;
0183         }
0184     }
0185     return false;
0186 }
0187 
0188 QString KStringHandler::preProcessWrap(const QString &text)
0189 {
0190     const QChar zwsp(0x200b);
0191 
0192     QString result;
0193     result.reserve(text.length());
0194 
0195     const bool containsSpaces = ::containsSpaces(text);
0196 
0197     for (int i = 0; i < text.length(); i++) {
0198         const QChar c = text[i];
0199 
0200         const bool openingParens = (c == QLatin1Char('(') || c == QLatin1Char('{') || c == QLatin1Char('['));
0201         const bool singleQuote = (c == QLatin1Char('\''));
0202         const bool closingParens = (c == QLatin1Char(')') || c == QLatin1Char('}') || c == QLatin1Char(']'));
0203         const bool breakAfter = (closingParens || c.isPunct() || c.isSymbol());
0204         const bool isLastChar = i == (text.length() - 1);
0205         const bool isLower = c.isLower();
0206         const bool nextIsUpper = !isLastChar && text[i + 1].isUpper(); // false by default
0207         const bool nextIsSpace = isLastChar || text[i + 1].isSpace(); // true by default
0208         const bool prevIsSpace = (i == 0 || text[i - 1].isSpace() || result[result.length() - 1] == zwsp);
0209 
0210         // Provide a breaking opportunity before opening parenthesis
0211         if (openingParens && !prevIsSpace) {
0212             result += zwsp;
0213         }
0214 
0215         // Provide a word joiner before the single quote
0216         if (singleQuote && !prevIsSpace) {
0217             result += QChar(0x2060);
0218         }
0219 
0220         result += c;
0221 
0222         // Provide a breaking opportunity between camelCase and PascalCase sub-words;
0223         // but if source string contains whitespaces, then it should be sufficiently wrappable on its own
0224         const bool isCamelCase = !containsSpaces && isLower && nextIsUpper;
0225 
0226         if (isCamelCase || (breakAfter && !openingParens && !nextIsSpace && !singleQuote)) {
0227             result += zwsp;
0228         }
0229     }
0230 
0231     return result;
0232 }
0233 
0234 int KStringHandler::logicalLength(const QString &text)
0235 {
0236     int length = 0;
0237     const auto chrs = text.toUcs4();
0238     for (const auto chr : chrs) {
0239         const auto script = QChar::script(chr);
0240         /* clang-format off */
0241         if (script == QChar::Script_Han
0242             || script == QChar::Script_Hangul
0243             || script == QChar::Script_Hiragana
0244             || script == QChar::Script_Katakana
0245             || script == QChar::Script_Yi
0246             || QChar::isHighSurrogate(chr)) { /* clang-format on */
0247             length += 2;
0248         } else {
0249             length += 1;
0250         }
0251     }
0252     return length;
0253 }