File indexing completed on 2024-05-12 05:10:17
0001 /*************************************************************************** 0002 Copyright (C) 2003-2009 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "string_utils.h" 0026 #include "../fieldformat.h" 0027 0028 #include <KCharsets> 0029 #include <KLocalizedString> 0030 #include <KRandom> 0031 0032 #include <QRegularExpression> 0033 #include <QTextCodec> 0034 #include <QVariant> 0035 #include <QCache> 0036 #include <QRandomGenerator> 0037 0038 namespace { 0039 static const int STRING_STORE_SIZE = 4999; // too big, too small? 0040 0041 class StringIterator { 0042 QString::const_iterator pos, e; 0043 public: 0044 explicit StringIterator(QStringView string) : pos(string.begin()), e(string.end()) {} 0045 inline bool hasNext() const { return pos < e; } 0046 inline uint next() { 0047 Q_ASSERT(hasNext()); 0048 const QChar uc = *pos++; 0049 if(uc.isSurrogate()) { 0050 if(uc.isHighSurrogate() && pos < e && pos->isLowSurrogate()) 0051 return QChar::surrogateToUcs4(uc, *pos++); 0052 return QChar::ReplacementCharacter; 0053 } 0054 return uc.unicode(); 0055 } 0056 }; 0057 } 0058 0059 QString Tellico::decodeHTML(const QByteArray& data_) { 0060 return decodeHTML(fromHtmlData(data_)); 0061 } 0062 0063 QString Tellico::decodeHTML(const QString& text) { 0064 return KCharsets::resolveEntities(text); 0065 } 0066 0067 QString Tellico::uid(int l, bool prefix) { 0068 QString uid; 0069 if(prefix) { 0070 uid = QStringLiteral("Tellico"); 0071 } 0072 uid.append(KRandom::randomString(qMax(l - uid.length(), 0))); 0073 return uid; 0074 } 0075 0076 uint Tellico::toUInt(const QString& s, bool* ok) { 0077 if(s.isEmpty()) { 0078 if(ok) { 0079 *ok = false; 0080 } 0081 return 0; 0082 } 0083 0084 int idx = 0; 0085 while(idx < s.length() && s[idx].isDigit()) { 0086 ++idx; 0087 } 0088 if(idx == 0) { 0089 if(ok) { 0090 *ok = false; 0091 } 0092 return 0; 0093 } 0094 return s.leftRef(idx).toUInt(ok); 0095 } 0096 0097 QString Tellico::i18nReplace(QString text) { 0098 // Because QDomDocument sticks in random newlines, go ahead and grab them too 0099 static QRegularExpression rx(QStringLiteral("(?:\\n+ *)*<i18n>(.*?)</i18n>(?: *\\n+)*")); 0100 QRegularExpressionMatch match = rx.match(text); 0101 while(match.hasMatch()) { 0102 // KDE bug 254863, be sure to escape just in case of spurious & entities 0103 text.replace(match.capturedStart(), 0104 match.capturedLength(), 0105 i18n(match.captured(1).toUtf8().constData()).toHtmlEscaped()); 0106 match = rx.match(text, match.capturedStart()+1); 0107 } 0108 return text; 0109 } 0110 0111 int Tellico::stringHash(const QString& str) { 0112 uint h = 0; 0113 uint g = 0; 0114 for(int i = 0; i < str.length(); ++i) { 0115 h = (h << 4) + str.unicode()[i].cell(); 0116 if((g = h & 0xf0000000)) { 0117 h ^= g >> 24; 0118 } 0119 h &= ~g; 0120 } 0121 0122 const int index = h; 0123 return index < 0 ? -index : index; 0124 } 0125 0126 QString Tellico::shareString(const QString& str) { 0127 static QString stringStore[STRING_STORE_SIZE]; 0128 0129 const int hash = stringHash(str) % STRING_STORE_SIZE; 0130 if(stringStore[hash] != str) { 0131 stringStore[hash] = str; 0132 } 0133 return stringStore[hash]; 0134 } 0135 0136 QString Tellico::minutes(int seconds) { 0137 int min = seconds / 60; 0138 seconds = seconds % 60; 0139 return QString::number(min) + QLatin1Char(':') + QString::number(seconds).rightJustified(2, QLatin1Char('0')); 0140 } 0141 0142 QString Tellico::fromHtmlData(const QByteArray& data_, const char* codecName) { 0143 QTextCodec* codec = codecName ? QTextCodec::codecForHtml(data_, QTextCodec::codecForName(codecName)) 0144 : QTextCodec::codecForHtml(data_); 0145 return codec->toUnicode(data_); 0146 } 0147 0148 QString Tellico::removeAccents(const QString& value_) { 0149 static QCache<QString, QString> stringCache(STRING_STORE_SIZE); 0150 if(stringCache.contains(value_)) { 0151 return *stringCache.object(value_); 0152 } 0153 static QRegularExpression rx; 0154 if(rx.pattern().isEmpty()) { 0155 QString pattern(QStringLiteral("(?:")); 0156 for(int i = 0x0300; i <= 0x036F; ++i) { 0157 pattern += QChar(i) + QLatin1Char('|'); 0158 } 0159 pattern.chop(1); 0160 pattern += QLatin1Char(')'); 0161 rx.setPattern(pattern); 0162 rx.optimize(); 0163 } 0164 // remove accents from table "Combining Diacritical Marks" 0165 const QString value2 = value_.normalized(QString::NormalizationForm_D).remove(rx); 0166 stringCache.insert(value_, new QString(value2)); 0167 return value2; 0168 } 0169 0170 QByteArray Tellico::obfuscate(const QString& string) { 0171 QByteArray b; 0172 b.reserve(string.length() * 2); 0173 for(int p = 0; p < string.length(); p++) { 0174 char c = QRandomGenerator::global()->generate() % 255; 0175 b.prepend(c ^ string.at(p).unicode()); 0176 b.prepend(c); 0177 } 0178 return b.toHex(); 0179 } 0180 0181 QString Tellico::reverseObfuscate(const QByteArray& bytes) { 0182 if(bytes.length() % 2 != 0 || bytes.isEmpty()) { 0183 return QString(); 0184 } 0185 const QByteArray b = QByteArray::fromHex(bytes); 0186 QString result; 0187 result.reserve(b.length() / 2); 0188 for(int p = b.length()-1; p >= 0; p -= 2) { 0189 result.append(QLatin1Char(b.at(p-1) ^ b.at(p))); 0190 } 0191 return result; 0192 } 0193 0194 QString Tellico::removeControlCodes(const QString& string) { 0195 QString result; 0196 result.reserve(string.size()); 0197 StringIterator it(string); 0198 while(it.hasNext()) { 0199 const auto c = it.next(); 0200 // legal control codes in XML 1.0 are U+0009, U+000A, U+000D 0201 // https://www.w3.org/TR/xml/#charsets 0202 if(c > 0x1F || c == 0x9 || c == 0xA || c == 0xD) { 0203 if(c < 0xd800) result += QChar(c); 0204 else result += QString::fromUcs4(&c, 1); 0205 } 0206 } 0207 return result; 0208 }