File indexing completed on 2024-05-12 05:10:17

0001 /***************************************************************************
0002     Copyright (C) 2003-2009 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "string_utils.h"
0026 #include "../fieldformat.h"
0027 
0028 #include <KCharsets>
0029 #include <KLocalizedString>
0030 #include <KRandom>
0031 
0032 #include <QRegularExpression>
0033 #include <QTextCodec>
0034 #include <QVariant>
0035 #include <QCache>
0036 #include <QRandomGenerator>
0037 
0038 namespace {
0039   static const int STRING_STORE_SIZE = 4999; // too big, too small?
0040 
0041   class StringIterator {
0042     QString::const_iterator pos, e;
0043   public:
0044     explicit StringIterator(QStringView string) : pos(string.begin()), e(string.end()) {}
0045     inline bool hasNext() const { return pos < e; }
0046     inline uint next() {
0047       Q_ASSERT(hasNext());
0048       const QChar uc = *pos++;
0049       if(uc.isSurrogate()) {
0050         if(uc.isHighSurrogate() && pos < e && pos->isLowSurrogate())
0051           return QChar::surrogateToUcs4(uc, *pos++);
0052         return QChar::ReplacementCharacter;
0053       }
0054       return uc.unicode();
0055     }
0056   };
0057 }
0058 
0059 QString Tellico::decodeHTML(const QByteArray& data_) {
0060   return decodeHTML(fromHtmlData(data_));
0061 }
0062 
0063 QString Tellico::decodeHTML(const QString& text) {
0064   return KCharsets::resolveEntities(text);
0065 }
0066 
0067 QString Tellico::uid(int l, bool prefix) {
0068   QString uid;
0069   if(prefix) {
0070     uid = QStringLiteral("Tellico");
0071   }
0072   uid.append(KRandom::randomString(qMax(l - uid.length(), 0)));
0073   return uid;
0074 }
0075 
0076 uint Tellico::toUInt(const QString& s, bool* ok) {
0077   if(s.isEmpty()) {
0078     if(ok) {
0079       *ok = false;
0080     }
0081     return 0;
0082   }
0083 
0084   int idx = 0;
0085   while(idx < s.length() && s[idx].isDigit()) {
0086     ++idx;
0087   }
0088   if(idx == 0) {
0089     if(ok) {
0090       *ok = false;
0091     }
0092     return 0;
0093   }
0094   return s.leftRef(idx).toUInt(ok);
0095 }
0096 
0097 QString Tellico::i18nReplace(QString text) {
0098   // Because QDomDocument sticks in random newlines, go ahead and grab them too
0099   static QRegularExpression rx(QStringLiteral("(?:\\n+ *)*<i18n>(.*?)</i18n>(?: *\\n+)*"));
0100   QRegularExpressionMatch match = rx.match(text);
0101   while(match.hasMatch()) {
0102     // KDE bug 254863, be sure to escape just in case of spurious & entities
0103     text.replace(match.capturedStart(),
0104                  match.capturedLength(),
0105                  i18n(match.captured(1).toUtf8().constData()).toHtmlEscaped());
0106     match = rx.match(text, match.capturedStart()+1);
0107   }
0108   return text;
0109 }
0110 
0111 int Tellico::stringHash(const QString& str) {
0112   uint h = 0;
0113   uint g = 0;
0114   for(int i = 0; i < str.length(); ++i) {
0115     h = (h << 4) + str.unicode()[i].cell();
0116     if((g = h & 0xf0000000)) {
0117       h ^= g >> 24;
0118     }
0119     h &= ~g;
0120   }
0121 
0122   const int index = h;
0123   return index < 0 ? -index : index;
0124 }
0125 
0126 QString Tellico::shareString(const QString& str) {
0127   static QString stringStore[STRING_STORE_SIZE];
0128 
0129   const int hash = stringHash(str) % STRING_STORE_SIZE;
0130   if(stringStore[hash] != str) {
0131     stringStore[hash] = str;
0132   }
0133   return stringStore[hash];
0134 }
0135 
0136 QString Tellico::minutes(int seconds) {
0137   int min = seconds / 60;
0138   seconds = seconds % 60;
0139   return QString::number(min) + QLatin1Char(':') + QString::number(seconds).rightJustified(2, QLatin1Char('0'));
0140 }
0141 
0142 QString Tellico::fromHtmlData(const QByteArray& data_, const char* codecName) {
0143   QTextCodec* codec = codecName ? QTextCodec::codecForHtml(data_, QTextCodec::codecForName(codecName))
0144                                 : QTextCodec::codecForHtml(data_);
0145   return codec->toUnicode(data_);
0146 }
0147 
0148 QString Tellico::removeAccents(const QString& value_) {
0149   static QCache<QString, QString> stringCache(STRING_STORE_SIZE);
0150   if(stringCache.contains(value_)) {
0151     return *stringCache.object(value_);
0152   }
0153   static QRegularExpression rx;
0154   if(rx.pattern().isEmpty()) {
0155     QString pattern(QStringLiteral("(?:"));
0156     for(int i = 0x0300; i <= 0x036F; ++i) {
0157       pattern += QChar(i) + QLatin1Char('|');
0158     }
0159     pattern.chop(1);
0160     pattern += QLatin1Char(')');
0161     rx.setPattern(pattern);
0162     rx.optimize();
0163   }
0164   // remove accents from table "Combining Diacritical Marks"
0165   const QString value2 = value_.normalized(QString::NormalizationForm_D).remove(rx);
0166   stringCache.insert(value_, new QString(value2));
0167   return value2;
0168 }
0169 
0170 QByteArray Tellico::obfuscate(const QString& string) {
0171   QByteArray b;
0172   b.reserve(string.length() * 2);
0173   for(int p = 0; p < string.length(); p++) {
0174     char c = QRandomGenerator::global()->generate() % 255;
0175     b.prepend(c ^ string.at(p).unicode());
0176     b.prepend(c);
0177   }
0178   return b.toHex();
0179 }
0180 
0181 QString Tellico::reverseObfuscate(const QByteArray& bytes) {
0182   if(bytes.length() % 2 != 0 || bytes.isEmpty()) {
0183     return QString();
0184   }
0185   const QByteArray b = QByteArray::fromHex(bytes);
0186   QString result;
0187   result.reserve(b.length() / 2);
0188   for(int p = b.length()-1; p >= 0; p -= 2) {
0189     result.append(QLatin1Char(b.at(p-1) ^ b.at(p)));
0190   }
0191   return result;
0192 }
0193 
0194 QString Tellico::removeControlCodes(const QString& string) {
0195   QString result;
0196   result.reserve(string.size());
0197   StringIterator it(string);
0198   while(it.hasNext()) {
0199     const auto c = it.next();
0200     // legal control codes in XML 1.0 are U+0009, U+000A, U+000D
0201     // https://www.w3.org/TR/xml/#charsets
0202     if(c > 0x1F || c == 0x9 || c == 0xA || c == 0xD) {
0203       if(c < 0xd800) result += QChar(c);
0204       else result += QString::fromUcs4(&c, 1);
0205     }
0206   }
0207   return result;
0208 }