// Warning: file /frameworks/syndication/src/tools.cpp was not indexed or was modified since the last indexation, so cross-reference links may be missing, inaccurate or erroneous.
0001 /* 0002 This file is part of the syndication library 0003 SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include "tools.h" 0009 #include "personimpl.h" 0010 0011 #include <KCharsets> 0012 0013 #include <QByteArray> 0014 #include <QCryptographicHash> 0015 #include <QDateTime> 0016 #include <QRegularExpression> 0017 0018 #include <ctime> 0019 0020 namespace Syndication 0021 { 0022 QCryptographicHash md5Machine(QCryptographicHash::Md5); 0023 0024 unsigned int calcHash(const QString &str) 0025 { 0026 return calcHash(str.toUtf8()); 0027 } 0028 0029 unsigned int calcHash(const QByteArray &array) 0030 { 0031 if (array.isEmpty()) { 0032 return 0; 0033 } else { 0034 const char *s = array.data(); 0035 unsigned int hash = 5381; 0036 int c; 0037 while ((c = *s++)) { 0038 hash = ((hash << 5) + hash) + c; // hash*33 + c 0039 } 0040 return hash; 0041 } 0042 } 0043 0044 static uint toTimeT(QDateTime &kdt) 0045 { 0046 if (kdt.isValid()) { 0047 // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC 0048 if (kdt.time().isNull() // 0049 || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) { 0050 kdt.setTimeSpec(Qt::UTC); 0051 kdt.setTime(QTime(12, 0)); 0052 } 0053 return kdt.toMSecsSinceEpoch() / 1000; 0054 } else { 0055 return 0; 0056 } 0057 } 0058 0059 uint parseISODate(const QString &str) 0060 { 0061 QDateTime kdt = QDateTime::fromString(str, Qt::ISODate); 0062 return toTimeT(kdt); 0063 } 0064 0065 uint parseRFCDate(const QString &str) 0066 { 0067 QDateTime kdt = QDateTime::fromString(str, Qt::RFC2822Date); 0068 #if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0) 0069 // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly 0070 if (!kdt.isValid() && str.endsWith(QLatin1String(" GMT"))) { 0071 kdt = QDateTime::fromString(QStringView(str).chopped(4), Qt::RFC2822Date); 0072 } 0073 #endif 0074 return toTimeT(kdt); 
0075 } 0076 0077 uint parseDate(const QString &str, DateFormat hint) 0078 { 0079 if (str.isEmpty()) { 0080 return 0; 0081 } 0082 0083 if (hint == RFCDate) { 0084 time_t t = parseRFCDate(str); 0085 return t != 0 ? t : parseISODate(str); 0086 } else { 0087 time_t t = parseISODate(str); 0088 return t != 0 ? t : parseRFCDate(str); 0089 } 0090 } 0091 0092 QString dateTimeToString(uint date) 0093 { 0094 if (date == 0) { 0095 return QString(); 0096 } 0097 0098 const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy"); 0099 QDateTime dt; 0100 dt.setMSecsSinceEpoch(quint64(date) * 1000); 0101 return dt.toUTC().toString(format); 0102 } 0103 0104 QString calcMD5Sum(const QString &str) 0105 { 0106 md5Machine.reset(); 0107 md5Machine.addData(str.toUtf8()); 0108 return QLatin1String(md5Machine.result().toHex().constData()); 0109 } 0110 0111 QString resolveEntities(const QString &str) 0112 { 0113 return KCharsets::resolveEntities(str); 0114 } 0115 0116 QString escapeSpecialCharacters(const QString &strp) 0117 { 0118 QString str(strp); 0119 str.replace(QLatin1Char('&'), QLatin1String("&")); 0120 str.replace(QLatin1Char('\"'), QLatin1String(""")); 0121 str.replace(QLatin1Char('<'), QLatin1String("<")); 0122 str.replace(QLatin1Char('>'), QLatin1String(">")); 0123 str.replace(QLatin1Char('\''), QLatin1String("'")); 0124 return str.trimmed(); 0125 } 0126 0127 QString convertNewlines(const QString &strp) 0128 { 0129 QString str(strp); 0130 str.replace(QLatin1Char('\n'), QLatin1String("<br/>")); 0131 return str; 0132 } 0133 0134 QString plainTextToHtml(const QString &plainText) 0135 { 0136 QString str(plainText); 0137 str.replace(QLatin1Char('&'), QLatin1String("&")); 0138 str.replace(QLatin1Char('\"'), QLatin1String(""")); 0139 str.replace(QLatin1Char('<'), QLatin1String("<")); 0140 // str.replace(QLatin1Char('>'), QLatin1String(">")); 0141 str.replace(QLatin1Char('\n'), QLatin1String("<br/>")); 0142 return str.trimmed(); 0143 } 0144 0145 QString htmlToPlainText(const QString 
&html) 0146 { 0147 QString str(html); 0148 // TODO: preserve some formatting, such as line breaks 0149 str.remove(QRegularExpression(QStringLiteral("<[^>]*?>"))); // remove tags 0150 str = resolveEntities(str); 0151 return str.trimmed(); 0152 } 0153 0154 static QRegularExpression tagRegExp() 0155 { 0156 static QRegularExpression exp(QStringLiteral("<\\w+.*/?>")); 0157 return exp; 0158 } 0159 0160 bool stringContainsMarkup(const QString &str) 0161 { 0162 // check for entities 0163 if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) { 0164 return true; 0165 } 0166 0167 const int ltc = str.count(QLatin1Char('<')); 0168 if (ltc == 0) { 0169 return false; 0170 } 0171 0172 return str.contains(tagRegExp()); 0173 } 0174 0175 bool isHtml(const QString &str) 0176 { 0177 // check for entities 0178 if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) { 0179 return true; 0180 } 0181 0182 const int ltc = str.count(QLatin1Char('<')); 0183 if (ltc == 0) { 0184 return false; 0185 } 0186 0187 return str.contains(tagRegExp()); 0188 } 0189 0190 QString normalize(const QString &str) 0191 { 0192 return isHtml(str) ? 
str.trimmed() : plainTextToHtml(str); 0193 } 0194 0195 QString normalize(const QString &strp, bool isCDATA, bool containsMarkup) 0196 { 0197 if (containsMarkup) { 0198 return strp.trimmed(); 0199 } else { 0200 if (isCDATA) { 0201 QString str = resolveEntities(strp); 0202 str = escapeSpecialCharacters(str); 0203 str = convertNewlines(str); 0204 str = str.trimmed(); 0205 return str; 0206 } else { 0207 QString str = escapeSpecialCharacters(strp); 0208 str = str.trimmed(); 0209 return str; 0210 } 0211 } 0212 } 0213 0214 PersonPtr personFromString(const QString &strp) 0215 { 0216 QString str = strp.trimmed(); 0217 if (str.isEmpty()) { 0218 return PersonPtr(new PersonImpl()); 0219 } 0220 0221 str = resolveEntities(str); 0222 QString name; 0223 QString uri; 0224 QString email; 0225 0226 // look for something looking like a mail address ("foo@bar.com", 0227 // "<foo@bar.com>") and extract it 0228 0229 const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?")); // FIXME: user "proper" regexp, 0230 // search kmail source for it 0231 0232 QRegularExpressionMatch match = remail.match(str); 0233 if (match.hasMatch()) { 0234 const QString all = match.captured(0); 0235 email = match.captured(1); 0236 str.remove(all); // remove mail address 0237 } 0238 0239 // replace "mailto", "(", ")" (to be extended) 0240 email.remove(QStringLiteral("mailto:")); 0241 email.remove(QRegularExpression(QStringLiteral("[()]"))); 0242 0243 // simplify the rest and use it as name 0244 0245 name = str.simplified(); 0246 0247 // after removing the email, str might have 0248 // the format "(Foo M. Bar)". We cut off 0249 // parentheses if there are any. However, if 0250 // str is of the format "Foo M. Bar (President)", 0251 // we should not cut anything. 
0252 0253 QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)"))); 0254 match = rename.match(name); 0255 if (match.hasMatch()) { 0256 name = match.captured(1); 0257 } 0258 0259 name = name.isEmpty() ? QString() : name; 0260 email = email.isEmpty() ? QString() : email; 0261 uri = uri.isEmpty() ? QString() : uri; 0262 0263 if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) { 0264 return PersonPtr(new PersonImpl()); 0265 } 0266 0267 return PersonPtr(new PersonImpl(name, uri, email)); 0268 } 0269 0270 ElementType::ElementType(const QString &localnamep, const QString &nsp) 0271 : ns(nsp) 0272 , localname(localnamep) 0273 { 0274 } 0275 0276 bool ElementType::operator==(const ElementType &other) const 0277 { 0278 return localname == other.localname && ns == other.ns; 0279 } 0280 0281 } // namespace Syndication