File indexing completed on 2024-03-24 04:03:51
0001 /* 0002 This file is part of the syndication library 0003 SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include "tools.h" 0009 #include "personimpl.h" 0010 0011 #include <KCharsets> 0012 0013 #include <QByteArray> 0014 #include <QCryptographicHash> 0015 #include <QDateTime> 0016 #include <QRegularExpression> 0017 #include <QTimeZone> 0018 0019 #include <ctime> 0020 0021 namespace Syndication 0022 { 0023 QCryptographicHash md5Machine(QCryptographicHash::Md5); 0024 0025 unsigned int calcHash(const QString &str) 0026 { 0027 return calcHash(str.toUtf8()); 0028 } 0029 0030 unsigned int calcHash(const QByteArray &array) 0031 { 0032 if (array.isEmpty()) { 0033 return 0; 0034 } else { 0035 const char *s = array.data(); 0036 unsigned int hash = 5381; 0037 int c; 0038 while ((c = *s++)) { 0039 hash = ((hash << 5) + hash) + c; // hash*33 + c 0040 } 0041 return hash; 0042 } 0043 } 0044 0045 static uint toTimeT(QDateTime &kdt) 0046 { 0047 if (kdt.isValid()) { 0048 // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC 0049 if (kdt.time().isNull() // 0050 || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) { 0051 kdt.setTimeZone(QTimeZone::utc()); 0052 kdt.setTime(QTime(12, 0)); 0053 } 0054 return kdt.toMSecsSinceEpoch() / 1000; 0055 } else { 0056 return 0; 0057 } 0058 } 0059 0060 uint parseISODate(const QString &str) 0061 { 0062 QDateTime kdt = QDateTime::fromString(str, Qt::ISODate); 0063 return toTimeT(kdt); 0064 } 0065 0066 uint parseRFCDate(const QString &str) 0067 { 0068 QDateTime kdt = QDateTime::fromString(str, Qt::RFC2822Date); 0069 // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly 0070 if (!kdt.isValid() && str.endsWith(QLatin1String(" GMT"))) { 0071 kdt = QDateTime::fromString(QStringView(str).chopped(4), Qt::RFC2822Date); 0072 } 0073 return toTimeT(kdt); 0074 } 0075 0076 uint parseDate(const QString &str, DateFormat hint) 0077 { 0078 if (str.isEmpty()) { 0079 return 0; 0080 } 0081 0082 if (hint == RFCDate) { 0083 time_t t = parseRFCDate(str); 0084 return t != 0 ? t : parseISODate(str); 0085 } else { 0086 time_t t = parseISODate(str); 0087 return t != 0 ? t : parseRFCDate(str); 0088 } 0089 } 0090 0091 QString dateTimeToString(uint date) 0092 { 0093 if (date == 0) { 0094 return QString(); 0095 } 0096 0097 const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy"); 0098 QDateTime dt; 0099 dt.setMSecsSinceEpoch(quint64(date) * 1000); 0100 return dt.toUTC().toString(format); 0101 } 0102 0103 QString calcMD5Sum(const QString &str) 0104 { 0105 md5Machine.reset(); 0106 md5Machine.addData(str.toUtf8()); 0107 return QLatin1String(md5Machine.result().toHex().constData()); 0108 } 0109 0110 QString resolveEntities(const QString &str) 0111 { 0112 return KCharsets::resolveEntities(str); 0113 } 0114 0115 QString escapeSpecialCharacters(const QString &strp) 0116 { 0117 QString str(strp); 0118 str.replace(QLatin1Char('&'), QLatin1String("&")); 0119 str.replace(QLatin1Char('\"'), QLatin1String(""")); 0120 str.replace(QLatin1Char('<'), QLatin1String("<")); 0121 str.replace(QLatin1Char('>'), QLatin1String(">")); 0122 str.replace(QLatin1Char('\''), QLatin1String("'")); 0123 return str.trimmed(); 0124 } 0125 0126 QString convertNewlines(const QString &strp) 0127 { 0128 QString str(strp); 0129 str.replace(QLatin1Char('\n'), QLatin1String("<br/>")); 0130 return str; 0131 } 0132 0133 QString plainTextToHtml(const QString &plainText) 0134 { 0135 QString str(plainText); 0136 str.replace(QLatin1Char('&'), QLatin1String("&")); 0137 str.replace(QLatin1Char('\"'), QLatin1String(""")); 0138 str.replace(QLatin1Char('<'), QLatin1String("<")); 0139 // str.replace(QLatin1Char('>'), QLatin1String(">")); 0140 str.replace(QLatin1Char('\n'), QLatin1String("<br/>")); 0141 return str.trimmed(); 0142 } 0143 0144 QString htmlToPlainText(const QString &html) 0145 { 0146 QString str(html); 0147 // TODO: preserve some formatting, such as line breaks 0148 str.remove(QRegularExpression(QStringLiteral("<[^>]*?>"))); // remove tags 0149 str = resolveEntities(str); 0150 return str.trimmed(); 0151 } 0152 0153 static QRegularExpression tagRegExp() 0154 { 0155 static QRegularExpression exp(QStringLiteral("<\\w+.*/?>")); 0156 return exp; 0157 } 0158 0159 bool stringContainsMarkup(const QString &str) 0160 { 0161 // check for entities 0162 if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) { 0163 return true; 0164 } 0165 0166 const int ltc = str.count(QLatin1Char('<')); 0167 if (ltc == 0) { 0168 return false; 0169 } 0170 0171 return str.contains(tagRegExp()); 0172 } 0173 0174 bool isHtml(const QString &str) 0175 { 0176 // check for entities 0177 if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) { 0178 return true; 0179 } 0180 0181 const int ltc = str.count(QLatin1Char('<')); 0182 if (ltc == 0) { 0183 return false; 0184 } 0185 0186 return str.contains(tagRegExp()); 0187 } 0188 0189 QString normalize(const QString &str) 0190 { 0191 return isHtml(str) ? str.trimmed() : plainTextToHtml(str); 0192 } 0193 0194 QString normalize(const QString &strp, bool isCDATA, bool containsMarkup) 0195 { 0196 if (containsMarkup) { 0197 return strp.trimmed(); 0198 } else { 0199 if (isCDATA) { 0200 QString str = resolveEntities(strp); 0201 str = escapeSpecialCharacters(str); 0202 str = convertNewlines(str); 0203 str = str.trimmed(); 0204 return str; 0205 } else { 0206 QString str = escapeSpecialCharacters(strp); 0207 str = str.trimmed(); 0208 return str; 0209 } 0210 } 0211 } 0212 0213 PersonPtr personFromString(const QString &strp) 0214 { 0215 QString str = strp.trimmed(); 0216 if (str.isEmpty()) { 0217 return PersonPtr(new PersonImpl()); 0218 } 0219 0220 str = resolveEntities(str); 0221 QString name; 0222 QString uri; 0223 QString email; 0224 0225 // look for something looking like a mail address ("foo@bar.com", 0226 // "<foo@bar.com>") and extract it 0227 0228 const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?")); // FIXME: user "proper" regexp, 0229 // search kmail source for it 0230 0231 QRegularExpressionMatch match = remail.match(str); 0232 if (match.hasMatch()) { 0233 const QString all = match.captured(0); 0234 email = match.captured(1); 0235 str.remove(all); // remove mail address 0236 } 0237 0238 // replace "mailto", "(", ")" (to be extended) 0239 email.remove(QStringLiteral("mailto:")); 0240 email.remove(QRegularExpression(QStringLiteral("[()]"))); 0241 0242 // simplify the rest and use it as name 0243 0244 name = str.simplified(); 0245 0246 // after removing the email, str might have 0247 // the format "(Foo M. Bar)". We cut off 0248 // parentheses if there are any. However, if 0249 // str is of the format "Foo M. Bar (President)", 0250 // we should not cut anything. 0251 0252 QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)"))); 0253 match = rename.match(name); 0254 if (match.hasMatch()) { 0255 name = match.captured(1); 0256 } 0257 0258 name = name.isEmpty() ? QString() : name; 0259 email = email.isEmpty() ? QString() : email; 0260 uri = uri.isEmpty() ? QString() : uri; 0261 0262 if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) { 0263 return PersonPtr(new PersonImpl()); 0264 } 0265 0266 return PersonPtr(new PersonImpl(name, uri, email)); 0267 } 0268 0269 ElementType::ElementType(const QString &localnamep, const QString &nsp) 0270 : ns(nsp) 0271 , localname(localnamep) 0272 { 0273 } 0274 0275 bool ElementType::operator==(const ElementType &other) const 0276 { 0277 return localname == other.localname && ns == other.ns; 0278 } 0279 0280 } // namespace Syndication