File indexing completed on 2024-04-21 15:07:31

0001 /*
0002     This file is part of the syndication library
0003     SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "tools.h"
0009 #include "personimpl.h"
0010 
0011 #include <KCharsets>
0012 
0013 #include <QByteArray>
0014 #include <QCryptographicHash>
0015 #include <QDateTime>
0016 #include <QRegularExpression>
0017 
0018 #include <ctime>
0019 
0020 namespace Syndication
0021 {
0022 QCryptographicHash md5Machine(QCryptographicHash::Md5);
0023 
0024 unsigned int calcHash(const QString &str)
0025 {
0026     return calcHash(str.toUtf8());
0027 }
0028 
0029 unsigned int calcHash(const QByteArray &array)
0030 {
0031     if (array.isEmpty()) {
0032         return 0;
0033     } else {
0034         const char *s = array.data();
0035         unsigned int hash = 5381;
0036         int c;
0037         while ((c = *s++)) {
0038             hash = ((hash << 5) + hash) + c; // hash*33 + c
0039         }
0040         return hash;
0041     }
0042 }
0043 
0044 static uint toTimeT(QDateTime &kdt)
0045 {
0046     if (kdt.isValid()) {
0047         // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC
0048         if (kdt.time().isNull() //
0049             || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) {
0050             kdt.setTimeSpec(Qt::UTC);
0051             kdt.setTime(QTime(12, 0));
0052         }
0053         return kdt.toMSecsSinceEpoch() / 1000;
0054     } else {
0055         return 0;
0056     }
0057 }
0058 
0059 uint parseISODate(const QString &str)
0060 {
0061     QDateTime kdt = QDateTime::fromString(str, Qt::ISODate);
0062     return toTimeT(kdt);
0063 }
0064 
0065 uint parseRFCDate(const QString &str)
0066 {
0067     QDateTime kdt = QDateTime::fromString(str, Qt::RFC2822Date);
0068 #if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
0069     // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly
0070     if (!kdt.isValid() && str.endsWith(QLatin1String(" GMT"))) {
0071         kdt = QDateTime::fromString(QStringView(str).chopped(4), Qt::RFC2822Date);
0072     }
0073 #endif
0074     return toTimeT(kdt);
0075 }
0076 
0077 uint parseDate(const QString &str, DateFormat hint)
0078 {
0079     if (str.isEmpty()) {
0080         return 0;
0081     }
0082 
0083     if (hint == RFCDate) {
0084         time_t t = parseRFCDate(str);
0085         return t != 0 ? t : parseISODate(str);
0086     } else {
0087         time_t t = parseISODate(str);
0088         return t != 0 ? t : parseRFCDate(str);
0089     }
0090 }
0091 
0092 QString dateTimeToString(uint date)
0093 {
0094     if (date == 0) {
0095         return QString();
0096     }
0097 
0098     const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy");
0099     QDateTime dt;
0100     dt.setMSecsSinceEpoch(quint64(date) * 1000);
0101     return dt.toUTC().toString(format);
0102 }
0103 
0104 QString calcMD5Sum(const QString &str)
0105 {
0106     md5Machine.reset();
0107     md5Machine.addData(str.toUtf8());
0108     return QLatin1String(md5Machine.result().toHex().constData());
0109 }
0110 
0111 QString resolveEntities(const QString &str)
0112 {
0113     return KCharsets::resolveEntities(str);
0114 }
0115 
0116 QString escapeSpecialCharacters(const QString &strp)
0117 {
0118     QString str(strp);
0119     str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
0120     str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
0121     str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
0122     str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
0123     str.replace(QLatin1Char('\''), QLatin1String("&apos;"));
0124     return str.trimmed();
0125 }
0126 
0127 QString convertNewlines(const QString &strp)
0128 {
0129     QString str(strp);
0130     str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
0131     return str;
0132 }
0133 
0134 QString plainTextToHtml(const QString &plainText)
0135 {
0136     QString str(plainText);
0137     str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
0138     str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
0139     str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
0140     // str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
0141     str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
0142     return str.trimmed();
0143 }
0144 
0145 QString htmlToPlainText(const QString &html)
0146 {
0147     QString str(html);
0148     // TODO: preserve some formatting, such as line breaks
0149     str.remove(QRegularExpression(QStringLiteral("<[^>]*?>"))); // remove tags
0150     str = resolveEntities(str);
0151     return str.trimmed();
0152 }
0153 
0154 static QRegularExpression tagRegExp()
0155 {
0156     static QRegularExpression exp(QStringLiteral("<\\w+.*/?>"));
0157     return exp;
0158 }
0159 
0160 bool stringContainsMarkup(const QString &str)
0161 {
0162     // check for entities
0163     if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
0164         return true;
0165     }
0166 
0167     const int ltc = str.count(QLatin1Char('<'));
0168     if (ltc == 0) {
0169         return false;
0170     }
0171 
0172     return str.contains(tagRegExp());
0173 }
0174 
0175 bool isHtml(const QString &str)
0176 {
0177     // check for entities
0178     if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
0179         return true;
0180     }
0181 
0182     const int ltc = str.count(QLatin1Char('<'));
0183     if (ltc == 0) {
0184         return false;
0185     }
0186 
0187     return str.contains(tagRegExp());
0188 }
0189 
0190 QString normalize(const QString &str)
0191 {
0192     return isHtml(str) ? str.trimmed() : plainTextToHtml(str);
0193 }
0194 
0195 QString normalize(const QString &strp, bool isCDATA, bool containsMarkup)
0196 {
0197     if (containsMarkup) {
0198         return strp.trimmed();
0199     } else {
0200         if (isCDATA) {
0201             QString str = resolveEntities(strp);
0202             str = escapeSpecialCharacters(str);
0203             str = convertNewlines(str);
0204             str = str.trimmed();
0205             return str;
0206         } else {
0207             QString str = escapeSpecialCharacters(strp);
0208             str = str.trimmed();
0209             return str;
0210         }
0211     }
0212 }
0213 
0214 PersonPtr personFromString(const QString &strp)
0215 {
0216     QString str = strp.trimmed();
0217     if (str.isEmpty()) {
0218         return PersonPtr(new PersonImpl());
0219     }
0220 
0221     str = resolveEntities(str);
0222     QString name;
0223     QString uri;
0224     QString email;
0225 
0226     // look for something looking like a mail address ("foo@bar.com",
0227     // "<foo@bar.com>") and extract it
0228 
0229     const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?")); // FIXME: user "proper" regexp,
0230     // search kmail source for it
0231 
0232     QRegularExpressionMatch match = remail.match(str);
0233     if (match.hasMatch()) {
0234         const QString all = match.captured(0);
0235         email = match.captured(1);
0236         str.remove(all); // remove mail address
0237     }
0238 
0239     // replace "mailto", "(", ")" (to be extended)
0240     email.remove(QStringLiteral("mailto:"));
0241     email.remove(QRegularExpression(QStringLiteral("[()]")));
0242 
0243     // simplify the rest and use it as name
0244 
0245     name = str.simplified();
0246 
0247     // after removing the email, str might have
0248     // the format "(Foo M. Bar)". We cut off
0249     // parentheses if there are any. However, if
0250     // str is of the format "Foo M. Bar (President)",
0251     // we should not cut anything.
0252 
0253     QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)")));
0254     match = rename.match(name);
0255     if (match.hasMatch()) {
0256         name = match.captured(1);
0257     }
0258 
0259     name = name.isEmpty() ? QString() : name;
0260     email = email.isEmpty() ? QString() : email;
0261     uri = uri.isEmpty() ? QString() : uri;
0262 
0263     if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) {
0264         return PersonPtr(new PersonImpl());
0265     }
0266 
0267     return PersonPtr(new PersonImpl(name, uri, email));
0268 }
0269 
// Constructs an element type from a local name and a namespace.
// @param localnamep stored in the localname member
// @param nsp stored in the ns member
ElementType::ElementType(const QString &localnamep, const QString &nsp)
    : ns(nsp)
    , localname(localnamep)
{
}
0275 
0276 bool ElementType::operator==(const ElementType &other) const
0277 {
0278     return localname == other.localname && ns == other.ns;
0279 }
0280 
0281 } // namespace Syndication