File indexing completed on 2024-03-24 04:03:51

0001 /*
0002     This file is part of the syndication library
0003     SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "tools.h"
0009 #include "personimpl.h"
0010 
0011 #include <KCharsets>
0012 
0013 #include <QByteArray>
0014 #include <QCryptographicHash>
0015 #include <QDateTime>
0016 #include <QRegularExpression>
0017 #include <QTimeZone>
0018 
0019 #include <ctime>
0020 
0021 namespace Syndication
0022 {
0023 QCryptographicHash md5Machine(QCryptographicHash::Md5);
0024 
0025 unsigned int calcHash(const QString &str)
0026 {
0027     return calcHash(str.toUtf8());
0028 }
0029 
0030 unsigned int calcHash(const QByteArray &array)
0031 {
0032     if (array.isEmpty()) {
0033         return 0;
0034     } else {
0035         const char *s = array.data();
0036         unsigned int hash = 5381;
0037         int c;
0038         while ((c = *s++)) {
0039             hash = ((hash << 5) + hash) + c; // hash*33 + c
0040         }
0041         return hash;
0042     }
0043 }
0044 
0045 static uint toTimeT(QDateTime &kdt)
0046 {
0047     if (kdt.isValid()) {
0048         // work around unspecified timezones/date-only timestamps by setting the time to 12:00 UTC
0049         if (kdt.time().isNull() //
0050             || (kdt.time() == QTime(0, 0) && kdt.timeSpec() == Qt::LocalTime)) {
0051             kdt.setTimeZone(QTimeZone::utc());
0052             kdt.setTime(QTime(12, 0));
0053         }
0054         return kdt.toMSecsSinceEpoch() / 1000;
0055     } else {
0056         return 0;
0057     }
0058 }
0059 
0060 uint parseISODate(const QString &str)
0061 {
0062     QDateTime kdt = QDateTime::fromString(str, Qt::ISODate);
0063     return toTimeT(kdt);
0064 }
0065 
0066 uint parseRFCDate(const QString &str)
0067 {
0068     QDateTime kdt = QDateTime::fromString(str, Qt::RFC2822Date);
0069     // Qt5 used to ignore invalid textual offsets but Qt6 rejects those, so handle that explictly
0070     if (!kdt.isValid() && str.endsWith(QLatin1String(" GMT"))) {
0071         kdt = QDateTime::fromString(QStringView(str).chopped(4), Qt::RFC2822Date);
0072     }
0073     return toTimeT(kdt);
0074 }
0075 
0076 uint parseDate(const QString &str, DateFormat hint)
0077 {
0078     if (str.isEmpty()) {
0079         return 0;
0080     }
0081 
0082     if (hint == RFCDate) {
0083         time_t t = parseRFCDate(str);
0084         return t != 0 ? t : parseISODate(str);
0085     } else {
0086         time_t t = parseISODate(str);
0087         return t != 0 ? t : parseRFCDate(str);
0088     }
0089 }
0090 
0091 QString dateTimeToString(uint date)
0092 {
0093     if (date == 0) {
0094         return QString();
0095     }
0096 
0097     const QString format = QStringLiteral("ddd MMM d HH:mm:ss yyyy");
0098     QDateTime dt;
0099     dt.setMSecsSinceEpoch(quint64(date) * 1000);
0100     return dt.toUTC().toString(format);
0101 }
0102 
0103 QString calcMD5Sum(const QString &str)
0104 {
0105     md5Machine.reset();
0106     md5Machine.addData(str.toUtf8());
0107     return QLatin1String(md5Machine.result().toHex().constData());
0108 }
0109 
0110 QString resolveEntities(const QString &str)
0111 {
0112     return KCharsets::resolveEntities(str);
0113 }
0114 
0115 QString escapeSpecialCharacters(const QString &strp)
0116 {
0117     QString str(strp);
0118     str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
0119     str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
0120     str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
0121     str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
0122     str.replace(QLatin1Char('\''), QLatin1String("&apos;"));
0123     return str.trimmed();
0124 }
0125 
0126 QString convertNewlines(const QString &strp)
0127 {
0128     QString str(strp);
0129     str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
0130     return str;
0131 }
0132 
0133 QString plainTextToHtml(const QString &plainText)
0134 {
0135     QString str(plainText);
0136     str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
0137     str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
0138     str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
0139     // str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
0140     str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
0141     return str.trimmed();
0142 }
0143 
0144 QString htmlToPlainText(const QString &html)
0145 {
0146     QString str(html);
0147     // TODO: preserve some formatting, such as line breaks
0148     str.remove(QRegularExpression(QStringLiteral("<[^>]*?>"))); // remove tags
0149     str = resolveEntities(str);
0150     return str.trimmed();
0151 }
0152 
0153 static QRegularExpression tagRegExp()
0154 {
0155     static QRegularExpression exp(QStringLiteral("<\\w+.*/?>"));
0156     return exp;
0157 }
0158 
0159 bool stringContainsMarkup(const QString &str)
0160 {
0161     // check for entities
0162     if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
0163         return true;
0164     }
0165 
0166     const int ltc = str.count(QLatin1Char('<'));
0167     if (ltc == 0) {
0168         return false;
0169     }
0170 
0171     return str.contains(tagRegExp());
0172 }
0173 
0174 bool isHtml(const QString &str)
0175 {
0176     // check for entities
0177     if (str.contains(QRegularExpression(QStringLiteral("&[a-zA-Z0-9#]+;")))) {
0178         return true;
0179     }
0180 
0181     const int ltc = str.count(QLatin1Char('<'));
0182     if (ltc == 0) {
0183         return false;
0184     }
0185 
0186     return str.contains(tagRegExp());
0187 }
0188 
0189 QString normalize(const QString &str)
0190 {
0191     return isHtml(str) ? str.trimmed() : plainTextToHtml(str);
0192 }
0193 
0194 QString normalize(const QString &strp, bool isCDATA, bool containsMarkup)
0195 {
0196     if (containsMarkup) {
0197         return strp.trimmed();
0198     } else {
0199         if (isCDATA) {
0200             QString str = resolveEntities(strp);
0201             str = escapeSpecialCharacters(str);
0202             str = convertNewlines(str);
0203             str = str.trimmed();
0204             return str;
0205         } else {
0206             QString str = escapeSpecialCharacters(strp);
0207             str = str.trimmed();
0208             return str;
0209         }
0210     }
0211 }
0212 
0213 PersonPtr personFromString(const QString &strp)
0214 {
0215     QString str = strp.trimmed();
0216     if (str.isEmpty()) {
0217         return PersonPtr(new PersonImpl());
0218     }
0219 
0220     str = resolveEntities(str);
0221     QString name;
0222     QString uri;
0223     QString email;
0224 
0225     // look for something looking like a mail address ("foo@bar.com",
0226     // "<foo@bar.com>") and extract it
0227 
0228     const QRegularExpression remail(QStringLiteral("<?([^@\\s<]+@[^>\\s]+)>?")); // FIXME: user "proper" regexp,
0229     // search kmail source for it
0230 
0231     QRegularExpressionMatch match = remail.match(str);
0232     if (match.hasMatch()) {
0233         const QString all = match.captured(0);
0234         email = match.captured(1);
0235         str.remove(all); // remove mail address
0236     }
0237 
0238     // replace "mailto", "(", ")" (to be extended)
0239     email.remove(QStringLiteral("mailto:"));
0240     email.remove(QRegularExpression(QStringLiteral("[()]")));
0241 
0242     // simplify the rest and use it as name
0243 
0244     name = str.simplified();
0245 
0246     // after removing the email, str might have
0247     // the format "(Foo M. Bar)". We cut off
0248     // parentheses if there are any. However, if
0249     // str is of the format "Foo M. Bar (President)",
0250     // we should not cut anything.
0251 
0252     QRegularExpression rename(QRegularExpression::anchoredPattern(QStringLiteral("^\\(([^)]*)\\)")));
0253     match = rename.match(name);
0254     if (match.hasMatch()) {
0255         name = match.captured(1);
0256     }
0257 
0258     name = name.isEmpty() ? QString() : name;
0259     email = email.isEmpty() ? QString() : email;
0260     uri = uri.isEmpty() ? QString() : uri;
0261 
0262     if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) {
0263         return PersonPtr(new PersonImpl());
0264     }
0265 
0266     return PersonPtr(new PersonImpl(name, uri, email));
0267 }
0268 
0269 ElementType::ElementType(const QString &localnamep, const QString &nsp)
0270     : ns(nsp)
0271     , localname(localnamep)
0272 {
0273 }
0274 
0275 bool ElementType::operator==(const ElementType &other) const
0276 {
0277     return localname == other.localname && ns == other.ns;
0278 }
0279 
0280 } // namespace Syndication