File indexing completed on 2024-05-19 05:05:44

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2022 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #include "faviconlocator.h"
0021 
0022 #include <QNetworkRequest>
0023 #include <QNetworkReply>
0024 #include <QRegularExpression>
0025 #include <QStandardPaths>
0026 #include <QDir>
0027 #include <QTimer>
0028 
0029 #include "internalnetworkaccessmanager.h"
0030 #include "logging_networking.h"
0031 
0032 static int earliest(const QString &haystack, const QSet<QString> &needles, const int haystackFrom = 0) {
0033     int result = INT_MAX;
0034     for (const QString &needle : needles) {
0035         const int p = haystack.indexOf(needle, haystackFrom);
0036         if (p >= 0 && p < result)
0037             result = p;
0038     }
0039     return result == INT_MAX ? -1 : result;
0040 }
0041 
0042 FavIconLocator::FavIconLocator(const QUrl &webpageUrl, QObject *parent)
0043         : QObject(parent), favIcon(QIcon::fromTheme(QStringLiteral("applications-internet")))
0044 {
0045     static const QRegularExpression invalidChars(QStringLiteral("[^-a-z0-9_]"), QRegularExpression::CaseInsensitiveOption);
0046     static const QString cacheDirectory = QStandardPaths::writableLocation(QStandardPaths::CacheLocation) + QStringLiteral("/favicons/");
0047     QDir().mkpath(cacheDirectory);
0048     const QString fileNameStem = cacheDirectory + webpageUrl.toDisplayString().remove(invalidChars);
0049 
0050     /// Try to locate icon in cache first before actually querying the webpage
0051     static const QStringList fileNameExtensions {QStringLiteral(".png"), QStringLiteral(".ico")};
0052     for (const QString &extension : fileNameExtensions) {
0053         const QString fileName = fileNameStem + extension;
0054         const QFileInfo fi(fileName);
0055         if (fi.exists(fileName)) {
0056             if (fi.lastModified().daysTo(QDateTime::currentDateTime()) > 90) {
0057                 /// If icon is other than 90 days, delete it and fetch current one
0058                 QFile::remove(fileName);
0059             } else {
0060                 favIcon = QIcon(fileName);
0061                 QTimer::singleShot(100, this, [this]() {
0062 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0063                     QMetaObject::invokeMethod(this, "gotIcon", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(QIcon, favIcon));
0064 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0065                     QMetaObject::invokeMethod(this, "gotIcon", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(QIcon, favIcon));
0066 #endif
0067                 });
0068                 return;
0069             }
0070         }
0071     }
0072 
0073     QNetworkRequest request(webpageUrl);
0074     request.setAttribute(QNetworkRequest::RedirectPolicyAttribute, QNetworkRequest::NoLessSafeRedirectPolicy);
0075     QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request);
0076     connect(reply, &QNetworkReply::finished, parent, [this, reply, fileNameStem, webpageUrl]() {
0077         QUrl favIconUrl;
0078 
0079         if (reply->error() == QNetworkReply::NoError) {
0080             /// Assume that favicon information is within the first 4K of HTML code
0081             const QString htmlCode = QString::fromUtf8(reply->readAll()).left(4096);
0082             /// Some ugly but hopefully fast/flexible/robust HTML code parsing
0083             int p1 = -1;
0084             while ((p1 = htmlCode.indexOf(QStringLiteral("<link "), p1 + 5)) > 0) {
0085                 const int p2 = htmlCode.indexOf(QLatin1Char('>'), p1 + 5);
0086                 if (p2 > p1) {
0087                     const int p3 = htmlCode.indexOf(QStringLiteral("rel=\""), p1 + 5);
0088                     if (p3 > p1 && p3 < p2) {
0089                         const int p4 = htmlCode.indexOf(QLatin1Char('"'), p3 + 5);
0090                         if (p4 > p3 && p4 < p2) {
0091                             const QString relValue = htmlCode.mid(p3 + 5, p4 - p3 - 5);
0092                             if (relValue == QStringLiteral("icon") || relValue == QStringLiteral("shortcut icon")) {
0093                                 const int p5 = earliest(htmlCode, {QStringLiteral("href=\""), QStringLiteral("href=")}, p1 + 5);
0094                                 if (p5 > p1 && p5 < p2) {
0095                                     const int p6 = earliest(htmlCode, {QStringLiteral("\""), QStringLiteral(" "), QStringLiteral(">")}, p5 + 6);
0096                                     if (p6 > p5 + 5 && p6 <= p2) {
0097                                         QString hrefValue = htmlCode.mid(p5 + 6, p6 - p5 - 6).replace(QLatin1Char('&'), QLatin1String("&amp;")).replace(QLatin1Char('>'), QLatin1String("&gt;")).replace(QLatin1Char('<'), QLatin1String("&lt;"));
0098                                         /// Do some resolving in case favicon URL in HTML code is relative
0099                                         favIconUrl = reply->url().resolved(QUrl(hrefValue));
0100                                         if (favIconUrl.isValid()) {
0101                                             qCDebug(LOG_KBIBTEX_NETWORKING) << "Found favicon URL" << favIconUrl.toDisplayString() << "in HTML code of webpage" << webpageUrl.toDisplayString();
0102                                             break;
0103                                         } else
0104                                             favIconUrl.clear();
0105                                     }
0106                                 }
0107                             }
0108                         }
0109                     }
0110                 }
0111             }
0112         }
0113 
0114         if (!favIconUrl.isValid()) {
0115             favIconUrl = reply->url();
0116             favIconUrl.setPath(QStringLiteral("/favicon.ico"));
0117             qCInfo(LOG_KBIBTEX_NETWORKING) << "Could not locate favicon in HTML code for webpage" << webpageUrl.toDisplayString() << ", falling back to" << favIconUrl.toDisplayString();
0118         }
0119 
0120         QNetworkRequest request(favIconUrl);
0121         request.setAttribute(QNetworkRequest::RedirectPolicyAttribute, QNetworkRequest::NoLessSafeRedirectPolicy);
0122         QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request);
0123         connect(reply, &QNetworkReply::finished, this, [this, reply, fileNameStem, favIconUrl, webpageUrl]() {
0124             if (reply->error() == QNetworkReply::NoError) {
0125                 const QByteArray iconData = reply->readAll();
0126                 if (iconData.size() > 10) {
0127                     QString extension;
0128                     if (iconData[1] == 'P' && iconData[2] == 'N' && iconData[3] == 'G') {
0129                         /// PNG files have string "PNG" at second to fourth byte
0130                         extension = QStringLiteral(".png");
0131                     } else if (iconData[0] == static_cast<char>(0x00) && iconData[1] == static_cast<char>(0x00) && iconData[2] == static_cast<char>(0x01) && iconData[3] == static_cast<char>(0x00)) {
0132                         /// Microsoft Icon have first two bytes always 0x0000,
0133                         /// third and fourth byte is 0x0001 (for .ico)
0134                         extension = QStringLiteral(".ico");
0135                     } else if (iconData[0] == '<') {
0136                         /// HTML or XML code
0137                         const QString htmlCode = QString::fromUtf8(iconData);
0138                         qCWarning(LOG_KBIBTEX_NETWORKING) << "Received XML or HTML data from " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString() << ": " << htmlCode.left(128);
0139                     } else {
0140                         qCWarning(LOG_KBIBTEX_NETWORKING) << "Favicon is of unknown format: " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString();
0141                     }
0142 
0143                     if (!extension.isEmpty()) {
0144                         const QString filename = fileNameStem + extension;
0145 
0146                         QFile iconFile(filename);
0147                         if (iconFile.open(QFile::WriteOnly)) {
0148                             iconFile.write(iconData);
0149                             iconFile.close();
0150                             qCInfo(LOG_KBIBTEX_NETWORKING) << "Got icon from URL" << favIconUrl.toDisplayString() << "for webpage" << webpageUrl.toDisplayString() << "stored in" << filename;
0151                             favIcon = QIcon(filename);
0152                         } else {
0153                             qCWarning(LOG_KBIBTEX_NETWORKING) << "Could not save icon data from URL" << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString() << "to file" << filename;
0154                         }
0155                     }
0156                 } else {
0157                     /// Unlikely that an icon's data is less than 10 bytes,
0158                     /// must be an error.
0159                     qCWarning(LOG_KBIBTEX_NETWORKING) << "Received invalid icon data from " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString();
0160                 }
0161             } else
0162                 qCWarning(LOG_KBIBTEX_NETWORKING) << "Could not download icon from URL " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString() << ": " << reply->errorString();
0163 
0164 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0165             QMetaObject::invokeMethod(this, "gotIcon", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(QIcon, favIcon));
0166 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0167             QMetaObject::invokeMethod(this, "gotIcon", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(QIcon, favIcon));
0168 #endif
0169         });
0170     });
0171 }
0172 
0173 QIcon FavIconLocator::icon() const
0174 {
0175     return favIcon;
0176 }