File indexing completed on 2024-05-19 05:05:44
0001 /*************************************************************************** 0002 * SPDX-License-Identifier: GPL-2.0-or-later 0003 * * 0004 * SPDX-FileCopyrightText: 2004-2022 Thomas Fischer <fischer@unix-ag.uni-kl.de> 0005 * * 0006 * This program is free software; you can redistribute it and/or modify * 0007 * it under the terms of the GNU General Public License as published by * 0008 * the Free Software Foundation; either version 2 of the License, or * 0009 * (at your option) any later version. * 0010 * * 0011 * This program is distributed in the hope that it will be useful, * 0012 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0014 * GNU General Public License for more details. * 0015 * * 0016 * You should have received a copy of the GNU General Public License * 0017 * along with this program; if not, see <https://www.gnu.org/licenses/>. * 0018 ***************************************************************************/ 0019 0020 #include "faviconlocator.h" 0021 0022 #include <QNetworkRequest> 0023 #include <QNetworkReply> 0024 #include <QRegularExpression> 0025 #include <QStandardPaths> 0026 #include <QDir> 0027 #include <QTimer> 0028 0029 #include "internalnetworkaccessmanager.h" 0030 #include "logging_networking.h" 0031 0032 static int earliest(const QString &haystack, const QSet<QString> &needles, const int haystackFrom = 0) { 0033 int result = INT_MAX; 0034 for (const QString &needle : needles) { 0035 const int p = haystack.indexOf(needle, haystackFrom); 0036 if (p >= 0 && p < result) 0037 result = p; 0038 } 0039 return result == INT_MAX ? -1 : result; 0040 } 0041 0042 FavIconLocator::FavIconLocator(const QUrl &webpageUrl, QObject *parent) 0043 : QObject(parent), favIcon(QIcon::fromTheme(QStringLiteral("applications-internet"))) 0044 { 0045 static const QRegularExpression invalidChars(QStringLiteral("[^-a-z0-9_]"), QRegularExpression::CaseInsensitiveOption); 0046 static const QString cacheDirectory = QStandardPaths::writableLocation(QStandardPaths::CacheLocation) + QStringLiteral("/favicons/"); 0047 QDir().mkpath(cacheDirectory); 0048 const QString fileNameStem = cacheDirectory + webpageUrl.toDisplayString().remove(invalidChars); 0049 0050 /// Try to locate icon in cache first before actually querying the webpage 0051 static const QStringList fileNameExtensions {QStringLiteral(".png"), QStringLiteral(".ico")}; 0052 for (const QString &extension : fileNameExtensions) { 0053 const QString fileName = fileNameStem + extension; 0054 const QFileInfo fi(fileName); 0055 if (fi.exists(fileName)) { 0056 if (fi.lastModified().daysTo(QDateTime::currentDateTime()) > 90) { 0057 /// If icon is other than 90 days, delete it and fetch current one 0058 QFile::remove(fileName); 0059 } else { 0060 favIcon = QIcon(fileName); 0061 QTimer::singleShot(100, this, [this]() { 0062 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0063 QMetaObject::invokeMethod(this, "gotIcon", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(QIcon, favIcon)); 0064 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0065 QMetaObject::invokeMethod(this, "gotIcon", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(QIcon, favIcon)); 0066 #endif 0067 }); 0068 return; 0069 } 0070 } 0071 } 0072 0073 QNetworkRequest request(webpageUrl); 0074 request.setAttribute(QNetworkRequest::RedirectPolicyAttribute, QNetworkRequest::NoLessSafeRedirectPolicy); 0075 QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); 0076 connect(reply, &QNetworkReply::finished, parent, [this, reply, fileNameStem, webpageUrl]() { 0077 QUrl favIconUrl; 0078 0079 if (reply->error() == QNetworkReply::NoError) { 0080 /// Assume that favicon information is within the first 4K of HTML code 0081 const QString htmlCode = QString::fromUtf8(reply->readAll()).left(4096); 0082 /// Some ugly but hopefully fast/flexible/robust HTML code parsing 0083 int p1 = -1; 0084 while ((p1 = htmlCode.indexOf(QStringLiteral("<link "), p1 + 5)) > 0) { 0085 const int p2 = htmlCode.indexOf(QLatin1Char('>'), p1 + 5); 0086 if (p2 > p1) { 0087 const int p3 = htmlCode.indexOf(QStringLiteral("rel=\""), p1 + 5); 0088 if (p3 > p1 && p3 < p2) { 0089 const int p4 = htmlCode.indexOf(QLatin1Char('"'), p3 + 5); 0090 if (p4 > p3 && p4 < p2) { 0091 const QString relValue = htmlCode.mid(p3 + 5, p4 - p3 - 5); 0092 if (relValue == QStringLiteral("icon") || relValue == QStringLiteral("shortcut icon")) { 0093 const int p5 = earliest(htmlCode, {QStringLiteral("href=\""), QStringLiteral("href=")}, p1 + 5); 0094 if (p5 > p1 && p5 < p2) { 0095 const int p6 = earliest(htmlCode, {QStringLiteral("\""), QStringLiteral(" "), QStringLiteral(">")}, p5 + 6); 0096 if (p6 > p5 + 5 && p6 <= p2) { 0097 QString hrefValue = htmlCode.mid(p5 + 6, p6 - p5 - 6).replace(QLatin1Char('&'), QLatin1String("&")).replace(QLatin1Char('>'), QLatin1String(">")).replace(QLatin1Char('<'), QLatin1String("<")); 0098 /// Do some resolving in case favicon URL in HTML code is relative 0099 favIconUrl = reply->url().resolved(QUrl(hrefValue)); 0100 if (favIconUrl.isValid()) { 0101 qCDebug(LOG_KBIBTEX_NETWORKING) << "Found favicon URL" << favIconUrl.toDisplayString() << "in HTML code of webpage" << webpageUrl.toDisplayString(); 0102 break; 0103 } else 0104 favIconUrl.clear(); 0105 } 0106 } 0107 } 0108 } 0109 } 0110 } 0111 } 0112 } 0113 0114 if (!favIconUrl.isValid()) { 0115 favIconUrl = reply->url(); 0116 favIconUrl.setPath(QStringLiteral("/favicon.ico")); 0117 qCInfo(LOG_KBIBTEX_NETWORKING) << "Could not locate favicon in HTML code for webpage" << webpageUrl.toDisplayString() << ", falling back to" << favIconUrl.toDisplayString(); 0118 } 0119 0120 QNetworkRequest request(favIconUrl); 0121 request.setAttribute(QNetworkRequest::RedirectPolicyAttribute, QNetworkRequest::NoLessSafeRedirectPolicy); 0122 QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); 0123 connect(reply, &QNetworkReply::finished, this, [this, reply, fileNameStem, favIconUrl, webpageUrl]() { 0124 if (reply->error() == QNetworkReply::NoError) { 0125 const QByteArray iconData = reply->readAll(); 0126 if (iconData.size() > 10) { 0127 QString extension; 0128 if (iconData[1] == 'P' && iconData[2] == 'N' && iconData[3] == 'G') { 0129 /// PNG files have string "PNG" at second to fourth byte 0130 extension = QStringLiteral(".png"); 0131 } else if (iconData[0] == static_cast<char>(0x00) && iconData[1] == static_cast<char>(0x00) && iconData[2] == static_cast<char>(0x01) && iconData[3] == static_cast<char>(0x00)) { 0132 /// Microsoft Icon have first two bytes always 0x0000, 0133 /// third and fourth byte is 0x0001 (for .ico) 0134 extension = QStringLiteral(".ico"); 0135 } else if (iconData[0] == '<') { 0136 /// HTML or XML code 0137 const QString htmlCode = QString::fromUtf8(iconData); 0138 qCWarning(LOG_KBIBTEX_NETWORKING) << "Received XML or HTML data from " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString() << ": " << htmlCode.left(128); 0139 } else { 0140 qCWarning(LOG_KBIBTEX_NETWORKING) << "Favicon is of unknown format: " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString(); 0141 } 0142 0143 if (!extension.isEmpty()) { 0144 const QString filename = fileNameStem + extension; 0145 0146 QFile iconFile(filename); 0147 if (iconFile.open(QFile::WriteOnly)) { 0148 iconFile.write(iconData); 0149 iconFile.close(); 0150 qCInfo(LOG_KBIBTEX_NETWORKING) << "Got icon from URL" << favIconUrl.toDisplayString() << "for webpage" << webpageUrl.toDisplayString() << "stored in" << filename; 0151 favIcon = QIcon(filename); 0152 } else { 0153 qCWarning(LOG_KBIBTEX_NETWORKING) << "Could not save icon data from URL" << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString() << "to file" << filename; 0154 } 0155 } 0156 } else { 0157 /// Unlikely that an icon's data is less than 10 bytes, 0158 /// must be an error. 0159 qCWarning(LOG_KBIBTEX_NETWORKING) << "Received invalid icon data from " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString(); 0160 } 0161 } else 0162 qCWarning(LOG_KBIBTEX_NETWORKING) << "Could not download icon from URL " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString() << ": " << reply->errorString(); 0163 0164 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0165 QMetaObject::invokeMethod(this, "gotIcon", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(QIcon, favIcon)); 0166 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0167 QMetaObject::invokeMethod(this, "gotIcon", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(QIcon, favIcon)); 0168 #endif 0169 }); 0170 }); 0171 } 0172 0173 QIcon FavIconLocator::icon() const 0174 { 0175 return favIcon; 0176 }