// File indexing completed on 2024-05-12 05:09:44
0001 /*************************************************************************** 0002 Copyright (C) 2023 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "vgcollectfetcher.h" 0026 #include "../utils/guiproxy.h" 0027 #include "../utils/string_utils.h" 0028 #include "../collections/gamecollection.h" 0029 #include "../entry.h" 0030 #include "../core/filehandler.h" 0031 #include "../images/imagefactory.h" 0032 #include "../tellico_debug.h" 0033 0034 #include <KLocalizedString> 0035 #include <KConfig> 0036 #include <KIO/Job> 0037 #include <KIO/JobUiDelegate> 0038 #include <KJobWidgets/KJobWidgets> 0039 0040 #include <QRegularExpression> 0041 #include <QLabel> 0042 #include <QFile> 0043 #include <QTextStream> 0044 #include <QVBoxLayout> 0045 0046 namespace { 0047 static const char* VGCOLLECT_BASE_URL = "https://vgcollect.com/search/advanced"; 0048 } 0049 0050 using namespace Tellico; 0051 using Tellico::Fetch::VGCollectFetcher; 0052 0053 VGCollectFetcher::VGCollectFetcher(QObject* parent_) 0054 : Fetcher(parent_), m_started(false) { 0055 } 0056 0057 VGCollectFetcher::~VGCollectFetcher() { 0058 } 0059 0060 QString VGCollectFetcher::source() const { 0061 return m_name.isEmpty() ? 
defaultName() : m_name; 0062 } 0063 0064 bool VGCollectFetcher::canFetch(int type) const { 0065 return type == Data::Collection::Game; 0066 } 0067 0068 void VGCollectFetcher::readConfigHook(const KConfigGroup& config_) { 0069 Q_UNUSED(config_); 0070 } 0071 0072 void VGCollectFetcher::search() { 0073 m_started = true; 0074 m_matches.clear(); 0075 0076 QUrl u(QString::fromLatin1(VGCOLLECT_BASE_URL)); 0077 QString urlPath(QStringLiteral("/no-filter/%1/no-filter/0/ALL/ALL/ALL/ALL/no-filter/%2/%3")); 0078 0079 switch(request().key()) { 0080 case Keyword: 0081 { 0082 QString value = request().value(); 0083 QString yearStart, yearEnd; 0084 // pull out year, keep the regexp a little loose 0085 QRegularExpression yearRX(QStringLiteral("\\s*[12][0-9]{3}\\s*")); 0086 QRegularExpressionMatch match = yearRX.match(value); 0087 if(match.hasMatch()) { 0088 // fragile, but the form uses a year index 0089 yearStart = match.captured(0).trimmed() + QLatin1String("-01-01"); 0090 yearEnd = match.captured(0).trimmed() + QLatin1String("-12-31"); 0091 value = value.remove(yearRX); 0092 } else { 0093 yearStart = QStringLiteral("no-filter"); 0094 yearEnd = yearStart; 0095 } 0096 urlPath = urlPath.arg(value, yearStart, yearEnd); 0097 } 0098 break; 0099 0100 default: 0101 myWarning() << source() << "- key not recognized:" << request().key(); 0102 stop(); 0103 return; 0104 } 0105 u.setPath(u.path() + urlPath); 0106 // myDebug() << "url:" << u; 0107 0108 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); 0109 m_job->addMetaData(QStringLiteral("referrer"), QString::fromLatin1("https://vgcollect.com/search")); 0110 KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); 0111 connect(m_job.data(), &KJob::result, 0112 this, &VGCollectFetcher::slotComplete); 0113 } 0114 0115 void VGCollectFetcher::stop() { 0116 if(!m_started) { 0117 return; 0118 } 0119 0120 if(m_job) { 0121 m_job->kill(); 0122 m_job = nullptr; 0123 } 0124 m_started = false; 0125 emit signalDone(this); 0126 } 0127 0128 void 
VGCollectFetcher::slotComplete(KJob*) { 0129 if(m_job->error()) { 0130 m_job->uiDelegate()->showErrorMessage(); 0131 stop(); 0132 return; 0133 } 0134 0135 const QByteArray data = m_job->data(); 0136 if(data.isEmpty()) { 0137 myDebug() << "no data"; 0138 stop(); 0139 return; 0140 } 0141 0142 // since the fetch is done, don't worry about holding the job pointer 0143 m_job = nullptr; 0144 0145 const QString s = Tellico::decodeHTML(data); 0146 #if 0 0147 myWarning() << "Remove debug from vgcollectfetcher.cpp"; 0148 QFile f(QStringLiteral("/tmp/test.html")); 0149 if(f.open(QIODevice::WriteOnly)) { 0150 QTextStream t(&f); 0151 t.setCodec("UTF-8"); 0152 t << s; 0153 } 0154 f.close(); 0155 #endif 0156 0157 static const QRegularExpression rowRx(QStringLiteral("<div class=\"span10\">(.+?)<div class=\"item-notes\""), 0158 QRegularExpression::DotMatchesEverythingOption); 0159 static const QRegularExpression itemRx(QStringLiteral("<a href\\s*=\\s*\"(https://vgcollect.com/item/\\d+)\">(.+?)</a")); 0160 static const QRegularExpression platformRx(QStringLiteral("<a href=\"https://vgcollect.com/browse/[0-9a-z]+\">(.+?)</a")); 0161 static const QRegularExpression tagRx(QLatin1String("<.*?>")); 0162 0163 QRegularExpressionMatchIterator i = rowRx.globalMatch(s); 0164 while(i.hasNext()) { 0165 auto rowMatch = i.next(); 0166 auto itemMatch = itemRx.match(rowMatch.captured(0)); 0167 if(!itemMatch.hasMatch()) { 0168 continue; 0169 } 0170 auto u = itemMatch.captured(1); 0171 auto title = itemMatch.captured(2); 0172 QString platform; 0173 auto platformMatch = platformRx.match(rowMatch.captured(0)); 0174 if(platformMatch.hasMatch()) { 0175 platform = platformMatch.captured(1); 0176 platform = platform.remove(tagRx).trimmed(); 0177 } 0178 // skip some non-game "platforms" 0179 if(platform == QLatin1String("Toys") || 0180 platform == QLatin1String("Clothing") || 0181 platform == QLatin1String("Merchandise") || 0182 platform == QLatin1String("Soundtrack") || 0183 platform == 
QLatin1String("Books") || 0184 platform == QLatin1String("Comics") || 0185 platform == QLatin1String("GOG.com") || 0186 platform.startsWith(QLatin1String("Amiibo Figures")) || 0187 platform.contains(QLatin1String("Video")) || 0188 platform == QLatin1String("Steam") || 0189 platform == QLatin1String("Consoles") || 0190 platform == QLatin1String("Accessory")) { 0191 continue; 0192 } 0193 // myDebug() << title << platform << u; 0194 FetchResult* r = new FetchResult(this, title, platform); 0195 QUrl url = QUrl(QString::fromLatin1(VGCOLLECT_BASE_URL)).resolved(QUrl(u)); 0196 m_matches.insert(r->uid, url); 0197 // don't emit signal until after putting url in matches hash 0198 emit signalResultFound(r); 0199 } 0200 0201 stop(); 0202 } 0203 0204 Tellico::Data::EntryPtr VGCollectFetcher::fetchEntryHook(uint uid_) { 0205 Data::EntryPtr entry = m_entries.value(uid_); 0206 if(entry || !m_matches.contains(uid_)) { 0207 return entry; 0208 } 0209 0210 auto url = m_matches[uid_]; 0211 QString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true, true)); 0212 if(results.isEmpty()) { 0213 myDebug() << "no text results from" << m_matches[uid_]; 0214 return entry; 0215 } 0216 0217 #if 0 0218 myWarning() << "Remove debug2 from vgcollectfetcher.cpp"; 0219 QFile f(QStringLiteral("/tmp/test2.html")); 0220 if(f.open(QIODevice::WriteOnly)) { 0221 QTextStream t(&f); 0222 t.setCodec("UTF-8"); 0223 t << results; 0224 } 0225 f.close(); 0226 #endif 0227 0228 Data::CollPtr coll(new Data::GameCollection(true)); 0229 entry = new Data::Entry(coll); 0230 parseEntry(entry, results); 0231 m_entries.insert(uid_, entry); 0232 0233 const QString vgcollect(QStringLiteral("vgcollect")); 0234 if(optionalFields().contains(vgcollect)) { 0235 Data::FieldPtr field(new Data::Field(vgcollect, i18n("VGCollect Link"), Data::Field::URL)); 0236 field->setCategory(i18n("General")); 0237 coll->addField(field); 0238 entry->setField(vgcollect, url.url()); 0239 } 0240 0241 // remove url to indicate the entry 
is fully populated 0242 m_matches.remove(uid_); 0243 return entry; 0244 } 0245 0246 void VGCollectFetcher::parseEntry(Data::EntryPtr entry_, const QString& str_) { 0247 static const QRegularExpression divRx(QLatin1String("<div class=\"tab-pane active\" id=\"info\">(.+?)</div"), 0248 QRegularExpression::DotMatchesEverythingOption); 0249 static const QRegularExpression trRx(QLatin1String("<tr>(.+?)</tr"), 0250 QRegularExpression::DotMatchesEverythingOption); 0251 static const QRegularExpression tdRx(QLatin1String("<td[^>]*>(.+?)</td"), 0252 QRegularExpression::DotMatchesEverythingOption); 0253 0254 auto divMatch = divRx.match(str_); 0255 if(divMatch.hasMatch()) { 0256 auto i = trRx.globalMatch(divMatch.captured(1)); 0257 while(i.hasNext()) { 0258 auto rowMatch = i.next(); 0259 auto headerMatch = tdRx.match(rowMatch.captured(1)); 0260 if(headerMatch.hasMatch()) { 0261 auto valueMatch = tdRx.match(rowMatch.captured(1), headerMatch.capturedEnd()); 0262 if(valueMatch.hasMatch()) { 0263 populateValue(entry_, headerMatch.captured(1), valueMatch.captured(1)); 0264 } 0265 } 0266 } 0267 } 0268 0269 static const QRegularExpression titleRx(QLatin1String("<meta property=\"og:title\" content=\"([^\"]+?) 
\\|")); 0270 auto titleMatch = titleRx.match(str_); 0271 if(titleMatch.hasMatch()) { 0272 entry_->setField(QStringLiteral("title"), titleMatch.captured(1)); 0273 } 0274 0275 static const QRegularExpression coverRx(QLatin1String("<meta property=\"og:image\" content=\"(.+?)\">")); 0276 auto coverMatch = coverRx.match(str_); 0277 if(coverMatch.hasMatch()) { 0278 const QString u = coverMatch.captured(1); 0279 const QUrl coverUrl = QUrl(QString::fromLatin1(VGCOLLECT_BASE_URL)).resolved(QUrl(u)); 0280 0281 const QString id = ImageFactory::addImage(coverUrl, true /* quiet */); 0282 if(id.isEmpty()) { 0283 myDebug() << "Could not load" << coverUrl; 0284 message(i18n("The cover image could not be loaded."), MessageHandler::Warning); 0285 } 0286 // empty image ID is ok 0287 entry_->setField(QStringLiteral("cover"), id); 0288 } 0289 } 0290 0291 void VGCollectFetcher::populateValue(Data::EntryPtr entry_, const QString& header_, const QString& value_) const { 0292 static const QRegularExpression tagRx(QLatin1String("<.*?>")); 0293 auto header = header_; 0294 header = header.remove(tagRx).simplified(); 0295 auto value = value_.simplified(); 0296 if(header_.isEmpty() || value_.isEmpty() || value == QLatin1String("NA")) { 0297 return; 0298 } 0299 0300 if(header.startsWith(QLatin1String("Publisher"))) { 0301 entry_->setField(QStringLiteral("publisher"), value); 0302 } else if(header.startsWith(QLatin1String("Developer"))) { 0303 entry_->setField(QStringLiteral("developer"), value); 0304 } else if(header.startsWith(QLatin1String("Platform"))) { 0305 const QString platform = Data::GameCollection::normalizePlatform(value.remove(tagRx).trimmed()); 0306 entry_->setField(QStringLiteral("platform"), platform); 0307 } else if(header.startsWith(QLatin1String("Genre"))) { 0308 entry_->setField(QStringLiteral("genre"), value); 0309 } else if(header.startsWith(QLatin1String("Rating"))) { 0310 QString pegi; 0311 Data::GameCollection::EsrbRating esrb = Data::GameCollection::UnknownEsrb; 0312 
if(value.contains(QLatin1String("ratings/u.png"))) esrb = Data::GameCollection::Unrated; 0313 else if(value.contains(QLatin1String("ratings/t.png"))) esrb = Data::GameCollection::Teen; 0314 else if(value.contains(QLatin1String("ratings/e.png"))) esrb = Data::GameCollection::Everyone; 0315 else if(value.contains(QLatin1String("ratings/ka.png"))) esrb = Data::GameCollection::Everyone; 0316 else if(value.contains(QLatin1String("ratings/e10.png"))) esrb = Data::GameCollection::Everyone10; 0317 else if(value.contains(QLatin1String("ratings/ec.png"))) esrb = Data::GameCollection::EarlyChildhood; 0318 else if(value.contains(QLatin1String("ratings/m.png"))) esrb = Data::GameCollection::Mature; 0319 else if(value.contains(QLatin1String("ratings/ao.png"))) esrb = Data::GameCollection::Adults; 0320 else if(value.contains(QLatin1String("ratings/pegi-3"))) pegi = QLatin1String("PEGI 3"); 0321 else if(value.contains(QLatin1String("ratings/pegi-7"))) pegi = QLatin1String("PEGI 7"); 0322 else if(value.contains(QLatin1String("ratings/pegi-12"))) pegi = QLatin1String("PEGI 12"); 0323 else if(value.contains(QLatin1String("ratings/pegi-16"))) pegi = QLatin1String("PEGI 16"); 0324 else if(value.contains(QLatin1String("ratings/pegi-18"))) pegi = QLatin1String("PEGI 18"); 0325 if(esrb != Data::GameCollection::UnknownEsrb) { 0326 entry_->setField(QStringLiteral("certification"), Data::GameCollection::esrbRating(esrb)); 0327 } 0328 if(!pegi.isEmpty() && optionalFields().contains(QStringLiteral("pegi"))) { 0329 entry_->collection()->addField(Data::Field::createDefaultField(Data::Field::PegiField)); 0330 entry_->setField(QStringLiteral("pegi"), pegi); 0331 } 0332 } else if(header.startsWith(QLatin1String("Release Date"))) { 0333 entry_->setField(QStringLiteral("year"), value.right(4)); 0334 } else if(header.startsWith(QLatin1String("Box Text"))) { 0335 entry_->setField(QStringLiteral("description"), value); 0336 } else if(header.startsWith(QLatin1String("Barcode")) && 0337 
optionalFields().contains(QStringLiteral("barcode"))) { 0338 Data::FieldPtr field(new Data::Field(QStringLiteral("barcode"), i18n("Barcode"))); 0339 field->setCategory(i18n("General")); 0340 entry_->collection()->addField(field); 0341 entry_->setField(QStringLiteral("barcode"), value); 0342 } else { 0343 // myDebug() << "Row header:" << header; 0344 // myDebug() << "Row data:" << value; 0345 } 0346 } 0347 0348 Tellico::Fetch::FetchRequest VGCollectFetcher::updateRequest(Data::EntryPtr entry_) { 0349 QString t = entry_->field(QStringLiteral("title")); 0350 if(!t.isEmpty()) { 0351 return FetchRequest(Fetch::Keyword, t); 0352 } 0353 return FetchRequest(); 0354 } 0355 0356 Tellico::Fetch::ConfigWidget* VGCollectFetcher::configWidget(QWidget* parent_) const { 0357 return new VGCollectFetcher::ConfigWidget(parent_, this); 0358 } 0359 0360 QString VGCollectFetcher::defaultName() { 0361 return QStringLiteral("VGCollect"); 0362 } 0363 0364 QString VGCollectFetcher::defaultIcon() { 0365 return favIcon("https://vgcollect.com/assets/favicon.ico"); 0366 } 0367 0368 //static 0369 Tellico::StringHash VGCollectFetcher::allOptionalFields() { 0370 StringHash hash; 0371 hash[QStringLiteral("vgcollect")] = i18n("VGCollect Link"); 0372 hash[QStringLiteral("pegi")] = i18n("PEGI Rating"); 0373 hash[QStringLiteral("barcode")] = i18n("Barcode"); 0374 return hash; 0375 } 0376 0377 VGCollectFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const VGCollectFetcher* fetcher_) 0378 : Fetch::ConfigWidget(parent_) { 0379 QVBoxLayout* l = new QVBoxLayout(optionsWidget()); 0380 l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); 0381 l->addStretch(); 0382 0383 // now add additional fields widget 0384 addFieldsWidget(VGCollectFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); 0385 } 0386 0387 QString VGCollectFetcher::ConfigWidget::preferredName() const { 0388 return VGCollectFetcher::defaultName(); 0389 }