File indexing completed on 2024-05-12 05:09:44

0001 /***************************************************************************
0002     Copyright (C) 2023 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "vgcollectfetcher.h"
0026 #include "../utils/guiproxy.h"
0027 #include "../utils/string_utils.h"
0028 #include "../collections/gamecollection.h"
0029 #include "../entry.h"
0030 #include "../core/filehandler.h"
0031 #include "../images/imagefactory.h"
0032 #include "../tellico_debug.h"
0033 
0034 #include <KLocalizedString>
0035 #include <KConfig>
0036 #include <KIO/Job>
0037 #include <KIO/JobUiDelegate>
0038 #include <KJobWidgets/KJobWidgets>
0039 
0040 #include <QRegularExpression>
0041 #include <QLabel>
0042 #include <QFile>
0043 #include <QTextStream>
0044 #include <QVBoxLayout>
0045 
namespace {
  // Address of VGCollect's advanced-search endpoint. It is a compile-time
  // constant: neither the characters nor the pointer itself may be changed.
  // (Inside an unnamed namespace the name already has internal linkage, so no
  // 'static' is needed.)
  constexpr const char* VGCOLLECT_BASE_URL = "https://vgcollect.com/search/advanced";
}
0049 
0050 using namespace Tellico;
0051 using Tellico::Fetch::VGCollectFetcher;
0052 
0053 VGCollectFetcher::VGCollectFetcher(QObject* parent_)
0054     : Fetcher(parent_), m_started(false) {
0055 }
0056 
0057 VGCollectFetcher::~VGCollectFetcher() {
0058 }
0059 
0060 QString VGCollectFetcher::source() const {
0061   return m_name.isEmpty() ? defaultName() : m_name;
0062 }
0063 
0064 bool VGCollectFetcher::canFetch(int type) const {
0065   return type == Data::Collection::Game;
0066 }
0067 
0068 void VGCollectFetcher::readConfigHook(const KConfigGroup& config_) {
0069   Q_UNUSED(config_);
0070 }
0071 
0072 void VGCollectFetcher::search() {
0073   m_started = true;
0074   m_matches.clear();
0075 
0076   QUrl u(QString::fromLatin1(VGCOLLECT_BASE_URL));
0077   QString urlPath(QStringLiteral("/no-filter/%1/no-filter/0/ALL/ALL/ALL/ALL/no-filter/%2/%3"));
0078 
0079   switch(request().key()) {
0080     case Keyword:
0081       {
0082         QString value = request().value();
0083         QString yearStart, yearEnd;
0084         // pull out year, keep the regexp a little loose
0085         QRegularExpression yearRX(QStringLiteral("\\s*[12][0-9]{3}\\s*"));
0086         QRegularExpressionMatch match = yearRX.match(value);
0087         if(match.hasMatch()) {
0088           // fragile, but the form uses a year index
0089           yearStart = match.captured(0).trimmed() + QLatin1String("-01-01");
0090           yearEnd = match.captured(0).trimmed() + QLatin1String("-12-31");
0091           value = value.remove(yearRX);
0092         } else {
0093           yearStart = QStringLiteral("no-filter");
0094           yearEnd = yearStart;
0095         }
0096         urlPath = urlPath.arg(value, yearStart, yearEnd);
0097       }
0098       break;
0099 
0100     default:
0101       myWarning() << source() << "- key not recognized:" << request().key();
0102       stop();
0103       return;
0104   }
0105   u.setPath(u.path() + urlPath);
0106 //  myDebug() << "url:" << u;
0107 
0108   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
0109   m_job->addMetaData(QStringLiteral("referrer"), QString::fromLatin1("https://vgcollect.com/search"));
0110   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
0111   connect(m_job.data(), &KJob::result,
0112           this, &VGCollectFetcher::slotComplete);
0113 }
0114 
0115 void VGCollectFetcher::stop() {
0116   if(!m_started) {
0117     return;
0118   }
0119 
0120   if(m_job) {
0121     m_job->kill();
0122     m_job = nullptr;
0123   }
0124   m_started = false;
0125   emit signalDone(this);
0126 }
0127 
0128 void VGCollectFetcher::slotComplete(KJob*) {
0129   if(m_job->error()) {
0130     m_job->uiDelegate()->showErrorMessage();
0131     stop();
0132     return;
0133   }
0134 
0135   const QByteArray data = m_job->data();
0136   if(data.isEmpty()) {
0137     myDebug() << "no data";
0138     stop();
0139     return;
0140   }
0141 
0142   // since the fetch is done, don't worry about holding the job pointer
0143   m_job = nullptr;
0144 
0145   const QString s = Tellico::decodeHTML(data);
0146 #if 0
0147   myWarning() << "Remove debug from vgcollectfetcher.cpp";
0148   QFile f(QStringLiteral("/tmp/test.html"));
0149   if(f.open(QIODevice::WriteOnly)) {
0150     QTextStream t(&f);
0151     t.setCodec("UTF-8");
0152     t << s;
0153   }
0154   f.close();
0155 #endif
0156 
0157   static const QRegularExpression rowRx(QStringLiteral("<div class=\"span10\">(.+?)<div class=\"item-notes\""),
0158                                         QRegularExpression::DotMatchesEverythingOption);
0159   static const QRegularExpression itemRx(QStringLiteral("<a href\\s*=\\s*\"(https://vgcollect.com/item/\\d+)\">(.+?)</a"));
0160   static const QRegularExpression platformRx(QStringLiteral("<a href=\"https://vgcollect.com/browse/[0-9a-z]+\">(.+?)</a"));
0161   static const QRegularExpression tagRx(QLatin1String("<.*?>"));
0162 
0163   QRegularExpressionMatchIterator i = rowRx.globalMatch(s);
0164   while(i.hasNext()) {
0165     auto rowMatch = i.next();
0166     auto itemMatch = itemRx.match(rowMatch.captured(0));
0167     if(!itemMatch.hasMatch()) {
0168       continue;
0169     }
0170     auto u = itemMatch.captured(1);
0171     auto title = itemMatch.captured(2);
0172     QString platform;
0173     auto platformMatch = platformRx.match(rowMatch.captured(0));
0174     if(platformMatch.hasMatch()) {
0175       platform = platformMatch.captured(1);
0176       platform = platform.remove(tagRx).trimmed();
0177     }
0178     // skip some non-game "platforms"
0179     if(platform == QLatin1String("Toys") ||
0180        platform == QLatin1String("Clothing") ||
0181        platform == QLatin1String("Merchandise") ||
0182        platform == QLatin1String("Soundtrack") ||
0183        platform == QLatin1String("Books") ||
0184        platform == QLatin1String("Comics") ||
0185        platform == QLatin1String("GOG.com") ||
0186        platform.startsWith(QLatin1String("Amiibo Figures")) ||
0187        platform.contains(QLatin1String("Video")) ||
0188        platform == QLatin1String("Steam") ||
0189        platform == QLatin1String("Consoles") ||
0190        platform == QLatin1String("Accessory")) {
0191         continue;
0192     }
0193 //    myDebug() << title << platform << u;
0194     FetchResult* r = new FetchResult(this, title, platform);
0195     QUrl url = QUrl(QString::fromLatin1(VGCOLLECT_BASE_URL)).resolved(QUrl(u));
0196     m_matches.insert(r->uid, url);
0197     // don't emit signal until after putting url in matches hash
0198     emit signalResultFound(r);
0199   }
0200 
0201   stop();
0202 }
0203 
0204 Tellico::Data::EntryPtr VGCollectFetcher::fetchEntryHook(uint uid_) {
0205   Data::EntryPtr entry = m_entries.value(uid_);
0206   if(entry || !m_matches.contains(uid_)) {
0207     return entry;
0208   }
0209 
0210   auto url = m_matches[uid_];
0211   QString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true, true));
0212   if(results.isEmpty()) {
0213     myDebug() << "no text results from" << m_matches[uid_];
0214     return entry;
0215   }
0216 
0217 #if 0
0218   myWarning() << "Remove debug2 from vgcollectfetcher.cpp";
0219   QFile f(QStringLiteral("/tmp/test2.html"));
0220   if(f.open(QIODevice::WriteOnly)) {
0221     QTextStream t(&f);
0222     t.setCodec("UTF-8");
0223     t << results;
0224   }
0225   f.close();
0226 #endif
0227 
0228   Data::CollPtr coll(new Data::GameCollection(true));
0229   entry = new Data::Entry(coll);
0230   parseEntry(entry, results);
0231   m_entries.insert(uid_, entry);
0232 
0233   const QString vgcollect(QStringLiteral("vgcollect"));
0234   if(optionalFields().contains(vgcollect)) {
0235     Data::FieldPtr field(new Data::Field(vgcollect, i18n("VGCollect Link"), Data::Field::URL));
0236     field->setCategory(i18n("General"));
0237     coll->addField(field);
0238     entry->setField(vgcollect, url.url());
0239   }
0240 
0241   // remove url to indicate the entry is fully populated
0242   m_matches.remove(uid_);
0243   return entry;
0244 }
0245 
0246 void VGCollectFetcher::parseEntry(Data::EntryPtr entry_, const QString& str_) {
0247   static const QRegularExpression divRx(QLatin1String("<div class=\"tab-pane active\" id=\"info\">(.+?)</div"),
0248                                         QRegularExpression::DotMatchesEverythingOption);
0249   static const QRegularExpression trRx(QLatin1String("<tr>(.+?)</tr"),
0250                                        QRegularExpression::DotMatchesEverythingOption);
0251   static const QRegularExpression tdRx(QLatin1String("<td[^>]*>(.+?)</td"),
0252                                         QRegularExpression::DotMatchesEverythingOption);
0253 
0254   auto divMatch = divRx.match(str_);
0255   if(divMatch.hasMatch()) {
0256     auto i = trRx.globalMatch(divMatch.captured(1));
0257     while(i.hasNext()) {
0258       auto rowMatch = i.next();
0259       auto headerMatch = tdRx.match(rowMatch.captured(1));
0260       if(headerMatch.hasMatch()) {
0261         auto valueMatch = tdRx.match(rowMatch.captured(1), headerMatch.capturedEnd());
0262         if(valueMatch.hasMatch()) {
0263           populateValue(entry_, headerMatch.captured(1), valueMatch.captured(1));
0264         }
0265       }
0266     }
0267   }
0268 
0269   static const QRegularExpression titleRx(QLatin1String("<meta property=\"og:title\" content=\"([^\"]+?) \\|"));
0270   auto titleMatch = titleRx.match(str_);
0271   if(titleMatch.hasMatch()) {
0272     entry_->setField(QStringLiteral("title"), titleMatch.captured(1));
0273   }
0274 
0275   static const QRegularExpression coverRx(QLatin1String("<meta property=\"og:image\" content=\"(.+?)\">"));
0276   auto coverMatch = coverRx.match(str_);
0277   if(coverMatch.hasMatch()) {
0278     const QString u = coverMatch.captured(1);
0279     const QUrl coverUrl = QUrl(QString::fromLatin1(VGCOLLECT_BASE_URL)).resolved(QUrl(u));
0280 
0281     const QString id = ImageFactory::addImage(coverUrl, true /* quiet */);
0282     if(id.isEmpty()) {
0283       myDebug() << "Could not load" << coverUrl;
0284       message(i18n("The cover image could not be loaded."), MessageHandler::Warning);
0285     }
0286     // empty image ID is ok
0287     entry_->setField(QStringLiteral("cover"), id);
0288   }
0289 }
0290 
0291 void VGCollectFetcher::populateValue(Data::EntryPtr entry_, const QString& header_, const QString& value_) const {
0292   static const QRegularExpression tagRx(QLatin1String("<.*?>"));
0293   auto header = header_;
0294   header = header.remove(tagRx).simplified();
0295   auto value = value_.simplified();
0296   if(header_.isEmpty() || value_.isEmpty() || value == QLatin1String("NA")) {
0297     return;
0298   }
0299 
0300   if(header.startsWith(QLatin1String("Publisher"))) {
0301     entry_->setField(QStringLiteral("publisher"), value);
0302   } else if(header.startsWith(QLatin1String("Developer"))) {
0303     entry_->setField(QStringLiteral("developer"), value);
0304   } else if(header.startsWith(QLatin1String("Platform"))) {
0305     const QString platform = Data::GameCollection::normalizePlatform(value.remove(tagRx).trimmed());
0306     entry_->setField(QStringLiteral("platform"), platform);
0307   } else if(header.startsWith(QLatin1String("Genre"))) {
0308     entry_->setField(QStringLiteral("genre"), value);
0309   } else if(header.startsWith(QLatin1String("Rating"))) {
0310     QString pegi;
0311     Data::GameCollection::EsrbRating esrb = Data::GameCollection::UnknownEsrb;
0312     if(value.contains(QLatin1String("ratings/u.png")))       esrb = Data::GameCollection::Unrated;
0313     else if(value.contains(QLatin1String("ratings/t.png")))  esrb = Data::GameCollection::Teen;
0314     else if(value.contains(QLatin1String("ratings/e.png")))  esrb = Data::GameCollection::Everyone;
0315     else if(value.contains(QLatin1String("ratings/ka.png")))  esrb = Data::GameCollection::Everyone;
0316     else if(value.contains(QLatin1String("ratings/e10.png"))) esrb = Data::GameCollection::Everyone10;
0317     else if(value.contains(QLatin1String("ratings/ec.png"))) esrb = Data::GameCollection::EarlyChildhood;
0318     else if(value.contains(QLatin1String("ratings/m.png")))  esrb = Data::GameCollection::Mature;
0319     else if(value.contains(QLatin1String("ratings/ao.png")))  esrb = Data::GameCollection::Adults;
0320     else if(value.contains(QLatin1String("ratings/pegi-3")))  pegi = QLatin1String("PEGI 3");
0321     else if(value.contains(QLatin1String("ratings/pegi-7")))  pegi = QLatin1String("PEGI 7");
0322     else if(value.contains(QLatin1String("ratings/pegi-12"))) pegi = QLatin1String("PEGI 12");
0323     else if(value.contains(QLatin1String("ratings/pegi-16"))) pegi = QLatin1String("PEGI 16");
0324     else if(value.contains(QLatin1String("ratings/pegi-18"))) pegi = QLatin1String("PEGI 18");
0325     if(esrb != Data::GameCollection::UnknownEsrb) {
0326       entry_->setField(QStringLiteral("certification"), Data::GameCollection::esrbRating(esrb));
0327     }
0328     if(!pegi.isEmpty() && optionalFields().contains(QStringLiteral("pegi"))) {
0329       entry_->collection()->addField(Data::Field::createDefaultField(Data::Field::PegiField));
0330       entry_->setField(QStringLiteral("pegi"), pegi);
0331     }
0332   } else if(header.startsWith(QLatin1String("Release Date"))) {
0333     entry_->setField(QStringLiteral("year"), value.right(4));
0334   } else if(header.startsWith(QLatin1String("Box Text"))) {
0335     entry_->setField(QStringLiteral("description"), value);
0336   } else if(header.startsWith(QLatin1String("Barcode")) &&
0337             optionalFields().contains(QStringLiteral("barcode"))) {
0338     Data::FieldPtr field(new Data::Field(QStringLiteral("barcode"), i18n("Barcode")));
0339     field->setCategory(i18n("General"));
0340     entry_->collection()->addField(field);
0341     entry_->setField(QStringLiteral("barcode"), value);
0342   } else {
0343 //    myDebug() << "Row header:" << header;
0344 //    myDebug() << "Row data:" << value;
0345   }
0346 }
0347 
0348 Tellico::Fetch::FetchRequest VGCollectFetcher::updateRequest(Data::EntryPtr entry_) {
0349   QString t = entry_->field(QStringLiteral("title"));
0350   if(!t.isEmpty()) {
0351     return FetchRequest(Fetch::Keyword, t);
0352   }
0353   return FetchRequest();
0354 }
0355 
0356 Tellico::Fetch::ConfigWidget* VGCollectFetcher::configWidget(QWidget* parent_) const {
0357   return new VGCollectFetcher::ConfigWidget(parent_, this);
0358 }
0359 
0360 QString VGCollectFetcher::defaultName() {
0361   return QStringLiteral("VGCollect");
0362 }
0363 
0364 QString VGCollectFetcher::defaultIcon() {
0365   return favIcon("https://vgcollect.com/assets/favicon.ico");
0366 }
0367 
0368 //static
0369 Tellico::StringHash VGCollectFetcher::allOptionalFields() {
0370   StringHash hash;
0371   hash[QStringLiteral("vgcollect")] = i18n("VGCollect Link");
0372   hash[QStringLiteral("pegi")]      = i18n("PEGI Rating");
0373   hash[QStringLiteral("barcode")]   = i18n("Barcode");
0374   return hash;
0375 }
0376 
0377 VGCollectFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const VGCollectFetcher* fetcher_)
0378     : Fetch::ConfigWidget(parent_) {
0379   QVBoxLayout* l = new QVBoxLayout(optionsWidget());
0380   l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
0381   l->addStretch();
0382 
0383   // now add additional fields widget
0384   addFieldsWidget(VGCollectFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
0385 }
0386 
0387 QString VGCollectFetcher::ConfigWidget::preferredName() const {
0388   return VGCollectFetcher::defaultName();
0389 }