File indexing completed on 2024-05-12 05:09:33

0001 /***************************************************************************
0002     Copyright (C) 2022 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "gaminghistoryfetcher.h"
0026 #include "../utils/guiproxy.h"
0027 #include "../utils/string_utils.h"
0028 #include "../collections/gamecollection.h"
0029 #include "../entry.h"
0030 #include "../core/filehandler.h"
0031 #include "../images/imagefactory.h"
0032 #include "../tellico_debug.h"
0033 
0034 #include <KLocalizedString>
0035 #include <KConfig>
0036 #include <KIO/Job>
0037 #include <KIO/JobUiDelegate>
0038 #include <KJobWidgets/KJobWidgets>
0039 
0040 #include <QRegularExpression>
0041 #include <QLabel>
0042 #include <QFile>
0043 #include <QTextStream>
0044 #include <QVBoxLayout>
0045 #include <QUrlQuery>
0046 
namespace {
  // Address of the site's index page. It is the endpoint for search requests
  // and the base against which links found in result pages are resolved.
  // Both the pointer and the characters are const so the address cannot be
  // reassigned; the anonymous namespace already gives internal linkage, so
  // no additional `static` is needed.
  const char* const GAMINGHISTORY_BASE_URL = "https://www.arcade-history.com/index.php";
}
0050 
0051 using namespace Tellico;
0052 using Tellico::Fetch::GamingHistoryFetcher;
0053 
0054 GamingHistoryFetcher::GamingHistoryFetcher(QObject* parent_)
0055     : Fetcher(parent_), m_started(false) {
0056   populateYearIndex();
0057 }
0058 
0059 GamingHistoryFetcher::~GamingHistoryFetcher() {
0060 }
0061 
0062 QString GamingHistoryFetcher::source() const {
0063   return m_name.isEmpty() ? defaultName() : m_name;
0064 }
0065 
0066 bool GamingHistoryFetcher::canFetch(int type) const {
0067   return type == Data::Collection::Game;
0068 }
0069 
0070 void GamingHistoryFetcher::readConfigHook(const KConfigGroup& config_) {
0071   Q_UNUSED(config_);
0072 }
0073 
0074 void GamingHistoryFetcher::search() {
0075   m_started = true;
0076   m_matches.clear();
0077 
0078   QUrl u(QString::fromLatin1(GAMINGHISTORY_BASE_URL));
0079   QUrlQuery q;
0080   q.addQueryItem(QStringLiteral("page"), QStringLiteral("database"));
0081 
0082   switch(request().key()) {
0083     case Keyword:
0084       {
0085         QString value = request().value();
0086         // pull out year, keep the regexp a little loose
0087         QRegularExpression yearRX(QStringLiteral("\\s*[0-9]{4}\\s*"));
0088         QRegularExpressionMatch match = yearRX.match(value);
0089         if(match.hasMatch()) {
0090           // fragile, but the form uses a year index
0091           QString year = match.captured(0).trimmed();
0092           if(m_yearIndex.contains(year)) {
0093             q.addQueryItem(QStringLiteral("annee"), QString::number(m_yearIndex.value(year)));
0094             value = value.remove(yearRX);
0095           }
0096         }
0097         q.addQueryItem(QStringLiteral("lemot"), value);
0098       }
0099       break;
0100 
0101     default:
0102       myWarning() << source() << "- key not recognized:" << request().key();
0103       stop();
0104       return;
0105   }
0106   u.setQuery(q);
0107 //  myDebug() << "url:" << u;
0108 
0109   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
0110   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
0111   connect(m_job.data(), &KJob::result,
0112           this, &GamingHistoryFetcher::slotComplete);
0113 }
0114 
0115 void GamingHistoryFetcher::stop() {
0116   if(!m_started) {
0117     return;
0118   }
0119 
0120   if(m_job) {
0121     m_job->kill();
0122     m_job = nullptr;
0123   }
0124   m_started = false;
0125   emit signalDone(this);
0126 }
0127 
0128 void GamingHistoryFetcher::slotComplete(KJob*) {
0129   if(m_job->error()) {
0130     m_job->uiDelegate()->showErrorMessage();
0131     stop();
0132     return;
0133   }
0134 
0135   const QByteArray data = m_job->data();
0136   if(data.isEmpty()) {
0137     myDebug() << "no data";
0138     stop();
0139     return;
0140   }
0141 
0142   // since the fetch is done, don't worry about holding the job pointer
0143   m_job = nullptr;
0144 
0145   const QString s = Tellico::decodeHTML(data);
0146 #if 0
0147   myWarning() << "Remove debug from gaminghistoryfetcher.cpp";
0148   QFile f(QStringLiteral("/tmp/test.html"));
0149   if(f.open(QIODevice::WriteOnly)) {
0150     QTextStream t(&f);
0151     t.setCodec("UTF-8");
0152     t << s;
0153   }
0154   f.close();
0155 #endif
0156 
0157   static const QRegularExpression rowRx(QStringLiteral("<tr class='big-box'>(.+?)</tr"));
0158   static const QRegularExpression dataRx(QStringLiteral("<td data-title='(.+?)'>(.+?)</td"));
0159   static const QRegularExpression tagRx(QLatin1String("<.*?>"));
0160   static const QRegularExpression emRx(QLatin1String("<em.*?>[^<]+?</em>"));
0161   static const QRegularExpression anchorRx(QStringLiteral("<a[^>]+?href='(.+?)'"));
0162 
0163   QRegularExpressionMatchIterator i = rowRx.globalMatch(s);
0164   while(i.hasNext()) {
0165     Data::CollPtr coll(new Data::GameCollection(true));
0166     Data::EntryPtr entry(new Data::Entry(coll));
0167     coll->addEntries(entry);
0168     QString u;
0169     QRegularExpressionMatch rowMatch = i.next();
0170     QRegularExpressionMatchIterator i2 = dataRx.globalMatch(rowMatch.captured(1));
0171     while(i2.hasNext()) {
0172       QRegularExpressionMatch dataMatch = i2.next();
0173       const QStringRef dataType = dataMatch.capturedRef(1);
0174       QString dataValue = dataMatch.captured(2);
0175       if(dataType == QLatin1String("Name")) {
0176         auto anchorMatch = anchorRx.match(dataValue);
0177         if(anchorMatch.hasMatch()) {
0178           u = anchorMatch.captured(1);
0179         }
0180         dataValue = dataValue.remove(emRx).remove(tagRx).simplified();
0181         entry->setField(QStringLiteral("title"), dataValue);
0182       } else if(dataType == QLatin1String("Year")) {
0183         entry->setField(QStringLiteral("year"), dataValue);
0184       } else if(dataType == QLatin1String("Publisher")) {
0185         dataValue = dataValue.remove(emRx).remove(tagRx).simplified();
0186         entry->setField(QStringLiteral("publisher"), dataValue);
0187       } else if(dataType == QLatin1String("Type")) {
0188         populatePlatform(entry, dataValue);
0189       }
0190     }
0191 
0192     FetchResult* r = new FetchResult(this, entry);
0193     m_entries.insert(r->uid, entry);
0194     QUrl url = QUrl(QString::fromLatin1(GAMINGHISTORY_BASE_URL)).resolved(QUrl(u));
0195     if(optionalFields().contains(QStringLiteral("gaming-history"))) {
0196       Data::FieldPtr field(new Data::Field(QStringLiteral("gaming-history"), i18n("Gaming History Link"), Data::Field::URL));
0197       field->setCategory(i18n("General"));
0198       coll->addField(field);
0199       entry->setField(QStringLiteral("gaming-history"), url.url());
0200     }
0201     m_matches.insert(r->uid, url);
0202     // don't emit signal until after putting url in matches hash
0203     emit signalResultFound(r);
0204   }
0205 
0206   if(m_matches.isEmpty()) {
0207     // an exact match is handled by returning a page with <script> at the top
0208     if(s.startsWith(QLatin1String("<script>"))) {
0209       static const QRegularExpression locationRx(QLatin1String("'([^']+?)'</script>"));
0210       auto locationMatch = locationRx.match(s);
0211       if(locationMatch.hasMatch()) {
0212         Data::CollPtr coll(new Data::GameCollection(true));
0213         Data::EntryPtr entry(new Data::Entry(coll));
0214         coll->addEntries(entry);
0215 
0216         QUrl u(locationMatch.captured(1));
0217         parseSingleResult(entry, u);
0218 
0219         FetchResult* r = new FetchResult(this, entry);
0220         m_entries.insert(r->uid, entry);
0221         emit signalResultFound(r);
0222       }
0223     } else {
0224       myDebug() << "no results";
0225     }
0226   }
0227 
0228   stop();
0229 }
0230 
0231 Tellico::Data::EntryPtr GamingHistoryFetcher::fetchEntryHook(uint uid_) {
0232   if(!m_entries.contains(uid_)) {
0233     myWarning() << "no entry in hash";
0234     return Data::EntryPtr();
0235   }
0236 
0237   Data::EntryPtr entry = m_entries[uid_];
0238   // if the url is not in the hash, the entry has already been fully populated
0239   if(!m_matches.contains(uid_)) {
0240     return entry;
0241   }
0242 
0243   QString results = Tellico::decodeHTML(FileHandler::readTextFile(m_matches[uid_], true, true));
0244   if(results.isEmpty()) {
0245     myDebug() << "no text results from" << m_matches[uid_];
0246     return entry;
0247   }
0248 
0249 #if 0
0250   myWarning() << "Remove debug2 from gaminghistoryfetcher.cpp";
0251   QFile f(QStringLiteral("/tmp/test2.html"));
0252   if(f.open(QIODevice::WriteOnly)) {
0253     QTextStream t(&f);
0254     t.setCodec("UTF-8");
0255     t << results;
0256   }
0257   f.close();
0258 #endif
0259 
0260   parseEntry(entry, results);
0261   // remove url to signal the entry is fully populated
0262   m_matches.remove(uid_);
0263   return entry;
0264 }
0265 
0266 void GamingHistoryFetcher::parseEntry(Data::EntryPtr entry, const QString& str_) {
0267   static const QRegularExpression tagRx(QLatin1String("<.*?>"));
0268   static const QRegularExpression divRx(QLatin1String("<div class='ContainerTableau100'><div class='CelluleTexte100'>(.+?)</div"),
0269                                         QRegularExpression::DotMatchesEverythingOption);
0270   auto divMatch = divRx.match(str_);
0271   if(divMatch.hasMatch()) {
0272     QString desc = divMatch.captured(1);
0273     desc.replace(QLatin1String("<br />"), QLatin1String("\n"));
0274     // if the title is empty, need to parse it
0275     if(entry->title().isEmpty()) {
0276       const QString info = desc.section(QLatin1Char('\n'), 0, 0).remove(tagRx).simplified();
0277       QRegularExpression infoRx(QString::fromUtf8("^(.+?) \u00A9 (\\d{4}) (.+?)$"));
0278       auto infoMatch = infoRx.match(info);
0279       if(infoMatch.hasMatch()) {
0280         entry->setField(QStringLiteral("title"), infoMatch.captured(1).trimmed());
0281         entry->setField(QStringLiteral("year"), infoMatch.captured(2).trimmed());
0282         entry->setField(QStringLiteral("publisher"), infoMatch.captured(3).trimmed());
0283       }
0284     }
0285     // take the description as everything after the first line break
0286     desc = desc.section(QLatin1Char('\n'), 1).remove(tagRx).simplified();
0287     entry->setField(QStringLiteral("description"), desc);
0288   }
0289 
0290   // if the platform is empty, grab it from the html title
0291   if(entry->field(QStringLiteral("platform")).isEmpty()) {
0292     static const QRegularExpression titleRx(QLatin1String("<title>.+?, (.+?) by .+?</title>"));
0293     auto titleMatch = titleRx.match(str_);
0294     if(titleMatch.hasMatch()) {
0295       populatePlatform(entry, titleMatch.captured(1));
0296     }
0297   }
0298 
0299   static const QRegularExpression coverRx(QLatin1String("<img [^>]*?id='kukulcan'[^>]*?src='([^>]+?)'"));
0300   auto coverMatch = coverRx.match(str_);
0301   if(coverMatch.hasMatch()) {
0302     QString u = coverMatch.captured(1);
0303     QUrl coverUrl = QUrl(QString::fromLatin1(GAMINGHISTORY_BASE_URL)).resolved(QUrl(u));
0304 
0305     const QString id = ImageFactory::addImage(coverUrl, true /* quiet */);
0306     if(id.isEmpty()) {
0307       myDebug() << "Could not load" << coverUrl;
0308       message(i18n("The cover image could not be loaded."), MessageHandler::Warning);
0309     }
0310     // empty image ID is ok
0311     entry->setField(QStringLiteral("cover"), id);
0312   }
0313 }
0314 
0315 void GamingHistoryFetcher::parseSingleResult(Data::EntryPtr entry, const QUrl& url_) {
0316   QString results = Tellico::decodeHTML(FileHandler::readTextFile(url_, true, true));
0317   parseEntry(entry, results);
0318   if(optionalFields().contains(QStringLiteral("gaming-history"))) {
0319     Data::FieldPtr field(new Data::Field(QStringLiteral("gaming-history"), i18n("Gaming History Link"), Data::Field::URL));
0320     field->setCategory(i18n("General"));
0321     entry->collection()->addField(field);
0322     entry->setField(QStringLiteral("gaming-history"), url_.url());
0323   }
0324 }
0325 
0326 void GamingHistoryFetcher::populatePlatform(Data::EntryPtr entry, const QString& platform_) {
0327   static const QString platformString(QStringLiteral("platform"));
0328 
0329   QString platform = platform_;
0330   if(platform.endsWith(QLatin1String(" game")) ||
0331      platform.endsWith(QLatin1String(" disc"))) {
0332     platform.chop(5);
0333   } else if(platform.endsWith(QLatin1String(" disk.")) ||
0334             platform.endsWith(QLatin1String(" cass.")) ||
0335             platform.endsWith(QLatin1String(" cart."))) {
0336     platform.chop(6);
0337   } else if(platform.endsWith(QLatin1String(" CD"))) {
0338     platform.chop(3);
0339   }
0340 
0341   Data::FieldPtr platformField = entry->collection()->fieldByName(platformString);
0342   if(platformField && !platformField->allowed().contains(platform)) {
0343     QStringList allowed = platformField->allowed();
0344     allowed.append(platform);
0345     platformField->setAllowed(allowed);
0346   }
0347 
0348    entry->setField(platformString, platform);
0349 }
0350 
0351 Tellico::Fetch::FetchRequest GamingHistoryFetcher::updateRequest(Data::EntryPtr entry_) {
0352   QString t = entry_->field(QStringLiteral("title"));
0353   if(!t.isEmpty()) {
0354     return FetchRequest(Fetch::Keyword, t);
0355   }
0356   return FetchRequest();
0357 }
0358 
0359 Tellico::Fetch::ConfigWidget* GamingHistoryFetcher::configWidget(QWidget* parent_) const {
0360   return new GamingHistoryFetcher::ConfigWidget(parent_, this);
0361 }
0362 
0363 QString GamingHistoryFetcher::defaultName() {
0364   return QStringLiteral("Gaming History");
0365 }
0366 
0367 QString GamingHistoryFetcher::defaultIcon() {
0368   return favIcon("https://www.arcade-history.com");
0369 }
0370 
0371 //static
0372 Tellico::StringHash GamingHistoryFetcher::allOptionalFields() {
0373   StringHash hash;
0374   hash.insert(QStringLiteral("gaming-history"), i18n("Gaming History Link"));
0375   return hash;
0376 }
0377 
0378 void GamingHistoryFetcher::populateYearIndex() {
0379   m_yearIndex.clear();
0380   m_yearIndex.insert(QStringLiteral("1971"), 1);
0381   m_yearIndex.insert(QStringLiteral("1972"), 2);
0382   m_yearIndex.insert(QStringLiteral("1973"), 3);
0383   m_yearIndex.insert(QStringLiteral("1974"), 4);
0384   m_yearIndex.insert(QStringLiteral("1975"), 5);
0385   m_yearIndex.insert(QStringLiteral("1976"), 6);
0386   m_yearIndex.insert(QStringLiteral("1977"), 7);
0387   m_yearIndex.insert(QStringLiteral("1978"), 8);
0388   m_yearIndex.insert(QStringLiteral("1979"), 9);
0389   m_yearIndex.insert(QStringLiteral("1980"), 11);
0390   m_yearIndex.insert(QStringLiteral("1981"), 12);
0391   m_yearIndex.insert(QStringLiteral("1982"), 13);
0392   m_yearIndex.insert(QStringLiteral("1983"), 14);
0393   m_yearIndex.insert(QStringLiteral("1984"), 15);
0394   m_yearIndex.insert(QStringLiteral("1985"), 16);
0395   m_yearIndex.insert(QStringLiteral("1986"), 17);
0396   m_yearIndex.insert(QStringLiteral("1987"), 18);
0397   m_yearIndex.insert(QStringLiteral("1988"), 19);
0398   m_yearIndex.insert(QStringLiteral("1989"), 20);
0399   m_yearIndex.insert(QStringLiteral("1990"), 22);
0400   m_yearIndex.insert(QStringLiteral("1991"), 23);
0401   m_yearIndex.insert(QStringLiteral("1992"), 24);
0402   m_yearIndex.insert(QStringLiteral("1993"), 25);
0403   m_yearIndex.insert(QStringLiteral("1994"), 26);
0404   m_yearIndex.insert(QStringLiteral("1995"), 27);
0405   m_yearIndex.insert(QStringLiteral("1996"), 28);
0406   m_yearIndex.insert(QStringLiteral("1997"), 29);
0407   m_yearIndex.insert(QStringLiteral("1998"), 30);
0408   m_yearIndex.insert(QStringLiteral("1999"), 31);
0409   m_yearIndex.insert(QStringLiteral("2000"), 34);
0410   m_yearIndex.insert(QStringLiteral("2001"), 35);
0411   m_yearIndex.insert(QStringLiteral("2002"), 36);
0412   m_yearIndex.insert(QStringLiteral("2003"), 37);
0413   m_yearIndex.insert(QStringLiteral("2004"), 38);
0414   m_yearIndex.insert(QStringLiteral("2005"), 39);
0415   m_yearIndex.insert(QStringLiteral("2006"), 44);
0416   m_yearIndex.insert(QStringLiteral("2007"), 107);
0417   m_yearIndex.insert(QStringLiteral("2008"), 150);
0418   m_yearIndex.insert(QStringLiteral("2009"), 151);
0419   m_yearIndex.insert(QStringLiteral("2010"), 163);
0420   m_yearIndex.insert(QStringLiteral("2011"), 165);
0421   m_yearIndex.insert(QStringLiteral("2012"), 168);
0422   m_yearIndex.insert(QStringLiteral("2013"), 170);
0423   m_yearIndex.insert(QStringLiteral("2014"), 171);
0424   m_yearIndex.insert(QStringLiteral("2015"), 172);
0425   m_yearIndex.insert(QStringLiteral("2016"), 173);
0426   m_yearIndex.insert(QStringLiteral("2017"), 174);
0427   m_yearIndex.insert(QStringLiteral("2018"), 175);
0428   m_yearIndex.insert(QStringLiteral("2019"), 176);
0429   m_yearIndex.insert(QStringLiteral("2020"), 178);
0430   m_yearIndex.insert(QStringLiteral("2021"), 179);
0431   m_yearIndex.insert(QStringLiteral("2022"), 180);
0432   m_yearIndex.insert(QStringLiteral("2023"), 181);
0433 }
0434 
0435 GamingHistoryFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const GamingHistoryFetcher* fetcher_)
0436     : Fetch::ConfigWidget(parent_) {
0437   QVBoxLayout* l = new QVBoxLayout(optionsWidget());
0438   l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
0439   l->addStretch();
0440 
0441   // now add additional fields widget
0442   addFieldsWidget(GamingHistoryFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
0443 }
0444 
0445 QString GamingHistoryFetcher::ConfigWidget::preferredName() const {
0446   return GamingHistoryFetcher::defaultName();
0447 }