File indexing completed on 2024-05-12 05:09:33
0001 /*************************************************************************** 0002 Copyright (C) 2022 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "gaminghistoryfetcher.h" 0026 #include "../utils/guiproxy.h" 0027 #include "../utils/string_utils.h" 0028 #include "../collections/gamecollection.h" 0029 #include "../entry.h" 0030 #include "../core/filehandler.h" 0031 #include "../images/imagefactory.h" 0032 #include "../tellico_debug.h" 0033 0034 #include <KLocalizedString> 0035 #include <KConfig> 0036 #include <KIO/Job> 0037 #include <KIO/JobUiDelegate> 0038 #include <KJobWidgets/KJobWidgets> 0039 0040 #include <QRegularExpression> 0041 #include <QLabel> 0042 #include <QFile> 0043 #include <QTextStream> 0044 #include <QVBoxLayout> 0045 #include <QUrlQuery> 0046 0047 namespace { 0048 static const char* GAMINGHISTORY_BASE_URL = "https://www.arcade-history.com/index.php"; 0049 } 0050 0051 using namespace Tellico; 0052 using Tellico::Fetch::GamingHistoryFetcher; 0053 0054 GamingHistoryFetcher::GamingHistoryFetcher(QObject* parent_) 0055 : Fetcher(parent_), m_started(false) { 0056 populateYearIndex(); 0057 } 0058 0059 GamingHistoryFetcher::~GamingHistoryFetcher() { 0060 } 0061 0062 QString GamingHistoryFetcher::source() const { 0063 return m_name.isEmpty() ? defaultName() : m_name; 0064 } 0065 0066 bool GamingHistoryFetcher::canFetch(int type) const { 0067 return type == Data::Collection::Game; 0068 } 0069 0070 void GamingHistoryFetcher::readConfigHook(const KConfigGroup& config_) { 0071 Q_UNUSED(config_); 0072 } 0073 0074 void GamingHistoryFetcher::search() { 0075 m_started = true; 0076 m_matches.clear(); 0077 0078 QUrl u(QString::fromLatin1(GAMINGHISTORY_BASE_URL)); 0079 QUrlQuery q; 0080 q.addQueryItem(QStringLiteral("page"), QStringLiteral("database")); 0081 0082 switch(request().key()) { 0083 case Keyword: 0084 { 0085 QString value = request().value(); 0086 // pull out year, keep the regexp a little loose 0087 QRegularExpression yearRX(QStringLiteral("\\s*[0-9]{4}\\s*")); 0088 QRegularExpressionMatch match = yearRX.match(value); 0089 if(match.hasMatch()) { 0090 // fragile, but the form uses a year index 0091 QString year = match.captured(0).trimmed(); 0092 if(m_yearIndex.contains(year)) { 0093 q.addQueryItem(QStringLiteral("annee"), QString::number(m_yearIndex.value(year))); 0094 value = value.remove(yearRX); 0095 } 0096 } 0097 q.addQueryItem(QStringLiteral("lemot"), value); 0098 } 0099 break; 0100 0101 default: 0102 myWarning() << source() << "- key not recognized:" << request().key(); 0103 stop(); 0104 return; 0105 } 0106 u.setQuery(q); 0107 // myDebug() << "url:" << u; 0108 0109 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); 0110 KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); 0111 connect(m_job.data(), &KJob::result, 0112 this, &GamingHistoryFetcher::slotComplete); 0113 } 0114 0115 void GamingHistoryFetcher::stop() { 0116 if(!m_started) { 0117 return; 0118 } 0119 0120 if(m_job) { 0121 m_job->kill(); 0122 m_job = nullptr; 0123 } 0124 m_started = false; 0125 emit signalDone(this); 0126 } 0127 0128 void GamingHistoryFetcher::slotComplete(KJob*) { 0129 if(m_job->error()) { 0130 m_job->uiDelegate()->showErrorMessage(); 0131 stop(); 0132 return; 0133 } 0134 0135 const QByteArray data = m_job->data(); 0136 if(data.isEmpty()) { 0137 myDebug() << "no data"; 0138 stop(); 0139 return; 0140 } 0141 0142 // since the fetch is done, don't worry about holding the job pointer 0143 m_job = nullptr; 0144 0145 const QString s = Tellico::decodeHTML(data); 0146 #if 0 0147 myWarning() << "Remove debug from gaminghistoryfetcher.cpp"; 0148 QFile f(QStringLiteral("/tmp/test.html")); 0149 if(f.open(QIODevice::WriteOnly)) { 0150 QTextStream t(&f); 0151 t.setCodec("UTF-8"); 0152 t << s; 0153 } 0154 f.close(); 0155 #endif 0156 0157 static const QRegularExpression rowRx(QStringLiteral("<tr class='big-box'>(.+?)</tr")); 0158 static const QRegularExpression dataRx(QStringLiteral("<td data-title='(.+?)'>(.+?)</td")); 0159 static const QRegularExpression tagRx(QLatin1String("<.*?>")); 0160 static const QRegularExpression emRx(QLatin1String("<em.*?>[^<]+?</em>")); 0161 static const QRegularExpression anchorRx(QStringLiteral("<a[^>]+?href='(.+?)'")); 0162 0163 QRegularExpressionMatchIterator i = rowRx.globalMatch(s); 0164 while(i.hasNext()) { 0165 Data::CollPtr coll(new Data::GameCollection(true)); 0166 Data::EntryPtr entry(new Data::Entry(coll)); 0167 coll->addEntries(entry); 0168 QString u; 0169 QRegularExpressionMatch rowMatch = i.next(); 0170 QRegularExpressionMatchIterator i2 = dataRx.globalMatch(rowMatch.captured(1)); 0171 while(i2.hasNext()) { 0172 QRegularExpressionMatch dataMatch = i2.next(); 0173 const QStringRef dataType = dataMatch.capturedRef(1); 0174 QString dataValue = dataMatch.captured(2); 0175 if(dataType == QLatin1String("Name")) { 0176 auto anchorMatch = anchorRx.match(dataValue); 0177 if(anchorMatch.hasMatch()) { 0178 u = anchorMatch.captured(1); 0179 } 0180 dataValue = dataValue.remove(emRx).remove(tagRx).simplified(); 0181 entry->setField(QStringLiteral("title"), dataValue); 0182 } else if(dataType == QLatin1String("Year")) { 0183 entry->setField(QStringLiteral("year"), dataValue); 0184 } else if(dataType == QLatin1String("Publisher")) { 0185 dataValue = dataValue.remove(emRx).remove(tagRx).simplified(); 0186 entry->setField(QStringLiteral("publisher"), dataValue); 0187 } else if(dataType == QLatin1String("Type")) { 0188 populatePlatform(entry, dataValue); 0189 } 0190 } 0191 0192 FetchResult* r = new FetchResult(this, entry); 0193 m_entries.insert(r->uid, entry); 0194 QUrl url = QUrl(QString::fromLatin1(GAMINGHISTORY_BASE_URL)).resolved(QUrl(u)); 0195 if(optionalFields().contains(QStringLiteral("gaming-history"))) { 0196 Data::FieldPtr field(new Data::Field(QStringLiteral("gaming-history"), i18n("Gaming History Link"), Data::Field::URL)); 0197 field->setCategory(i18n("General")); 0198 coll->addField(field); 0199 entry->setField(QStringLiteral("gaming-history"), url.url()); 0200 } 0201 m_matches.insert(r->uid, url); 0202 // don't emit signal until after putting url in matches hash 0203 emit signalResultFound(r); 0204 } 0205 0206 if(m_matches.isEmpty()) { 0207 // an exact match is handled by returning a page with <script> at the top 0208 if(s.startsWith(QLatin1String("<script>"))) { 0209 static const QRegularExpression locationRx(QLatin1String("'([^']+?)'</script>")); 0210 auto locationMatch = locationRx.match(s); 0211 if(locationMatch.hasMatch()) { 0212 Data::CollPtr coll(new Data::GameCollection(true)); 0213 Data::EntryPtr entry(new Data::Entry(coll)); 0214 coll->addEntries(entry); 0215 0216 QUrl u(locationMatch.captured(1)); 0217 parseSingleResult(entry, u); 0218 0219 FetchResult* r = new FetchResult(this, entry); 0220 m_entries.insert(r->uid, entry); 0221 emit signalResultFound(r); 0222 } 0223 } else { 0224 myDebug() << "no results"; 0225 } 0226 } 0227 0228 stop(); 0229 } 0230 0231 Tellico::Data::EntryPtr GamingHistoryFetcher::fetchEntryHook(uint uid_) { 0232 if(!m_entries.contains(uid_)) { 0233 myWarning() << "no entry in hash"; 0234 return Data::EntryPtr(); 0235 } 0236 0237 Data::EntryPtr entry = m_entries[uid_]; 0238 // if the url is not in the hash, the entry has already been fully populated 0239 if(!m_matches.contains(uid_)) { 0240 return entry; 0241 } 0242 0243 QString results = Tellico::decodeHTML(FileHandler::readTextFile(m_matches[uid_], true, true)); 0244 if(results.isEmpty()) { 0245 myDebug() << "no text results from" << m_matches[uid_]; 0246 return entry; 0247 } 0248 0249 #if 0 0250 myWarning() << "Remove debug2 from gaminghistoryfetcher.cpp"; 0251 QFile f(QStringLiteral("/tmp/test2.html")); 0252 if(f.open(QIODevice::WriteOnly)) { 0253 QTextStream t(&f); 0254 t.setCodec("UTF-8"); 0255 t << results; 0256 } 0257 f.close(); 0258 #endif 0259 0260 parseEntry(entry, results); 0261 // remove url to signal the entry is fully populated 0262 m_matches.remove(uid_); 0263 return entry; 0264 } 0265 0266 void GamingHistoryFetcher::parseEntry(Data::EntryPtr entry, const QString& str_) { 0267 static const QRegularExpression tagRx(QLatin1String("<.*?>")); 0268 static const QRegularExpression divRx(QLatin1String("<div class='ContainerTableau100'><div class='CelluleTexte100'>(.+?)</div"), 0269 QRegularExpression::DotMatchesEverythingOption); 0270 auto divMatch = divRx.match(str_); 0271 if(divMatch.hasMatch()) { 0272 QString desc = divMatch.captured(1); 0273 desc.replace(QLatin1String("<br />"), QLatin1String("\n")); 0274 // if the title is empty, need to parse it 0275 if(entry->title().isEmpty()) { 0276 const QString info = desc.section(QLatin1Char('\n'), 0, 0).remove(tagRx).simplified(); 0277 QRegularExpression infoRx(QString::fromUtf8("^(.+?) \u00A9 (\\d{4}) (.+?)$")); 0278 auto infoMatch = infoRx.match(info); 0279 if(infoMatch.hasMatch()) { 0280 entry->setField(QStringLiteral("title"), infoMatch.captured(1).trimmed()); 0281 entry->setField(QStringLiteral("year"), infoMatch.captured(2).trimmed()); 0282 entry->setField(QStringLiteral("publisher"), infoMatch.captured(3).trimmed()); 0283 } 0284 } 0285 // take the description as everything after the first line break 0286 desc = desc.section(QLatin1Char('\n'), 1).remove(tagRx).simplified(); 0287 entry->setField(QStringLiteral("description"), desc); 0288 } 0289 0290 // if the platform is empty, grab it from the html title 0291 if(entry->field(QStringLiteral("platform")).isEmpty()) { 0292 static const QRegularExpression titleRx(QLatin1String("<title>.+?, (.+?) by .+?</title>")); 0293 auto titleMatch = titleRx.match(str_); 0294 if(titleMatch.hasMatch()) { 0295 populatePlatform(entry, titleMatch.captured(1)); 0296 } 0297 } 0298 0299 static const QRegularExpression coverRx(QLatin1String("<img [^>]*?id='kukulcan'[^>]*?src='([^>]+?)'")); 0300 auto coverMatch = coverRx.match(str_); 0301 if(coverMatch.hasMatch()) { 0302 QString u = coverMatch.captured(1); 0303 QUrl coverUrl = QUrl(QString::fromLatin1(GAMINGHISTORY_BASE_URL)).resolved(QUrl(u)); 0304 0305 const QString id = ImageFactory::addImage(coverUrl, true /* quiet */); 0306 if(id.isEmpty()) { 0307 myDebug() << "Could not load" << coverUrl; 0308 message(i18n("The cover image could not be loaded."), MessageHandler::Warning); 0309 } 0310 // empty image ID is ok 0311 entry->setField(QStringLiteral("cover"), id); 0312 } 0313 } 0314 0315 void GamingHistoryFetcher::parseSingleResult(Data::EntryPtr entry, const QUrl& url_) { 0316 QString results = Tellico::decodeHTML(FileHandler::readTextFile(url_, true, true)); 0317 parseEntry(entry, results); 0318 if(optionalFields().contains(QStringLiteral("gaming-history"))) { 0319 Data::FieldPtr field(new Data::Field(QStringLiteral("gaming-history"), i18n("Gaming History Link"), Data::Field::URL)); 0320 field->setCategory(i18n("General")); 0321 entry->collection()->addField(field); 0322 entry->setField(QStringLiteral("gaming-history"), url_.url()); 0323 } 0324 } 0325 0326 void GamingHistoryFetcher::populatePlatform(Data::EntryPtr entry, const QString& platform_) { 0327 static const QString platformString(QStringLiteral("platform")); 0328 0329 QString platform = platform_; 0330 if(platform.endsWith(QLatin1String(" game")) || 0331 platform.endsWith(QLatin1String(" disc"))) { 0332 platform.chop(5); 0333 } else if(platform.endsWith(QLatin1String(" disk.")) || 0334 platform.endsWith(QLatin1String(" cass.")) || 0335 platform.endsWith(QLatin1String(" cart."))) { 0336 platform.chop(6); 0337 } else if(platform.endsWith(QLatin1String(" CD"))) { 0338 platform.chop(3); 0339 } 0340 0341 Data::FieldPtr platformField = entry->collection()->fieldByName(platformString); 0342 if(platformField && !platformField->allowed().contains(platform)) { 0343 QStringList allowed = platformField->allowed(); 0344 allowed.append(platform); 0345 platformField->setAllowed(allowed); 0346 } 0347 0348 entry->setField(platformString, platform); 0349 } 0350 0351 Tellico::Fetch::FetchRequest GamingHistoryFetcher::updateRequest(Data::EntryPtr entry_) { 0352 QString t = entry_->field(QStringLiteral("title")); 0353 if(!t.isEmpty()) { 0354 return FetchRequest(Fetch::Keyword, t); 0355 } 0356 return FetchRequest(); 0357 } 0358 0359 Tellico::Fetch::ConfigWidget* GamingHistoryFetcher::configWidget(QWidget* parent_) const { 0360 return new GamingHistoryFetcher::ConfigWidget(parent_, this); 0361 } 0362 0363 QString GamingHistoryFetcher::defaultName() { 0364 return QStringLiteral("Gaming History"); 0365 } 0366 0367 QString GamingHistoryFetcher::defaultIcon() { 0368 return favIcon("https://www.arcade-history.com"); 0369 } 0370 0371 //static 0372 Tellico::StringHash GamingHistoryFetcher::allOptionalFields() { 0373 StringHash hash; 0374 hash.insert(QStringLiteral("gaming-history"), i18n("Gaming History Link")); 0375 return hash; 0376 } 0377 0378 void GamingHistoryFetcher::populateYearIndex() { 0379 m_yearIndex.clear(); 0380 m_yearIndex.insert(QStringLiteral("1971"), 1); 0381 m_yearIndex.insert(QStringLiteral("1972"), 2); 0382 m_yearIndex.insert(QStringLiteral("1973"), 3); 0383 m_yearIndex.insert(QStringLiteral("1974"), 4); 0384 m_yearIndex.insert(QStringLiteral("1975"), 5); 0385 m_yearIndex.insert(QStringLiteral("1976"), 6); 0386 m_yearIndex.insert(QStringLiteral("1977"), 7); 0387 m_yearIndex.insert(QStringLiteral("1978"), 8); 0388 m_yearIndex.insert(QStringLiteral("1979"), 9); 0389 m_yearIndex.insert(QStringLiteral("1980"), 11); 0390 m_yearIndex.insert(QStringLiteral("1981"), 12); 0391 m_yearIndex.insert(QStringLiteral("1982"), 13); 0392 m_yearIndex.insert(QStringLiteral("1983"), 14); 0393 m_yearIndex.insert(QStringLiteral("1984"), 15); 0394 m_yearIndex.insert(QStringLiteral("1985"), 16); 0395 m_yearIndex.insert(QStringLiteral("1986"), 17); 0396 m_yearIndex.insert(QStringLiteral("1987"), 18); 0397 m_yearIndex.insert(QStringLiteral("1988"), 19); 0398 m_yearIndex.insert(QStringLiteral("1989"), 20); 0399 m_yearIndex.insert(QStringLiteral("1990"), 22); 0400 m_yearIndex.insert(QStringLiteral("1991"), 23); 0401 m_yearIndex.insert(QStringLiteral("1992"), 24); 0402 m_yearIndex.insert(QStringLiteral("1993"), 25); 0403 m_yearIndex.insert(QStringLiteral("1994"), 26); 0404 m_yearIndex.insert(QStringLiteral("1995"), 27); 0405 m_yearIndex.insert(QStringLiteral("1996"), 28); 0406 m_yearIndex.insert(QStringLiteral("1997"), 29); 0407 m_yearIndex.insert(QStringLiteral("1998"), 30); 0408 m_yearIndex.insert(QStringLiteral("1999"), 31); 0409 m_yearIndex.insert(QStringLiteral("2000"), 34); 0410 m_yearIndex.insert(QStringLiteral("2001"), 35); 0411 m_yearIndex.insert(QStringLiteral("2002"), 36); 0412 m_yearIndex.insert(QStringLiteral("2003"), 37); 0413 m_yearIndex.insert(QStringLiteral("2004"), 38); 0414 m_yearIndex.insert(QStringLiteral("2005"), 39); 0415 m_yearIndex.insert(QStringLiteral("2006"), 44); 0416 m_yearIndex.insert(QStringLiteral("2007"), 107); 0417 m_yearIndex.insert(QStringLiteral("2008"), 150); 0418 m_yearIndex.insert(QStringLiteral("2009"), 151); 0419 m_yearIndex.insert(QStringLiteral("2010"), 163); 0420 m_yearIndex.insert(QStringLiteral("2011"), 165); 0421 m_yearIndex.insert(QStringLiteral("2012"), 168); 0422 m_yearIndex.insert(QStringLiteral("2013"), 170); 0423 m_yearIndex.insert(QStringLiteral("2014"), 171); 0424 m_yearIndex.insert(QStringLiteral("2015"), 172); 0425 m_yearIndex.insert(QStringLiteral("2016"), 173); 0426 m_yearIndex.insert(QStringLiteral("2017"), 174); 0427 m_yearIndex.insert(QStringLiteral("2018"), 175); 0428 m_yearIndex.insert(QStringLiteral("2019"), 176); 0429 m_yearIndex.insert(QStringLiteral("2020"), 178); 0430 m_yearIndex.insert(QStringLiteral("2021"), 179); 0431 m_yearIndex.insert(QStringLiteral("2022"), 180); 0432 m_yearIndex.insert(QStringLiteral("2023"), 181); 0433 } 0434 0435 GamingHistoryFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const GamingHistoryFetcher* fetcher_) 0436 : Fetch::ConfigWidget(parent_) { 0437 QVBoxLayout* l = new QVBoxLayout(optionsWidget()); 0438 l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); 0439 l->addStretch(); 0440 0441 // now add additional fields widget 0442 addFieldsWidget(GamingHistoryFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); 0443 } 0444 0445 QString GamingHistoryFetcher::ConfigWidget::preferredName() const { 0446 return GamingHistoryFetcher::defaultName(); 0447 }