File indexing completed on 2024-05-12 05:09:33
0001 /*************************************************************************** 0002 Copyright (C) 2023 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "filmaffinityfetcher.h" 0026 #include "../utils/guiproxy.h" 0027 #include "../utils/string_utils.h" 0028 #include "../collections/videocollection.h" 0029 #include "../entry.h" 0030 #include "../fieldformat.h" 0031 #include "../core/filehandler.h" 0032 #include "../images/imagefactory.h" 0033 #include "../gui/combobox.h" 0034 #include "../tellico_debug.h" 0035 0036 #include <KLocalizedString> 0037 #include <KIO/Job> 0038 #include <KJobUiDelegate> 0039 #include <KJobWidgets/KJobWidgets> 0040 0041 #include <QRegularExpression> 0042 #include <QLabel> 0043 #include <QFile> 0044 #include <QTextStream> 0045 #include <QGridLayout> 0046 #include <QSpinBox> 0047 #include <QUrlQuery> 0048 #include <QStandardPaths> 0049 0050 namespace { 0051 static const char* FILMAFFINITY_SEARCH_URL = "https://www.filmaffinity.com"; 0052 static const uint FILMAFFINITY_DEFAULT_CAST_SIZE = 10; 0053 } 0054 0055 using namespace Tellico; 0056 using Tellico::Fetch::FilmAffinityFetcher; 0057 0058 FilmAffinityFetcher::FilmAffinityFetcher(QObject* parent_) 0059 : Fetcher(parent_), m_started(false), m_locale(ES), m_numCast(FILMAFFINITY_DEFAULT_CAST_SIZE) { 0060 } 0061 0062 FilmAffinityFetcher::~FilmAffinityFetcher() { 0063 } 0064 0065 // static 0066 const FilmAffinityFetcher::LocaleData& FilmAffinityFetcher::localeData(int locale_) { 0067 Q_ASSERT(locale_ >= 0); 0068 Q_ASSERT(locale_ < 2); 0069 static LocaleData dataVector[6] = { 0070 { 0071 QStringLiteral("es"), 0072 QStringLiteral("(Serie de TV)"), 0073 QString::fromUtf8("Año"), 0074 QStringLiteral("Título original"), 0075 QStringLiteral("País"), 0076 QString::fromUtf8("Duración"), 0077 QString::fromUtf8("Dirección"), 0078 QStringLiteral("Reparto"), 0079 QString::fromUtf8("Género"), 0080 QStringLiteral("Guion"), 0081 QStringLiteral("Historia:"), 0082 QString::fromUtf8("Compañías"), 0083 QStringLiteral("Distribuidora"), 0084 QStringLiteral("Emitida por:"), 0085 QString::fromUtf8("Música"), 0086 QStringLiteral("Sinopsis") 0087 }, 0088 { 0089 QStringLiteral("us"), 0090 QStringLiteral("(TV Series)"), 0091 QStringLiteral("Year"), 0092 QStringLiteral("Original title"), 0093 QStringLiteral("Country"), 0094 QStringLiteral("Running time"), 0095 QStringLiteral("Director"), 0096 QStringLiteral("Cast"), 0097 QStringLiteral("Genre"), 0098 QStringLiteral("Screenwriter"), 0099 QStringLiteral("Story:"), 0100 QStringLiteral("Producer"), 0101 QStringLiteral("Distributor:"), 0102 QStringLiteral("Broadcast by:"), 0103 QStringLiteral("Music"), 0104 QStringLiteral("Synopsis") 0105 } 0106 }; 0107 0108 return dataVector[qBound(0, locale_, static_cast<int>(sizeof(dataVector)/sizeof(LocaleData)))]; 0109 } 0110 0111 QString FilmAffinityFetcher::source() const { 0112 return m_name.isEmpty() ? defaultName() : m_name; 0113 } 0114 0115 bool FilmAffinityFetcher::canFetch(int type) const { 0116 return type == Data::Collection::Video; 0117 } 0118 0119 bool FilmAffinityFetcher::canSearch(Fetch::FetchKey k) const { 0120 return k == Title; 0121 } 0122 0123 void FilmAffinityFetcher::readConfigHook(const KConfigGroup& config_) { 0124 const int locale = config_.readEntry("Locale", int(ES)); 0125 m_locale = static_cast<Locale>(locale); 0126 m_numCast = config_.readEntry("Max Cast", FILMAFFINITY_DEFAULT_CAST_SIZE); 0127 } 0128 0129 void FilmAffinityFetcher::search() { 0130 m_started = true; 0131 m_matches.clear(); 0132 0133 QUrl u(QString::fromLatin1(FILMAFFINITY_SEARCH_URL)); 0134 u.setPath(QLatin1String("/") + localeData(m_locale).siteSlug + QLatin1String("/advsearch.php")); 0135 QString searchValue = request().value(); 0136 QUrlQuery q; 0137 // extract the year from the end of the search string, accept the posible corner case of a movie 0138 // having some other year in the title? 0139 QRegularExpression yearRx(QStringLiteral("\\s(19|20)\\d\\d$")); 0140 auto match = yearRx.match(searchValue); 0141 if(match.hasMatch()) { 0142 searchValue.remove(match.captured()); 0143 const auto& year = match.captured().simplified(); 0144 q.addQueryItem(QStringLiteral("fromyear"), year); 0145 q.addQueryItem(QStringLiteral("toyear"), year); 0146 } 0147 q.addQueryItem(QStringLiteral("stext"), searchValue); 0148 0149 switch(request().key()) { 0150 case Title: 0151 //q.addQueryItem(QStringLiteral("year"), QStringLiteral("yes")); 0152 q.addQueryItem(QStringLiteral("stype[]"), QLatin1String("title")); 0153 break; 0154 0155 default: 0156 myWarning() << source() << "- key not recognized:" << request().key(); 0157 stop(); 0158 return; 0159 } 0160 u.setQuery(q); 0161 myLog() << "Reading" << u.toDisplayString(); 0162 0163 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); 0164 KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); 0165 connect(m_job.data(), &KJob::result, this, &FilmAffinityFetcher::slotComplete); 0166 } 0167 0168 void FilmAffinityFetcher::stop() { 0169 if(!m_started) { 0170 return; 0171 } 0172 0173 if(m_job) { 0174 m_job->kill(); 0175 m_job = nullptr; 0176 } 0177 m_started = false; 0178 emit signalDone(this); 0179 } 0180 0181 void FilmAffinityFetcher::slotComplete(KJob*) { 0182 if(m_job->error()) { 0183 m_job->uiDelegate()->showErrorMessage(); 0184 stop(); 0185 return; 0186 } 0187 0188 QByteArray data = m_job->data(); 0189 if(data.isEmpty()) { 0190 myDebug() << "no data"; 0191 stop(); 0192 return; 0193 } 0194 0195 const QString output = Tellico::decodeHTML(data); 0196 #if 0 0197 myWarning() << "Remove debug from filmaffinityfetcher.cpp"; 0198 QFile f(QStringLiteral("/tmp/test1.html")); 0199 if(f.open(QIODevice::WriteOnly)) { 0200 QTextStream t(&f); 0201 t.setCodec("UTF-8"); 0202 t << output; 0203 } 0204 f.close(); 0205 #endif 0206 0207 // look for a specific div, with an href and title, sometime uses single-quote, sometimes double-quotes 0208 QRegularExpression resultRx(QStringLiteral("<div class=\"fa-shadow adv-search-item\">(.+?)<div class=\"mc-actions\">"), 0209 QRegularExpression::DotMatchesEverythingOption); 0210 QRegularExpression titleRx(QStringLiteral("<a\\s+href=\"(.+?)\"\\s+title=\"(.+?)\">(.+?)<img")); 0211 // the year is within the title text as a 4-digit number, starting with 1 or 2 0212 QRegularExpression yearRx(QStringLiteral("\\(([12]\\d\\d\\d)\\)")); 0213 0214 QString href, title, year; 0215 QRegularExpressionMatchIterator i = resultRx.globalMatch(output); 0216 while(i.hasNext() && m_started) { 0217 auto topMatch = i.next(); 0218 auto anchorMatch = titleRx.match(topMatch.captured(1)); 0219 if(anchorMatch.hasMatch()) { 0220 href = anchorMatch.captured(1); 0221 title = anchorMatch.captured(2).trimmed(); 0222 auto yearMatch = yearRx.match(anchorMatch.captured(3)); 0223 if(yearMatch.hasMatch()) { 0224 year = yearMatch.captured(1); 0225 } 0226 } 0227 if(!href.isEmpty()) { 0228 QUrl url(QString::fromLatin1(FILMAFFINITY_SEARCH_URL)); 0229 url = url.resolved(QUrl(href)); 0230 // myDebug() << url << title << year; 0231 FetchResult* r = new FetchResult(this, title, year); 0232 m_matches.insert(r->uid, url); 0233 emit signalResultFound(r); 0234 } 0235 } 0236 0237 // since the fetch is done, don't worry about holding the job pointer 0238 m_job = nullptr; 0239 stop(); 0240 } 0241 0242 Tellico::Data::EntryPtr FilmAffinityFetcher::fetchEntryHook(uint uid_) { 0243 // if we already grabbed this one, then just pull it out of the dict 0244 Data::EntryPtr entry = m_entries[uid_]; 0245 if(entry) { 0246 return entry; 0247 } 0248 0249 QUrl url = m_matches[uid_]; 0250 if(url.isEmpty()) { 0251 myWarning() << "no url in map"; 0252 return Data::EntryPtr(); 0253 } 0254 0255 const QString results = Tellico::decodeHTML(FileHandler::readDataFile(url, true)); 0256 if(results.isEmpty()) { 0257 myDebug() << "no text results"; 0258 return Data::EntryPtr(); 0259 } 0260 0261 #if 0 0262 myDebug() << url.url(); 0263 myWarning() << "Remove debug2 from filmaffinityfetcher.cpp"; 0264 QFile f(QStringLiteral("/tmp/test-filmaffinity.html")); 0265 if(f.open(QIODevice::WriteOnly)) { 0266 QTextStream t(&f); 0267 t.setCodec("UTF-8"); 0268 t << results; 0269 } 0270 f.close(); 0271 #endif 0272 0273 entry = parseEntry(results); 0274 if(!entry) { 0275 myDebug() << "error in processing entry"; 0276 return Data::EntryPtr(); 0277 } 0278 0279 const QString fa = QStringLiteral("filmaffinity"); 0280 if(optionalFields().contains(fa)) { 0281 Data::FieldPtr field(new Data::Field(fa, i18n("FilmAffinity Link"), Data::Field::URL)); 0282 field->setCategory(i18n("General")); 0283 entry->collection()->addField(field); 0284 entry->setField(fa, url.url()); 0285 } 0286 0287 m_entries.insert(uid_, entry); // keep for later 0288 return entry; 0289 } 0290 0291 Tellico::Data::EntryPtr FilmAffinityFetcher::parseEntry(const QString& str_) { 0292 Data::CollPtr coll(new Data::VideoCollection(true)); 0293 Data::EntryPtr entry(new Data::Entry(coll)); 0294 coll->addEntries(entry); 0295 0296 const LocaleData& data = localeData(m_locale); 0297 0298 QRegularExpression titleRx(QStringLiteral("<span itemprop=\"name\">(.+?)</span")); 0299 QRegularExpressionMatch match = titleRx.match(str_); 0300 if(match.hasMatch()) { 0301 // remove anything in parentheses 0302 QString title = match.captured(1).simplified(); 0303 title.remove(data.tvSeries); 0304 title = title.trimmed(); 0305 entry->setField(QStringLiteral("title"), title); 0306 } 0307 0308 const QString origtitle = QStringLiteral("origtitle"); 0309 QRegularExpression tagRx(QStringLiteral("<.+?>")); 0310 QRegularExpression spanRx(QStringLiteral("<span.*?>(.+?),*\\s*</span")); 0311 QRegularExpression divRx(QStringLiteral("<div [^>]*?class=\"name\"[^>]*?>(.+?)</div")); 0312 QRegularExpression defRx(QStringLiteral("<dt>(.+?)</dt>\\s*?<dd.*?>(.+?)</dd>"), 0313 QRegularExpression::DotMatchesEverythingOption); 0314 QRegularExpressionMatchIterator i = defRx.globalMatch(str_); 0315 while(i.hasNext()) { 0316 auto match = i.next(); 0317 const auto& term = match.captured(1); 0318 if(term == data.year) { 0319 entry->setField(QStringLiteral("year"), match.captured(2).trimmed()); 0320 } else if(term == data.origTitle && 0321 optionalFields().contains(origtitle)) { 0322 Data::FieldPtr f(new Data::Field(origtitle, i18n("Original Title"))); 0323 f->setFormatType(FieldFormat::FormatTitle); 0324 coll->addField(f); 0325 // might have an aka in a span 0326 QString oTitle = match.captured(2); 0327 const int start = oTitle.indexOf(QLatin1String("<span")); 0328 if(start > -1) oTitle = oTitle.left(start); 0329 entry->setField(origtitle, oTitle.remove(tagRx).simplified()); 0330 } else if(term == data.runningTime) { 0331 QRegularExpression timeRx(QStringLiteral("\\d+")); 0332 auto timeMatch = timeRx.match(match.captured(2)); 0333 if(timeMatch.hasMatch()) { 0334 entry->setField(QStringLiteral("running-time"), timeMatch.captured()); 0335 } 0336 } else if(term == data.country) { 0337 QRegularExpression countryRx(QStringLiteral("alt=\"(.+?)\"")); 0338 auto countryMatch = countryRx.match(match.captured(2)); 0339 if(countryMatch.hasMatch()) { 0340 entry->setField(QStringLiteral("nationality"), countryMatch.captured(1)); 0341 } 0342 } else if(term == data.director) { 0343 QStringList directors; 0344 auto iSpan = spanRx.globalMatch(match.captured(2)); 0345 while(iSpan.hasNext()) { 0346 auto spanMatch = iSpan.next(); 0347 directors += spanMatch.captured(1).remove(tagRx).simplified(); 0348 } 0349 if(!directors.isEmpty()) { 0350 entry->setField(QStringLiteral("director"), directors.join(FieldFormat::delimiterString())); 0351 } 0352 } else if(term == data.cast) { 0353 QStringList cast; 0354 const auto& captured = match.captured(2); 0355 // only read up to the hidden credits 0356 auto end = captured.indexOf(QLatin1String("hidden-credit")); 0357 if(end == -1) end = captured.indexOf(QLatin1String("see-more-cre")); 0358 if(end == -1) end = captured.size(); 0359 auto iDiv = divRx.globalMatch(captured.left(end)); 0360 while(iDiv.hasNext() && cast.size() < m_numCast) { 0361 auto spanMatch = iDiv.next(); 0362 cast += spanMatch.captured(1).remove(tagRx).simplified(); 0363 } 0364 if(!cast.isEmpty()) { 0365 entry->setField(QStringLiteral("cast"), cast.join(FieldFormat::rowDelimiterString())); 0366 } 0367 } else if(term == data.genre) { 0368 QStringList genres; 0369 auto iSpan = spanRx.globalMatch(match.captured(2)); 0370 while(iSpan.hasNext()) { 0371 auto spanMatch = iSpan.next(); 0372 genres += spanMatch.captured(1).remove(tagRx).simplified(); 0373 } 0374 if(!genres.isEmpty()) { 0375 entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString())); 0376 } 0377 } else if(term == data.writer) { 0378 QStringList writers; 0379 const auto& captured = match.captured(2); 0380 // skip ahead to "Story" 0381 const auto start = captured.indexOf(data.story); 0382 auto iSpan = spanRx.globalMatch(captured.mid(qMax(0,start))); 0383 while(iSpan.hasNext()) { 0384 auto spanMatch = iSpan.next(); 0385 writers += spanMatch.captured(1).remove(tagRx).simplified(); 0386 } 0387 if(!writers.isEmpty()) { 0388 entry->setField(QStringLiteral("writer"), writers.join(FieldFormat::delimiterString())); 0389 } 0390 } else if(term == data.producer) { 0391 // producer seems to be all the studio, use distributor as the main 0392 QStringList studios; 0393 const auto& captured = match.captured(2); 0394 // skip ahead to "Story" 0395 const auto start1 = captured.indexOf(data.distributor); 0396 const auto start2 = captured.indexOf(data.broadcast); 0397 auto iSpan = spanRx.globalMatch(captured.mid(qMax(0,qMax(start1,start2)))); 0398 while(iSpan.hasNext()) { 0399 auto spanMatch = iSpan.next(); 0400 studios += spanMatch.captured(1).remove(tagRx).simplified(); 0401 } 0402 if(!studios.isEmpty()) { 0403 entry->setField(QStringLiteral("studio"), studios.join(FieldFormat::delimiterString())); 0404 } 0405 } else if(term == data.music) { 0406 entry->setField(QStringLiteral("composer"), match.captured(2).remove(tagRx).trimmed()); 0407 } else if(term == data.plot) { 0408 entry->setField(QStringLiteral("plot"), match.captured(2).trimmed()); 0409 } 0410 } 0411 0412 QString cover; 0413 QRegularExpression coverRx(QStringLiteral("<img\\s.*?itemprop=\"image\".+?src=\"(.+?)\".*?>")); 0414 match = coverRx.match(str_); 0415 if(match.hasMatch()) { 0416 cover = match.captured(1); 0417 } else { 0418 coverRx.setPattern(QStringLiteral("<meta property=\"og:image\" content=\"(.+?)\"")); 0419 match = coverRx.match(str_); 0420 if(match.hasMatch()) { 0421 cover = match.captured(1); 0422 } 0423 } 0424 if(!cover.isEmpty()) { 0425 // myDebug() << "cover:" << cover; 0426 const QString id = ImageFactory::addImage(QUrl::fromUserInput(cover), true /* quiet */); 0427 if(id.isEmpty()) { 0428 message(i18n("The cover image could not be loaded."), MessageHandler::Warning); 0429 } 0430 // empty image ID is ok 0431 entry->setField(QStringLiteral("cover"), id); 0432 } 0433 0434 return entry; 0435 } 0436 0437 Tellico::Fetch::FetchRequest FilmAffinityFetcher::updateRequest(Data::EntryPtr entry_) { 0438 QString t = entry_->field(QStringLiteral("title")); 0439 if(!t.isEmpty()) { 0440 return FetchRequest(Fetch::Title, t); 0441 } 0442 return FetchRequest(); 0443 } 0444 0445 Tellico::Fetch::ConfigWidget* FilmAffinityFetcher::configWidget(QWidget* parent_) const { 0446 return new FilmAffinityFetcher::ConfigWidget(parent_); 0447 } 0448 0449 QString FilmAffinityFetcher::defaultName() { 0450 return QStringLiteral("FilmAffinity"); 0451 } 0452 0453 QString FilmAffinityFetcher::defaultIcon() { 0454 return favIcon("https://www.filmaffinity.com"); 0455 } 0456 0457 Tellico::StringHash FilmAffinityFetcher::allOptionalFields() { 0458 StringHash hash; 0459 hash[QStringLiteral("origtitle")] = i18n("Original Title"); 0460 hash[QStringLiteral("filmaffinity")] = i18n("FilmAffinity Link"); 0461 return hash; 0462 } 0463 0464 FilmAffinityFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const FilmAffinityFetcher* fetcher_) 0465 : Fetch::ConfigWidget(parent_) { 0466 QGridLayout* l = new QGridLayout(optionsWidget()); 0467 l->setSpacing(4); 0468 l->setColumnStretch(1, 10); 0469 0470 int row = -1; 0471 0472 QLabel* label = new QLabel(i18n("&Maximum cast: "), optionsWidget()); 0473 l->addWidget(label, ++row, 0); 0474 m_numCast = new QSpinBox(optionsWidget()); 0475 m_numCast->setMaximum(99); 0476 m_numCast->setMinimum(0); 0477 m_numCast->setValue(FILMAFFINITY_DEFAULT_CAST_SIZE); 0478 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) 0479 void (QSpinBox::* textChanged)(const QString&) = &QSpinBox::valueChanged; 0480 #else 0481 void (QSpinBox::* textChanged)(const QString&) = &QSpinBox::textChanged; 0482 #endif 0483 connect(m_numCast, textChanged, this, &ConfigWidget::slotSetModified); 0484 l->addWidget(m_numCast, row, 1); 0485 QString w = i18n("The list of cast members may include many people. Set the maximum number returned from the search."); 0486 label->setWhatsThis(w); 0487 m_numCast->setWhatsThis(w); 0488 label->setBuddy(m_numCast); 0489 0490 label = new QLabel(i18n("Language: "), optionsWidget()); 0491 l->addWidget(label, ++row, 0); 0492 m_localeCombo = new GUI::ComboBox(optionsWidget()); 0493 QIcon iconES(QStandardPaths::locate(QStandardPaths::GenericDataLocation, 0494 QStringLiteral("kf5/locale/countries/es/flag.png"))); 0495 m_localeCombo->addItem(iconES, i18nc("Country", "Spain"), int(FilmAffinityFetcher::ES)); 0496 QIcon iconUS(QStandardPaths::locate(QStandardPaths::GenericDataLocation, 0497 QStringLiteral("kf5/locale/countries/us/flag.png"))); 0498 m_localeCombo->addItem(iconUS, i18nc("Country", "USA"), int(FilmAffinityFetcher::US)); 0499 void (GUI::ComboBox::* activatedInt)(int) = &GUI::ComboBox::activated; 0500 connect(m_localeCombo, activatedInt, this, &ConfigWidget::slotSetModified); 0501 l->addWidget(m_localeCombo, row, 1); 0502 label->setBuddy(m_localeCombo); 0503 0504 l->setRowStretch(++row, 10); 0505 0506 addFieldsWidget(FilmAffinityFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); 0507 0508 if(fetcher_) { 0509 m_localeCombo->setCurrentData(fetcher_->m_locale); 0510 m_numCast->setValue(fetcher_->m_numCast); 0511 } 0512 } 0513 0514 void FilmAffinityFetcher::ConfigWidget::saveConfigHook(KConfigGroup& config_) { 0515 config_.writeEntry("Locale", m_localeCombo->currentData().toInt()); 0516 config_.writeEntry("Max Cast", m_numCast->value()); 0517 } 0518 0519 QString FilmAffinityFetcher::ConfigWidget::preferredName() const { 0520 return FilmAffinityFetcher::defaultName(); 0521 }