File indexing completed on 2024-05-12 05:09:33

0001 /***************************************************************************
0002     Copyright (C) 2023 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "filmaffinityfetcher.h"
0026 #include "../utils/guiproxy.h"
0027 #include "../utils/string_utils.h"
0028 #include "../collections/videocollection.h"
0029 #include "../entry.h"
0030 #include "../fieldformat.h"
0031 #include "../core/filehandler.h"
0032 #include "../images/imagefactory.h"
0033 #include "../gui/combobox.h"
0034 #include "../tellico_debug.h"
0035 
0036 #include <KLocalizedString>
0037 #include <KIO/Job>
0038 #include <KJobUiDelegate>
0039 #include <KJobWidgets/KJobWidgets>
0040 
0041 #include <QRegularExpression>
0042 #include <QLabel>
0043 #include <QFile>
0044 #include <QTextStream>
0045 #include <QGridLayout>
0046 #include <QSpinBox>
0047 #include <QUrlQuery>
0048 #include <QStandardPaths>
0049 
0050 namespace {
0051   static const char* FILMAFFINITY_SEARCH_URL = "https://www.filmaffinity.com";
0052   static const uint FILMAFFINITY_DEFAULT_CAST_SIZE = 10;
0053 }
0054 
0055 using namespace Tellico;
0056 using Tellico::Fetch::FilmAffinityFetcher;
0057 
0058 FilmAffinityFetcher::FilmAffinityFetcher(QObject* parent_)
0059     : Fetcher(parent_), m_started(false), m_locale(ES), m_numCast(FILMAFFINITY_DEFAULT_CAST_SIZE) {
0060 }
0061 
0062 FilmAffinityFetcher::~FilmAffinityFetcher() {
0063 }
0064 
0065 // static
0066 const FilmAffinityFetcher::LocaleData& FilmAffinityFetcher::localeData(int locale_) {
0067   Q_ASSERT(locale_ >= 0);
0068   Q_ASSERT(locale_ <  2);
0069   static LocaleData dataVector[6] = {
0070     {
0071       QStringLiteral("es"),
0072       QStringLiteral("(Serie de TV)"),
0073       QString::fromUtf8("Año"),
0074       QStringLiteral("Título original"),
0075       QStringLiteral("País"),
0076       QString::fromUtf8("Duración"),
0077       QString::fromUtf8("Dirección"),
0078       QStringLiteral("Reparto"),
0079       QString::fromUtf8("Género"),
0080       QStringLiteral("Guion"),
0081       QStringLiteral("Historia:"),
0082       QString::fromUtf8("Compañías"),
0083       QStringLiteral("Distribuidora"),
0084       QStringLiteral("Emitida por:"),
0085       QString::fromUtf8("Música"),
0086       QStringLiteral("Sinopsis")
0087     },
0088     {
0089       QStringLiteral("us"),
0090       QStringLiteral("(TV Series)"),
0091       QStringLiteral("Year"),
0092       QStringLiteral("Original title"),
0093       QStringLiteral("Country"),
0094       QStringLiteral("Running time"),
0095       QStringLiteral("Director"),
0096       QStringLiteral("Cast"),
0097       QStringLiteral("Genre"),
0098       QStringLiteral("Screenwriter"),
0099       QStringLiteral("Story:"),
0100       QStringLiteral("Producer"),
0101       QStringLiteral("Distributor:"),
0102       QStringLiteral("Broadcast by:"),
0103       QStringLiteral("Music"),
0104       QStringLiteral("Synopsis")
0105     }
0106   };
0107 
0108   return dataVector[qBound(0, locale_, static_cast<int>(sizeof(dataVector)/sizeof(LocaleData)))];
0109 }
0110 
0111 QString FilmAffinityFetcher::source() const {
0112   return m_name.isEmpty() ? defaultName() : m_name;
0113 }
0114 
0115 bool FilmAffinityFetcher::canFetch(int type) const {
0116   return type == Data::Collection::Video;
0117 }
0118 
0119 bool FilmAffinityFetcher::canSearch(Fetch::FetchKey k) const {
0120   return k == Title;
0121 }
0122 
0123 void FilmAffinityFetcher::readConfigHook(const KConfigGroup& config_) {
0124   const int locale = config_.readEntry("Locale", int(ES));
0125   m_locale = static_cast<Locale>(locale);
0126   m_numCast = config_.readEntry("Max Cast", FILMAFFINITY_DEFAULT_CAST_SIZE);
0127 }
0128 
0129 void FilmAffinityFetcher::search() {
0130   m_started = true;
0131   m_matches.clear();
0132 
0133   QUrl u(QString::fromLatin1(FILMAFFINITY_SEARCH_URL));
0134   u.setPath(QLatin1String("/") + localeData(m_locale).siteSlug + QLatin1String("/advsearch.php"));
0135   QString searchValue = request().value();
0136   QUrlQuery q;
0137   // extract the year from the end of the search string, accept the posible corner case of a movie
0138   // having some other year in the title?
0139   QRegularExpression yearRx(QStringLiteral("\\s(19|20)\\d\\d$"));
0140   auto match = yearRx.match(searchValue);
0141   if(match.hasMatch()) {
0142     searchValue.remove(match.captured());
0143     const auto& year = match.captured().simplified();
0144     q.addQueryItem(QStringLiteral("fromyear"), year);
0145     q.addQueryItem(QStringLiteral("toyear"), year);
0146   }
0147   q.addQueryItem(QStringLiteral("stext"), searchValue);
0148 
0149   switch(request().key()) {
0150     case Title:
0151       //q.addQueryItem(QStringLiteral("year"), QStringLiteral("yes"));
0152       q.addQueryItem(QStringLiteral("stype[]"), QLatin1String("title"));
0153       break;
0154 
0155     default:
0156       myWarning() << source() << "- key not recognized:" << request().key();
0157       stop();
0158       return;
0159   }
0160   u.setQuery(q);
0161   myLog() << "Reading" << u.toDisplayString();
0162 
0163   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
0164   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
0165   connect(m_job.data(), &KJob::result, this, &FilmAffinityFetcher::slotComplete);
0166 }
0167 
0168 void FilmAffinityFetcher::stop() {
0169   if(!m_started) {
0170     return;
0171   }
0172 
0173   if(m_job) {
0174     m_job->kill();
0175     m_job = nullptr;
0176   }
0177   m_started = false;
0178   emit signalDone(this);
0179 }
0180 
0181 void FilmAffinityFetcher::slotComplete(KJob*) {
0182   if(m_job->error()) {
0183     m_job->uiDelegate()->showErrorMessage();
0184     stop();
0185     return;
0186   }
0187 
0188   QByteArray data = m_job->data();
0189   if(data.isEmpty()) {
0190     myDebug() << "no data";
0191     stop();
0192     return;
0193   }
0194 
0195   const QString output = Tellico::decodeHTML(data);
0196 #if 0
0197   myWarning() << "Remove debug from filmaffinityfetcher.cpp";
0198   QFile f(QStringLiteral("/tmp/test1.html"));
0199   if(f.open(QIODevice::WriteOnly)) {
0200     QTextStream t(&f);
0201     t.setCodec("UTF-8");
0202     t << output;
0203   }
0204   f.close();
0205 #endif
0206 
0207   // look for a specific div, with an href and title, sometime uses single-quote, sometimes double-quotes
0208   QRegularExpression resultRx(QStringLiteral("<div class=\"fa-shadow adv-search-item\">(.+?)<div class=\"mc-actions\">"),
0209                               QRegularExpression::DotMatchesEverythingOption);
0210   QRegularExpression titleRx(QStringLiteral("<a\\s+href=\"(.+?)\"\\s+title=\"(.+?)\">(.+?)<img"));
0211   // the year is within the title text as a 4-digit number, starting with 1 or 2
0212   QRegularExpression yearRx(QStringLiteral("\\(([12]\\d\\d\\d)\\)"));
0213 
0214   QString href, title, year;
0215   QRegularExpressionMatchIterator i = resultRx.globalMatch(output);
0216   while(i.hasNext() && m_started) {
0217     auto topMatch = i.next();
0218     auto anchorMatch = titleRx.match(topMatch.captured(1));
0219     if(anchorMatch.hasMatch()) {
0220       href = anchorMatch.captured(1);
0221       title = anchorMatch.captured(2).trimmed();
0222       auto yearMatch = yearRx.match(anchorMatch.captured(3));
0223       if(yearMatch.hasMatch()) {
0224         year = yearMatch.captured(1);
0225       }
0226     }
0227     if(!href.isEmpty()) {
0228       QUrl url(QString::fromLatin1(FILMAFFINITY_SEARCH_URL));
0229       url = url.resolved(QUrl(href));
0230 //      myDebug() << url << title << year;
0231       FetchResult* r = new FetchResult(this, title, year);
0232       m_matches.insert(r->uid, url);
0233       emit signalResultFound(r);
0234     }
0235   }
0236 
0237   // since the fetch is done, don't worry about holding the job pointer
0238   m_job = nullptr;
0239   stop();
0240 }
0241 
0242 Tellico::Data::EntryPtr FilmAffinityFetcher::fetchEntryHook(uint uid_) {
0243   // if we already grabbed this one, then just pull it out of the dict
0244   Data::EntryPtr entry = m_entries[uid_];
0245   if(entry) {
0246     return entry;
0247   }
0248 
0249   QUrl url = m_matches[uid_];
0250   if(url.isEmpty()) {
0251     myWarning() << "no url in map";
0252     return Data::EntryPtr();
0253   }
0254 
0255   const QString results = Tellico::decodeHTML(FileHandler::readDataFile(url, true));
0256   if(results.isEmpty()) {
0257     myDebug() << "no text results";
0258     return Data::EntryPtr();
0259   }
0260 
0261 #if 0
0262   myDebug() << url.url();
0263   myWarning() << "Remove debug2 from filmaffinityfetcher.cpp";
0264   QFile f(QStringLiteral("/tmp/test-filmaffinity.html"));
0265   if(f.open(QIODevice::WriteOnly)) {
0266     QTextStream t(&f);
0267     t.setCodec("UTF-8");
0268     t << results;
0269   }
0270   f.close();
0271 #endif
0272 
0273   entry = parseEntry(results);
0274   if(!entry) {
0275     myDebug() << "error in processing entry";
0276     return Data::EntryPtr();
0277   }
0278 
0279   const QString fa = QStringLiteral("filmaffinity");
0280   if(optionalFields().contains(fa)) {
0281     Data::FieldPtr field(new Data::Field(fa, i18n("FilmAffinity Link"), Data::Field::URL));
0282     field->setCategory(i18n("General"));
0283     entry->collection()->addField(field);
0284     entry->setField(fa, url.url());
0285   }
0286 
0287   m_entries.insert(uid_, entry); // keep for later
0288   return entry;
0289 }
0290 
0291 Tellico::Data::EntryPtr FilmAffinityFetcher::parseEntry(const QString& str_) {
0292   Data::CollPtr coll(new Data::VideoCollection(true));
0293   Data::EntryPtr entry(new Data::Entry(coll));
0294   coll->addEntries(entry);
0295 
0296   const LocaleData& data = localeData(m_locale);
0297 
0298   QRegularExpression titleRx(QStringLiteral("<span itemprop=\"name\">(.+?)</span"));
0299   QRegularExpressionMatch match = titleRx.match(str_);
0300   if(match.hasMatch()) {
0301     // remove anything in parentheses
0302     QString title = match.captured(1).simplified();
0303     title.remove(data.tvSeries);
0304     title = title.trimmed();
0305     entry->setField(QStringLiteral("title"), title);
0306   }
0307 
0308   const QString origtitle = QStringLiteral("origtitle");
0309   QRegularExpression tagRx(QStringLiteral("<.+?>"));
0310   QRegularExpression spanRx(QStringLiteral("<span.*?>(.+?),*\\s*</span"));
0311   QRegularExpression divRx(QStringLiteral("<div [^>]*?class=\"name\"[^>]*?>(.+?)</div"));
0312   QRegularExpression defRx(QStringLiteral("<dt>(.+?)</dt>\\s*?<dd.*?>(.+?)</dd>"),
0313                            QRegularExpression::DotMatchesEverythingOption);
0314   QRegularExpressionMatchIterator i = defRx.globalMatch(str_);
0315   while(i.hasNext()) {
0316     auto match = i.next();
0317     const auto& term = match.captured(1);
0318     if(term == data.year) {
0319       entry->setField(QStringLiteral("year"), match.captured(2).trimmed());
0320     } else if(term == data.origTitle &&
0321               optionalFields().contains(origtitle)) {
0322       Data::FieldPtr f(new Data::Field(origtitle, i18n("Original Title")));
0323       f->setFormatType(FieldFormat::FormatTitle);
0324       coll->addField(f);
0325       // might have an aka in a span
0326       QString oTitle = match.captured(2);
0327       const int start = oTitle.indexOf(QLatin1String("<span"));
0328       if(start > -1) oTitle = oTitle.left(start);
0329       entry->setField(origtitle, oTitle.remove(tagRx).simplified());
0330     } else if(term == data.runningTime) {
0331       QRegularExpression timeRx(QStringLiteral("\\d+"));
0332       auto timeMatch = timeRx.match(match.captured(2));
0333       if(timeMatch.hasMatch()) {
0334         entry->setField(QStringLiteral("running-time"), timeMatch.captured());
0335       }
0336     } else if(term == data.country) {
0337       QRegularExpression countryRx(QStringLiteral("alt=\"(.+?)\""));
0338       auto countryMatch = countryRx.match(match.captured(2));
0339       if(countryMatch.hasMatch()) {
0340         entry->setField(QStringLiteral("nationality"), countryMatch.captured(1));
0341       }
0342     } else if(term == data.director) {
0343       QStringList directors;
0344       auto iSpan = spanRx.globalMatch(match.captured(2));
0345       while(iSpan.hasNext()) {
0346         auto spanMatch = iSpan.next();
0347         directors += spanMatch.captured(1).remove(tagRx).simplified();
0348       }
0349       if(!directors.isEmpty()) {
0350         entry->setField(QStringLiteral("director"), directors.join(FieldFormat::delimiterString()));
0351       }
0352     } else if(term == data.cast) {
0353       QStringList cast;
0354       const auto& captured = match.captured(2);
0355       // only read up to the hidden credits
0356       auto end = captured.indexOf(QLatin1String("hidden-credit"));
0357       if(end == -1) end = captured.indexOf(QLatin1String("see-more-cre"));
0358       if(end == -1) end = captured.size();
0359       auto iDiv = divRx.globalMatch(captured.left(end));
0360       while(iDiv.hasNext() && cast.size() < m_numCast) {
0361         auto spanMatch = iDiv.next();
0362         cast += spanMatch.captured(1).remove(tagRx).simplified();
0363       }
0364       if(!cast.isEmpty()) {
0365         entry->setField(QStringLiteral("cast"), cast.join(FieldFormat::rowDelimiterString()));
0366       }
0367     } else if(term == data.genre) {
0368       QStringList genres;
0369       auto iSpan = spanRx.globalMatch(match.captured(2));
0370       while(iSpan.hasNext()) {
0371         auto spanMatch = iSpan.next();
0372         genres += spanMatch.captured(1).remove(tagRx).simplified();
0373       }
0374       if(!genres.isEmpty()) {
0375         entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString()));
0376       }
0377     } else if(term == data.writer) {
0378       QStringList writers;
0379       const auto& captured = match.captured(2);
0380       // skip ahead to "Story"
0381       const auto start = captured.indexOf(data.story);
0382       auto iSpan = spanRx.globalMatch(captured.mid(qMax(0,start)));
0383       while(iSpan.hasNext()) {
0384         auto spanMatch = iSpan.next();
0385         writers += spanMatch.captured(1).remove(tagRx).simplified();
0386       }
0387       if(!writers.isEmpty()) {
0388         entry->setField(QStringLiteral("writer"), writers.join(FieldFormat::delimiterString()));
0389       }
0390     } else if(term == data.producer) {
0391       // producer seems to be all the studio, use distributor as the main
0392       QStringList studios;
0393       const auto& captured = match.captured(2);
0394       // skip ahead to "Story"
0395       const auto start1 = captured.indexOf(data.distributor);
0396       const auto start2 = captured.indexOf(data.broadcast);
0397       auto iSpan = spanRx.globalMatch(captured.mid(qMax(0,qMax(start1,start2))));
0398       while(iSpan.hasNext()) {
0399         auto spanMatch = iSpan.next();
0400         studios += spanMatch.captured(1).remove(tagRx).simplified();
0401       }
0402       if(!studios.isEmpty()) {
0403         entry->setField(QStringLiteral("studio"), studios.join(FieldFormat::delimiterString()));
0404       }
0405     } else if(term == data.music) {
0406       entry->setField(QStringLiteral("composer"), match.captured(2).remove(tagRx).trimmed());
0407     } else if(term == data.plot) {
0408       entry->setField(QStringLiteral("plot"), match.captured(2).trimmed());
0409     }
0410   }
0411 
0412   QString cover;
0413   QRegularExpression coverRx(QStringLiteral("<img\\s.*?itemprop=\"image\".+?src=\"(.+?)\".*?>"));
0414   match = coverRx.match(str_);
0415   if(match.hasMatch()) {
0416     cover = match.captured(1);
0417   } else {
0418     coverRx.setPattern(QStringLiteral("<meta property=\"og:image\" content=\"(.+?)\""));
0419     match = coverRx.match(str_);
0420     if(match.hasMatch()) {
0421       cover = match.captured(1);
0422     }
0423   }
0424   if(!cover.isEmpty()) {
0425 //    myDebug() << "cover:" << cover;
0426     const QString id = ImageFactory::addImage(QUrl::fromUserInput(cover), true /* quiet */);
0427     if(id.isEmpty()) {
0428       message(i18n("The cover image could not be loaded."), MessageHandler::Warning);
0429     }
0430     // empty image ID is ok
0431     entry->setField(QStringLiteral("cover"), id);
0432   }
0433 
0434   return entry;
0435 }
0436 
0437 Tellico::Fetch::FetchRequest FilmAffinityFetcher::updateRequest(Data::EntryPtr entry_) {
0438   QString t = entry_->field(QStringLiteral("title"));
0439   if(!t.isEmpty()) {
0440     return FetchRequest(Fetch::Title, t);
0441   }
0442   return FetchRequest();
0443 }
0444 
0445 Tellico::Fetch::ConfigWidget* FilmAffinityFetcher::configWidget(QWidget* parent_) const {
0446   return new FilmAffinityFetcher::ConfigWidget(parent_);
0447 }
0448 
0449 QString FilmAffinityFetcher::defaultName() {
0450   return QStringLiteral("FilmAffinity");
0451 }
0452 
0453 QString FilmAffinityFetcher::defaultIcon() {
0454   return favIcon("https://www.filmaffinity.com");
0455 }
0456 
0457 Tellico::StringHash FilmAffinityFetcher::allOptionalFields() {
0458   StringHash hash;
0459   hash[QStringLiteral("origtitle")] = i18n("Original Title");
0460   hash[QStringLiteral("filmaffinity")] = i18n("FilmAffinity Link");
0461   return hash;
0462 }
0463 
0464 FilmAffinityFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const FilmAffinityFetcher* fetcher_)
0465     : Fetch::ConfigWidget(parent_) {
0466   QGridLayout* l = new QGridLayout(optionsWidget());
0467   l->setSpacing(4);
0468   l->setColumnStretch(1, 10);
0469 
0470   int row = -1;
0471 
0472   QLabel* label = new QLabel(i18n("&Maximum cast: "), optionsWidget());
0473   l->addWidget(label, ++row, 0);
0474   m_numCast = new QSpinBox(optionsWidget());
0475   m_numCast->setMaximum(99);
0476   m_numCast->setMinimum(0);
0477   m_numCast->setValue(FILMAFFINITY_DEFAULT_CAST_SIZE);
0478 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0479   void (QSpinBox::* textChanged)(const QString&) = &QSpinBox::valueChanged;
0480 #else
0481   void (QSpinBox::* textChanged)(const QString&) = &QSpinBox::textChanged;
0482 #endif
0483   connect(m_numCast, textChanged, this, &ConfigWidget::slotSetModified);
0484   l->addWidget(m_numCast, row, 1);
0485   QString w = i18n("The list of cast members may include many people. Set the maximum number returned from the search.");
0486   label->setWhatsThis(w);
0487   m_numCast->setWhatsThis(w);
0488   label->setBuddy(m_numCast);
0489 
0490   label = new QLabel(i18n("Language: "), optionsWidget());
0491   l->addWidget(label, ++row, 0);
0492   m_localeCombo = new GUI::ComboBox(optionsWidget());
0493   QIcon iconES(QStandardPaths::locate(QStandardPaths::GenericDataLocation,
0494                                       QStringLiteral("kf5/locale/countries/es/flag.png")));
0495   m_localeCombo->addItem(iconES, i18nc("Country", "Spain"), int(FilmAffinityFetcher::ES));
0496   QIcon iconUS(QStandardPaths::locate(QStandardPaths::GenericDataLocation,
0497                                       QStringLiteral("kf5/locale/countries/us/flag.png")));
0498   m_localeCombo->addItem(iconUS, i18nc("Country", "USA"), int(FilmAffinityFetcher::US));
0499   void (GUI::ComboBox::* activatedInt)(int) = &GUI::ComboBox::activated;
0500   connect(m_localeCombo, activatedInt, this, &ConfigWidget::slotSetModified);
0501   l->addWidget(m_localeCombo, row, 1);
0502   label->setBuddy(m_localeCombo);
0503 
0504   l->setRowStretch(++row, 10);
0505 
0506   addFieldsWidget(FilmAffinityFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
0507 
0508   if(fetcher_) {
0509     m_localeCombo->setCurrentData(fetcher_->m_locale);
0510     m_numCast->setValue(fetcher_->m_numCast);
0511   }
0512 }
0513 
0514 void FilmAffinityFetcher::ConfigWidget::saveConfigHook(KConfigGroup& config_) {
0515   config_.writeEntry("Locale", m_localeCombo->currentData().toInt());
0516   config_.writeEntry("Max Cast", m_numCast->value());
0517 }
0518 
0519 QString FilmAffinityFetcher::ConfigWidget::preferredName() const {
0520   return FilmAffinityFetcher::defaultName();
0521 }