File indexing completed on 2024-05-12 05:09:37

0001 /***************************************************************************
0002     Copyright (C) 2017 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "kinofetcher.h"
0026 #include "../utils/guiproxy.h"
0027 #include "../collections/videocollection.h"
0028 #include "../entry.h"
0029 #include "../fieldformat.h"
0030 #include "../core/filehandler.h"
0031 #include "../images/imagefactory.h"
0032 #include "../utils/string_utils.h"
0033 #include "../utils/mapvalue.h"
0034 #include "../tellico_debug.h"
0035 
0036 #include <KLocalizedString>
0037 #include <KConfig>
0038 #include <KIO/Job>
0039 #include <KIO/JobUiDelegate>
0040 #include <KJobWidgets/KJobWidgets>
0041 
0042 #include <QRegularExpression>
0043 #include <QLabel>
0044 #include <QFile>
0045 #include <QTextStream>
0046 #include <QVBoxLayout>
0047 #include <QUrlQuery>
0048 #include <QJsonDocument>
0049 #include <QJsonObject>
0050 
namespace {
  // Base URL of the kino.de search page; also used as the base when resolving
  // relative result links. Both the characters and the pointer are const so the
  // value cannot be reassigned at runtime. The anonymous namespace already
  // gives internal linkage, so no extra `static` is needed.
  const char* const KINO_BASE_URL = "https://www.kino.de/se/";
}
0054 
0055 using namespace Tellico;
0056 using Tellico::Fetch::KinoFetcher;
0057 
0058 KinoFetcher::KinoFetcher(QObject* parent_)
0059     : Fetcher(parent_), m_started(false) {
0060 }
0061 
0062 KinoFetcher::~KinoFetcher() {
0063 }
0064 
0065 QString KinoFetcher::source() const {
0066   return m_name.isEmpty() ? defaultName() : m_name;
0067 }
0068 
0069 bool KinoFetcher::canFetch(int type) const {
0070   return type == Data::Collection::Video;
0071 }
0072 
0073 void KinoFetcher::readConfigHook(const KConfigGroup& config_) {
0074   Q_UNUSED(config_);
0075 }
0076 
0077 void KinoFetcher::search() {
0078   m_started = true;
0079   m_matches.clear();
0080 
0081   QUrl u(QString::fromLatin1(KINO_BASE_URL));
0082   QUrlQuery q;
0083   q.addQueryItem(QStringLiteral("types"), QStringLiteral("movie"));
0084 
0085   switch(request().key()) {
0086     case Title:
0087       q.addQueryItem(QStringLiteral("searchterm"), request().value());
0088       break;
0089 
0090     default:
0091       myWarning() << source() << "- key not recognized:" << request().key();
0092       stop();
0093       return;
0094   }
0095   u.setQuery(q);
0096 //  myDebug() << "url:" << u;
0097 
0098   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
0099   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
0100   connect(m_job.data(), &KJob::result,
0101           this, &KinoFetcher::slotComplete);
0102 }
0103 
0104 void KinoFetcher::stop() {
0105   if(!m_started) {
0106     return;
0107   }
0108 
0109   if(m_job) {
0110     m_job->kill();
0111     m_job = nullptr;
0112   }
0113   m_started = false;
0114   emit signalDone(this);
0115 }
0116 
0117 void KinoFetcher::slotComplete(KJob*) {
0118   if(m_job->error()) {
0119     m_job->uiDelegate()->showErrorMessage();
0120     stop();
0121     return;
0122   }
0123 
0124   const QByteArray data = m_job->data();
0125   if(data.isEmpty()) {
0126     myDebug() << "no data";
0127     stop();
0128     return;
0129   }
0130 
0131   // since the fetch is done, don't worry about holding the job pointer
0132   m_job = nullptr;
0133 
0134   const QString pageText = Tellico::decodeHTML(data);
0135 #if 0
0136   myWarning() << "Remove debug from kinofetcher.cpp";
0137   QFile f(QStringLiteral("/tmp/test.html"));
0138   if(f.open(QIODevice::WriteOnly)) {
0139     QTextStream t(&f);
0140     t.setCodec("UTF-8");
0141     t << pageText;
0142   }
0143   f.close();
0144 #endif
0145 
0146   QRegularExpression linkRx(QStringLiteral("<div class=\"alice-teaser-title\">.*?<a .+?teaser-link.+?href=\"(.+?)\".*?>(.+?)</"),
0147                             QRegularExpression::DotMatchesEverythingOption);
0148   QRegularExpression dateSpanRx(QStringLiteral("<span .+?movie-startdate.+?>(.+?)</span"));
0149   QRegularExpression dateRx(QStringLiteral("\\d{2}\\.\\d{2}\\.(\\d{4})"));
0150   QRegularExpression yearEndRx(QStringLiteral("(\\d{4})/?$"));
0151 
0152   auto i = linkRx.globalMatch(pageText);
0153   while(i.hasNext()) {
0154     auto match = i.next();
0155     QString u = match.captured(1);
0156     if(u.isEmpty() || u.contains(QLatin1String("news")) || !u.contains(QLatin1String("film"))) {
0157       continue;
0158     }
0159     if(u.startsWith(QLatin1String("//"))) {
0160       u.prepend(QLatin1String("https:"));
0161     }
0162     Data::CollPtr coll(new Data::VideoCollection(true));
0163     Data::EntryPtr entry(new Data::Entry(coll));
0164     coll->addEntries(entry);
0165 
0166     entry->setField(QStringLiteral("title"), match.captured(2));
0167 
0168     QString y;
0169     auto dateMatch = dateSpanRx.match(pageText, match.capturedEnd());
0170     if(dateMatch.hasMatch()) {
0171       y = dateRx.match(dateMatch.captured(1)).captured(1);
0172     } else {
0173       // see if year is embedded in url
0174       y = yearEndRx.match(u).captured(1);
0175     }
0176     entry->setField(QStringLiteral("year"), y);
0177 
0178     FetchResult* r = new FetchResult(this, entry);
0179     QUrl url = QUrl(QString::fromLatin1(KINO_BASE_URL)).resolved(QUrl(u));
0180     m_matches.insert(r->uid, url);
0181     m_entries.insert(r->uid, entry);
0182     // don't emit signal until after putting url in matches hash
0183     emit signalResultFound(r);
0184   }
0185 
0186   stop();
0187 }
0188 
0189 Tellico::Data::EntryPtr KinoFetcher::fetchEntryHook(uint uid_) {
0190   if(!m_entries.contains(uid_)) {
0191     myWarning() << "no entry in hash";
0192     return Data::EntryPtr();
0193   }
0194 
0195   Data::EntryPtr entry = m_entries[uid_];
0196   // if the url is not in the hash, the entry has already been fully populated
0197   if(!m_matches.contains(uid_)) {
0198     return entry;
0199   }
0200 
0201   QString results = Tellico::decodeHTML(FileHandler::readTextFile(m_matches[uid_], true, true));
0202   if(results.isEmpty()) {
0203     myDebug() << "No text results from" << m_matches[uid_];
0204     return entry;
0205   }
0206 
0207 #if 0
0208   myWarning() << "Remove debug2 from kinofetcher.cpp";
0209   QFile f(QStringLiteral("/tmp/test2.html"));
0210   if(f.open(QIODevice::WriteOnly)) {
0211     QTextStream t(&f);
0212     t.setCodec("UTF-8");
0213     t << results;
0214   }
0215   f.close();
0216 #endif
0217 
0218   parseEntry(entry, results);
0219   // remove url to signal the entry is fully populated
0220   m_matches.remove(uid_);
0221   return entry;
0222 }
0223 
0224 void KinoFetcher::parseEntry(Data::EntryPtr entry, const QString& str_) {
0225   static const QRegularExpression jsonRx(QStringLiteral("<script type=\"application/ld\\+json\">(.*?)</script"),
0226                                          QRegularExpression::DotMatchesEverythingOption);
0227   auto i = jsonRx.globalMatch(str_);
0228   while(i.hasNext()) {
0229     QJsonDocument doc = QJsonDocument::fromJson(i.next().captured(1).toUtf8());
0230     QVariantMap objectMap = doc.object().toVariantMap();
0231     if(mapValue(objectMap, "@type") != QStringLiteral("Movie")) {
0232       continue;
0233     }
0234     entry->setField(QStringLiteral("director"), mapValue(objectMap, "director", "name"));
0235 
0236     QStringList actors;
0237     foreach(QVariant v, objectMap.value(QLatin1String("actor")).toList()) {
0238       const QString actor = mapValue(v.toMap(), "name");
0239       if(!actor.isEmpty()) actors += actor;
0240     }
0241     if(!actors.isEmpty()) {
0242       entry->setField(QStringLiteral("cast"), actors.join(FieldFormat::rowDelimiterString()));
0243     }
0244     // cover could be a relative link
0245     QString coverLink = mapValue(objectMap, "image");
0246     if(coverLink.startsWith(QLatin1String("//"))) {
0247       coverLink.prepend(QLatin1String("https:"));
0248     }
0249     entry->setField(QStringLiteral("cover"), coverLink);
0250 
0251     QString genreString = mapValue(objectMap, "genre");
0252     if(!genreString.isEmpty()) {
0253       static const QRegularExpression commaRx(QLatin1String(",\\s+"));
0254       QStringList genres = genreString.split(commaRx);
0255       entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString()));
0256     }
0257   }
0258 
0259   static const QRegularExpression tagRx(QStringLiteral("<.+?>"));
0260   QRegularExpression nationalityRx(QStringLiteral(">Produktionsland:(.*?)</a>"));
0261   auto nationalityMatch = nationalityRx.match(str_);
0262   if(nationalityMatch.hasMatch()) {
0263     const QString n = nationalityMatch.captured(1).remove(tagRx).trimmed();
0264     entry->setField(QStringLiteral("nationality"), n);
0265   }
0266 
0267   QRegularExpression lengthRx(QStringLiteral(">Dauer:(.*?)</li"),
0268                               QRegularExpression::DotMatchesEverythingOption);
0269   auto lengthMatch = lengthRx.match(str_);
0270   if(lengthMatch.hasMatch()) {
0271     const QString l = lengthMatch.captured(1).remove(tagRx).remove(QStringLiteral(" Min")).trimmed();
0272     entry->setField(QStringLiteral("running-time"), l);
0273   }
0274 
0275   QRegularExpression genreRx(QStringLiteral("<dt.*?>Genre</dt><dd.*?>(.*?)</dd>"));
0276   auto genreMatch = genreRx.match(str_);
0277   if(genreMatch.hasMatch()) {
0278     QRegularExpression anchorRx(QStringLiteral("<a.*?>(.*?)</a>"));
0279     auto i = anchorRx.globalMatch(genreMatch.captured(1));
0280     QStringList genres;
0281     while(i.hasNext()) {
0282       genres += i.next().captured(1).trimmed();
0283     }
0284     entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString()));
0285   }
0286 
0287   QRegularExpression certRx(QStringLiteral(">FSK:(.*?)</a"),
0288                             QRegularExpression::DotMatchesEverythingOption);
0289   auto certMatch = certRx.match(str_);
0290   if(certMatch.hasMatch()) {
0291     // need to translate? Let's just add FSK ratings to the allowed values
0292     QStringList allowed = entry->collection()->hasField(QStringLiteral("certification")) ?
0293                           entry->collection()->fieldByName(QStringLiteral("certification"))->allowed() :
0294                           QStringList();
0295     if(!allowed.contains(QStringLiteral("FSK 0 (DE)"))) {
0296       allowed << QStringLiteral("FSK 0 (DE)")
0297               << QStringLiteral("FSK 6 (DE)")
0298               << QStringLiteral("FSK 12 (DE)")
0299               << QStringLiteral("FSK 16 (DE)")
0300               << QStringLiteral("FSK 18 (DE)");
0301       entry->collection()->fieldByName(QStringLiteral("certification"))->setAllowed(allowed);
0302     }
0303     QString c = certMatch.captured(1).remove(tagRx).trimmed();
0304     if(c == QStringLiteral("ab 0")) {
0305       c = QStringLiteral("FSK 0 (DE)");
0306     } else if(c == QLatin1String("ab 6")) {
0307       c = QStringLiteral("FSK 6 (DE)");
0308     } else if(c == QLatin1String("ab 12")) {
0309       c = QStringLiteral("FSK 12 (DE)");
0310     } else if(c == QLatin1String("ab 16")) {
0311       c = QStringLiteral("FSK 16 (DE)");
0312     } else if(c == QLatin1String("ab 18")) {
0313       c = QStringLiteral("FSK 18 (DE)");
0314     }
0315     entry->setField(QStringLiteral("certification"), c);
0316   }
0317 
0318   QRegularExpression studioRx(QStringLiteral(">Filmverleih:(.*?)</li"));
0319   auto studioMatch = studioRx.match(str_);
0320   if(studioMatch.hasMatch()) {
0321     QString s = studioMatch.captured(1).remove(tagRx).trimmed();
0322     entry->setField(QStringLiteral("studio"), s);
0323   }
0324 
0325   QRegularExpression plotRx(QStringLiteral("(<p class=\"movie-plot-synopsis\">.+?</p>)<(div|h2)"),
0326                             QRegularExpression::DotMatchesEverythingOption);
0327   auto plotMatch = plotRx.match(str_);
0328   if(!plotMatch.hasMatch()) {
0329     QRegularExpression plot2Rx(QStringLiteral("(</h2><p>.+?</p>)<(div|h2)"),
0330                                QRegularExpression::DotMatchesEverythingOption);
0331     plotMatch = plot2Rx.match(str_);
0332   }
0333   if(plotMatch.hasMatch()) {
0334     QString plot;
0335     // sometimes the plot starts with double <p>
0336     QRegularExpression pRx(QStringLiteral("<p.*?>(?!<p.*?>).*?</p>"));
0337     auto i = pRx.globalMatch(plotMatch.captured(1));
0338     while(i.hasNext()) {
0339       plot += i.next().captured(0);
0340     }
0341     plot = plot.remove(tagRx).trimmed();
0342     entry->setField(QStringLiteral("plot"), plot);
0343   }
0344 
0345   QString cover = entry->field(QStringLiteral("cover"));
0346   if(!cover.isEmpty()) {
0347     const QString id = ImageFactory::addImage(QUrl::fromUserInput(cover), true /* quiet */);
0348     if(id.isEmpty()) {
0349       message(i18n("The cover image could not be loaded."), MessageHandler::Warning);
0350     }
0351     // empty image ID is ok
0352     entry->setField(QStringLiteral("cover"), id);
0353   }
0354 }
0355 
0356 Tellico::Fetch::FetchRequest KinoFetcher::updateRequest(Data::EntryPtr entry_) {
0357   QString t = entry_->field(QStringLiteral("title"));
0358   if(!t.isEmpty()) {
0359     return FetchRequest(Fetch::Title, t);
0360   }
0361   return FetchRequest();
0362 }
0363 
0364 Tellico::Fetch::ConfigWidget* KinoFetcher::configWidget(QWidget* parent_) const {
0365   return new KinoFetcher::ConfigWidget(parent_, this);
0366 }
0367 
0368 QString KinoFetcher::defaultName() {
0369   return QStringLiteral("Kino.de");
0370 }
0371 
0372 QString KinoFetcher::defaultIcon() {
0373   return favIcon("https://www.kino.de");
0374 }
0375 
0376 //static
0377 Tellico::StringHash KinoFetcher::allOptionalFields() {
0378   StringHash hash;
0379   // TODO: add link
0380 //  hash[QStringLiteral("kino")] = i18n("Kino.de Link");
0381   return hash;
0382 }
0383 
0384 KinoFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const KinoFetcher* fetcher_)
0385     : Fetch::ConfigWidget(parent_) {
0386   QVBoxLayout* l = new QVBoxLayout(optionsWidget());
0387   l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
0388   l->addStretch();
0389 
0390   // now add additional fields widget
0391   addFieldsWidget(KinoFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
0392 }
0393 
0394 QString KinoFetcher::ConfigWidget::preferredName() const {
0395   return KinoFetcher::defaultName();
0396 }