File indexing completed on 2024-05-12 16:45:54

0001 /***************************************************************************
0002     Copyright (C) 2017 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "kinofetcher.h"
0026 #include "../utils/guiproxy.h"
0027 #include "../utils/string_utils.h"
0028 #include "../collections/bookcollection.h"
0029 #include "../collections/videocollection.h"
0030 #include "../entry.h"
0031 #include "../fieldformat.h"
0032 #include "../core/filehandler.h"
0033 #include "../images/imagefactory.h"
0034 #include "../tellico_debug.h"
0035 
0036 #include <KLocalizedString>
0037 #include <KConfig>
0038 #include <KIO/Job>
0039 #include <KIO/JobUiDelegate>
0040 #include <KJobWidgets/KJobWidgets>
0041 
0042 #include <QRegularExpression>
0043 #include <QLabel>
0044 #include <QFile>
0045 #include <QTextStream>
0046 #include <QVBoxLayout>
0047 #include <QUrlQuery>
0048 #include <QJsonDocument>
0049 #include <QJsonObject>
0050 
namespace {
  // Address of the kino.de search page. search() attaches the query to it and
  // slotComplete() resolves result links against it.
  const char* const KINO_BASE_URL = "https://www.kino.de/se/";
}
0054 
0055 using namespace Tellico;
0056 using Tellico::Fetch::KinoFetcher;
0057 
0058 KinoFetcher::KinoFetcher(QObject* parent_)
0059     : Fetcher(parent_), m_started(false) {
0060 }
0061 
0062 KinoFetcher::~KinoFetcher() {
0063 }
0064 
0065 QString KinoFetcher::source() const {
0066   return m_name.isEmpty() ? defaultName() : m_name;
0067 }
0068 
0069 bool KinoFetcher::canFetch(int type) const {
0070   return type == Data::Collection::Video;
0071 }
0072 
0073 void KinoFetcher::readConfigHook(const KConfigGroup& config_) {
0074   Q_UNUSED(config_);
0075 }
0076 
0077 void KinoFetcher::search() {
0078   m_started = true;
0079   m_matches.clear();
0080 
0081   QUrl u(QString::fromLatin1(KINO_BASE_URL));
0082   QUrlQuery q;
0083   q.addQueryItem(QStringLiteral("sp_search_filter"), QStringLiteral("movie"));
0084 
0085   switch(request().key()) {
0086     case Title:
0087       q.addQueryItem(QStringLiteral("searchterm"), request().value());
0088       break;
0089 
0090     default:
0091       myWarning() << "key not recognized: " << request().key();
0092       stop();
0093       return;
0094   }
0095   u.setQuery(q);
0096 //  myDebug() << "url:" << u;
0097 
0098   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
0099   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
0100   connect(m_job.data(), &KJob::result,
0101           this, &KinoFetcher::slotComplete);
0102 }
0103 
0104 void KinoFetcher::stop() {
0105   if(!m_started) {
0106     return;
0107   }
0108 
0109   if(m_job) {
0110     m_job->kill();
0111     m_job = nullptr;
0112   }
0113   m_started = false;
0114   emit signalDone(this);
0115 }
0116 
0117 void KinoFetcher::slotComplete(KJob*) {
0118   if(m_job->error()) {
0119     m_job->uiDelegate()->showErrorMessage();
0120     stop();
0121     return;
0122   }
0123 
0124   const QByteArray data = m_job->data();
0125   if(data.isEmpty()) {
0126     myDebug() << "no data";
0127     stop();
0128     return;
0129   }
0130 
0131   // since the fetch is done, don't worry about holding the job pointer
0132   m_job = nullptr;
0133 
0134   const QString s = Tellico::decodeHTML(data);
0135 #if 0
0136   myWarning() << "Remove debug from kinofetcher.cpp";
0137   QFile f(QStringLiteral("/tmp/test.html"));
0138   if(f.open(QIODevice::WriteOnly)) {
0139     QTextStream t(&f);
0140     t.setCodec("UTF-8");
0141     t << s;
0142   }
0143   f.close();
0144 #endif
0145 
0146   QRegularExpression linkRx(QStringLiteral("<span class=\"alice-teaser-label\\s*?\">.+?Film.+?<a .+?teaser-link.+?href=\"(.+?)\".*?>(.+?)</"));
0147   QRegularExpression dateSpanRx(QStringLiteral("<span .+?movie-startdate.+?>(.+?)</span"));
0148   QRegularExpression dateRx(QStringLiteral("\\d{2}\\.\\d{2}\\.(\\d{4})"));
0149   QRegularExpression yearEndRx(QStringLiteral("(\\d{4})/?$"));
0150 
0151   QRegularExpressionMatchIterator i = linkRx.globalMatch(s);
0152   while(i.hasNext()) {
0153     QRegularExpressionMatch match = i.next();
0154     QString u = match.captured(1);
0155     if(u.isEmpty() || u.contains(QLatin1String("news")) || !u.contains(QLatin1String("film"))) {
0156       continue;
0157     }
0158     if(u.startsWith(QLatin1String("//"))) {
0159       u.prepend(QLatin1String("https:"));
0160     }
0161     Data::CollPtr coll(new Data::VideoCollection(true));
0162     Data::EntryPtr entry(new Data::Entry(coll));
0163     coll->addEntries(entry);
0164 
0165     entry->setField(QStringLiteral("title"), match.captured(2));
0166 
0167     QString y;
0168     QRegularExpressionMatch dateMatch = dateSpanRx.match(s, match.capturedEnd());
0169     if(dateMatch.hasMatch()) {
0170       y = dateRx.match(dateMatch.captured(1)).captured(1);
0171     } else {
0172       // see if year is embedded in url
0173       y = yearEndRx.match(u).captured(1);
0174     }
0175     entry->setField(QStringLiteral("year"), y);
0176 
0177     FetchResult* r = new FetchResult(this, entry);
0178     QUrl url = QUrl(QString::fromLatin1(KINO_BASE_URL)).resolved(QUrl(u));
0179     m_matches.insert(r->uid, url);
0180     m_entries.insert(r->uid, entry);
0181     // don't emit signal until after putting url in matches hash
0182     emit signalResultFound(r);
0183   }
0184 
0185   stop();
0186 }
0187 
0188 Tellico::Data::EntryPtr KinoFetcher::fetchEntryHook(uint uid_) {
0189   if(!m_entries.contains(uid_)) {
0190     myWarning() << "no entry in hash";
0191     return Data::EntryPtr();
0192   }
0193 
0194   Data::EntryPtr entry = m_entries[uid_];
0195   // if the url is not in the hash, the entry has already been fully populated
0196   if(!m_matches.contains(uid_)) {
0197     return entry;
0198   }
0199 
0200   QString results = Tellico::decodeHTML(FileHandler::readTextFile(m_matches[uid_], true, true));
0201   if(results.isEmpty()) {
0202     myDebug() << "no text results from" << m_matches[uid_];
0203     return entry;
0204   }
0205 
0206 #if 0
0207   myWarning() << "Remove debug2 from kinofetcher.cpp";
0208   QFile f(QStringLiteral("/tmp/test2.html"));
0209   if(f.open(QIODevice::WriteOnly)) {
0210     QTextStream t(&f);
0211     t.setCodec("UTF-8");
0212     t << results;
0213   }
0214   f.close();
0215 #endif
0216 
0217   parseEntry(entry, results);
0218   // remove url to signal the entry is fully populated
0219   m_matches.remove(uid_);
0220   return entry;
0221 }
0222 
0223 void KinoFetcher::parseEntry(Data::EntryPtr entry, const QString& str_) {
0224   QRegularExpression jsonRx(QStringLiteral("<script type=\"application/ld\\+json\">(.*?)</script"),
0225                             QRegularExpression::DotMatchesEverythingOption);
0226   QRegularExpressionMatchIterator i = jsonRx.globalMatch(str_);
0227   while(i.hasNext()) {
0228     QJsonDocument doc = QJsonDocument::fromJson(i.next().captured(1).toUtf8());
0229     QVariantMap objectMap = doc.object().toVariantMap();
0230     if(mapValue(objectMap, "@type") != QStringLiteral("Movie")) {
0231       continue;
0232     }
0233     entry->setField(QStringLiteral("director"), mapValue(objectMap, "director", "name"));
0234 
0235     QStringList actors;
0236     foreach(QVariant v, objectMap.value(QLatin1String("actor")).toList()) {
0237       const QString actor = mapValue(v.toMap(), "name");
0238       if(!actor.isEmpty()) actors += actor;
0239     }
0240     if(!actors.isEmpty()) {
0241       entry->setField(QStringLiteral("cast"), actors.join(FieldFormat::rowDelimiterString()));
0242     }
0243     // cover could be a relative link
0244     QString coverLink = mapValue(objectMap, "image");
0245     if(coverLink.startsWith(QLatin1String("//"))) {
0246       coverLink.prepend(QLatin1String("https:"));
0247     }
0248     entry->setField(QStringLiteral("cover"), coverLink);
0249 
0250     QString genreString = mapValue(objectMap, "genre");
0251     if(!genreString.isEmpty()) {
0252       QStringList genres = genreString.split(QRegularExpression(QLatin1String(",\\s+")));
0253       entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString()));
0254     }
0255   }
0256 
0257   QRegularExpression tagRx(QStringLiteral("<.+?>"));
0258 
0259   QRegularExpression nationalityRx(QStringLiteral(">Produktionsland:(.*?)</a>"));
0260   QRegularExpressionMatch nationalityMatch = nationalityRx.match(str_);
0261   if(nationalityMatch.hasMatch()) {
0262     const QString n = nationalityMatch.captured(1).remove(tagRx).trimmed();
0263     entry->setField(QStringLiteral("nationality"), n);
0264   }
0265 
0266   QRegularExpression lengthRx(QStringLiteral(">Dauer:(.*?)</li"));
0267   QRegularExpressionMatch lengthMatch = lengthRx.match(str_);
0268   if(lengthMatch.hasMatch()) {
0269     const QString l = lengthMatch.captured(1).remove(tagRx).remove(QStringLiteral(" Min")).trimmed();
0270     entry->setField(QStringLiteral("running-time"), l);
0271   }
0272 
0273   QRegularExpression genreRx(QStringLiteral("<dt.*?>Genre</dt><dd.*?>(.*?)</dd>"));
0274   QRegularExpressionMatch genreMatch = genreRx.match(str_);
0275   if(genreMatch.hasMatch()) {
0276     QRegularExpression anchorRx(QStringLiteral("<a.*?>(.*?)</a>"));
0277     QRegularExpressionMatchIterator i = anchorRx.globalMatch(genreMatch.captured(1));
0278     QStringList genres;
0279     while(i.hasNext()) {
0280       genres += i.next().captured(1).trimmed();
0281     }
0282     entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString()));
0283   }
0284 
0285   QRegularExpression certRx(QStringLiteral(">FSK:(.*?)</a"));
0286   QRegularExpressionMatch certMatch = certRx.match(str_);
0287   if(certMatch.hasMatch()) {
0288     // need to translate? Let's just add FSK ratings to the allowed values
0289     QStringList allowed = entry->collection()->hasField(QStringLiteral("certification")) ?
0290                           entry->collection()->fieldByName(QStringLiteral("certification"))->allowed() :
0291                           QStringList();
0292     if(!allowed.contains(QStringLiteral("FSK 0 (DE)"))) {
0293       allowed << QStringLiteral("FSK 0 (DE)")
0294               << QStringLiteral("FSK 6 (DE)")
0295               << QStringLiteral("FSK 12 (DE)")
0296               << QStringLiteral("FSK 16 (DE)")
0297               << QStringLiteral("FSK 18 (DE)");
0298       entry->collection()->fieldByName(QStringLiteral("certification"))->setAllowed(allowed);
0299     }
0300     QString c = certMatch.captured(1).remove(tagRx).trimmed();
0301     if(c == QStringLiteral("ab 0")) {
0302       c = QStringLiteral("FSK 0 (DE)");
0303     } else if(c == QLatin1String("ab 6")) {
0304       c = QStringLiteral("FSK 6 (DE)");
0305     } else if(c == QLatin1String("ab 12")) {
0306       c = QStringLiteral("FSK 12 (DE)");
0307     } else if(c == QLatin1String("ab 16")) {
0308       c = QStringLiteral("FSK 16 (DE)");
0309     } else if(c == QLatin1String("ab 18")) {
0310       c = QStringLiteral("FSK 18 (DE)");
0311     }
0312     entry->setField(QStringLiteral("certification"), c);
0313   }
0314 
0315   QRegularExpression studioRx(QStringLiteral(">Filmverleih:(.*?)</li"));
0316   QRegularExpressionMatch studioMatch = studioRx.match(str_);
0317   if(studioMatch.hasMatch()) {
0318     QString s = studioMatch.captured(1).remove(tagRx).trimmed();
0319     entry->setField(QStringLiteral("studio"), s);
0320   }
0321 
0322   QRegularExpression plotRx(QStringLiteral("(<p class=\"movie-plot-synopsis\">.+?</p>)<(div|h2)"),
0323                                           QRegularExpression::DotMatchesEverythingOption);
0324   QRegularExpressionMatch plotMatch = plotRx.match(str_);
0325   if(plotMatch.hasMatch()) {
0326     QString plot;
0327     // sometimes the plot starts with double <p>
0328     QRegularExpression pRx(QStringLiteral("<p.*?>(?!<p.*?>).*?</p>"));
0329     QRegularExpressionMatchIterator i = pRx.globalMatch(plotMatch.captured(1));
0330     while(i.hasNext()) {
0331       plot += i.next().captured(0);
0332     }
0333     plot = plot.remove(tagRx).trimmed();
0334     entry->setField(QStringLiteral("plot"), plot);
0335   }
0336 
0337   QString cover = entry->field(QStringLiteral("cover"));
0338   if(!cover.isEmpty()) {
0339     const QString id = ImageFactory::addImage(QUrl::fromUserInput(cover), true /* quiet */);
0340     if(id.isEmpty()) {
0341       message(i18n("The cover image could not be loaded."), MessageHandler::Warning);
0342     }
0343     // empty image ID is ok
0344     entry->setField(QStringLiteral("cover"), id);
0345   }
0346 }
0347 
0348 Tellico::Fetch::FetchRequest KinoFetcher::updateRequest(Data::EntryPtr entry_) {
0349   QString t = entry_->field(QStringLiteral("title"));
0350   if(!t.isEmpty()) {
0351     return FetchRequest(Fetch::Title, t);
0352   }
0353   return FetchRequest();
0354 }
0355 
0356 Tellico::Fetch::ConfigWidget* KinoFetcher::configWidget(QWidget* parent_) const {
0357   return new KinoFetcher::ConfigWidget(parent_, this);
0358 }
0359 
0360 QString KinoFetcher::defaultName() {
0361   return QStringLiteral("Kino.de");
0362 }
0363 
0364 QString KinoFetcher::defaultIcon() {
0365   return favIcon("https://www.kino.de");
0366 }
0367 
0368 //static
0369 Tellico::StringHash KinoFetcher::allOptionalFields() {
0370   StringHash hash;
0371   // TODO: add link
0372 //  hash[QStringLiteral("kino")] = i18n("Kino.de Link");
0373   return hash;
0374 }
0375 
0376 KinoFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const KinoFetcher* fetcher_)
0377     : Fetch::ConfigWidget(parent_) {
0378   QVBoxLayout* l = new QVBoxLayout(optionsWidget());
0379   l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
0380   l->addStretch();
0381 
0382   // now add additional fields widget
0383   addFieldsWidget(KinoFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
0384 }
0385 
0386 QString KinoFetcher::ConfigWidget::preferredName() const {
0387   return KinoFetcher::defaultName();
0388 }