// File indexing completed on 2024-05-12 05:09:37
0001 /*************************************************************************** 0002 Copyright (C) 2017 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "kinofetcher.h" 0026 #include "../utils/guiproxy.h" 0027 #include "../collections/videocollection.h" 0028 #include "../entry.h" 0029 #include "../fieldformat.h" 0030 #include "../core/filehandler.h" 0031 #include "../images/imagefactory.h" 0032 #include "../utils/string_utils.h" 0033 #include "../utils/mapvalue.h" 0034 #include "../tellico_debug.h" 0035 0036 #include <KLocalizedString> 0037 #include <KConfig> 0038 #include <KIO/Job> 0039 #include <KIO/JobUiDelegate> 0040 #include <KJobWidgets/KJobWidgets> 0041 0042 #include <QRegularExpression> 0043 #include <QLabel> 0044 #include <QFile> 0045 #include <QTextStream> 0046 #include <QVBoxLayout> 0047 #include <QUrlQuery> 0048 #include <QJsonDocument> 0049 #include <QJsonObject> 0050 0051 namespace { 0052 static const char* KINO_BASE_URL = "https://www.kino.de/se/"; 0053 } 0054 0055 using namespace Tellico; 0056 using Tellico::Fetch::KinoFetcher; 0057 0058 KinoFetcher::KinoFetcher(QObject* parent_) 0059 : Fetcher(parent_), m_started(false) { 0060 } 0061 0062 KinoFetcher::~KinoFetcher() { 0063 } 0064 0065 QString KinoFetcher::source() const { 0066 return m_name.isEmpty() ? 
defaultName() : m_name; 0067 } 0068 0069 bool KinoFetcher::canFetch(int type) const { 0070 return type == Data::Collection::Video; 0071 } 0072 0073 void KinoFetcher::readConfigHook(const KConfigGroup& config_) { 0074 Q_UNUSED(config_); 0075 } 0076 0077 void KinoFetcher::search() { 0078 m_started = true; 0079 m_matches.clear(); 0080 0081 QUrl u(QString::fromLatin1(KINO_BASE_URL)); 0082 QUrlQuery q; 0083 q.addQueryItem(QStringLiteral("types"), QStringLiteral("movie")); 0084 0085 switch(request().key()) { 0086 case Title: 0087 q.addQueryItem(QStringLiteral("searchterm"), request().value()); 0088 break; 0089 0090 default: 0091 myWarning() << source() << "- key not recognized:" << request().key(); 0092 stop(); 0093 return; 0094 } 0095 u.setQuery(q); 0096 // myDebug() << "url:" << u; 0097 0098 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); 0099 KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); 0100 connect(m_job.data(), &KJob::result, 0101 this, &KinoFetcher::slotComplete); 0102 } 0103 0104 void KinoFetcher::stop() { 0105 if(!m_started) { 0106 return; 0107 } 0108 0109 if(m_job) { 0110 m_job->kill(); 0111 m_job = nullptr; 0112 } 0113 m_started = false; 0114 emit signalDone(this); 0115 } 0116 0117 void KinoFetcher::slotComplete(KJob*) { 0118 if(m_job->error()) { 0119 m_job->uiDelegate()->showErrorMessage(); 0120 stop(); 0121 return; 0122 } 0123 0124 const QByteArray data = m_job->data(); 0125 if(data.isEmpty()) { 0126 myDebug() << "no data"; 0127 stop(); 0128 return; 0129 } 0130 0131 // since the fetch is done, don't worry about holding the job pointer 0132 m_job = nullptr; 0133 0134 const QString pageText = Tellico::decodeHTML(data); 0135 #if 0 0136 myWarning() << "Remove debug from kinofetcher.cpp"; 0137 QFile f(QStringLiteral("/tmp/test.html")); 0138 if(f.open(QIODevice::WriteOnly)) { 0139 QTextStream t(&f); 0140 t.setCodec("UTF-8"); 0141 t << pageText; 0142 } 0143 f.close(); 0144 #endif 0145 0146 QRegularExpression linkRx(QStringLiteral("<div 
class=\"alice-teaser-title\">.*?<a .+?teaser-link.+?href=\"(.+?)\".*?>(.+?)</"), 0147 QRegularExpression::DotMatchesEverythingOption); 0148 QRegularExpression dateSpanRx(QStringLiteral("<span .+?movie-startdate.+?>(.+?)</span")); 0149 QRegularExpression dateRx(QStringLiteral("\\d{2}\\.\\d{2}\\.(\\d{4})")); 0150 QRegularExpression yearEndRx(QStringLiteral("(\\d{4})/?$")); 0151 0152 auto i = linkRx.globalMatch(pageText); 0153 while(i.hasNext()) { 0154 auto match = i.next(); 0155 QString u = match.captured(1); 0156 if(u.isEmpty() || u.contains(QLatin1String("news")) || !u.contains(QLatin1String("film"))) { 0157 continue; 0158 } 0159 if(u.startsWith(QLatin1String("//"))) { 0160 u.prepend(QLatin1String("https:")); 0161 } 0162 Data::CollPtr coll(new Data::VideoCollection(true)); 0163 Data::EntryPtr entry(new Data::Entry(coll)); 0164 coll->addEntries(entry); 0165 0166 entry->setField(QStringLiteral("title"), match.captured(2)); 0167 0168 QString y; 0169 auto dateMatch = dateSpanRx.match(pageText, match.capturedEnd()); 0170 if(dateMatch.hasMatch()) { 0171 y = dateRx.match(dateMatch.captured(1)).captured(1); 0172 } else { 0173 // see if year is embedded in url 0174 y = yearEndRx.match(u).captured(1); 0175 } 0176 entry->setField(QStringLiteral("year"), y); 0177 0178 FetchResult* r = new FetchResult(this, entry); 0179 QUrl url = QUrl(QString::fromLatin1(KINO_BASE_URL)).resolved(QUrl(u)); 0180 m_matches.insert(r->uid, url); 0181 m_entries.insert(r->uid, entry); 0182 // don't emit signal until after putting url in matches hash 0183 emit signalResultFound(r); 0184 } 0185 0186 stop(); 0187 } 0188 0189 Tellico::Data::EntryPtr KinoFetcher::fetchEntryHook(uint uid_) { 0190 if(!m_entries.contains(uid_)) { 0191 myWarning() << "no entry in hash"; 0192 return Data::EntryPtr(); 0193 } 0194 0195 Data::EntryPtr entry = m_entries[uid_]; 0196 // if the url is not in the hash, the entry has already been fully populated 0197 if(!m_matches.contains(uid_)) { 0198 return entry; 0199 } 0200 0201 
QString results = Tellico::decodeHTML(FileHandler::readTextFile(m_matches[uid_], true, true)); 0202 if(results.isEmpty()) { 0203 myDebug() << "No text results from" << m_matches[uid_]; 0204 return entry; 0205 } 0206 0207 #if 0 0208 myWarning() << "Remove debug2 from kinofetcher.cpp"; 0209 QFile f(QStringLiteral("/tmp/test2.html")); 0210 if(f.open(QIODevice::WriteOnly)) { 0211 QTextStream t(&f); 0212 t.setCodec("UTF-8"); 0213 t << results; 0214 } 0215 f.close(); 0216 #endif 0217 0218 parseEntry(entry, results); 0219 // remove url to signal the entry is fully populated 0220 m_matches.remove(uid_); 0221 return entry; 0222 } 0223 0224 void KinoFetcher::parseEntry(Data::EntryPtr entry, const QString& str_) { 0225 static const QRegularExpression jsonRx(QStringLiteral("<script type=\"application/ld\\+json\">(.*?)</script"), 0226 QRegularExpression::DotMatchesEverythingOption); 0227 auto i = jsonRx.globalMatch(str_); 0228 while(i.hasNext()) { 0229 QJsonDocument doc = QJsonDocument::fromJson(i.next().captured(1).toUtf8()); 0230 QVariantMap objectMap = doc.object().toVariantMap(); 0231 if(mapValue(objectMap, "@type") != QStringLiteral("Movie")) { 0232 continue; 0233 } 0234 entry->setField(QStringLiteral("director"), mapValue(objectMap, "director", "name")); 0235 0236 QStringList actors; 0237 foreach(QVariant v, objectMap.value(QLatin1String("actor")).toList()) { 0238 const QString actor = mapValue(v.toMap(), "name"); 0239 if(!actor.isEmpty()) actors += actor; 0240 } 0241 if(!actors.isEmpty()) { 0242 entry->setField(QStringLiteral("cast"), actors.join(FieldFormat::rowDelimiterString())); 0243 } 0244 // cover could be a relative link 0245 QString coverLink = mapValue(objectMap, "image"); 0246 if(coverLink.startsWith(QLatin1String("//"))) { 0247 coverLink.prepend(QLatin1String("https:")); 0248 } 0249 entry->setField(QStringLiteral("cover"), coverLink); 0250 0251 QString genreString = mapValue(objectMap, "genre"); 0252 if(!genreString.isEmpty()) { 0253 static const 
QRegularExpression commaRx(QLatin1String(",\\s+")); 0254 QStringList genres = genreString.split(commaRx); 0255 entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString())); 0256 } 0257 } 0258 0259 static const QRegularExpression tagRx(QStringLiteral("<.+?>")); 0260 QRegularExpression nationalityRx(QStringLiteral(">Produktionsland:(.*?)</a>")); 0261 auto nationalityMatch = nationalityRx.match(str_); 0262 if(nationalityMatch.hasMatch()) { 0263 const QString n = nationalityMatch.captured(1).remove(tagRx).trimmed(); 0264 entry->setField(QStringLiteral("nationality"), n); 0265 } 0266 0267 QRegularExpression lengthRx(QStringLiteral(">Dauer:(.*?)</li"), 0268 QRegularExpression::DotMatchesEverythingOption); 0269 auto lengthMatch = lengthRx.match(str_); 0270 if(lengthMatch.hasMatch()) { 0271 const QString l = lengthMatch.captured(1).remove(tagRx).remove(QStringLiteral(" Min")).trimmed(); 0272 entry->setField(QStringLiteral("running-time"), l); 0273 } 0274 0275 QRegularExpression genreRx(QStringLiteral("<dt.*?>Genre</dt><dd.*?>(.*?)</dd>")); 0276 auto genreMatch = genreRx.match(str_); 0277 if(genreMatch.hasMatch()) { 0278 QRegularExpression anchorRx(QStringLiteral("<a.*?>(.*?)</a>")); 0279 auto i = anchorRx.globalMatch(genreMatch.captured(1)); 0280 QStringList genres; 0281 while(i.hasNext()) { 0282 genres += i.next().captured(1).trimmed(); 0283 } 0284 entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString())); 0285 } 0286 0287 QRegularExpression certRx(QStringLiteral(">FSK:(.*?)</a"), 0288 QRegularExpression::DotMatchesEverythingOption); 0289 auto certMatch = certRx.match(str_); 0290 if(certMatch.hasMatch()) { 0291 // need to translate? Let's just add FSK ratings to the allowed values 0292 QStringList allowed = entry->collection()->hasField(QStringLiteral("certification")) ? 
0293 entry->collection()->fieldByName(QStringLiteral("certification"))->allowed() : 0294 QStringList(); 0295 if(!allowed.contains(QStringLiteral("FSK 0 (DE)"))) { 0296 allowed << QStringLiteral("FSK 0 (DE)") 0297 << QStringLiteral("FSK 6 (DE)") 0298 << QStringLiteral("FSK 12 (DE)") 0299 << QStringLiteral("FSK 16 (DE)") 0300 << QStringLiteral("FSK 18 (DE)"); 0301 entry->collection()->fieldByName(QStringLiteral("certification"))->setAllowed(allowed); 0302 } 0303 QString c = certMatch.captured(1).remove(tagRx).trimmed(); 0304 if(c == QStringLiteral("ab 0")) { 0305 c = QStringLiteral("FSK 0 (DE)"); 0306 } else if(c == QLatin1String("ab 6")) { 0307 c = QStringLiteral("FSK 6 (DE)"); 0308 } else if(c == QLatin1String("ab 12")) { 0309 c = QStringLiteral("FSK 12 (DE)"); 0310 } else if(c == QLatin1String("ab 16")) { 0311 c = QStringLiteral("FSK 16 (DE)"); 0312 } else if(c == QLatin1String("ab 18")) { 0313 c = QStringLiteral("FSK 18 (DE)"); 0314 } 0315 entry->setField(QStringLiteral("certification"), c); 0316 } 0317 0318 QRegularExpression studioRx(QStringLiteral(">Filmverleih:(.*?)</li")); 0319 auto studioMatch = studioRx.match(str_); 0320 if(studioMatch.hasMatch()) { 0321 QString s = studioMatch.captured(1).remove(tagRx).trimmed(); 0322 entry->setField(QStringLiteral("studio"), s); 0323 } 0324 0325 QRegularExpression plotRx(QStringLiteral("(<p class=\"movie-plot-synopsis\">.+?</p>)<(div|h2)"), 0326 QRegularExpression::DotMatchesEverythingOption); 0327 auto plotMatch = plotRx.match(str_); 0328 if(!plotMatch.hasMatch()) { 0329 QRegularExpression plot2Rx(QStringLiteral("(</h2><p>.+?</p>)<(div|h2)"), 0330 QRegularExpression::DotMatchesEverythingOption); 0331 plotMatch = plot2Rx.match(str_); 0332 } 0333 if(plotMatch.hasMatch()) { 0334 QString plot; 0335 // sometimes the plot starts with double <p> 0336 QRegularExpression pRx(QStringLiteral("<p.*?>(?!<p.*?>).*?</p>")); 0337 auto i = pRx.globalMatch(plotMatch.captured(1)); 0338 while(i.hasNext()) { 0339 plot += 
i.next().captured(0); 0340 } 0341 plot = plot.remove(tagRx).trimmed(); 0342 entry->setField(QStringLiteral("plot"), plot); 0343 } 0344 0345 QString cover = entry->field(QStringLiteral("cover")); 0346 if(!cover.isEmpty()) { 0347 const QString id = ImageFactory::addImage(QUrl::fromUserInput(cover), true /* quiet */); 0348 if(id.isEmpty()) { 0349 message(i18n("The cover image could not be loaded."), MessageHandler::Warning); 0350 } 0351 // empty image ID is ok 0352 entry->setField(QStringLiteral("cover"), id); 0353 } 0354 } 0355 0356 Tellico::Fetch::FetchRequest KinoFetcher::updateRequest(Data::EntryPtr entry_) { 0357 QString t = entry_->field(QStringLiteral("title")); 0358 if(!t.isEmpty()) { 0359 return FetchRequest(Fetch::Title, t); 0360 } 0361 return FetchRequest(); 0362 } 0363 0364 Tellico::Fetch::ConfigWidget* KinoFetcher::configWidget(QWidget* parent_) const { 0365 return new KinoFetcher::ConfigWidget(parent_, this); 0366 } 0367 0368 QString KinoFetcher::defaultName() { 0369 return QStringLiteral("Kino.de"); 0370 } 0371 0372 QString KinoFetcher::defaultIcon() { 0373 return favIcon("https://www.kino.de"); 0374 } 0375 0376 //static 0377 Tellico::StringHash KinoFetcher::allOptionalFields() { 0378 StringHash hash; 0379 // TODO: add link 0380 // hash[QStringLiteral("kino")] = i18n("Kino.de Link"); 0381 return hash; 0382 } 0383 0384 KinoFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const KinoFetcher* fetcher_) 0385 : Fetch::ConfigWidget(parent_) { 0386 QVBoxLayout* l = new QVBoxLayout(optionsWidget()); 0387 l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); 0388 l->addStretch(); 0389 0390 // now add additional fields widget 0391 addFieldsWidget(KinoFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); 0392 } 0393 0394 QString KinoFetcher::ConfigWidget::preferredName() const { 0395 return KinoFetcher::defaultName(); 0396 }