File indexing completed on 2024-05-12 16:45:54
0001 /*************************************************************************** 0002 Copyright (C) 2017 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "kinofetcher.h" 0026 #include "../utils/guiproxy.h" 0027 #include "../utils/string_utils.h" 0028 #include "../collections/bookcollection.h" 0029 #include "../collections/videocollection.h" 0030 #include "../entry.h" 0031 #include "../fieldformat.h" 0032 #include "../core/filehandler.h" 0033 #include "../images/imagefactory.h" 0034 #include "../tellico_debug.h" 0035 0036 #include <KLocalizedString> 0037 #include <KConfig> 0038 #include <KIO/Job> 0039 #include <KIO/JobUiDelegate> 0040 #include <KJobWidgets/KJobWidgets> 0041 0042 #include <QRegularExpression> 0043 #include <QLabel> 0044 #include <QFile> 0045 #include <QTextStream> 0046 #include <QVBoxLayout> 0047 #include <QUrlQuery> 0048 #include <QJsonDocument> 0049 #include <QJsonObject> 0050 0051 namespace { 0052 static const char* KINO_BASE_URL = "https://www.kino.de/se/"; 0053 } 0054 0055 using namespace Tellico; 0056 using Tellico::Fetch::KinoFetcher; 0057 0058 KinoFetcher::KinoFetcher(QObject* parent_) 0059 : Fetcher(parent_), m_started(false) { 0060 } 0061 0062 KinoFetcher::~KinoFetcher() { 0063 } 0064 0065 QString KinoFetcher::source() const { 0066 return m_name.isEmpty() ? defaultName() : m_name; 0067 } 0068 0069 bool KinoFetcher::canFetch(int type) const { 0070 return type == Data::Collection::Video; 0071 } 0072 0073 void KinoFetcher::readConfigHook(const KConfigGroup& config_) { 0074 Q_UNUSED(config_); 0075 } 0076 0077 void KinoFetcher::search() { 0078 m_started = true; 0079 m_matches.clear(); 0080 0081 QUrl u(QString::fromLatin1(KINO_BASE_URL)); 0082 QUrlQuery q; 0083 q.addQueryItem(QStringLiteral("sp_search_filter"), QStringLiteral("movie")); 0084 0085 switch(request().key()) { 0086 case Title: 0087 q.addQueryItem(QStringLiteral("searchterm"), request().value()); 0088 break; 0089 0090 default: 0091 myWarning() << "key not recognized: " << request().key(); 0092 stop(); 0093 return; 0094 } 0095 u.setQuery(q); 0096 // myDebug() << "url:" << u; 0097 0098 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); 0099 KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); 0100 connect(m_job.data(), &KJob::result, 0101 this, &KinoFetcher::slotComplete); 0102 } 0103 0104 void KinoFetcher::stop() { 0105 if(!m_started) { 0106 return; 0107 } 0108 0109 if(m_job) { 0110 m_job->kill(); 0111 m_job = nullptr; 0112 } 0113 m_started = false; 0114 emit signalDone(this); 0115 } 0116 0117 void KinoFetcher::slotComplete(KJob*) { 0118 if(m_job->error()) { 0119 m_job->uiDelegate()->showErrorMessage(); 0120 stop(); 0121 return; 0122 } 0123 0124 const QByteArray data = m_job->data(); 0125 if(data.isEmpty()) { 0126 myDebug() << "no data"; 0127 stop(); 0128 return; 0129 } 0130 0131 // since the fetch is done, don't worry about holding the job pointer 0132 m_job = nullptr; 0133 0134 const QString s = Tellico::decodeHTML(data); 0135 #if 0 0136 myWarning() << "Remove debug from kinofetcher.cpp"; 0137 QFile f(QStringLiteral("/tmp/test.html")); 0138 if(f.open(QIODevice::WriteOnly)) { 0139 QTextStream t(&f); 0140 t.setCodec("UTF-8"); 0141 t << s; 0142 } 0143 f.close(); 0144 #endif 0145 0146 QRegularExpression linkRx(QStringLiteral("<span class=\"alice-teaser-label\\s*?\">.+?Film.+?<a .+?teaser-link.+?href=\"(.+?)\".*?>(.+?)</")); 0147 QRegularExpression dateSpanRx(QStringLiteral("<span .+?movie-startdate.+?>(.+?)</span")); 0148 QRegularExpression dateRx(QStringLiteral("\\d{2}\\.\\d{2}\\.(\\d{4})")); 0149 QRegularExpression yearEndRx(QStringLiteral("(\\d{4})/?$")); 0150 0151 QRegularExpressionMatchIterator i = linkRx.globalMatch(s); 0152 while(i.hasNext()) { 0153 QRegularExpressionMatch match = i.next(); 0154 QString u = match.captured(1); 0155 if(u.isEmpty() || u.contains(QLatin1String("news")) || !u.contains(QLatin1String("film"))) { 0156 continue; 0157 } 0158 if(u.startsWith(QLatin1String("//"))) { 0159 u.prepend(QLatin1String("https:")); 0160 } 0161 Data::CollPtr coll(new Data::VideoCollection(true)); 0162 Data::EntryPtr entry(new Data::Entry(coll)); 0163 coll->addEntries(entry); 0164 0165 entry->setField(QStringLiteral("title"), match.captured(2)); 0166 0167 QString y; 0168 QRegularExpressionMatch dateMatch = dateSpanRx.match(s, match.capturedEnd()); 0169 if(dateMatch.hasMatch()) { 0170 y = dateRx.match(dateMatch.captured(1)).captured(1); 0171 } else { 0172 // see if year is embedded in url 0173 y = yearEndRx.match(u).captured(1); 0174 } 0175 entry->setField(QStringLiteral("year"), y); 0176 0177 FetchResult* r = new FetchResult(this, entry); 0178 QUrl url = QUrl(QString::fromLatin1(KINO_BASE_URL)).resolved(QUrl(u)); 0179 m_matches.insert(r->uid, url); 0180 m_entries.insert(r->uid, entry); 0181 // don't emit signal until after putting url in matches hash 0182 emit signalResultFound(r); 0183 } 0184 0185 stop(); 0186 } 0187 0188 Tellico::Data::EntryPtr KinoFetcher::fetchEntryHook(uint uid_) { 0189 if(!m_entries.contains(uid_)) { 0190 myWarning() << "no entry in hash"; 0191 return Data::EntryPtr(); 0192 } 0193 0194 Data::EntryPtr entry = m_entries[uid_]; 0195 // if the url is not in the hash, the entry has already been fully populated 0196 if(!m_matches.contains(uid_)) { 0197 return entry; 0198 } 0199 0200 QString results = Tellico::decodeHTML(FileHandler::readTextFile(m_matches[uid_], true, true)); 0201 if(results.isEmpty()) { 0202 myDebug() << "no text results from" << m_matches[uid_]; 0203 return entry; 0204 } 0205 0206 #if 0 0207 myWarning() << "Remove debug2 from kinofetcher.cpp"; 0208 QFile f(QStringLiteral("/tmp/test2.html")); 0209 if(f.open(QIODevice::WriteOnly)) { 0210 QTextStream t(&f); 0211 t.setCodec("UTF-8"); 0212 t << results; 0213 } 0214 f.close(); 0215 #endif 0216 0217 parseEntry(entry, results); 0218 // remove url to signal the entry is fully populated 0219 m_matches.remove(uid_); 0220 return entry; 0221 } 0222 0223 void KinoFetcher::parseEntry(Data::EntryPtr entry, const QString& str_) { 0224 QRegularExpression jsonRx(QStringLiteral("<script type=\"application/ld\\+json\">(.*?)</script"), 0225 QRegularExpression::DotMatchesEverythingOption); 0226 QRegularExpressionMatchIterator i = jsonRx.globalMatch(str_); 0227 while(i.hasNext()) { 0228 QJsonDocument doc = QJsonDocument::fromJson(i.next().captured(1).toUtf8()); 0229 QVariantMap objectMap = doc.object().toVariantMap(); 0230 if(mapValue(objectMap, "@type") != QStringLiteral("Movie")) { 0231 continue; 0232 } 0233 entry->setField(QStringLiteral("director"), mapValue(objectMap, "director", "name")); 0234 0235 QStringList actors; 0236 foreach(QVariant v, objectMap.value(QLatin1String("actor")).toList()) { 0237 const QString actor = mapValue(v.toMap(), "name"); 0238 if(!actor.isEmpty()) actors += actor; 0239 } 0240 if(!actors.isEmpty()) { 0241 entry->setField(QStringLiteral("cast"), actors.join(FieldFormat::rowDelimiterString())); 0242 } 0243 // cover could be a relative link 0244 QString coverLink = mapValue(objectMap, "image"); 0245 if(coverLink.startsWith(QLatin1String("//"))) { 0246 coverLink.prepend(QLatin1String("https:")); 0247 } 0248 entry->setField(QStringLiteral("cover"), coverLink); 0249 0250 QString genreString = mapValue(objectMap, "genre"); 0251 if(!genreString.isEmpty()) { 0252 QStringList genres = genreString.split(QRegularExpression(QLatin1String(",\\s+"))); 0253 entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString())); 0254 } 0255 } 0256 0257 QRegularExpression tagRx(QStringLiteral("<.+?>")); 0258 0259 QRegularExpression nationalityRx(QStringLiteral(">Produktionsland:(.*?)</a>")); 0260 QRegularExpressionMatch nationalityMatch = nationalityRx.match(str_); 0261 if(nationalityMatch.hasMatch()) { 0262 const QString n = nationalityMatch.captured(1).remove(tagRx).trimmed(); 0263 entry->setField(QStringLiteral("nationality"), n); 0264 } 0265 0266 QRegularExpression lengthRx(QStringLiteral(">Dauer:(.*?)</li")); 0267 QRegularExpressionMatch lengthMatch = lengthRx.match(str_); 0268 if(lengthMatch.hasMatch()) { 0269 const QString l = lengthMatch.captured(1).remove(tagRx).remove(QStringLiteral(" Min")).trimmed(); 0270 entry->setField(QStringLiteral("running-time"), l); 0271 } 0272 0273 QRegularExpression genreRx(QStringLiteral("<dt.*?>Genre</dt><dd.*?>(.*?)</dd>")); 0274 QRegularExpressionMatch genreMatch = genreRx.match(str_); 0275 if(genreMatch.hasMatch()) { 0276 QRegularExpression anchorRx(QStringLiteral("<a.*?>(.*?)</a>")); 0277 QRegularExpressionMatchIterator i = anchorRx.globalMatch(genreMatch.captured(1)); 0278 QStringList genres; 0279 while(i.hasNext()) { 0280 genres += i.next().captured(1).trimmed(); 0281 } 0282 entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString())); 0283 } 0284 0285 QRegularExpression certRx(QStringLiteral(">FSK:(.*?)</a")); 0286 QRegularExpressionMatch certMatch = certRx.match(str_); 0287 if(certMatch.hasMatch()) { 0288 // need to translate? Let's just add FSK ratings to the allowed values 0289 QStringList allowed = entry->collection()->hasField(QStringLiteral("certification")) ? 0290 entry->collection()->fieldByName(QStringLiteral("certification"))->allowed() : 0291 QStringList(); 0292 if(!allowed.contains(QStringLiteral("FSK 0 (DE)"))) { 0293 allowed << QStringLiteral("FSK 0 (DE)") 0294 << QStringLiteral("FSK 6 (DE)") 0295 << QStringLiteral("FSK 12 (DE)") 0296 << QStringLiteral("FSK 16 (DE)") 0297 << QStringLiteral("FSK 18 (DE)"); 0298 entry->collection()->fieldByName(QStringLiteral("certification"))->setAllowed(allowed); 0299 } 0300 QString c = certMatch.captured(1).remove(tagRx).trimmed(); 0301 if(c == QStringLiteral("ab 0")) { 0302 c = QStringLiteral("FSK 0 (DE)"); 0303 } else if(c == QLatin1String("ab 6")) { 0304 c = QStringLiteral("FSK 6 (DE)"); 0305 } else if(c == QLatin1String("ab 12")) { 0306 c = QStringLiteral("FSK 12 (DE)"); 0307 } else if(c == QLatin1String("ab 16")) { 0308 c = QStringLiteral("FSK 16 (DE)"); 0309 } else if(c == QLatin1String("ab 18")) { 0310 c = QStringLiteral("FSK 18 (DE)"); 0311 } 0312 entry->setField(QStringLiteral("certification"), c); 0313 } 0314 0315 QRegularExpression studioRx(QStringLiteral(">Filmverleih:(.*?)</li")); 0316 QRegularExpressionMatch studioMatch = studioRx.match(str_); 0317 if(studioMatch.hasMatch()) { 0318 QString s = studioMatch.captured(1).remove(tagRx).trimmed(); 0319 entry->setField(QStringLiteral("studio"), s); 0320 } 0321 0322 QRegularExpression plotRx(QStringLiteral("(<p class=\"movie-plot-synopsis\">.+?</p>)<(div|h2)"), 0323 QRegularExpression::DotMatchesEverythingOption); 0324 QRegularExpressionMatch plotMatch = plotRx.match(str_); 0325 if(plotMatch.hasMatch()) { 0326 QString plot; 0327 // sometimes the plot starts with double <p> 0328 QRegularExpression pRx(QStringLiteral("<p.*?>(?!<p.*?>).*?</p>")); 0329 QRegularExpressionMatchIterator i = pRx.globalMatch(plotMatch.captured(1)); 0330 while(i.hasNext()) { 0331 plot += i.next().captured(0); 0332 } 0333 plot = plot.remove(tagRx).trimmed(); 0334 entry->setField(QStringLiteral("plot"), plot); 0335 } 0336 0337 QString cover = entry->field(QStringLiteral("cover")); 0338 if(!cover.isEmpty()) { 0339 const QString id = ImageFactory::addImage(QUrl::fromUserInput(cover), true /* quiet */); 0340 if(id.isEmpty()) { 0341 message(i18n("The cover image could not be loaded."), MessageHandler::Warning); 0342 } 0343 // empty image ID is ok 0344 entry->setField(QStringLiteral("cover"), id); 0345 } 0346 } 0347 0348 Tellico::Fetch::FetchRequest KinoFetcher::updateRequest(Data::EntryPtr entry_) { 0349 QString t = entry_->field(QStringLiteral("title")); 0350 if(!t.isEmpty()) { 0351 return FetchRequest(Fetch::Title, t); 0352 } 0353 return FetchRequest(); 0354 } 0355 0356 Tellico::Fetch::ConfigWidget* KinoFetcher::configWidget(QWidget* parent_) const { 0357 return new KinoFetcher::ConfigWidget(parent_, this); 0358 } 0359 0360 QString KinoFetcher::defaultName() { 0361 return QStringLiteral("Kino.de"); 0362 } 0363 0364 QString KinoFetcher::defaultIcon() { 0365 return favIcon("https://www.kino.de"); 0366 } 0367 0368 //static 0369 Tellico::StringHash KinoFetcher::allOptionalFields() { 0370 StringHash hash; 0371 // TODO: add link 0372 // hash[QStringLiteral("kino")] = i18n("Kino.de Link"); 0373 return hash; 0374 } 0375 0376 KinoFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const KinoFetcher* fetcher_) 0377 : Fetch::ConfigWidget(parent_) { 0378 QVBoxLayout* l = new QVBoxLayout(optionsWidget()); 0379 l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); 0380 l->addStretch(); 0381 0382 // now add additional fields widget 0383 addFieldsWidget(KinoFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); 0384 } 0385 0386 QString KinoFetcher::ConfigWidget::preferredName() const { 0387 return KinoFetcher::defaultName(); 0388 }