File indexing completed on 2024-05-12 16:45:43
0001 /*************************************************************************** 0002 Copyright (C) 2006-2009 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "animenfofetcher.h" 0026 #include "../utils/guiproxy.h" 0027 #include "../utils/string_utils.h" 0028 #include "../collections/bookcollection.h" 0029 #include "../collections/videocollection.h" 0030 #include "../entry.h" 0031 #include "../fieldformat.h" 0032 #include "../core/filehandler.h" 0033 #include "../images/imagefactory.h" 0034 #include "../tellico_debug.h" 0035 0036 #include <KLocalizedString> 0037 #include <KConfig> 0038 #include <KIO/Job> 0039 #include <KIO/JobUiDelegate> 0040 #include <KJobWidgets/KJobWidgets> 0041 0042 #include <QRegExp> 0043 #include <QLabel> 0044 #include <QFile> 0045 #include <QTextStream> 0046 #include <QVBoxLayout> 0047 #include <QUrlQuery> 0048 0049 namespace { 0050 static const char* ANIMENFO_BASE_URL = "http://www.animenfo.com/search.php"; 0051 } 0052 0053 using namespace Tellico; 0054 using Tellico::Fetch::AnimeNfoFetcher; 0055 0056 AnimeNfoFetcher::AnimeNfoFetcher(QObject* parent_) 0057 : Fetcher(parent_), m_started(false) { 0058 } 0059 0060 AnimeNfoFetcher::~AnimeNfoFetcher() { 0061 } 0062 0063 QString AnimeNfoFetcher::source() const { 0064 return m_name.isEmpty() ? defaultName() : m_name; 0065 } 0066 0067 bool AnimeNfoFetcher::canFetch(int type) const { 0068 return type == Data::Collection::Book || 0069 type == Data::Collection::Bibtex || 0070 type == Data::Collection::Video; 0071 } 0072 0073 void AnimeNfoFetcher::readConfigHook(const KConfigGroup& config_) { 0074 Q_UNUSED(config_); 0075 } 0076 0077 void AnimeNfoFetcher::search() { 0078 m_started = true; 0079 m_matches.clear(); 0080 0081 QUrl u(QString::fromLatin1(ANIMENFO_BASE_URL)); 0082 QUrlQuery q; 0083 q.addQueryItem(QStringLiteral("action"), QStringLiteral("Go")); 0084 q.addQueryItem(QStringLiteral("option"), QStringLiteral("keywords")); 0085 0086 switch(request().collectionType()) { 0087 case Data::Collection::Book: 0088 q.addQueryItem(QStringLiteral("queryin"), QStringLiteral("manga_titles")); 0089 break; 0090 0091 case Data::Collection::Video: 0092 q.addQueryItem(QStringLiteral("queryin"), QStringLiteral("anime_titles")); 0093 break; 0094 0095 default: 0096 myWarning() << "collection type not valid:" << request().collectionType(); 0097 stop(); 0098 return; 0099 } 0100 0101 switch(request().key()) { 0102 case Keyword: 0103 q.addQueryItem(QStringLiteral("query"), request().value()); 0104 break; 0105 0106 default: 0107 myWarning() << "key not recognized: " << request().key(); 0108 stop(); 0109 return; 0110 } 0111 u.setQuery(q); 0112 // myDebug() << "url:" << u; 0113 0114 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); 0115 KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); 0116 connect(m_job.data(), &KJob::result, 0117 this, &AnimeNfoFetcher::slotComplete); 0118 } 0119 0120 void AnimeNfoFetcher::stop() { 0121 if(!m_started) { 0122 return; 0123 } 0124 0125 if(m_job) { 0126 m_job->kill(); 0127 m_job = nullptr; 0128 } 0129 m_started = false; 0130 emit signalDone(this); 0131 } 0132 0133 void AnimeNfoFetcher::slotComplete(KJob*) { 0134 // myDebug(); 0135 0136 if(m_job->error()) { 0137 m_job->uiDelegate()->showErrorMessage(); 0138 stop(); 0139 return; 0140 } 0141 0142 const QByteArray data = m_job->data(); 0143 if(data.isEmpty()) { 0144 myDebug() << "no data"; 0145 stop(); 0146 return; 0147 } 0148 0149 // since the fetch is done, don't worry about holding the job pointer 0150 m_job = nullptr; 0151 0152 QString s = Tellico::decodeHTML(data); 0153 #if 0 0154 myWarning() << "Remove debug from animenfofetcher.cpp"; 0155 QFile f(QLatin1String("/tmp/test.html")); 0156 if(f.open(QIODevice::WriteOnly)) { 0157 QTextStream t(&f); 0158 t.setCodec("UTF-8"); 0159 t << s; 0160 } 0161 f.close(); 0162 #endif 0163 0164 QRegExp infoRx(QLatin1String("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[\"'][^>]*>(.*)</td>"), Qt::CaseInsensitive); 0165 infoRx.setMinimal(true); 0166 QRegExp anchorRx(QLatin1String("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), Qt::CaseInsensitive); 0167 anchorRx.setMinimal(true); 0168 QRegExp yearRx(QLatin1String("\\d{4}")); 0169 0170 // search page comes in groups of threes 0171 int n = 0; 0172 QString u, t, y; 0173 0174 for(int pos = infoRx.indexIn(s); m_started && pos > -1; pos = infoRx.indexIn(s, pos+1)) { 0175 if(n == 0 && !u.isEmpty()) { 0176 FetchResult* r = new FetchResult(this, t, y); 0177 QUrl url = QUrl(QString::fromLatin1(ANIMENFO_BASE_URL)).resolved(QUrl(u)); 0178 url.setQuery(QString()); 0179 m_matches.insert(r->uid, url); 0180 // don't emit signal until after putting url in matches hash 0181 emit signalResultFound(r); 0182 0183 u.clear(); 0184 t.clear(); 0185 y.clear(); 0186 } 0187 switch(n) { 0188 case 0: // title and url 0189 { 0190 int pos2 = anchorRx.indexIn(infoRx.cap(1)); 0191 if(pos2 > -1) { 0192 u = anchorRx.cap(1); 0193 t = anchorRx.cap(2); 0194 } 0195 } 0196 break; 0197 case 1: // don't case 0198 break; 0199 case 2: 0200 if(yearRx.exactMatch(infoRx.cap(1))) { 0201 y = infoRx.cap(1); 0202 } 0203 break; 0204 } 0205 0206 n = (n+1)%3; 0207 } 0208 0209 // grab last response 0210 if(!u.isEmpty()) { 0211 FetchResult* r = new FetchResult(this, t, y, QString()); 0212 QUrl url = QUrl(QString::fromLatin1(ANIMENFO_BASE_URL)).resolved(QUrl(u)); 0213 url.setQuery(QString()); 0214 m_matches.insert(r->uid, url); 0215 // don't emit signal until after putting url in matches hash 0216 emit signalResultFound(r); 0217 } 0218 0219 stop(); 0220 } 0221 0222 Tellico::Data::EntryPtr AnimeNfoFetcher::fetchEntryHook(uint uid_) { 0223 // if we already grabbed this one, then just pull it out of the dict 0224 Data::EntryPtr entry = m_entries[uid_]; 0225 if(entry) { 0226 return entry; 0227 } 0228 0229 QUrl url = m_matches[uid_]; 0230 if(url.isEmpty()) { 0231 myWarning() << "no url in map"; 0232 return Data::EntryPtr(); 0233 } 0234 0235 QString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true, true)); 0236 if(results.isEmpty()) { 0237 myDebug() << "no text results"; 0238 return Data::EntryPtr(); 0239 } 0240 0241 #if 0 0242 myWarning() << "Remove debug from animenfofetcher.cpp"; 0243 QFile f(QLatin1String("/tmp/test.html")); 0244 if(f.open(QIODevice::WriteOnly)) { 0245 QTextStream t(&f); 0246 t.setCodec("UTF-8"); 0247 t << results; 0248 } 0249 f.close(); 0250 #endif 0251 0252 entry = parseEntry(results, url); 0253 if(!entry) { 0254 myDebug() << "error in processing entry"; 0255 return Data::EntryPtr(); 0256 } 0257 m_entries.insert(uid_, entry); // keep for later 0258 return entry; 0259 } 0260 0261 Tellico::Data::EntryPtr AnimeNfoFetcher::parseEntry(const QString& str_, const QUrl& url_) { 0262 // myDebug(); 0263 // class might be anime_info_top 0264 QRegExp infoRx(QLatin1String("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[^>]*>(.*)</td>"), Qt::CaseInsensitive); 0265 infoRx.setMinimal(true); 0266 QRegExp tagRx(QLatin1String("<.*>")); 0267 tagRx.setMinimal(true); 0268 QRegExp anchorRx(QLatin1String("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), Qt::CaseInsensitive); 0269 anchorRx.setMinimal(true); 0270 QRegExp jsRx(QLatin1String("<script.*</script>"), Qt::CaseInsensitive); 0271 jsRx.setMinimal(true); 0272 0273 QString s = str_; 0274 s.remove(jsRx); 0275 0276 Data::CollPtr coll; 0277 switch(request().collectionType()) { 0278 case Data::Collection::Book: 0279 case Data::Collection::Bibtex: 0280 coll = Data::CollPtr(new Data::BookCollection(true)); 0281 break; 0282 0283 case Data::Collection::Video: 0284 coll = Data::CollPtr(new Data::VideoCollection(true)); 0285 break; 0286 0287 default: 0288 return Data::EntryPtr(); 0289 } 0290 0291 // add new fields 0292 Data::FieldPtr f(new Data::Field(QStringLiteral("origtitle"), i18n("Original Title"))); 0293 coll->addField(f); 0294 0295 f = new Data::Field(QStringLiteral("alttitle"), i18n("Alternative Titles"), Data::Field::Table); 0296 f->setFormatType(FieldFormat::FormatTitle); 0297 coll->addField(f); 0298 0299 f = new Data::Field(QStringLiteral("distributor"), i18n("Distributor")); 0300 f->setCategory(i18n("Other People")); 0301 f->setFlags(Data::Field::AllowCompletion | Data::Field::AllowMultiple | Data::Field::AllowGrouped); 0302 f->setFormatType(FieldFormat::FormatPlain); 0303 coll->addField(f); 0304 0305 f = new Data::Field(QStringLiteral("episodes"), i18n("Episodes"), Data::Field::Number); 0306 f->setCategory(i18n("Features")); 0307 coll->addField(f); 0308 0309 f = new Data::Field(QStringLiteral("animenfo"), i18n("AnimeNfo Link"), Data::Field::URL); 0310 f->setCategory(i18n("General")); 0311 coll->addField(f); 0312 0313 f = new Data::Field(QStringLiteral("animenfo-rating"), i18n("AnimeNfo Rating"), Data::Field::Rating); 0314 f->setCategory(i18n("General")); 0315 f->setProperty(QStringLiteral("maximum"), QStringLiteral("10")); 0316 coll->addField(f); 0317 0318 // map captions in HTML to field names 0319 QHash<QString, QString> fieldMap; 0320 fieldMap.insert(QStringLiteral("Title"), QStringLiteral("title")); 0321 fieldMap.insert(QStringLiteral("Japanese Title"), QStringLiteral("origtitle")); 0322 fieldMap.insert(QStringLiteral("Total Episodes"), QStringLiteral("episodes")); 0323 fieldMap.insert(QStringLiteral("Category"), QStringLiteral("keyword")); 0324 fieldMap.insert(QStringLiteral("Genres"), QStringLiteral("genre")); 0325 fieldMap.insert(QStringLiteral("Genre"), QStringLiteral("genre")); 0326 fieldMap.insert(QStringLiteral("Studio"), QStringLiteral("studio")); 0327 fieldMap.insert(QStringLiteral("US Distribution"), QStringLiteral("distributor")); 0328 fieldMap.insert(QStringLiteral("Author"), QStringLiteral("author")); 0329 fieldMap.insert(QStringLiteral("Publisher"), QStringLiteral("publisher")); 0330 fieldMap.insert(QStringLiteral("Director"), QStringLiteral("director")); 0331 fieldMap.insert(QStringLiteral("Script"), QStringLiteral("writer")); 0332 fieldMap.insert(QStringLiteral("Music"), QStringLiteral("composer")); 0333 fieldMap.insert(QStringLiteral("User Rating"), QStringLiteral("animenfo-rating")); 0334 0335 switch(request().collectionType()) { 0336 case Data::Collection::Book: 0337 case Data::Collection::Bibtex: 0338 fieldMap.insert(QStringLiteral("Year Published"), QStringLiteral("pub_year")); 0339 break; 0340 case Data::Collection::Video: 0341 fieldMap.insert(QStringLiteral("Year Published"), QStringLiteral("year")); 0342 break; 0343 default: 0344 break; 0345 } 0346 0347 Data::EntryPtr entry(new Data::Entry(coll)); 0348 0349 QString fullTitle; 0350 0351 int n = 0; 0352 QString key, value; 0353 for(int pos = infoRx.indexIn(s); pos > -1; pos = infoRx.indexIn(s, pos+1)) { 0354 if(n == 0 && !key.isEmpty()) { 0355 if(fieldMap.contains(key)) { 0356 value = value.simplified(); 0357 if(value.endsWith(QLatin1Char(';'))) { 0358 value.chop(1); 0359 } 0360 if(!value.isEmpty() && value != QLatin1String("-")) { 0361 const QString fieldName = fieldMap.value(key); 0362 if(key == QLatin1String("Title")) { 0363 // strip possible trailing year, etc. 0364 fullTitle = value; 0365 value.remove(QRegExp(QLatin1String("\\s*\\([^)]*\\)$"))); 0366 entry->setField(fieldName, value); 0367 } else if(key == QLatin1String("Total Episodes")) { 0368 // strip possible trailing text 0369 value.remove(QRegExp(QLatin1String("[\\D].*$"))); 0370 entry->setField(fieldName, value); 0371 } else if(key == QLatin1String("User Rating")) { 0372 QRegExp rating(QLatin1String("^(.*)/10")); 0373 if(rating.indexIn(value) > -1) { 0374 const double d = rating.cap(1).toDouble(); 0375 entry->setField(fieldName, QString::number(static_cast<int>(d+0.5))); 0376 } 0377 } else if(key == QLatin1String("Year Published")) { 0378 // strip possible trailing text 0379 value.remove(QRegExp(QLatin1String("[\\D;].*$"))); 0380 entry->setField(fieldName, value); 0381 } else { 0382 entry->setField(fieldName, value); 0383 } 0384 if(fieldName == QLatin1String("studio") || 0385 fieldName == QLatin1String("genre") || 0386 fieldName == QLatin1String("script") || 0387 fieldName == QLatin1String("distributor") || 0388 fieldName == QLatin1String("director") || 0389 fieldName == QLatin1String("writer") || 0390 fieldName == QLatin1String("author") || 0391 fieldName == QLatin1String("publisher") || 0392 fieldName == QLatin1String("composer")) { 0393 QStringList values = entry->field(fieldName).split(QRegExp(QLatin1String("\\s*,\\s*"))); 0394 entry->setField(fieldName, values.join(FieldFormat::delimiterString())); 0395 } 0396 } 0397 } 0398 key.clear(); 0399 value.clear(); 0400 } 0401 switch(n) { 0402 case 0: 0403 key = infoRx.cap(1).remove(tagRx); 0404 break; 0405 case 1: 0406 value = infoRx.cap(1).replace(QLatin1String("<br />"), QLatin1String("; ")).remove(tagRx); 0407 break; 0408 } 0409 n = (n+1)%2; 0410 } 0411 entry->setField(QStringLiteral("animenfo"), url_.url()); 0412 0413 // image 0414 QRegExp imgRx(QStringLiteral("<img\\s+[^>]*src\\s*=\\s*[\"']([^>]*)[\"']\\s+[^>]*alt\\s*=\\s*[\"']%1[\"']") 0415 .arg(QRegExp::escape(fullTitle)), Qt::CaseInsensitive); 0416 imgRx.setMinimal(true); 0417 int pos = imgRx.indexIn(s); 0418 if(pos > -1) { 0419 QUrl imgURL = QUrl(QLatin1String(ANIMENFO_BASE_URL)).resolved(QUrl(imgRx.cap(1))); 0420 QString id = ImageFactory::addImage(imgURL, true); 0421 if(!id.isEmpty()) { 0422 entry->setField(QStringLiteral("cover"), id); 0423 } else { 0424 myDebug() << "bad cover" << imgURL.url(); 0425 } 0426 } 0427 0428 // now look for alternative titles and plot 0429 const QString a = QStringLiteral("Alternative titles"); 0430 pos = s.indexOf(a, 0, Qt::CaseInsensitive); 0431 if(pos > -1) { 0432 pos += a.length(); 0433 int pos2 = s.indexOf(QLatin1String("<td class=\"anime_cat_left"), pos+1); 0434 if(pos2 > -1) { 0435 value = s.mid(pos, pos2-pos).simplified(); 0436 value.replace(QLatin1String("<br />"), FieldFormat::rowDelimiterString()); 0437 value = value.remove(tagRx).trimmed(); 0438 entry->setField(QStringLiteral("alttitle"), value); 0439 } 0440 } 0441 0442 pos = s.indexOf(QLatin1String("Description"), pos > -1 ? pos : 0); 0443 if(pos > -1) { 0444 QRegExp descRx(QLatin1String("<td\\s[^>]*class\\s*=\\s*[\"']description[\"'].*>(.*)</td"), Qt::CaseInsensitive); 0445 descRx.setMinimal(true); 0446 pos = descRx.indexIn(s, pos+1); 0447 if(pos > -1) { 0448 entry->setField(QStringLiteral("plot"), descRx.cap(1).remove(tagRx).simplified()); 0449 } 0450 } 0451 0452 pos = s.indexOf(QLatin1String("Voice Talent")); 0453 if(pos > -1) { 0454 QRegExp charRx(QLatin1String("<a href=['\"]/anime/character/display.php.*>(.*)</a>"), Qt::CaseInsensitive); 0455 charRx.setMinimal(true); 0456 QRegExp voiceRx(QLatin1String("<a href=['\"]animeseiyuu.*>(.*)</a>"), Qt::CaseInsensitive); 0457 voiceRx.setMinimal(true); 0458 QStringList castLines; 0459 for(pos = s.indexOf(charRx, pos); pos > -1; pos = s.indexOf(charRx, pos+1)) { 0460 if(voiceRx.indexIn(s, pos) > -1) { 0461 castLines << voiceRx.cap(1) + FieldFormat::columnDelimiterString() + charRx.cap(1); 0462 } 0463 } 0464 entry->setField(QStringLiteral("cast"), castLines.join(FieldFormat::rowDelimiterString())); 0465 } 0466 0467 return entry; 0468 } 0469 0470 Tellico::Fetch::FetchRequest AnimeNfoFetcher::updateRequest(Data::EntryPtr entry_) { 0471 QString t = entry_->field(QStringLiteral("title")); 0472 if(!t.isEmpty()) { 0473 return FetchRequest(Fetch::Keyword, t); 0474 } 0475 return FetchRequest(); 0476 } 0477 0478 Tellico::Fetch::ConfigWidget* AnimeNfoFetcher::configWidget(QWidget* parent_) const { 0479 return new AnimeNfoFetcher::ConfigWidget(parent_, this); 0480 } 0481 0482 QString AnimeNfoFetcher::defaultName() { 0483 return QStringLiteral("AnimeNfo.com"); 0484 } 0485 0486 QString AnimeNfoFetcher::defaultIcon() { 0487 return favIcon("http://animenfo.com"); 0488 } 0489 0490 //static 0491 Tellico::StringHash AnimeNfoFetcher::allOptionalFields() { 0492 StringHash hash; 0493 hash[QStringLiteral("distributor")] = i18n("Distributor"); 0494 hash[QStringLiteral("episodes")] = i18n("Episodes"); 0495 hash[QStringLiteral("origtitle")] = i18n("Original Title"); 0496 hash[QStringLiteral("alttitle")] = i18n("Alternative Titles"); 0497 hash[QStringLiteral("animenfo-rating")] = i18n("AnimeNfo Rating"); 0498 hash[QStringLiteral("animenfo")] = i18n("AnimeNfo Link"); 0499 return hash; 0500 } 0501 0502 AnimeNfoFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const AnimeNfoFetcher* fetcher_) 0503 : Fetch::ConfigWidget(parent_) { 0504 QVBoxLayout* l = new QVBoxLayout(optionsWidget()); 0505 l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); 0506 l->addStretch(); 0507 0508 // now add additional fields widget 0509 addFieldsWidget(AnimeNfoFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); 0510 } 0511 0512 QString AnimeNfoFetcher::ConfigWidget::preferredName() const { 0513 return AnimeNfoFetcher::defaultName(); 0514 }