File indexing completed on 2024-05-12 16:45:43

0001 /***************************************************************************
0002     Copyright (C) 2006-2009 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "animenfofetcher.h"
0026 #include "../utils/guiproxy.h"
0027 #include "../utils/string_utils.h"
0028 #include "../collections/bookcollection.h"
0029 #include "../collections/videocollection.h"
0030 #include "../entry.h"
0031 #include "../fieldformat.h"
0032 #include "../core/filehandler.h"
0033 #include "../images/imagefactory.h"
0034 #include "../tellico_debug.h"
0035 
0036 #include <KLocalizedString>
0037 #include <KConfig>
0038 #include <KIO/Job>
0039 #include <KIO/JobUiDelegate>
0040 #include <KJobWidgets/KJobWidgets>
0041 
0042 #include <QRegExp>
0043 #include <QLabel>
0044 #include <QFile>
0045 #include <QTextStream>
0046 #include <QVBoxLayout>
0047 #include <QUrlQuery>
0048 
namespace {
  // Address of the AnimeNfo search page; also used as the base when
  // resolving relative links found in the returned HTML.
  // The anonymous namespace already gives internal linkage, so no 'static';
  // the pointer itself is const so it cannot be re-seated by accident.
  const char* const ANIMENFO_BASE_URL = "http://www.animenfo.com/search.php";
}
0052 
0053 using namespace Tellico;
0054 using Tellico::Fetch::AnimeNfoFetcher;
0055 
0056 AnimeNfoFetcher::AnimeNfoFetcher(QObject* parent_)
0057     : Fetcher(parent_), m_started(false) {
0058 }
0059 
0060 AnimeNfoFetcher::~AnimeNfoFetcher() {
0061 }
0062 
0063 QString AnimeNfoFetcher::source() const {
0064   return m_name.isEmpty() ? defaultName() : m_name;
0065 }
0066 
0067 bool AnimeNfoFetcher::canFetch(int type) const {
0068   return type == Data::Collection::Book ||
0069          type == Data::Collection::Bibtex ||
0070          type == Data::Collection::Video;
0071 }
0072 
0073 void AnimeNfoFetcher::readConfigHook(const KConfigGroup& config_) {
0074   Q_UNUSED(config_);
0075 }
0076 
0077 void AnimeNfoFetcher::search() {
0078   m_started = true;
0079   m_matches.clear();
0080 
0081   QUrl u(QString::fromLatin1(ANIMENFO_BASE_URL));
0082   QUrlQuery q;
0083   q.addQueryItem(QStringLiteral("action"),   QStringLiteral("Go"));
0084   q.addQueryItem(QStringLiteral("option"),   QStringLiteral("keywords"));
0085 
0086   switch(request().collectionType()) {
0087     case Data::Collection::Book:
0088       q.addQueryItem(QStringLiteral("queryin"),  QStringLiteral("manga_titles"));
0089       break;
0090 
0091     case Data::Collection::Video:
0092       q.addQueryItem(QStringLiteral("queryin"),  QStringLiteral("anime_titles"));
0093       break;
0094 
0095     default:
0096       myWarning() << "collection type not valid:" << request().collectionType();
0097       stop();
0098       return;
0099   }
0100 
0101   switch(request().key()) {
0102     case Keyword:
0103       q.addQueryItem(QStringLiteral("query"), request().value());
0104       break;
0105 
0106     default:
0107       myWarning() << "key not recognized: " << request().key();
0108       stop();
0109       return;
0110   }
0111   u.setQuery(q);
0112 //  myDebug() << "url:" << u;
0113 
0114   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
0115   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
0116   connect(m_job.data(), &KJob::result,
0117           this, &AnimeNfoFetcher::slotComplete);
0118 }
0119 
0120 void AnimeNfoFetcher::stop() {
0121   if(!m_started) {
0122     return;
0123   }
0124 
0125   if(m_job) {
0126     m_job->kill();
0127     m_job = nullptr;
0128   }
0129   m_started = false;
0130   emit signalDone(this);
0131 }
0132 
0133 void AnimeNfoFetcher::slotComplete(KJob*) {
0134 //  myDebug();
0135 
0136   if(m_job->error()) {
0137     m_job->uiDelegate()->showErrorMessage();
0138     stop();
0139     return;
0140   }
0141 
0142   const QByteArray data = m_job->data();
0143   if(data.isEmpty()) {
0144     myDebug() << "no data";
0145     stop();
0146     return;
0147   }
0148 
0149   // since the fetch is done, don't worry about holding the job pointer
0150   m_job = nullptr;
0151 
0152   QString s = Tellico::decodeHTML(data);
0153 #if 0
0154   myWarning() << "Remove debug from animenfofetcher.cpp";
0155   QFile f(QLatin1String("/tmp/test.html"));
0156   if(f.open(QIODevice::WriteOnly)) {
0157     QTextStream t(&f);
0158     t.setCodec("UTF-8");
0159     t << s;
0160   }
0161   f.close();
0162 #endif
0163 
0164   QRegExp infoRx(QLatin1String("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[\"'][^>]*>(.*)</td>"), Qt::CaseInsensitive);
0165   infoRx.setMinimal(true);
0166   QRegExp anchorRx(QLatin1String("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), Qt::CaseInsensitive);
0167   anchorRx.setMinimal(true);
0168   QRegExp yearRx(QLatin1String("\\d{4}"));
0169 
0170   // search page comes in groups of threes
0171   int n = 0;
0172   QString u, t, y;
0173 
0174   for(int pos = infoRx.indexIn(s); m_started && pos > -1; pos = infoRx.indexIn(s, pos+1)) {
0175     if(n == 0 && !u.isEmpty()) {
0176       FetchResult* r = new FetchResult(this, t, y);
0177       QUrl url = QUrl(QString::fromLatin1(ANIMENFO_BASE_URL)).resolved(QUrl(u));
0178       url.setQuery(QString());
0179       m_matches.insert(r->uid, url);
0180       // don't emit signal until after putting url in matches hash
0181       emit signalResultFound(r);
0182 
0183       u.clear();
0184       t.clear();
0185       y.clear();
0186     }
0187     switch(n) {
0188       case 0: // title and url
0189         {
0190           int pos2 = anchorRx.indexIn(infoRx.cap(1));
0191           if(pos2 > -1) {
0192             u = anchorRx.cap(1);
0193             t = anchorRx.cap(2);
0194           }
0195         }
0196         break;
0197       case 1: // don't case
0198         break;
0199       case 2:
0200         if(yearRx.exactMatch(infoRx.cap(1))) {
0201           y = infoRx.cap(1);
0202         }
0203         break;
0204     }
0205 
0206     n = (n+1)%3;
0207   }
0208 
0209   // grab last response
0210   if(!u.isEmpty()) {
0211     FetchResult* r = new FetchResult(this, t, y, QString());
0212     QUrl url = QUrl(QString::fromLatin1(ANIMENFO_BASE_URL)).resolved(QUrl(u));
0213     url.setQuery(QString());
0214     m_matches.insert(r->uid, url);
0215     // don't emit signal until after putting url in matches hash
0216     emit signalResultFound(r);
0217   }
0218 
0219   stop();
0220 }
0221 
0222 Tellico::Data::EntryPtr AnimeNfoFetcher::fetchEntryHook(uint uid_) {
0223   // if we already grabbed this one, then just pull it out of the dict
0224   Data::EntryPtr entry = m_entries[uid_];
0225   if(entry) {
0226     return entry;
0227   }
0228 
0229   QUrl url = m_matches[uid_];
0230   if(url.isEmpty()) {
0231     myWarning() << "no url in map";
0232     return Data::EntryPtr();
0233   }
0234 
0235   QString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true, true));
0236   if(results.isEmpty()) {
0237     myDebug() << "no text results";
0238     return Data::EntryPtr();
0239   }
0240 
0241 #if 0
0242   myWarning() << "Remove debug from animenfofetcher.cpp";
0243   QFile f(QLatin1String("/tmp/test.html"));
0244   if(f.open(QIODevice::WriteOnly)) {
0245     QTextStream t(&f);
0246     t.setCodec("UTF-8");
0247     t << results;
0248   }
0249   f.close();
0250 #endif
0251 
0252   entry = parseEntry(results, url);
0253   if(!entry) {
0254     myDebug() << "error in processing entry";
0255     return Data::EntryPtr();
0256   }
0257   m_entries.insert(uid_, entry); // keep for later
0258   return entry;
0259 }
0260 
0261 Tellico::Data::EntryPtr AnimeNfoFetcher::parseEntry(const QString& str_, const QUrl& url_) {
0262  // myDebug();
0263  // class might be anime_info_top
0264   QRegExp infoRx(QLatin1String("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[^>]*>(.*)</td>"), Qt::CaseInsensitive);
0265   infoRx.setMinimal(true);
0266   QRegExp tagRx(QLatin1String("<.*>"));
0267   tagRx.setMinimal(true);
0268   QRegExp anchorRx(QLatin1String("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), Qt::CaseInsensitive);
0269   anchorRx.setMinimal(true);
0270   QRegExp jsRx(QLatin1String("<script.*</script>"), Qt::CaseInsensitive);
0271   jsRx.setMinimal(true);
0272 
0273   QString s = str_;
0274   s.remove(jsRx);
0275 
0276   Data::CollPtr coll;
0277   switch(request().collectionType()) {
0278     case Data::Collection::Book:
0279     case Data::Collection::Bibtex:
0280       coll = Data::CollPtr(new Data::BookCollection(true));
0281       break;
0282 
0283     case Data::Collection::Video:
0284       coll = Data::CollPtr(new Data::VideoCollection(true));
0285       break;
0286 
0287     default:
0288       return Data::EntryPtr();
0289   }
0290 
0291   // add new fields
0292   Data::FieldPtr f(new Data::Field(QStringLiteral("origtitle"), i18n("Original Title")));
0293   coll->addField(f);
0294 
0295   f = new Data::Field(QStringLiteral("alttitle"), i18n("Alternative Titles"), Data::Field::Table);
0296   f->setFormatType(FieldFormat::FormatTitle);
0297   coll->addField(f);
0298 
0299   f = new Data::Field(QStringLiteral("distributor"), i18n("Distributor"));
0300   f->setCategory(i18n("Other People"));
0301   f->setFlags(Data::Field::AllowCompletion | Data::Field::AllowMultiple | Data::Field::AllowGrouped);
0302   f->setFormatType(FieldFormat::FormatPlain);
0303   coll->addField(f);
0304 
0305   f = new Data::Field(QStringLiteral("episodes"), i18n("Episodes"), Data::Field::Number);
0306   f->setCategory(i18n("Features"));
0307   coll->addField(f);
0308 
0309   f = new Data::Field(QStringLiteral("animenfo"), i18n("AnimeNfo Link"), Data::Field::URL);
0310   f->setCategory(i18n("General"));
0311   coll->addField(f);
0312 
0313   f = new Data::Field(QStringLiteral("animenfo-rating"), i18n("AnimeNfo Rating"), Data::Field::Rating);
0314   f->setCategory(i18n("General"));
0315   f->setProperty(QStringLiteral("maximum"), QStringLiteral("10"));
0316   coll->addField(f);
0317 
0318  // map captions in HTML to field names
0319   QHash<QString, QString> fieldMap;
0320   fieldMap.insert(QStringLiteral("Title"), QStringLiteral("title"));
0321   fieldMap.insert(QStringLiteral("Japanese Title"), QStringLiteral("origtitle"));
0322   fieldMap.insert(QStringLiteral("Total Episodes"), QStringLiteral("episodes"));
0323   fieldMap.insert(QStringLiteral("Category"), QStringLiteral("keyword"));
0324   fieldMap.insert(QStringLiteral("Genres"), QStringLiteral("genre"));
0325   fieldMap.insert(QStringLiteral("Genre"), QStringLiteral("genre"));
0326   fieldMap.insert(QStringLiteral("Studio"), QStringLiteral("studio"));
0327   fieldMap.insert(QStringLiteral("US Distribution"), QStringLiteral("distributor"));
0328   fieldMap.insert(QStringLiteral("Author"), QStringLiteral("author"));
0329   fieldMap.insert(QStringLiteral("Publisher"), QStringLiteral("publisher"));
0330   fieldMap.insert(QStringLiteral("Director"), QStringLiteral("director"));
0331   fieldMap.insert(QStringLiteral("Script"), QStringLiteral("writer"));
0332   fieldMap.insert(QStringLiteral("Music"), QStringLiteral("composer"));
0333   fieldMap.insert(QStringLiteral("User Rating"), QStringLiteral("animenfo-rating"));
0334 
0335   switch(request().collectionType()) {
0336     case Data::Collection::Book:
0337     case Data::Collection::Bibtex:
0338       fieldMap.insert(QStringLiteral("Year Published"), QStringLiteral("pub_year"));
0339       break;
0340     case Data::Collection::Video:
0341       fieldMap.insert(QStringLiteral("Year Published"), QStringLiteral("year"));
0342       break;
0343     default:
0344       break;
0345   }
0346 
0347   Data::EntryPtr entry(new Data::Entry(coll));
0348 
0349   QString fullTitle;
0350 
0351   int n = 0;
0352   QString key, value;
0353   for(int pos = infoRx.indexIn(s); pos > -1; pos = infoRx.indexIn(s, pos+1)) {
0354     if(n == 0 && !key.isEmpty()) {
0355       if(fieldMap.contains(key)) {
0356         value = value.simplified();
0357         if(value.endsWith(QLatin1Char(';'))) {
0358           value.chop(1);
0359         }
0360         if(!value.isEmpty() && value != QLatin1String("-")) {
0361           const QString fieldName = fieldMap.value(key);
0362           if(key == QLatin1String("Title")) {
0363             // strip possible trailing year, etc.
0364             fullTitle = value;
0365             value.remove(QRegExp(QLatin1String("\\s*\\([^)]*\\)$")));
0366             entry->setField(fieldName, value);
0367           } else if(key == QLatin1String("Total Episodes")) {
0368             // strip possible trailing text
0369             value.remove(QRegExp(QLatin1String("[\\D].*$")));
0370             entry->setField(fieldName, value);
0371           } else if(key == QLatin1String("User Rating")) {
0372             QRegExp rating(QLatin1String("^(.*)/10"));
0373             if(rating.indexIn(value) > -1) {
0374               const double d = rating.cap(1).toDouble();
0375               entry->setField(fieldName, QString::number(static_cast<int>(d+0.5)));
0376             }
0377           } else if(key == QLatin1String("Year Published")) {
0378             // strip possible trailing text
0379             value.remove(QRegExp(QLatin1String("[\\D;].*$")));
0380             entry->setField(fieldName, value);
0381           } else {
0382             entry->setField(fieldName, value);
0383           }
0384           if(fieldName == QLatin1String("studio") ||
0385              fieldName == QLatin1String("genre") ||
0386              fieldName == QLatin1String("script") ||
0387              fieldName == QLatin1String("distributor") ||
0388              fieldName == QLatin1String("director") ||
0389              fieldName == QLatin1String("writer") ||
0390              fieldName == QLatin1String("author") ||
0391              fieldName == QLatin1String("publisher") ||
0392              fieldName == QLatin1String("composer")) {
0393             QStringList values = entry->field(fieldName).split(QRegExp(QLatin1String("\\s*,\\s*")));
0394             entry->setField(fieldName, values.join(FieldFormat::delimiterString()));
0395           }
0396         }
0397       }
0398       key.clear();
0399       value.clear();
0400     }
0401     switch(n) {
0402       case 0:
0403         key = infoRx.cap(1).remove(tagRx);
0404         break;
0405       case 1:
0406         value = infoRx.cap(1).replace(QLatin1String("<br />"), QLatin1String("; ")).remove(tagRx);
0407         break;
0408     }
0409     n = (n+1)%2;
0410   }
0411   entry->setField(QStringLiteral("animenfo"), url_.url());
0412 
0413   // image
0414   QRegExp imgRx(QStringLiteral("<img\\s+[^>]*src\\s*=\\s*[\"']([^>]*)[\"']\\s+[^>]*alt\\s*=\\s*[\"']%1[\"']")
0415                                     .arg(QRegExp::escape(fullTitle)), Qt::CaseInsensitive);
0416   imgRx.setMinimal(true);
0417   int pos = imgRx.indexIn(s);
0418   if(pos > -1) {
0419     QUrl imgURL = QUrl(QLatin1String(ANIMENFO_BASE_URL)).resolved(QUrl(imgRx.cap(1)));
0420     QString id = ImageFactory::addImage(imgURL, true);
0421     if(!id.isEmpty()) {
0422       entry->setField(QStringLiteral("cover"), id);
0423     } else {
0424       myDebug() << "bad cover" << imgURL.url();
0425     }
0426   }
0427 
0428   // now look for alternative titles and plot
0429   const QString a = QStringLiteral("Alternative titles");
0430   pos = s.indexOf(a, 0, Qt::CaseInsensitive);
0431   if(pos > -1) {
0432     pos += a.length();
0433     int pos2 = s.indexOf(QLatin1String("<td class=\"anime_cat_left"), pos+1);
0434     if(pos2 > -1) {
0435       value = s.mid(pos, pos2-pos).simplified();
0436       value.replace(QLatin1String("<br />"), FieldFormat::rowDelimiterString());
0437       value = value.remove(tagRx).trimmed();
0438       entry->setField(QStringLiteral("alttitle"), value);
0439     }
0440   }
0441 
0442   pos = s.indexOf(QLatin1String("Description"), pos > -1 ? pos : 0);
0443   if(pos > -1) {
0444     QRegExp descRx(QLatin1String("<td\\s[^>]*class\\s*=\\s*[\"']description[\"'].*>(.*)</td"), Qt::CaseInsensitive);
0445     descRx.setMinimal(true);
0446     pos = descRx.indexIn(s, pos+1);
0447     if(pos > -1) {
0448       entry->setField(QStringLiteral("plot"), descRx.cap(1).remove(tagRx).simplified());
0449     }
0450   }
0451 
0452   pos = s.indexOf(QLatin1String("Voice Talent"));
0453   if(pos > -1) {
0454     QRegExp charRx(QLatin1String("<a href=['\"]/anime/character/display.php.*>(.*)</a>"), Qt::CaseInsensitive);
0455     charRx.setMinimal(true);
0456     QRegExp voiceRx(QLatin1String("<a href=['\"]animeseiyuu.*>(.*)</a>"), Qt::CaseInsensitive);
0457     voiceRx.setMinimal(true);
0458     QStringList castLines;
0459     for(pos = s.indexOf(charRx, pos); pos > -1; pos = s.indexOf(charRx, pos+1)) {
0460       if(voiceRx.indexIn(s, pos) > -1) {
0461         castLines << voiceRx.cap(1) + FieldFormat::columnDelimiterString() + charRx.cap(1);
0462       }
0463     }
0464     entry->setField(QStringLiteral("cast"), castLines.join(FieldFormat::rowDelimiterString()));
0465   }
0466 
0467   return entry;
0468 }
0469 
0470 Tellico::Fetch::FetchRequest AnimeNfoFetcher::updateRequest(Data::EntryPtr entry_) {
0471   QString t = entry_->field(QStringLiteral("title"));
0472   if(!t.isEmpty()) {
0473     return FetchRequest(Fetch::Keyword, t);
0474   }
0475   return FetchRequest();
0476 }
0477 
0478 Tellico::Fetch::ConfigWidget* AnimeNfoFetcher::configWidget(QWidget* parent_) const {
0479   return new AnimeNfoFetcher::ConfigWidget(parent_, this);
0480 }
0481 
0482 QString AnimeNfoFetcher::defaultName() {
0483   return QStringLiteral("AnimeNfo.com");
0484 }
0485 
0486 QString AnimeNfoFetcher::defaultIcon() {
0487   return favIcon("http://animenfo.com");
0488 }
0489 
0490 //static
0491 Tellico::StringHash AnimeNfoFetcher::allOptionalFields() {
0492   StringHash hash;
0493   hash[QStringLiteral("distributor")]     = i18n("Distributor");
0494   hash[QStringLiteral("episodes")]        = i18n("Episodes");
0495   hash[QStringLiteral("origtitle")]       = i18n("Original Title");
0496   hash[QStringLiteral("alttitle")]        = i18n("Alternative Titles");
0497   hash[QStringLiteral("animenfo-rating")] = i18n("AnimeNfo Rating");
0498   hash[QStringLiteral("animenfo")]        = i18n("AnimeNfo Link");
0499   return hash;
0500 }
0501 
0502 AnimeNfoFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const AnimeNfoFetcher* fetcher_)
0503     : Fetch::ConfigWidget(parent_) {
0504   QVBoxLayout* l = new QVBoxLayout(optionsWidget());
0505   l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
0506   l->addStretch();
0507 
0508   // now add additional fields widget
0509   addFieldsWidget(AnimeNfoFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
0510 }
0511 
0512 QString AnimeNfoFetcher::ConfigWidget::preferredName() const {
0513   return AnimeNfoFetcher::defaultName();
0514 }