File indexing completed on 2024-05-12 05:09:40

0001 /***************************************************************************
0002     Copyright (C) 2023 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "opdsfetcher.h"
0026 #include "../fieldformat.h"
0027 #include "../collection.h"
0028 #include "../translators/xslthandler.h"
0029 #include "../translators/tellicoimporter.h"
0030 #include "../core/filehandler.h"
0031 #include "../utils/datafileregistry.h"
0032 #include "../utils/guiproxy.h"
0033 #include "../utils/isbnvalidator.h"
0034 #include "../translators/tellico_xml.h"
0035 #include "../tellico_debug.h"
0036 
0037 #include <KLocalizedString>
0038 #include <KIO/Job>
0039 #include <KJobUiDelegate>
0040 #include <KJobWidgets/KJobWidgets>
0041 #include <KAcceleratorManager>
0042 #include <KUrlRequester>
0043 
0044 #include <QLabel>
0045 #include <QGridLayout>
0046 #include <QXmlStreamReader>
0047 #include <QPushButton>
0048 
0049 using namespace Tellico;
0050 using Tellico::Fetch::OPDSFetcher;
0051 
0052 OPDSFetcher::Reader::Reader(const QUrl& catalog_) : catalog(catalog_), isAcquisition(false) {
0053 }
0054 
0055 // read the catalog file and return the search description url
0056 bool OPDSFetcher::Reader::parse() {
0057   opdsText = FileHandler::readDataFile(catalog);
0058   QXmlStreamReader xml(opdsText);
0059   int depth = 0;
0060   while(xml.readNext() != QXmlStreamReader::Invalid) {
0061     switch(xml.tokenType()) {
0062       case QXmlStreamReader::StartElement:
0063         ++depth;
0064         if(depth == 2 && xml.namespaceUri() == Tellico::XML::nsAtom) {
0065           if(xml.name() == QLatin1String("link")) {
0066             auto attributes = xml.attributes();
0067             if(attributes.value(QStringLiteral("rel")) == QLatin1String("search")) {
0068               // found the search url
0069               const auto href = QUrl(attributes.value(QStringLiteral("href")).toString());
0070               searchUrl = catalog.resolved(href);
0071               myLog() << "Search url is" << searchUrl.toDisplayString();
0072             } else if(attributes.value(QStringLiteral("rel")) == QLatin1String("self")) {
0073               // for now, consider the feed an acquisition feed if the self link is labeled as an acquisition feed
0074               isAcquisition = attributes.value(QStringLiteral("type")).contains(QLatin1String("kind=acquisition"));
0075               myLog() << "Catalog kind is 'acquisition'";
0076             }
0077           }
0078         }
0079         break;
0080       case QXmlStreamReader::EndElement:
0081         --depth;
0082         break;
0083       default:
0084         break;
0085     }
0086   }
0087   // valid catalog either has a search url or is an acquisition feed
0088   return !searchUrl.isEmpty() || isAcquisition;
0089 }
0090 
0091 bool OPDSFetcher::Reader::readSearchTemplate() {
0092   myLog() << "Reading catalog:" << catalog.toDisplayString();
0093   if(searchUrl.isEmpty() && !isAcquisition && !parse()) return false;
0094   if(searchUrl.isEmpty()) return false;
0095   //    myDebug() << "Reading search description:" << searchDescriptionUrl;
0096   // read the search description and find the search template
0097   const QByteArray descText = FileHandler::readDataFile(searchUrl);
0098   QXmlStreamReader xml(descText);
0099   int depth = 0;
0100   QString text, shortName, longName;
0101   while(xml.readNext() != QXmlStreamReader::Invalid) {
0102     switch(xml.tokenType()) {
0103       case QXmlStreamReader::StartElement:
0104         ++depth;
0105         if(depth == 2 && xml.name() == QLatin1String("Url") &&
0106                          xml.namespaceUri() == XML::nsOpenSearch) {
0107           auto attributes = xml.attributes();
0108           if(attributes.value(QLatin1String("type")) == QLatin1String("application/atom+xml")) {
0109             searchTemplate = attributes.value(QStringLiteral("template")).toString();
0110           }
0111         }
0112         break;
0113       case QXmlStreamReader::EndElement:
0114         if(depth == 2) {
0115           if(xml.name() == QLatin1String("LongName")) {
0116             longName = text.simplified();
0117           } else if(xml.name() == QLatin1String("ShortName")) {
0118             shortName = text.simplified();
0119           } else if(xml.name() == QLatin1String("Image")) {
0120             icon = text.simplified();
0121           } else if(xml.name() == QLatin1String("Attribution")) {
0122             attribution = text.simplified();
0123           }
0124         }
0125         --depth;
0126         text.clear();
0127         break;
0128       case QXmlStreamReader::Characters:
0129         text += xml.text();
0130         break;
0131       default:
0132         break;
0133     }
0134   }
0135   name = longName.isEmpty() ? shortName : longName;
0136   myLog() << "Search template is" << searchTemplate;
0137   return !searchTemplate.isEmpty();
0138 }
0139 
0140 OPDSFetcher::OPDSFetcher(QObject* parent_)
0141     : Fetcher(parent_), m_xsltHandler(nullptr), m_started(false) {
0142 }
0143 
0144 OPDSFetcher::~OPDSFetcher() {
0145   delete m_xsltHandler;
0146   m_xsltHandler = nullptr;
0147 }
0148 
0149 QString OPDSFetcher::source() const {
0150   return m_name.isEmpty() ? defaultName() : m_name;
0151 }
0152 
0153 QString OPDSFetcher::attribution() const {
0154   return m_attribution;
0155 }
0156 
0157 QString OPDSFetcher::icon() const {
0158   return favIcon(QUrl(m_icon));
0159 }
0160 
0161 bool OPDSFetcher::canSearch(Fetch::FetchKey k) const {
0162   return k == Title || k == Keyword || k == ISBN;
0163 }
0164 
0165 bool OPDSFetcher::canFetch(int type) const {
0166   return type == Data::Collection::Book || type == Data::Collection::Bibtex;
0167 }
0168 
0169 void OPDSFetcher::readConfigHook(const KConfigGroup& config_) {
0170   m_catalog = config_.readEntry("Catalog");
0171   m_searchTemplate = config_.readEntry("SearchTemplate");
0172   m_icon = config_.readEntry("Icon");
0173   m_attribution = config_.readEntry("Attribution");
0174 }
0175 
0176 void OPDSFetcher::saveConfigHook(KConfigGroup& config_) {
0177   if(!m_searchTemplate.isEmpty()) {
0178     config_.writeEntry("SearchTemplate", m_searchTemplate);
0179   }
0180   if(!m_icon.isEmpty()) {
0181     config_.writeEntry("Icon", m_icon);
0182   }
0183   if(!m_attribution.isEmpty()) {
0184     config_.writeEntry("Attribution", m_attribution);
0185   }
0186 }
0187 
0188 void OPDSFetcher::search() {
0189   m_started = true;
0190   if(m_catalog.isEmpty()) {
0191     myDebug() << source() << "- url is not set";
0192     stop();
0193     return;
0194   }
0195 
0196   Reader reader(QUrl::fromUserInput(m_catalog));
0197   if(m_searchTemplate.isEmpty()) {
0198     if(!reader.parse()) {
0199       myDebug() << source() << "- failed to parse";
0200       message(i18n("Tellico is unable to read the search description in the OPDS catalog."), MessageHandler::Error);
0201       stop();
0202       return;
0203     }
0204     if(reader.isAcquisition) {
0205       parseData(reader.opdsText, true /* manualSearch */);
0206       return;
0207     }
0208     if(!reader.readSearchTemplate()) {
0209       myDebug() << source() << "- no search template";
0210       message(i18n("Tellico is unable to read the search description in the OPDS catalog."), MessageHandler::Error);
0211       stop();
0212       return;
0213     }
0214   }
0215   // continue with search
0216   if(m_searchTemplate.isEmpty()) {
0217     m_searchTemplate = reader.searchTemplate;
0218     m_icon = reader.icon;
0219     m_attribution = reader.attribution;
0220   }
0221 
0222   QString searchTerm;
0223   switch(request().key()) {
0224     case Title:
0225     case Keyword:
0226       searchTerm = request().value();
0227       break;
0228 
0229     case ISBN:
0230       {
0231         QString isbn = request().value().section(QLatin1Char(';'), 0);
0232         isbn.remove(QLatin1Char('-'));
0233         searchTerm = isbn;
0234       }
0235       break;
0236 
0237     default:
0238       myWarning() << "key not recognized: " << request().key();
0239       stop();
0240       break;
0241   }
0242 
0243   QString searchUrl = m_searchTemplate;
0244   searchUrl.replace(QStringLiteral("{searchTerms}"), searchTerm);
0245   QUrl u(searchUrl);
0246   myLog() << "Searching" << u.toDisplayString();
0247 
0248   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
0249   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
0250   connect(m_job.data(), &KJob::result,
0251           this, &OPDSFetcher::slotComplete);
0252 }
0253 
0254 void OPDSFetcher::stop() {
0255   if(!m_started) {
0256     return;
0257   }
0258   if(m_job) {
0259     m_job->kill();
0260     m_job = nullptr;
0261   }
0262 
0263   m_started = false;
0264   emit signalDone(this);
0265 }
0266 
0267 void OPDSFetcher::slotComplete(KJob*) {
0268   if(m_job->error()) {
0269     m_job->uiDelegate()->showErrorMessage();
0270     stop();
0271     return;
0272   }
0273 
0274   QByteArray data = m_job->data();
0275   if(data.isEmpty()) {
0276     stop();
0277     return;
0278   }
0279   // see bug 319662. If fetcher is cancelled, job is killed
0280   // if the pointer is retained, it gets double-deleted
0281   m_job = nullptr;
0282   parseData(data);
0283 }
0284 
0285 void OPDSFetcher::parseData(const QByteArray& data_, bool manualSearch_) {
0286 #if 0
0287   myWarning() << "Remove debug from opdsfetcher.cpp";
0288   QFile f(QString::fromLatin1("/tmp/test.xml"));
0289   if(f.open(QIODevice::WriteOnly)) {
0290     QTextStream t(&f);
0291     t.setCodec("UTF-8");
0292     t << data_;
0293   }
0294   f.close();
0295 #endif
0296 
0297   if(!m_xsltHandler) {
0298     initXSLTHandler();
0299     if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
0300       stop();
0301       return;
0302     }
0303   }
0304 
0305   // assume result is always utf-8
0306   QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(data_.constData(), data_.size()));
0307   Import::TellicoImporter imp(str);
0308   imp.setBaseUrl(QUrl(m_searchTemplate.isEmpty() ? m_catalog : m_searchTemplate));
0309   Data::CollPtr coll = imp.collection();
0310 
0311   if(!coll) {
0312     myDebug() << source() << " - no collection pointer";
0313     stop();
0314     return;
0315   }
0316 
0317   foreach(Data::EntryPtr entry, coll->entries()) {
0318     // if manual search, do poor man's comparison
0319     if(manualSearch_ && !matchesEntry(entry)) continue;
0320     FetchResult* r = new FetchResult(this, entry);
0321     m_entries.insert(r->uid, entry);
0322     emit signalResultFound(r);
0323   }
0324   stop();
0325 }
0326 
0327 bool OPDSFetcher::matchesEntry(Data::EntryPtr entry_) const {
0328   switch(request().key()) {
0329     case Title:
0330       return entry_->title().contains(request().value(), Qt::CaseInsensitive);
0331     case ISBN:
0332       {
0333         ISBNComparison comp;
0334         return comp(entry_->field(QStringLiteral("isbn")), request().value());
0335       }
0336     case Keyword:
0337       return entry_->title().contains(request().value(), Qt::CaseInsensitive) ||
0338         entry_->field(QStringLiteral("author")).contains(request().value(), Qt::CaseInsensitive) ||
0339         entry_->field(QStringLiteral("keyword")).contains(request().value(), Qt::CaseInsensitive) ||
0340         entry_->field(QStringLiteral("publisher")).contains(request().value(), Qt::CaseInsensitive) ||
0341         entry_->field(QStringLiteral("genre")).contains(request().value(), Qt::CaseInsensitive) ||
0342         entry_->field(QStringLiteral("pub_year")).contains(request().value(), Qt::CaseInsensitive) ||
0343         entry_->field(QStringLiteral("plot")).contains(request().value(), Qt::CaseInsensitive);
0344     default:
0345       break;
0346   }
0347   return false;
0348 }
0349 
0350 Tellico::Data::EntryPtr OPDSFetcher::fetchEntryHook(uint uid_) {
0351   auto entry = m_entries[uid_];
0352   if(!entry) return entry;
0353   // check whether the summary shows content from Calibre server and try to compensate
0354   QString plot = entry->field(QStringLiteral("plot"));
0355   static const QByteArray xhtml("<div xmlns=\"http://www.w3.org/1999/xhtml\">");
0356   if(plot.startsWith(QLatin1String(xhtml))) {
0357     plot = plot.mid(xhtml.length());
0358     myLog() << "Detected Calibre-style plot format";
0359     myLog() << "Removing XHTML div";
0360     static const QByteArray divEnd("</div>");
0361     if(plot.endsWith(QLatin1String(divEnd))) {
0362       plot.chop(divEnd.length());
0363     }
0364     static const QRegularExpression ratingRx(QStringLiteral("RATING: (★+)<br/>"));
0365     auto ratingMatch = ratingRx.match(plot);
0366     if(ratingMatch.hasMatch()) {
0367       // length of star match is the rating number
0368       entry->setField(QStringLiteral("rating"), QString::number(ratingMatch.captured(1).length()));
0369       plot.remove(ratingMatch.captured());
0370     }
0371     static const QRegularExpression tagsRX(QStringLiteral("TAGS: (.+?)<br/>"));
0372     auto tagsMatch = tagsRX.match(plot);
0373     if(tagsMatch.hasMatch()) {
0374       entry->setField(QStringLiteral("genre"),
0375                       FieldFormat::splitValue(tagsMatch.captured(1), FieldFormat::CommaRegExpSplit)
0376                                   .join(FieldFormat::delimiterString()));
0377       plot.remove(tagsMatch.captured());
0378     }
0379     static const QRegularExpression seriesRx(QStringLiteral("SERIES: (.+?) \\[(\\d+)\\]<br/>"));
0380     auto seriesMatch = seriesRx.match(plot);
0381     if(seriesMatch.hasMatch()) {
0382       entry->setField(QStringLiteral("series"), seriesMatch.captured(1));
0383       entry->setField(QStringLiteral("series_num"), seriesMatch.captured(2));
0384       plot.remove(seriesMatch.captured());
0385     }
0386     plot.remove(QLatin1String("SUMMARY:<br/>"));
0387     plot = plot.simplified();
0388     if(plot.startsWith(QLatin1String("<p class=\"description\">"))) {
0389       plot = plot.mid(23);
0390       if(plot.endsWith(QLatin1String("</p>"))) {
0391         plot.chop(4);
0392       }
0393     }
0394     entry->setField(QStringLiteral("plot"), plot);
0395   }
0396   return entry;
0397 }
0398 
0399 void OPDSFetcher::initXSLTHandler() {
0400   QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("atom2tellico.xsl"));
0401   if(xsltfile.isEmpty()) {
0402     myWarning() << "can not locate atom2tellico.xsl.";
0403     return;
0404   }
0405 
0406   QUrl u = QUrl::fromLocalFile(xsltfile);
0407 
0408   delete m_xsltHandler;
0409   m_xsltHandler = new XSLTHandler(u);
0410   if(!m_xsltHandler->isValid()) {
0411     myWarning() << "error in atom2tellico.xsl.";
0412     delete m_xsltHandler;
0413     m_xsltHandler = nullptr;
0414   }
0415 }
0416 
0417 Tellico::Fetch::FetchRequest OPDSFetcher::updateRequest(Data::EntryPtr entry_) {
0418   QString t = entry_->field(QStringLiteral("title"));
0419   if(!t.isEmpty()) {
0420     return FetchRequest(Fetch::Title, t);
0421   }
0422   return FetchRequest();
0423 }
0424 
0425 QString OPDSFetcher::defaultName() {
0426   return i18n("OPDS Catalog");
0427 }
0428 
0429 QString OPDSFetcher::defaultIcon() {
0430   return QStringLiteral("folder-book");
0431 }
0432 
0433 // static
0434 Tellico::StringHash OPDSFetcher::allOptionalFields() {
0435   StringHash hash;
0436   hash[QStringLiteral("url")] = i18n("URL");
0437   return hash;
0438 }
0439 
0440 Tellico::Fetch::ConfigWidget* OPDSFetcher::configWidget(QWidget* parent_) const {
0441   return new ConfigWidget(parent_, this);
0442 }
0443 
0444 OPDSFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const OPDSFetcher* fetcher_ /*=0*/)
0445     : Fetch::ConfigWidget(parent_) {
0446   QGridLayout* l = new QGridLayout(optionsWidget());
0447   l->setSpacing(4);
0448   l->setColumnStretch(1, 10);
0449 
0450   int row = -1;
0451   QLabel* label = new QLabel(i18n("Catalog: "), optionsWidget());
0452   l->addWidget(label, ++row, 0);
0453   m_catalogEdit = new KUrlRequester(optionsWidget());
0454   connect(m_catalogEdit, &KUrlRequester::textEdited, this, &ConfigWidget::slotSetModified);
0455   l->addWidget(m_catalogEdit, row, 1);
0456   QString w = i18n("Enter the link to the OPDS server.");
0457   label->setWhatsThis(w);
0458   m_catalogEdit->setWhatsThis(w);
0459   label->setBuddy(m_catalogEdit);
0460 
0461   auto verifyButton = new QPushButton(i18n("&Verify Catalog"), optionsWidget());
0462   connect(verifyButton, &QPushButton::clicked,
0463           this, &ConfigWidget::verifyCatalog);
0464   l->addWidget(verifyButton, ++row, 0);
0465   m_statusLabel = new QLabel(optionsWidget());
0466   l->addWidget(m_statusLabel, row, 1);
0467 
0468   l->setRowStretch(++row, 1);
0469 
0470   // now add additional fields widget
0471   addFieldsWidget(OPDSFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
0472 
0473   if(fetcher_) {
0474     m_catalogEdit->setText(fetcher_->m_catalog);
0475     m_searchTemplate = fetcher_->m_searchTemplate;
0476     m_icon = fetcher_->m_icon;
0477     m_attribution = fetcher_->m_attribution;
0478   }
0479   KAcceleratorManager::manage(optionsWidget());
0480 }
0481 
0482 void OPDSFetcher::ConfigWidget::saveConfigHook(KConfigGroup& config_) {
0483   QString s = m_catalogEdit->text().trimmed();
0484   if(!s.isEmpty()) {
0485     config_.writeEntry("Catalog", s);
0486     config_.writeEntry("SearchTemplate", m_searchTemplate);
0487     config_.writeEntry("Icon", m_icon);
0488     config_.writeEntry("Attribution", m_attribution);
0489   }
0490 }
0491 
0492 QString OPDSFetcher::ConfigWidget::preferredName() const {
0493   auto u = m_catalogEdit->url();
0494   return m_name.isEmpty() ? (u.isEmpty() ? OPDSFetcher::defaultName() : u.host()) : m_name;
0495 }
0496 
0497 void OPDSFetcher::ConfigWidget::verifyCatalog() {
0498   OPDSFetcher::Reader reader(m_catalogEdit->url());
0499   const int imgSize = 0.8*m_statusLabel->height();
0500   if(reader.readSearchTemplate()) {
0501     m_statusLabel->setPixmap(QIcon::fromTheme(QStringLiteral("emblem-checked")).pixmap(imgSize, imgSize));
0502     slotSetModified();
0503     if(!reader.name.isEmpty()) {
0504       emit signalName(reader.name);
0505     }
0506     m_name = reader.name;
0507     m_searchTemplate = reader.searchTemplate;
0508     m_icon = reader.icon;
0509     m_attribution = reader.attribution;
0510   } else if(reader.isAcquisition) {
0511     m_statusLabel->setPixmap(QIcon::fromTheme(QStringLiteral("emblem-added")).pixmap(imgSize, imgSize));
0512     m_searchTemplate.clear();
0513   } else {
0514     m_statusLabel->setPixmap(QIcon::fromTheme(QStringLiteral("emblem-error")).pixmap(imgSize, imgSize));
0515     m_searchTemplate.clear();
0516     m_icon.clear();
0517     m_attribution.clear();
0518   }
0519 }