// File indexing completed on 2024-05-12 05:09:40
0001 /*************************************************************************** 0002 Copyright (C) 2023 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "opdsfetcher.h" 0026 #include "../fieldformat.h" 0027 #include "../collection.h" 0028 #include "../translators/xslthandler.h" 0029 #include "../translators/tellicoimporter.h" 0030 #include "../core/filehandler.h" 0031 #include "../utils/datafileregistry.h" 0032 #include "../utils/guiproxy.h" 0033 #include "../utils/isbnvalidator.h" 0034 #include "../translators/tellico_xml.h" 0035 #include "../tellico_debug.h" 0036 0037 #include <KLocalizedString> 0038 #include <KIO/Job> 0039 #include <KJobUiDelegate> 0040 #include <KJobWidgets/KJobWidgets> 0041 #include <KAcceleratorManager> 0042 #include <KUrlRequester> 0043 0044 #include <QLabel> 0045 #include <QGridLayout> 0046 #include <QXmlStreamReader> 0047 #include <QPushButton> 0048 0049 using namespace Tellico; 0050 using Tellico::Fetch::OPDSFetcher; 0051 0052 OPDSFetcher::Reader::Reader(const QUrl& catalog_) : catalog(catalog_), isAcquisition(false) { 0053 } 0054 0055 // read the catalog file and return the search description url 0056 bool OPDSFetcher::Reader::parse() { 0057 opdsText = FileHandler::readDataFile(catalog); 0058 QXmlStreamReader xml(opdsText); 0059 int depth = 0; 0060 while(xml.readNext() != QXmlStreamReader::Invalid) { 0061 switch(xml.tokenType()) { 0062 case QXmlStreamReader::StartElement: 0063 ++depth; 0064 if(depth == 2 && xml.namespaceUri() == Tellico::XML::nsAtom) { 0065 if(xml.name() == QLatin1String("link")) { 0066 auto attributes = xml.attributes(); 0067 if(attributes.value(QStringLiteral("rel")) == QLatin1String("search")) { 0068 // found the search url 0069 const auto href = QUrl(attributes.value(QStringLiteral("href")).toString()); 0070 searchUrl = catalog.resolved(href); 0071 myLog() << "Search url is" << searchUrl.toDisplayString(); 0072 } else if(attributes.value(QStringLiteral("rel")) == QLatin1String("self")) { 0073 // for now, consider the feed an acquisition feed if the self 
link is labeled as an acquisition feed 0074 isAcquisition = attributes.value(QStringLiteral("type")).contains(QLatin1String("kind=acquisition")); 0075 myLog() << "Catalog kind is 'acquisition'"; 0076 } 0077 } 0078 } 0079 break; 0080 case QXmlStreamReader::EndElement: 0081 --depth; 0082 break; 0083 default: 0084 break; 0085 } 0086 } 0087 // valid catalog either has a search url or is an acquisition feed 0088 return !searchUrl.isEmpty() || isAcquisition; 0089 } 0090 0091 bool OPDSFetcher::Reader::readSearchTemplate() { 0092 myLog() << "Reading catalog:" << catalog.toDisplayString(); 0093 if(searchUrl.isEmpty() && !isAcquisition && !parse()) return false; 0094 if(searchUrl.isEmpty()) return false; 0095 // myDebug() << "Reading search description:" << searchDescriptionUrl; 0096 // read the search description and find the search template 0097 const QByteArray descText = FileHandler::readDataFile(searchUrl); 0098 QXmlStreamReader xml(descText); 0099 int depth = 0; 0100 QString text, shortName, longName; 0101 while(xml.readNext() != QXmlStreamReader::Invalid) { 0102 switch(xml.tokenType()) { 0103 case QXmlStreamReader::StartElement: 0104 ++depth; 0105 if(depth == 2 && xml.name() == QLatin1String("Url") && 0106 xml.namespaceUri() == XML::nsOpenSearch) { 0107 auto attributes = xml.attributes(); 0108 if(attributes.value(QLatin1String("type")) == QLatin1String("application/atom+xml")) { 0109 searchTemplate = attributes.value(QStringLiteral("template")).toString(); 0110 } 0111 } 0112 break; 0113 case QXmlStreamReader::EndElement: 0114 if(depth == 2) { 0115 if(xml.name() == QLatin1String("LongName")) { 0116 longName = text.simplified(); 0117 } else if(xml.name() == QLatin1String("ShortName")) { 0118 shortName = text.simplified(); 0119 } else if(xml.name() == QLatin1String("Image")) { 0120 icon = text.simplified(); 0121 } else if(xml.name() == QLatin1String("Attribution")) { 0122 attribution = text.simplified(); 0123 } 0124 } 0125 --depth; 0126 text.clear(); 0127 break; 0128 
case QXmlStreamReader::Characters: 0129 text += xml.text(); 0130 break; 0131 default: 0132 break; 0133 } 0134 } 0135 name = longName.isEmpty() ? shortName : longName; 0136 myLog() << "Search template is" << searchTemplate; 0137 return !searchTemplate.isEmpty(); 0138 } 0139 0140 OPDSFetcher::OPDSFetcher(QObject* parent_) 0141 : Fetcher(parent_), m_xsltHandler(nullptr), m_started(false) { 0142 } 0143 0144 OPDSFetcher::~OPDSFetcher() { 0145 delete m_xsltHandler; 0146 m_xsltHandler = nullptr; 0147 } 0148 0149 QString OPDSFetcher::source() const { 0150 return m_name.isEmpty() ? defaultName() : m_name; 0151 } 0152 0153 QString OPDSFetcher::attribution() const { 0154 return m_attribution; 0155 } 0156 0157 QString OPDSFetcher::icon() const { 0158 return favIcon(QUrl(m_icon)); 0159 } 0160 0161 bool OPDSFetcher::canSearch(Fetch::FetchKey k) const { 0162 return k == Title || k == Keyword || k == ISBN; 0163 } 0164 0165 bool OPDSFetcher::canFetch(int type) const { 0166 return type == Data::Collection::Book || type == Data::Collection::Bibtex; 0167 } 0168 0169 void OPDSFetcher::readConfigHook(const KConfigGroup& config_) { 0170 m_catalog = config_.readEntry("Catalog"); 0171 m_searchTemplate = config_.readEntry("SearchTemplate"); 0172 m_icon = config_.readEntry("Icon"); 0173 m_attribution = config_.readEntry("Attribution"); 0174 } 0175 0176 void OPDSFetcher::saveConfigHook(KConfigGroup& config_) { 0177 if(!m_searchTemplate.isEmpty()) { 0178 config_.writeEntry("SearchTemplate", m_searchTemplate); 0179 } 0180 if(!m_icon.isEmpty()) { 0181 config_.writeEntry("Icon", m_icon); 0182 } 0183 if(!m_attribution.isEmpty()) { 0184 config_.writeEntry("Attribution", m_attribution); 0185 } 0186 } 0187 0188 void OPDSFetcher::search() { 0189 m_started = true; 0190 if(m_catalog.isEmpty()) { 0191 myDebug() << source() << "- url is not set"; 0192 stop(); 0193 return; 0194 } 0195 0196 Reader reader(QUrl::fromUserInput(m_catalog)); 0197 if(m_searchTemplate.isEmpty()) { 0198 if(!reader.parse()) { 0199 
myDebug() << source() << "- failed to parse"; 0200 message(i18n("Tellico is unable to read the search description in the OPDS catalog."), MessageHandler::Error); 0201 stop(); 0202 return; 0203 } 0204 if(reader.isAcquisition) { 0205 parseData(reader.opdsText, true /* manualSearch */); 0206 return; 0207 } 0208 if(!reader.readSearchTemplate()) { 0209 myDebug() << source() << "- no search template"; 0210 message(i18n("Tellico is unable to read the search description in the OPDS catalog."), MessageHandler::Error); 0211 stop(); 0212 return; 0213 } 0214 } 0215 // continue with search 0216 if(m_searchTemplate.isEmpty()) { 0217 m_searchTemplate = reader.searchTemplate; 0218 m_icon = reader.icon; 0219 m_attribution = reader.attribution; 0220 } 0221 0222 QString searchTerm; 0223 switch(request().key()) { 0224 case Title: 0225 case Keyword: 0226 searchTerm = request().value(); 0227 break; 0228 0229 case ISBN: 0230 { 0231 QString isbn = request().value().section(QLatin1Char(';'), 0); 0232 isbn.remove(QLatin1Char('-')); 0233 searchTerm = isbn; 0234 } 0235 break; 0236 0237 default: 0238 myWarning() << "key not recognized: " << request().key(); 0239 stop(); 0240 break; 0241 } 0242 0243 QString searchUrl = m_searchTemplate; 0244 searchUrl.replace(QStringLiteral("{searchTerms}"), searchTerm); 0245 QUrl u(searchUrl); 0246 myLog() << "Searching" << u.toDisplayString(); 0247 0248 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); 0249 KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); 0250 connect(m_job.data(), &KJob::result, 0251 this, &OPDSFetcher::slotComplete); 0252 } 0253 0254 void OPDSFetcher::stop() { 0255 if(!m_started) { 0256 return; 0257 } 0258 if(m_job) { 0259 m_job->kill(); 0260 m_job = nullptr; 0261 } 0262 0263 m_started = false; 0264 emit signalDone(this); 0265 } 0266 0267 void OPDSFetcher::slotComplete(KJob*) { 0268 if(m_job->error()) { 0269 m_job->uiDelegate()->showErrorMessage(); 0270 stop(); 0271 return; 0272 } 0273 0274 QByteArray data = 
m_job->data(); 0275 if(data.isEmpty()) { 0276 stop(); 0277 return; 0278 } 0279 // see bug 319662. If fetcher is cancelled, job is killed 0280 // if the pointer is retained, it gets double-deleted 0281 m_job = nullptr; 0282 parseData(data); 0283 } 0284 0285 void OPDSFetcher::parseData(const QByteArray& data_, bool manualSearch_) { 0286 #if 0 0287 myWarning() << "Remove debug from opdsfetcher.cpp"; 0288 QFile f(QString::fromLatin1("/tmp/test.xml")); 0289 if(f.open(QIODevice::WriteOnly)) { 0290 QTextStream t(&f); 0291 t.setCodec("UTF-8"); 0292 t << data_; 0293 } 0294 f.close(); 0295 #endif 0296 0297 if(!m_xsltHandler) { 0298 initXSLTHandler(); 0299 if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading 0300 stop(); 0301 return; 0302 } 0303 } 0304 0305 // assume result is always utf-8 0306 QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(data_.constData(), data_.size())); 0307 Import::TellicoImporter imp(str); 0308 imp.setBaseUrl(QUrl(m_searchTemplate.isEmpty() ? 
m_catalog : m_searchTemplate)); 0309 Data::CollPtr coll = imp.collection(); 0310 0311 if(!coll) { 0312 myDebug() << source() << " - no collection pointer"; 0313 stop(); 0314 return; 0315 } 0316 0317 foreach(Data::EntryPtr entry, coll->entries()) { 0318 // if manual search, do poor man's comparison 0319 if(manualSearch_ && !matchesEntry(entry)) continue; 0320 FetchResult* r = new FetchResult(this, entry); 0321 m_entries.insert(r->uid, entry); 0322 emit signalResultFound(r); 0323 } 0324 stop(); 0325 } 0326 0327 bool OPDSFetcher::matchesEntry(Data::EntryPtr entry_) const { 0328 switch(request().key()) { 0329 case Title: 0330 return entry_->title().contains(request().value(), Qt::CaseInsensitive); 0331 case ISBN: 0332 { 0333 ISBNComparison comp; 0334 return comp(entry_->field(QStringLiteral("isbn")), request().value()); 0335 } 0336 case Keyword: 0337 return entry_->title().contains(request().value(), Qt::CaseInsensitive) || 0338 entry_->field(QStringLiteral("author")).contains(request().value(), Qt::CaseInsensitive) || 0339 entry_->field(QStringLiteral("keyword")).contains(request().value(), Qt::CaseInsensitive) || 0340 entry_->field(QStringLiteral("publisher")).contains(request().value(), Qt::CaseInsensitive) || 0341 entry_->field(QStringLiteral("genre")).contains(request().value(), Qt::CaseInsensitive) || 0342 entry_->field(QStringLiteral("pub_year")).contains(request().value(), Qt::CaseInsensitive) || 0343 entry_->field(QStringLiteral("plot")).contains(request().value(), Qt::CaseInsensitive); 0344 default: 0345 break; 0346 } 0347 return false; 0348 } 0349 0350 Tellico::Data::EntryPtr OPDSFetcher::fetchEntryHook(uint uid_) { 0351 auto entry = m_entries[uid_]; 0352 if(!entry) return entry; 0353 // check whether the summary shows content from Calibre server and try to compensate 0354 QString plot = entry->field(QStringLiteral("plot")); 0355 static const QByteArray xhtml("<div xmlns=\"http://www.w3.org/1999/xhtml\">"); 0356 if(plot.startsWith(QLatin1String(xhtml))) { 
0357 plot = plot.mid(xhtml.length()); 0358 myLog() << "Detected Calibre-style plot format"; 0359 myLog() << "Removing XHTML div"; 0360 static const QByteArray divEnd("</div>"); 0361 if(plot.endsWith(QLatin1String(divEnd))) { 0362 plot.chop(divEnd.length()); 0363 } 0364 static const QRegularExpression ratingRx(QStringLiteral("RATING: (★+)<br/>")); 0365 auto ratingMatch = ratingRx.match(plot); 0366 if(ratingMatch.hasMatch()) { 0367 // length of star match is the rating number 0368 entry->setField(QStringLiteral("rating"), QString::number(ratingMatch.captured(1).length())); 0369 plot.remove(ratingMatch.captured()); 0370 } 0371 static const QRegularExpression tagsRX(QStringLiteral("TAGS: (.+?)<br/>")); 0372 auto tagsMatch = tagsRX.match(plot); 0373 if(tagsMatch.hasMatch()) { 0374 entry->setField(QStringLiteral("genre"), 0375 FieldFormat::splitValue(tagsMatch.captured(1), FieldFormat::CommaRegExpSplit) 0376 .join(FieldFormat::delimiterString())); 0377 plot.remove(tagsMatch.captured()); 0378 } 0379 static const QRegularExpression seriesRx(QStringLiteral("SERIES: (.+?) 
\\[(\\d+)\\]<br/>")); 0380 auto seriesMatch = seriesRx.match(plot); 0381 if(seriesMatch.hasMatch()) { 0382 entry->setField(QStringLiteral("series"), seriesMatch.captured(1)); 0383 entry->setField(QStringLiteral("series_num"), seriesMatch.captured(2)); 0384 plot.remove(seriesMatch.captured()); 0385 } 0386 plot.remove(QLatin1String("SUMMARY:<br/>")); 0387 plot = plot.simplified(); 0388 if(plot.startsWith(QLatin1String("<p class=\"description\">"))) { 0389 plot = plot.mid(23); 0390 if(plot.endsWith(QLatin1String("</p>"))) { 0391 plot.chop(4); 0392 } 0393 } 0394 entry->setField(QStringLiteral("plot"), plot); 0395 } 0396 return entry; 0397 } 0398 0399 void OPDSFetcher::initXSLTHandler() { 0400 QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("atom2tellico.xsl")); 0401 if(xsltfile.isEmpty()) { 0402 myWarning() << "can not locate atom2tellico.xsl."; 0403 return; 0404 } 0405 0406 QUrl u = QUrl::fromLocalFile(xsltfile); 0407 0408 delete m_xsltHandler; 0409 m_xsltHandler = new XSLTHandler(u); 0410 if(!m_xsltHandler->isValid()) { 0411 myWarning() << "error in atom2tellico.xsl."; 0412 delete m_xsltHandler; 0413 m_xsltHandler = nullptr; 0414 } 0415 } 0416 0417 Tellico::Fetch::FetchRequest OPDSFetcher::updateRequest(Data::EntryPtr entry_) { 0418 QString t = entry_->field(QStringLiteral("title")); 0419 if(!t.isEmpty()) { 0420 return FetchRequest(Fetch::Title, t); 0421 } 0422 return FetchRequest(); 0423 } 0424 0425 QString OPDSFetcher::defaultName() { 0426 return i18n("OPDS Catalog"); 0427 } 0428 0429 QString OPDSFetcher::defaultIcon() { 0430 return QStringLiteral("folder-book"); 0431 } 0432 0433 // static 0434 Tellico::StringHash OPDSFetcher::allOptionalFields() { 0435 StringHash hash; 0436 hash[QStringLiteral("url")] = i18n("URL"); 0437 return hash; 0438 } 0439 0440 Tellico::Fetch::ConfigWidget* OPDSFetcher::configWidget(QWidget* parent_) const { 0441 return new ConfigWidget(parent_, this); 0442 } 0443 0444 OPDSFetcher::ConfigWidget::ConfigWidget(QWidget* 
parent_, const OPDSFetcher* fetcher_ /*=0*/) 0445 : Fetch::ConfigWidget(parent_) { 0446 QGridLayout* l = new QGridLayout(optionsWidget()); 0447 l->setSpacing(4); 0448 l->setColumnStretch(1, 10); 0449 0450 int row = -1; 0451 QLabel* label = new QLabel(i18n("Catalog: "), optionsWidget()); 0452 l->addWidget(label, ++row, 0); 0453 m_catalogEdit = new KUrlRequester(optionsWidget()); 0454 connect(m_catalogEdit, &KUrlRequester::textEdited, this, &ConfigWidget::slotSetModified); 0455 l->addWidget(m_catalogEdit, row, 1); 0456 QString w = i18n("Enter the link to the OPDS server."); 0457 label->setWhatsThis(w); 0458 m_catalogEdit->setWhatsThis(w); 0459 label->setBuddy(m_catalogEdit); 0460 0461 auto verifyButton = new QPushButton(i18n("&Verify Catalog"), optionsWidget()); 0462 connect(verifyButton, &QPushButton::clicked, 0463 this, &ConfigWidget::verifyCatalog); 0464 l->addWidget(verifyButton, ++row, 0); 0465 m_statusLabel = new QLabel(optionsWidget()); 0466 l->addWidget(m_statusLabel, row, 1); 0467 0468 l->setRowStretch(++row, 1); 0469 0470 // now add additional fields widget 0471 addFieldsWidget(OPDSFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); 0472 0473 if(fetcher_) { 0474 m_catalogEdit->setText(fetcher_->m_catalog); 0475 m_searchTemplate = fetcher_->m_searchTemplate; 0476 m_icon = fetcher_->m_icon; 0477 m_attribution = fetcher_->m_attribution; 0478 } 0479 KAcceleratorManager::manage(optionsWidget()); 0480 } 0481 0482 void OPDSFetcher::ConfigWidget::saveConfigHook(KConfigGroup& config_) { 0483 QString s = m_catalogEdit->text().trimmed(); 0484 if(!s.isEmpty()) { 0485 config_.writeEntry("Catalog", s); 0486 config_.writeEntry("SearchTemplate", m_searchTemplate); 0487 config_.writeEntry("Icon", m_icon); 0488 config_.writeEntry("Attribution", m_attribution); 0489 } 0490 } 0491 0492 QString OPDSFetcher::ConfigWidget::preferredName() const { 0493 auto u = m_catalogEdit->url(); 0494 return m_name.isEmpty() ? (u.isEmpty() ? 
OPDSFetcher::defaultName() : u.host()) : m_name; 0495 } 0496 0497 void OPDSFetcher::ConfigWidget::verifyCatalog() { 0498 OPDSFetcher::Reader reader(m_catalogEdit->url()); 0499 const int imgSize = 0.8*m_statusLabel->height(); 0500 if(reader.readSearchTemplate()) { 0501 m_statusLabel->setPixmap(QIcon::fromTheme(QStringLiteral("emblem-checked")).pixmap(imgSize, imgSize)); 0502 slotSetModified(); 0503 if(!reader.name.isEmpty()) { 0504 emit signalName(reader.name); 0505 } 0506 m_name = reader.name; 0507 m_searchTemplate = reader.searchTemplate; 0508 m_icon = reader.icon; 0509 m_attribution = reader.attribution; 0510 } else if(reader.isAcquisition) { 0511 m_statusLabel->setPixmap(QIcon::fromTheme(QStringLiteral("emblem-added")).pixmap(imgSize, imgSize)); 0512 m_searchTemplate.clear(); 0513 } else { 0514 m_statusLabel->setPixmap(QIcon::fromTheme(QStringLiteral("emblem-error")).pixmap(imgSize, imgSize)); 0515 m_searchTemplate.clear(); 0516 m_icon.clear(); 0517 m_attribution.clear(); 0518 } 0519 }