File indexing completed on 2024-05-12 05:09:27

0001 /***************************************************************************
0002     Copyright (C) 2007-2009 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "arxivfetcher.h"
0026 #include "../translators/xslthandler.h"
0027 #include "../translators/tellicoimporter.h"
0028 #include "../translators/tellico_xml.h"
0029 #include "../utils/guiproxy.h"
0030 #include "../utils/string_utils.h"
0031 #include "../utils/datafileregistry.h"
0032 #include "../collection.h"
0033 #include "../entry.h"
0034 #include "../core/netaccess.h"
0035 #include "../images/imagefactory.h"
0036 #include "../tellico_debug.h"
0037 
0038 #include <KLocalizedString>
0039 #include <KIO/Job>
0040 #include <KIO/JobUiDelegate>
0041 #include <KConfigGroup>
0042 #include <KJobWidgets/KJobWidgets>
0043 
0044 #include <QDomDocument>
0045 #include <QLabel>
0046 #include <QTextStream>
0047 #include <QPixmap>
0048 #include <QVBoxLayout>
0049 #include <QFile>
0050 #include <QUrlQuery>
0051 
namespace {
  // Number of results requested from the arXiv API per query (sent as max_results).
  constexpr int ARXIV_RETURNS_PER_REQUEST = 20;
  // Endpoint of the arXiv query API. Both the characters and the pointer are
  // const so the address cannot be re-seated by accident.
  const char* const ARXIV_BASE_URL = "http://export.arxiv.org/api/query";
}
0056 
0057 using namespace Tellico;
0058 using namespace Tellico::Fetch;
0059 using Tellico::Fetch::ArxivFetcher;
0060 
0061 ArxivFetcher::ArxivFetcher(QObject* parent_)
0062     : Fetcher(parent_), m_xsltHandler(nullptr), m_start(0), m_total(-1), m_job(nullptr), m_started(false) {
0063 }
0064 
0065 ArxivFetcher::~ArxivFetcher() {
0066   delete m_xsltHandler;
0067   m_xsltHandler = nullptr;
0068 }
0069 
0070 QString ArxivFetcher::source() const {
0071   return m_name.isEmpty() ? defaultName() : m_name;
0072 }
0073 
0074 bool ArxivFetcher::canSearch(Fetch::FetchKey k) const {
0075   return k == Title || k == Person || k == Keyword || k == ArxivID;
0076 }
0077 
0078 bool ArxivFetcher::canFetch(int type) const {
0079   return type == Data::Collection::Bibtex;
0080 }
0081 
0082 void ArxivFetcher::readConfigHook(const KConfigGroup&) {
0083 }
0084 
0085 void ArxivFetcher::search() {
0086   m_started = true;
0087   m_start = 0;
0088   m_total = -1;
0089   doSearch();
0090 }
0091 
0092 void ArxivFetcher::continueSearch() {
0093   m_started = true;
0094   doSearch();
0095 }
0096 
0097 void ArxivFetcher::doSearch() {
0098   QUrl u = searchURL(request().key(), request().value());
0099   if(u.isEmpty()) {
0100     stop();
0101     return;
0102   }
0103 
0104   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
0105   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
0106   connect(m_job.data(), &KJob::result,
0107           this, &ArxivFetcher::slotComplete);
0108 }
0109 
0110 void ArxivFetcher::stop() {
0111   if(!m_started) {
0112     return;
0113   }
0114 //  myDebug();
0115   if(m_job) {
0116     m_job->kill();
0117     m_job = nullptr;
0118   }
0119   m_started = false;
0120   emit signalDone(this);
0121 }
0122 
0123 void ArxivFetcher::slotComplete(KJob*) {
0124 //  myDebug();
0125 
0126   if(m_job->error()) {
0127     m_job->uiDelegate()->showErrorMessage();
0128     stop();
0129     return;
0130   }
0131 
0132   QByteArray data = m_job->data();
0133   if(data.isEmpty()) {
0134     myDebug() << "no data";
0135     stop();
0136     return;
0137   }
0138 
0139   // since the fetch is done, don't worry about holding the job pointer
0140   m_job = nullptr;
0141 #if 0
0142   myWarning() << "Remove debug from arxivfetcher.cpp";
0143   QFile f(QLatin1String("/tmp/test.xml"));
0144   if(f.open(QIODevice::WriteOnly)) {
0145     QTextStream t(&f);
0146     t.setCodec("UTF-8");
0147     t << data;
0148   }
0149   f.close();
0150 #endif
0151 
0152   if(!m_xsltHandler) {
0153     initXSLTHandler();
0154     if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
0155       stop();
0156       return;
0157     }
0158   }
0159 
0160   if(m_total == -1) {
0161     QDomDocument dom;
0162     if(!dom.setContent(data, true /*namespace*/)) {
0163       myWarning() << "server did not return valid XML.";
0164       stop();
0165       return;
0166     }
0167     // total is top level element, with attribute totalResultsAvailable
0168     QDomNodeList list = dom.elementsByTagNameNS(XML::nsOpenSearch,
0169                                                 QStringLiteral("totalResults"));
0170     if(list.count() > 0) {
0171       m_total = list.item(0).toElement().text().toInt();
0172     }
0173   }
0174 
0175   // assume result is always utf-8
0176   QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(data.constData(), data.size()));
0177   Import::TellicoImporter imp(str);
0178   Data::CollPtr coll = imp.collection();
0179 
0180   if(!coll) {
0181     myDebug() << "no valid result";
0182     stop();
0183     return;
0184   }
0185 
0186   foreach(Data::EntryPtr entry, coll->entries()) {
0187     if(!m_started) {
0188       // might get aborted
0189       break;
0190     }
0191     FetchResult* r = new FetchResult(this, entry);
0192     m_entries.insert(r->uid, entry);
0193     emit signalResultFound(r);
0194   }
0195 
0196   m_start = m_entries.count();
0197   m_hasMoreResults = m_start < m_total;
0198   stop(); // required
0199 }
0200 
0201 Tellico::Data::EntryPtr ArxivFetcher::fetchEntryHook(uint uid_) {
0202   Data::EntryPtr entry = m_entries[uid_];
0203   // if URL but no cover image, fetch it
0204   if(!entry->field(QStringLiteral("url")).isEmpty()) {
0205     Data::CollPtr coll = entry->collection();
0206     Data::FieldPtr field = coll->fieldByName(QStringLiteral("cover"));
0207     if(!field && !coll->imageFields().isEmpty()) {
0208       field = coll->imageFields().front();
0209     } else if(!field) {
0210       field = Data::Field::createDefaultField(Data::Field::FrontCoverField);
0211       coll->addField(field);
0212     }
0213     if(entry->field(field).isEmpty()) {
0214       QPixmap pix = NetAccess::filePreview(QUrl::fromUserInput(entry->field(QStringLiteral("url"))));
0215       if(!pix.isNull()) {
0216         QString id = ImageFactory::addImage(pix, QStringLiteral("PNG"));
0217         if(!id.isEmpty()) {
0218           entry->setField(field, id);
0219         }
0220       }
0221     }
0222   }
0223   static const QRegularExpression versionRx(QLatin1String("v\\d+$"));
0224   // if the original search was not for a versioned ID, remove it
0225   if(request().key() != ArxivID || !request().value().contains(versionRx)) {
0226     QString arxiv = entry->field(QStringLiteral("arxiv"));
0227     arxiv.remove(versionRx);
0228     entry->setField(QStringLiteral("arxiv"), arxiv);
0229   }
0230   return entry;
0231 }
0232 
0233 void ArxivFetcher::initXSLTHandler() {
0234   QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("arxiv2tellico.xsl"));
0235   if(xsltfile.isEmpty()) {
0236     myWarning() << "can not locate arxiv2tellico.xsl.";
0237     return;
0238   }
0239 
0240   QUrl u = QUrl::fromLocalFile(xsltfile);
0241 
0242   delete m_xsltHandler;
0243   m_xsltHandler = new XSLTHandler(u);
0244   if(!m_xsltHandler->isValid()) {
0245     myWarning() << "error in arxiv2tellico.xsl.";
0246     delete m_xsltHandler;
0247     m_xsltHandler = nullptr;
0248     return;
0249   }
0250 }
0251 
0252 QUrl ArxivFetcher::searchURL(FetchKey key_, const QString& value_) const {
0253   QUrl u(QString::fromLatin1(ARXIV_BASE_URL));
0254   QUrlQuery q;
0255   q.addQueryItem(QStringLiteral("start"), QString::number(m_start));
0256   q.addQueryItem(QStringLiteral("max_results"), QString::number(ARXIV_RETURNS_PER_REQUEST));
0257 
0258   // quotes should be used if spaces are present
0259   QString value = value_;
0260   value.replace(QLatin1Char(' '), QLatin1Char('+'));
0261   // seems to have problems with dashes, too
0262   value.replace(QLatin1Char('-'), QLatin1Char('+'));
0263 
0264   QString query;
0265   switch(key_) {
0266     case Title:
0267       query = QStringLiteral("ti:%1").arg(value);
0268       break;
0269 
0270     case Person:
0271       query = QStringLiteral("au:%1").arg(value);
0272       break;
0273 
0274     case Keyword:
0275       // keyword gets to use all the words without being quoted
0276       query = QStringLiteral("all:%1").arg(value);
0277       break;
0278 
0279     case ArxivID:
0280       {
0281       // remove prefix and/or version number
0282       QString value = value_;
0283       static const QRegularExpression arxivRx(QLatin1String("^arxiv:"), QRegularExpression::CaseInsensitiveOption);
0284       static const QRegularExpression vRx(QLatin1String("v\\d+$"));
0285       value.remove(arxivRx);
0286       value.remove(vRx);
0287       query = QStringLiteral("id:%1").arg(value);
0288       }
0289       break;
0290 
0291     default:
0292       myWarning() << source() << "- key not recognized:" << request().key();
0293       return QUrl();
0294   }
0295   q.addQueryItem(QStringLiteral("search_query"), query);
0296   u.setQuery(q);
0297 
0298 //  myDebug() << "url: " << u;
0299   return u;
0300 }
0301 
0302 Tellico::Fetch::FetchRequest ArxivFetcher::updateRequest(Data::EntryPtr entry_) {
0303   QString id = entry_->field(QStringLiteral("arxiv"));
0304   if(!id.isEmpty()) {
0305     // remove prefix and/or version number
0306     static const QRegularExpression arxivRx(QLatin1String("^arxiv:"), QRegularExpression::CaseInsensitiveOption);
0307     static const QRegularExpression vRx(QLatin1String("v\\d+$"));
0308     id.remove(arxivRx);
0309     id.remove(vRx);
0310     return FetchRequest(Fetch::ArxivID, id);
0311   }
0312 
0313   // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
0314   QString t = entry_->field(QStringLiteral("title"));
0315   if(!t.isEmpty()) {
0316     return FetchRequest(Fetch::Title, t);
0317   }
0318 
0319   return FetchRequest();
0320 }
0321 
0322 Tellico::Fetch::ConfigWidget* ArxivFetcher::configWidget(QWidget* parent_) const {
0323   return new ArxivFetcher::ConfigWidget(parent_, this);
0324 }
0325 
0326 QString ArxivFetcher::defaultName() {
0327   return QStringLiteral("arXiv.org"); // no translation
0328 }
0329 
0330 QString ArxivFetcher::defaultIcon() {
0331   return favIcon("http://arxiv.org");
0332 }
0333 
0334 ArxivFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const ArxivFetcher*)
0335     : Fetch::ConfigWidget(parent_) {
0336   QVBoxLayout* l = new QVBoxLayout(optionsWidget());
0337   l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
0338   l->addStretch();
0339 }
0340 
0341 void ArxivFetcher::ConfigWidget::saveConfigHook(KConfigGroup&) {
0342 }
0343 
0344 QString ArxivFetcher::ConfigWidget::preferredName() const {
0345   return ArxivFetcher::defaultName();
0346 }