File indexing completed on 2024-05-12 05:09:35

0001 /***************************************************************************
0002     Copyright (C) 2012 Robby Stephenson <robby@periapsis.org>
0003  ***************************************************************************/
0004 
0005 /***************************************************************************
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or         *
0008  *   modify it under the terms of the GNU General Public License as        *
0009  *   published by the Free Software Foundation; either version 2 of        *
0010  *   the License or (at your option) version 3 or any later version        *
0011  *   accepted by the membership of KDE e.V. (or its successor approved     *
0012  *   by the membership of KDE e.V.), which shall act as a proxy            *
0013  *   defined in Section 14 of version 3 of the license.                    *
0014  *                                                                         *
0015  *   This program is distributed in the hope that it will be useful,       *
0016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0018  *   GNU General Public License for more details.                          *
0019  *                                                                         *
0020  *   You should have received a copy of the GNU General Public License     *
0021  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
0022  *                                                                         *
0023  ***************************************************************************/
0024 
0025 #include "hathitrustfetcher.h"
0026 #include "../translators/xslthandler.h"
0027 #include "../translators/tellicoimporter.h"
0028 #include "../utils/isbnvalidator.h"
0029 #include "../utils/lccnvalidator.h"
0030 #include "../utils/guiproxy.h"
0031 #include "../utils/string_utils.h"
0032 #include "../utils/datafileregistry.h"
0033 #include "../tellico_debug.h"
0034 
0035 #include <KLocalizedString>
0036 #include <KIO/Job>
0037 #include <KIO/JobUiDelegate>
0038 #include <KJobWidgets>
0039 
0040 #include <QLabel>
0041 #include <QFile>
0042 #include <QTextStream>
0043 #include <QGridLayout>
0044 #include <QTextCodec>
0045 #include <QJsonDocument>
0046 #include <QJsonObject>
0047 #include <QDomDocument>
0048 
namespace {
  // Base URL of the HathiTrust catalog API; doSearch() appends the list of
  // identifiers to the path of this URL. The anonymous namespace already gives
  // the constant internal linkage.
  const char* const HATHITRUST_QUERY_URL = "http://catalog.hathitrust.org/api/volumes/full/json/";
}
0052 
0053 using namespace Tellico;
0054 using Tellico::Fetch::HathiTrustFetcher;
0055 
0056 HathiTrustFetcher::HathiTrustFetcher(QObject* parent_)
0057     : Fetcher(parent_), m_started(false), m_MARC21XMLHandler(nullptr), m_MODSHandler(nullptr) {
0058 }
0059 
0060 HathiTrustFetcher::~HathiTrustFetcher() {
0061   delete m_MARC21XMLHandler;
0062   m_MARC21XMLHandler = nullptr;
0063   delete m_MODSHandler;
0064   m_MODSHandler = nullptr;
0065 }
0066 
0067 QString HathiTrustFetcher::source() const {
0068   return m_name.isEmpty() ? defaultName() : m_name;
0069 }
0070 
0071 bool HathiTrustFetcher::canSearch(Fetch::FetchKey k) const {
0072   return k == ISBN || k == LCCN;
0073 }
0074 
0075 bool HathiTrustFetcher::canFetch(int type) const {
0076   return type == Data::Collection::Book || type == Data::Collection::Bibtex;
0077 }
0078 
0079 void HathiTrustFetcher::readConfigHook(const KConfigGroup&) {
0080 }
0081 
0082 void HathiTrustFetcher::search() {
0083   m_started = true;
0084   doSearch();
0085 }
0086 
0087 void HathiTrustFetcher::doSearch() {
0088   if(request().key() != ISBN && request().key() != LCCN) {
0089     stop();
0090     return;
0091   }
0092 
0093   QUrl u(QString::fromLatin1(HATHITRUST_QUERY_URL));
0094 
0095   QStringList searchValues;
0096   // we split ISBN and LCCN values, which are the only ones we accept
0097   const QStringList searchTerms = FieldFormat::splitValue(request().value());
0098   foreach(const QString& searchTerm, searchTerms) {
0099     if(request().key() == ISBN) {
0100       searchValues += QStringLiteral("isbn:%1").arg(ISBNValidator::cleanValue(searchTerm));
0101     } else {
0102       searchValues += QStringLiteral("lccn:%1").arg(LCCNValidator::formalize(searchTerm));
0103     }
0104   }
0105   u.setPath(u.path() + searchValues.join(QLatin1String("|")));
0106 
0107 //  myDebug() << u;
0108 
0109   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
0110   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
0111   connect(m_job.data(), &KJob::result, this, &HathiTrustFetcher::slotComplete);
0112 }
0113 
0114 void HathiTrustFetcher::stop() {
0115   if(!m_started) {
0116     return;
0117   }
0118   if(m_job) {
0119     m_job->kill();
0120   }
0121   m_started = false;
0122   emit signalDone(this);
0123 }
0124 
0125 bool HathiTrustFetcher::initMARC21Handler() {
0126   if(m_MARC21XMLHandler) {
0127     return true;
0128   }
0129 
0130   QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("MARC21slim2MODS3.xsl"));
0131   if(xsltfile.isEmpty()) {
0132     myWarning() << "can not locate MARC21slim2MODS3.xsl.";
0133     return false;
0134   }
0135 
0136   QUrl u = QUrl::fromLocalFile(xsltfile);
0137 
0138   m_MARC21XMLHandler = new XSLTHandler(u);
0139   if(!m_MARC21XMLHandler->isValid()) {
0140     myWarning() << "error in MARC21slim2MODS3.xsl.";
0141     delete m_MARC21XMLHandler;
0142     m_MARC21XMLHandler = nullptr;
0143     return false;
0144   }
0145   return true;
0146 }
0147 
0148 bool HathiTrustFetcher::initMODSHandler() {
0149   if(m_MODSHandler) {
0150     return true;
0151   }
0152 
0153   QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("mods2tellico.xsl"));
0154   if(xsltfile.isEmpty()) {
0155     myWarning() << "can not locate mods2tellico.xsl.";
0156     return false;
0157   }
0158 
0159   QUrl u = QUrl::fromLocalFile(xsltfile);
0160 
0161   m_MODSHandler = new XSLTHandler(u);
0162   if(!m_MODSHandler->isValid()) {
0163     myWarning() << "error in mods2tellico.xsl.";
0164     delete m_MODSHandler;
0165     m_MODSHandler = nullptr;
0166     // no use in keeping the MARC handlers now
0167     delete m_MARC21XMLHandler;
0168     m_MARC21XMLHandler = nullptr;
0169     return false;
0170   }
0171   return true;
0172 }
0173 
0174 Tellico::Data::EntryPtr HathiTrustFetcher::fetchEntryHook(uint uid_) {
0175   return m_entries.value(uid_);
0176 }
0177 
0178 Tellico::Fetch::FetchRequest HathiTrustFetcher::updateRequest(Data::EntryPtr entry_) {
0179   const QString isbn = entry_->field(QStringLiteral("isbn"));
0180   if(!isbn.isEmpty()) {
0181     return FetchRequest(ISBN, isbn);
0182   }
0183   const QString lccn = entry_->field(QStringLiteral("lccn"));
0184   if(!lccn.isEmpty()) {
0185     return FetchRequest(LCCN, lccn);
0186   }
0187   return FetchRequest();
0188 }
0189 
0190 void HathiTrustFetcher::slotComplete(KJob* job_) {
0191   KIO::StoredTransferJob* job = static_cast<KIO::StoredTransferJob*>(job_);
0192 
0193   if(!initMARC21Handler() || !initMODSHandler()) {
0194     // debug messages are taken care of in the specific methods
0195     stop();
0196     return;
0197   }
0198 
0199   if(job->error()) {
0200     job->uiDelegate()->showErrorMessage();
0201     stop();
0202     return;
0203   }
0204 
0205   QByteArray data = job->data();
0206   if(data.isEmpty()) {
0207     myDebug() << "no data";
0208     stop();
0209     return;
0210   }
0211   // see bug 319662. If fetcher is cancelled, job is killed
0212   // if the pointer is retained, it gets double-deleted
0213   m_job = nullptr;
0214 
0215 #if 0
0216   myWarning() << "Remove debug from hathitrustfetcher.cpp";
0217   QFile f(QString::fromLatin1("/tmp/test.json"));
0218   if(f.open(QIODevice::WriteOnly)) {
0219     QTextStream t(&f);
0220     t.setCodec("UTF-8");
0221     t << data;
0222   }
0223   f.close();
0224 #endif
0225 
0226   QJsonDocument doc = QJsonDocument::fromJson(data);
0227   QVariantMap resultMap = doc.object().toVariantMap();
0228   if(resultMap.isEmpty()) {
0229     myDebug() << "no results";
0230     stop();
0231     return;
0232   }
0233 
0234   QVariantMap::const_iterator i = resultMap.constBegin();
0235   for( ; i != resultMap.constEnd(); ++i) {
0236     const QVariantMap recordMap = i.value().toMap().value(QStringLiteral("records")).toMap();
0237     if(recordMap.isEmpty()) {
0238       myDebug() << "empty result map";
0239       continue;
0240     }
0241     // we know there's a record, so no need to check for existence of first iterator in map
0242     QVariantMap::const_iterator ri = recordMap.constBegin();
0243     if(ri == recordMap.constEnd()) {
0244       myWarning() << "no iterator in record";
0245       continue;
0246     }
0247     QString marcxml = ri.value().toMap().value(QStringLiteral("marc-xml")).toString();
0248     // HathiTrust doesn't always include the XML NS in the JSON results. Assume it's always
0249     // MARC XML and check that
0250     QDomDocument dom;
0251     if(dom.setContent(marcxml, true /* namespace processing */) && dom.documentElement().namespaceURI().isEmpty()) {
0252       const QString rootName = dom.documentElement().tagName();
0253       myDebug() << "no namespace, attempting to set on" << rootName << "element";
0254       QRegularExpression rootRx(QLatin1Char('<') + rootName + QLatin1Char('>'));
0255       QString newRoot = QLatin1Char('<') + rootName + QLatin1String(" xmlns=\"http://www.loc.gov/MARC21/slim\">");
0256       marcxml.replace(rootRx, newRoot);
0257     }
0258     const QString modsxml = m_MARC21XMLHandler->applyStylesheet(marcxml);
0259 
0260     Import::TellicoImporter imp(m_MODSHandler->applyStylesheet(modsxml));
0261     imp.setOptions(imp.options() ^ Import::ImportProgress); // no progress needed
0262     Data::CollPtr coll = imp.collection();
0263     if(!coll) {
0264       myWarning() << "no coll pointer";
0265       continue;
0266     }
0267 
0268     // since the Dewey and LoC field titles have a context in their i18n call here
0269     // but not in the mods2tellico.xsl stylesheet where the field is actually created
0270     // update the field titles here
0271     QHashIterator<QString, QString> i2(allOptionalFields());
0272     while(i2.hasNext()) {
0273       i2.next();
0274       Data::FieldPtr field = coll->fieldByName(i2.key());
0275       if(field) {
0276         field->setTitle(i2.value());
0277         coll->modifyField(field);
0278       }
0279     }
0280 
0281     foreach(Data::EntryPtr entry, coll->entries()) {
0282       FetchResult* r = new FetchResult(this, entry);
0283       m_entries.insert(r->uid, entry);
0284       emit signalResultFound(r);
0285     }
0286   }
0287 
0288   m_hasMoreResults = false; // for now, no continued searches
0289   stop();
0290 }
0291 
0292 Tellico::Fetch::ConfigWidget* HathiTrustFetcher::configWidget(QWidget* parent_) const {
0293   return new HathiTrustFetcher::ConfigWidget(parent_, this);
0294 }
0295 
0296 QString HathiTrustFetcher::defaultName() {
0297   return QStringLiteral("HathiTrust"); // no translation
0298 }
0299 
0300 QString HathiTrustFetcher::defaultIcon() {
0301   return favIcon("http://www.hathitrust.org");
0302 }
0303 
0304 Tellico::StringHash HathiTrustFetcher::allOptionalFields() {
0305   // same ones as z3950fetcher
0306   StringHash hash;
0307   hash[QStringLiteral("address")]  = i18n("Address");
0308   hash[QStringLiteral("abstract")] = i18n("Abstract");
0309   hash[QStringLiteral("illustrator")] = i18n("Illustrator");
0310   hash[QStringLiteral("dewey")] = i18nc("Dewey Decimal classification system", "Dewey Decimal");
0311   hash[QStringLiteral("lcc")] = i18nc("Library of Congress classification system", "LoC Classification");
0312   return hash;
0313 }
0314 
0315 HathiTrustFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const HathiTrustFetcher* fetcher_)
0316     : Fetch::ConfigWidget(parent_) {
0317   QVBoxLayout* l = new QVBoxLayout(optionsWidget());
0318   l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
0319   l->addStretch();
0320 
0321   // now add additional fields widget
0322   addFieldsWidget(HathiTrustFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
0323 }
0324 
0325 void HathiTrustFetcher::ConfigWidget::saveConfigHook(KConfigGroup&) {
0326 }
0327 
0328 QString HathiTrustFetcher::ConfigWidget::preferredName() const {
0329   return HathiTrustFetcher::defaultName();
0330 }