File indexing completed on 2024-05-12 05:09:35
0001 /*************************************************************************** 0002 Copyright (C) 2012 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "hathitrustfetcher.h" 0026 #include "../translators/xslthandler.h" 0027 #include "../translators/tellicoimporter.h" 0028 #include "../utils/isbnvalidator.h" 0029 #include "../utils/lccnvalidator.h" 0030 #include "../utils/guiproxy.h" 0031 #include "../utils/string_utils.h" 0032 #include "../utils/datafileregistry.h" 0033 #include "../tellico_debug.h" 0034 0035 #include <KLocalizedString> 0036 #include <KIO/Job> 0037 #include <KIO/JobUiDelegate> 0038 #include <KJobWidgets> 0039 0040 #include <QLabel> 0041 #include <QFile> 0042 #include <QTextStream> 0043 #include <QGridLayout> 0044 #include <QTextCodec> 0045 #include <QJsonDocument> 0046 #include <QJsonObject> 0047 #include <QDomDocument> 0048 0049 namespace { 0050 static const char* HATHITRUST_QUERY_URL = "http://catalog.hathitrust.org/api/volumes/full/json/"; 0051 } 0052 0053 using namespace Tellico; 0054 using Tellico::Fetch::HathiTrustFetcher; 0055 0056 HathiTrustFetcher::HathiTrustFetcher(QObject* parent_) 0057 : Fetcher(parent_), m_started(false), m_MARC21XMLHandler(nullptr), m_MODSHandler(nullptr) { 0058 } 0059 0060 HathiTrustFetcher::~HathiTrustFetcher() { 0061 delete m_MARC21XMLHandler; 0062 m_MARC21XMLHandler = nullptr; 0063 delete m_MODSHandler; 0064 m_MODSHandler = nullptr; 0065 } 0066 0067 QString HathiTrustFetcher::source() const { 0068 return m_name.isEmpty() ? defaultName() : m_name; 0069 } 0070 0071 bool HathiTrustFetcher::canSearch(Fetch::FetchKey k) const { 0072 return k == ISBN || k == LCCN; 0073 } 0074 0075 bool HathiTrustFetcher::canFetch(int type) const { 0076 return type == Data::Collection::Book || type == Data::Collection::Bibtex; 0077 } 0078 0079 void HathiTrustFetcher::readConfigHook(const KConfigGroup&) { 0080 } 0081 0082 void HathiTrustFetcher::search() { 0083 m_started = true; 0084 doSearch(); 0085 } 0086 0087 void HathiTrustFetcher::doSearch() { 0088 if(request().key() != ISBN && request().key() != LCCN) { 0089 stop(); 0090 return; 0091 } 0092 0093 QUrl u(QString::fromLatin1(HATHITRUST_QUERY_URL)); 0094 0095 QStringList searchValues; 0096 // we split ISBN and LCCN values, which are the only ones we accept 0097 const QStringList searchTerms = FieldFormat::splitValue(request().value()); 0098 foreach(const QString& searchTerm, searchTerms) { 0099 if(request().key() == ISBN) { 0100 searchValues += QStringLiteral("isbn:%1").arg(ISBNValidator::cleanValue(searchTerm)); 0101 } else { 0102 searchValues += QStringLiteral("lccn:%1").arg(LCCNValidator::formalize(searchTerm)); 0103 } 0104 } 0105 u.setPath(u.path() + searchValues.join(QLatin1String("|"))); 0106 0107 // myDebug() << u; 0108 0109 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); 0110 KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); 0111 connect(m_job.data(), &KJob::result, this, &HathiTrustFetcher::slotComplete); 0112 } 0113 0114 void HathiTrustFetcher::stop() { 0115 if(!m_started) { 0116 return; 0117 } 0118 if(m_job) { 0119 m_job->kill(); 0120 } 0121 m_started = false; 0122 emit signalDone(this); 0123 } 0124 0125 bool HathiTrustFetcher::initMARC21Handler() { 0126 if(m_MARC21XMLHandler) { 0127 return true; 0128 } 0129 0130 QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("MARC21slim2MODS3.xsl")); 0131 if(xsltfile.isEmpty()) { 0132 myWarning() << "can not locate MARC21slim2MODS3.xsl."; 0133 return false; 0134 } 0135 0136 QUrl u = QUrl::fromLocalFile(xsltfile); 0137 0138 m_MARC21XMLHandler = new XSLTHandler(u); 0139 if(!m_MARC21XMLHandler->isValid()) { 0140 myWarning() << "error in MARC21slim2MODS3.xsl."; 0141 delete m_MARC21XMLHandler; 0142 m_MARC21XMLHandler = nullptr; 0143 return false; 0144 } 0145 return true; 0146 } 0147 0148 bool HathiTrustFetcher::initMODSHandler() { 0149 if(m_MODSHandler) { 0150 return true; 0151 } 0152 0153 QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("mods2tellico.xsl")); 0154 if(xsltfile.isEmpty()) { 0155 myWarning() << "can not locate mods2tellico.xsl."; 0156 return false; 0157 } 0158 0159 QUrl u = QUrl::fromLocalFile(xsltfile); 0160 0161 m_MODSHandler = new XSLTHandler(u); 0162 if(!m_MODSHandler->isValid()) { 0163 myWarning() << "error in mods2tellico.xsl."; 0164 delete m_MODSHandler; 0165 m_MODSHandler = nullptr; 0166 // no use in keeping the MARC handlers now 0167 delete m_MARC21XMLHandler; 0168 m_MARC21XMLHandler = nullptr; 0169 return false; 0170 } 0171 return true; 0172 } 0173 0174 Tellico::Data::EntryPtr HathiTrustFetcher::fetchEntryHook(uint uid_) { 0175 return m_entries.value(uid_); 0176 } 0177 0178 Tellico::Fetch::FetchRequest HathiTrustFetcher::updateRequest(Data::EntryPtr entry_) { 0179 const QString isbn = entry_->field(QStringLiteral("isbn")); 0180 if(!isbn.isEmpty()) { 0181 return FetchRequest(ISBN, isbn); 0182 } 0183 const QString lccn = entry_->field(QStringLiteral("lccn")); 0184 if(!lccn.isEmpty()) { 0185 return FetchRequest(LCCN, lccn); 0186 } 0187 return FetchRequest(); 0188 } 0189 0190 void HathiTrustFetcher::slotComplete(KJob* job_) { 0191 KIO::StoredTransferJob* job = static_cast<KIO::StoredTransferJob*>(job_); 0192 0193 if(!initMARC21Handler() || !initMODSHandler()) { 0194 // debug messages are taken care of in the specific methods 0195 stop(); 0196 return; 0197 } 0198 0199 if(job->error()) { 0200 job->uiDelegate()->showErrorMessage(); 0201 stop(); 0202 return; 0203 } 0204 0205 QByteArray data = job->data(); 0206 if(data.isEmpty()) { 0207 myDebug() << "no data"; 0208 stop(); 0209 return; 0210 } 0211 // see bug 319662. If fetcher is cancelled, job is killed 0212 // if the pointer is retained, it gets double-deleted 0213 m_job = nullptr; 0214 0215 #if 0 0216 myWarning() << "Remove debug from hathitrustfetcher.cpp"; 0217 QFile f(QString::fromLatin1("/tmp/test.json")); 0218 if(f.open(QIODevice::WriteOnly)) { 0219 QTextStream t(&f); 0220 t.setCodec("UTF-8"); 0221 t << data; 0222 } 0223 f.close(); 0224 #endif 0225 0226 QJsonDocument doc = QJsonDocument::fromJson(data); 0227 QVariantMap resultMap = doc.object().toVariantMap(); 0228 if(resultMap.isEmpty()) { 0229 myDebug() << "no results"; 0230 stop(); 0231 return; 0232 } 0233 0234 QVariantMap::const_iterator i = resultMap.constBegin(); 0235 for( ; i != resultMap.constEnd(); ++i) { 0236 const QVariantMap recordMap = i.value().toMap().value(QStringLiteral("records")).toMap(); 0237 if(recordMap.isEmpty()) { 0238 myDebug() << "empty result map"; 0239 continue; 0240 } 0241 // we know there's a record, so no need to check for existence of first iterator in map 0242 QVariantMap::const_iterator ri = recordMap.constBegin(); 0243 if(ri == recordMap.constEnd()) { 0244 myWarning() << "no iterator in record"; 0245 continue; 0246 } 0247 QString marcxml = ri.value().toMap().value(QStringLiteral("marc-xml")).toString(); 0248 // HathiTrust doesn't always include the XML NS in the JSON results. Assume it's always 0249 // MARC XML and check that 0250 QDomDocument dom; 0251 if(dom.setContent(marcxml, true /* namespace processing */) && dom.documentElement().namespaceURI().isEmpty()) { 0252 const QString rootName = dom.documentElement().tagName(); 0253 myDebug() << "no namespace, attempting to set on" << rootName << "element"; 0254 QRegularExpression rootRx(QLatin1Char('<') + rootName + QLatin1Char('>')); 0255 QString newRoot = QLatin1Char('<') + rootName + QLatin1String(" xmlns=\"http://www.loc.gov/MARC21/slim\">"); 0256 marcxml.replace(rootRx, newRoot); 0257 } 0258 const QString modsxml = m_MARC21XMLHandler->applyStylesheet(marcxml); 0259 0260 Import::TellicoImporter imp(m_MODSHandler->applyStylesheet(modsxml)); 0261 imp.setOptions(imp.options() ^ Import::ImportProgress); // no progress needed 0262 Data::CollPtr coll = imp.collection(); 0263 if(!coll) { 0264 myWarning() << "no coll pointer"; 0265 continue; 0266 } 0267 0268 // since the Dewey and LoC field titles have a context in their i18n call here 0269 // but not in the mods2tellico.xsl stylesheet where the field is actually created 0270 // update the field titles here 0271 QHashIterator<QString, QString> i2(allOptionalFields()); 0272 while(i2.hasNext()) { 0273 i2.next(); 0274 Data::FieldPtr field = coll->fieldByName(i2.key()); 0275 if(field) { 0276 field->setTitle(i2.value()); 0277 coll->modifyField(field); 0278 } 0279 } 0280 0281 foreach(Data::EntryPtr entry, coll->entries()) { 0282 FetchResult* r = new FetchResult(this, entry); 0283 m_entries.insert(r->uid, entry); 0284 emit signalResultFound(r); 0285 } 0286 } 0287 0288 m_hasMoreResults = false; // for now, no continued searches 0289 stop(); 0290 } 0291 0292 Tellico::Fetch::ConfigWidget* HathiTrustFetcher::configWidget(QWidget* parent_) const { 0293 return new HathiTrustFetcher::ConfigWidget(parent_, this); 0294 } 0295 0296 QString HathiTrustFetcher::defaultName() { 0297 return QStringLiteral("HathiTrust"); // no translation 0298 } 0299 0300 QString HathiTrustFetcher::defaultIcon() { 0301 return favIcon("http://www.hathitrust.org"); 0302 } 0303 0304 Tellico::StringHash HathiTrustFetcher::allOptionalFields() { 0305 // same ones as z3950fetcher 0306 StringHash hash; 0307 hash[QStringLiteral("address")] = i18n("Address"); 0308 hash[QStringLiteral("abstract")] = i18n("Abstract"); 0309 hash[QStringLiteral("illustrator")] = i18n("Illustrator"); 0310 hash[QStringLiteral("dewey")] = i18nc("Dewey Decimal classification system", "Dewey Decimal"); 0311 hash[QStringLiteral("lcc")] = i18nc("Library of Congress classification system", "LoC Classification"); 0312 return hash; 0313 } 0314 0315 HathiTrustFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const HathiTrustFetcher* fetcher_) 0316 : Fetch::ConfigWidget(parent_) { 0317 QVBoxLayout* l = new QVBoxLayout(optionsWidget()); 0318 l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); 0319 l->addStretch(); 0320 0321 // now add additional fields widget 0322 addFieldsWidget(HathiTrustFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); 0323 } 0324 0325 void HathiTrustFetcher::ConfigWidget::saveConfigHook(KConfigGroup&) { 0326 } 0327 0328 QString HathiTrustFetcher::ConfigWidget::preferredName() const { 0329 return HathiTrustFetcher::defaultName(); 0330 }