File indexing completed on 2024-05-12 05:09:27
0001 /*************************************************************************** 0002 Copyright (C) 2007-2009 Robby Stephenson <robby@periapsis.org> 0003 ***************************************************************************/ 0004 0005 /*************************************************************************** 0006 * * 0007 * This program is free software; you can redistribute it and/or * 0008 * modify it under the terms of the GNU General Public License as * 0009 * published by the Free Software Foundation; either version 2 of * 0010 * the License or (at your option) version 3 or any later version * 0011 * accepted by the membership of KDE e.V. (or its successor approved * 0012 * by the membership of KDE e.V.), which shall act as a proxy * 0013 * defined in Section 14 of version 3 of the license. * 0014 * * 0015 * This program is distributed in the hope that it will be useful, * 0016 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0018 * GNU General Public License for more details. * 0019 * * 0020 * You should have received a copy of the GNU General Public License * 0021 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 0022 * * 0023 ***************************************************************************/ 0024 0025 #include "arxivfetcher.h" 0026 #include "../translators/xslthandler.h" 0027 #include "../translators/tellicoimporter.h" 0028 #include "../translators/tellico_xml.h" 0029 #include "../utils/guiproxy.h" 0030 #include "../utils/string_utils.h" 0031 #include "../utils/datafileregistry.h" 0032 #include "../collection.h" 0033 #include "../entry.h" 0034 #include "../core/netaccess.h" 0035 #include "../images/imagefactory.h" 0036 #include "../tellico_debug.h" 0037 0038 #include <KLocalizedString> 0039 #include <KIO/Job> 0040 #include <KIO/JobUiDelegate> 0041 #include <KConfigGroup> 0042 #include <KJobWidgets/KJobWidgets> 0043 0044 #include <QDomDocument> 0045 #include <QLabel> 0046 #include <QTextStream> 0047 #include <QPixmap> 0048 #include <QVBoxLayout> 0049 #include <QFile> 0050 #include <QUrlQuery> 0051 0052 namespace { 0053 static const int ARXIV_RETURNS_PER_REQUEST = 20; 0054 static const char* ARXIV_BASE_URL = "http://export.arxiv.org/api/query"; 0055 } 0056 0057 using namespace Tellico; 0058 using namespace Tellico::Fetch; 0059 using Tellico::Fetch::ArxivFetcher; 0060 0061 ArxivFetcher::ArxivFetcher(QObject* parent_) 0062 : Fetcher(parent_), m_xsltHandler(nullptr), m_start(0), m_total(-1), m_job(nullptr), m_started(false) { 0063 } 0064 0065 ArxivFetcher::~ArxivFetcher() { 0066 delete m_xsltHandler; 0067 m_xsltHandler = nullptr; 0068 } 0069 0070 QString ArxivFetcher::source() const { 0071 return m_name.isEmpty() ? defaultName() : m_name; 0072 } 0073 0074 bool ArxivFetcher::canSearch(Fetch::FetchKey k) const { 0075 return k == Title || k == Person || k == Keyword || k == ArxivID; 0076 } 0077 0078 bool ArxivFetcher::canFetch(int type) const { 0079 return type == Data::Collection::Bibtex; 0080 } 0081 0082 void ArxivFetcher::readConfigHook(const KConfigGroup&) { 0083 } 0084 0085 void ArxivFetcher::search() { 0086 m_started = true; 0087 m_start = 0; 0088 m_total = -1; 0089 doSearch(); 0090 } 0091 0092 void ArxivFetcher::continueSearch() { 0093 m_started = true; 0094 doSearch(); 0095 } 0096 0097 void ArxivFetcher::doSearch() { 0098 QUrl u = searchURL(request().key(), request().value()); 0099 if(u.isEmpty()) { 0100 stop(); 0101 return; 0102 } 0103 0104 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); 0105 KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); 0106 connect(m_job.data(), &KJob::result, 0107 this, &ArxivFetcher::slotComplete); 0108 } 0109 0110 void ArxivFetcher::stop() { 0111 if(!m_started) { 0112 return; 0113 } 0114 // myDebug(); 0115 if(m_job) { 0116 m_job->kill(); 0117 m_job = nullptr; 0118 } 0119 m_started = false; 0120 emit signalDone(this); 0121 } 0122 0123 void ArxivFetcher::slotComplete(KJob*) { 0124 // myDebug(); 0125 0126 if(m_job->error()) { 0127 m_job->uiDelegate()->showErrorMessage(); 0128 stop(); 0129 return; 0130 } 0131 0132 QByteArray data = m_job->data(); 0133 if(data.isEmpty()) { 0134 myDebug() << "no data"; 0135 stop(); 0136 return; 0137 } 0138 0139 // since the fetch is done, don't worry about holding the job pointer 0140 m_job = nullptr; 0141 #if 0 0142 myWarning() << "Remove debug from arxivfetcher.cpp"; 0143 QFile f(QLatin1String("/tmp/test.xml")); 0144 if(f.open(QIODevice::WriteOnly)) { 0145 QTextStream t(&f); 0146 t.setCodec("UTF-8"); 0147 t << data; 0148 } 0149 f.close(); 0150 #endif 0151 0152 if(!m_xsltHandler) { 0153 initXSLTHandler(); 0154 if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading 0155 stop(); 0156 return; 0157 } 0158 } 0159 0160 if(m_total == -1) { 0161 QDomDocument dom; 0162 if(!dom.setContent(data, true /*namespace*/)) { 0163 myWarning() << "server did not return valid XML."; 0164 stop(); 0165 return; 0166 } 0167 // total is top level element, with attribute totalResultsAvailable 0168 QDomNodeList list = dom.elementsByTagNameNS(XML::nsOpenSearch, 0169 QStringLiteral("totalResults")); 0170 if(list.count() > 0) { 0171 m_total = list.item(0).toElement().text().toInt(); 0172 } 0173 } 0174 0175 // assume result is always utf-8 0176 QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(data.constData(), data.size())); 0177 Import::TellicoImporter imp(str); 0178 Data::CollPtr coll = imp.collection(); 0179 0180 if(!coll) { 0181 myDebug() << "no valid result"; 0182 stop(); 0183 return; 0184 } 0185 0186 foreach(Data::EntryPtr entry, coll->entries()) { 0187 if(!m_started) { 0188 // might get aborted 0189 break; 0190 } 0191 FetchResult* r = new FetchResult(this, entry); 0192 m_entries.insert(r->uid, entry); 0193 emit signalResultFound(r); 0194 } 0195 0196 m_start = m_entries.count(); 0197 m_hasMoreResults = m_start < m_total; 0198 stop(); // required 0199 } 0200 0201 Tellico::Data::EntryPtr ArxivFetcher::fetchEntryHook(uint uid_) { 0202 Data::EntryPtr entry = m_entries[uid_]; 0203 // if URL but no cover image, fetch it 0204 if(!entry->field(QStringLiteral("url")).isEmpty()) { 0205 Data::CollPtr coll = entry->collection(); 0206 Data::FieldPtr field = coll->fieldByName(QStringLiteral("cover")); 0207 if(!field && !coll->imageFields().isEmpty()) { 0208 field = coll->imageFields().front(); 0209 } else if(!field) { 0210 field = Data::Field::createDefaultField(Data::Field::FrontCoverField); 0211 coll->addField(field); 0212 } 0213 if(entry->field(field).isEmpty()) { 0214 QPixmap pix = NetAccess::filePreview(QUrl::fromUserInput(entry->field(QStringLiteral("url")))); 0215 if(!pix.isNull()) { 0216 QString id = ImageFactory::addImage(pix, QStringLiteral("PNG")); 0217 if(!id.isEmpty()) { 0218 entry->setField(field, id); 0219 } 0220 } 0221 } 0222 } 0223 static const QRegularExpression versionRx(QLatin1String("v\\d+$")); 0224 // if the original search was not for a versioned ID, remove it 0225 if(request().key() != ArxivID || !request().value().contains(versionRx)) { 0226 QString arxiv = entry->field(QStringLiteral("arxiv")); 0227 arxiv.remove(versionRx); 0228 entry->setField(QStringLiteral("arxiv"), arxiv); 0229 } 0230 return entry; 0231 } 0232 0233 void ArxivFetcher::initXSLTHandler() { 0234 QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("arxiv2tellico.xsl")); 0235 if(xsltfile.isEmpty()) { 0236 myWarning() << "can not locate arxiv2tellico.xsl."; 0237 return; 0238 } 0239 0240 QUrl u = QUrl::fromLocalFile(xsltfile); 0241 0242 delete m_xsltHandler; 0243 m_xsltHandler = new XSLTHandler(u); 0244 if(!m_xsltHandler->isValid()) { 0245 myWarning() << "error in arxiv2tellico.xsl."; 0246 delete m_xsltHandler; 0247 m_xsltHandler = nullptr; 0248 return; 0249 } 0250 } 0251 0252 QUrl ArxivFetcher::searchURL(FetchKey key_, const QString& value_) const { 0253 QUrl u(QString::fromLatin1(ARXIV_BASE_URL)); 0254 QUrlQuery q; 0255 q.addQueryItem(QStringLiteral("start"), QString::number(m_start)); 0256 q.addQueryItem(QStringLiteral("max_results"), QString::number(ARXIV_RETURNS_PER_REQUEST)); 0257 0258 // quotes should be used if spaces are present 0259 QString value = value_; 0260 value.replace(QLatin1Char(' '), QLatin1Char('+')); 0261 // seems to have problems with dashes, too 0262 value.replace(QLatin1Char('-'), QLatin1Char('+')); 0263 0264 QString query; 0265 switch(key_) { 0266 case Title: 0267 query = QStringLiteral("ti:%1").arg(value); 0268 break; 0269 0270 case Person: 0271 query = QStringLiteral("au:%1").arg(value); 0272 break; 0273 0274 case Keyword: 0275 // keyword gets to use all the words without being quoted 0276 query = QStringLiteral("all:%1").arg(value); 0277 break; 0278 0279 case ArxivID: 0280 { 0281 // remove prefix and/or version number 0282 QString value = value_; 0283 static const QRegularExpression arxivRx(QLatin1String("^arxiv:"), QRegularExpression::CaseInsensitiveOption); 0284 static const QRegularExpression vRx(QLatin1String("v\\d+$")); 0285 value.remove(arxivRx); 0286 value.remove(vRx); 0287 query = QStringLiteral("id:%1").arg(value); 0288 } 0289 break; 0290 0291 default: 0292 myWarning() << source() << "- key not recognized:" << request().key(); 0293 return QUrl(); 0294 } 0295 q.addQueryItem(QStringLiteral("search_query"), query); 0296 u.setQuery(q); 0297 0298 // myDebug() << "url: " << u; 0299 return u; 0300 } 0301 0302 Tellico::Fetch::FetchRequest ArxivFetcher::updateRequest(Data::EntryPtr entry_) { 0303 QString id = entry_->field(QStringLiteral("arxiv")); 0304 if(!id.isEmpty()) { 0305 // remove prefix and/or version number 0306 static const QRegularExpression arxivRx(QLatin1String("^arxiv:"), QRegularExpression::CaseInsensitiveOption); 0307 static const QRegularExpression vRx(QLatin1String("v\\d+$")); 0308 id.remove(arxivRx); 0309 id.remove(vRx); 0310 return FetchRequest(Fetch::ArxivID, id); 0311 } 0312 0313 // optimistically try searching for title and rely on Collection::sameEntry() to figure things out 0314 QString t = entry_->field(QStringLiteral("title")); 0315 if(!t.isEmpty()) { 0316 return FetchRequest(Fetch::Title, t); 0317 } 0318 0319 return FetchRequest(); 0320 } 0321 0322 Tellico::Fetch::ConfigWidget* ArxivFetcher::configWidget(QWidget* parent_) const { 0323 return new ArxivFetcher::ConfigWidget(parent_, this); 0324 } 0325 0326 QString ArxivFetcher::defaultName() { 0327 return QStringLiteral("arXiv.org"); // no translation 0328 } 0329 0330 QString ArxivFetcher::defaultIcon() { 0331 return favIcon("http://arxiv.org"); 0332 } 0333 0334 ArxivFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const ArxivFetcher*) 0335 : Fetch::ConfigWidget(parent_) { 0336 QVBoxLayout* l = new QVBoxLayout(optionsWidget()); 0337 l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); 0338 l->addStretch(); 0339 } 0340 0341 void ArxivFetcher::ConfigWidget::saveConfigHook(KConfigGroup&) { 0342 } 0343 0344 QString ArxivFetcher::ConfigWidget::preferredName() const { 0345 return ArxivFetcher::defaultName(); 0346 }