File indexing completed on 2024-11-24 04:34:32

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2023 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #include "onlinesearchsemanticscholar.h"
0021 
0022 #include <QJsonObject>
0023 #include <QJsonArray>
0024 #include <QJsonDocument>
0025 #include <QNetworkRequest>
0026 #include <QNetworkReply>
0027 #include <QRegularExpression>
0028 #ifdef HAVE_QTWIDGETS
0029 #include <QLineEdit>
0030 #include <QFormLayout>
0031 #endif // HAVE_QTWIDGETS
0032 
0033 #ifdef HAVE_KF
0034 #include <KConfigGroup>
0035 #include <KLocalizedString>
0036 #endif // HAVE_KF
0037 
0038 #include <KBibTeX>
0039 #include <FileImporterBibTeX>
0040 #include "internalnetworkaccessmanager.h"
0041 #include "onlinesearchabstract_p.h"
0042 #include "logging_networking.h"
0043 
0044 #ifdef HAVE_QTWIDGETS
0045 class OnlineSearchSemanticScholar::OnlineSearchQueryFormSemanticScholar : public OnlineSearchAbstract::Form
0046 {
0047     Q_OBJECT
0048 
0049 private:
0050     QString configGroupName;
0051 
0052     void loadState() {
0053         KConfigGroup configGroup(d->config, configGroupName);
0054         lineEditPaperReference->setText(configGroup.readEntry(QStringLiteral("paperReference"), QString()));
0055     }
0056 
0057 public:
0058     QLineEdit *lineEditPaperReference;
0059 
0060     OnlineSearchQueryFormSemanticScholar(QWidget *widget)
0061             : OnlineSearchAbstract::Form(widget), configGroupName(QStringLiteral("Search Engine Semantic Scholar")) {
0062         QFormLayout *layout = new QFormLayout(this);
0063         layout->setContentsMargins(0, 0, 0, 0);
0064 
0065         lineEditPaperReference = new QLineEdit(this);
0066         lineEditPaperReference->setClearButtonEnabled(true);
0067         lineEditPaperReference->setFocus(Qt::TabFocusReason);
0068         layout->addRow(i18n("Paper Reference:"), lineEditPaperReference);
0069         connect(lineEditPaperReference, &QLineEdit::returnPressed, this, &OnlineSearchQueryFormSemanticScholar::returnPressed);
0070 
0071         loadState();
0072     }
0073 
0074     bool readyToStart() const override {
0075         return !lineEditPaperReference->text().isEmpty();
0076     }
0077 
0078     void copyFromEntry(const Entry &entry) override {
0079         lineEditPaperReference->setText(d->guessFreeText(entry));
0080     }
0081 
0082     void saveState() {
0083         KConfigGroup configGroup(d->config, configGroupName);
0084         configGroup.writeEntry(QStringLiteral("paperReference"), lineEditPaperReference->text());
0085         d->config->sync();
0086     }
0087 };
0088 #endif // HAVE_QTWIDGETS
0089 
0090 
0091 class OnlineSearchSemanticScholar::OnlineSearchSemanticScholarPrivate
0092 {
0093 public:
0094 #ifdef HAVE_QTWIDGETS
0095     OnlineSearchQueryFormSemanticScholar *form;
0096 #endif // HAVE_QTWIDGETS
0097 
0098     OnlineSearchSemanticScholarPrivate(OnlineSearchSemanticScholar *parent)
0099 #ifdef HAVE_QTWIDGETS
0100             : form(nullptr)
0101 #endif // HAVE_QTWIDGETS
0102     {
0103         Q_UNUSED(parent)
0104     }
0105 
0106 #ifdef HAVE_QTWIDGETS
0107     QUrl buildQueryUrl() {
0108         if (form == nullptr) {
0109             qCWarning(LOG_KBIBTEX_NETWORKING) << "Cannot build query url if no form is specified";
0110             return QUrl();
0111         }
0112 
0113         const QRegularExpressionMatch doiRegExpMatch = KBibTeX::doiRegExp.match(form->lineEditPaperReference->text());
0114         if (doiRegExpMatch.hasMatch())
0115             return QUrl(QStringLiteral("https://api.semanticscholar.org/v1/paper/") + doiRegExpMatch.captured(QStringLiteral("doi")));
0116         else {
0117             const QRegularExpressionMatch arXivRegExpMatch = KBibTeX::arXivRegExp.match(form->lineEditPaperReference->text());
0118             if (arXivRegExpMatch.hasMatch())
0119                 return QUrl(QStringLiteral("https://api.semanticscholar.org/v1/paper/arXiv:") + arXivRegExpMatch.captured(QStringLiteral("arxiv")));
0120         }
0121 
0122         return QUrl();
0123     }
0124 #endif // HAVE_QTWIDGETS
0125 
0126     QUrl buildQueryUrl(const QMap<QueryKey, QString> &query, int numResults) {
0127         Q_UNUSED(numResults)
0128 
0129         const QRegularExpressionMatch doiRegExpMatch = KBibTeX::doiRegExp.match(query[QueryKey::FreeText]);
0130         if (doiRegExpMatch.hasMatch())
0131             return QUrl(QStringLiteral("https://api.semanticscholar.org/v1/paper/") + doiRegExpMatch.captured(QStringLiteral("doi")));
0132         else {
0133             const QRegularExpressionMatch arXivRegExpMatch = KBibTeX::arXivRegExp.match(query[QueryKey::FreeText]);
0134             if (arXivRegExpMatch.hasMatch())
0135                 return QUrl(QStringLiteral("https://api.semanticscholar.org/v1/paper/arXiv:") + arXivRegExpMatch.captured(QStringLiteral("arxiv")));
0136         }
0137         return QUrl();
0138     }
0139 
0140     Entry *entryFromJsonObject(const QJsonObject &object) const {
0141         const QString title = object.value(QStringLiteral("title")).toString();
0142         const QString paperId = object.value(QStringLiteral("paperId")).toString();
0143         const int year = object.value(QStringLiteral("year")).toInt(-1);
0144         /// Basic sanity check
0145         if (title.isEmpty() || paperId.isEmpty() || year < 1700)
0146             return nullptr;
0147 
0148         Entry *entry = new Entry(Entry::etMisc, QStringLiteral("SemanticScholar:") + paperId);
0149         entry->insert(Entry::ftTitle, Value() << QSharedPointer<PlainText>(new PlainText(title)));
0150         entry->insert(QStringLiteral("x-paperId"), Value() << QSharedPointer<VerbatimText>(new VerbatimText(paperId)));
0151         entry->insert(Entry::ftYear, Value() << QSharedPointer<PlainText>(new PlainText(QString::number(year))));
0152 
0153         const QString doi = object.value(QStringLiteral("doi")).toString();
0154         const QRegularExpressionMatch doiRegExpMatch = KBibTeX::doiRegExp.match(doi);
0155         if (doiRegExpMatch.hasMatch())
0156             entry->insert(Entry::ftDOI, Value() << QSharedPointer<VerbatimText>(new VerbatimText(doiRegExpMatch.captured(QStringLiteral("doi")))));
0157 
0158         const QString arxivId = object.value(QStringLiteral("arxivId")).toString();
0159         const QRegularExpressionMatch arXivRegExpMatch = KBibTeX::arXivRegExp.match(arxivId);
0160         if (arXivRegExpMatch.hasMatch())
0161             entry->insert(QStringLiteral("eprint"), Value() << QSharedPointer<VerbatimText>(new VerbatimText(arXivRegExpMatch.captured(QStringLiteral("arxiv")))));
0162 
0163         const QJsonArray authorArray = object.value(QStringLiteral("authors")).toArray();
0164         Value authors;
0165         for (const QJsonValue &author : authorArray) {
0166             const QString name = author.toObject().value(QStringLiteral("name")).toString();
0167             if (!name.isEmpty()) {
0168                 QSharedPointer<Person> person = FileImporterBibTeX::personFromString(name);
0169                 if (!person.isNull())
0170                     authors.append(person);
0171             }
0172         }
0173         if (!authors.isEmpty())
0174             entry->insert(Entry::ftAuthor, authors);
0175 
0176         return entry;
0177     }
0178 };
0179 
0180 
0181 OnlineSearchSemanticScholar::OnlineSearchSemanticScholar(QObject *parent)
0182         : OnlineSearchAbstract(parent), d(new OnlineSearchSemanticScholarPrivate(this))
0183 {
0184     /// nothing
0185 }
0186 
0187 OnlineSearchSemanticScholar::~OnlineSearchSemanticScholar()
0188 {
0189     delete d;
0190 }
0191 
0192 #ifdef HAVE_QTWIDGETS
0193 void OnlineSearchSemanticScholar::startSearchFromForm()
0194 {
0195     m_hasBeenCanceled = false;
0196     Q_EMIT progress(curStep = 0, numSteps = 1);
0197 
0198     const QUrl url = d->buildQueryUrl();
0199     if (url.isValid()) {
0200         QNetworkRequest request(url);
0201         QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request);
0202         InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply);
0203         connect(reply, &QNetworkReply::finished, this, &OnlineSearchSemanticScholar::downloadDone);
0204 
0205         d->form->saveState();
0206     } else
0207         delayedStoppedSearch(resultNoError);
0208 
0209     refreshBusyProperty();
0210 }
0211 #endif // HAVE_QTWIDGETS
0212 
0213 void OnlineSearchSemanticScholar::startSearch(const QMap<QueryKey, QString> &query, int numResults)
0214 {
0215     m_hasBeenCanceled = false;
0216     Q_EMIT progress(curStep = 0, numSteps = 1);
0217 
0218     const QUrl url = d->buildQueryUrl(query, numResults);
0219     if (url.isValid()) {
0220         QNetworkRequest request(url);
0221         QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request);
0222         InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply);
0223         connect(reply, &QNetworkReply::finished, this, &OnlineSearchSemanticScholar::downloadDone);
0224     } else
0225         delayedStoppedSearch(resultNoError);
0226 
0227     refreshBusyProperty();
0228 }
0229 
0230 QString OnlineSearchSemanticScholar::label() const
0231 {
0232     return i18n("Semantic Scholar");
0233 }
0234 
0235 #ifdef HAVE_QTWIDGETS
0236 OnlineSearchAbstract::Form *OnlineSearchSemanticScholar::customWidget(QWidget *parent)
0237 {
0238     if (d->form == nullptr)
0239         d->form = new OnlineSearchSemanticScholar::OnlineSearchQueryFormSemanticScholar(parent);
0240     return d->form;
0241 }
0242 #endif // HAVE_QTWIDGETS
0243 
0244 QUrl OnlineSearchSemanticScholar::homepage() const
0245 {
0246     return QUrl(QStringLiteral("https://www.semanticscholar.org/"));
0247 }
0248 
0249 void OnlineSearchSemanticScholar::downloadDone()
0250 {
0251     Q_EMIT progress(++curStep, numSteps);
0252     QNetworkReply *reply = static_cast<QNetworkReply *>(sender());
0253 
0254     QUrl redirUrl;
0255     if (handleErrors(reply, redirUrl)) {
0256         if (redirUrl.isValid()) {
0257             /// redirection to another url
0258             ++numSteps;
0259 
0260             QNetworkRequest request(redirUrl);
0261             QNetworkReply *newReply = InternalNetworkAccessManager::instance().get(request);
0262             InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply);
0263             connect(newReply, &QNetworkReply::finished, this, &OnlineSearchSemanticScholar::downloadDone);
0264         } else {
0265             QJsonParseError parseError;
0266             const QJsonDocument document = QJsonDocument::fromJson(reply->readAll(), &parseError);
0267             if (parseError.error == QJsonParseError::NoError) {
0268                 if (document.isObject()) {
0269                     Entry *entry = d->entryFromJsonObject(document.object());
0270                     if (entry != nullptr) {
0271                         publishEntry(QSharedPointer<Entry>(entry));
0272                         stopSearch(resultNoError);
0273                     } else {
0274                         qCWarning(LOG_KBIBTEX_NETWORKING) << "Problem with JSON data from Semantic Scholar: Data could not be interpreted as a bibliographic entry";
0275                         stopSearch(resultUnspecifiedError);
0276                     }
0277                 } else {
0278                     qCWarning(LOG_KBIBTEX_NETWORKING) << "Problem with JSON data from Semantic Scholar: Document is not an object";
0279                     stopSearch(resultUnspecifiedError);
0280                 }
0281             } else {
0282                 qCWarning(LOG_KBIBTEX_NETWORKING) << "Problem with JSON data from Semantic Scholar: " << parseError.errorString();
0283                 stopSearch(resultUnspecifiedError);
0284             }
0285         }
0286     }
0287 
0288     refreshBusyProperty();
0289 }
0290 
0291 #include "onlinesearchsemanticscholar.moc"