// File indexing completed on 2024-05-19 05:05:37
0001 /*************************************************************************** 0002 * SPDX-License-Identifier: GPL-2.0-or-later 0003 * * 0004 * SPDX-FileCopyrightText: 2004-2023 Thomas Fischer <fischer@unix-ag.uni-kl.de> 0005 * * 0006 * This program is free software; you can redistribute it and/or modify * 0007 * it under the terms of the GNU General Public License as published by * 0008 * the Free Software Foundation; either version 2 of the License, or * 0009 * (at your option) any later version. * 0010 * * 0011 * This program is distributed in the hope that it will be useful, * 0012 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0014 * GNU General Public License for more details. * 0015 * * 0016 * You should have received a copy of the GNU General Public License * 0017 * along with this program; if not, see <https://www.gnu.org/licenses/>. * 0018 ***************************************************************************/ 0019 0020 #include "fileimporterris.h" 0021 0022 #include <QIODevice> 0023 #include <QVector> 0024 #include <QTextStream> 0025 #include <QRegularExpression> 0026 #include <QCoreApplication> 0027 #include <QStringList> 0028 0029 #include <Preferences> 0030 #include <KBibTeX> 0031 #include <Entry> 0032 #include <Value> 0033 #include "fileexporter.h" 0034 #include "fileimporter_p.h" 0035 #include "logging_io.h" 0036 0037 #define appendValue(entry, fieldname, newvalue) { Value value = (entry)->value((fieldname)); value.append((newvalue)); (entry)->insert((fieldname), value); } 0038 #define removeDuplicates(entry, fieldname) { Value value = (entry)->value((fieldname)); if (!(value).isEmpty()) removeDuplicateValueItems((value)); if (!(value).isEmpty()) (entry)->insert((fieldname), value); } 0039 0040 class FileImporterRIS::FileImporterRISPrivate 0041 { 0042 public: 0043 FileImporterRIS *parent; 0044 int referenceCounter; 0045 bool cancelFlag; 0046 bool 
protectCasing; 0047 0048 typedef struct { 0049 QString key; 0050 QString value; 0051 } 0052 RISitem; 0053 typedef QVector<RISitem> RISitemList; 0054 0055 FileImporterRISPrivate(FileImporterRIS *_parent) 0056 : parent(_parent), referenceCounter(0), cancelFlag(false), protectCasing(false) { 0057 /// nothing 0058 } 0059 0060 RISitemList readElement(QTextStream &textStream) { 0061 RISitemList result; 0062 QString line = textStream.readLine(); 0063 while (!line.startsWith(QStringLiteral("TY - ")) && !textStream.atEnd()) 0064 line = textStream.readLine(); 0065 if (textStream.atEnd()) 0066 return result; 0067 0068 QString key, value; 0069 while (!line.startsWith(QStringLiteral("ER -")) && !textStream.atEnd()) { 0070 if (line.mid(2, 3) == QStringLiteral(" -")) { 0071 if (!value.isEmpty()) { 0072 RISitem item; 0073 item.key = key; 0074 item.value = value; 0075 result.append(item); 0076 } 0077 0078 key = line.left(2); 0079 value = line.mid(6).simplified(); 0080 } else { 0081 line = line.simplified(); 0082 if (line.length() > 1) { 0083 /// multi-line field are joined to one long line 0084 value += QLatin1Char(' ') + line; 0085 } 0086 } 0087 0088 line = textStream.readLine(); 0089 } 0090 if (!line.startsWith(QStringLiteral("ER -")) && textStream.atEnd()) { 0091 qCWarning(LOG_KBIBTEX_IO) << "Expected that entry that starts with 'TY' ends with 'ER' but instead met end of file"; 0092 /// Instead of an 'emit' ... 
0093 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0094 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QStringLiteral("Expected that entry that starts with 'TY' ends with 'ER' but instead met end of file"))); 0095 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0096 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QStringLiteral("Expected that entry that starts with 'TY' ends with 'ER' but instead met end of file"))); 0097 #endif 0098 } 0099 if (!value.isEmpty()) { 0100 RISitem item; 0101 item.key = key; 0102 item.value = value; 0103 result.append(item); 0104 } 0105 0106 return result; 0107 } 0108 0109 inline QString optionallyProtectCasing(const QString &text) const { 0110 if (protectCasing) 0111 return QLatin1Char('{') + text + QLatin1Char('}'); 0112 else 0113 return text; 0114 } 0115 0116 Element *nextElement(QTextStream &textStream) { 0117 RISitemList list = readElement(textStream); 0118 if (list.empty()) 0119 return nullptr; 0120 0121 QString entryType = Entry::etMisc; 0122 Entry *entry = new Entry(entryType, QString(QStringLiteral("RIS_%1")).arg(referenceCounter++)); 0123 QString journalName, startPage, endPage, date; 0124 int fieldCounter = 0; 0125 0126 for (RISitemList::iterator it = list.begin(); it != list.end(); ++it) { 0127 if ((*it).key == QStringLiteral("TY")) { 0128 if ((*it).value.startsWith(QStringLiteral("BOOK")) || (*it).value.startsWith(QStringLiteral("SER"))) 0129 entryType = Entry::etBook; 0130 else if ((*it).value.startsWith(QStringLiteral("CHAP"))) 0131 entryType = Entry::etInBook; 0132 else if ((*it).value.startsWith(QStringLiteral("CONF"))) 0133 entryType = Entry::etInProceedings; 0134 else if ((*it).value.startsWith(QStringLiteral("JFULL")) || (*it).value.startsWith(QStringLiteral("JOUR")) || 
(*it).value.startsWith(QStringLiteral("MGZN"))) 0135 entryType = Entry::etArticle; 0136 else if ((*it).value.startsWith(QStringLiteral("RPRT"))) 0137 entryType = Entry::etTechReport; 0138 else if ((*it).value.startsWith(QStringLiteral("THES"))) 0139 entryType = Entry::etPhDThesis; // FIXME what about etMastersThesis? 0140 else if ((*it).value.startsWith(QStringLiteral("UNPB"))) 0141 entryType = Entry::etUnpublished; 0142 entry->setType(entryType); 0143 } else if ((*it).key == QStringLiteral("AU") || (*it).key == QStringLiteral("A1")) { 0144 Person *person = splitName((*it).value); 0145 if (person != nullptr) 0146 appendValue(entry, Entry::ftAuthor, QSharedPointer<Person>(person)); 0147 } else if ((*it).key == QStringLiteral("ED") || (*it).key == QStringLiteral("A2")) { 0148 Person *person = splitName((*it).value); 0149 if (person != nullptr) 0150 appendValue(entry, Entry::ftEditor, QSharedPointer<Person>(person)); 0151 } else if ((*it).key == QStringLiteral("ID")) { 0152 entry->setId((*it).value); 0153 } else if ((*it).key == QStringLiteral("Y1") || (*it).key == QStringLiteral("PY")) { 0154 date = (*it).value; 0155 } else if ((*it).key == QStringLiteral("Y2")) { 0156 if (date.isEmpty()) 0157 date = (*it).value; 0158 } else if ((*it).key == QStringLiteral("AB") || (*it).key == QStringLiteral("N2")) { 0159 appendValue(entry, Entry::ftAbstract, QSharedPointer<PlainText>(new PlainText((*it).value))); 0160 } else if ((*it).key == QStringLiteral("N1")) { 0161 appendValue(entry, Entry::ftNote, QSharedPointer<PlainText>(new PlainText((*it).value))); 0162 } else if ((*it).key == QStringLiteral("KW")) { 0163 QString text = (*it).value; 0164 const QRegularExpression splitRegExp(text.contains(QStringLiteral(";")) ? QStringLiteral("\\s*[;\\n]\\s*") : (text.contains(QStringLiteral(",")) ? 
QStringLiteral("\\s*[,\\n]\\s*") : QStringLiteral("\\n"))); 0165 #if QT_VERSION >= 0x050e00 0166 QStringList newKeywords = text.split(splitRegExp, Qt::SkipEmptyParts); 0167 #else // QT_VERSION < 0x050e00 0168 QStringList newKeywords = text.split(splitRegExp, QString::SkipEmptyParts); 0169 #endif // QT_VERSION >= 0x050e00 0170 for (QStringList::Iterator it = newKeywords.begin(); it != newKeywords.end(); ++it) 0171 appendValue(entry, Entry::ftKeywords, QSharedPointer<Keyword>(new Keyword(*it))); 0172 } else if ((*it).key == QStringLiteral("TI") || (*it).key == QStringLiteral("T1")) { 0173 appendValue(entry, Entry::ftTitle, QSharedPointer<PlainText>(new PlainText(optionallyProtectCasing((*it).value)))); 0174 } else if ((*it).key == QStringLiteral("T3")) { 0175 appendValue(entry, Entry::ftSeries, QSharedPointer<PlainText>(new PlainText((*it).value))); 0176 } else if ((*it).key == QStringLiteral("JO") || (*it).key == QStringLiteral("J1") || (*it).key == QStringLiteral("J2")) { 0177 if (journalName.isEmpty()) 0178 journalName = (*it).value; 0179 } else if ((*it).key == QStringLiteral("JF") || (*it).key == QStringLiteral("JA")) { 0180 journalName = (*it).value; 0181 } else if ((*it).key == QStringLiteral("VL")) { 0182 appendValue(entry, Entry::ftVolume, QSharedPointer<PlainText>(new PlainText((*it).value))); 0183 } else if ((*it).key == QStringLiteral("CP")) { 0184 appendValue(entry, Entry::ftChapter, QSharedPointer<PlainText>(new PlainText((*it).value))); 0185 } else if ((*it).key == QStringLiteral("IS")) { 0186 appendValue(entry, Entry::ftNumber, QSharedPointer<PlainText>(new PlainText((*it).value))); 0187 } else if ((*it).key == QStringLiteral("DO") || (*it).key == QStringLiteral("M3")) { 0188 const QRegularExpressionMatch doiRegExpMatch = KBibTeX::doiRegExp.match((*it).value); 0189 if (doiRegExpMatch.hasMatch()) 0190 appendValue(entry, Entry::ftDOI, QSharedPointer<VerbatimText>(new VerbatimText(doiRegExpMatch.captured(QStringLiteral("doi"))))); 0191 } else if 
((*it).key == QStringLiteral("PB")) { 0192 appendValue(entry, Entry::ftPublisher, QSharedPointer<PlainText>(new PlainText((*it).value))); 0193 } else if ((*it).key == QStringLiteral("IN")) { 0194 appendValue(entry, Entry::ftSchool, QSharedPointer<PlainText>(new PlainText((*it).value))); 0195 } else if ((*it).key == QStringLiteral("SN")) { 0196 const QString fieldName = entryType == Entry::etBook || entryType == Entry::etInBook ? Entry::ftISBN : Entry::ftISSN; 0197 appendValue(entry, fieldName, QSharedPointer<PlainText>(new PlainText((*it).value))); 0198 } else if ((*it).key == QStringLiteral("CY")) { 0199 appendValue(entry, Entry::ftLocation, QSharedPointer<PlainText>(new PlainText((*it).value))); 0200 } else if ((*it).key == QStringLiteral("AD")) { 0201 appendValue(entry, Entry::ftAddress, QSharedPointer<PlainText>(new PlainText((*it).value))); 0202 } else if ((*it).key == QStringLiteral("L1") || (*it).key == QStringLiteral("L2") || (*it).key == QStringLiteral("L3") || (*it).key == QStringLiteral("UR")) { 0203 QString fieldValue = (*it).value; 0204 fieldValue.replace(QStringLiteral("<Go to ISI>://"), QStringLiteral("isi://")); 0205 const QRegularExpressionMatch doiRegExpMatch = KBibTeX::doiRegExp.match(fieldValue); 0206 const QRegularExpressionMatch urlRegExpMatch = KBibTeX::urlRegExp.match(fieldValue); 0207 const QString fieldName = doiRegExpMatch.hasMatch() ? Entry::ftDOI : (KBibTeX::urlRegExp.match((*it).value).hasMatch() ? Entry::ftUrl : (Preferences::instance().bibliographySystem() == Preferences::BibliographySystem::BibTeX ? Entry::ftLocalFile : Entry::ftFile)); 0208 fieldValue = doiRegExpMatch.hasMatch() ? doiRegExpMatch.captured(QStringLiteral("doi")) : (urlRegExpMatch.hasMatch() ? 
urlRegExpMatch.captured() : fieldValue); 0209 if (fieldValue.startsWith(QStringLiteral("file:///"))) fieldValue = fieldValue.mid(7); 0210 appendValue(entry, fieldName, QSharedPointer<VerbatimText>(new VerbatimText(fieldValue))); 0211 } else if ((*it).key == QStringLiteral("SP")) { 0212 startPage = (*it).value; 0213 } else if ((*it).key == QStringLiteral("EP")) { 0214 endPage = (*it).value; 0215 } else { 0216 const QString fieldName = QString(QStringLiteral("RISfield_%1_%2")).arg(fieldCounter++).arg((*it).key.left(2)); 0217 appendValue(entry, fieldName, QSharedPointer<PlainText>(new PlainText((*it).value))); 0218 } 0219 } 0220 0221 if (!journalName.isEmpty()) { 0222 const QString fieldName = entryType == Entry::etInBook || entryType == Entry::etInProceedings ? Entry::ftBookTitle : Entry::ftJournal; 0223 Value value = entry->value(fieldName); 0224 value.append(QSharedPointer<PlainText>(new PlainText(optionallyProtectCasing(journalName)))); 0225 entry->insert(fieldName, value); 0226 } 0227 0228 if (!startPage.isEmpty() || !endPage.isEmpty()) { 0229 QString page; 0230 if (startPage.isEmpty()) 0231 page = endPage; 0232 else if (endPage.isEmpty()) 0233 page = startPage; 0234 else 0235 page = startPage + QChar(0x2013) + endPage; 0236 0237 Value value; 0238 value.append(QSharedPointer<PlainText>(new PlainText(page))); 0239 entry->insert(Entry::ftPages, value); 0240 } 0241 0242 #if QT_VERSION >= 0x050e00 0243 QStringList dateFragments = date.split(QStringLiteral("/"), Qt::SkipEmptyParts); 0244 #else // QT_VERSION < 0x050e00 0245 QStringList dateFragments = date.split(QStringLiteral("/"), QString::SkipEmptyParts); 0246 #endif // QT_VERSION >= 0x050e00 0247 if (dateFragments.count() > 0) { 0248 bool ok; 0249 int year = dateFragments[0].toInt(&ok); 0250 if (ok && year > 1000 && year < 3000) { 0251 Value value = entry->value(Entry::ftYear); 0252 value.append(QSharedPointer<PlainText>(new PlainText(QString::number(year)))); 0253 entry->insert(Entry::ftYear, value); 0254 } else { 
0255 qCWarning(LOG_KBIBTEX_IO) << "Invalid year: " << dateFragments[0]; 0256 /// Instead of an 'emit' ... 0257 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0258 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Invalid year: '%1'")).arg(dateFragments[0]))); 0259 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0260 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Invalid year: '%1'")).arg(dateFragments[0]))); 0261 #endif 0262 } 0263 } 0264 if (dateFragments.count() > 1) { 0265 bool ok; 0266 int month = FileExporter::monthStringToNumber(dateFragments[1], &ok); 0267 if (ok && month >= 1 && month <= 12) { 0268 Value value = entry->value(Entry::ftMonth); 0269 value.append(QSharedPointer<MacroKey>(new MacroKey(KBibTeX::MonthsTriple[month - 1]))); 0270 entry->insert(Entry::ftMonth, value); 0271 } else { 0272 qCWarning(LOG_KBIBTEX_IO) << "Invalid month: " << dateFragments[1]; 0273 /// Instead of an 'emit' ... 
0274 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0) 0275 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Invalid month: '%1'")).arg(dateFragments[1]))); 0276 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) 0277 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Invalid month: '%1'")).arg(dateFragments[1]))); 0278 #endif 0279 } 0280 } 0281 0282 removeDuplicates(entry, Entry::ftDOI); 0283 removeDuplicates(entry, Entry::ftUrl); 0284 0285 return entry; 0286 } 0287 0288 void removeDuplicateValueItems(Value &value) { 0289 if (value.count() < 2) return; /// Values with one or no ValueItem cannot have duplicates 0290 0291 QSet<QString> uniqueStrings; 0292 for (Value::Iterator it = value.begin(); it != value.end();) { 0293 const QString itemString = PlainTextValue::text(*it); 0294 if (uniqueStrings.contains(itemString)) 0295 it = value.erase(it); 0296 else { 0297 uniqueStrings.insert(itemString); 0298 ++it; 0299 } 0300 } 0301 } 0302 }; 0303 0304 FileImporterRIS::FileImporterRIS(QObject *parent) 0305 : FileImporter(parent), d(new FileImporterRISPrivate(this)) 0306 { 0307 // nothing 0308 } 0309 0310 0311 FileImporterRIS::~FileImporterRIS() 0312 { 0313 delete d; 0314 } 0315 0316 File *FileImporterRIS::load(QIODevice *iodevice) 0317 { 0318 check_if_iodevice_invalid(iodevice); 0319 0320 d->cancelFlag = false; 0321 d->referenceCounter = 0; 0322 QTextStream textStream(iodevice); 0323 0324 File *result = new File(); 0325 while (!d->cancelFlag && !textStream.atEnd()) { 0326 Q_EMIT progress(textStream.pos(), iodevice->size()); 0327 QCoreApplication::instance()->processEvents(); 0328 Element *element = d->nextElement(textStream); 0329 if (element != nullptr) 0330 
result->append(QSharedPointer<Element>(element)); 0331 QCoreApplication::instance()->processEvents(); 0332 } 0333 Q_EMIT progress(100, 100); 0334 0335 if (d->cancelFlag) { 0336 delete result; 0337 result = nullptr; 0338 } 0339 0340 iodevice->close(); 0341 0342 if (result != nullptr) 0343 result->setProperty(File::ProtectCasing, static_cast<int>(d->protectCasing ? Qt::Checked : Qt::Unchecked)); 0344 0345 return result; 0346 } 0347 0348 bool FileImporterRIS::guessCanDecode(const QString &text) 0349 { 0350 return text.indexOf(QStringLiteral("TY - ")) >= 0; 0351 } 0352 0353 void FileImporterRIS::setProtectCasing(bool protectCasing) 0354 { 0355 d->protectCasing = protectCasing; 0356 } 0357 0358 void FileImporterRIS::cancel() 0359 { 0360 d->cancelFlag = true; 0361 }