File indexing completed on 2024-05-19 05:05:35
0001 /*************************************************************************** 0002 * SPDX-License-Identifier: GPL-2.0-or-later 0003 * * 0004 * SPDX-FileCopyrightText: 2004-2022 Thomas Fischer <fischer@unix-ag.uni-kl.de> 0005 * * 0006 * This program is free software; you can redistribute it and/or modify * 0007 * it under the terms of the GNU General Public License as published by * 0008 * the Free Software Foundation; either version 2 of the License, or * 0009 * (at your option) any later version. * 0010 * * 0011 * This program is distributed in the hope that it will be useful, * 0012 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0014 * GNU General Public License for more details. * 0015 * * 0016 * You should have received a copy of the GNU General Public License * 0017 * along with this program; if not, see <https://www.gnu.org/licenses/>. * 0018 ***************************************************************************/ 0019 0020 #include "fileimporter.h" 0021 0022 #include <QBuffer> 0023 #include <QTextStream> 0024 #include <QStringList> 0025 #include <QRegularExpression> 0026 #include <QFileInfo> 0027 0028 #include <Value> 0029 #include "fileimporterbibtex.h" 0030 #ifdef HAVE_POPPLERQT5 0031 #include "fileimporterpdf.h" 0032 #endif // HAVE_POPPLERQT5 0033 #include "fileimporterris.h" 0034 #include "fileimporterbibutils.h" 0035 #include "logging_io.h" 0036 0037 FileImporter::FileImporter(QObject *parent) 0038 : QObject(parent) 0039 { 0040 /// nothing 0041 } 0042 0043 FileImporter::~FileImporter() 0044 { 0045 /// nothing 0046 } 0047 0048 FileImporter *FileImporter::factory(const QFileInfo &fileInfo, QObject *parent) 0049 { 0050 const QString ending = fileInfo.completeSuffix().toLower(); 0051 0052 #ifdef HAVE_POPPLERQT5 0053 if (ending.endsWith(QStringLiteral("pdf"))) { 0054 return new FileImporterPDF(parent); 0055 } else 0056 #endif // HAVE_POPPLERQT5 0057 if (ending.endsWith(QStringLiteral("ris"))) { 0058 return new FileImporterRIS(parent); 0059 } else if (BibUtils::available() && ending.endsWith(QStringLiteral("isi"))) { 0060 FileImporterBibUtils *fileImporterBibUtils = new FileImporterBibUtils(parent); 0061 fileImporterBibUtils->setFormat(BibUtils::Format::ISI); 0062 return fileImporterBibUtils; 0063 } else { 0064 FileImporterBibTeX *fileImporterBibTeX = new FileImporterBibTeX(parent); 0065 fileImporterBibTeX->setCommentHandling(FileImporterBibTeX::CommentHandling::Keep); 0066 return fileImporterBibTeX; 0067 } 0068 } 0069 0070 FileImporter *FileImporter::factory(const QUrl &url, QObject *parent) 0071 { 0072 const QFileInfo fileInfo(url.fileName()); 0073 return factory(fileInfo, parent); 0074 } 0075 0076 File *FileImporter::fromString(const QString &text) 0077 { 0078 if (text.isEmpty()) { 0079 qCWarning(LOG_KBIBTEX_IO) << "Cannot create File object from empty string"; 0080 return nullptr; 0081 } 0082 0083 QBuffer buffer; 0084 buffer.open(QIODevice::WriteOnly); 0085 buffer.write(text.toUtf8()); 0086 buffer.close(); 0087 0088 buffer.open(QIODevice::ReadOnly); 0089 File *result = load(&buffer); 0090 if (result == nullptr) 0091 qCWarning(LOG_KBIBTEX_IO) << "Creating File object from" << buffer.size() << "Bytes of data failed"; 0092 buffer.close(); 0093 0094 return result; 0095 } 0096 0097 Person *FileImporter::splitName(const QString &name) 0098 { 0099 // TODO Merge with FileImporterBibTeX::personFromString and FileImporterBibTeX::contextSensitiveSplit 0100 QString firstName; 0101 QString lastName; 0102 QString suffix; 0103 0104 if (!name.contains(QLatin1Char(','))) { 0105 static const QRegularExpression splittingRegExp(QStringLiteral("[ ]+")); 0106 const QStringList segments = name.split(splittingRegExp); 0107 0108 /** PubMed uses a special writing style for names, where the last name is followed by 0109 * single capital letters, each being the first letter of each first name 0110 * So, check how many single capital letters are at the end of the given segment list */ 0111 int singleCapitalLettersCounter = 0; 0112 int p = segments.count() - 1; 0113 while (segments[p].length() == 1 && segments[p][0].isUpper()) { 0114 --p; 0115 ++singleCapitalLettersCounter; 0116 } 0117 0118 if (singleCapitalLettersCounter > 0) { 0119 /** This is a special case for names from PubMed, which are formatted like "Fischer T A" 0120 * all segment values until the first single letter segment are last name parts */ 0121 for (int i = 0; i < p; ++i) 0122 lastName.append(segments[i]).append(QStringLiteral(" ")); 0123 lastName.append(segments[p]); 0124 /// Single letter segments are first name parts 0125 for (int i = p + 1; i < segments.count() - 1; ++i) 0126 firstName.append(segments[i]).append(QStringLiteral(" ")); 0127 firstName.append(segments[segments.count() - 1]); 0128 } else { 0129 int from = segments.count() - 1; 0130 if (looksLikeSuffix(segments[from])) { 0131 suffix = segments[from]; 0132 --from; 0133 } 0134 lastName = segments[from]; ///< Initialize last name with last segment 0135 /// Check for lower case parts of the last name such as "van", "von", "de", ... 0136 while (from > 0) { 0137 if (segments[from - 1].compare(segments[from - 1].toLower()) != 0) 0138 break; 0139 --from; 0140 lastName.prepend(QStringLiteral(" ")); 0141 lastName.prepend(segments[from]); 0142 } 0143 0144 if (from > 0) { 0145 firstName = *segments.begin(); /// First name initialized with first segment 0146 for (QStringList::ConstIterator it = ++segments.begin(); from > 1; ++it, --from) { 0147 firstName.append(QStringLiteral(" ")); 0148 firstName.append(*it); 0149 } 0150 } 0151 } 0152 } else { 0153 const QStringList segments = name.split(QStringLiteral(",")); 0154 /// segments.count() must be >=2 0155 if (segments.count() == 2) { 0156 /// Most probably "Smith, Adam" 0157 lastName = segments[0].trimmed(); 0158 firstName = segments[1].trimmed(); 0159 } else if (segments.count() == 3 && looksLikeSuffix(segments[2])) { 0160 /// Most probably "Smith, Adam, Jr." 0161 lastName = segments[0].trimmed(); 0162 firstName = segments[1].trimmed(); 0163 suffix = segments[2].trimmed(); 0164 } else 0165 qCWarning(LOG_KBIBTEX_IO) << "Too many commas in name:" << name; 0166 } 0167 0168 return new Person(firstName, lastName, suffix); 0169 } 0170 0171 bool FileImporter::looksLikeSuffix(const QString &suffix) 0172 { 0173 const QString normalizedSuffix = suffix.trimmed().toLower(); 0174 return normalizedSuffix == QStringLiteral("jr") 0175 || normalizedSuffix == QStringLiteral("jr.") 0176 || normalizedSuffix == QStringLiteral("sr") 0177 || normalizedSuffix == QStringLiteral("sr.") 0178 || normalizedSuffix == QStringLiteral("ii") 0179 || normalizedSuffix == QStringLiteral("iii") 0180 || normalizedSuffix == QStringLiteral("iv"); 0181 } 0182 0183 // #include "fileimporter.moc"