File indexing completed on 2024-05-19 05:05:35

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2022 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #include "fileimporter.h"
0021 
0022 #include <QBuffer>
0023 #include <QTextStream>
0024 #include <QStringList>
0025 #include <QRegularExpression>
0026 #include <QFileInfo>
0027 
0028 #include <Value>
0029 #include "fileimporterbibtex.h"
0030 #ifdef HAVE_POPPLERQT5
0031 #include "fileimporterpdf.h"
0032 #endif // HAVE_POPPLERQT5
0033 #include "fileimporterris.h"
0034 #include "fileimporterbibutils.h"
0035 #include "logging_io.h"
0036 
0037 FileImporter::FileImporter(QObject *parent)
0038         : QObject(parent)
0039 {
0040     /// nothing
0041 }
0042 
0043 FileImporter::~FileImporter()
0044 {
0045     /// nothing
0046 }
0047 
0048 FileImporter *FileImporter::factory(const QFileInfo &fileInfo, QObject *parent)
0049 {
0050     const QString ending = fileInfo.completeSuffix().toLower();
0051 
0052 #ifdef HAVE_POPPLERQT5
0053     if (ending.endsWith(QStringLiteral("pdf"))) {
0054         return new FileImporterPDF(parent);
0055     } else
0056 #endif // HAVE_POPPLERQT5
0057         if (ending.endsWith(QStringLiteral("ris"))) {
0058             return new FileImporterRIS(parent);
0059         } else if (BibUtils::available() && ending.endsWith(QStringLiteral("isi"))) {
0060             FileImporterBibUtils *fileImporterBibUtils = new FileImporterBibUtils(parent);
0061             fileImporterBibUtils->setFormat(BibUtils::Format::ISI);
0062             return fileImporterBibUtils;
0063         } else {
0064             FileImporterBibTeX *fileImporterBibTeX = new FileImporterBibTeX(parent);
0065             fileImporterBibTeX->setCommentHandling(FileImporterBibTeX::CommentHandling::Keep);
0066             return fileImporterBibTeX;
0067         }
0068 }
0069 
0070 FileImporter *FileImporter::factory(const QUrl &url, QObject *parent)
0071 {
0072     const QFileInfo fileInfo(url.fileName());
0073     return factory(fileInfo, parent);
0074 }
0075 
0076 File *FileImporter::fromString(const QString &text)
0077 {
0078     if (text.isEmpty()) {
0079         qCWarning(LOG_KBIBTEX_IO) << "Cannot create File object from empty string";
0080         return nullptr;
0081     }
0082 
0083     QBuffer buffer;
0084     buffer.open(QIODevice::WriteOnly);
0085     buffer.write(text.toUtf8());
0086     buffer.close();
0087 
0088     buffer.open(QIODevice::ReadOnly);
0089     File *result = load(&buffer);
0090     if (result == nullptr)
0091         qCWarning(LOG_KBIBTEX_IO) << "Creating File object from" << buffer.size() << "Bytes of data failed";
0092     buffer.close();
0093 
0094     return result;
0095 }
0096 
0097 Person *FileImporter::splitName(const QString &name)
0098 {
0099     // TODO Merge with FileImporterBibTeX::personFromString and FileImporterBibTeX::contextSensitiveSplit
0100     QString firstName;
0101     QString lastName;
0102     QString suffix;
0103 
0104     if (!name.contains(QLatin1Char(','))) {
0105         static const QRegularExpression splittingRegExp(QStringLiteral("[ ]+"));
0106         const QStringList segments = name.split(splittingRegExp);
0107 
0108         /** PubMed uses a special writing style for names, where the last name is followed by
0109           * single capital letters, each being the first letter of each first name
0110           * So, check how many single capital letters are at the end of the given segment list */
0111         int singleCapitalLettersCounter = 0;
0112         int p = segments.count() - 1;
0113         while (segments[p].length() == 1 && segments[p][0].isUpper()) {
0114             --p;
0115             ++singleCapitalLettersCounter;
0116         }
0117 
0118         if (singleCapitalLettersCounter > 0) {
0119             /** This is a special case for names from PubMed, which are formatted like "Fischer T A"
0120               * all segment values until the first single letter segment are last name parts */
0121             for (int i = 0; i < p; ++i)
0122                 lastName.append(segments[i]).append(QStringLiteral(" "));
0123             lastName.append(segments[p]);
0124             /// Single letter segments are first name parts
0125             for (int i = p + 1; i < segments.count() - 1; ++i)
0126                 firstName.append(segments[i]).append(QStringLiteral(" "));
0127             firstName.append(segments[segments.count() - 1]);
0128         } else {
0129             int from = segments.count() - 1;
0130             if (looksLikeSuffix(segments[from])) {
0131                 suffix = segments[from];
0132                 --from;
0133             }
0134             lastName = segments[from]; ///< Initialize last name with last segment
0135             /// Check for lower case parts of the last name such as "van", "von", "de", ...
0136             while (from > 0) {
0137                 if (segments[from - 1].compare(segments[from - 1].toLower()) != 0)
0138                     break;
0139                 --from;
0140                 lastName.prepend(QStringLiteral(" "));
0141                 lastName.prepend(segments[from]);
0142             }
0143 
0144             if (from > 0) {
0145                 firstName = *segments.begin(); /// First name initialized with first segment
0146                 for (QStringList::ConstIterator it = ++segments.begin(); from > 1; ++it, --from) {
0147                     firstName.append(QStringLiteral(" "));
0148                     firstName.append(*it);
0149                 }
0150             }
0151         }
0152     } else {
0153         const QStringList segments = name.split(QStringLiteral(","));
0154         /// segments.count() must be >=2
0155         if (segments.count() == 2) {
0156             /// Most probably "Smith, Adam"
0157             lastName = segments[0].trimmed();
0158             firstName = segments[1].trimmed();
0159         } else if (segments.count() == 3 && looksLikeSuffix(segments[2])) {
0160             /// Most probably "Smith, Adam, Jr."
0161             lastName = segments[0].trimmed();
0162             firstName = segments[1].trimmed();
0163             suffix = segments[2].trimmed();
0164         } else
0165             qCWarning(LOG_KBIBTEX_IO) << "Too many commas in name:" << name;
0166     }
0167 
0168     return new Person(firstName, lastName, suffix);
0169 }
0170 
0171 bool FileImporter::looksLikeSuffix(const QString &suffix)
0172 {
0173     const QString normalizedSuffix = suffix.trimmed().toLower();
0174     return normalizedSuffix == QStringLiteral("jr")
0175            || normalizedSuffix == QStringLiteral("jr.")
0176            || normalizedSuffix == QStringLiteral("sr")
0177            || normalizedSuffix == QStringLiteral("sr.")
0178            || normalizedSuffix == QStringLiteral("ii")
0179            || normalizedSuffix == QStringLiteral("iii")
0180            || normalizedSuffix == QStringLiteral("iv");
0181 }
0182 
0183 // #include "fileimporter.moc"