File indexing completed on 2024-05-19 05:05:36

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2023 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #include "fileimporterbibtex.h"
0021 
0022 #include <QTextCodec>
0023 #include <QIODevice>
0024 #include <QRegularExpression>
0025 #include <QCoreApplication>
0026 #include <QStringList>
0027 
0028 #include <BibTeXEntries>
0029 #include <BibTeXFields>
0030 #include <Preferences>
0031 #include <File>
0032 #include <Comment>
0033 #include <Macro>
0034 #include <Preamble>
0035 #include <Entry>
0036 #include <Element>
0037 #include <Value>
0038 #include "encoder.h"
0039 #include "encoderlatex.h"
0040 #include "fileimporter_p.h"
0041 #include "logging_io.h"
0042 
/// Convert a 64-bit integer to int, clamping it to the range [0, 0x7fffffff]:
/// negative values become 0, values above 0x7fffffff become 0x7fffffff.
#define qint64toint(a) (static_cast<int>(qMax(0LL,qMin(0x7fffffffLL,(a)))))
0044 
0045 class FileImporterBibTeX::Private
0046 {
0047 private:
0048     FileImporterBibTeX *parent;
0049 
0050 public:
0051     static const QStringList keysForPersonDetection;
0052 
0053     /// Set via @see setCommentHandling
0054     CommentHandling commentHandling;
0055 
0056     enum class Token {
0057         At = 1, BracketOpen = 2, BracketClose = 3, AlphaNumText = 4, Comma = 5, Assign = 6, Doublecross = 7, EndOfFile = 0xffff, Unknown = -1
0058     };
0059 
0060     enum class CommaContainment { None, Contains };
0061 
0062     typedef struct Statistics {
0063         /// Used to determine if file prefers quotation marks over
0064         /// curly brackets or the other way around
0065         int countCurlyBrackets;
0066 
0067         int countQuotationMarks, countFirstNameFirst, countLastNameFirst;
0068         QHash<QString, int> countCommentContext;
0069         int countProtectedTitle, countUnprotectedTitle;
0070         int countSortedByIdentifier, countNotSortedByIdentifier;
0071         QString mostRecentListSeparator;
0072 
0073         Statistics()
0074                 : countCurlyBrackets(0), countQuotationMarks(0), countFirstNameFirst(0),
0075               countLastNameFirst(0), countProtectedTitle(0), countUnprotectedTitle(0),
0076               countSortedByIdentifier(0), countNotSortedByIdentifier(0)
0077         {
0078             /// nothing
0079         }
0080     } Statistics;
0081 
0082     typedef struct State {
0083         QTextStream *textStream;
0084         /// Low-level character operations
0085         QChar prevChar, nextChar;
0086         /// Current line
0087         int lineNo;
0088         QString prevLine, currentLine;
0089         QSet<QString> knownElementIds;
0090 
0091         State(QTextStream *_textStream)
0092                 : textStream(_textStream), lineNo(1)
0093         {
0094             /// nothing
0095         }
0096     } State;
0097 
0098     Private(FileImporterBibTeX *p)
0099             : parent(p), commentHandling(CommentHandling::Ignore)
0100     {
0101         // TODO
0102     }
0103 
0104     bool readChar(State &state)
0105     {
0106         /// Memorize previous char
0107         state.prevChar = state.nextChar;
0108 
0109         if (state.textStream->atEnd()) {
0110             /// At end of data stream
0111             state.nextChar = QChar::Null;
0112             return false;
0113         }
0114 
0115         /// Read next char
0116         *state.textStream >> state.nextChar;
0117 
0118         /// Test for new line
0119         if (state.nextChar == QLatin1Char('\n')) {
0120             /// Update variables tracking line numbers and line content
0121             ++state.lineNo;
0122             state.prevLine = state.currentLine;
0123             state.currentLine.clear();
0124         } else {
0125             /// Add read char to current line
0126             state.currentLine.append(state.nextChar);
0127         }
0128 
0129         return true;
0130     }
0131 
0132     bool skipWhiteChar(State &state)
0133     {
0134         bool result = true;
0135         while ((state.nextChar.isSpace() || state.nextChar == QLatin1Char('\t') || state.nextChar == QLatin1Char('\n') || state.nextChar == QLatin1Char('\r')) && result) result = readChar(state);
0136         return result;
0137     }
0138 
0139     bool skipNewline(State &state)
0140     {
0141         if (state.nextChar == QLatin1Char('\r')) {
0142             const bool result = readChar(state);
0143             if (result && state.nextChar == QLatin1Char('\n'))
0144                 // Windows linebreak: CR LF
0145                 return readChar(state);
0146         } else if (state.nextChar == QLatin1Char('\n')) {
0147             // Linux/Unix linebreak: LF
0148             return readChar(state);
0149         }
0150         return false;
0151     }
0152 
0153     Token nextToken(State &state)
0154     {
0155         if (!skipWhiteChar(state)) {
0156             /// Some error occurred while reading from data stream
0157             return Token::EndOfFile;
0158         }
0159 
0160         Token result = Token::Unknown;
0161 
0162         switch (state.nextChar.toLatin1()) {
0163         case '@':
0164             result = Token::At;
0165             break;
0166         case '{':
0167         case '(':
0168             result = Token::BracketOpen;
0169             break;
0170         case '}':
0171         case ')':
0172             result = Token::BracketClose;
0173             break;
0174         case ',':
0175             result = Token::Comma;
0176             break;
0177         case '=':
0178             result = Token::Assign;
0179             break;
0180         case '#':
0181             result = Token::Doublecross;
0182             break;
0183         default:
0184             if (state.textStream->atEnd())
0185                 result = Token::EndOfFile;
0186         }
0187 
0188         if (state.nextChar != QLatin1Char('%')) {
0189             /// Unclean solution, but necessary for comments
0190             /// that have a percent sign as a prefix
0191             readChar(state);
0192         }
0193         return result;
0194     }
0195 
0196 // FIXME duplicate
0197     static void parsePersonList(const QString &text, Value &value, CommaContainment *comma, const int line_number, QObject *parent)
0198     {
0199         static const QString tokenAnd = QStringLiteral("and");
0200         static const QString tokenOthers = QStringLiteral("others");
0201         static QStringList tokens;
0202         contextSensitiveSplit(text, tokens);
0203 
0204         if (tokens.count() > 0) {
0205             if (tokens[0] == tokenAnd) {
0206                 qCInfo(LOG_KBIBTEX_IO) << "Person list starts with" << tokenAnd << "near line" << line_number;
0207                 if (parent != nullptr)
0208 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0209                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Person list starts with 'and' near line %1")).arg(line_number)));
0210 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0211                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Person list starts with 'and' near line %1")).arg(line_number)));
0212 #endif
0213             } else if (tokens.count() > 1 && tokens[tokens.count() - 1] == tokenAnd) {
0214                 qCInfo(LOG_KBIBTEX_IO) << "Person list ends with" << tokenAnd << "near line" << line_number;
0215                 if (parent != nullptr)
0216 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0217                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Person list ends with 'and' near line %1")).arg(line_number)));
0218 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0219                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Person list ends with 'and' near line %1")).arg(line_number)));
0220 #endif
0221             }
0222             if (tokens[0] == tokenOthers) {
0223                 qCInfo(LOG_KBIBTEX_IO) << "Person list starts with" << tokenOthers << "near line" << line_number;
0224                 if (parent != nullptr)
0225 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0226                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Person list starts with 'others' near line %1")).arg(line_number)));
0227 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0228                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Person list starts with 'others' near line %1")).arg(line_number)));
0229 #endif
0230             } else if (tokens[tokens.count() - 1] == tokenOthers && (tokens.count() < 3 || tokens[tokens.count() - 2] != tokenAnd)) {
0231                 qCInfo(LOG_KBIBTEX_IO) << "Person list ends with" << tokenOthers << "but is not preceded with name and" << tokenAnd << "near line" << line_number;
0232                 if (parent != nullptr)
0233 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0234                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Person list ends with 'others' but is not preceded with name and 'and' near line %1")).arg(line_number)));
0235 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0236                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Person list ends with 'others' but is not preceded with name and 'and' near line %1")).arg(line_number)));
0237 #endif
0238             }
0239         }
0240 
0241         int nameStart = 0;
0242         QString prevToken;
0243         for (int i = 0; i < tokens.count(); ++i) {
0244             if (tokens[i] == tokenAnd) {
0245                 if (prevToken == tokenAnd) {
0246                     qCInfo(LOG_KBIBTEX_IO) << "Two subsequent" << tokenAnd << "found in person list near line" << line_number;
0247                     if (parent != nullptr)
0248 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0249                         QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Two subsequent 'and' found in person list near line %1")).arg(line_number)));
0250 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0251                         QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Two subsequent 'and' found in person list near line %1")).arg(line_number)));
0252 #endif
0253                 } else if (nameStart < i) {
0254                     const QSharedPointer<Person> person = personFromTokenList(tokens.mid(nameStart, i - nameStart), comma, line_number, parent);
0255                     if (!person.isNull())
0256                         value.append(person);
0257                     else {
0258                         qCInfo(LOG_KBIBTEX_IO) << "Text" << tokens.mid(nameStart, i - nameStart).join(QLatin1Char(' ')) << "does not form a name near line" << line_number;
0259                         if (parent != nullptr)
0260 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0261                             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Text '%1' does not form a name near line %2")).arg(tokens.mid(nameStart, i - nameStart).join(QLatin1Char(' '))).arg(line_number)));
0262 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0263                             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Text '%1' does not form a name near line %2")).arg(tokens.mid(nameStart, i - nameStart).join(QLatin1Char(' '))).arg(line_number)));
0264 #endif
0265                     }
0266                 } else {
0267                     qCInfo(LOG_KBIBTEX_IO) << "Found" << tokenAnd << "but no name before it near line" << line_number;
0268                     if (parent != nullptr)
0269 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0270                         QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Found 'and' but no name before it near line %1")).arg(line_number)));
0271 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0272                         QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Found 'and' but no name before it near line %1")).arg(line_number)));
0273 #endif
0274                 }
0275                 nameStart = i + 1;
0276             } else if (tokens[i] == tokenOthers) {
0277                 if (i < tokens.count() - 1) {
0278                     qCInfo(LOG_KBIBTEX_IO) << "Special word" << tokenOthers << "found before last position in person name near line" << line_number;
0279                     if (parent != nullptr)
0280 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0281                         QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Special word 'others' found before last position in person name near line %1")).arg(line_number)));
0282 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0283                         QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Special word 'others' found before last position in person name near line %1")).arg(line_number)));
0284 #endif
0285                 } else
0286                     value.append(QSharedPointer<PlainText>(new PlainText(QStringLiteral("others"))));
0287                 nameStart = tokens.count() + 1;
0288             }
0289             prevToken = tokens[i];
0290         }
0291 
0292         if (nameStart < tokens.count()) {
0293             const QSharedPointer<Person> person = personFromTokenList(tokens.mid(nameStart), comma, line_number, parent);
0294             if (!person.isNull())
0295                 value.append(person);
0296             else {
0297                 qCInfo(LOG_KBIBTEX_IO) << "Text" << tokens.mid(nameStart).join(QLatin1Char(' ')) << "does not form a name near line" << line_number;
0298                 if (parent != nullptr)
0299 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0300                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Text '%1' does not form a name near line %2")).arg(tokens.mid(nameStart).join(QLatin1Char(' '))).arg(line_number)));
0301 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0302                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Text '%1' does not form a name near line %2")).arg(tokens.mid(nameStart).join(QLatin1Char(' '))).arg(line_number)));
0303 #endif
0304             }
0305         }
0306     }
0307 
0308     Token readValue(Value &value, const QString &key, Statistics &statistics, State &state)
0309     {
0310         Token token = Token::Unknown;
0311         const QString iKey = key.toLower();
0312         static const QSet<QString> verbatimKeys {Entry::ftColor.toLower(), Entry::ftCrossRef.toLower(), Entry::ftXData.toLower()};
0313 
0314         do {
0315             bool isStringKey = false;
0316             const QString rawText = readString(isStringKey, statistics, state);
0317             if (rawText.isNull())
0318                 return Token::EndOfFile;
0319             QString text = EncoderLaTeX::instance().decode(rawText);
0320             /// For all entries except for abstracts and a few more 'verbatim-y' fields ...
0321             if (iKey != Entry::ftAbstract && !(iKey.startsWith(Entry::ftUrl) && !iKey.startsWith(Entry::ftUrlDate)) && !iKey.startsWith(Entry::ftLocalFile) && !iKey.startsWith(Entry::ftFile)) {
0322                 /// ... remove redundant spaces including newlines
0323                 text = bibtexAwareSimplify(text);
0324             }
0325             /// Abstracts will keep their formatting (regarding line breaks)
0326             /// as requested by Thomas Jensch via mail (20 October 2010)
0327 
0328             /// Maintain statistics on if (book) titles are protected
0329             /// by surrounding curly brackets
0330             if (!text.isEmpty() && (iKey == Entry::ftTitle || iKey == Entry::ftBookTitle)) {
0331                 if (text[0] == QLatin1Char('{') && text[text.length() - 1] == QLatin1Char('}'))
0332                     ++statistics.countProtectedTitle;
0333                 else
0334                     ++statistics.countUnprotectedTitle;
0335             }
0336 
0337             if (keysForPersonDetection.contains(iKey)) {
0338                 if (isStringKey)
0339                     value.append(QSharedPointer<MacroKey>(new MacroKey(text)));
0340                 else {
0341                     CommaContainment comma = CommaContainment::Contains;
0342                     parsePersonList(text, value, &comma, state.lineNo, parent);
0343 
0344                     /// Update statistics on name formatting
0345                     if (comma == CommaContainment::Contains)
0346                         ++statistics.countLastNameFirst;
0347                     else
0348                         ++statistics.countFirstNameFirst;
0349                 }
0350             } else if (iKey == Entry::ftPages) {
0351                 static const QRegularExpression rangeInAscii(QStringLiteral("\\s*--?\\s*"));
0352                 text.replace(rangeInAscii, QChar(0x2013));
0353                 if (isStringKey)
0354                     value.append(QSharedPointer<MacroKey>(new MacroKey(text)));
0355                 else
0356                     value.append(QSharedPointer<PlainText>(new PlainText(text)));
0357             } else if ((iKey.startsWith(Entry::ftUrl) && !iKey.startsWith(Entry::ftUrlDate)) || iKey.startsWith(Entry::ftLocalFile) || iKey.startsWith(Entry::ftFile) || iKey == QStringLiteral("ee") || iKey == QStringLiteral("biburl")) {
0358                 if (isStringKey)
0359                     value.append(QSharedPointer<MacroKey>(new MacroKey(text)));
0360                 else {
0361                     /// Assumption: in fields like Url or LocalFile, file names are separated by ;
0362                     static const QRegularExpression semicolonSpace = QRegularExpression(QStringLiteral("[;]\\s*"));
0363 #if QT_VERSION >= 0x050e00
0364                     const QStringList fileList = rawText.split(semicolonSpace, Qt::SkipEmptyParts);
0365 #else // QT_VERSION < 0x050e00
0366                     const QStringList fileList = rawText.split(semicolonSpace, QString::SkipEmptyParts);
0367 #endif // QT_VERSION >= 0x050e00
0368                     for (QString filename : fileList) {
0369                         QString comment;
0370                         bool hasComment = false; ///< need to have extra flag for comment, as even an empty comment counts as comment
0371                         if (iKey == Entry::ftFile) {
0372                             /// Check 'file' field for a JabRef-specific formatting, extract filename
0373                             /// Example of JabRef-specific value:
0374                             ///     Some optional text:path/to/file\_name.pdf:PDF
0375                             /// Regular expression will try to extract filename, then decode some LaTeX-isms
0376                             /// to get  path/to/file_name.pdf  for in above example
0377                             static const QRegularExpression jabrefFileRegExp(QStringLiteral("^([^:]*):(.*?):([A-Z]+|pdf)$"));
0378                             const QRegularExpressionMatch jabrefFileRegExpMatch = jabrefFileRegExp.match(filename);
0379                             if (jabrefFileRegExpMatch.hasMatch()) {
0380                                 hasComment = true;
0381                                 comment =  EncoderLaTeX::instance().decode(jabrefFileRegExpMatch.captured(1));
0382                                 filename =  EncoderLaTeX::instance().decode(jabrefFileRegExpMatch.captured(2));
0383 
0384                                 /// Furthermore, if the file came from Windows, drive letters may have been written as follows:
0385                                 ///    C$\backslash$:/Users/joedoe/filename.pdf
0386                                 static const QRegularExpression windowsDriveBackslashRegExp(QStringLiteral("^([A-Z])\\$\\\\backslash\\$(:.*)$"));
0387                                 const QRegularExpressionMatch windowsDriveBackslashRegExpMatch = windowsDriveBackslashRegExp.match(filename);
0388                                 if (windowsDriveBackslashRegExpMatch.hasMatch()) {
0389                                     filename = windowsDriveBackslashRegExpMatch.captured(1) + windowsDriveBackslashRegExpMatch.captured(2);
0390                                 } else if (filename.startsWith(QStringLiteral("home/"))) {
0391                                     /// If filename is a relative path but by name looks almost like it should be an absolute path
0392                                     /// (starting with some suspicious strings), prepend a slash
0393                                     filename.prepend(QLatin1Char('/'));
0394                                 }
0395                             }
0396                         }
0397 
0398                         VerbatimText *verbatimText = new VerbatimText(filename);
0399                         if (hasComment)
0400                             verbatimText->setComment(comment);
0401                         value.append(QSharedPointer<VerbatimText>(verbatimText));
0402                     }
0403                 }
0404             } else if (iKey == Entry::ftMonth) {
0405                 if (isStringKey) {
0406                     static const QRegularExpression monthThreeChars(QStringLiteral("^[a-z]{3}"), QRegularExpression::CaseInsensitiveOption);
0407                     if (monthThreeChars.match(text).hasMatch())
0408                         text = text.left(3).toLower();
0409                     value.append(QSharedPointer<MacroKey>(new MacroKey(text)));
0410                 } else
0411                     value.append(QSharedPointer<PlainText>(new PlainText(text)));
0412             } else if (iKey.startsWith(Entry::ftDOI)) {
0413                 if (isStringKey)
0414                     value.append(QSharedPointer<MacroKey>(new MacroKey(text)));
0415                 else {
0416                     /// Take care of "; " which separates multiple DOIs, but which may baffle the regexp
0417                     QString preprocessedText = rawText;
0418                     preprocessedText.replace(QStringLiteral("; "), QStringLiteral(" "));
0419                     /// Extract everything that looks like a DOI using a regular expression,
0420                     /// ignore everything else
0421                     QRegularExpressionMatchIterator doiRegExpMatchIt = KBibTeX::doiRegExp.globalMatch(preprocessedText);
0422                     while (doiRegExpMatchIt.hasNext()) {
0423                         const QRegularExpressionMatch doiRegExpMatch = doiRegExpMatchIt.next();
0424                         value.append(QSharedPointer<VerbatimText>(new VerbatimText(doiRegExpMatch.captured(QStringLiteral("doi")))));
0425                     }
0426                 }
0427             } else if (iKey == Entry::ftKeywords) {
0428                 if (isStringKey)
0429                     value.append(QSharedPointer<MacroKey>(new MacroKey(text)));
0430                 else {
0431                     char splitChar;
0432                     const QList<QSharedPointer<Keyword> > keywords = splitKeywords(text, &splitChar);
0433                     for (const auto &keyword : keywords)
0434                         value.append(keyword);
0435                     /// Memorize (some) split characters for later use
0436                     /// (e.g. when writing file again)
0437                     if (splitChar == ';')
0438                         statistics.mostRecentListSeparator = QStringLiteral("; ");
0439                     else if (splitChar == ',')
0440                         statistics.mostRecentListSeparator = QStringLiteral(", ");
0441 
0442                 }
0443             } else if (verbatimKeys.contains(iKey)) {
0444                 if (isStringKey)
0445                     value.append(QSharedPointer<MacroKey>(new MacroKey(text)));
0446                 else
0447                     value.append(QSharedPointer<VerbatimText>(new VerbatimText(rawText)));
0448             } else {
0449                 if (isStringKey)
0450                     value.append(QSharedPointer<MacroKey>(new MacroKey(text)));
0451                 else
0452                     value.append(QSharedPointer<PlainText>(new PlainText(text)));
0453             }
0454 
0455             token = nextToken(state);
0456         } while (token == Token::Doublecross);
0457 
0458         return token;
0459     }
0460 
0461     QString readBracketString(State &state)
0462     {
0463         static const QChar backslash = QLatin1Char('\\');
0464         QString result(0, QChar()); ///< Construct an empty but non-null string
0465         const QChar openingBracket = state.nextChar;
0466         const QChar closingBracket = openingBracket == QLatin1Char('{') ? QLatin1Char('}') : (openingBracket == QLatin1Char('(') ? QLatin1Char(')') : QChar());
0467         Q_ASSERT_X(!closingBracket.isNull(), "QString FileImporterBibTeX::readBracketString()", "openingBracket==state.nextChar is neither '{' nor '('");
0468         int counter = 1;
0469 
0470         if (!readChar(state)) {
0471             /// Some error occurred while reading from data stream
0472             return QString(); ///< return null QString
0473         }
0474 
0475         while (!state.nextChar.isNull()) {
0476             if (state.nextChar == openingBracket && state.prevChar != backslash)
0477                 ++counter;
0478             else if (state.nextChar == closingBracket && state.prevChar != backslash)
0479                 --counter;
0480 
0481             if (counter == 0) {
0482                 break;
0483             } else
0484                 result.append(state.nextChar);
0485 
0486             if (!readChar(state)) {
0487                 /// Some error occurred while reading from data stream
0488                 return QString(); ///< return null QString
0489             }
0490         }
0491 
0492         if (!readChar(state)) {
0493             /// Some error occurred while reading from data stream
0494             return QString(); ///< return null QString
0495         }
0496         return result;
0497     }
0498 
0499     QString readSimpleString(State &state, const QString &until = QString(), const bool readNestedCurlyBrackets = false)
0500     {
0501         static const QString extraAlphaNumChars = QString(QStringLiteral("?'`-_:.+/$\\\"&"));
0502 
0503         QString result; ///< 'result' is Null on purpose: simple strings cannot be empty in contrast to e.g. quoted strings
0504 
0505         if (!skipWhiteChar(state)) {
0506             /// Some error occurred while reading from data stream
0507             return QString(); ///< return null QString
0508         }
0509 
0510         QChar prevChar = QChar(0x00);
0511         while (!state.nextChar.isNull()) {
0512             if (readNestedCurlyBrackets && state.nextChar == QLatin1Char('{') && prevChar != QLatin1Char('\\')) {
0513                 int depth = 1;
0514                 while (depth > 0) {
0515                     result.append(state.nextChar);
0516                     prevChar = state.nextChar;
0517                     if (!readChar(state)) return result;
0518                     if (state.nextChar == QLatin1Char('{') && prevChar != QLatin1Char('\\')) ++depth;
0519                     else if (state.nextChar == QLatin1Char('}') && prevChar != QLatin1Char('\\')) --depth;
0520                 }
0521                 result.append(state.nextChar);
0522                 prevChar = state.nextChar;
0523                 if (!readChar(state)) return result;
0524             }
0525 
0526             const ushort nextCharUnicode = state.nextChar.unicode();
0527             if (!until.isEmpty()) {
0528                 /// Variable "until" has user-defined value
0529                 if (state.nextChar == QLatin1Char('\n') || state.nextChar == QLatin1Char('\r') || until.contains(state.nextChar)) {
0530                     /// Force break on line-breaks or if one of the "until" chars has been read
0531                     break;
0532                 } else {
0533                     /// Append read character to final result
0534                     result.append(state.nextChar);
0535                 }
0536             } else if ((nextCharUnicode >= static_cast<ushort>('a') && nextCharUnicode <= static_cast<ushort>('z')) || (nextCharUnicode >= static_cast<ushort>('A') && nextCharUnicode <= static_cast<ushort>('Z')) || (nextCharUnicode >= static_cast<ushort>('0') && nextCharUnicode <= static_cast<ushort>('9')) || extraAlphaNumChars.contains(state.nextChar)) {
0537                 /// Accept default set of alpha-numeric characters
0538                 result.append(state.nextChar);
0539             } else
0540                 break;
0541             prevChar = state.nextChar;
0542             if (!readChar(state)) break;
0543         }
0544         return result;
0545     }
0546 
0547     QString readQuotedString(State &state)
0548     {
0549         QString result(0, QChar()); ///< Construct an empty but non-null string
0550 
0551         Q_ASSERT_X(state.nextChar == QLatin1Char('"'), "QString FileImporterBibTeX::readQuotedString()", "state.nextChar is not '\"'");
0552 
0553         if (!readChar(state)) {
0554             /// Some error occurred while reading from data stream
0555             return QString(); ///< return null QString
0556         }
0557 
0558         while (!state.nextChar.isNull()) {
0559             if (state.nextChar == QLatin1Char('"') && state.prevChar != QLatin1Char('\\') && state.prevChar != QLatin1Char('{'))
0560                 break;
0561             else
0562                 result.append(state.nextChar);
0563 
0564             if (!readChar(state)) {
0565                 /// Some error occurred while reading from data stream
0566                 return QString(); ///< return null QString
0567             }
0568         }
0569 
0570         if (!readChar(state)) {
0571             /// Some error occurred while reading from data stream
0572             return QString(); ///< return null QString
0573         }
0574 
0575         /// Remove protection around quotation marks
0576         result.replace(QStringLiteral("{\"}"), QStringLiteral("\""));
0577 
0578         return result;
0579     }
0580 
0581     QString readString(bool &isStringKey, Statistics &statistics, State &state)
0582     {
0583         /// Most often it is not a string key
0584         isStringKey = false;
0585 
0586         if (!skipWhiteChar(state)) {
0587             /// Some error occurred while reading from data stream
0588             return QString(); ///< return null QString
0589         }
0590 
0591         switch (state.nextChar.toLatin1()) {
0592         case '{':
0593         case '(': {
0594             ++statistics.countCurlyBrackets;
0595             const QString result = readBracketString(state);
0596             return result;
0597         }
0598         case '"': {
0599             ++statistics.countQuotationMarks;
0600             const QString result = readQuotedString(state);
0601             return result;
0602         }
0603         default:
0604             isStringKey = true;
0605             const QString result = readSimpleString(state);
0606             return result;
0607         }
0608     }
0609 
0610     bool readCharUntil(const QString &until, State &state)
0611     {
0612         Q_ASSERT_X(!until.isEmpty(), "bool  FileImporterBibTeX::readCharUntil(const QString &until)", "\"until\" is empty or invalid");
0613         bool result = true;
0614         while (!until.contains(state.nextChar) && (result = readChar(state)));
0615         return result;
0616     }
0617 
0618     QString readLine(State &state)
0619     {
0620         QString result;
0621         while (state.nextChar != QLatin1Char('\n') && state.nextChar != QLatin1Char('\r') && readChar(state))
0622             result.append(state.nextChar);
0623         return result;
0624     }
0625 
0626     Macro *readMacroElement(Statistics &statistics, State &state)
0627     {
0628         Token token = nextToken(state);
0629         while (token != Token::BracketOpen) {
0630             if (token == Token::EndOfFile) {
0631 #if QT_VERSION >= 0x050e00
0632                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing macro near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Opening curly brace '{' expected";
0633 #else // QT_VERSION < 0x050e00
0634                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing macro near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Opening curly brace '{' expected";
0635 #endif // QT_VERSION >= 0x050e00
0636                 /// Instead of an 'emit' ...
0637 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0638                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing macro near line %1: Opening curly brace '{' expected")).arg(state.lineNo)));
0639 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0640                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing macro near line %1: Opening curly brace '{' expected")).arg(state.lineNo)));
0641 #endif
0642                 return nullptr;
0643             }
0644             token = nextToken(state);
0645         }
0646 
0647         QString key = readSimpleString(state);
0648 
0649         if (key.isEmpty()) {
0650             /// Cope with empty keys,
0651             /// duplicates are handled further below
0652             key = QStringLiteral("EmptyId");
0653         } else if (!Encoder::containsOnlyAscii(key)) {
0654             /// Try to avoid non-ascii characters in ids
0655             const QString newKey = Encoder::instance().convertToPlainAscii(key);
0656             qCWarning(LOG_KBIBTEX_IO) << "Macro key" << key << "near line" << state.lineNo << "contains non-ASCII characters, converted to" << newKey;
0657             /// Instead of an 'emit' ...
0658 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0659             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Macro key '%1'  near line %2 contains non-ASCII characters, converted to '%3'")).arg(key).arg(state.lineNo).arg(newKey)));
0660 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0661             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Macro key '%1'  near line %2 contains non-ASCII characters, converted to '%3'")).arg(key).arg(state.lineNo).arg(newKey)));
0662 #endif
0663             key = newKey;
0664         }
0665 
0666         /// Check for duplicate entry ids, avoid collisions
0667         if (state.knownElementIds.contains(key)) {
0668             static const QString newIdPattern = QStringLiteral("%1-%2");
0669             int idx = 2;
0670             QString newKey = newIdPattern.arg(key).arg(idx);
0671             while (state.knownElementIds.contains(newKey))
0672                 newKey = newIdPattern.arg(key).arg(++idx);
0673             qCDebug(LOG_KBIBTEX_IO) << "Duplicate macro key" << key << ", using replacement key" << newKey;
0674             /// Instead of an 'emit' ...
0675 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0676             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Duplicate macro key '%1', using replacement key '%2'")).arg(key, newKey)));
0677 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0678             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Duplicate macro key '%1', using replacement key '%2'")).arg(key, newKey)));
0679 #endif
0680             key = newKey;
0681         }
0682         state.knownElementIds.insert(key);
0683 
0684         if (nextToken(state) != Token::Assign) {
0685 #if QT_VERSION >= 0x050e00
0686             qCCritical(LOG_KBIBTEX_IO) << "Error in parsing macro" << key << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Assign symbol '=' expected";
0687 #else // QT_VERSION < 0x050e00
0688             qCCritical(LOG_KBIBTEX_IO) << "Error in parsing macro" << key << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Assign symbol '=' expected";
0689 #endif // QT_VERSION >= 0x050e00
0690             /// Instead of an 'emit' ...
0691 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0692             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing macro '%1' near line %2: Assign symbol '=' expected")).arg(key).arg(state.lineNo)));
0693 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0694             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing macro '%1' near line %2: Assign symbol '=' expected")).arg(key).arg(state.lineNo)));
0695 #endif
0696             return nullptr;
0697         }
0698 
0699         Macro *macro = new Macro(key);
0700         do {
0701             bool isStringKey = false;
0702             QString text = readString(isStringKey, statistics, state);
0703             if (text.isNull()) {
0704 #if QT_VERSION >= 0x050e00
0705                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing macro" << key << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Could not read macro's text";
0706 #else // QT_VERSION < 0x050e00
0707                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing macro" << key << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Could not read macro's text";
0708 #endif // QT_VERSION >= 0x050e00
0709                 /// Instead of an 'emit' ...
0710 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0711                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing macro '%1' near line %2: Could not read macro's text")).arg(key).arg(state.lineNo)));
0712 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0713                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing macro '%1' near line %2: Could not read macro's text")).arg(key).arg(state.lineNo)));
0714 #endif
0715                 delete macro;
0716                 return nullptr;
0717             }
0718             text = EncoderLaTeX::instance().decode(bibtexAwareSimplify(text));
0719             if (isStringKey)
0720                 macro->value().append(QSharedPointer<MacroKey>(new MacroKey(text)));
0721             else
0722                 macro->value().append(QSharedPointer<PlainText>(new PlainText(text)));
0723 
0724             token = nextToken(state);
0725         } while (token == Token::Doublecross);
0726 
0727         return macro;
0728     }
0729 
0730     Comment *readCommentElement(State &state)
0731     {
0732         if (!readCharUntil(QStringLiteral("{("), state))
0733             return nullptr;
0734         return new Comment(EncoderLaTeX::instance().decode(readBracketString(state)), Preferences::CommentContext::Command);
0735     }
0736 
0737     Comment *readPlainCommentElement(const QString &initialRead, State &state)
0738     {
0739         const QString firstLine {rstrip(EncoderLaTeX::instance().decode(initialRead + readLine(state)))};
0740         if (firstLine.length() > 0 && firstLine[0] == QLatin1Char('%')) {
0741             QStringList lines{{firstLine}};
0742             // Read all lines that start with '%', compute common prefix, and remove prefix from all lines
0743             // Stop when encountering a line that starts without '%'
0744             while (skipNewline(state) && state.nextChar == QLatin1Char('%')) {
0745                 const QString nextLine {rstrip(EncoderLaTeX::instance().decode(QStringLiteral("%") + readLine(state)))};
0746                 lines.append(nextLine);
0747             }
0748 
0749             int commonPrefixLen {0};
0750             for (; commonPrefixLen < firstLine.length(); ++commonPrefixLen)
0751                 if (firstLine[commonPrefixLen] != QLatin1Char(' ') && firstLine[commonPrefixLen] != QLatin1Char('%'))
0752                     break;
0753             int longestLinLength = firstLine.length();
0754             bool first = true;
0755             for (const QString &line : lines) {
0756                 if (first) {
0757                     first = false;
0758                     continue;
0759                 }
0760                 commonPrefixLen = qMin(commonPrefixLen, line.length());
0761                 longestLinLength = qMax(longestLinLength, line.length());
0762                 for (int i = 0; i < commonPrefixLen; ++i)
0763                     if (line[i] != firstLine[i]) {
0764                         commonPrefixLen = i;
0765                         break;
0766                     }
0767             }
0768             const QString prefix {firstLine.left(commonPrefixLen)};
0769             QString text;
0770             text.reserve(longestLinLength * lines.length());
0771             for (const QString &line : lines)
0772                 text.append(line.mid(commonPrefixLen)).append(QStringLiteral("\n"));
0773 
0774             return new Comment(rstrip(text), Preferences::CommentContext::Prefix, prefix);
0775         } else if (firstLine.length() > 0) {
0776             QStringList lines{{firstLine}};
0777             // Read all lines until a line is either empty or starts with '@'
0778             while (skipNewline(state) && state.nextChar != QLatin1Char('\n') && state.nextChar != QLatin1Char('\r') && state.nextChar != QLatin1Char('@')) {
0779                 const QChar firstLineChar {state.nextChar};
0780                 const QString nextLine {rstrip(EncoderLaTeX::instance().decode(QString(firstLineChar) + readLine(state)))};
0781                 lines.append(nextLine);
0782             }
0783             return new Comment(lines.join(QStringLiteral("\n")), Preferences::CommentContext::Verbatim);
0784         } else {
0785             // Maybe a line with only spaces?
0786             return nullptr;
0787         }
0788     }
0789 
0790     QString tokenidToString(Token token)
0791     {
0792         switch (token) {
0793         case Token::At: return QString(QStringLiteral("At"));
0794         case Token::BracketClose: return QString(QStringLiteral("BracketClose"));
0795         case Token::BracketOpen: return QString(QStringLiteral("BracketOpen"));
0796         case Token::AlphaNumText: return QString(QStringLiteral("AlphaNumText"));
0797         case Token::Assign: return QString(QStringLiteral("Assign"));
0798         case Token::Comma: return QString(QStringLiteral("Comma"));
0799         case Token::Doublecross: return QString(QStringLiteral("Doublecross"));
0800         case Token::EndOfFile: return QString(QStringLiteral("EOF"));
0801         case Token::Unknown: return QString(QStringLiteral("Unknown"));
0802         default: {
0803             qCWarning(LOG_KBIBTEX_IO) << "Encountered an unsupported Token:" << static_cast<int>(token);
0804             return QString(QStringLiteral("Unknown"));
0805         }
0806         }
0807     }
0808 
0809     Preamble *readPreambleElement(Statistics &statistics, State &state)
0810     {
0811         Token token = nextToken(state);
0812         while (token != Token::BracketOpen) {
0813             if (token == Token::EndOfFile) {
0814 #if QT_VERSION >= 0x050e00
0815                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing preamble near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Opening curly brace '{' expected";
0816 #else // QT_VERSION < 0x050e00
0817                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing preamble near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Opening curly brace '{' expected";
0818 #endif // QT_VERSION >= 0x050e00
0819                 /// Instead of an 'emit' ...
0820 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0821                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing preamble near line %1: Opening curly brace '{' expected")).arg(state.lineNo)));
0822 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0823                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing preamble near line %1: Opening curly brace '{' expected")).arg(state.lineNo)));
0824 #endif
0825                 return nullptr;
0826             }
0827             token = nextToken(state);
0828         }
0829 
0830         Preamble *preamble = new Preamble();
0831         do {
0832             bool isStringKey = false;
0833             QString text = readString(isStringKey, statistics, state);
0834             if (text.isNull()) {
0835 #if QT_VERSION >= 0x050e00
0836                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing preamble near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Could not read preamble's text";
0837 #else // QT_VERSION < 0x050e00
0838                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing preamble near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Could not read preamble's text";
0839 #endif // QT_VERSION >= 0x050e00
0840                 /// Instead of an 'emit' ...
0841 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0842                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing preamble near line %1: Could not read preamble's text")).arg(state.lineNo)));
0843 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0844                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing preamble near line %1: Could not read preamble's text")).arg(state.lineNo)));
0845 #endif
0846                 delete preamble;
0847                 return nullptr;
0848             }
0849             /// Remember: strings from preamble do not get encoded,
0850             /// may contain raw LaTeX commands and code
0851             text = bibtexAwareSimplify(text);
0852             if (isStringKey)
0853                 preamble->value().append(QSharedPointer<MacroKey>(new MacroKey(text)));
0854             else
0855                 preamble->value().append(QSharedPointer<PlainText>(new PlainText(text)));
0856 
0857             token = nextToken(state);
0858         } while (token == Token::Doublecross);
0859 
0860         return preamble;
0861     }
0862 
0863     Entry *readEntryElement(const QString &typeString, Statistics &statistics, State &state)
0864     {
0865         const KBibTeX::Casing keywordCasing = Preferences::instance().bibTeXKeywordCasing();
0866 
0867         Token token = nextToken(state);
0868         while (token != Token::BracketOpen) {
0869             if (token == Token::EndOfFile) {
0870 #if QT_VERSION >= 0x050e00
0871                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Opening curly brace '{' expected";
0872 #else // QT_VERSION < 0x050e00
0873                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Opening curly brace '{' expected";
0874 #endif // QT_VERSION >= 0x050e00
0875                 /// Instead of an 'emit' ...
0876 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0877                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry near line %1: Opening curly brace '{' expected")).arg(state.lineNo)));
0878 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0879                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry near line %1: Opening curly brace '{' expected")).arg(state.lineNo)));
0880 #endif
0881                 return nullptr;
0882             }
0883             token = nextToken(state);
0884         }
0885 
0886         QString id = readSimpleString(state, QStringLiteral(",}"), true).trimmed();
0887         if (id.isEmpty()) {
0888             if (state.nextChar == QLatin1Char(',') || state.nextChar == QLatin1Char('}')) {
0889                 /// Cope with empty ids,
0890                 /// duplicates are handled further below
0891                 id = QStringLiteral("EmptyId");
0892             }
0893             else {
0894 #if QT_VERSION >= 0x050e00
0895                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry near line" << state.lineNo << ":" << state.prevLine << Qt::endl << state.currentLine << "): Could not read entry id";
0896 #else // QT_VERSION < 0x050e00
0897                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry near line" << state.lineNo << ":" << state.prevLine << endl << state.currentLine << "): Could not read entry id";
0898 #endif // QT_VERSION >= 0x050e00
0899                 /// Instead of an 'emit' ...
0900 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0901                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing preambentryle near line %1: Could not read entry id")).arg(state.lineNo)));
0902 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0903                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing preambentryle near line %1: Could not read entry id")).arg(state.lineNo)));
0904 #endif
0905                 return nullptr;
0906             }
0907         } else {
0908             if (id.contains(QStringLiteral("\\")) || id.contains(QStringLiteral("{"))) {
0909                 const QString newId = EncoderLaTeX::instance().decode(id);
0910                 qCWarning(LOG_KBIBTEX_IO) << "Entry id" << id << "near line" << state.lineNo << "contains backslashes or curly brackets, converted to" << newId;
0911                 /// Instead of an 'emit' ...
0912 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0913                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Entry id '%1' near line %2 contains backslashes or curly brackets, converted to '%3'")).arg(id).arg(state.lineNo).arg(newId)));
0914 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0915                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Entry id '%1' near line %2 contains backslashes or curly brackets, converted to '%3'")).arg(id).arg(state.lineNo).arg(newId)));
0916 #endif
0917                 id = newId;
0918             }
0919             if (!Encoder::containsOnlyAscii(id)) {
0920                 /// Try to avoid non-ascii characters in ids
0921                 const QString newId = Encoder::instance().convertToPlainAscii(id);
0922                 qCWarning(LOG_KBIBTEX_IO) << "Entry id" << id << "near line" << state.lineNo << "contains non-ASCII characters, converted to" << newId;
0923                 /// Instead of an 'emit' ...
0924 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0925                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Entry id '%1' near line %2 contains non-ASCII characters, converted to '%3'")).arg(id).arg(state.lineNo).arg(newId)));
0926 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0927                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Entry id '%1' near line %2 contains non-ASCII characters, converted to '%3'")).arg(id).arg(state.lineNo).arg(newId)));
0928 #endif
0929                 id = newId;
0930             }
0931         }
0932         static const QVector<QChar> invalidIdCharacters = {QLatin1Char('{'), QLatin1Char('}'), QLatin1Char(',')};
0933         for (const QChar &invalidIdCharacter : invalidIdCharacters)
0934             if (id.contains(invalidIdCharacter)) {
0935                 qCWarning(LOG_KBIBTEX_IO) << "Entry id" << id << "near line" << state.lineNo << "contains invalid character" << invalidIdCharacter;
0936                 /// Instead of an 'emit' ...
0937 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0938                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Entry id '%1' near line %2 contains invalid character '%3'")).arg(id).arg(state.lineNo).arg(invalidIdCharacter)));
0939 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0940                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Entry id '%1' near line %2 contains invalid character '%3'")).arg(id).arg(state.lineNo).arg(invalidIdCharacter)));
0941 #endif
0942                 return nullptr;
0943             }
0944 
0945         /// Check for duplicate entry ids, avoid collisions
0946         if (state.knownElementIds.contains(id)) {
0947             static const QString newIdPattern = QStringLiteral("%1-%2");
0948             int idx = 2;
0949             QString newId = newIdPattern.arg(id).arg(idx);
0950             while (state.knownElementIds.contains(newId))
0951                 newId = newIdPattern.arg(id).arg(++idx);
0952             qCDebug(LOG_KBIBTEX_IO) << "Duplicate id" << id << "near line" << state.lineNo << ", using replacement id" << newId;
0953             /// Instead of an 'emit' ...
0954 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0955             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Duplicate id '%1' near line %2, using replacement id '%3'")).arg(id).arg(state.lineNo).arg(newId)));
0956 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0957             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Duplicate id '%1' near line %2, using replacement id '%3'")).arg(id).arg(state.lineNo).arg(newId)));
0958 #endif
0959             id = newId;
0960         }
0961         state.knownElementIds.insert(id);
0962 
0963         Entry *entry = new Entry(BibTeXEntries::instance().format(typeString), id);
0964 
0965         token = nextToken(state);
0966         do {
0967             if (token == Token::BracketClose)
0968                 break;
0969             else if (token == Token::EndOfFile) {
0970 #if QT_VERSION >= 0x050e00
0971                 qCWarning(LOG_KBIBTEX_IO) << "Unexpected end of data in entry" << id << "near line" << state.lineNo << ":" << state.prevLine << Qt::endl << state.currentLine;
0972 #else // QT_VERSION < 0x050e00
0973                 qCWarning(LOG_KBIBTEX_IO) << "Unexpected end of data in entry" << id << "near line" << state.lineNo << ":" << state.prevLine << endl << state.currentLine;
0974 #endif // QT_VERSION >= 0x050e00
0975                 /// Instead of an 'emit' ...
0976 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0977                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Unexpected end of data in entry '%1' near line %2")).arg(id).arg(state.lineNo)));
0978 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0979                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Unexpected end of data in entry '%1' near line %2")).arg(id).arg(state.lineNo)));
0980 #endif
0981                 delete entry;
0982                 return nullptr;
0983             } else if (token != Token::Comma) {
0984                 if (state.nextChar.isLetter()) {
0985 #if QT_VERSION >= 0x050e00
0986                     qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Comma symbol ',' expected but got character" << state.nextChar << "(token" << tokenidToString(token) << ")";
0987 #else // QT_VERSION < 0x050e00
0988                     qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Comma symbol ',' expected but got character" << state.nextChar << "(token" << tokenidToString(token) << ")";
0989 #endif // QT_VERSION >= 0x050e00
0990                     /// Instead of an 'emit' ...
0991 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
0992                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Comma symbol ',' expected but got character '%3' (token %4)")).arg(id).arg(state.lineNo).arg(state.nextChar).arg(tokenidToString(token))));
0993 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
0994                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Comma symbol ',' expected but got character '%3' (token %4)")).arg(id).arg(state.lineNo).arg(state.nextChar).arg(tokenidToString(token))));
0995 #endif
0996                 } else if (state.nextChar.isPrint()) {
0997 #if QT_VERSION >= 0x050e00
0998                     qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Comma symbol ',' expected but got character" << state.nextChar << "(" << QString(QStringLiteral("0x%1")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')) << ", token" << tokenidToString(token) << ")";
0999 #else // QT_VERSION < 0x050e00
1000                     qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Comma symbol ',' expected but got character" << state.nextChar << "(" << QString(QStringLiteral("0x%1")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')) << ", token" << tokenidToString(token) << ")";
1001 #endif // QT_VERSION >= 0x050e00
1002                     /// Instead of an 'emit' ...
1003 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1004                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Comma symbol ',' expected but got character '%3' (0x%4, token %5)")).arg(id).arg(state.lineNo).arg(state.nextChar).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(tokenidToString(token))));
1005 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1006                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Comma symbol ',' expected but got character '%3' (0x%4, token %5)")).arg(id).arg(state.lineNo).arg(state.nextChar).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(tokenidToString(token))));
1007 #endif
1008                 } else {
1009 #if QT_VERSION >= 0x050e00
1010                     qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Comma symbol (,) expected but got character" << QString(QStringLiteral("0x%1")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')) << "(token" << tokenidToString(token) << ")";
1011 #else // QT_VERSION < 0x050e00
1012                     qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Comma symbol (,) expected but got character" << QString(QStringLiteral("0x%1")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')) << "(token" << tokenidToString(token) << ")";
1013 #endif // QT_VERSION >= 0x050e00
1014                     /// Instead of an 'emit' ...
1015 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1016                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Comma symbol ',' expected but got character 0x%3 (token %4)")).arg(id).arg(state.lineNo).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(tokenidToString(token))));
1017 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1018                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Comma symbol ',' expected but got character 0x%3 (token %4)")).arg(id).arg(state.lineNo).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(tokenidToString(token))));
1019 #endif
1020                 }
1021                 delete entry;
1022                 return nullptr;
1023             }
1024 
1025             QString keyName = BibTeXFields::instance().format(readSimpleString(state), keywordCasing);
1026             if (keyName.isEmpty()) {
1027                 token = nextToken(state);
1028                 if (token == Token::BracketClose) {
1029                     /// Most often it is the case that the previous line ended with a comma,
1030                     /// implying that this entry continues, but instead it gets closed by
1031                     /// a closing curly bracket.
1032 #if QT_VERSION >= 0x050e00
1033                     qCDebug(LOG_KBIBTEX_IO) << "Issue while parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Last key-value pair ended with a non-conformant comma, ignoring that";
1034 #else // QT_VERSION < 0x050e00
1035                     qCDebug(LOG_KBIBTEX_IO) << "Issue while parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Last key-value pair ended with a non-conformant comma, ignoring that";
1036 #endif // QT_VERSION >= 0x050e00
1037                     /// Instead of an 'emit' ...
1038 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1039                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Issue while parsing entry '%1' near line %2: Last key-value pair ended with a non-conformant comma, ignoring that")).arg(id).arg(state.lineNo)));
1040 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1041                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Issue while parsing entry '%1' near line %2: Last key-value pair ended with a non-conformant comma, ignoring that")).arg(id).arg(state.lineNo)));
1042 #endif
1043                     break;
1044                 } else {
1045                     /// Something looks terribly wrong
1046 #if QT_VERSION >= 0x050e00
1047                     qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "): Closing curly bracket expected, but found" << tokenidToString(token);
1048 #else // QT_VERSION < 0x050e00
1049                     qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "): Closing curly bracket expected, but found" << tokenidToString(token);
1050 #endif // QT_VERSION >= 0x050e00
1051                     /// Instead of an 'emit' ...
1052 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1053                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Closing curly bracket expected, but found %3")).arg(id).arg(state.lineNo).arg(tokenidToString(token))));
1054 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1055                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Closing curly bracket expected, but found %3")).arg(id).arg(state.lineNo).arg(tokenidToString(token))));
1056 #endif
1057                     delete entry;
1058                     return nullptr;
1059                 }
1060             }
1061             /// Try to avoid non-ascii characters in keys
1062             const QString newkeyName = Encoder::instance().convertToPlainAscii(keyName);
1063             if (newkeyName != keyName) {
1064                 qCWarning(LOG_KBIBTEX_IO) << "Field name " << keyName << "near line" << state.lineNo << "contains non-ASCII characters, converted to" << newkeyName;
1065                 /// Instead of an 'emit' ...
1066 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1067                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Field name '%1' near line %2 contains non-ASCII characters, converted to '%3'")).arg(keyName).arg(state.lineNo).arg(newkeyName)));
1068 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1069                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Field name '%1' near line %2 contains non-ASCII characters, converted to '%3'")).arg(keyName).arg(state.lineNo).arg(newkeyName)));
1070 #endif
1071                 keyName = newkeyName;
1072             }
1073 
1074             token = nextToken(state);
1075             if (token != Token::Assign) {
1076 #if QT_VERSION >= 0x050e00
1077                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << ", field name" << keyName << "near line" << state.lineNo  << "(" << state.prevLine << Qt::endl << state.currentLine << "): Assign symbol '=' expected after field name";
1078 #else // QT_VERSION < 0x050e00
1079                 qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << ", field name" << keyName << "near line" << state.lineNo  << "(" << state.prevLine << endl << state.currentLine << "): Assign symbol '=' expected after field name";
1080 #endif // QT_VERSION >= 0x050e00
1081                 /// Instead of an 'emit' ...
1082 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1083                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1', field name '%2' near line %3: Assign symbol '=' expected after field name")).arg(id, keyName).arg(state.lineNo)));
1084 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1085                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Error in parsing entry '%1', field name '%2' near line %3: Assign symbol '=' expected after field name")).arg(id, keyName).arg(state.lineNo)));
1086 #endif
1087                 delete entry;
1088                 return nullptr;
1089             }
1090 
1091             Value value;
1092 
1093             /// check for duplicate fields
1094             if (entry->contains(keyName)) {
1095                 if (keyName.toLower() == Entry::ftKeywords || keyName.toLower() == Entry::ftUrl) {
1096                     /// Special handling of keywords and URLs: instead of using fallback names
1097                     /// like "keywords2", "keywords3", ..., append new keywords to
1098                     /// already existing keyword value
1099                     value = entry->value(keyName);
1100                 } else if (keysForPersonDetection.contains(keyName.toLower())) {
1101                     /// Special handling of authors and editors: instead of using fallback names
1102                     /// like "author2", "author3", ..., append new authors to
1103                     /// already existing author value
1104                     value = entry->value(keyName);
1105                 } else {
1106                     int i = 2;
1107                     QString appendix = QString::number(i);
1108                     while (entry->contains(keyName + appendix)) {
1109                         ++i;
1110                         appendix = QString::number(i);
1111                     }
1112 #if QT_VERSION >= 0x050e00
1113                     qCDebug(LOG_KBIBTEX_IO) << "Entry" << id << "already contains a key" << keyName << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << "), using" << (keyName + appendix);
1114 #else // QT_VERSION < 0x050e00
1115                     qCDebug(LOG_KBIBTEX_IO) << "Entry" << id << "already contains a key" << keyName << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << "), using" << (keyName + appendix);
1116 #endif // QT_VERSION >= 0x050e00
1117                     /// Instead of an 'emit' ...
1118 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1119                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Entry '%1' already contains a key '%2' near line %4, using '%3'")).arg(id, keyName, keyName + appendix).arg(state.lineNo)));
1120 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1121                     QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Entry '%1' already contains a key '%2' near line %4, using '%3'")).arg(id, keyName, keyName + appendix).arg(state.lineNo)));
1122 #endif
1123                     keyName += appendix;
1124                 }
1125             }
1126 
1127             token = readValue(value, keyName, statistics, state);
1128             if (token != Token::BracketClose && token != Token::Comma) {
1129 #if QT_VERSION >= 0x050e00
1130                 qCWarning(LOG_KBIBTEX_IO) << "Failed to read value in entry" << id << ", field name" << keyName << "near line" << state.lineNo  << "(" << state.prevLine << Qt::endl << state.currentLine << ")";
1131 #else // QT_VERSION < 0x050e00
1132                 qCWarning(LOG_KBIBTEX_IO) << "Failed to read value in entry" << id << ", field name" << keyName << "near line" << state.lineNo  << "(" << state.prevLine << endl << state.currentLine << ")";
1133 #endif // QT_VERSION >= 0x050e00
1134                 /// Instead of an 'emit' ...
1135 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1136                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Failed to read value in entry '%1', field name '%2' near line %3")).arg(id, keyName).arg(state.lineNo)));
1137 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1138                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Failed to read value in entry '%1', field name '%2' near line %3")).arg(id, keyName).arg(state.lineNo)));
1139 #endif
1140                 delete entry;
1141                 return nullptr;
1142             }
1143 
1144             entry->insert(keyName, value);
1145         } while (true);
1146 
1147         return entry;
1148     }
1149 
1150     Element *nextElement(Statistics &statistics, State &state)
1151     {
1152         Token token = nextToken(state);
1153 
1154         if (token == Token::At) {
1155             const QString elementType = readSimpleString(state);
1156             const QString elementTypeLower = elementType.toLower();
1157 
1158             if (elementTypeLower == QStringLiteral("comment")) {
1159                 Comment *comment {readCommentElement(state)};
1160                 if (comment != nullptr)
1161                     statistics.countCommentContext.insert(QStringLiteral("@"), statistics.countCommentContext.value(QStringLiteral("@"), 0) + 1);
1162                 return comment;
1163             } else if (elementTypeLower == QStringLiteral("string"))
1164                 return readMacroElement(statistics, state);
1165             else if (elementTypeLower == QStringLiteral("preamble"))
1166                 return readPreambleElement(statistics, state);
1167             else if (elementTypeLower == QStringLiteral("import")) {
1168                 qCDebug(LOG_KBIBTEX_IO) << "Skipping potential HTML/JavaScript @import statement near line" << state.lineNo;
1169                 /// Instead of an 'emit' ...
1170 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1171                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Skipping potential HTML/JavaScript @import statement near line %1")).arg(state.lineNo)));
1172 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1173                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Skipping potential HTML/JavaScript @import statement near line %1")).arg(state.lineNo)));
1174 #endif
1175                 return nullptr;
1176             } else if (!elementType.isEmpty())
1177                 return readEntryElement(elementType, statistics, state);
1178             else {
1179                 qCWarning(LOG_KBIBTEX_IO) << "Element type after '@' is empty or invalid near line" << state.lineNo;
1180                 /// Instead of an 'emit' ...
1181 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1182                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Element type after '@' is empty or invalid near line %1")).arg(state.lineNo)));
1183 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1184                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Element type after '@' is empty or invalid near line %1")).arg(state.lineNo)));
1185 #endif
1186                 return nullptr;
1187             }
1188         } else if (token == Token::Unknown && state.nextChar == QLatin1Char('%')) {
1189             // Do not complain about LaTeX-like comments, just eat them
1190             Comment *comment {readPlainCommentElement(QString(state.nextChar), state)};
1191             if (comment != nullptr)
1192                 statistics.countCommentContext.insert(comment->prefix(), statistics.countCommentContext.value(comment->prefix(), 0) + 1);
1193             return comment;
1194         } else if (token == Token::Unknown) {
1195             if (state.nextChar.isLetter()) {
1196 #if QT_VERSION >= 0x050e00
1197                 qCDebug(LOG_KBIBTEX_IO) << "Unknown character" << state.nextChar << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << ")" << ", treating as comment";
1198 #else // QT_VERSION < 0x050e00
1199                 qCDebug(LOG_KBIBTEX_IO) << "Unknown character" << state.nextChar << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << ")" << ", treating as comment";
1200 #endif // QT_VERSION >= 0x050e00
1201                 /// Instead of an 'emit' ...
1202 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1203                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Unknown character '%1' near line %2, treating as comment")).arg(state.nextChar).arg(state.lineNo)));
1204 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1205                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Unknown character '%1' near line %2, treating as comment")).arg(state.nextChar).arg(state.lineNo)));
1206 #endif
1207             } else if (state.nextChar.isPrint()) {
1208 #if QT_VERSION >= 0x050e00
1209                 qCDebug(LOG_KBIBTEX_IO) << "Unknown character" << state.nextChar << "(" << QString(QStringLiteral("0x%1")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')) << ") near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << ")" << ", treating as comment";
1210 #else // QT_VERSION < 0x050e00
1211                 qCDebug(LOG_KBIBTEX_IO) << "Unknown character" << state.nextChar << "(" << QString(QStringLiteral("0x%1")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')) << ") near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << ")" << ", treating as comment";
1212 #endif // QT_VERSION >= 0x050e00
1213                 /// Instead of an 'emit' ...
1214 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1215                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Unknown character '%1' (0x%2) near line %3, treating as comment")).arg(state.nextChar).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(state.lineNo)));
1216 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1217                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Unknown character '%1' (0x%2) near line %3, treating as comment")).arg(state.nextChar).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(state.lineNo)));
1218 #endif
1219             } else {
1220 #if QT_VERSION >= 0x050e00
1221                 qCDebug(LOG_KBIBTEX_IO) << "Unknown character" << QString(QStringLiteral("0x%1")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')) << "near line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << ")" << ", treating as comment";
1222 #else // QT_VERSION < 0x050e00
1223                 qCDebug(LOG_KBIBTEX_IO) << "Unknown character" << QString(QStringLiteral("0x%1")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')) << "near line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << ")" << ", treating as comment";
1224 #endif // QT_VERSION >= 0x050e00
1225                 /// Instead of an 'emit' ...
1226 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1227                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Unknown character 0x%1 near line %2, treating as comment")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(state.lineNo)));
1228 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1229                 QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Info), Q_ARG(QString, QString(QStringLiteral("Unknown character 0x%1 near line %2, treating as comment")).arg(state.nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(state.lineNo)));
1230 #endif
1231             }
1232 
1233             Comment *comment {readPlainCommentElement(QString(state.prevChar) + state.nextChar, state)};
1234             if (comment != nullptr)
1235                 statistics.countCommentContext.insert(QString(), statistics.countCommentContext.value(QString(), 0) + 1);
1236             return comment;
1237         }
1238 
1239         if (token != Token::EndOfFile) {
1240 #if QT_VERSION >= 0x050e00
1241             qCWarning(LOG_KBIBTEX_IO) << "Don't know how to parse next token of type" << tokenidToString(token) << "in line" << state.lineNo << "(" << state.prevLine << Qt::endl << state.currentLine << ")";
1242 #else // QT_VERSION < 0x050e00
1243             qCWarning(LOG_KBIBTEX_IO) << "Don't know how to parse next token of type" << tokenidToString(token) << "in line" << state.lineNo << "(" << state.prevLine << endl << state.currentLine << ")";
1244 #endif // QT_VERSION >= 0x050e00
1245             /// Instead of an 'emit' ...
1246 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1247             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Don't know how to parse next token of type %1 in line %2")).arg(tokenidToString(token)).arg(state.lineNo)));
1248 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1249             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Error), Q_ARG(QString, QString(QStringLiteral("Don't know how to parse next token of type %1 in line %2")).arg(tokenidToString(token)).arg(state.lineNo)));
1250 #endif
1251         }
1252 
1253         return nullptr;
1254     }
1255 
1256 
1257     static QSharedPointer<Person> personFromString(const QString &name, CommaContainment *comma, const int line_number, QObject *parent)
1258     {
1259         // TODO Merge with FileImporter::splitName and FileImporterBibTeX::contextSensitiveSplit
1260         static QStringList tokens;
1261         contextSensitiveSplit(name, tokens);
1262         return personFromTokenList(tokens, comma, line_number, parent);
1263     }
1264 
1265     static QSharedPointer<Person> personFromTokenList(const QStringList &tokens, CommaContainment *comma, const int line_number, QObject *parent)
1266     {
1267         if (comma != nullptr) *comma = CommaContainment::None;
1268 
1269         /// Simple case: provided list of tokens is empty, return invalid Person
1270         if (tokens.isEmpty())
1271             return QSharedPointer<Person>();
1272 
1273         /**
1274          * The sequence of tokens may contain in up to two of its elements one comma each:
1275          * {"Tuckwell,", "Peter,", "Jr."}. In this case, fill three string lists:
1276          * one with tokens before the first comma, one with tokens after the second commas,
1277          * and one with tokens after the second commas. If commas appear in the middle of a
1278          * token, split token into two new tokens and add them to two different string lists.
1279          * The comma itself will not be part of any string in the string lists.
1280          * Example:
1281          * partA = ( "Tuckwell" );  partB = ( "Peter" );  partC = ( "Jr." )
1282          * If a comma was found, boolean variable gotComma is set.
1283          */
1284         QStringList partA, partB, partC;
1285         int commaCount = 0;
1286         for (const QString &token : tokens) {
1287             /// Position where comma was found, or -1 if no comma in token
1288             int p = -1;
1289             if (commaCount < 2) {
1290                 /// Only check if token contains comma
1291                 /// if no comma was found before
1292                 int bracketCounter = 0;
1293                 for (int i = 0; i < token.length(); ++i) {
1294                     /// Consider opening curly brackets
1295                     if (token[i] == QLatin1Char('{')) ++bracketCounter;
1296                     /// Consider closing curly brackets
1297                     else if (token[i] == QLatin1Char('}')) --bracketCounter;
1298                     /// Only if outside any open curly bracket environments
1299                     /// consider comma characters
1300                     else if (bracketCounter == 0 && token[i] == QLatin1Char(',')) {
1301                         /// Memorize comma's position and break from loop
1302                         p = i;
1303                         break;
1304                     } else if (bracketCounter < 0) {
1305                         /// Should never happen: more closing brackets than opening ones
1306                         qCWarning(LOG_KBIBTEX_IO) << "Opening and closing brackets do not match near line" << line_number;
1307                         if (parent != nullptr)
1308 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1309                             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Opening and closing brackets do not match near line %1")).arg(line_number)));
1310 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1311                             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Opening and closing brackets do not match near line %1")).arg(line_number)));
1312 #endif
1313                     }
1314                 }
1315             }
1316 
1317             if (p >= 0) {
1318                 if (commaCount == 0) {
1319                     if (p > 0) partA.append(token.left(p));
1320                     if (p < token.length() - 1) partB.append(token.mid(p + 1));
1321                 } else if (commaCount == 1) {
1322                     if (p > 0) partB.append(token.left(p));
1323                     if (p < token.length() - 1) partC.append(token.mid(p + 1));
1324                 }
1325                 ++commaCount;
1326             } else if (commaCount == 0)
1327                 partA.append(token);
1328             else if (commaCount == 1)
1329                 partB.append(token);
1330             else if (commaCount == 2)
1331                 partC.append(token);
1332         }
1333         if (commaCount > 0) {
1334             if (comma != nullptr) *comma = CommaContainment::Contains;
1335             return QSharedPointer<Person>(new Person(partC.isEmpty() ? partB.join(QLatin1Char(' ')) : partC.join(QLatin1Char(' ')), partA.join(QLatin1Char(' ')), partC.isEmpty() ? QString() : partB.join(QLatin1Char(' '))));
1336         }
1337 
1338         /**
1339          * PubMed uses a special writing style for names, where the
1340          * last name is followed by single capital letters, each being
1341          * the first letter of each first name. Example: Tuckwell P H
1342          * So, check how many single capital letters are at the end of
1343          * the given token list
1344          */
1345         partA.clear(); partB.clear();
1346         bool singleCapitalLetters = true;
1347         QStringList::ConstIterator it = tokens.constEnd();
1348         while (it != tokens.constBegin()) {
1349             --it;
1350             if (singleCapitalLetters && it->length() == 1 && it->at(0).isUpper())
1351                 partB.prepend(*it);
1352             else {
1353                 singleCapitalLetters = false;
1354                 partA.prepend(*it);
1355             }
1356         }
1357         if (!partB.isEmpty()) {
1358             /// Name was actually given in PubMed format
1359             return QSharedPointer<Person>(new Person(partB.join(QLatin1Char(' ')), partA.join(QLatin1Char(' '))));
1360         }
1361 
1362         /**
1363          * Normally, the last upper case token in a name is the last name
1364          * (last names consisting of multiple space-separated parts *have*
1365          * to be protected by {...}), but some languages have fill words
1366          * in lower case belonging to the last name as well (example: "van").
1367          * In addition, some languages have capital case letters as well
1368          * (example: "Di Cosmo").
1369          * Exception: Special keywords such as "Jr." can be appended to the
1370          * name, not counted as part of the last name.
1371          */
1372         partA.clear(); partB.clear(); partC.clear();
1373         static const QSet<QString> capitalCaseLastNameFragments {QStringLiteral("Di")};
1374         it = tokens.constEnd();
1375         while (it != tokens.constBegin()) {
1376             --it;
1377             if (partB.isEmpty() && (it->toLower().startsWith(QStringLiteral("jr")) || it->toLower().startsWith(QStringLiteral("sr")) || it->toLower().startsWith(QStringLiteral("iii"))))
1378                 /// handle name suffices like "Jr" or "III."
1379                 partC.prepend(*it);
1380             else if (partB.isEmpty() || it->at(0).isLower() || capitalCaseLastNameFragments.contains(*it))
1381                 partB.prepend(*it);
1382             else
1383                 partA.prepend(*it);
1384         }
1385         if (!partB.isEmpty()) {
1386             /// Name was actually like "Peter Ole van der Tuckwell",
1387             /// split into "Peter Ole" and "van der Tuckwell"
1388             return QSharedPointer<Person>(new Person(partA.join(QLatin1Char(' ')), partB.join(QLatin1Char(' ')), partC.isEmpty() ? QString() : partC.join(QLatin1Char(' '))));
1389         }
1390 
1391         qCWarning(LOG_KBIBTEX_IO) << "Don't know how to handle name" << tokens.join(QLatin1Char(' ')) << "near line" << line_number;
1392         if (parent != nullptr)
1393 #if QT_VERSION < QT_VERSION_CHECK(6, 5, 0)
1394             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Don't know how to handle name '%1' near line %2")).arg(tokens.join(QLatin1Char(' '))).arg(line_number)));
1395 #else // QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
1396             QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QMetaMethodReturnArgument(), Q_ARG(FileImporter::MessageSeverity, MessageSeverity::Warning), Q_ARG(QString, QString(QStringLiteral("Don't know how to handle name '%1' near line %2")).arg(tokens.join(QLatin1Char(' '))).arg(line_number)));
1397 #endif
1398         return QSharedPointer<Person>();
1399     }
1400 
1401 };
1402 
1403 const QStringList FileImporterBibTeX::Private::keysForPersonDetection {Entry::ftAuthor, Entry::ftEditor, QStringLiteral("bookauthor") /** used by JSTOR */};
1404 
1405 
1406 FileImporterBibTeX::FileImporterBibTeX(QObject *parent)
1407         : FileImporter(parent), d(new Private(this)), m_cancelFlag(false)
1408 {
1409     /// nothing
1410 }
1411 
1412 FileImporterBibTeX::~FileImporterBibTeX()
1413 {
1414     delete d;
1415 }
1416 
1417 File *FileImporterBibTeX::fromString(const QString &rawText)
1418 {
1419     if (rawText.isEmpty()) {
1420         qCInfo(LOG_KBIBTEX_IO) << "BibTeX data converted to string is empty";
1421         Q_EMIT message(MessageSeverity::Warning, QStringLiteral("BibTeX data converted to string is empty"));
1422         return new File();
1423     }
1424 
1425     File *result = new File();
1426 
1427     /** Remove HTML code from the input source */
1428     // FIXME HTML data should be removed somewhere else? onlinesearch ...
1429     const int originalLength = rawText.length();
1430     QString internalRawText = rawText;
1431     internalRawText = internalRawText.remove(KBibTeX::htmlRegExp);
1432     const int afterHTMLremovalLength = internalRawText.length();
1433     if (originalLength != afterHTMLremovalLength) {
1434         qCInfo(LOG_KBIBTEX_IO) << (originalLength - afterHTMLremovalLength) << "characters of HTML tags have been removed";
1435         Q_EMIT message(MessageSeverity::Info, QString(QStringLiteral("%1 characters of HTML tags have been removed")).arg(originalLength - afterHTMLremovalLength));
1436     }
1437 
1438     Private::Statistics statistics;
1439     Private::State state(new QTextStream(&internalRawText, QIODevice::ReadOnly));
1440     d->readChar(state);
1441 
1442     bool gotAtLeastOneElement = false;
1443     QString previousEntryId;
1444     while (!state.nextChar.isNull() && !m_cancelFlag && !state.textStream->atEnd()) {
1445         Q_EMIT progress(qint64toint(state.textStream->pos()), internalRawText.length());
1446         Element *element = d->nextElement(statistics, state);
1447 
1448         if (element != nullptr) {
1449             gotAtLeastOneElement = true;
1450             if (d->commentHandling == CommentHandling::Keep || !Comment::isComment(*element)) {
1451                 result->append(QSharedPointer<Element>(element));
1452 
1453                 Entry *currentEntry = dynamic_cast<Entry *>(element);
1454                 if (currentEntry != nullptr) {
1455                     if (!previousEntryId.isEmpty()) {
1456                         if (currentEntry->id() >= previousEntryId)
1457                             ++statistics.countSortedByIdentifier;
1458                         else
1459                             ++statistics.countNotSortedByIdentifier;
1460                     }
1461                     previousEntryId = currentEntry->id();
1462                 }
1463             } else
1464                 delete element;
1465         }
1466     }
1467 
1468     if (!gotAtLeastOneElement) {
1469         qCWarning(LOG_KBIBTEX_IO) << "In non-empty input, did not find a single BibTeX element";
1470         Q_EMIT message(MessageSeverity::Error, QStringLiteral("In non-empty input, did not find a single BibTeX element"));
1471         delete result;
1472         result = nullptr;
1473     }
1474 
1475     Q_EMIT progress(100, 100);
1476 
1477     if (m_cancelFlag) {
1478         qCWarning(LOG_KBIBTEX_IO) << "Loading bibliography data has been canceled";
1479         Q_EMIT message(MessageSeverity::Error, QStringLiteral("Loading bibliography data has been canceled"));
1480         delete result;
1481         result = nullptr;
1482     }
1483 
1484     delete state.textStream;
1485 
1486     if (result != nullptr) {
1487         /// Set the file's preferences for string delimiters
1488         /// deduced from statistics built while parsing the file
1489         result->setProperty(File::StringDelimiter, statistics.countQuotationMarks > statistics.countCurlyBrackets ? QStringLiteral("\"\"") : QStringLiteral("{}"));
1490         /// Set the file's preferences for name formatting
1491         result->setProperty(File::NameFormatting, statistics.countFirstNameFirst > statistics.countLastNameFirst ? Preferences::personNameFormatFirstLast : Preferences::personNameFormatLastFirst);
1492         /// Set the file's preferences for title protected
1493         Qt::CheckState triState = (statistics.countProtectedTitle > statistics.countUnprotectedTitle * 4) ? Qt::Checked : ((statistics.countProtectedTitle * 4 < statistics.countUnprotectedTitle) ? Qt::Unchecked : Qt::PartiallyChecked);
1494         result->setProperty(File::ProtectCasing, static_cast<int>(triState));
1495         // Set the file's preferences for comment context
1496         QString commentContextMapKey;
1497         int commentContextMapValue = -1;
1498         for (QHash<QString, int>::ConstIterator it = statistics.countCommentContext.constBegin(); it != statistics.countCommentContext.constEnd(); ++it)
1499             if (it.value() > commentContextMapValue) {
1500                 commentContextMapKey = it.key();
1501                 commentContextMapValue = it.value();
1502             }
1503         if (commentContextMapValue < 0) {
1504             // No comments in BibTeX file? Use value from Preferences ...
1505             result->setProperty(File::CommentContext, static_cast<int>(Preferences::instance().bibTeXCommentContext()));
1506             result->setProperty(File::CommentPrefix, Preferences::instance().bibTeXCommentPrefix());
1507         } else if (commentContextMapKey == QStringLiteral("@")) {
1508             result->setProperty(File::CommentContext, static_cast<int>(Preferences::CommentContext::Command));
1509             result->setProperty(File::CommentPrefix, QString());
1510         } else if (commentContextMapKey.isEmpty()) {
1511             result->setProperty(File::CommentContext, static_cast<int>(Preferences::CommentContext::Verbatim));
1512             result->setProperty(File::CommentPrefix, QString());
1513         } else {
1514             result->setProperty(File::CommentContext, static_cast<int>(Preferences::CommentContext::Prefix));
1515             result->setProperty(File::CommentPrefix, commentContextMapKey);
1516         }
1517         if (!statistics.mostRecentListSeparator.isEmpty())
1518             result->setProperty(File::ListSeparator, statistics.mostRecentListSeparator);
1519         /// Set the file's preference to have the entries sorted by identifier
1520         result->setProperty(File::SortedByIdentifier, statistics.countSortedByIdentifier >= statistics.countNotSortedByIdentifier * 10);
1521         // TODO gather more statistics for keyword casing etc.
1522     }
1523 
1524     return result;
1525 }
1526 
1527 File *FileImporterBibTeX::load(QIODevice *iodevice)
1528 {
1529     m_cancelFlag = false;
1530 
1531     check_if_iodevice_invalid(iodevice);
1532 
1533     QByteArray rawData = iodevice->readAll();
1534     iodevice->close();
1535 
1536     bool encodingMayGetDeterminedByRawData = true;
1537     QString encoding(Preferences::instance().bibTeXEncoding()); ///< default value taken from Preferences
1538     if (rawData.length() >= 8 && rawData.at(0) != 0 && rawData.at(1) == 0 && rawData.at(2) == 0 && rawData.at(3) == 0 && rawData.at(4) != 0 && rawData.at(5) == 0 && rawData.at(6) == 0 && rawData.at(7) == 0) {
1539         /// UTF-32LE (Little Endian)
1540         encoding = QStringLiteral("UTF-32LE");
1541         encodingMayGetDeterminedByRawData = false;
1542     } else if (rawData.length() >= 4 && static_cast<unsigned char>(rawData.at(0)) == 0xff && static_cast<unsigned char>(rawData.at(1)) == 0xfe && rawData.at(2) == 0 && rawData.at(3) == 0) {
1543         /// UTF-32LE (Little Endian) with BOM
1544         encoding = QStringLiteral("UTF-32LE");
1545         rawData = rawData.mid(4); ///< skip BOM
1546         encodingMayGetDeterminedByRawData = false;
1547     } else if (rawData.length() >= 8 && rawData.at(0) == 0 && rawData.at(1) == 0 && rawData.at(2) == 0 && rawData.at(3) != 0 && rawData.at(4) == 0 && rawData.at(5) == 0 && rawData.at(6) == 0 && rawData.at(7) != 0) {
1548         /// UTF-32BE (Big Endian)
1549         encoding = QStringLiteral("UTF-32BE");
1550         encodingMayGetDeterminedByRawData = false;
1551     } else if (rawData.length() >= 4 && static_cast<unsigned char>(rawData.at(0)) == 0 && static_cast<unsigned char>(rawData.at(1)) == 0 && static_cast<unsigned char>(rawData.at(2)) == 0xfe && static_cast<unsigned char>(rawData.at(3)) == 0xff) {
1552         /// UTF-32BE (Big Endian) with BOM
1553         encoding = QStringLiteral("UTF-32BE");
1554         rawData = rawData.mid(4); ///< skip BOM
1555         encodingMayGetDeterminedByRawData = false;
1556     } else if (rawData.length() >= 6 && rawData.at(0) != 0 && rawData.at(1) == 0 && rawData.at(2) != 0 && rawData.at(3) == 0 && rawData.at(4) != 0 && rawData.at(5) == 0) {
1557         /// UTF-16LE (Little Endian)
1558         encoding = QStringLiteral("UTF-16LE");
1559         encodingMayGetDeterminedByRawData = false;
1560     } else if (rawData.length() >= 4 && static_cast<unsigned char>(rawData.at(0)) == 0xff && static_cast<unsigned char>(rawData.at(1)) == 0xfe && rawData.at(2) != 0 && rawData.at(3) == 0) {
1561         /// UTF-16LE (Little Endian) with BOM
1562         encoding = QStringLiteral("UTF-16LE");
1563         rawData = rawData.mid(2); ///< skip BOM
1564         encodingMayGetDeterminedByRawData = false;
1565     } else if (rawData.length() >= 6 && rawData.at(0) == 0 && rawData.at(1) != 0 && rawData.at(2) == 0 && rawData.at(3) != 0 && rawData.at(4) == 0 && rawData.at(5) != 0) {
1566         /// UTF-16BE (Big Endian)
1567         encoding = QStringLiteral("UTF-16BE");
1568         encodingMayGetDeterminedByRawData = false;
1569     } else if (rawData.length() >= 4 && static_cast<unsigned char>(rawData.at(0)) == 0xfe && static_cast<unsigned char>(rawData.at(1)) == 0xff && rawData.at(2) == 0 && rawData.at(3) != 0) {
1570         /// UTF-16BE (Big Endian) with BOM
1571         encoding = QStringLiteral("UTF-16BE");
1572         rawData = rawData.mid(2); ///< skip BOM
1573         encodingMayGetDeterminedByRawData = false;
1574     } else if (rawData.length() >= 3 && static_cast<unsigned char>(rawData.at(0)) == 0xef && static_cast<unsigned char>(rawData.at(1)) == 0xbb && static_cast<unsigned char>(rawData.at(2)) == 0xbf) {
1575         /// UTF-8 BOM
1576         encoding = QStringLiteral("UTF-8");
1577         rawData = rawData.mid(3); ///< skip BOM
1578         encodingMayGetDeterminedByRawData = false;
1579     } else {
1580         /// Assuming that encoding is ASCII-compatible, thus it is possible
1581         /// to search for a byte sequence containin ASCII text
1582         const QByteArray rawDataBeginning = rawData.left(8192);
1583         const int xkbibtexencodingpos = qMax(rawDataBeginning.indexOf("@comment{x-kbibtex-encoding="), rawDataBeginning.indexOf("@Comment{x-kbibtex-encoding="));
1584         if (xkbibtexencodingpos >= 0) {
1585             int i = xkbibtexencodingpos + 28, l = 0;
1586             encoding.clear();
1587             encoding.reserve(32);
1588             while (l < 32 && rawData.at(i) >= 0x20 && rawData.at(i) != QLatin1Char('\n') && rawData.at(i) != QLatin1Char('\r') && rawData.at(i) != QLatin1Char('}') && rawData.at(i) != QLatin1Char(')') && static_cast<unsigned char>(rawData.at(i)) < 0x80) {
1589                 encoding.append(QLatin1Char(rawData.at(i)));
1590                 ++i;
1591                 ++l;
1592             }
1593             rawData = rawData.left(xkbibtexencodingpos) + rawData.mid(i + 1); ///< remove encoding comment
1594             encodingMayGetDeterminedByRawData = encoding.isEmpty();
1595         } else {
1596             const int jabrefencodingpos = qMax(rawDataBeginning.indexOf("% Encoding:"), rawDataBeginning.indexOf("% encoding:"));
1597             if (jabrefencodingpos >= 0) {
1598                 int i = jabrefencodingpos + 11, l = 0;
1599                 encoding.clear();
1600                 encoding.reserve(32);
1601                 while (l < 32 && rawData.at(i) >= 0x20 && rawData.at(i) != QLatin1Char('\n') && rawData.at(i) != QLatin1Char('\r') && rawData.at(i) != QLatin1Char('}') && rawData.at(i) != QLatin1Char(')') && static_cast<unsigned char>(rawData.at(i)) < 0x80) {
1602                     encoding.append(QLatin1Char(rawData.at(i)));
1603                     ++i;
1604                     ++l;
1605                 }
1606                 encoding = encoding.trimmed();
1607                 rawData = rawData.left(jabrefencodingpos) + rawData.mid(i + 1); ///< remove encoding comment
1608                 encodingMayGetDeterminedByRawData = encoding.isEmpty();
1609             } else {
1610                 bool prevByteHadMSBset = false;
1611                 bool prevPrevByteHadMSBset = false;
1612                 for (const char &c : rawDataBeginning) {
1613                     const bool hasMSBset{static_cast<unsigned char>(c) >= 128};
1614                     if (!prevPrevByteHadMSBset && prevByteHadMSBset && !hasMSBset) {
1615                         // There was a single byte which had its most-significant bit (MSB) set,
1616                         // surrounded by pure-ASCII bytes. As at least in UTF-8 no single bytes
1617                         // with MSB set exist, guess that the data is ISO-8859-15, which seems
1618                         // to be the most popular non-ASCII and non-Unicode encoding
1619                         encoding = QStringLiteral("ISO-8859-15");
1620                         encodingMayGetDeterminedByRawData = false;
1621                         break;
1622                     }
1623                     prevPrevByteHadMSBset = prevByteHadMSBset;
1624                     prevByteHadMSBset = hasMSBset;
1625                 }
1626             }
1627         }
1628     }
1629 
1630     if (encoding.isEmpty()) {
1631         encoding = Preferences::instance().bibTeXEncoding(); ///< just in case something went wrong
1632         encodingMayGetDeterminedByRawData = true;
1633     }
1634 
1635     if (encodingMayGetDeterminedByRawData) {
1636         // Unclear which encoding raw data makes use of, so test for
1637         // two popular choices: (1) only ASCII (means 'LaTeX' encoding)
1638         // and (2) UTF-8
1639         bool hasUTF8 = false;
1640         bool outsideUTF8 = false;
1641         const int len = qMin(2048, rawData.length() - 3);
1642         for (int i = 0; i < len; ++i) {
1643             const char c1 = rawData.at(i);
1644             if ((c1 & 0x80) == 0) {
1645                 // This character is probably ASCII, so ignore it
1646             } else {
1647                 const char c2 = rawData.at(i + 1);
1648                 if ((c1 & 0xe0) == 0xc0 && (c2 & 0xc0) == 0x80) {
1649                     // This is a two-byte UTF-8 symbol
1650                     hasUTF8 = true;
1651                     ++i;
1652                 } else {
1653                     const char c3 = rawData.at(i + 2);
1654                     if ((c1 & 0xf0) == 0xe0 && (c2 & 0xc0) == 0x80 && (c3 & 0xc0) == 0x80) {
1655                         // This is a three-byte UTF-8 symbol
1656                         hasUTF8 = true;
1657                         i += 2;
1658                     } else {
1659                         const char c4 = rawData.at(i + 3);
1660                         if ((c1 & 0xf8) == 0xf0 && (c2 & 0xc0) == 0x80 && (c3 & 0xc0) == 0x80 && (c4 & 0xc0) == 0x80) {
1661                             // This is a four-byte UTF-8 symbol
1662                             hasUTF8 = true;
1663                             i += 3;
1664                         } else {
1665                             outsideUTF8 = true;
1666                             break; //< No point in further testing more raw data
1667                         }
1668                     }
1669                 }
1670             }
1671         }
1672 
1673         if (!outsideUTF8) {
1674             encoding = hasUTF8 ? QStringLiteral("UTF-8") : QStringLiteral("LaTeX");
1675             encodingMayGetDeterminedByRawData = false; //< Now the encoding is known
1676         }
1677     }
1678 
1679     encoding = encoding.toLower();
1680     if (encoding == QStringLiteral("us-ascii")) {
1681         qCDebug(LOG_KBIBTEX_IO) << "Replacing deprecated encoding 'US-ASCII' with 'LaTeX'";
1682         encoding = QStringLiteral("latex"); //< encoding 'US-ASCII' is deprecated in favour of 'LaTeX'
1683     }
1684     // For encoding 'LaTeX', fall back to encoding 'UTF-8' when creating
1685     // a QTextCodec instance, but keep 'LaTeX' as the bibliography's 'actual' encoding (used as its encoding property)
1686     QTextCodec *codec = QTextCodec::codecForName(encoding == QStringLiteral("latex") ? "utf-8" : encoding.toLatin1());
1687     if (codec == nullptr) {
1688         qCWarning(LOG_KBIBTEX_IO) << "Could not determine codec for encoding" << encoding;
1689         Q_EMIT message(MessageSeverity::Warning, QString(QStringLiteral("Could not determine codec for encoding '%1'")).arg(encoding));
1690         return nullptr;
1691     }
1692     QString rawText = codec->toUnicode(rawData);
1693 
1694     /// Remove deprecated 'x-kbibtex-personnameformatting' from BibTeX raw text
1695     const int posPersonNameFormatting = rawText.indexOf(QStringLiteral("@comment{x-kbibtex-personnameformatting="));
1696     if (posPersonNameFormatting >= 0) {
1697         const int endOfPersonNameFormatting = rawText.indexOf(QLatin1Char('}'), posPersonNameFormatting + 39);
1698         if (endOfPersonNameFormatting > 0)
1699             rawText = rawText.left(posPersonNameFormatting) + rawText.mid(endOfPersonNameFormatting + 1);
1700     }
1701 
1702     File *result = fromString(rawText);
1703     /// In the File object's property, store the encoding used to load the data
1704     result->setProperty(File::Encoding, encoding);
1705 
1706     return result;
1707 }
1708 
1709 bool FileImporterBibTeX::guessCanDecode(const QString &rawText)
1710 {
1711     static const QRegularExpression bibtexLikeText(QStringLiteral("@\\w+\\{.+\\}"));
1712     QString text = EncoderLaTeX::instance().decode(rawText);
1713     return bibtexLikeText.match(text).hasMatch();
1714 }
1715 
1716 void FileImporterBibTeX::cancel()
1717 {
1718     m_cancelFlag = true;
1719 }
1720 
1721 QList<QSharedPointer<Keyword> > FileImporterBibTeX::splitKeywords(const QString &text, char *usedSplitChar)
1722 {
1723     QList<QSharedPointer<Keyword> > result;
1724     static const QHash<char, QRegularExpression> splitAlong = {
1725         {'\n', QRegularExpression(QStringLiteral("\\s*\n\\s*"))},
1726         {';', QRegularExpression(QStringLiteral("\\s*;\\s*"))},
1727         {',', QRegularExpression(QStringLiteral("\\s*,\\s*"))}
1728     };
1729     if (usedSplitChar != nullptr)
1730         *usedSplitChar = '\0';
1731 
1732     for (auto it = splitAlong.constBegin(); it != splitAlong.constEnd(); ++it) {
1733         /// check if character is contained in text (should be cheap to test)
1734         if (text.contains(QLatin1Char(it.key()))) {
1735             /// split text along a pattern like spaces-splitchar-spaces
1736             /// extract keywords
1737             static const QRegularExpression unneccessarySpacing(QStringLiteral("[ \n\r\t]+"));
1738 #if QT_VERSION >= 0x050e00
1739             const QStringList keywords = text.split(it.value(), Qt::SkipEmptyParts).replaceInStrings(unneccessarySpacing, QStringLiteral(" "));
1740 #else // QT_VERSION < 0x050e00
1741             const QStringList keywords = text.split(it.value(), QString::SkipEmptyParts).replaceInStrings(unneccessarySpacing, QStringLiteral(" "));
1742 #endif // QT_VERSION >= 0x050e00
1743             /// build QList of Keyword objects from keywords
1744             for (const QString &keyword : keywords) {
1745                 result.append(QSharedPointer<Keyword>(new Keyword(keyword)));
1746             }
1747             /// Memorize (some) split characters for later use
1748             /// (e.g. when writing file again)
1749             if (usedSplitChar != nullptr)
1750                 *usedSplitChar = it.key();
1751             /// no more splits necessary
1752             break;
1753         }
1754     }
1755 
1756     /// no split was performed, so whole text must be a single keyword
1757     if (result.isEmpty())
1758         result.append(QSharedPointer<Keyword>(new Keyword(text)));
1759 
1760     return result;
1761 }
1762 
1763 QList<QSharedPointer<Person> > FileImporterBibTeX::splitNames(const QString &text)
1764 {
1765     /// Case: Smith, John and Johnson, Tim
1766     /// Case: Smith, John and Fulkerson, Ford and Johnson, Tim
1767     /// Case: Smith, John, Fulkerson, Ford, and Johnson, Tim
1768     /// Case: John Smith and Tim Johnson
1769     /// Case: John Smith and Ford Fulkerson and Tim Johnson
1770     /// Case: Smith, John, Johnson, Tim
1771     /// Case: Smith, John, Fulkerson, Ford, Johnson, Tim
1772     /// Case: John Smith, Tim Johnson
1773     /// Case: John Smith, Tim Johnson, Ford Fulkerson
1774     /// Case: Smith, John ;  Johnson, Tim ;  Fulkerson, Ford (IEEE Xplore)
1775     /// German case: Robert A. Gehring und Bernd Lutterbeck
1776 
1777     QString internalText = text;
1778 
1779     /// Remove invalid characters such as dots or (double) daggers for footnotes
1780     static const QList<QChar> invalidChars {QChar(0x00b7), QChar(0x2020), QChar(0x2217), QChar(0x2021), QChar(0x002a), QChar(0x21d1) /** Upwards double arrow */};
1781     for (const auto &invalidChar : invalidChars)
1782         /// Replacing daggers with commas ensures that they act as persons' names separator
1783         internalText = internalText.replace(invalidChar, QLatin1Char(','));
1784     /// Remove numbers to footnotes
1785     static const QRegularExpression numberFootnoteRegExp(QStringLiteral("(\\w)\\d+\\b"));
1786     internalText = internalText.replace(numberFootnoteRegExp, QStringLiteral("\\1"));
1787     /// Remove academic degrees
1788     static const QRegularExpression academicDegreesRegExp(QStringLiteral("(,\\s*)?(MA|PhD)\\b"));
1789     internalText = internalText.remove(academicDegreesRegExp);
1790     /// Remove email addresses
1791     static const QRegularExpression emailAddressRegExp(QStringLiteral("\\b[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9]@[a-z0-9][a-z0-9-]*([.][a-z0-9-]+)*([.][a-z]+)+\\b"));
1792     internalText = internalText.remove(emailAddressRegExp);
1793 
1794     /// Split input string into tokens which are either name components (first or last name)
1795     /// or full names (composed of first and last name), depending on the input string's structure
1796     static const QRegularExpression split(QStringLiteral("\\s*([,]+|[,]*\\b[au]nd\\b|[;]|&|\u00b7|\u2022|\\n|\\s{4,})\\s*"));
1797 #if QT_VERSION >= 0x050e00
1798     const QStringList authorTokenList = internalText.split(split, Qt::SkipEmptyParts);
1799 #else // QT_VERSION < 0x050e00
1800     const QStringList authorTokenList = internalText.split(split, QString::SkipEmptyParts);
1801 #endif // QT_VERSION >= 0x050e00
1802 
1803     bool containsSpace = true;
1804     for (QStringList::ConstIterator it = authorTokenList.constBegin(); containsSpace && it != authorTokenList.constEnd(); ++it)
1805         containsSpace = (*it).contains(QLatin1Char(' '));
1806 
1807     QList<QSharedPointer<Person> > result;
1808     result.reserve(authorTokenList.size());
1809     if (containsSpace) {
1810         /// Tokens look like "John Smith"
1811         for (const QString &authorToken : authorTokenList) {
1812             QSharedPointer<Person> person = Private::personFromString(authorToken, nullptr, 1, nullptr);
1813             if (!person.isNull())
1814                 result.append(person);
1815         }
1816     } else {
1817         /// Tokens look like "Smith" or "John"
1818         /// Assumption: two consecutive tokens form a name
1819         for (QStringList::ConstIterator it = authorTokenList.constBegin(); it != authorTokenList.constEnd(); ++it) {
1820             QString lastname = *it;
1821             ++it;
1822             if (it != authorTokenList.constEnd()) {
1823                 lastname += QStringLiteral(", ") + (*it);
1824                 QSharedPointer<Person> person = Private::personFromString(lastname, nullptr, 1, nullptr);
1825                 if (!person.isNull())
1826                     result.append(person);
1827             } else
1828                 break;
1829         }
1830     }
1831 
1832     return result;
1833 }
1834 
1835 QSharedPointer<Person> FileImporterBibTeX::personFromString(const QString &name, const int line_number, QObject *parent)
1836 {
1837     // TODO Merge with FileImporter::splitName
1838     return Private::personFromString(name, nullptr, line_number, parent);
1839 }
1840 
1841 void FileImporterBibTeX::parsePersonList(const QString &text, Value &value, const int line_number, QObject *parent)
1842 {
1843     Private::parsePersonList(text, value, nullptr, line_number, parent);
1844 }
1845 
1846 
1847 void FileImporterBibTeX::contextSensitiveSplit(const QString &text, QStringList &segments)
1848 {
1849     // TODO Merge with FileImporter::splitName and FileImporterBibTeX::personFromString
1850     int bracketCounter = 0; ///< keep track of opening and closing brackets: {...}
1851     QString buffer;
1852     int len = text.length();
1853     segments.clear(); ///< empty list for results before proceeding
1854 
1855     for (int pos = 0; pos < len; ++pos) {
1856         if (text[pos] == QLatin1Char('{'))
1857             ++bracketCounter;
1858         else if (text[pos] == QLatin1Char('}'))
1859             --bracketCounter;
1860 
1861         if (text[pos].isSpace() && bracketCounter == 0) {
1862             if (!buffer.isEmpty()) {
1863                 segments.append(buffer);
1864                 buffer.clear();
1865             }
1866         } else
1867             buffer.append(text[pos]);
1868     }
1869 
1870     if (!buffer.isEmpty())
1871         segments.append(buffer);
1872 }
1873 
1874 QString FileImporterBibTeX::bibtexAwareSimplify(const QString &text)
1875 {
1876     QString result;
1877     int i = 0;
1878 
1879     /// Consume initial spaces ...
1880     while (i < text.length() && text[i].isSpace()) ++i;
1881     /// ... but if there have been spaces (i.e. i>0), then record a single space only
1882     if (i > 0)
1883         result.append(QStringLiteral(" "));
1884 
1885     while (i < text.length()) {
1886         /// Consume non-spaces
1887         while (i < text.length() && !text[i].isSpace()) {
1888             result.append(text[i]);
1889             ++i;
1890         }
1891 
1892         /// String may end with a non-space
1893         if (i >= text.length()) break;
1894 
1895         /// Consume spaces, ...
1896         while (i < text.length() && text[i].isSpace()) ++i;
1897         /// ... but record only a single space
1898         result.append(QStringLiteral(" "));
1899     }
1900 
1901     return result;
1902 }
1903 
1904 QString FileImporterBibTeX::rstrip(const QString &text)
1905 {
1906     for (int p = text.length() - 1; p >= 0; --p)
1907         if (!text.at(p).isSpace())
1908             return text.left(p + 1);
1909     return QString();
1910 }
1911 
1912 void FileImporterBibTeX::setCommentHandling(CommentHandling commentHandling) {
1913     d->commentHandling = commentHandling;
1914 }