File indexing completed on 2024-05-19 05:05:52

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2019 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *   SPDX-FileCopyrightText: 2014 Pino Toscano <pino@kde.org>
0006  *                                                                         *
0007  *   This program is free software; you can redistribute it and/or modify  *
0008  *   it under the terms of the GNU General Public License as published by  *
0009  *   the Free Software Foundation; either version 2 of the License, or     *
0010  *   (at your option) any later version.                                   *
0011  *                                                                         *
0012  *   This program is distributed in the hope that it will be useful,       *
0013  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0014  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0015  *   GNU General Public License for more details.                          *
0016  *                                                                         *
0017  *   You should have received a copy of the GNU General Public License     *
0018  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0019  ***************************************************************************/
0020 
#include <memory>

#include <QtTest>

#include <QCryptographicHash>
#include <QTemporaryFile>

#ifdef WRITE_RAWDATAFILE
#include <QFile>
#endif // WRITE_RAWDATAFILE

#include <File>
#include <Entry>
#include <FileImporterBibTeX>
#include <FileExporterBibTeX>
/// Provides definition of TESTSET_DIRECTORY
#include "test-config.h"
#ifndef WRITE_RAWDATAFILE
#include "kbibtexfilestest-rawdata.h"
#endif // WRITE_RAWDATAFILE
#include "logging_test.h"
0040 
0041 typedef struct {
0042     QString filename;
0043 #ifndef WRITE_RAWDATAFILE
0044     int numElements, numEntries;
0045     QString lastEntryId, lastEntryLastAuthorLastName;
0046     QByteArray hashLastAuthors, hashFilesUrlsDoi;
0047 #endif // WRITE_RAWDATAFILE
0048 } TestFile;
0049 
0050 Q_DECLARE_METATYPE(TestFile)
0051 
/**
 * Data-driven test which loads a set of bibliography files and compares
 * a number of their properties with baseline values. If WRITE_RAWDATAFILE
 * is defined, the observed values are written to the file
 * "kbibtexfilestest-rawdata.h" instead of being compared.
 */
class KBibTeXFilesTest : public QObject
{
    Q_OBJECT

private Q_SLOTS:
    /// Registers the TestFile meta type; in WRITE_RAWDATAFILE builds also starts the raw data header file
    void initTestCase();
#ifdef WRITE_RAWDATAFILE
    /// Appends the closing include guard to the raw data header file
    void cleanupTestCase();
#endif // WRITE_RAWDATAFILE
    /// Provides one data row per bibliography file to be tested
    void testFiles_data();
    /// Loads the current row's bibliography file; unless WRITE_RAWDATAFILE is defined, also saves and re-loads it
    void testFiles();

private:
    /**
     * Loads a bibliography file and checks a number of known properties
     * such as the number of elements/entries or the hash sum of authors' last names.
     * It is the caller's responsibility to pass a valid argument to @p outFile
     * and later delete the returned File object.
     * If loading or any of the checks fails, @p outFile is set to @c nullptr.
     *
     * @param absoluteFilename bibliography file to load
     * @param currentTestFile data structure holding the baseline values
     * @param outFile returns pointer to the opened file
     */
    void loadFile(const QString &absoluteFilename, const TestFile &currentTestFile, File **outFile);

    /**
     * Saves a bibliography in a temporary file.
     * It is the caller's responsibility to pass a valid argument to @p outFile,
     * which will hold the temporary file's name upon successful return
     * and an empty string otherwise.
     * The caller is also responsible for removing the temporary file.
     *
     * @param file bibliography data structure to be saved
     * @param currentTestFile baseline data structure used to determine temporary file's name
     * @param outFile returns the temporary file's name
     */
    void saveFile(File *file, const TestFile &currentTestFile, QString *outFile);

    /**
     * Creates and fills a TestFile data structure based on the provided values.
     * If WRITE_RAWDATAFILE is defined, only @p filename is accepted and stored.
     *
     * @param filename Bibliography file's filename
     * @param numElements Number of elements to expect in bibliography
     * @param numEntries Number of entries to expect in bibliography
     * @param lastEntryId Identifier of last entry in bibliography
     * @param lastEntryLastAuthorLastName Last name of the last author (or editor) of the last entry in bibliography
     * @param hashLastAuthors The hash sum over the last names of each entry's last author (or editor)
     * @param hashFilesUrlsDoi The hash sum over all URLs, DOIs, and file references in bibliography
     * @return An initialized TestFile data structure
     */
    TestFile createTestFile(const QString &filename
#ifndef WRITE_RAWDATAFILE
                            , int numElements, int numEntries, const QString &lastEntryId, const QString &lastEntryLastAuthorLastName, const QByteArray &hashLastAuthors, const QByteArray &hashFilesUrlsDoi
#endif // WRITE_RAWDATAFILE
                           );
};
0106 
0107 
0108 void KBibTeXFilesTest::initTestCase()
0109 {
0110     qRegisterMetaType<TestFile>("TestFile");
0111 
0112 #ifdef WRITE_RAWDATAFILE
0113     QFile rawDataFile("kbibtexfilestest-rawdata.h");
0114     if (rawDataFile.open(QFile::WriteOnly)) {
0115         QTextStream ts(&rawDataFile);
0116 #if QT_VERSION >= 0x050e00
0117         ts << QStringLiteral("/********************************************************************************") << Qt::endl << Qt::endl;
0118         ts << QStringLiteral("SPDX-License-Identifier: BSD-2-Clause") << Qt::endl;
0119         ts << QStringLiteral("SPDX-FileCopyrightText: ") << QDate::currentDate().year() << QStringLiteral(" Thomas Fischer <fischer@unix-ag.uni-kl.de> and others") << Qt::endl << Qt::endl;
0120         ts << QStringLiteral("********************************************************************************/") << Qt::endl << Qt::endl;
0121         ts << QStringLiteral("#ifndef KBIBTEX_FILES_TEST_RAWDATA_H") << Qt::endl << QStringLiteral("#define KBIBTEX_FILES_TEST_RAWDATA_H") << Qt::endl << Qt::endl;
0122 #else // QT_VERSION < 0x050e00
0123         ts << QStringLiteral("/********************************************************************************") << endl << endl;
0124         ts << QStringLiteral("SPDX-License-Identifier: BSD-2-Clause") << endl;
0125         ts << QStringLiteral("SPDX-FileCopyrightText: ") << QDate::currentDate().year() << QStringLiteral(" Thomas Fischer <fischer@unix-ag.uni-kl.de> and others") << endl << endl;
0126         ts << QStringLiteral("********************************************************************************/") << endl << endl;
0127         ts << QStringLiteral("#ifndef KBIBTEX_FILES_TEST_RAWDATA_H") << endl << QStringLiteral("#define KBIBTEX_FILES_TEST_RAWDATA_H") << endl << endl;
0128 #endif // QT_VERSION >= 0x050e00
0129         rawDataFile.close();
0130     }
0131 #endif // WRITE_RAWDATAFILE
0132 }
0133 
#ifdef WRITE_RAWDATAFILE
/**
 * Runs once after the last test function: appends the closing include
 * guard to the raw data header file. Nothing is written if the file
 * cannot be opened for appending.
 */
void KBibTeXFilesTest::cleanupTestCase()
{
    QFile rawDataFile("kbibtexfilestest-rawdata.h");
    if (!rawDataFile.open(QFile::Append))
        return;

    /// The end-of-line manipulator lives in namespace Qt for Qt 5.14 and later
#if QT_VERSION >= 0x050e00
    QTextStream &(*const newline)(QTextStream &) = Qt::endl;
#else // QT_VERSION < 0x050e00
    QTextStream &(*const newline)(QTextStream &) = endl;
#endif // QT_VERSION >= 0x050e00

    QTextStream ts(&rawDataFile);
    ts << newline << QStringLiteral("#endif // KBIBTEX_FILES_TEST_RAWDATA_H") << newline;
    rawDataFile.close();
}
#endif // WRITE_RAWDATAFILE
0149 
0150 void KBibTeXFilesTest::testFiles_data()
0151 {
0152     QTest::addColumn<TestFile>("testFile");
0153     QTest::newRow("bug19489.bib") << createTestFile(QStringLiteral("bib/bug19489.bib")
0154 #ifndef WRITE_RAWDATAFILE
0155                                   , bug19489NumElements, bug19489NumEntries, bug19489LastEntryId, bug19489LastAuthor, bug19489LastAuthors, bug19489FilesUrlsDoi
0156 #endif // WRITE_RAWDATAFILE
0157                                                    );
0158     QTest::newRow("names-with-braces.bib") << createTestFile(QStringLiteral("bib/names-with-braces.bib")
0159 #ifndef WRITE_RAWDATAFILE
0160                                            , nameswithbracesNumElements, nameswithbracesNumEntries, nameswithbracesLastEntryId, nameswithbracesLastAuthor, nameswithbracesLastAuthors, nameswithbracesFilesUrlsDoi
0161 #endif // WRITE_RAWDATAFILE
0162                                                             );
0163     QTest::newRow("duplicates.bib") << createTestFile(QStringLiteral("bib/duplicates.bib")
0164 #ifndef WRITE_RAWDATAFILE
0165                                     , duplicatesNumElements,  duplicatesNumEntries, duplicatesLastEntryId, duplicatesLastAuthor, duplicatesLastAuthors, duplicatesFilesUrlsDoi
0166 #endif // WRITE_RAWDATAFILE
0167                                                      );
0168     QTest::newRow("minix.bib") << createTestFile(QStringLiteral("bib/minix.bib")
0169 #ifndef WRITE_RAWDATAFILE
0170                                , minixNumElements, minixNumEntries, minixLastEntryId, minixLastAuthor, minixLastAuthors, minixFilesUrlsDoi
0171 #endif // WRITE_RAWDATAFILE
0172                                                 );
0173     QTest::newRow("bug19484-refs.bib") << createTestFile(QStringLiteral("bib/bug19484-refs.bib")
0174 #ifndef WRITE_RAWDATAFILE
0175                                        , bug19484refsNumElements, bug19484refsNumEntries, bug19484refsLastEntryId, bug19484refsLastAuthor, bug19484refsLastAuthors, bug19484refsFilesUrlsDoi
0176 #endif // WRITE_RAWDATAFILE
0177                                                         );
0178     QTest::newRow("bug19362-file15701-database.bib") << createTestFile(QStringLiteral("bib/bug19362-file15701-database.bib")
0179 #ifndef WRITE_RAWDATAFILE
0180             , bug19362file15701databaseNumElements, bug19362file15701databaseNumEntries, bug19362file15701databaseLastEntryId, bug19362file15701databaseLastAuthor, bug19362file15701databaseLastAuthors, bug19362file15701databaseFilesUrlsDoi
0181 #endif // WRITE_RAWDATAFILE
0182                                                                       );
0183     QTest::newRow("digiplay.bib") << createTestFile(QStringLiteral("bib/digiplay.bib")
0184 #ifndef WRITE_RAWDATAFILE
0185                                   , digiplayNumElements, digiplayNumEntries, digiplayLastEntryId, digiplayLastAuthor, digiplayLastAuthors, digiplayFilesUrlsDoi
0186 #endif // WRITE_RAWDATAFILE
0187                                                    );
0188     QTest::newRow("backslash.bib") << createTestFile(QStringLiteral("bib/backslash.bib")
0189 #ifndef WRITE_RAWDATAFILE
0190                                    , backslashNumElements, backslashNumEntries, backslashLastEntryId, backslashLastAuthor, backslashLastAuthors, backslashFilesUrlsDoi
0191 #endif // WRITE_RAWDATAFILE
0192                                                     );
0193     QTest::newRow("bug379443-attachment105313-IOPEXPORT_BIB.bib") << createTestFile(QStringLiteral("bib/bug379443-attachment105313-IOPEXPORT_BIB.bib")
0194 #ifndef WRITE_RAWDATAFILE
0195             , bug379443attachment105313IOPEXPORTBIBNumElements, bug379443attachment105313IOPEXPORTBIBNumEntries, bug379443attachment105313IOPEXPORTBIBLastEntryId, bug379443attachment105313IOPEXPORTBIBLastAuthor, bug379443attachment105313IOPEXPORTBIBLastAuthors, bug379443attachment105313IOPEXPORTBIBFilesUrlsDoi
0196 #endif // WRITE_RAWDATAFILE
0197                                                                                    );
0198     QTest::newRow("bug21870-polito.bib") << createTestFile(QStringLiteral("bib/bug21870-polito.bib")
0199 #ifndef WRITE_RAWDATAFILE
0200                                          , bug21870politoNumElements, bug21870politoNumEntries, bug21870politoLastEntryId, bug21870politoLastAuthor, bug21870politoLastAuthors, bug21870politoFilesUrlsDoi
0201 #endif // WRITE_RAWDATAFILE
0202                                                           );
0203     QTest::newRow("cloud-duplicates.bib") << createTestFile(QStringLiteral("bib/cloud-duplicates.bib")
0204 #ifndef WRITE_RAWDATAFILE
0205                                           , cloudduplicatesNumElements, cloudduplicatesNumEntries, cloudduplicatesLastEntryId, cloudduplicatesLastAuthor, cloudduplicatesLastAuthors, cloudduplicatesFilesUrlsDoi
0206 #endif // WRITE_RAWDATAFILE
0207                                                            );
0208 }
0209 
0210 void KBibTeXFilesTest::testFiles()
0211 {
0212     QFETCH(TestFile, testFile);
0213 
0214     const QString absoluteFilename = QLatin1String(TESTSET_DIRECTORY "/") + testFile.filename;
0215     QVERIFY(QFileInfo::exists(absoluteFilename));
0216 
0217     /// First load the file ...
0218     File *file = nullptr;
0219     loadFile(absoluteFilename, testFile, &file);
0220     QVERIFY(file);
0221 
0222 #ifndef WRITE_RAWDATAFILE
0223     /// ... then save it again to file ...
0224     QString tempFileName;
0225     saveFile(file, testFile, &tempFileName);
0226     QVERIFY(!tempFileName.isEmpty());
0227 
0228     /// ... and finally try to load again the newly saved version
0229     File *file2 = nullptr;
0230     loadFile(tempFileName, testFile, &file2);
0231     QVERIFY(file2);
0232 
0233     QFile::remove(tempFileName);
0234 #endif // WRITE_RAWDATAFILE
0235 
0236     delete file;
0237 #ifndef WRITE_RAWDATAFILE
0238     delete file2;
0239 #endif // WRITE_RAWDATAFILE
0240 }
0241 
0242 void KBibTeXFilesTest::loadFile(const QString &absoluteFilename, const TestFile &currentTestFile, File **outFile)
0243 {
0244     *outFile = nullptr;
0245 
0246     FileImporterBibTeX *importer = nullptr;
0247     if (currentTestFile.filename.endsWith(QStringLiteral(".bib"))) {
0248         importer = new FileImporterBibTeX(this);
0249         importer->setCommentHandling(FileImporterBibTeX::CommentHandling::Keep);
0250     } else {
0251         QFAIL(qPrintable(QString::fromLatin1("Don't know format of '%1'").arg(currentTestFile.filename)));
0252     }
0253 
0254     QFile file(absoluteFilename);
0255     if (file.open(QFile::ReadOnly)) {
0256         const QByteArray fileData = file.readAll();
0257         file.close();
0258         const QByteArray hashData = QCryptographicHash::hash(fileData, QCryptographicHash::Md5);
0259         qCInfo(LOG_KBIBTEX_TEST) << "MD5 for file" << absoluteFilename << "is" << hashData.toHex();
0260     }
0261 
0262     File *bibTeXFile = nullptr;
0263     QVERIFY(file.open(QFile::ReadOnly));
0264     bibTeXFile = importer->load(&file);
0265     file.close();
0266 
0267     qCInfo(LOG_KBIBTEX_TEST) << (bibTeXFile == nullptr ? "bibTeXFile is NULL" : (bibTeXFile->isEmpty() ? "bibTeXFile is EMPTY" : QString(QStringLiteral("bibTeXFile contains %1 elements")).arg(bibTeXFile->count()).toLatin1()));
0268     QVERIFY(bibTeXFile);
0269     QVERIFY(!bibTeXFile->isEmpty());
0270 
0271     QStringList lastAuthorsList, filesUrlsDoiList;
0272     lastAuthorsList.reserve(bibTeXFile->size());
0273     const int numElements = bibTeXFile->count();
0274     int numEntries = 0;
0275     QString lastEntryId, lastEntryLastAuthorLastName;
0276     for (const auto &element : const_cast<const File &>(*bibTeXFile)) {
0277         QSharedPointer<Entry> entry = element.dynamicCast<Entry>();
0278         if (!entry.isNull()) {
0279             ++numEntries;
0280             lastEntryId = entry->id();
0281 
0282             Value authors = entry->value(Entry::ftAuthor);
0283             if (!authors.isEmpty()) {
0284                 ValueItem *vi = authors.last().data();
0285                 Person *p = dynamic_cast<Person *>(vi);
0286                 if (p != nullptr) {
0287                     lastEntryLastAuthorLastName = p->lastName();
0288                 } else
0289                     lastEntryLastAuthorLastName.clear();
0290             } else {
0291                 Value editors = entry->value(Entry::ftEditor);
0292                 if (!editors.isEmpty()) {
0293                     ValueItem *vi = editors.last().data();
0294                     Person *p = dynamic_cast<Person *>(vi);
0295                     if (p != nullptr) {
0296                         lastEntryLastAuthorLastName = p->lastName();
0297                     } else
0298                         lastEntryLastAuthorLastName.clear();
0299                 } else
0300                     lastEntryLastAuthorLastName.clear();
0301             }
0302 
0303             if (!lastEntryLastAuthorLastName.isEmpty()) {
0304                 if (lastEntryLastAuthorLastName[0] == QLatin1Char('{') && lastEntryLastAuthorLastName[lastEntryLastAuthorLastName.length() - 1] == QLatin1Char('}'))
0305                     lastEntryLastAuthorLastName = lastEntryLastAuthorLastName.mid(1, lastEntryLastAuthorLastName.length() - 2);
0306                 lastAuthorsList << lastEntryLastAuthorLastName;
0307             }
0308 
0309             static const QStringList stems {Entry::ftUrl, Entry::ftDOI, Entry::ftLocalFile, Entry::ftFile};
0310             for (const QString &stem : stems) {
0311                 for (int index = 1; index < 100; ++index) {
0312                     const QString field = index == 1 ? stem : QString(QStringLiteral("%1%2")).arg(stem).arg(index);
0313                     const Value v = entry->value(field);
0314                     for (const QSharedPointer<ValueItem> &vi : v) {
0315                         filesUrlsDoiList << PlainTextValue::text(vi);
0316                     }
0317                     if (v.isEmpty() && index > 10) break;
0318                 }
0319             }
0320         }
0321     }
0322 
0323 #ifdef WRITE_RAWDATAFILE
0324     static const QRegularExpression filenameStemRegExp(QStringLiteral("/?([^/]+)[.]bib$"));
0325     const QString filenameStem = filenameStemRegExp.match(currentTestFile.filename).captured(1).remove(QLatin1Char('-')).remove(QLatin1Char('_'));
0326     QFile rawDataFile("kbibtexfilestest-rawdata.h");
0327     if (rawDataFile.open(QFile::Append)) {
0328         QTextStream ts(&rawDataFile);
0329         ts << QStringLiteral("static const int ") << filenameStem << QStringLiteral("NumElements = ") << QString::number(numElements) << QStringLiteral(";\n");
0330         ts << QStringLiteral("static const int ") << filenameStem << QStringLiteral("NumEntries = ") << QString::number(numEntries) << QStringLiteral(";\n");
0331         ts << QStringLiteral("static const QString ") << filenameStem << QStringLiteral("LastEntryId = QStringLiteral(\"") << lastEntryId << QStringLiteral("\");\n");
0332         ts << QStringLiteral("static const QString ") << filenameStem << QStringLiteral("LastAuthor = QStringLiteral(\"") << lastEntryLastAuthorLastName << QStringLiteral("\");\n");
0333         rawDataFile.close();
0334     }
0335 #else // WRITE_RAWDATAFILE
0336     QCOMPARE(currentTestFile.numElements, numElements);
0337     QCOMPARE(currentTestFile.numEntries, numEntries);
0338     QCOMPARE(currentTestFile.lastEntryId, lastEntryId);
0339     QCOMPARE(currentTestFile.lastEntryLastAuthorLastName, lastEntryLastAuthorLastName);
0340 #endif // WRITE_RAWDATAFILE
0341 
0342     QCryptographicHash hashLastAuthors(QCryptographicHash::Md5);
0343     for (const QString &lastAuthor : const_cast<const QStringList &>(lastAuthorsList)) {
0344         const QByteArray lastAuthorUtf8 = lastAuthor.toUtf8();
0345         hashLastAuthors.addData(lastAuthorUtf8);
0346     }
0347 #ifdef WRITE_RAWDATAFILE
0348     if (rawDataFile.open(QFile::Append)) {
0349         QTextStream ts(&rawDataFile);
0350         ts << QStringLiteral("static const QByteArray ") << filenameStem << QStringLiteral("LastAuthors = QByteArray::fromHex(\"") << hashLastAuthors.result().toHex() << QStringLiteral("\");\n");
0351         rawDataFile.close();
0352     }
0353 #else // WRITE_RAWDATAFILE
0354     QCOMPARE(currentTestFile.hashLastAuthors, hashLastAuthors.result());
0355 #endif // WRITE_RAWDATAFILE
0356 
0357     QCryptographicHash hashFilesUrlsDoi(QCryptographicHash::Md5);
0358     for (const QString &filesUrlsDoi : const_cast<const QStringList &>(filesUrlsDoiList)) {
0359         const QByteArray filesUrlsDoiUtf8 = filesUrlsDoi.toUtf8();
0360         hashFilesUrlsDoi.addData(filesUrlsDoiUtf8);
0361     }
0362 #ifdef WRITE_RAWDATAFILE
0363     if (rawDataFile.open(QFile::Append)) {
0364         QTextStream ts(&rawDataFile);
0365         ts << QStringLiteral("static const QByteArray ") << filenameStem << QStringLiteral("FilesUrlsDoi = QByteArray::fromHex(\"") << hashFilesUrlsDoi.result().toHex() << QStringLiteral("\");\n");
0366         rawDataFile.close();
0367     }
0368 #else // WRITE_RAWDATAFILE
0369     QCOMPARE(hashFilesUrlsDoi.result(), currentTestFile.hashFilesUrlsDoi);
0370 #endif // WRITE_RAWDATAFILE
0371 
0372     delete importer;
0373 
0374     *outFile = bibTeXFile;
0375 }
0376 
0377 void KBibTeXFilesTest::saveFile(File *file, const TestFile &currentTestFile, QString *outFile)
0378 {
0379     *outFile = QString();
0380 
0381     FileExporter *exporter = nullptr;
0382     if (currentTestFile.filename.endsWith(QStringLiteral(".bib"))) {
0383         FileExporterBibTeX *bibTeXExporter = new FileExporterBibTeX(this);
0384         bibTeXExporter->setEncoding(QStringLiteral("utf-8"));
0385         exporter = bibTeXExporter;
0386     } else {
0387         QFAIL(qPrintable(QString::fromLatin1("Don't know format of '%1'").arg(currentTestFile.filename)));
0388     }
0389 
0390     QTemporaryFile tempFile(QDir::tempPath() + QStringLiteral("/XXXXXX.") + QFileInfo(currentTestFile.filename).fileName());
0391     /// It is the function caller's responsibility to remove the temporary file later
0392     tempFile.setAutoRemove(false);
0393     QVERIFY(tempFile.open());
0394     QVERIFY(exporter->save(&tempFile, file));
0395 
0396     *outFile = tempFile.fileName();
0397 }
0398 
0399 TestFile KBibTeXFilesTest::createTestFile(const QString &filename
0400 #ifndef WRITE_RAWDATAFILE
0401         , int numElements, int numEntries, const QString &lastEntryId, const QString &lastEntryLastAuthorLastName, const QByteArray &hashLastAuthors, const QByteArray &hashFilesUrlsDoi
0402 #endif // WRITE_RAWDATAFILE
0403                                          )
0404 {
0405     TestFile r;
0406     r.filename = filename;
0407 #ifndef WRITE_RAWDATAFILE
0408     r.numElements = numElements;
0409     r.numEntries = numEntries;
0410     r.lastEntryId = lastEntryId;
0411     r.lastEntryLastAuthorLastName = lastEntryLastAuthorLastName;
0412     r.hashLastAuthors = hashLastAuthors;
0413     r.hashFilesUrlsDoi = hashFilesUrlsDoi;
0414 #endif // WRITE_RAWDATAFILE
0415     return r;
0416 }
0417 
/// Provides the main() function which runs all tests of KBibTeXFilesTest
QTEST_MAIN(KBibTeXFilesTest)

/// Presumably the moc-generated code for the Q_OBJECT class declared above,
/// which has no header file of its own
#include "kbibtexfilestest.moc"