File indexing completed on 2024-05-12 05:17:30

0001 /*
0002     SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "testhelpers.h"
0008 
0009 #include <KItinerary/ExtractorEngine>
0010 #include <KItinerary/ExtractorPostprocessor>
0011 #include <KItinerary/ExtractorValidator>
0012 #include <KItinerary/JsonLdDocument>
0013 #include <KItinerary/Reservation>
0014 #include <KItinerary/Ticket>
0015 
0016 #include <KMime/Message>
0017 
0018 #include <QDebug>
0019 #include <QDir>
0020 #include <QDirIterator>
0021 #include <QFile>
0022 #include <QJsonArray>
0023 #include <QJsonDocument>
0024 #include <QObject>
0025 #include <QTest>
0026 
0027 void initLocale()
0028 {
0029     // use some exotic locale to ensure the date/time parsing doesn't just work by luck
0030     qputenv("LANG", "fr_FR");
0031 }
0032 
0033 Q_CONSTRUCTOR_FUNCTION(initLocale)
0034 
0035 using namespace KItinerary;
0036 
0037 /** Note: this test requires external test data that is not publicly available,
0038  *  ie. real-world unmodified booking documents.
0039  *  This data cannot be shared for containing privacy-sensitive data and copyrighted
0040  *  material (e.g. airline logos).
0041  */
0042 class ExtractorTest : public QObject
0043 {
0044     Q_OBJECT
0045 private:
0046     ExtractorEngine m_engine;
0047 
0048 private Q_SLOTS:
0049     void initTestCase()
0050     {
0051         //m_engine.setUseSeparateProcess(true);
0052     }
0053 
0054     void testExtract_data()
0055     {
0056         QTest::addColumn<QString>("contextFile");
0057         QTest::addColumn<QString>("inputFile");
0058 
0059         for (const QDir baseDir :  {QStringLiteral(SOURCE_DIR "/extractordata"), QStringLiteral(SOURCE_DIR "/../../kitinerary-tests")}) {
0060             if (!baseDir.exists()) {
0061                 continue;
0062             }
0063 
0064             QDirIterator it(baseDir.path(), {QStringLiteral("*.txt"), QStringLiteral("*.html"), QStringLiteral("*.pdf"), QStringLiteral("*.pkpass"), QStringLiteral("*.ics"), QStringLiteral("*.eml"), QStringLiteral("*.mbox"), QStringLiteral("*.bin"), QStringLiteral("*.png"), QStringLiteral("*.jpg"), QStringLiteral("*.har"), QStringLiteral("*.in.json"), QStringLiteral("*.gif")}, QDir::Files | QDir::Readable | QDir::NoSymLinks, QDirIterator::Subdirectories);
0065             while (it.hasNext()) {
0066                 it.next();
0067                 // ignore context files
0068                 if (it.fileName() == QLatin1StringView("context.eml")) {
0069                   continue;
0070                 }
0071 
0072                 QFileInfo contextFi(it.fileInfo().absolutePath() +
0073                                     QLatin1StringView("/context.eml"));
0074                 QTest::newRow((contextFi.dir().dirName() + QLatin1Char('-') + it.fileName()).toLatin1().constData())
0075                     << contextFi.absoluteFilePath()
0076                     << it.fileInfo().absoluteFilePath();
0077             }
0078         }
0079     }
0080 
0081     void testExtract()
0082     {
0083         QFETCH(QString, contextFile);
0084         QFETCH(QString, inputFile);
0085 
0086         m_engine.clear();
0087         if (inputFile.endsWith(QLatin1StringView(".png")) ||
0088             inputFile.endsWith(QLatin1String(".pdf")) ||
0089             inputFile.endsWith(QLatin1String(".jpg")) ||
0090             inputFile.endsWith(QLatin1String(".gif"))) {
0091           m_engine.setHints(ExtractorEngine::ExtractFullPageRasterImages);
0092         } else if (inputFile.endsWith(QLatin1StringView(".ics"))) {
0093           m_engine.setHints(ExtractorEngine::ExtractGenericIcalEvents);
0094         } else {
0095           m_engine.setHints(ExtractorEngine::NoHint);
0096         }
0097 
0098         QFile inFile(inputFile);
0099         const auto openFlags = inputFile.endsWith(QLatin1StringView(".txt"))
0100                                    ? QFile::Text
0101                                    : QFile::ReadOnly;
0102         QVERIFY(inFile.open(QFile::ReadOnly | openFlags));
0103 
0104         QFile cf(contextFile);
0105         KMime::Message contextMsg;
0106         if (cf.open(QFile::ReadOnly)) {
0107             contextMsg.setContent(cf.readAll());
0108             contextMsg.parse();
0109             m_engine.setContext(QVariant::fromValue(&contextMsg), u"message/rfc822");
0110         } else if (inputFile.endsWith(QLatin1StringView(".eml"))) {
0111           contextMsg.setContent(inFile.readAll());
0112           inFile.seek(0);
0113           contextMsg.parse();
0114           m_engine.setContext(QVariant::fromValue(&contextMsg),
0115                               u"message/rfc822");
0116         } else {
0117           m_engine.setContextDate(QDateTime({2018, 1, 1}, {0, 0}));
0118         }
0119 
0120         m_engine.setData(inFile.readAll(), inputFile);
0121         auto jsonResult = m_engine.extract();
0122 
0123         const auto expectedSkip =
0124             QFile::exists(inputFile + QLatin1StringView(".skip"));
0125         if (jsonResult.isEmpty() && expectedSkip) {
0126             QSKIP("nothing extracted");
0127             return;
0128         }
0129         QVERIFY(!jsonResult.isEmpty());
0130         const auto result = JsonLdDocument::fromJson(jsonResult);
0131         ExtractorPostprocessor postproc;
0132         postproc.setContextDate(contextMsg.date()->dateTime());
0133         postproc.process(result);
0134         auto postProcResult = postproc.result();
0135 
0136         ExtractorValidator validator;
0137         validator.setAcceptOnlyCompleteElements(false);
0138         postProcResult.erase(std::remove_if(postProcResult.begin(), postProcResult.end(), [&validator](const auto &elem) {
0139             return !validator.isValidElement(elem);
0140         }), postProcResult.end());
0141 
0142         if (postProcResult.isEmpty() && expectedSkip) {
0143             QSKIP("result filtered");
0144             return;
0145         }
0146         if (postProcResult.isEmpty()) {
0147             qDebug() << "Result discarded in post processing:";
0148             qDebug().noquote() << QJsonDocument(jsonResult).toJson();
0149         }
0150         QVERIFY(!postProcResult.isEmpty());
0151 
0152         const auto encodedResult = JsonLdDocument::toJson(postProcResult);
0153         QCOMPARE(encodedResult.size(), postProcResult.size());
0154 
0155         const QString refFile = inputFile + QLatin1StringView(".json");
0156         if (!QFile::exists(refFile) && !expectedSkip) {
0157             QFile f(refFile);
0158             QVERIFY(f.open(QFile::WriteOnly));
0159             f.write(QJsonDocument(encodedResult).toJson());
0160             return;
0161         }
0162 
0163         QFile f(refFile);
0164         QVERIFY(f.open(QFile::ReadOnly));
0165         const auto refDoc = QJsonDocument::fromJson(f.readAll());
0166         QVERIFY(Test::compareJson(refFile, encodedResult, refDoc.array()));
0167 
0168         // verify ticket token prefixes are valid and properly stripped
0169         for (const auto &res : postProcResult) {
0170             Ticket ticket;
0171             if (JsonLd::canConvert<Reservation>(res)) {
0172                 ticket = JsonLd::convert<Reservation>(res).reservedTicket().value<Ticket>();
0173             } else if (JsonLd::isA<Ticket>(res)) {
0174                 ticket = res.value<Ticket>();
0175             } else {
0176                 continue;
0177             }
0178 
0179             if (ticket.ticketTokenType() == Token::Unknown || ticket.ticketTokenType() == Token::Url || ticket.ticketToken().isEmpty()) {
0180                 continue;
0181             }
0182             const auto tokenData = ticket.ticketTokenData();
0183             if (tokenData.userType() == QMetaType::QString) {
0184                 QVERIFY(tokenData.toString() != ticket.ticketToken());
0185             }
0186         }
0187     }
0188 
0189     void testNegative()
0190     {
0191         m_engine.clear();
0192         m_engine.setData("%PDF-1.4\nINVALID!!!!");
0193         QCOMPARE(m_engine.extract(), QJsonArray());
0194     }
0195 };
0196 
0197 QTEST_GUILESS_MAIN(ExtractorTest)
0198 
0199 #include "extractortest.moc"