// File indexing completed on 2024-05-12 05:17:30
0001 /* 0002 SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "testhelpers.h" 0008 0009 #include <KItinerary/ExtractorEngine> 0010 #include <KItinerary/ExtractorPostprocessor> 0011 #include <KItinerary/ExtractorValidator> 0012 #include <KItinerary/JsonLdDocument> 0013 #include <KItinerary/Reservation> 0014 #include <KItinerary/Ticket> 0015 0016 #include <KMime/Message> 0017 0018 #include <QDebug> 0019 #include <QDir> 0020 #include <QDirIterator> 0021 #include <QFile> 0022 #include <QJsonArray> 0023 #include <QJsonDocument> 0024 #include <QObject> 0025 #include <QTest> 0026 0027 void initLocale() 0028 { 0029 // use some exotic locale to ensure the date/time parsing doesn't just work by luck 0030 qputenv("LANG", "fr_FR"); 0031 } 0032 0033 Q_CONSTRUCTOR_FUNCTION(initLocale) 0034 0035 using namespace KItinerary; 0036 0037 /** Note: this test requires external test data that is not publicly available, 0038 * ie. real-world unmodified booking documents. 0039 * This data cannot be shared for containing privacy-sensitive data and copyrighted 0040 * material (e.g. airline logos). 
0041 */ 0042 class ExtractorTest : public QObject 0043 { 0044 Q_OBJECT 0045 private: 0046 ExtractorEngine m_engine; 0047 0048 private Q_SLOTS: 0049 void initTestCase() 0050 { 0051 //m_engine.setUseSeparateProcess(true); 0052 } 0053 0054 void testExtract_data() 0055 { 0056 QTest::addColumn<QString>("contextFile"); 0057 QTest::addColumn<QString>("inputFile"); 0058 0059 for (const QDir baseDir : {QStringLiteral(SOURCE_DIR "/extractordata"), QStringLiteral(SOURCE_DIR "/../../kitinerary-tests")}) { 0060 if (!baseDir.exists()) { 0061 continue; 0062 } 0063 0064 QDirIterator it(baseDir.path(), {QStringLiteral("*.txt"), QStringLiteral("*.html"), QStringLiteral("*.pdf"), QStringLiteral("*.pkpass"), QStringLiteral("*.ics"), QStringLiteral("*.eml"), QStringLiteral("*.mbox"), QStringLiteral("*.bin"), QStringLiteral("*.png"), QStringLiteral("*.jpg"), QStringLiteral("*.har"), QStringLiteral("*.in.json"), QStringLiteral("*.gif")}, QDir::Files | QDir::Readable | QDir::NoSymLinks, QDirIterator::Subdirectories); 0065 while (it.hasNext()) { 0066 it.next(); 0067 // ignore context files 0068 if (it.fileName() == QLatin1StringView("context.eml")) { 0069 continue; 0070 } 0071 0072 QFileInfo contextFi(it.fileInfo().absolutePath() + 0073 QLatin1StringView("/context.eml")); 0074 QTest::newRow((contextFi.dir().dirName() + QLatin1Char('-') + it.fileName()).toLatin1().constData()) 0075 << contextFi.absoluteFilePath() 0076 << it.fileInfo().absoluteFilePath(); 0077 } 0078 } 0079 } 0080 0081 void testExtract() 0082 { 0083 QFETCH(QString, contextFile); 0084 QFETCH(QString, inputFile); 0085 0086 m_engine.clear(); 0087 if (inputFile.endsWith(QLatin1StringView(".png")) || 0088 inputFile.endsWith(QLatin1String(".pdf")) || 0089 inputFile.endsWith(QLatin1String(".jpg")) || 0090 inputFile.endsWith(QLatin1String(".gif"))) { 0091 m_engine.setHints(ExtractorEngine::ExtractFullPageRasterImages); 0092 } else if (inputFile.endsWith(QLatin1StringView(".ics"))) { 0093 
m_engine.setHints(ExtractorEngine::ExtractGenericIcalEvents); 0094 } else { 0095 m_engine.setHints(ExtractorEngine::NoHint); 0096 } 0097 0098 QFile inFile(inputFile); 0099 const auto openFlags = inputFile.endsWith(QLatin1StringView(".txt")) 0100 ? QFile::Text 0101 : QFile::ReadOnly; 0102 QVERIFY(inFile.open(QFile::ReadOnly | openFlags)); 0103 0104 QFile cf(contextFile); 0105 KMime::Message contextMsg; 0106 if (cf.open(QFile::ReadOnly)) { 0107 contextMsg.setContent(cf.readAll()); 0108 contextMsg.parse(); 0109 m_engine.setContext(QVariant::fromValue(&contextMsg), u"message/rfc822"); 0110 } else if (inputFile.endsWith(QLatin1StringView(".eml"))) { 0111 contextMsg.setContent(inFile.readAll()); 0112 inFile.seek(0); 0113 contextMsg.parse(); 0114 m_engine.setContext(QVariant::fromValue(&contextMsg), 0115 u"message/rfc822"); 0116 } else { 0117 m_engine.setContextDate(QDateTime({2018, 1, 1}, {0, 0})); 0118 } 0119 0120 m_engine.setData(inFile.readAll(), inputFile); 0121 auto jsonResult = m_engine.extract(); 0122 0123 const auto expectedSkip = 0124 QFile::exists(inputFile + QLatin1StringView(".skip")); 0125 if (jsonResult.isEmpty() && expectedSkip) { 0126 QSKIP("nothing extracted"); 0127 return; 0128 } 0129 QVERIFY(!jsonResult.isEmpty()); 0130 const auto result = JsonLdDocument::fromJson(jsonResult); 0131 ExtractorPostprocessor postproc; 0132 postproc.setContextDate(contextMsg.date()->dateTime()); 0133 postproc.process(result); 0134 auto postProcResult = postproc.result(); 0135 0136 ExtractorValidator validator; 0137 validator.setAcceptOnlyCompleteElements(false); 0138 postProcResult.erase(std::remove_if(postProcResult.begin(), postProcResult.end(), [&validator](const auto &elem) { 0139 return !validator.isValidElement(elem); 0140 }), postProcResult.end()); 0141 0142 if (postProcResult.isEmpty() && expectedSkip) { 0143 QSKIP("result filtered"); 0144 return; 0145 } 0146 if (postProcResult.isEmpty()) { 0147 qDebug() << "Result discarded in post processing:"; 0148 
qDebug().noquote() << QJsonDocument(jsonResult).toJson(); 0149 } 0150 QVERIFY(!postProcResult.isEmpty()); 0151 0152 const auto encodedResult = JsonLdDocument::toJson(postProcResult); 0153 QCOMPARE(encodedResult.size(), postProcResult.size()); 0154 0155 const QString refFile = inputFile + QLatin1StringView(".json"); 0156 if (!QFile::exists(refFile) && !expectedSkip) { 0157 QFile f(refFile); 0158 QVERIFY(f.open(QFile::WriteOnly)); 0159 f.write(QJsonDocument(encodedResult).toJson()); 0160 return; 0161 } 0162 0163 QFile f(refFile); 0164 QVERIFY(f.open(QFile::ReadOnly)); 0165 const auto refDoc = QJsonDocument::fromJson(f.readAll()); 0166 QVERIFY(Test::compareJson(refFile, encodedResult, refDoc.array())); 0167 0168 // verify ticket token prefixes are valid and properly stripped 0169 for (const auto &res : postProcResult) { 0170 Ticket ticket; 0171 if (JsonLd::canConvert<Reservation>(res)) { 0172 ticket = JsonLd::convert<Reservation>(res).reservedTicket().value<Ticket>(); 0173 } else if (JsonLd::isA<Ticket>(res)) { 0174 ticket = res.value<Ticket>(); 0175 } else { 0176 continue; 0177 } 0178 0179 if (ticket.ticketTokenType() == Token::Unknown || ticket.ticketTokenType() == Token::Url || ticket.ticketToken().isEmpty()) { 0180 continue; 0181 } 0182 const auto tokenData = ticket.ticketTokenData(); 0183 if (tokenData.userType() == QMetaType::QString) { 0184 QVERIFY(tokenData.toString() != ticket.ticketToken()); 0185 } 0186 } 0187 } 0188 0189 void testNegative() 0190 { 0191 m_engine.clear(); 0192 m_engine.setData("%PDF-1.4\nINVALID!!!!"); 0193 QCOMPARE(m_engine.extract(), QJsonArray()); 0194 } 0195 }; 0196 0197 QTEST_GUILESS_MAIN(ExtractorTest) 0198 0199 #include "extractortest.moc"