File indexing completed on 2024-04-28 15:22:18

0001 /*
0002     SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de>
0003 
0004     SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0005 */
0006 
0007 #include "xmlextractortest.h"
0008 
0009 #include <QTest>
0010 #include <QTemporaryFile>
0011 #include <QMimeDatabase>
0012 
0013 #include "simpleextractionresult.h"
0014 #include "indexerextractortestsconfig.h"
0015 #include "extractors/xmlextractor.h"
0016 #include "mimeutils.h"
0017 
0018 using namespace KFileMetaData;
0019 
0020 XmlExtractorTests::XmlExtractorTests(QObject* parent) :
0021     QObject(parent)
0022 {
0023 }
0024 
0025 QString XmlExtractorTests::testFilePath(const QString& fileName) const
0026 {
0027     return QLatin1String(INDEXER_TESTS_SAMPLE_FILES_PATH) + QLatin1Char('/') + fileName;
0028 }
0029 
0030 void XmlExtractorTests::testNoExtraction()
0031 {
0032     XmlExtractor plugin{this};
0033 
0034     QString fileName = testFilePath(QStringLiteral("test_with_metadata.svg"));
0035     QMimeDatabase mimeDb;
0036     QString mimeType = MimeUtils::strictMimeType(fileName, mimeDb).name();
0037     QVERIFY(plugin.mimetypes().contains(mimeType));
0038 
0039     SimpleExtractionResult result(fileName, mimeType,
0040             ExtractionResult::ExtractNothing);
0041     plugin.extract(&result);
0042 
0043     QCOMPARE(result.types().size(), 1);
0044     QCOMPARE(result.types().at(0), Type::Image);
0045     QCOMPARE(result.properties().size(), 0);
0046 }
0047 
0048 void XmlExtractorTests::benchMarkXmlExtractor()
0049 {
0050     XmlExtractor plugin(this);
0051 
0052     // generate a test file with varying number of words per line
0053     QTemporaryFile file(QStringLiteral("XXXXXX.xml"));
0054     QVERIFY(file.open());
0055 
0056     int count = 0;
0057     file.write("<xml>\n");
0058     QByteArray chunk("<text>foo bar </text>");
0059     for (int line = 0; line < 10000; ++line) {
0060         // staircase pattern, 0, 1, 2, ... 98, 0, 0, 1 ... chunks per line
0061         for (int i = 0; i < line % 100; ++i) {
0062             count++;
0063             file.write(chunk);
0064         }
0065         file.write("\n");
0066     }
0067     file.write("</xml>\n");
0068     file.close();
0069 
0070     SimpleExtractionResult result(file.fileName(), QStringLiteral("application/xml"));
0071 
0072     plugin.extract(&result);
0073 
0074     QString content = QStringLiteral("foo bar\n");
0075     content.replace(QLatin1Char('\n'), QLatin1Char(' '));
0076     QCOMPARE(result.text().left(8), content.left(8));
0077     QCOMPARE(result.text().size(), 8 * count);
0078 
0079     QBENCHMARK {
0080         plugin.extract(&result);
0081     }
0082 }
0083 
0084 void XmlExtractorTests::testXmlExtractor()
0085 {
0086     XmlExtractor plugin{this};
0087 
0088     QString fileName = testFilePath(QStringLiteral("test_with_metadata.svg"));
0089     QMimeDatabase mimeDb;
0090     QString mimeType = MimeUtils::strictMimeType(fileName, mimeDb).name();
0091     QVERIFY(plugin.mimetypes().contains(mimeType));
0092 
0093     SimpleExtractionResult result(fileName, mimeType,
0094             ExtractionResult::ExtractMetaData | ExtractionResult::ExtractPlainText);
0095     plugin.extract(&result);
0096 
0097     QString content = QStringLiteral("Some text\n");
0098 
0099     QCOMPARE(result.types().size(), 1);
0100     QCOMPARE(result.types().at(0), Type::Image);
0101 
0102     QCOMPARE(result.properties().size(), 1);
0103     QCOMPARE(result.properties().value(Property::Title).toString(), QStringLiteral("Document Title"));
0104 
0105     content.replace(QLatin1Char('\n'), QLatin1Char(' '));
0106     QCOMPARE(result.text(), content);
0107 }
0108 
0109 void XmlExtractorTests::testXmlExtractorNoContent()
0110 {
0111     XmlExtractor plugin{this};
0112 
0113     SimpleExtractionResult result(testFilePath(QStringLiteral("test_with_metadata.svg")),
0114             QStringLiteral("image/svg"),
0115             ExtractionResult::ExtractMetaData);
0116     plugin.extract(&result);
0117 
0118     QCOMPARE(result.types().size(), 1);
0119     QCOMPARE(result.types().at(0), Type::Image);
0120 
0121     QCOMPARE(result.properties().size(), 1);
0122     QCOMPARE(result.properties().value(Property::Title).toString(), QStringLiteral("Document Title"));
0123 
0124     QVERIFY(result.text().isEmpty());
0125 }
0126 
0127 void XmlExtractorTests::testXmlExtractorNoContentDcterms()
0128 {
0129     XmlExtractor plugin{this};
0130 
0131     SimpleExtractionResult result(testFilePath(QStringLiteral("test_dcterms.svg")),
0132             QStringLiteral("image/svg"),
0133             ExtractionResult::ExtractMetaData);
0134     plugin.extract(&result);
0135 
0136     QCOMPARE(result.types().size(), 1);
0137     QCOMPARE(result.types().at(0), Type::Image);
0138 
0139     const auto properties = result.properties();
0140     QCOMPARE(properties.size(), 6);
0141     QCOMPARE(properties.value(Property::Description).toString(),
0142              QStringLiteral("A test document for Dublin Core Terms namespace"));
0143     QCOMPARE(properties.value(Property::Title).toString(), QStringLiteral("Document Title"));
0144     QCOMPARE(properties.value(Property::Author).toString(), QStringLiteral("Stefan Brüns"));
0145     QCOMPARE(properties.value(Property::Language).toString(), QStringLiteral("en"));
0146 
0147     const auto subjects = QVariantList(properties.lowerBound(Property::Subject),
0148                                        properties.upperBound(Property::Subject));
0149     QCOMPARE(subjects.size(), 2);
0150     QCOMPARE(subjects, QVariantList({QStringLiteral("Testing"), QStringLiteral("Dublin Core")}));
0151 
0152     QVERIFY(result.text().isEmpty());
0153 }
0154 
0155 void XmlExtractorTests::testXmlExtractorContainer()
0156 {
0157     XmlExtractor plugin{this};
0158 
0159     SimpleExtractionResult result(testFilePath(QStringLiteral("test_with_container.svg")),
0160             QStringLiteral("image/svg"),
0161             ExtractionResult::ExtractMetaData | ExtractionResult::ExtractPlainText);
0162     plugin.extract(&result);
0163 
0164     QString content = QStringLiteral("Some text below <a>\n");
0165 
0166     QCOMPARE(result.types().size(), 1);
0167     QCOMPARE(result.types().at(0), Type::Image);
0168 
0169     QCOMPARE(result.properties().size(), 0);
0170 
0171     content.replace(QLatin1Char('\n'), QLatin1Char(' '));
0172     QCOMPARE(result.text(), content);
0173 }
0174 
0175 void XmlExtractorTests::testXmlExtractorMathML()
0176 {
0177     XmlExtractor plugin{this};
0178 
0179     SimpleExtractionResult result(testFilePath(QStringLiteral("test.mml")),
0180             QStringLiteral("application/mathml+xml"),
0181             ExtractionResult::ExtractMetaData | ExtractionResult::ExtractPlainText);
0182     plugin.extract(&result);
0183 
0184     QString content = QStringLiteral("1 + 1 = 2\n");
0185 
0186     QCOMPARE(result.types().size(), 1);
0187     QCOMPARE(result.types().at(0), Type::Text);
0188 
0189     QCOMPARE(result.properties().size(), 0);
0190 
0191     content.replace(QLatin1Char('\n'), QLatin1Char(' '));
0192     QCOMPARE(result.text(), content);
0193 }
0194 
0195 
0196 QTEST_GUILESS_MAIN(XmlExtractorTests)
0197 
0198 #include "moc_xmlextractortest.cpp"