File indexing completed on 2025-02-16 13:03:38
0001 /* 0002 SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de> 0003 0004 SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL 0005 */ 0006 0007 #include "xmlextractortest.h" 0008 0009 #include <QTest> 0010 #include <QTemporaryFile> 0011 #include <QMimeDatabase> 0012 0013 #include "simpleextractionresult.h" 0014 #include "indexerextractortestsconfig.h" 0015 #include "extractors/xmlextractor.h" 0016 #include "mimeutils.h" 0017 0018 using namespace KFileMetaData; 0019 0020 XmlExtractorTests::XmlExtractorTests(QObject* parent) : 0021 QObject(parent) 0022 { 0023 } 0024 0025 QString XmlExtractorTests::testFilePath(const QString& fileName) const 0026 { 0027 return QLatin1String(INDEXER_TESTS_SAMPLE_FILES_PATH) + QLatin1Char('/') + fileName; 0028 } 0029 0030 void XmlExtractorTests::testNoExtraction() 0031 { 0032 XmlExtractor plugin{this}; 0033 0034 QString fileName = testFilePath(QStringLiteral("test_with_metadata.svg")); 0035 QMimeDatabase mimeDb; 0036 QString mimeType = MimeUtils::strictMimeType(fileName, mimeDb).name(); 0037 QVERIFY(plugin.mimetypes().contains(mimeType)); 0038 0039 SimpleExtractionResult result(fileName, mimeType, 0040 ExtractionResult::ExtractNothing); 0041 plugin.extract(&result); 0042 0043 QCOMPARE(result.types().size(), 1); 0044 QCOMPARE(result.types().at(0), Type::Image); 0045 QCOMPARE(result.properties().size(), 0); 0046 } 0047 0048 void XmlExtractorTests::benchMarkXmlExtractor() 0049 { 0050 XmlExtractor plugin(this); 0051 0052 // generate a test file with varying number of words per line 0053 QTemporaryFile file(QStringLiteral("XXXXXX.xml")); 0054 QVERIFY(file.open()); 0055 0056 int count = 0; 0057 file.write("<xml>\n"); 0058 QByteArray chunk("<text>foo bar </text>"); 0059 for (int line = 0; line < 10000; ++line) { 0060 // staircase pattern, 0, 1, 2, ... 98, 0, 0, 1 ... chunks per line 0061 for (int i = 0; i < line % 100; ++i) { 0062 count++; 0063 file.write(chunk); 0064 } 0065 file.write("\n"); 0066 } 0067 file.write("</xml>\n"); 0068 file.close(); 0069 0070 SimpleExtractionResult result(file.fileName(), QStringLiteral("application/xml")); 0071 0072 plugin.extract(&result); 0073 0074 QString content = QStringLiteral("foo bar\n"); 0075 content.replace(QLatin1Char('\n'), QLatin1Char(' ')); 0076 QCOMPARE(result.text().left(8), content.left(8)); 0077 QCOMPARE(result.text().size(), 8 * count); 0078 0079 QBENCHMARK { 0080 plugin.extract(&result); 0081 } 0082 } 0083 0084 void XmlExtractorTests::testXmlExtractor() 0085 { 0086 XmlExtractor plugin{this}; 0087 0088 QString fileName = testFilePath(QStringLiteral("test_with_metadata.svg")); 0089 QMimeDatabase mimeDb; 0090 QString mimeType = MimeUtils::strictMimeType(fileName, mimeDb).name(); 0091 QVERIFY(plugin.mimetypes().contains(mimeType)); 0092 0093 SimpleExtractionResult result(fileName, mimeType, 0094 ExtractionResult::ExtractMetaData | ExtractionResult::ExtractPlainText); 0095 plugin.extract(&result); 0096 0097 QString content = QStringLiteral("Some text\n"); 0098 0099 QCOMPARE(result.types().size(), 1); 0100 QCOMPARE(result.types().at(0), Type::Image); 0101 0102 QCOMPARE(result.properties().size(), 1); 0103 QCOMPARE(result.properties().value(Property::Title).toString(), QStringLiteral("Document Title")); 0104 0105 content.replace(QLatin1Char('\n'), QLatin1Char(' ')); 0106 QCOMPARE(result.text(), content); 0107 } 0108 0109 void XmlExtractorTests::testXmlExtractorNoContent() 0110 { 0111 XmlExtractor plugin{this}; 0112 0113 SimpleExtractionResult result(testFilePath(QStringLiteral("test_with_metadata.svg")), 0114 QStringLiteral("image/svg"), 0115 ExtractionResult::ExtractMetaData); 0116 plugin.extract(&result); 0117 0118 QCOMPARE(result.types().size(), 1); 0119 QCOMPARE(result.types().at(0), Type::Image); 0120 0121 QCOMPARE(result.properties().size(), 1); 0122 QCOMPARE(result.properties().value(Property::Title).toString(), QStringLiteral("Document Title")); 0123 0124 QVERIFY(result.text().isEmpty()); 0125 } 0126 0127 void XmlExtractorTests::testXmlExtractorNoContentDcterms() 0128 { 0129 XmlExtractor plugin{this}; 0130 0131 SimpleExtractionResult result(testFilePath(QStringLiteral("test_dcterms.svg")), 0132 QStringLiteral("image/svg"), 0133 ExtractionResult::ExtractMetaData); 0134 plugin.extract(&result); 0135 0136 QCOMPARE(result.types().size(), 1); 0137 QCOMPARE(result.types().at(0), Type::Image); 0138 0139 const auto properties = result.properties(); 0140 QCOMPARE(properties.size(), 6); 0141 QCOMPARE(properties.value(Property::Description).toString(), 0142 QStringLiteral("A test document for Dublin Core Terms namespace")); 0143 QCOMPARE(properties.value(Property::Title).toString(), QStringLiteral("Document Title")); 0144 QCOMPARE(properties.value(Property::Author).toString(), QStringLiteral("Stefan Brüns")); 0145 QCOMPARE(properties.value(Property::Language).toString(), QStringLiteral("en")); 0146 0147 const auto subjects = QVariantList(properties.lowerBound(Property::Subject), 0148 properties.upperBound(Property::Subject)); 0149 QCOMPARE(subjects.size(), 2); 0150 QCOMPARE(subjects, QVariantList({QStringLiteral("Testing"), QStringLiteral("Dublin Core")})); 0151 0152 QVERIFY(result.text().isEmpty()); 0153 } 0154 0155 void XmlExtractorTests::testXmlExtractorContainer() 0156 { 0157 XmlExtractor plugin{this}; 0158 0159 SimpleExtractionResult result(testFilePath(QStringLiteral("test_with_container.svg")), 0160 QStringLiteral("image/svg"), 0161 ExtractionResult::ExtractMetaData | ExtractionResult::ExtractPlainText); 0162 plugin.extract(&result); 0163 0164 QString content = QStringLiteral("Some text below <a>\n"); 0165 0166 QCOMPARE(result.types().size(), 1); 0167 QCOMPARE(result.types().at(0), Type::Image); 0168 0169 QCOMPARE(result.properties().size(), 0); 0170 0171 content.replace(QLatin1Char('\n'), QLatin1Char(' ')); 0172 QCOMPARE(result.text(), content); 0173 } 0174 0175 void XmlExtractorTests::testXmlExtractorMathML() 0176 { 0177 XmlExtractor plugin{this}; 0178 0179 SimpleExtractionResult result(testFilePath(QStringLiteral("test.mml")), 0180 QStringLiteral("application/mathml+xml"), 0181 ExtractionResult::ExtractMetaData | ExtractionResult::ExtractPlainText); 0182 plugin.extract(&result); 0183 0184 QString content = QStringLiteral("1 + 1 = 2\n"); 0185 0186 QCOMPARE(result.types().size(), 1); 0187 QCOMPARE(result.types().at(0), Type::Text); 0188 0189 QCOMPARE(result.properties().size(), 0); 0190 0191 content.replace(QLatin1Char('\n'), QLatin1Char(' ')); 0192 QCOMPARE(result.text(), content); 0193 } 0194 0195 0196 QTEST_GUILESS_MAIN(XmlExtractorTests) 0197 0198 #include "moc_xmlextractortest.cpp"