File indexing completed on 2024-05-12 05:17:30
0001 /* 0002 SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include <KItinerary/HtmlDocument> 0008 #include <config-kitinerary.h> 0009 0010 #include <QFile> 0011 #include <QTest> 0012 0013 using namespace KItinerary; 0014 0015 class HtmlDocumentTest : public QObject 0016 { 0017 Q_OBJECT 0018 private Q_SLOTS: 0019 void testElementWalking() 0020 { 0021 QFile f(QStringLiteral(SOURCE_DIR "/structureddata/os-two-leg-invalid-microdata.html")); 0022 QVERIFY(f.open(QFile::ReadOnly)); 0023 #if HAVE_LIBXML2 0024 std::unique_ptr<HtmlDocument> doc(HtmlDocument::fromData(f.readAll())); 0025 QVERIFY(doc); 0026 auto elem = doc->root(); 0027 QVERIFY(!elem.isNull()); 0028 QCOMPARE(elem.name(), QLatin1StringView("html")); 0029 QCOMPARE(elem.attribute(QLatin1StringView("lang")), 0030 QLatin1String("de")); 0031 QVERIFY(elem.nextSibling().isNull()); 0032 QVERIFY(elem.parent().isNull()); 0033 0034 elem = elem.firstChild(); 0035 QVERIFY(!elem.isNull()); 0036 QCOMPARE(elem.name(), QLatin1StringView("head")); 0037 elem = elem.nextSibling(); 0038 QVERIFY(!elem.isNull()); 0039 QCOMPARE(elem.name(), QLatin1StringView("body")); 0040 QCOMPARE(elem.parent().name(), QLatin1StringView("html")); 0041 0042 auto res = doc->eval(QStringLiteral("/html")); 0043 auto nodes = res.toList(); 0044 QCOMPARE(nodes.size(), 1); 0045 QCOMPARE(nodes.at(0).value<HtmlElement>().name(), 0046 QLatin1StringView("html")); 0047 nodes = doc->eval(QStringLiteral("//body")).toList(); 0048 QCOMPARE(nodes.size(), 1); 0049 nodes = doc->eval(QStringLiteral("//link")).toList(); 0050 QCOMPARE(nodes.size(), 6); 0051 0052 nodes = doc->eval(QStringLiteral("/html/@lang")).toList(); 0053 QCOMPARE(nodes.at(0).value<HtmlElement>().content(), 0054 QLatin1StringView("de")); 0055 nodes = doc->eval(QStringLiteral("//div[@itemtype=\"http://schema.org/FlightReservation\"]")).toList(); 0056 QCOMPARE(nodes.size(), 2); 0057 elem = nodes.at(0).value<HtmlElement>(); 0058 QCOMPARE(elem.attributes().size(), 2); 0059 QVERIFY(elem.attributes().contains(QLatin1StringView("itemscope"))); 0060 QVERIFY(elem.attributes().contains(QLatin1StringView("itemtype"))); 0061 nodes = elem.eval(QStringLiteral("./link")).toList(); 0062 QCOMPARE(nodes.size(), 3); 0063 #endif 0064 } 0065 0066 void testContentAccess() 0067 { 0068 QFile f(QStringLiteral(SOURCE_DIR "/structureddata/hotel-json-ld-fallback.html")); 0069 QVERIFY(f.open(QFile::ReadOnly)); 0070 #if HAVE_LIBXML2 0071 std::unique_ptr<HtmlDocument> doc(HtmlDocument::fromData(f.readAll())); 0072 QVERIFY(doc); 0073 auto elem = doc->root(); 0074 QVERIFY(!elem.isNull()); 0075 QVERIFY(elem.content().isEmpty()); 0076 0077 elem = elem.firstChild().firstChild().nextSibling(); 0078 QCOMPARE(elem.name(), QLatin1StringView("script")); 0079 QCOMPARE(elem.attribute(QLatin1StringView("type")), 0080 QLatin1String("application/ld+json")); 0081 QCOMPARE(elem.attributes().size(), 1); 0082 QCOMPARE(elem.attributes().at(0), QLatin1StringView("type")); 0083 const auto s = elem.content(); 0084 QVERIFY(s.contains(QLatin1StringView("checkoutDate"))); 0085 0086 elem = doc->root().firstChild().nextSibling().firstChild(); 0087 QCOMPARE(elem.name(), QLatin1StringView("p")); 0088 QCOMPARE(elem.content(), 0089 QLatin1StringView("random content\ncan be invalid")); 0090 #endif 0091 } 0092 0093 void testContentProcessing() 0094 { 0095 QFile f(QStringLiteral(SOURCE_DIR "/misc/test.html")); 0096 QVERIFY(f.open(QFile::ReadOnly)); 0097 #if HAVE_LIBXML2 0098 std::unique_ptr<HtmlDocument> doc(HtmlDocument::fromData(f.readAll().replace("<CR>", "\r"))); 0099 QVERIFY(doc); 0100 auto elem = doc->root(); 0101 QVERIFY(!elem.isNull()); 0102 QVERIFY(elem.content().isEmpty()); 0103 QVERIFY(elem.recursiveContent().contains(QLatin1StringView("spaces"))); 0104 0105 elem = elem.firstChild().firstChild(); 0106 QCOMPARE(elem.name(), QLatin1StringView("p")); 0107 QCOMPARE(elem.content(), QLatin1StringView("word1\nword2")); 0108 QCOMPARE(elem.recursiveContent(), QLatin1StringView("word1\nword2")); 0109 0110 elem = elem.nextSibling(); 0111 QCOMPARE(elem.name(), QLatin1StringView("p")); 0112 QCOMPARE(elem.content(), QLatin1StringView("lots of spaces")); 0113 QCOMPARE(elem.recursiveContent(), QLatin1StringView("lots of spaces")); 0114 0115 auto elems = doc->eval(QStringLiteral("//*[text()[normalize-space(.)='lots of spaces']]")).toList(); 0116 QCOMPARE(elems.size(), 1); 0117 QCOMPARE(elems.at(0).value<HtmlElement>().name(), 0118 QLatin1StringView("p")); 0119 elems = doc->eval(QStringLiteral("//*[text()='lots of spaces']")).toList(); 0120 QCOMPARE(elems.size(), 0); 0121 0122 elem = elem.nextSibling(); 0123 QCOMPARE(elem.content(), QString::fromUtf8("인천공항")); 0124 QCOMPARE(elem.recursiveContent(), QString::fromUtf8("인천공항")); 0125 0126 elem = elem.nextSibling(); 0127 QCOMPARE(elem.content(), QLatin1StringView("a b")); 0128 QCOMPARE(elem.recursiveContent(), QLatin1StringView("a b")); 0129 0130 elem = elem.nextSibling(); 0131 QCOMPARE(elem.content(), QLatin1StringView("a&b")); 0132 QCOMPARE(elem.recursiveContent(), QLatin1StringView("a&b")); 0133 elem = elem.nextSibling(); 0134 QCOMPARE(elem.content(), QLatin1StringView("a&b")); 0135 QCOMPARE(elem.recursiveContent(), QLatin1StringView("a&b")); 0136 0137 elem = elem.nextSibling(); 0138 QCOMPARE(elem.content(), QLatin1StringView("non breaking")); 0139 QCOMPARE(elem.recursiveContent(), QLatin1StringView("non breaking")); 0140 elem = elem.nextSibling(); 0141 QCOMPARE(elem.content(), QLatin1StringView("windows\nline\nbreaks")); 0142 QCOMPARE(elem.recursiveContent(), 0143 QLatin1StringView("windows\nline\nbreaks")); 0144 #endif 0145 } 0146 }; 0147 0148 QTEST_GUILESS_MAIN(HtmlDocumentTest) 0149 0150 #include "htmldocumenttest.moc"