File indexing completed on 2024-05-12 05:17:30

0001 /*
0002     SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include <KItinerary/HtmlDocument>
0008 #include <config-kitinerary.h>
0009 
0010 #include <QFile>
0011 #include <QTest>
0012 
0013 using namespace KItinerary;
0014 
0015 class HtmlDocumentTest : public QObject
0016 {
0017     Q_OBJECT
0018 private Q_SLOTS:
0019     void testElementWalking()
0020     {
0021         QFile f(QStringLiteral(SOURCE_DIR "/structureddata/os-two-leg-invalid-microdata.html"));
0022         QVERIFY(f.open(QFile::ReadOnly));
0023 #if HAVE_LIBXML2
0024         std::unique_ptr<HtmlDocument> doc(HtmlDocument::fromData(f.readAll()));
0025         QVERIFY(doc);
0026         auto elem = doc->root();
0027         QVERIFY(!elem.isNull());
0028         QCOMPARE(elem.name(), QLatin1StringView("html"));
0029         QCOMPARE(elem.attribute(QLatin1StringView("lang")),
0030                  QLatin1String("de"));
0031         QVERIFY(elem.nextSibling().isNull());
0032         QVERIFY(elem.parent().isNull());
0033 
0034         elem = elem.firstChild();
0035         QVERIFY(!elem.isNull());
0036         QCOMPARE(elem.name(), QLatin1StringView("head"));
0037         elem = elem.nextSibling();
0038         QVERIFY(!elem.isNull());
0039         QCOMPARE(elem.name(), QLatin1StringView("body"));
0040         QCOMPARE(elem.parent().name(), QLatin1StringView("html"));
0041 
0042         auto res = doc->eval(QStringLiteral("/html"));
0043         auto nodes = res.toList();
0044         QCOMPARE(nodes.size(), 1);
0045         QCOMPARE(nodes.at(0).value<HtmlElement>().name(),
0046                  QLatin1StringView("html"));
0047         nodes = doc->eval(QStringLiteral("//body")).toList();
0048         QCOMPARE(nodes.size(), 1);
0049         nodes = doc->eval(QStringLiteral("//link")).toList();
0050         QCOMPARE(nodes.size(), 6);
0051 
0052         nodes = doc->eval(QStringLiteral("/html/@lang")).toList();
0053         QCOMPARE(nodes.at(0).value<HtmlElement>().content(),
0054                  QLatin1StringView("de"));
0055         nodes = doc->eval(QStringLiteral("//div[@itemtype=\"http://schema.org/FlightReservation\"]")).toList();
0056         QCOMPARE(nodes.size(), 2);
0057         elem = nodes.at(0).value<HtmlElement>();
0058         QCOMPARE(elem.attributes().size(), 2);
0059         QVERIFY(elem.attributes().contains(QLatin1StringView("itemscope")));
0060         QVERIFY(elem.attributes().contains(QLatin1StringView("itemtype")));
0061         nodes = elem.eval(QStringLiteral("./link")).toList();
0062         QCOMPARE(nodes.size(), 3);
0063 #endif
0064     }
0065 
0066     void testContentAccess()
0067     {
0068         QFile f(QStringLiteral(SOURCE_DIR "/structureddata/hotel-json-ld-fallback.html"));
0069         QVERIFY(f.open(QFile::ReadOnly));
0070 #if HAVE_LIBXML2
0071         std::unique_ptr<HtmlDocument> doc(HtmlDocument::fromData(f.readAll()));
0072         QVERIFY(doc);
0073         auto elem = doc->root();
0074         QVERIFY(!elem.isNull());
0075         QVERIFY(elem.content().isEmpty());
0076 
0077         elem = elem.firstChild().firstChild().nextSibling();
0078         QCOMPARE(elem.name(), QLatin1StringView("script"));
0079         QCOMPARE(elem.attribute(QLatin1StringView("type")),
0080                  QLatin1String("application/ld+json"));
0081         QCOMPARE(elem.attributes().size(), 1);
0082         QCOMPARE(elem.attributes().at(0), QLatin1StringView("type"));
0083         const auto s = elem.content();
0084         QVERIFY(s.contains(QLatin1StringView("checkoutDate")));
0085 
0086         elem = doc->root().firstChild().nextSibling().firstChild();
0087         QCOMPARE(elem.name(), QLatin1StringView("p"));
0088         QCOMPARE(elem.content(),
0089                  QLatin1StringView("random content\ncan be invalid"));
0090 #endif
0091     }
0092 
0093     void testContentProcessing()
0094     {
0095         QFile f(QStringLiteral(SOURCE_DIR "/misc/test.html"));
0096         QVERIFY(f.open(QFile::ReadOnly));
0097 #if HAVE_LIBXML2
0098         std::unique_ptr<HtmlDocument> doc(HtmlDocument::fromData(f.readAll().replace("<CR>", "\r")));
0099         QVERIFY(doc);
0100         auto elem = doc->root();
0101         QVERIFY(!elem.isNull());
0102         QVERIFY(elem.content().isEmpty());
0103         QVERIFY(elem.recursiveContent().contains(QLatin1StringView("spaces")));
0104 
0105         elem = elem.firstChild().firstChild();
0106         QCOMPARE(elem.name(), QLatin1StringView("p"));
0107         QCOMPARE(elem.content(), QLatin1StringView("word1\nword2"));
0108         QCOMPARE(elem.recursiveContent(), QLatin1StringView("word1\nword2"));
0109 
0110         elem = elem.nextSibling();
0111         QCOMPARE(elem.name(), QLatin1StringView("p"));
0112         QCOMPARE(elem.content(), QLatin1StringView("lots of spaces"));
0113         QCOMPARE(elem.recursiveContent(), QLatin1StringView("lots of spaces"));
0114 
0115         auto elems = doc->eval(QStringLiteral("//*[text()[normalize-space(.)='lots of spaces']]")).toList();
0116         QCOMPARE(elems.size(), 1);
0117         QCOMPARE(elems.at(0).value<HtmlElement>().name(),
0118                  QLatin1StringView("p"));
0119         elems = doc->eval(QStringLiteral("//*[text()='lots of spaces']")).toList();
0120         QCOMPARE(elems.size(), 0);
0121 
0122         elem = elem.nextSibling();
0123         QCOMPARE(elem.content(), QString::fromUtf8("인천공항"));
0124         QCOMPARE(elem.recursiveContent(), QString::fromUtf8("인천공항"));
0125 
0126         elem = elem.nextSibling();
0127         QCOMPARE(elem.content(), QLatin1StringView("a b"));
0128         QCOMPARE(elem.recursiveContent(), QLatin1StringView("a b"));
0129 
0130         elem = elem.nextSibling();
0131         QCOMPARE(elem.content(), QLatin1StringView("a&b"));
0132         QCOMPARE(elem.recursiveContent(), QLatin1StringView("a&b"));
0133         elem = elem.nextSibling();
0134         QCOMPARE(elem.content(), QLatin1StringView("a&b"));
0135         QCOMPARE(elem.recursiveContent(), QLatin1StringView("a&b"));
0136 
0137         elem = elem.nextSibling();
0138         QCOMPARE(elem.content(), QLatin1StringView("non breaking"));
0139         QCOMPARE(elem.recursiveContent(), QLatin1StringView("non breaking"));
0140         elem = elem.nextSibling();
0141         QCOMPARE(elem.content(), QLatin1StringView("windows\nline\nbreaks"));
0142         QCOMPARE(elem.recursiveContent(),
0143                  QLatin1StringView("windows\nline\nbreaks"));
0144 #endif
0145     }
0146 };
0147 
0148 QTEST_GUILESS_MAIN(HtmlDocumentTest)
0149 
0150 #include "htmldocumenttest.moc"