File indexing completed on 2025-02-23 05:00:16

0001 /*
0002    SPDX-FileCopyrightText: 2016-2024 Laurent Montel <montel@kde.org>
0003 
0004    SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "urlhashingtest.h"
0008 #include "../urlhashing.h"
0009 #include <QTest>
0010 #include <QUrl>
0011 
0012 UrlHashingTest::UrlHashingTest(QObject *parent)
0013     : QObject(parent)
0014 {
0015 }
0016 
// The fixture defines no custom teardown; the compiler-generated destructor is used.
UrlHashingTest::~UrlHashingTest() = default;
0018 
0019 void UrlHashingTest::shouldCanonicalizeUrl_data()
0020 {
0021     QTest::addColumn<QString>("input");
0022     QTest::addColumn<QString>("output");
0023 #if 0
0024     Canonicalize("http://host/%25%32%35") = "http://host/%25";
0025     Canonicalize("http://host/%25%32%35%25%32%35") = "http://host/%25%25";
0026     Canonicalize("http://host/%2525252525252525") = "http://host/%25";
0027     Canonicalize("http://host/asdf%25%32%35asd") = "http://host/asdf%25asd";
0028     Canonicalize("http://host/%%%25%32%35asd%%") = "http://host/%25%25%25asd%25%25";
0029     Canonicalize("http://www.google.com/") = "http://www.google.com/";
0030     Canonicalize("http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/") = "http://168.188.99.26/.secure/www.ebay.com/";
0031     Canonicalize("http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/")
0032         = "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/";
0033     Canonicalize("http://host%23.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B") = "http://host%23.com/~a!b@c%23d$e%25f^00&11*22(33)44_55+";
0034     Canonicalize("http://3279880203/blah") = "http://195.127.0.11/blah";
0035     Canonicalize("http://www.google.com/blah/..") = "http://www.google.com/";
0036     Canonicalize("www.google.com/") = "http://www.google.com/";
0037     Canonicalize("www.google.com") = "http://www.google.com/";
0038     Canonicalize("http://www.evil.com/blah#frag") = "http://www.evil.com/blah";
0039     Canonicalize("http://www.GOOgle.com/") = "http://www.google.com/";
0040     Canonicalize("http://www.google.com.../") = "http://www.google.com/";
0041     Canonicalize("http://www.google.com/foo\tbar\rbaz\n2") = "http://www.google.com/foobarbaz2";
0042     Canonicalize("http://www.google.com/q?") = "http://www.google.com/q?";
0043     Canonicalize("http://www.google.com/q?r?") = "http://www.google.com/q?r?";
0044     Canonicalize("http://www.google.com/q?r?s") = "http://www.google.com/q?r?s";
0045     Canonicalize("http://evil.com/foo#bar#baz") = "http://evil.com/foo";
0046     Canonicalize("http://evil.com/foo;") = "http://evil.com/foo;";
0047     Canonicalize("http://evil.com/foo?bar;") = "http://evil.com/foo?bar;";
0048     Canonicalize("http://\x01\x80.com/") = "http://%01%80.com/";
0049     Canonicalize("http://notrailingslash.com") = "http://notrailingslash.com/";
0050     Canonicalize("http://www.gotaport.com:1234/") = "http://www.gotaport.com/";
0051     Canonicalize("  http://www.google.com/  ") = "http://www.google.com/";
0052     Canonicalize("http:// leadingspace.com/") = "http://%20leadingspace.com/";
0053     Canonicalize("http://%20leadingspace.com/") = "http://%20leadingspace.com/";
0054     Canonicalize("%20leadingspace.com/") = "http://%20leadingspace.com/";
0055     Canonicalize("https://www.securesite.com/") = "https://www.securesite.com/";
0056     Canonicalize("http://host.com/ab%23cd") = "http://host.com/ab%23cd";
0057     Canonicalize("http://host.com//twoslashes?more//slashes") = "http://host.com/twoslashes?more//slashes";
0058 #endif
0059 
0060     QTest::newRow("empty") << QString() << QString();
0061 
0062     QTest::newRow("http://host/%25%32%35") << QStringLiteral("http://host/%25%32%35") << QStringLiteral("http://host/%25");
0063     QTest::newRow("http://host/%25%32%35%25%32%35") << QStringLiteral("http://host/%25%32%35%25%32%35") << QStringLiteral("http://host/%25%25");
0064     QTest::newRow("http://host/%2525252525252525") << QStringLiteral("http://host/%2525252525252525") << QStringLiteral("http://host/%25");
0065     QTest::newRow("http://host/asdf%25%32%35asd") << QStringLiteral("http://host/asdf%25%32%35asd") << QStringLiteral("http://host/asdf%25asd");
0066     QTest::newRow("http://host/%%%25%32%35asd%%") << QStringLiteral("http://host/%%%25%32%35asd%%") << QStringLiteral("http://host/%25%25%25asd%25%25");
0067     QTest::newRow("http://www.google.com/") << QStringLiteral("http://www.google.com/") << QStringLiteral("http://www.google.com/");
0068     QTest::newRow("http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/")
0069         << QStringLiteral("http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/")
0070         << QStringLiteral("http://168.188.99.26/.secure/www.ebay.com/");
0071     QTest::newRow("test8")
0072         << QStringLiteral("http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/")
0073         << QStringLiteral("http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/");
0074     QTest::newRow("test9") << QStringLiteral("http://host%23.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B")
0075                            << QStringLiteral("http://host%23.com/~a!b@c%23d$e%25f^00&11*22(33)44_55+");
0076     QTest::newRow("http://3279880203/blah") << QStringLiteral("http://3279880203/blah") << QStringLiteral("http://195.127.0.11/blah");
0077     QTest::newRow("http://www.google.com/blah/..") << QStringLiteral("http://www.google.com/blah/..") << QStringLiteral("http://www.google.com/");
0078     QTest::newRow("www.google.com/") << QStringLiteral("www.google.com/") << QStringLiteral("http://www.google.com/");
0079     QTest::newRow("www.google.com") << QStringLiteral("www.google.com") << QStringLiteral("http://www.google.com/");
0080     QTest::newRow("http://www.evil.com/blah#frag") << QStringLiteral("http://www.evil.com/blah#frag") << QStringLiteral("http://www.evil.com/blah");
0081     QTest::newRow("http://www.GOOgle.com/") << QStringLiteral("http://www.GOOgle.com/") << QStringLiteral("http://www.google.com/");
0082     QTest::newRow("http://www.google.com.../") << QStringLiteral("http://www.google.com.../") << QStringLiteral("http://www.google.com/");
0083     QTest::newRow("http://www.google.com/foo\tbar\rbaz\n2")
0084         << QStringLiteral("http://www.google.com/foo\tbar\rbaz\n2") << QStringLiteral("http://www.google.com/foobarbaz2");
0085     QTest::newRow("http://www.google.com/q?") << QStringLiteral("http://www.google.com/q?") << QStringLiteral("http://www.google.com/q?");
0086     QTest::newRow("http://www.google.com/q?r?") << QStringLiteral("http://www.google.com/q?r?") << QStringLiteral("http://www.google.com/q?r?");
0087     QTest::newRow("http://www.google.com/q?r?s") << QStringLiteral("http://www.google.com/q?r?s") << QStringLiteral("http://www.google.com/q?r?s");
0088     QTest::newRow("http://evil.com/foo#bar#baz") << QStringLiteral("http://evil.com/foo#bar#baz") << QStringLiteral("http://evil.com/foo");
0089     QTest::newRow("http://evil.com/foo;") << QStringLiteral("http://evil.com/foo;") << QStringLiteral("http://evil.com/foo;");
0090     QTest::newRow("http://evil.com/foo?bar;") << QStringLiteral("http://evil.com/foo?bar;") << QStringLiteral("http://evil.com/foo?bar;");
0091     QTest::newRow("http://\x01\x80.com/") << QStringLiteral("http://\x01\x80.com/") << QStringLiteral("http://%01%80.com/");
0092     QTest::newRow("http://notrailingslash.com") << QStringLiteral("http://notrailingslash.com") << QStringLiteral("http://notrailingslash.com/");
0093     QTest::newRow("http://www.gotaport.com:1234/") << QStringLiteral("http://www.gotaport.com:1234/") << QStringLiteral("http://www.gotaport.com/");
0094     QTest::newRow("  http://www.google.com/  ") << QStringLiteral("  http://www.google.com/  ") << QStringLiteral("http://www.google.com/");
0095     QTest::newRow("http:// leadingspace.com/") << QStringLiteral("http:// leadingspace.com/") << QStringLiteral("http://%20leadingspace.com/");
0096     QTest::newRow("http://%20leadingspace.com/") << QStringLiteral("http://%20leadingspace.com/") << QStringLiteral("http://%20leadingspace.com/");
0097     QTest::newRow("%20leadingspace.com/") << QStringLiteral("%20leadingspace.com/") << QStringLiteral("http://%20leadingspace.com/");
0098     QTest::newRow("https://www.securesite.com/") << QStringLiteral("https://www.securesite.com/") << QStringLiteral("https://www.securesite.com/");
0099     QTest::newRow("http://host.com/ab%23cd") << QStringLiteral("http://host.com/ab%23cd") << QStringLiteral("http://host.com/ab%23cd");
0100     QTest::newRow("http://host.com//twoslashes?more//slashes")
0101         << QStringLiteral("http://host.com//twoslashes?more//slashes") << QStringLiteral("http://host.com/twoslashes?more//slashes");
0102 }
0103 
0104 void UrlHashingTest::shouldCanonicalizeUrl()
0105 {
0106     QFETCH(QString, input);
0107     QFETCH(QString, output);
0108     input = input.trimmed();
0109     QEXPECT_FAIL("http://host/%2525252525252525", "Not supported yet", Continue);
0110     QEXPECT_FAIL("http://\x01\x80.com/", "Not supported yet", Continue);
0111     QEXPECT_FAIL("%20leadingspace.com/", "Not supported yet", Continue);
0112     QEXPECT_FAIL("http://%20leadingspace.com/", "Not supported yet", Continue);
0113     QEXPECT_FAIL("http://www.google.com.../", "Not supported yet", Continue);
0114 
0115     QEXPECT_FAIL("http://http/host%23.com/%7Ea%21b%40c%23d%24e%f%5E00%2611%2A22%2833%2944_55%2B", "Not supported yet", Continue);
0116     QEXPECT_FAIL("http://host/%%%25%32%35asd%%", "Not supported yet", Continue);
0117     QEXPECT_FAIL("http://host/%25252525252525", "Not supported yet", Continue);
0118     QEXPECT_FAIL("http:// leadingspace.com/", "Not supported yet", Continue);
0119     QEXPECT_FAIL("test9", "Not supported yet", Continue);
0120     QCOMPARE(WebEngineViewer::UrlHashing::canonicalizeUrl(QUrl::fromUserInput(input)), output);
0121 }
0122 
0123 void UrlHashingTest::shouldGenerateHostPath_data()
0124 {
0125     QTest::addColumn<QString>("input");
0126     QTest::addColumn<QStringList>("hosts");
0127     QTest::addColumn<QStringList>("paths");
0128     QTest::newRow("empty") << QString() << QStringList() << QStringList();
0129     QStringList hosts;
0130     QStringList paths;
0131     hosts << QStringLiteral("b.c") << QStringLiteral("a.b.c");
0132     paths << QStringLiteral("/") << QStringLiteral("/1/") << QStringLiteral("/1/2.html") << QStringLiteral("/1/2.html?param=1");
0133     QTest::newRow("http://a.b.c/1/2.html?param=1") << QStringLiteral("http://a.b.c/1/2.html?param=1") << hosts << paths;
0134     hosts.clear();
0135     paths.clear();
0136     hosts << QStringLiteral("f.g") << QStringLiteral("e.f.g") << QStringLiteral("d.e.f.g") << QStringLiteral("c.d.e.f.g") << QStringLiteral("a.b.c.d.e.f.g");
0137     paths << QStringLiteral("/") << QStringLiteral("/1.html");
0138     QTest::newRow("http://a.b.c.d.e.f.g/1.html") << QStringLiteral("http://a.b.c.d.e.f.g/1.html") << hosts << paths;
0139 
0140     hosts.clear();
0141     paths.clear();
0142     hosts << QStringLiteral("a.b");
0143     paths << QStringLiteral("/") << QStringLiteral("/saw-cgi/") << QStringLiteral("/saw-cgi/eBayISAPI.dll/");
0144     QTest::newRow("http://a.b/saw-cgi/eBayISAPI.dll/") << QStringLiteral("http://a.b/saw-cgi/eBayISAPI.dll/") << hosts << paths;
0145 }
0146 
0147 void UrlHashingTest::shouldGenerateHostPath()
0148 {
0149     QFETCH(QString, input);
0150     QFETCH(QStringList, hosts);
0151     QFETCH(QStringList, paths);
0152     QString result = WebEngineViewer::UrlHashing::canonicalizeUrl(QUrl::fromUserInput(input));
0153     QUrl url(result);
0154 
0155     QCOMPARE(WebEngineViewer::UrlHashing::generateHostsToCheck(url.host()), hosts);
0156     QCOMPARE(WebEngineViewer::UrlHashing::generatePathsToCheck(url.path(), url.query()), paths);
0157 }
0158 
0159 void UrlHashingTest::shouldGenerateHashList_data()
0160 {
0161     QTest::addColumn<QUrl>("input");
0162     QTest::addColumn<int>("numberItems");
0163     QTest::newRow("http://a.b/saw-cgi/eBayISAPI.dll/") << QUrl(QStringLiteral("http://a.b/saw-cgi/eBayISAPI.dll/")) << 3;
0164 }
0165 
0166 void UrlHashingTest::shouldGenerateHashList()
0167 {
0168     QFETCH(QUrl, input);
0169     QFETCH(int, numberItems);
0170     WebEngineViewer::UrlHashing hashing(input);
0171     QCOMPARE(hashing.hashList().count(), numberItems);
0172 }
0173 
// Qt Test macro that provides the executable's entry point and runs the
// UrlHashingTest slots; the "GUILESS" variant is the one for tests without a GUI.
QTEST_GUILESS_MAIN(UrlHashingTest)
0175 
0176 #include "moc_urlhashingtest.cpp"