File indexing completed on 2024-04-28 15:25:52

0001 /*
0002     This file is part of the KDE libraries
0003     SPDX-FileCopyrightText: 2008 Andreas Hartmetz <ahartmetz@gmail.com>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "httpheadertokenizetest.h"
0009 
0010 #include <QTest>
0011 
0012 #include <QByteArray>
0013 #include <QDebug>
0014 #include <QHash>
0015 
0016 #include <parsinghelpers.h>
0017 
0018 #include <parsinghelpers.cpp>
0019 
// Generates the main() function that runs the HeaderTokenizeTest test slots.
// QT5 TODO: switch to QTEST_GUILESS_MAIN(HeaderTokenizeTest); nothing in this
// file uses GUI classes.
QTEST_MAIN(HeaderTokenizeTest)
0022 
0023 /* some possible fields that can be used for test headers
0024         {"accept-ranges", false},
0025         {"cache-control", true},
0026         {"connection", true},
0027         {"content-disposition", false}, //is multi-valued in a way, but with ";" separator!
0028         {"content-encoding", true},
0029         {"content-language", true},
0030         {"content-length", false},
0031         {"content-location", false},
0032         {"content-md5", false},
0033         {"content-type", false},
0034         {"date", false},
0035         {"dav", true}, //RFC 2518
0036         {"etag", false},
0037         {"expires", false},
0038         {"keep-alive", false}, //RFC 2068
0039         {"last-modified", false},
0040         {"link", false}, //RFC 2068, multi-valued with ";" separator
0041         {"location", false},
0042 */
0043 
// There is no use in testing many different headers; a couple each from the
// multi-valued and the single-valued group are enough to make sure that corner
// cases work both when there already are entries for a header and when there
// are none.
//
// Input fixture for testMessyHeader(). It deliberately mixes:
//  - differently cased spellings of the same field name,
//  - values folded over several lines (continuation lines start with a space or tab),
//  - empty items in comma-separated lists,
//  - lines with an empty or indented field name,
//  - line endings written as "\r\n", bare "\n" and bare "\r".
static const char messyHeader[] =
    "\n"
    "accept-ranges:foo\r\n"
    "connection: one\r\n"
    " t_\r\n"
    " wo,\r\n"
    "\tthree\r\n"
    "accept-ranges:42\n"
    "accept-Ranges:\tmaybe \r"
    " or not\n"
    "CoNNectIoN:four, , ,,   , \r\n"
    " :fi:ve\r\n"
    ":invalid stuff\r\n"
    "\tinvalid: connection:close\t\r"
    "connection: Six, seven ,, , eight\r" // one malformed newline...
    "\n\r "; // two malformed newlines; end of header. Also observe the trailing space.

// Expected result for messyHeader: a newline separates header lines, a tab
// separates values, and the first word of each line is the key.
// Where the input folds a value over several lines, the expected value has
// spaces in place of the line break (e.g. "one\r\n t_" is expected as "one   t_").
// The test only checks that each tokenized value starts with the expected one,
// so trailing whitespace is not listed here.
static const char messyResult[] =
    "accept-ranges\tfoo\t42\tmaybe   or not\n"
    "connection\tone   t_   wo\tthree\tfour\t:fi:ve\tSix\tseven\teight";
0068 
// Input fixture for testRedirectHeader(): the header fields of a redirect
// response, each terminated by "\r\n" and followed by the empty line that ends
// the header. The status line is left out (kept below as a comment).
static const char redirectHeader[] =
    //"HTTP/1.1 302 Moved Temporarily\r\n"
    "Location: http://www.hertz.de/rentacar/index.jsp?bsc=t&targetPage=reservationOnHomepage.jsp\r\n"
    "Connection:close\r\n"
    "Cache-Control: no-cache\r\n"
    "Pragma: no-cache\r\n"
    "\r\n";

// Expected result for redirectHeader, in the same format as messyResult:
// one line per key (lower-case), key and values separated by tabs.
static const char redirectResult[] =
    "cache-control\tno-cache\n"
    "connection\tclose\n"
    "location\thttp://www.hertz.de/rentacar/index.jsp?bsc=t&targetPage=reservationOnHomepage.jsp\n"
    "pragma\tno-cache";
0082 
// Size in bytes of the scratch buffer below.
static const int bufSize = 4096;
// Writable scratch buffer shared by both tests: each test zeroes it, copies its
// header fixture into it and hands it to the HeaderTokenizer.
char buffer[bufSize];
0085 
0086 void HeaderTokenizeTest::testMessyHeader()
0087 {
0088     // Copy the header into a writable buffer
0089     for (int i = 0; i < bufSize; i++) {
0090         buffer[i] = 0;
0091     }
0092     strcpy(buffer, messyHeader);
0093 
0094     HeaderTokenizer tokenizer(buffer);
0095     int tokenizeEnd = tokenizer.tokenize(0, strlen(messyHeader));
0096     QCOMPARE(tokenizeEnd, (int)(strlen(messyHeader) - 1));
0097 
0098     // If the output of the tokenizer contains all the terms that should be there and
0099     // exactly the number of terms that should be there then it's exactly correct.
0100     // We are lax wrt trailing whitespace, by the way. It does neither explicitly matter
0101     // nor not matter according to the standard. Internal whitespace similarly should not
0102     // matter but we have to be exact because the tokenizer does not move strings around,
0103     // it only overwrites \r and \n in case of line continuations.
0104 
0105     int nValues = 0;
0106     const QList<QByteArray> messyResultsList = QByteArray(messyResult).split('\n');
0107     for (const QByteArray &ba : messyResultsList) {
0108         QList<QByteArray> values = ba.split('\t');
0109         QByteArray key = values.takeFirst();
0110         nValues += values.count();
0111 
0112         QList<QByteArray> comparisonValues;
0113         for (const auto [startIdx, endIdx] : tokenizer.value(key).beginEnd) {
0114             comparisonValues.append(QByteArray(buffer + startIdx, endIdx - startIdx));
0115         }
0116 
0117         QCOMPARE(comparisonValues.count(), values.count());
0118         for (int i = 0; i < values.count(); i++) {
0119             QVERIFY(comparisonValues[i].startsWith(values[i]));
0120         }
0121     }
0122 
0123     int nValues2 = 0;
0124     HeaderTokenizer::ConstIterator it = tokenizer.constBegin();
0125     for (; it != tokenizer.constEnd(); ++it) {
0126         nValues2 += it.value().beginEnd.count();
0127     }
0128     QCOMPARE(nValues2, nValues);
0129 
0130     return; // comment out for parsed header dump to stdout
0131 
0132     it = tokenizer.constBegin();
0133     for (; it != tokenizer.constEnd(); ++it) {
0134         if (!it.value().beginEnd.isEmpty()) {
0135             qDebug() << it.key() << ":";
0136         }
0137         for (const auto [startIdx, endIdx] : it.value().beginEnd) {
0138             qDebug() << "  " << QByteArray(buffer + startIdx, endIdx - startIdx);
0139         }
0140     }
0141 }
0142 
0143 void HeaderTokenizeTest::testRedirectHeader()
0144 {
0145     // Copy the header into a writable buffer
0146     for (int i = 0; i < bufSize; i++) {
0147         buffer[i] = 0;
0148     }
0149     strcpy(buffer, redirectHeader);
0150 
0151     HeaderTokenizer tokenizer(buffer);
0152     int tokenizeEnd = tokenizer.tokenize(0, strlen(redirectHeader));
0153     QCOMPARE(tokenizeEnd, (int)strlen(redirectHeader));
0154 
0155     int nValues = 0;
0156     const QList<QByteArray> redirectResults = QByteArray(redirectResult).split('\n');
0157     for (const QByteArray &ba : redirectResults) {
0158         QList<QByteArray> values = ba.split('\t');
0159         QByteArray key = values.takeFirst();
0160         nValues += values.count();
0161 
0162         QList<QByteArray> comparisonValues;
0163         for (const auto [startIdx, endIdx] : tokenizer.value(key).beginEnd) {
0164             comparisonValues.append(QByteArray(buffer + startIdx, endIdx - startIdx));
0165         }
0166 
0167         QCOMPARE(comparisonValues.count(), values.count());
0168         for (int i = 0; i < values.count(); i++) {
0169             QVERIFY(comparisonValues[i].startsWith(values[i]));
0170         }
0171     }
0172 
0173     int nValues2 = 0;
0174     HeaderTokenizer::ConstIterator it = tokenizer.constBegin();
0175     for (; it != tokenizer.constEnd(); ++it) {
0176         nValues2 += it.value().beginEnd.count();
0177     }
0178     QCOMPARE(nValues2, nValues);
0179 
0180     // Fix compiler warning
0181     (void)contentDispositionParser;
0182 }
0183 
0184 #include "moc_httpheadertokenizetest.cpp"