File indexing completed on 2024-05-19 05:23:09
/* -*- c++ -*-
    tests/lexertest.cpp

    This file is part of the testsuite of KSieve,
    the KDE internet mail/usenet news message filtering library.
    SPDX-FileCopyrightText: 2003 Marc Mutz <mutz@kde.org>

    SPDX-License-Identifier: GPL-2.0-only
*/

#include "lexer.h"
using KSieve::Lexer;

#include "error.h"
using KSieve::Error;

#include <QString>

#include <cstdlib>
#include <iostream>

using std::cerr;
using std::cout;
using std::endl;

// Map a Lexer::Token enum value to its identifier name, for diagnostic
// output. Returns "" for any value not covered by the switch.
static const char *token2string(Lexer::Token t)
{
    switch (t) {
// Expands to a case label that returns the stringized enumerator name.
#define CASE(x) \
    case Lexer::x: \
        return #x
        CASE(None);
        CASE(HashComment);
        CASE(BracketComment);
        CASE(Identifier);
        CASE(Tag);
        CASE(Number);
        CASE(MultiLineString);
        CASE(QuotedString);
        CASE(Special);
        CASE(LineFeeds);
    }
    return "";
#undef CASE
}

// One scripted lexer run: an input string plus the exact token stream and
// (optionally) the error the lexer is expected to produce for it.
struct TestCase {
    const char *name; // human-readable label printed before the result
    const char *string; // the Sieve script fragment fed to the lexer (may be nullptr)
    struct {
        Lexer::Token token; // expected token kind
        const char *result; // expected token text (UTF-8), or nullptr
    } expected[16]; // end with { None, 0 }
    Error::Type expectedError; // Error::None when the input must lex cleanly
    int errorLine, errorCol; // expected error position (0-based, per the entries below)
};

// Note: entries that expect an error need no {None, nullptr} sentinel — the
// driver in main() stops at the first reported error; unused trailing
// expected[] slots are zero-initialized, which happens to equal the sentinel.
static const TestCase testcases[] = {
    //
    // Whitespace:
    //

    {"Null script", nullptr, {{Lexer::None, nullptr}}, Error::None, 0, 0},

    {"Empty script", "", {{Lexer::None, nullptr}}, Error::None, 0, 0},

    {"Whitespace-only script", " \t\n\t \n", {{Lexer::LineFeeds, "2"}, {Lexer::None, nullptr}}, Error::None, 0, 0},

    {"Lone CR", "\r", {{Lexer::None, nullptr}}, Error::CRWithoutLF, 0, 1},

    {"CR+Space", "\r ", {{Lexer::None, nullptr}}, Error::CRWithoutLF, 0, 1},

    {"CRLF alone", "\r\n", {{Lexer::LineFeeds, "1"}, {Lexer::None, nullptr}}, Error::None, 0, 0},

    //
    // hash comments:
    //

    {"Basic hash comment (no newline)", "#comment", {{Lexer::HashComment, "comment"}, {Lexer::None, nullptr}}, Error::None, 0, 0},

    {"Basic hash comment (LF)", "#comment\n", {{Lexer::HashComment, "comment"}, {Lexer::None, nullptr}}, Error::None, 0, 0},

    {"Basic hash comment (CRLF)", "#comment\r\n", {{Lexer::HashComment, "comment"}, {Lexer::None, nullptr}}, Error::None, 0, 0},

    {"Basic hash comment (CR)", "#comment\r", {{Lexer::HashComment, nullptr}}, Error::CRWithoutLF, 0, 9},

    {"Non-UTF-8 in hash comment", "#\xA9 copyright", {{Lexer::HashComment, nullptr}}, Error::InvalidUTF8, 0, 12},

    //
    // bracket comments:
    //

    {"Basic bracket comment", "/* comment */", {{Lexer::BracketComment, " comment "}, {Lexer::None, nullptr}}, Error::None, 0, 0},

    {"Basic bracket comment - missing trailing slash", "/* comment *", {{Lexer::BracketComment, nullptr}}, Error::UnfinishedBracketComment, 0, 0},

    {"Basic bracket comment - missing trailing asterisk + slash", "/* comment ", {{Lexer::BracketComment, nullptr}}, Error::UnfinishedBracketComment, 0, 0},

    {"Basic bracket comment - missing leading slash", "* comment */", {{Lexer::None, nullptr}}, Error::IllegalCharacter, 0, 0},

    {"Basic bracket comment - missing leading asterisk + slash",
     "comment */",
     {{Lexer::Identifier, "comment"}, {Lexer::None, nullptr}},
     Error::IllegalCharacter,
     0,
     8},

    {"Basic multiline bracket comment (LF)",
     "/* comment\ncomment */",
     {{Lexer::BracketComment, " comment\ncomment "}, {Lexer::None, nullptr}},
     Error::None,
     0,
     0},

    {"Basic multiline bracket comment (CRLF)",
     "/* comment\r\ncomment */",
     {{Lexer::BracketComment, " comment\ncomment "}, {Lexer::None, nullptr}},
     Error::None,
     0,
     0},

    {"Basic multiline bracket comment (CR)", "/* comment\rcomment */", {{Lexer::BracketComment, nullptr}}, Error::CRWithoutLF, 0, 11},

    {"Non-UTF-8 in bracket comment", "/*\xA9 copyright*/", {{Lexer::BracketComment, nullptr}}, Error::InvalidUTF8, 0, 14},

    //
    // numbers:
    //
    {"Basic number 1", "1", {{Lexer::Number, "1"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Basic number 01", "01", {{Lexer::Number, "01"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Qualified number 1k", "1k", {{Lexer::Number, "1k"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Qualified number 1M", "1M", {{Lexer::Number, "1M"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Qualified number 1G", "1G", {{Lexer::Number, "1G"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    //
    // identifiers:
    //
    {"Basic identifier \"id\"", "id", {{Lexer::Identifier, "id"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Basic identifier \"_id\"", "_id", {{Lexer::Identifier, "_id"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    //
    // tags:
    //
    {"Basic tag \":tag\"", ":tag", {{Lexer::Tag, "tag"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Basic tag \":_tag\"", ":_tag", {{Lexer::Tag, "_tag"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    //
    // specials:
    //
    {"Basic special \"{}[]();,\"",
     "{}[]();,",
     {{Lexer::Special, "{"},
      {Lexer::Special, "}"},
      {Lexer::Special, "["},
      {Lexer::Special, "]"},
      {Lexer::Special, "("},
      {Lexer::Special, ")"},
      {Lexer::Special, ";"},
      {Lexer::Special, ","},
      {Lexer::None, nullptr}},
     Error::None,
     0,
     0},
    //
    // quoted-string:
    //
    {"Basic quoted string \"foo\"", "\"foo\"", {{Lexer::QuotedString, "foo"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Basic quoted string, UTF-8",
     "\"foo\xC3\xB1"
     "foo\"", // fooäfoo
     {{Lexer::QuotedString,
       "foo\xC3\xB1"
       "foo"},
      {Lexer::None, nullptr}},
     Error::None,
     0,
     0},
    {"Quoted string, escaped '\"'", R"("foo\"bar")", {{Lexer::QuotedString, "foo\"bar"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Quoted string, escaped '\\'", R"("foo\\bar")", {{Lexer::QuotedString, "foo\\bar"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Quoted string, excessive escapes", R"("\fo\o")", {{Lexer::QuotedString, "foo"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Quoted string across lines (LF)", "\"foo\nbar\"", {{Lexer::QuotedString, "foo\nbar"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Quoted string across lines (CRLF)", "\"foo\r\nbar\"", {{Lexer::QuotedString, "foo\nbar"}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    //
    // multiline strings:
    //
    {"Basic multiline string I (LF)", "text:\nfoo\n.", {{Lexer::MultiLineString, "foo" /* "foo\n" ? */}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Basic multiline string I (CRLF)", "text:\r\nfoo\r\n.", {{Lexer::MultiLineString, "foo" /* "foo\n" ? */}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Basic multiline string II (LF)", "text:\nfoo\n.\n", {{Lexer::MultiLineString, "foo" /* "foo\n" ? */}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Basic multiline string II (CRLF)", "text:\r\nfoo\r\n.\r\n", {{Lexer::MultiLineString, "foo" /* "foo\n" ? */}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Dotstuffed multiline string (LF)", "text:\n..foo\n.", {{Lexer::MultiLineString, ".foo" /* ".foo\n" ? */}, {Lexer::None, nullptr}}, Error::None, 0, 0},
    {"Dotstuffed multiline string (CRLF)",
     "text:\r\n..foo\r\n.",
     {{Lexer::MultiLineString, ".foo" /* ".foo\n" ? */}, {Lexer::None, nullptr}},
     Error::None,
     0,
     0},
    {"Incompletely dotstuffed multiline string (LF)",
     "text:\n.foo\n.",
     {{Lexer::MultiLineString, ".foo" /* ".foo\n" ? */}, {Lexer::None, nullptr}},
     Error::None,
     0,
     0},
    {"Incompletely dotstuffed multiline string (CRLF)",
     "text:\r\n.foo\r\n.",
     {{Lexer::MultiLineString, ".foo" /* ".foo\n" ? */}, {Lexer::None, nullptr}},
     Error::None,
     0,
     0},
    {"Multiline with a line with only one '.'",
     "text:\r\nfoo\r\n..\r\nbar\r\n.",
     {{Lexer::MultiLineString, "foo\n.\nbar"}, {Lexer::None, nullptr}},
     Error::None,
     0,
     0},

    //
    // Errors in single tokens:
    //

    //
    // numbers:
    //
    {"Number, unknown qualifier", "100f", {{Lexer::Number, "100"}}, Error::UnexpectedCharacter, 0, 3},
    {"Negative number", "-100", {{Lexer::None, nullptr}}, Error::IllegalCharacter, 0, 0},
    //
    // identifiers:
    //
    {"Identifier, leading digits", "0id", {{Lexer::Number, "0"}}, Error::UnexpectedCharacter, 0, 1},
    {"Identifier, embedded umlaut", "idäid", {{Lexer::Identifier, "id"}}, Error::IllegalCharacter, 0, 2},
    //
    // tags:
    //
    {"Lone ':' (at end)", ":", {{Lexer::Tag, nullptr}}, Error::UnexpectedCharacter, 0, 0},
    {"Lone ':' (in stream)", ": ", {{Lexer::Tag, nullptr}}, Error::UnexpectedCharacter, 0, 1},
    {"Tag, leading digits", ":0tag", {{Lexer::Tag, nullptr}}, Error::NoLeadingDigits, 0, 1},
    {"Tag, embedded umlaut", ":tagätag", {{Lexer::Tag, "tag"}}, Error::IllegalCharacter, 0, 4},
    //
    // specials: (none)
    // quoted string:
    //
    {"Premature end of quoted string", "\"foo", {{Lexer::QuotedString, "foo"}}, Error::PrematureEndOfQuotedString, 0, 0},
    {"Invalid UTF-8 in quoted string",
     "\"foo\xC0\xA0"
     "foo\"",
     {{Lexer::QuotedString, "foo"}},
     Error::InvalidUTF8,
     0,
     4},

    //
    // Whitespace / token separation: valid
    //

    {"Two identifiers with linebreaks",
     "foo\nbar\n",
     {{Lexer::Identifier, "foo"}, {Lexer::LineFeeds, "1"}, {Lexer::Identifier, "bar"}, {Lexer::LineFeeds, "1"}, {Lexer::None, nullptr}},
     Error::None,
     0,
     0},

    //
    // Whitespace / token separation: invalid
    //
};

// Number of entries in testcases[].
static const int numTestCases = sizeof testcases / sizeof *testcases;

// Two modes:
//   lexertest <string>  — manual: lex the argument and print every token/error.
//   lexertest           — automated: run all testcases[] and report per-case
//                         pass/fail on stderr; exit status 1 on any failure.
int main(int argc, char *argv[])
{
    if (argc == 2) { // manual test
        const char *scursor = argv[1];
        const char *const send = argv[1] + qstrlen(argv[1]);

        Lexer lexer(scursor, send);

        cout << "Begin" << endl;
        while (!lexer.atEnd()) {
            QString result;
            Lexer::Token token = lexer.nextToken(result);
            if (lexer.error()) {
                cout << "Error " << token2string(token) << ": \"" << lexer.error().asString().toLatin1().constData() << "\" at (" << lexer.error().line() << ","
                     << lexer.error().column() << ")" << endl;
                break;
            } else {
                cout << "Got " << token2string(token) << ": \"" << result.toUtf8().data() << "\" at (" << lexer.line() << "," << lexer.column() << ")" << endl;
            }
        }
        cout << "End" << endl;
    } else if (argc == 1) { // automated test
        bool success = true;
        for (int i = 0; i < numTestCases; ++i) {
            bool ok = true;
            const TestCase &t = testcases[i];
            // qstrlen is null-safe, so the "Null script" case (string == nullptr)
            // yields send == t.string here.
            const char *const send = t.string + qstrlen(t.string);
            // IncludeComments: the table's expected streams contain
            // HashComment/BracketComment tokens, so comments must be reported.
            Lexer lexer(t.string, send, Lexer::IncludeComments);
            cerr << t.name << ":";
            // Compare each produced token against expected[j]; any surplus
            // expected[] slots are zero-initialized, i.e. {None, nullptr}.
            for (int j = 0; !lexer.atEnd(); ++j) {
                QString result;
                Lexer::Token token = lexer.nextToken(result);
                Error error = lexer.error();
                if (t.expected[j].token != token) {
                    ok = false;
                    cerr << " expected token " << token2string(t.expected[j].token) << ", got " << token2string(token);
                }
                // QString::fromUtf8(nullptr) yields a null QString, so a
                // nullptr expectation only matches a null result.
                if (QString::fromUtf8(t.expected[j].result) != result) {
                    ok = false;
                    if (t.expected[j].result) {
                        cerr << " expected string \"" << t.expected[j].result << "\"";
                    } else {
                        cerr << " expected null string";
                    }
                    if (!result.toUtf8().isNull()) {
                        cerr << ", got \"" << result.toUtf8().data() << "\"";
                    } else {
                        cerr << ", got null string";
                    }
                }
                if (error && error.type() != t.expectedError) {
                    ok = false;
                    cerr << " expected error #" << (int)t.expectedError << ", got #" << (int)error.type();
                }
                if (error && (error.line() != t.errorLine || error.column() != t.errorCol)) {
                    ok = false;
                    cerr << " expected position (" << t.errorLine << "," << t.errorCol << "), got (" << error.line() << "," << error.column() << ")";
                }
                if (error) {
                    // An error ends the run for this case; jump past the
                    // premature-end check, which only applies to clean runs.
                    goto ErrorOut;
                }
                // {None, nullptr} is the sentinel terminating the expected list.
                if (t.expected[j].token == Lexer::None && t.expected[j].result == nullptr) {
                    break;
                }
            }
            // Expected list ran out (sentinel hit) while the lexer still has
            // tokens to deliver.
            if (!lexer.atEnd()) {
                ok = false;
                cerr << " premature end of expected token list";
            }
        ErrorOut:
            if (ok) {
                cerr << " ok";
            }
            cerr << endl;
            if (!ok) {
                success = false;
            }
        }
        if (!success) {
            return 1;
        }
    } else { // usage error
        cerr << "usage: lexertest [ <string> ]" << endl;
        exit(1);
    }

    return 0;
}