// File indexing completed on 2024-11-24 04:53:36
0001 /* 0002 This file is part of the kimap library. 0003 Copyright (C) 2007 Tom Albers <tomalbers@kde.nl> 0004 Copyright (c) 2007 Allen Winter <winter@kde.org> 0005 0006 This library is free software; you can redistribute it and/or 0007 modify it under the terms of the GNU Library General Public 0008 License version 2 as published by the Free Software Foundation. 0009 0010 This library is distributed in the hope that it will be useful, 0011 but WITHOUT ANY WARRANTY; without even the implied warranty of 0012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0013 Library General Public License for more details. 0014 0015 You should have received a copy of the GNU Library General Public License 0016 along with this library; see the file COPYING.LIB. If not, write to 0017 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0018 Boston, MA 02110-1301, USA. 0019 */ 0020 0021 #include <QDebug> 0022 #include <QTest> 0023 #include "test_rfccodecs.h" 0024 #include "Imap/Parser/3rdparty/rfccodecs.h" 0025 #include "Imap/Encoders.h" 0026 0027 typedef QMap<QByteArray, QByteArray> MapByteArrayByteArray; 0028 Q_DECLARE_METATYPE(MapByteArrayByteArray) 0029 0030 using namespace KIMAP; 0031 0032 void RFCCodecsTest::testIMAPEncoding() 0033 { 0034 QString encoded, decoded; 0035 0036 encoded = encodeImapFolderName( QStringLiteral("Test.Frode Rønning") ); 0037 QVERIFY( encoded == "Test.Frode R&APg-nning" ); 0038 decoded = decodeImapFolderName( "Test.Frode R&APg-nning" ); 0039 QVERIFY( decoded == QString::fromUtf8("Test.Frode Rønning") ); 0040 0041 encoded = encodeImapFolderName( QStringLiteral("Test.tom & jerry") ); 0042 QVERIFY( encoded == "Test.tom &- jerry" ); 0043 decoded = decodeImapFolderName( "Test.tom &- jerry" ); 0044 QVERIFY( decoded == "Test.tom & jerry" ); 0045 0046 // Try to feed already encoded 0047 encoded = encodeImapFolderName( QStringLiteral("Test.Cl&AOE-udio") ); 0048 QVERIFY( encoded == "Test.Cl&-AOE-udio" ); 0049 decoded = 
decodeImapFolderName( "Test.Cl&-AOE-udio" ); 0050 QVERIFY( decoded == "Test.Cl&AOE-udio" ); 0051 } 0052 0053 void RFCCodecsTest::testDecodeRFC2047String() 0054 { 0055 QFETCH( QByteArray, raw ); 0056 QFETCH( QString, decoded ); 0057 0058 QString res = Imap::decodeRFC2047String( raw ); 0059 0060 if ( res != decoded ) { 0061 if ( res.size() != decoded.size() ) { 0062 qDebug() << "Different size:" << res.size() << decoded.size(); 0063 } 0064 int size = qMin( res.size(), decoded.size() ); 0065 for ( int i = 0; i < size; ++i ) { 0066 QChar c1 = res.at(i); 0067 QChar c2 = decoded.at(i); 0068 if ( c1 == c2 ) { 0069 qDebug() << "OK" << i << QString::number( c1.unicode(), 16 ).prepend("0x") << c1; 0070 } else { 0071 qDebug() << "Offset" << i << QString::number( c1.unicode(), 16 ).prepend("0x") 0072 << QString::number( c2.unicode(), 16 ).prepend("0x") << c1 << c2; 0073 } 0074 } 0075 } 0076 0077 QCOMPARE( res, decoded ); 0078 } 0079 0080 void RFCCodecsTest::testDecodeRFC2047String_data() 0081 { 0082 QTest::addColumn<QByteArray>("raw"); 0083 QTest::addColumn<QString>("decoded"); 0084 0085 QTest::newRow("katuska-suject") 0086 << QByteArray("=?UTF-8?Q?moc=20pros=C3=ADm,=20mohl=20by=20ses=20na=20to=20kouk?= =?UTF-8?Q?nout=3F=20cht=C4=9Bla=20bych=20to=20m=C3=ADt=20spr=C3=A1vn?= =?UTF-8?Q?=C4=9B:,)?=") 0087 << QStringLiteral("moc prosím, mohl by ses na to kouknout? 
chtěla bych to mít správně:,)"); 0088 0089 QTest::newRow("jirka-prives") 0090 << QByteArray("=?UTF-8?Q?P=C5=AFj=C4=8Den=C3=AD=20p=C5=99=C3=ADv=C4=9Bsu=20na=20lod?=\r\n" 0091 "=?UTF-8?Q?=C4=9B?=") 0092 << QStringLiteral("Půjčení přívěsu na lodě"); 0093 0094 QTest::newRow("second-word-encoded") 0095 << QByteArray("Domen =?UTF-8?Q?Ko=C5=BEar?=") 0096 << QStringLiteral("Domen Kožar"); 0097 0098 QTest::newRow("B-iso-1-jkt") 0099 << QByteArray("=?ISO-8859-1?B?SmFuIEt1bmRy4XQ=?=") 0100 << QStringLiteral("Jan Kundrát"); 0101 0102 QTest::newRow("Q-iso-2-jkt") 0103 << QByteArray("=?ISO-8859-2?Q?Jan_Kundr=E1t?=") 0104 << QStringLiteral("Jan Kundrát"); 0105 0106 QTest::newRow("Q-iso-3-with-lang") 0107 << QByteArray("=?ISO-8859-2*CS?Q?Jan_Kundr=E1t?=") 0108 << QStringLiteral("Jan Kundrát"); 0109 0110 QTest::newRow("buggy-no-space-between-encoded-words") 0111 << QByteArray("=?ISO-8859-2?Q?Jan_Kundr=E1t?=XX=?ISO-8859-2?Q?Jan_Kundr=E1t?=") 0112 << QStringLiteral("Jan KundrátXXJan Kundrát"); 0113 0114 QTest::newRow("B-utf8-vodakove") 0115 << QByteArray("=?UTF-8?B?W3ZvZF0gUmU6IGthemltaXIgdnlyYXplbiB6ZSB6YXNpbGFuaSBza3VwaW55?= " 0116 "=?UTF-8?B?IChqZXN0bGkgbmUsIHRhayB0byBuZWN0aSBrYXppbWlyZSBhIHByaXpuZWog?= " 0117 "=?UTF-8?B?c2UgOm8p?=") 0118 << QStringLiteral("[vod] Re: kazimir vyrazen ze zasilani skupiny (jestli ne, tak to necti kazimire a priznej se :o)"); 0119 0120 QTest::newRow("Q-iso-2-ceskosaske") 0121 << QByteArray("=?ISO-8859-2?Q?=C8eskosask=E9_=A9v=FDcarsko=3A_podzimn=ED_?= " 0122 "=?ISO-8859-2?Q?nostalgie?=") 0123 << QStringLiteral("Českosaské Švýcarsko: podzimní nostalgie"); 0124 0125 QTest::newRow("B-utf8-empty") 0126 // careful to prevent the compiler from interpreting this is a trigraph/ 0127 << QByteArray("=?UTF-8?B?" 
"?=") 0128 << QStringLiteral(""); 0129 0130 // This is in violation from RFC2047, but some mailers do produce this 0131 QTest::newRow("Q-utf8-multiword-upc") 0132 << QByteArray("=?utf-8?q?Studie pro podnikov=C3=A9 z=C3=A1kazn=C3=ADky spole=C4=8Dnosti UPC Business?=") 0133 << QStringLiteral("Studie pro podnikové zákazníky společnosti UPC Business"); 0134 0135 // Again, this violates RFC2047 0136 QTest::newRow("Q-utf8-multiword-csa") 0137 << QByteArray("=?utf-8?Q?HOLIDAYS Czech Airlines?=") 0138 << QStringLiteral("HOLIDAYS Czech Airlines"); 0139 0140 // No spaces around the encoded-word 0141 // Vaguely inspired by http://notmuchmail.org/pipermail/notmuch/2013/015594.html, except that this check for both 0142 // leading and trailing space. Looks like GMime is said to support both of these as well. 0143 QTest::newRow("no-space-around-encoded-words") 0144 << QByteArray("From=?UTF-8?Q?Thomas=20L=C3=BCbking=20?=<thomas.luebking@gmail.com>") 0145 << QStringLiteral("FromThomas Lübking <thomas.luebking@gmail.com>"); 0146 0147 QTest::newRow("unescaped") 0148 << QByteArray("blesmrt") 0149 << QStringLiteral("blesmrt"); 0150 0151 QTest::newRow("rfc2047-ex-1") 0152 << QByteArray("(=?ISO-8859-1?Q?a?=)") 0153 << QStringLiteral("(a)"); 0154 0155 QTest::newRow("rfc2047-ex-2") 0156 << QByteArray("(=?ISO-8859-1?Q?a?= b)") 0157 << QStringLiteral("(a b)"); 0158 0159 QTest::newRow("rfc2047-ex-3") 0160 << QByteArray("(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)") 0161 << QStringLiteral("(ab)"); 0162 0163 QTest::newRow("rfc2047-ex-4") 0164 << QByteArray("(=?ISO-8859-1?Q?a?= \n \t =?ISO-8859-1?Q?b?=)") 0165 << QStringLiteral("(ab)"); 0166 0167 QTest::newRow("rfc2047-ex-5") 0168 << QByteArray("(=?ISO-8859-1?Q?a_b?=)") 0169 << QStringLiteral("(a b)"); 0170 0171 QTest::newRow("rfc2047-ex-6") 0172 << QByteArray("(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)") 0173 << QStringLiteral("(a b)"); 0174 0175 QTest::newRow("ascii") 0176 << QByteArray("foo bar baz blah ble") 0177 << QStringLiteral("foo bar baz 
blah ble"); 0178 0179 QTest::newRow("tb-ascii-then-unicode") 0180 << QByteArray("[foo] johoho tohlencto je ale pekne =?UTF-8?B?YmzEmyBzbXJ0IHRyb2o=?=\n" 0181 " =?UTF-8?B?aXRhIHMgbWF0b3ZvdSBvbWFja291?=") 0182 << QStringLiteral("[foo] johoho tohlencto je ale pekne blě smrt trojita s matovou omackou"); 0183 0184 QTest::newRow("ascii-then-unicode-then-ascii") 0185 << QByteArray("[foo] johoho tohlencto je ale pekne =?UTF-8?B?YmzEmyBzbXJ0IHRyb2o=?=\n" 0186 " =?UTF-8?B?aXRhIHMgbWF0b3ZvdSBvbWFja291?= blabla") 0187 << QStringLiteral("[foo] johoho tohlencto je ale pekne blě smrt trojita s matovou omackou blabla"); 0188 0189 QTest::newRow("QP-malformed-1") 0190 << QByteArray("=?ISO-8859-2?Q?Jan_Kundr=xxt?=") 0191 << QStringLiteral("Jan Kundr=xxt"); 0192 0193 QTest::newRow("unrecognized-encoding") 0194 << QByteArray("=?trojitapwnedencoding?Q?=c4=9b=c5=a1=c4=8d?=") 0195 << QStringLiteral("ěšč"); 0196 } 0197 0198 void RFCCodecsTest::testEncodeRFC2047StringAsciiPrefix() 0199 { 0200 QFETCH(QString, input); 0201 QFETCH(QByteArray, encoded); 0202 0203 QCOMPARE(Imap::encodeRFC2047StringWithAsciiPrefix(input), encoded); 0204 QCOMPARE(Imap::decodeRFC2047String(Imap::encodeRFC2047StringWithAsciiPrefix(input)), input); 0205 } 0206 0207 void RFCCodecsTest::testEncodeRFC2047StringAsciiPrefix_data() 0208 { 0209 QTest::addColumn<QString>("input"); 0210 QTest::addColumn<QByteArray>("encoded"); 0211 0212 QTest::newRow("empty") << QString() << QByteArray(); 0213 QTest::newRow("simple-ascii") << QStringLiteral("ahoj") << QByteArray("ahoj"); 0214 QTest::newRow("simple-ascii-multiword") 0215 << QStringLiteral("ahoj, johoho! at tece rum!") 0216 << QByteArray("ahoj, johoho! 
at tece rum!"); 0217 QTest::newRow("jan-kundrat") << QStringLiteral("Jan Kundrát") << QByteArray("Jan =?iso-8859-1?Q?Kundr=E1t?="); 0218 QTest::newRow("jan-kundrat-e") << QStringLiteral("Jan Kundrát ě") << QByteArray("Jan =?utf-8?B?S3VuZHLDoXQgxJs=?="); 0219 QTest::newRow("czech") << QStringLiteral("ě") << QByteArray("=?utf-8?B?xJs=?="); 0220 QTest::newRow("trojita-subjects") << QStringLiteral("[trojita] foo bar blesmrt") << QByteArray("[trojita] foo bar blesmrt"); 0221 QTest::newRow("trojita-subjects-utf") << QStringLiteral("[trojita] foo bar ěščřžýáíé") 0222 << QByteArray("[trojita] foo bar =?utf-8?B?xJvFocSNxZnFvsO9w6HDrcOp?="); 0223 0224 QTest::newRow("crlf") << QStringLiteral("\r\n") 0225 << QByteArray("=?iso-8859-1?Q?=0D=0A?="); 0226 0227 QTest::newRow("long-text-with-utf") 0228 // again, be careful with that trigraph 0229 << QString::fromUtf8("[Trojitá - Bug #553] (New) Subject \"=?UTF-8?B?" "?=\" not decoded ěščřžýáíé") 0230 << QByteArray("=?utf-8?B?W1Ryb2ppdMOhIC0gQnVnICM1NTNdIChOZXcpIFN1YmplY3QgIj0/VVRGLTg/Qg==?=\r\n" 0231 " =?utf-8?B?Pz89IiBub3QgZGVjb2RlZCDEm8WhxI3FmcW+w73DocOtw6k=?="); 0232 0233 // Make sure that QP-specials are escaped 0234 QTest::newRow("prevent-unescaped-rfc2047") << QStringLiteral("ble =?") << QByteArray("ble =?iso-8859-1?Q?=3D=3F?="); 0235 0236 QTest::newRow("empty-subject") 0237 << QStringLiteral("Subject: ") 0238 << QByteArray("Subject: "); 0239 0240 // Is this actually correct? 0241 QTest::newRow("spaces-in-subject") 0242 << QStringLiteral("Subject: ") 0243 << QByteArray("Subject: "); 0244 0245 QTest::newRow("subject-newline") 0246 << QStringLiteral("Subject: \n") 0247 << QByteArray("Subject: =?iso-8859-1?Q?=0A?="); 0248 0249 QTest::newRow("correct-prefix-wrapping-utf") 0250 << QString::fromUtf8("Prefix: .1.........2.........3.........4.........5.........6.........7 23456 " 0251 "seventy-six bytes has been used before the 'seventy' word appeared. 
Let's force UTF-8 now: " 0252 "ěščřžýáíé") 0253 // Yep, this isn't great, the second "line" shall actually *be* separated by a newline, so that the total length of any 0254 // line is smaller than 78 chars. The thing is, this is not really easy. 0255 << QByteArray("Prefix: .1.........2.........3.........4.........5.........6.........7 23456" 0256 " =?utf-8?B?c2V2ZW50eS1zaXggYnl0ZXMgaGFzIGJlZW4gdXNlZCBiZWZvcmUgdGhlICdzZQ==?=\r\n" 0257 " =?utf-8?B?dmVudHknIHdvcmQgYXBwZWFyZWQuIExldCdzIGZvcmNlIFVURi04IG5vdzog?=\r\n" 0258 " =?utf-8?B?xJvFocSNxZnFvsO9w6HDrcOp?="); 0259 0260 QTest::newRow("correct-prefix-wrapping-latin1") 0261 << QString::fromUtf8("Prefix: .1.........2.........3.........4.........5.........6.........7 23456 " 0262 "seventy-six bytes has been used before the 'seventy' word appeared. Let's force Latin-1 now: á") 0263 // Same issue as with correct-prefix-wrapping-utf 0264 << QByteArray("Prefix: .1.........2.........3.........4.........5.........6.........7 23456" 0265 " =?iso-8859-1?Q?seventy-six_bytes_has_been_used_before_the_'seventy'_word_?=\r\n" 0266 " =?iso-8859-1?Q?appeared._Let's_force_Latin-1_now:_=E1?="); 0267 0268 } 0269 0270 /** @short Check that the "phrase" production of RFC2047 is special wrt. e.g. quoting some special characters */ 0271 void RFCCodecsTest::testEncodeRFC2047Phrase() 0272 { 0273 QFETCH(QString, text); 0274 QFETCH(QByteArray, encoded); 0275 // wrapped in QString to make sure the test renders the output 0276 QCOMPARE(QString::fromUtf8(Imap::encodeRFC2047Phrase(text)), QString::fromUtf8(encoded)); 0277 0278 // Check that the data survive the roundtrip. 0279 if (!encoded.startsWith('"')) { 0280 // This is a special case; the encodeRFC2047Phrase is magic because it auto-adds quotes if needed, while 0281 // the corresponding decoder assumes that the RFC5322-style quoting has been already undone. 
0282 QCOMPARE(Imap::decodeRFC2047String(Imap::encodeRFC2047Phrase(text)), text); 0283 } 0284 } 0285 0286 void RFCCodecsTest::testEncodeRFC2047Phrase_data() 0287 { 0288 QTest::addColumn<QString>("text"); 0289 QTest::addColumn<QByteArray>("encoded"); 0290 0291 QTest::newRow("dummy text") << QStringLiteral("foo bar") << QByteArray("foo bar"); 0292 QTest::newRow("latin1") << QStringLiteral("Jan Kundrát") << QByteArray("=?iso-8859-1?Q?Jan_Kundr=E1t?="); 0293 QTest::newRow("utf-8") << QStringLiteral("Ελληνικά") << QByteArray("=?utf-8?B?zpXOu867zrfOvc65zrrOrA==?="); 0294 0295 QTest::newRow("ascii-parentheses") << QStringLiteral("Foo Bar (Test Thing)") << QByteArray("\"Foo Bar (Test Thing)\""); 0296 QTest::newRow("latin1-parentheses") << QStringLiteral("Jan Kundrát (Test Thing)") << QByteArray("=?iso-8859-1?Q?Jan_Kundr=E1t_=28Test_Thing=29?="); 0297 QTest::newRow("utf8-parentheses") << QStringLiteral("Ελληνικά (Test Thing)") << QByteArray("=?utf-8?B?zpXOu867zrfOvc65zrrOrCAoVGVzdCBUaGluZyk=?="); 0298 } 0299 0300 void RFCCodecsTest::testRfc2231Decoding() 0301 { 0302 QFETCH(MapByteArrayByteArray, params); 0303 QFETCH(QByteArray, key); 0304 QFETCH(QString, expected); 0305 0306 QCOMPARE(Imap::extractRfc2231Param(params, key), expected); 0307 } 0308 0309 void RFCCodecsTest::testRfc2231Decoding_data() 0310 { 0311 QTest::addColumn<MapByteArrayByteArray>("params"); 0312 QTest::addColumn<QByteArray>("key"); 0313 QTest::addColumn<QString>("expected"); 0314 0315 MapByteArrayByteArray map; 0316 // just continuation 0317 map["URL*0"] = "ftp://"; 0318 map["URL*1"] = "cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"; 0319 // nothing fancy 0320 map["completeURL"] = "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"; 0321 // just the lang/encoding 0322 map["completeTitle*"] = "us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A"; 0323 // combined continuation and lang/encoding 0324 map["title*0*"] = "us-ascii'en'This%20is%20even%20more%20"; 0325 map["title*1*"] = 
"%2A%2A%2Afun%2A%2A%2A%20"; 0326 map["title*2"] = "isn't it!"; 0327 // similar to the above, but all values end with a star 0328 map["title2*0*"] = "us-ascii'en'This%20is%20even%20more%20"; 0329 map["title2*1*"] = "%2A%2A%2Afun%2A%2A%2A%20"; 0330 map["title2*2*"] = "isn't it!"; 0331 // the middle one is missing a star 0332 map["title3*0*"] = "us-ascii'en'This%20is%20even%20more%20"; 0333 map["title3*1"] = "%2A%2A%2Afun%2A%2A%2A%20"; 0334 map["title3*2*"] = "isn't it!"; 0335 // some utf-8 bits 0336 map["raw-utf8"] = "\xc4\x9b\xc5\xa1\xc4\x8d"; 0337 map["utf8-2047"] = "=?utf8?Q?=c4=9b=c5=a1=c4=8d?="; 0338 map["utf8-wo-lang*"] = "utf8''%c4%9b%c5%a1%c4%8d"; 0339 map["utf8-wo-lang-wo-enc*"] = "''%c4%9b%c5%a1%c4%8d"; 0340 map["utf8-en*"] = "utf8'en'%c4%9b%c5%a1%c4%8d"; 0341 map["utf8-wo-enc-lang*"] = "'en'%c4%9b%c5%a1%c4%8d"; 0342 0343 QTest::newRow("notfound") << map << QByteArray("notfound") << QString(); 0344 QTest::newRow("boring") << map << QByteArray("completeURL") << QStringLiteral("ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"); 0345 QTest::newRow("continuation") << map << QByteArray("URL") << QStringLiteral("ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"); 0346 QTest::newRow("lang") << map << QByteArray("completeTitle") << QStringLiteral("This is ***fun***"); 0347 QTest::newRow("continuation-lang-wo-stars") << map << QByteArray("title") << QStringLiteral("This is even more ***fun*** isn't it!"); 0348 QTest::newRow("continuation-lang-all-stars") << map << QByteArray("title2") << QStringLiteral("This is even more ***fun*** isn't it!"); 0349 QTest::newRow("continuation-lang-wo-star-in-the-middle") << map << QByteArray("title3") << QStringLiteral("This is even more ***fun*** isn't it!"); 0350 QTest::newRow("raw-utf8") << map << QByteArray("raw-utf8") << QStringLiteral("ěšč"); 0351 QTest::newRow("utf8-2047") << map << QByteArray("utf8-2047") << QStringLiteral("ěšč"); 0352 QTest::newRow("utf8-wo-lang") << map << QByteArray("utf8-wo-lang") << 
QStringLiteral("ěšč"); 0353 QTest::newRow("utf8-wo-lang-wo-enc") << map << QByteArray("utf8-wo-lang-wo-enc") << QStringLiteral("ěšč"); 0354 QTest::newRow("utf8-en") << map << QByteArray("utf8-en") << QStringLiteral("ěšč"); 0355 QTest::newRow("utf8-wo-enc-lang") << map << QByteArray("utf8-wo-enc-lang") << QStringLiteral("ěšč"); 0356 } 0357 0358 void RFCCodecsTest::testRfc2231Encoding() 0359 { 0360 QFETCH(QString, unicode); 0361 QFETCH(QByteArray, serialized); 0362 0363 QCOMPARE(QString::fromUtf8(Imap::encodeRfc2231Parameter("x", unicode)), QString::fromUtf8(serialized)); 0364 } 0365 0366 void RFCCodecsTest::testRfc2231Encoding_data() 0367 { 0368 QTest::addColumn<QString>("unicode"); 0369 QTest::addColumn<QByteArray>("serialized"); 0370 0371 QTest::newRow("empty") << QString() << QByteArray("x=\"\""); 0372 QTest::newRow("ascii") << QStringLiteral("ahoj") << QByteArray("x=ahoj"); 0373 QTest::newRow("filename") << QStringLiteral("AhojZ-jak-se_masz-039.txt") << QByteArray("x=AhojZ-jak-se_masz-039.txt"); 0374 QTest::newRow("utf-8") << QStringLiteral("ěšč") << QByteArray("x*=utf-8''%C4%9B%C5%A1%C4%8D"); 0375 QTest::newRow("question-mark") << QStringLiteral("?") << QByteArray("x*=utf-8''%3F"); 0376 } 0377 0378 QTEST_GUILESS_MAIN( RFCCodecsTest )