File indexing completed on 2024-11-24 04:53:36

0001 /*
0002    This file is part of the kimap library.
0003    Copyright (C) 2007 Tom Albers <tomalbers@kde.nl>
0004    Copyright (c) 2007 Allen Winter <winter@kde.org>
0005 
0006    This library is free software; you can redistribute it and/or
0007    modify it under the terms of the GNU Library General Public
0008    License version 2 as published by the Free Software Foundation.
0009 
0010    This library is distributed in the hope that it will be useful,
0011    but WITHOUT ANY WARRANTY; without even the implied warranty of
0012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0013    Library General Public License for more details.
0014 
0015    You should have received a copy of the GNU Library General Public License
0016    along with this library; see the file COPYING.LIB.  If not, write to
0017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0018    Boston, MA 02110-1301, USA.
0019 */
0020 
0021 #include <QDebug>
0022 #include <QTest>
0023 #include "test_rfccodecs.h"
0024 #include "Imap/Parser/3rdparty/rfccodecs.h"
0025 #include "Imap/Encoders.h"
0026 
0027 typedef QMap<QByteArray, QByteArray> MapByteArrayByteArray;
0028 Q_DECLARE_METATYPE(MapByteArrayByteArray)
0029 
0030 using namespace KIMAP;
0031 
0032 void RFCCodecsTest::testIMAPEncoding()
0033 {
0034   QString encoded, decoded;
0035 
0036   encoded = encodeImapFolderName( QStringLiteral("Test.Frode Rønning") );
0037   QVERIFY( encoded == "Test.Frode R&APg-nning" );
0038   decoded = decodeImapFolderName( "Test.Frode R&APg-nning" );
0039   QVERIFY( decoded == QString::fromUtf8("Test.Frode Rønning") );
0040 
0041   encoded = encodeImapFolderName( QStringLiteral("Test.tom & jerry") );
0042   QVERIFY( encoded == "Test.tom &- jerry" );
0043   decoded = decodeImapFolderName( "Test.tom &- jerry" );
0044   QVERIFY( decoded == "Test.tom & jerry" );
0045 
0046   // Try to feed already encoded
0047   encoded = encodeImapFolderName( QStringLiteral("Test.Cl&AOE-udio") );
0048   QVERIFY( encoded == "Test.Cl&-AOE-udio" );
0049   decoded = decodeImapFolderName( "Test.Cl&-AOE-udio" );
0050   QVERIFY( decoded == "Test.Cl&AOE-udio" );
0051 }
0052 
0053 void RFCCodecsTest::testDecodeRFC2047String()
0054 {
0055     QFETCH( QByteArray, raw );
0056     QFETCH( QString, decoded );
0057 
0058     QString res = Imap::decodeRFC2047String( raw );
0059 
0060     if ( res != decoded ) {
0061         if ( res.size() != decoded.size() ) {
0062             qDebug() << "Different size:" << res.size() << decoded.size();
0063         }
0064         int size = qMin( res.size(), decoded.size() );
0065         for ( int i = 0; i < size; ++i ) {
0066             QChar c1 = res.at(i);
0067             QChar c2 = decoded.at(i);
0068             if ( c1 == c2 ) {
0069                 qDebug() << "OK" << i << QString::number( c1.unicode(), 16 ).prepend("0x") << c1;
0070             } else {
0071                 qDebug() << "Offset" << i << QString::number( c1.unicode(), 16 ).prepend("0x")
0072                         << QString::number( c2.unicode(), 16 ).prepend("0x") << c1 << c2;
0073             }
0074         }
0075     }
0076 
0077     QCOMPARE( res, decoded );
0078 }
0079 
0080 void RFCCodecsTest::testDecodeRFC2047String_data()
0081 {
0082     QTest::addColumn<QByteArray>("raw");
0083     QTest::addColumn<QString>("decoded");
0084 
0085     QTest::newRow("katuska-suject")
0086         << QByteArray("=?UTF-8?Q?moc=20pros=C3=ADm,=20mohl=20by=20ses=20na=20to=20kouk?= =?UTF-8?Q?nout=3F=20cht=C4=9Bla=20bych=20to=20m=C3=ADt=20spr=C3=A1vn?= =?UTF-8?Q?=C4=9B:,)?=")
0087         << QStringLiteral("moc prosím, mohl by ses na to kouknout? chtěla bych to mít správně:,)");
0088 
0089     QTest::newRow("jirka-prives")
0090         << QByteArray("=?UTF-8?Q?P=C5=AFj=C4=8Den=C3=AD=20p=C5=99=C3=ADv=C4=9Bsu=20na=20lod?=\r\n"
0091                       "=?UTF-8?Q?=C4=9B?=")
0092         << QStringLiteral("Půjčení přívěsu na lodě");
0093 
0094     QTest::newRow("second-word-encoded")
0095         << QByteArray("Domen =?UTF-8?Q?Ko=C5=BEar?=")
0096         << QStringLiteral("Domen Kožar");
0097 
0098     QTest::newRow("B-iso-1-jkt")
0099         << QByteArray("=?ISO-8859-1?B?SmFuIEt1bmRy4XQ=?=")
0100         << QStringLiteral("Jan Kundrát");
0101 
0102     QTest::newRow("Q-iso-2-jkt")
0103         << QByteArray("=?ISO-8859-2?Q?Jan_Kundr=E1t?=")
0104         << QStringLiteral("Jan Kundrát");
0105 
0106     QTest::newRow("Q-iso-3-with-lang")
0107         << QByteArray("=?ISO-8859-2*CS?Q?Jan_Kundr=E1t?=")
0108         << QStringLiteral("Jan Kundrát");
0109 
0110     QTest::newRow("buggy-no-space-between-encoded-words")
0111         << QByteArray("=?ISO-8859-2?Q?Jan_Kundr=E1t?=XX=?ISO-8859-2?Q?Jan_Kundr=E1t?=")
0112         << QStringLiteral("Jan KundrátXXJan Kundrát");
0113 
0114     QTest::newRow("B-utf8-vodakove")
0115         << QByteArray("=?UTF-8?B?W3ZvZF0gUmU6IGthemltaXIgdnlyYXplbiB6ZSB6YXNpbGFuaSBza3VwaW55?= "
0116                       "=?UTF-8?B?IChqZXN0bGkgbmUsIHRhayB0byBuZWN0aSBrYXppbWlyZSBhIHByaXpuZWog?= "
0117                       "=?UTF-8?B?c2UgOm8p?=")
0118         << QStringLiteral("[vod] Re: kazimir vyrazen ze zasilani skupiny (jestli ne, tak to necti kazimire a priznej se :o)");
0119 
0120     QTest::newRow("Q-iso-2-ceskosaske")
0121         << QByteArray("=?ISO-8859-2?Q?=C8eskosask=E9_=A9v=FDcarsko=3A_podzimn=ED_?= "
0122                       "=?ISO-8859-2?Q?nostalgie?=")
0123         << QStringLiteral("Českosaské Švýcarsko: podzimní nostalgie");
0124 
0125     QTest::newRow("B-utf8-empty")
0126         // careful to prevent the compiler from interpreting this is a trigraph/
0127         << QByteArray("=?UTF-8?B?" "?=")
0128         << QStringLiteral("");
0129 
0130     // This is in violation from RFC2047, but some mailers do produce this
0131     QTest::newRow("Q-utf8-multiword-upc")
0132         << QByteArray("=?utf-8?q?Studie pro podnikov=C3=A9 z=C3=A1kazn=C3=ADky spole=C4=8Dnosti UPC Business?=")
0133         << QStringLiteral("Studie pro podnikové zákazníky společnosti UPC Business");
0134 
0135     // Again, this violates RFC2047
0136     QTest::newRow("Q-utf8-multiword-csa")
0137         << QByteArray("=?utf-8?Q?HOLIDAYS Czech Airlines?=")
0138         << QStringLiteral("HOLIDAYS Czech Airlines");
0139 
0140     // No spaces around the encoded-word
0141     // Vaguely inspired by http://notmuchmail.org/pipermail/notmuch/2013/015594.html, except that this check for both
0142     // leading and trailing space. Looks like GMime is said to support both of these as well.
0143     QTest::newRow("no-space-around-encoded-words")
0144         << QByteArray("From=?UTF-8?Q?Thomas=20L=C3=BCbking=20?=<thomas.luebking@gmail.com>")
0145         << QStringLiteral("FromThomas Lübking <thomas.luebking@gmail.com>");
0146 
0147     QTest::newRow("unescaped")
0148         << QByteArray("blesmrt")
0149         << QStringLiteral("blesmrt");
0150 
0151     QTest::newRow("rfc2047-ex-1")
0152         << QByteArray("(=?ISO-8859-1?Q?a?=)")
0153         << QStringLiteral("(a)");
0154 
0155     QTest::newRow("rfc2047-ex-2")
0156         << QByteArray("(=?ISO-8859-1?Q?a?= b)")
0157         << QStringLiteral("(a b)");
0158 
0159     QTest::newRow("rfc2047-ex-3")
0160         << QByteArray("(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)")
0161         << QStringLiteral("(ab)");
0162 
0163     QTest::newRow("rfc2047-ex-4")
0164         << QByteArray("(=?ISO-8859-1?Q?a?= \n \t =?ISO-8859-1?Q?b?=)")
0165         << QStringLiteral("(ab)");
0166 
0167     QTest::newRow("rfc2047-ex-5")
0168         << QByteArray("(=?ISO-8859-1?Q?a_b?=)")
0169         << QStringLiteral("(a b)");
0170 
0171     QTest::newRow("rfc2047-ex-6")
0172         << QByteArray("(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)")
0173         << QStringLiteral("(a b)");
0174 
0175     QTest::newRow("ascii")
0176         << QByteArray("foo bar baz  blah ble")
0177         << QStringLiteral("foo bar baz  blah ble");
0178 
0179     QTest::newRow("tb-ascii-then-unicode")
0180         << QByteArray("[foo] johoho tohlencto je ale pekne =?UTF-8?B?YmzEmyBzbXJ0IHRyb2o=?=\n"
0181                       " =?UTF-8?B?aXRhIHMgbWF0b3ZvdSBvbWFja291?=")
0182         << QStringLiteral("[foo] johoho tohlencto je ale pekne blě smrt trojita s matovou omackou");
0183 
0184     QTest::newRow("ascii-then-unicode-then-ascii")
0185         << QByteArray("[foo] johoho tohlencto je ale pekne =?UTF-8?B?YmzEmyBzbXJ0IHRyb2o=?=\n"
0186                       " =?UTF-8?B?aXRhIHMgbWF0b3ZvdSBvbWFja291?= blabla")
0187         << QStringLiteral("[foo] johoho tohlencto je ale pekne blě smrt trojita s matovou omackou blabla");
0188 
0189     QTest::newRow("QP-malformed-1")
0190         << QByteArray("=?ISO-8859-2?Q?Jan_Kundr=xxt?=")
0191         << QStringLiteral("Jan Kundr=xxt");
0192 
0193     QTest::newRow("unrecognized-encoding")
0194         << QByteArray("=?trojitapwnedencoding?Q?=c4=9b=c5=a1=c4=8d?=")
0195         << QStringLiteral("ěšč");
0196 }
0197 
0198 void RFCCodecsTest::testEncodeRFC2047StringAsciiPrefix()
0199 {
0200     QFETCH(QString, input);
0201     QFETCH(QByteArray, encoded);
0202 
0203     QCOMPARE(Imap::encodeRFC2047StringWithAsciiPrefix(input), encoded);
0204     QCOMPARE(Imap::decodeRFC2047String(Imap::encodeRFC2047StringWithAsciiPrefix(input)), input);
0205 }
0206 
0207 void RFCCodecsTest::testEncodeRFC2047StringAsciiPrefix_data()
0208 {
0209     QTest::addColumn<QString>("input");
0210     QTest::addColumn<QByteArray>("encoded");
0211 
0212     QTest::newRow("empty") << QString() << QByteArray();
0213     QTest::newRow("simple-ascii") << QStringLiteral("ahoj") << QByteArray("ahoj");
0214     QTest::newRow("simple-ascii-multiword")
0215             << QStringLiteral("ahoj, johoho! at tece rum!")
0216             << QByteArray("ahoj, johoho! at tece rum!");
0217     QTest::newRow("jan-kundrat") << QStringLiteral("Jan Kundrát") << QByteArray("Jan =?iso-8859-1?Q?Kundr=E1t?=");
0218     QTest::newRow("jan-kundrat-e") << QStringLiteral("Jan Kundrát ě") << QByteArray("Jan =?utf-8?B?S3VuZHLDoXQgxJs=?=");
0219     QTest::newRow("czech") << QStringLiteral("ě") << QByteArray("=?utf-8?B?xJs=?=");
0220     QTest::newRow("trojita-subjects") << QStringLiteral("[trojita] foo bar blesmrt") << QByteArray("[trojita] foo bar blesmrt");
0221     QTest::newRow("trojita-subjects-utf") << QStringLiteral("[trojita] foo bar ěščřžýáíé")
0222         << QByteArray("[trojita] foo bar =?utf-8?B?xJvFocSNxZnFvsO9w6HDrcOp?=");
0223 
0224     QTest::newRow("crlf") << QStringLiteral("\r\n")
0225         << QByteArray("=?iso-8859-1?Q?=0D=0A?=");
0226 
0227     QTest::newRow("long-text-with-utf")
0228         // again, be careful with that trigraph
0229         << QString::fromUtf8("[Trojitá - Bug #553] (New) Subject \"=?UTF-8?B?" "?=\" not decoded ěščřžýáíé")
0230         << QByteArray("=?utf-8?B?W1Ryb2ppdMOhIC0gQnVnICM1NTNdIChOZXcpIFN1YmplY3QgIj0/VVRGLTg/Qg==?=\r\n"
0231                       " =?utf-8?B?Pz89IiBub3QgZGVjb2RlZCDEm8WhxI3FmcW+w73DocOtw6k=?=");
0232 
0233     // Make sure that QP-specials are escaped
0234     QTest::newRow("prevent-unescaped-rfc2047") << QStringLiteral("ble =?") << QByteArray("ble =?iso-8859-1?Q?=3D=3F?=");
0235 
0236     QTest::newRow("empty-subject")
0237         << QStringLiteral("Subject: ")
0238         << QByteArray("Subject: ");
0239 
0240     // Is this actually correct?
0241     QTest::newRow("spaces-in-subject")
0242         << QStringLiteral("Subject:  ")
0243         << QByteArray("Subject:  ");
0244 
0245     QTest::newRow("subject-newline")
0246         << QStringLiteral("Subject: \n")
0247         << QByteArray("Subject: =?iso-8859-1?Q?=0A?=");
0248 
0249     QTest::newRow("correct-prefix-wrapping-utf")
0250         << QString::fromUtf8("Prefix: .1.........2.........3.........4.........5.........6.........7 23456 "
0251                              "seventy-six bytes has been used before the 'seventy' word appeared. Let's force UTF-8 now: "
0252                              "ěščřžýáíé")
0253         // Yep, this isn't great, the second "line" shall actually *be* separated by a newline, so that the total length of any
0254         // line is smaller than 78 chars. The thing is, this is not really easy.
0255         << QByteArray("Prefix: .1.........2.........3.........4.........5.........6.........7 23456"
0256                       " =?utf-8?B?c2V2ZW50eS1zaXggYnl0ZXMgaGFzIGJlZW4gdXNlZCBiZWZvcmUgdGhlICdzZQ==?=\r\n"
0257                       " =?utf-8?B?dmVudHknIHdvcmQgYXBwZWFyZWQuIExldCdzIGZvcmNlIFVURi04IG5vdzog?=\r\n"
0258                       " =?utf-8?B?xJvFocSNxZnFvsO9w6HDrcOp?=");
0259 
0260     QTest::newRow("correct-prefix-wrapping-latin1")
0261         << QString::fromUtf8("Prefix: .1.........2.........3.........4.........5.........6.........7 23456 "
0262                              "seventy-six bytes has been used before the 'seventy' word appeared. Let's force Latin-1 now: á")
0263         // Same issue as with correct-prefix-wrapping-utf
0264         << QByteArray("Prefix: .1.........2.........3.........4.........5.........6.........7 23456"
0265                       " =?iso-8859-1?Q?seventy-six_bytes_has_been_used_before_the_'seventy'_word_?=\r\n"
0266                       " =?iso-8859-1?Q?appeared._Let's_force_Latin-1_now:_=E1?=");
0267 
0268 }
0269 
0270 /** @short Check that the "phrase" production of RFC2047 is special wrt. e.g. quoting some special characters */
0271 void RFCCodecsTest::testEncodeRFC2047Phrase()
0272 {
0273     QFETCH(QString, text);
0274     QFETCH(QByteArray, encoded);
0275     // wrapped in QString to make sure the test renders the output
0276     QCOMPARE(QString::fromUtf8(Imap::encodeRFC2047Phrase(text)), QString::fromUtf8(encoded));
0277 
0278     // Check that the data survive the roundtrip.
0279     if (!encoded.startsWith('"')) {
0280         // This is a special case; the encodeRFC2047Phrase is magic because it auto-adds quotes if needed, while
0281         // the corresponding decoder assumes that the RFC5322-style quoting has been already undone.
0282         QCOMPARE(Imap::decodeRFC2047String(Imap::encodeRFC2047Phrase(text)), text);
0283     }
0284 }
0285 
0286 void RFCCodecsTest::testEncodeRFC2047Phrase_data()
0287 {
0288     QTest::addColumn<QString>("text");
0289     QTest::addColumn<QByteArray>("encoded");
0290 
0291     QTest::newRow("dummy text") << QStringLiteral("foo bar") << QByteArray("foo bar");
0292     QTest::newRow("latin1") << QStringLiteral("Jan Kundrát") << QByteArray("=?iso-8859-1?Q?Jan_Kundr=E1t?=");
0293     QTest::newRow("utf-8") << QStringLiteral("Ελληνικά") << QByteArray("=?utf-8?B?zpXOu867zrfOvc65zrrOrA==?=");
0294 
0295     QTest::newRow("ascii-parentheses") << QStringLiteral("Foo Bar (Test Thing)") << QByteArray("\"Foo Bar (Test Thing)\"");
0296     QTest::newRow("latin1-parentheses") << QStringLiteral("Jan Kundrát (Test Thing)") << QByteArray("=?iso-8859-1?Q?Jan_Kundr=E1t_=28Test_Thing=29?=");
0297     QTest::newRow("utf8-parentheses") << QStringLiteral("Ελληνικά (Test Thing)") << QByteArray("=?utf-8?B?zpXOu867zrfOvc65zrrOrCAoVGVzdCBUaGluZyk=?=");
0298 }
0299 
0300 void RFCCodecsTest::testRfc2231Decoding()
0301 {
0302     QFETCH(MapByteArrayByteArray, params);
0303     QFETCH(QByteArray, key);
0304     QFETCH(QString, expected);
0305 
0306     QCOMPARE(Imap::extractRfc2231Param(params, key), expected);
0307 }
0308 
0309 void RFCCodecsTest::testRfc2231Decoding_data()
0310 {
0311     QTest::addColumn<MapByteArrayByteArray>("params");
0312     QTest::addColumn<QByteArray>("key");
0313     QTest::addColumn<QString>("expected");
0314 
0315     MapByteArrayByteArray map;
0316     // just continuation
0317     map["URL*0"] = "ftp://";
0318     map["URL*1"] = "cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar";
0319     // nothing fancy
0320     map["completeURL"] = "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar";
0321     // just the lang/encoding
0322     map["completeTitle*"] = "us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A";
0323     // combined continuation and lang/encoding
0324     map["title*0*"] = "us-ascii'en'This%20is%20even%20more%20";
0325     map["title*1*"] = "%2A%2A%2Afun%2A%2A%2A%20";
0326     map["title*2"] = "isn't it!";
0327     // similar to the above, but all values end with a star
0328     map["title2*0*"] = "us-ascii'en'This%20is%20even%20more%20";
0329     map["title2*1*"] = "%2A%2A%2Afun%2A%2A%2A%20";
0330     map["title2*2*"] = "isn't it!";
0331     // the middle one is missing a star
0332     map["title3*0*"] = "us-ascii'en'This%20is%20even%20more%20";
0333     map["title3*1"] = "%2A%2A%2Afun%2A%2A%2A%20";
0334     map["title3*2*"] = "isn't it!";
0335     // some utf-8 bits
0336     map["raw-utf8"] = "\xc4\x9b\xc5\xa1\xc4\x8d";
0337     map["utf8-2047"] = "=?utf8?Q?=c4=9b=c5=a1=c4=8d?=";
0338     map["utf8-wo-lang*"] = "utf8''%c4%9b%c5%a1%c4%8d";
0339     map["utf8-wo-lang-wo-enc*"] = "''%c4%9b%c5%a1%c4%8d";
0340     map["utf8-en*"] = "utf8'en'%c4%9b%c5%a1%c4%8d";
0341     map["utf8-wo-enc-lang*"] = "'en'%c4%9b%c5%a1%c4%8d";
0342 
0343     QTest::newRow("notfound") << map << QByteArray("notfound") << QString();
0344     QTest::newRow("boring") << map << QByteArray("completeURL") << QStringLiteral("ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar");
0345     QTest::newRow("continuation") << map << QByteArray("URL") << QStringLiteral("ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar");
0346     QTest::newRow("lang") << map << QByteArray("completeTitle") << QStringLiteral("This is ***fun***");
0347     QTest::newRow("continuation-lang-wo-stars") << map << QByteArray("title") << QStringLiteral("This is even more ***fun*** isn't it!");
0348     QTest::newRow("continuation-lang-all-stars") << map << QByteArray("title2") << QStringLiteral("This is even more ***fun*** isn't it!");
0349     QTest::newRow("continuation-lang-wo-star-in-the-middle") << map << QByteArray("title3") << QStringLiteral("This is even more ***fun*** isn't it!");
0350     QTest::newRow("raw-utf8") << map << QByteArray("raw-utf8") << QStringLiteral("ěšč");
0351     QTest::newRow("utf8-2047") << map << QByteArray("utf8-2047") << QStringLiteral("ěšč");
0352     QTest::newRow("utf8-wo-lang") << map << QByteArray("utf8-wo-lang") << QStringLiteral("ěšč");
0353     QTest::newRow("utf8-wo-lang-wo-enc") << map << QByteArray("utf8-wo-lang-wo-enc") << QStringLiteral("ěšč");
0354     QTest::newRow("utf8-en") << map << QByteArray("utf8-en") << QStringLiteral("ěšč");
0355     QTest::newRow("utf8-wo-enc-lang") << map << QByteArray("utf8-wo-enc-lang") << QStringLiteral("ěšč");
0356 }
0357 
0358 void RFCCodecsTest::testRfc2231Encoding()
0359 {
0360     QFETCH(QString, unicode);
0361     QFETCH(QByteArray, serialized);
0362 
0363     QCOMPARE(QString::fromUtf8(Imap::encodeRfc2231Parameter("x", unicode)), QString::fromUtf8(serialized));
0364 }
0365 
0366 void RFCCodecsTest::testRfc2231Encoding_data()
0367 {
0368     QTest::addColumn<QString>("unicode");
0369     QTest::addColumn<QByteArray>("serialized");
0370 
0371     QTest::newRow("empty") << QString() << QByteArray("x=\"\"");
0372     QTest::newRow("ascii") << QStringLiteral("ahoj") << QByteArray("x=ahoj");
0373     QTest::newRow("filename") << QStringLiteral("AhojZ-jak-se_masz-039.txt") << QByteArray("x=AhojZ-jak-se_masz-039.txt");
0374     QTest::newRow("utf-8") << QStringLiteral("ěšč") << QByteArray("x*=utf-8''%C4%9B%C5%A1%C4%8D");
0375     QTest::newRow("question-mark") << QStringLiteral("?") << QByteArray("x*=utf-8''%3F");
0376 }
0377 
0378 QTEST_GUILESS_MAIN( RFCCodecsTest )