File indexing completed on 2024-04-14 14:18:25

0001 /*
0002     SPDX-FileCopyrightText: 2006 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-only
0005 */
0006 
0007 #include <QTest>
0008 
0009 #include "rfc2047test.h"
0010 
0011 #include "../src/kcodecs.h"
0012 
0013 using namespace KCodecs;
0014 
0015 QTEST_MAIN(RFC2047Test)
0016 
0017 void RFC2047Test::testRFC2047decode_data()
0018 {
0019     QTest::addColumn<QByteArray>("input");
0020     QTest::addColumn<QByteArray>("expectedCharset");
0021     QTest::addColumn<QByteArray>("defaultCharset");
0022     QTest::addColumn<bool>("forceCharset");
0023     QTest::addColumn<QString>("expectedResult");
0024 
0025     /* clang-format off */
0026     QTest::newRow("empty") << QByteArray()
0027                            << QByteArray() << QByteArray("utf-8") << false
0028                            << QString();
0029     QTest::newRow("identity") << QByteArray("bla")
0030                               << QByteArray() << QByteArray("utf-8") << false
0031                               << QString::fromLatin1("bla");
0032 
0033     QTest::newRow("utf-8") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?= <kloecker@kde.org>")
0034                            << QByteArray("UTF-8") << QByteArray("utf-8") << false
0035                            << QString::fromUtf8("Ingo Klöcker <kloecker@kde.org>");
0036     QTest::newRow("utf-8") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?= <kloecker@kde.org>")
0037                            << QByteArray("UTF-8") << QByteArray("iso8859-1") << false
0038                            << QString::fromUtf8("Ingo Klöcker <kloecker@kde.org>");
0039     QTest::newRow("utf-8") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?=")
0040                            << QByteArray("UTF-8") << QByteArray("utf-8") << false
0041                            << QString::fromUtf8("Ingo Klöcker");
0042 
0043 
0044     QTest::newRow("whitespaces") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?=       =?utf-8?q?Ingo=20Kl=C3=B6cker?=")
0045                                  << QByteArray("UTF-8") << QByteArray("utf-8") << false
0046                                  << QString::fromUtf8("Ingo KlöckerIngo Klöcker");
0047     QTest::newRow("whitespaces") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?=  foo  =?utf-8?q?Ingo=20Kl=C3=B6cker?=")
0048                                  << QByteArray("UTF-8") << QByteArray("utf-8") << false
0049                                  << QString::fromUtf8("Ingo Klöcker  foo  Ingo Klöcker");
0050 
0051     QTest::newRow("iso-8859-1") << QByteArray("=?ISO-8859-1?Q?Andr=E9s_Ot=F3n?=")
0052                                 << QByteArray("ISO-8859-1") << QByteArray("utf-8") << false
0053                                 << QString::fromUtf8("Andrés Otón");
0054     QTest::newRow("iso-8859-2") << QByteArray("=?iso-8859-2?q?Rafa=B3_Rzepecki?=")
0055                                 << QByteArray("ISO-8859-2") << QByteArray("utf-8") << false
0056                                 << QString::fromUtf8("Rafał Rzepecki");
0057     QTest::newRow("iso-8859-9") << QByteArray("=?iso-8859-9?Q?S=2E=C7a=F0lar?= Onur")
0058                                 << QByteArray("ISO-8859-9") << QByteArray("utf-8") << false
0059                                 << QString::fromUtf8("S.Çağlar Onur");
0060     QTest::newRow("iso-8859-15") << QByteArray("Rafael =?iso-8859-15?q?Rodr=EDguez?=")
0061                                  << QByteArray("ISO-8859-15") << QByteArray("utf-8") << false
0062                                  << QString::fromUtf8("Rafael Rodríguez");
0063 
0064     QTest::newRow("wrong charset") << QByteArray("=?iso-8859-1?q?Ingo=20Kl=C3=B6cker?=")
0065                                    << QByteArray("UTF-8") << QByteArray("utf-8") << true
0066                                    << QString::fromUtf8("Ingo Klöcker");
0067 
0068     // language parameter according to RFC 2231, section 5
0069     QTest::newRow("RFC-2331") << QByteArray("From: =?US-ASCII*EN?Q?Keith_Moore?= <moore@cs.utk.edu>")
0070                               << QByteArray("US-ASCII") << QByteArray("utf-8") << false
0071                               << QString::fromUtf8("From: Keith Moore <moore@cs.utk.edu>");
0072 
0073     QTest::newRow("broken QP") << QByteArray("Subject: =?iso-8859-1?Q?Belangrijk=3a=20Verhuizing=20FTP=20server?=")
0074                                << QByteArray("ISO-8859-1") << QByteArray("utf-8") << false
0075                                << QString::fromUtf8("Subject: Belangrijk: Verhuizing FTP server");
0076 
0077     // mixed charsets, based on bug 125542
0078     QTest::newRow("mixed charsets") << QByteArray("Subject: =?utf-8?q?Ingo=20Kl=C3=B6cker?= unencoded words =?iso-8859-9?Q?S=2E=C7a=F0lar?=")
0079                                     << QByteArray("UTF-8") << QByteArray("utf-8") << false
0080                                     << QString::fromUtf8("Subject: Ingo Klöcker unencoded words S.Çağlar");
0081     QTest::newRow("mixed charsets-125542") << QByteArray("Subject: =?koi8-r?b?5MXMz9fJINrB?= HP Pavillion =?iso-8859-5?b?KNzV3N7g2PjQIN/e4dXR3d4p?=")
0082                                     << QByteArray("UTF-8") << QByteArray("us-ascii") << false
0083                                     << QString::fromUtf8("Subject: Делови за HP Pavillion (меморија посебно)");
0084 
0085     // illegal characters which are already encoded in the given encoding but are not ASCII (bug 206417)
0086     QTest::newRow("illegal characters") << QByteArray("Subject: =?utf-8?Q?пиѿилл,=20=D0=B4=D0=BE=D0=B1=D1=80=D1=8B=D0=B9=20=D0=B4=D0=B5=D0=BD=D1=8C?=")
0087                                         << QByteArray("UTF-8") << QByteArray("utf-8") << false
0088                                         << QString::fromUtf8("Subject: пиѿилл, добрый день");
0089     const auto iso88591Encoded = QByteArray::fromHex("D6C4DCF6E4FC"); // "ÖÄÜöäü" in ISO-8859-1 encoding - this is not valid UTF-8 though and thus rejected by MSVC in string literals
0090     QTest::newRow("illegal characters") << QByteArray("Subject: =?iso-8859-1?Q?") + iso88591Encoded + "?="
0091                                         << QByteArray("ISO-8859-1") << QByteArray("utf-8") << false
0092                                         << QString::fromLatin1("Subject: " + iso88591Encoded);
0093 
0094 
0095     QTest::newRow("small data") << QByteArray("=?iso-8859-1?Q?c?=")
0096                                 << QByteArray("ISO-8859-1") << QByteArray("utf-8") << false
0097                                 << QString::fromUtf8("c");
0098     /* clang-format on */
0099 }
0100 
0101 void RFC2047Test::testRFC2047decode()
0102 {
0103     QFETCH(QByteArray, input);
0104     QFETCH(QByteArray, expectedCharset);
0105     QFETCH(QByteArray, defaultCharset);
0106     QFETCH(bool, forceCharset);
0107     QFETCH(QString, expectedResult);
0108 
0109     QByteArray detectedCharset;
0110 
0111     const KCodecs::CharsetOption options = forceCharset ? KCodecs::ForceDefaultCharset : KCodecs::NoOption;
0112     const QString result = KCodecs::decodeRFC2047String(input, &detectedCharset, defaultCharset, options);
0113 
0114     QCOMPARE(result, expectedResult);
0115     QCOMPARE(detectedCharset, expectedCharset);
0116 }
0117 
0118 void RFC2047Test::testInvalidDecode_data()
0119 {
0120     QTest::addColumn<QByteArray>("input");
0121     QTest::addColumn<QString>("expectedResult");
0122 
0123     QTest::newRow("") << QByteArray("=") << QString::fromUtf8("=");
0124     QTest::newRow("") << QByteArray("=?") << QString::fromUtf8("=?");
0125     QTest::newRow("") << QByteArray("=?a?b?=") << QString::fromUtf8("=?a?b?=");
0126     QTest::newRow("") << QByteArray("=?a?b?c?") << QString::fromUtf8("=?a?b?c?");
0127     QTest::newRow("") << QByteArray("=?a??c?=") << QString::fromUtf8("=?a??c?=");
0128 }
0129 
0130 void RFC2047Test::testInvalidDecode()
0131 {
0132     QFETCH(QByteArray, input);
0133     QFETCH(QString, expectedResult);
0134 
0135     QByteArray encCharset;
0136 
0137     const QString result = KCodecs::decodeRFC2047String(input, &encCharset);
0138     QCOMPARE(result, expectedResult);
0139 }
0140 
0141 void RFC2047Test::testRFC2047encode_data()
0142 {
0143     QTest::addColumn<QString>("input");
0144     QTest::addColumn<QByteArray>("encoding");
0145     QTest::addColumn<QByteArray>("expectedResult");
0146 
0147     /* clang-format off */
0148     QTest::newRow("empty") << QString()
0149                            << QByteArray("utf-8")
0150                            << QByteArray();
0151     QTest::newRow("identity") << QString::fromUtf8("bla")
0152                               << QByteArray("utf-8")
0153                               << QByteArray("bla");
0154     QTest::newRow("QP") << QString::fromUtf8("Ingo Klöcker <kloecker@kde.org>")
0155                         << QByteArray("utf-8")
0156                         << QByteArray("=?UTF-8?q?Ingo=20Kl=C3=B6cker?= <kloecker@kde.org>");
0157 
0158     QTest::newRow("utf-8 fallback") << QString::fromUtf8("æſðđŋħł")
0159                                     << QByteArray("latin1")
0160                                     << QByteArray("=?UTF-8?B?w6bFv8OwxJHFi8SnxYI=?=");
0161     /* clang-format on */
0162 }
0163 
0164 void RFC2047Test::testRFC2047encode()
0165 {
0166     QFETCH(QString, input);
0167     QFETCH(QByteArray, encoding);
0168     QFETCH(QByteArray, expectedResult);
0169 
0170     const QByteArray result = KCodecs::encodeRFC2047String(input, encoding);
0171 
0172     // expected value is probably wrong, libkmime will chose 'B' instead of 'Q' encoding
0173     QEXPECT_FAIL("QP", "KCodecs will choose 'B' instead of 'Q' encoding", Continue);
0174     QCOMPARE(result, expectedResult);
0175 }
0176 
0177 #include "moc_rfc2047test.cpp"