File indexing completed on 2025-02-09 04:20:45
0001 /* 0002 SPDX-FileCopyrightText: 2006 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-only 0005 */ 0006 0007 #include <QTest> 0008 0009 #include "rfc2047test.h" 0010 0011 #include "../src/kcodecs.h" 0012 0013 using namespace KCodecs; 0014 0015 QTEST_MAIN(RFC2047Test) 0016 0017 void RFC2047Test::testRFC2047decode_data() 0018 { 0019 QTest::addColumn<QByteArray>("input"); 0020 QTest::addColumn<QByteArray>("expectedCharset"); 0021 QTest::addColumn<QByteArray>("defaultCharset"); 0022 QTest::addColumn<bool>("forceCharset"); 0023 QTest::addColumn<QString>("expectedResult"); 0024 0025 /* clang-format off */ 0026 QTest::newRow("empty") << QByteArray() 0027 << QByteArray() << QByteArray("utf-8") << false 0028 << QString(); 0029 QTest::newRow("identity") << QByteArray("bla") 0030 << QByteArray() << QByteArray("utf-8") << false 0031 << QString::fromLatin1("bla"); 0032 0033 QTest::newRow("utf-8") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?= <kloecker@kde.org>") 0034 << QByteArray("UTF-8") << QByteArray("utf-8") << false 0035 << QString::fromUtf8("Ingo Klöcker <kloecker@kde.org>"); 0036 QTest::newRow("utf-8") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?= <kloecker@kde.org>") 0037 << QByteArray("UTF-8") << QByteArray("iso8859-1") << false 0038 << QString::fromUtf8("Ingo Klöcker <kloecker@kde.org>"); 0039 QTest::newRow("utf-8") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?=") 0040 << QByteArray("UTF-8") << QByteArray("utf-8") << false 0041 << QString::fromUtf8("Ingo Klöcker"); 0042 0043 0044 QTest::newRow("whitespaces") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?= =?utf-8?q?Ingo=20Kl=C3=B6cker?=") 0045 << QByteArray("UTF-8") << QByteArray("utf-8") << false 0046 << QString::fromUtf8("Ingo KlöckerIngo Klöcker"); 0047 QTest::newRow("whitespaces") << QByteArray("=?utf-8?q?Ingo=20Kl=C3=B6cker?= foo =?utf-8?q?Ingo=20Kl=C3=B6cker?=") 0048 << QByteArray("UTF-8") << QByteArray("utf-8") << false 0049 << QString::fromUtf8("Ingo Klöcker foo Ingo Klöcker"); 0050 0051 QTest::newRow("iso-8859-1") << QByteArray("=?ISO-8859-1?Q?Andr=E9s_Ot=F3n?=") 0052 << QByteArray("ISO-8859-1") << QByteArray("utf-8") << false 0053 << QString::fromUtf8("Andrés Otón"); 0054 QTest::newRow("iso-8859-2") << QByteArray("=?iso-8859-2?q?Rafa=B3_Rzepecki?=") 0055 << QByteArray("ISO-8859-2") << QByteArray("utf-8") << false 0056 << QString::fromUtf8("Rafał Rzepecki"); 0057 QTest::newRow("iso-8859-9") << QByteArray("=?iso-8859-9?Q?S=2E=C7a=F0lar?= Onur") 0058 << QByteArray("ISO-8859-9") << QByteArray("utf-8") << false 0059 << QString::fromUtf8("S.Çağlar Onur"); 0060 QTest::newRow("iso-8859-15") << QByteArray("Rafael =?iso-8859-15?q?Rodr=EDguez?=") 0061 << QByteArray("ISO-8859-15") << QByteArray("utf-8") << false 0062 << QString::fromUtf8("Rafael Rodríguez"); 0063 0064 QTest::newRow("wrong charset") << QByteArray("=?iso-8859-1?q?Ingo=20Kl=C3=B6cker?=") 0065 << QByteArray("UTF-8") << QByteArray("utf-8") << true 0066 << QString::fromUtf8("Ingo Klöcker"); 0067 0068 // language parameter according to RFC 2231, section 5 0069 QTest::newRow("RFC-2331") << QByteArray("From: =?US-ASCII*EN?Q?Keith_Moore?= <moore@cs.utk.edu>") 0070 << QByteArray("US-ASCII") << QByteArray("utf-8") << false 0071 << QString::fromUtf8("From: Keith Moore <moore@cs.utk.edu>"); 0072 0073 QTest::newRow("broken QP") << QByteArray("Subject: =?iso-8859-1?Q?Belangrijk=3a=20Verhuizing=20FTP=20server?=") 0074 << QByteArray("ISO-8859-1") << QByteArray("utf-8") << false 0075 << QString::fromUtf8("Subject: Belangrijk: Verhuizing FTP server"); 0076 0077 // mixed charsets, based on bug 125542 0078 QTest::newRow("mixed charsets") << QByteArray("Subject: =?utf-8?q?Ingo=20Kl=C3=B6cker?= unencoded words =?iso-8859-9?Q?S=2E=C7a=F0lar?=") 0079 << QByteArray("UTF-8") << QByteArray("utf-8") << false 0080 << QString::fromUtf8("Subject: Ingo Klöcker unencoded words S.Çağlar"); 0081 QTest::newRow("mixed charsets-125542") << QByteArray("Subject: =?koi8-r?b?5MXMz9fJINrB?= HP Pavillion =?iso-8859-5?b?KNzV3N7g2PjQIN/e4dXR3d4p?=") 0082 << QByteArray("UTF-8") << QByteArray("us-ascii") << false 0083 << QString::fromUtf8("Subject: Делови за HP Pavillion (меморија посебно)"); 0084 0085 // illegal characters which are already encoded in the given encoding but are not ASCII (bug 206417) 0086 QTest::newRow("illegal characters") << QByteArray("Subject: =?utf-8?Q?пиѿилл,=20=D0=B4=D0=BE=D0=B1=D1=80=D1=8B=D0=B9=20=D0=B4=D0=B5=D0=BD=D1=8C?=") 0087 << QByteArray("UTF-8") << QByteArray("utf-8") << false 0088 << QString::fromUtf8("Subject: пиѿилл, добрый день"); 0089 const auto iso88591Encoded = QByteArray::fromHex("D6C4DCF6E4FC"); // "ÖÄÜöäü" in ISO-8859-1 encoding - this is not valid UTF-8 though and thus rejected by MSVC in string literals 0090 QTest::newRow("illegal characters") << QByteArray("Subject: =?iso-8859-1?Q?") + iso88591Encoded + "?=" 0091 << QByteArray("ISO-8859-1") << QByteArray("utf-8") << false 0092 << QString::fromLatin1("Subject: " + iso88591Encoded); 0093 0094 0095 QTest::newRow("small data") << QByteArray("=?iso-8859-1?Q?c?=") 0096 << QByteArray("ISO-8859-1") << QByteArray("utf-8") << false 0097 << QString::fromUtf8("c"); 0098 /* clang-format on */ 0099 } 0100 0101 void RFC2047Test::testRFC2047decode() 0102 { 0103 QFETCH(QByteArray, input); 0104 QFETCH(QByteArray, expectedCharset); 0105 QFETCH(QByteArray, defaultCharset); 0106 QFETCH(bool, forceCharset); 0107 QFETCH(QString, expectedResult); 0108 0109 QByteArray detectedCharset; 0110 0111 const KCodecs::CharsetOption options = forceCharset ? KCodecs::ForceDefaultCharset : KCodecs::NoOption; 0112 const QString result = KCodecs::decodeRFC2047String(input, &detectedCharset, defaultCharset, options); 0113 0114 QCOMPARE(result, expectedResult); 0115 QCOMPARE(detectedCharset, expectedCharset); 0116 } 0117 0118 void RFC2047Test::testInvalidDecode_data() 0119 { 0120 QTest::addColumn<QByteArray>("input"); 0121 QTest::addColumn<QString>("expectedResult"); 0122 0123 QTest::newRow("") << QByteArray("=") << QString::fromUtf8("="); 0124 QTest::newRow("") << QByteArray("=?") << QString::fromUtf8("=?"); 0125 QTest::newRow("") << QByteArray("=?a?b?=") << QString::fromUtf8("=?a?b?="); 0126 QTest::newRow("") << QByteArray("=?a?b?c?") << QString::fromUtf8("=?a?b?c?"); 0127 QTest::newRow("") << QByteArray("=?a??c?=") << QString::fromUtf8("=?a??c?="); 0128 } 0129 0130 void RFC2047Test::testInvalidDecode() 0131 { 0132 QFETCH(QByteArray, input); 0133 QFETCH(QString, expectedResult); 0134 0135 QByteArray encCharset; 0136 0137 const QString result = KCodecs::decodeRFC2047String(input, &encCharset); 0138 QCOMPARE(result, expectedResult); 0139 } 0140 0141 void RFC2047Test::testRFC2047encode_data() 0142 { 0143 QTest::addColumn<QString>("input"); 0144 QTest::addColumn<QByteArray>("encoding"); 0145 QTest::addColumn<QByteArray>("expectedResult"); 0146 0147 /* clang-format off */ 0148 QTest::newRow("empty") << QString() 0149 << QByteArray("utf-8") 0150 << QByteArray(); 0151 QTest::newRow("identity") << QString::fromUtf8("bla") 0152 << QByteArray("utf-8") 0153 << QByteArray("bla"); 0154 QTest::newRow("QP") << QString::fromUtf8("Ingo Klöcker <kloecker@kde.org>") 0155 << QByteArray("utf-8") 0156 << QByteArray("=?UTF-8?q?Ingo=20Kl=C3=B6cker?= <kloecker@kde.org>"); 0157 0158 QTest::newRow("utf-8 fallback") << QString::fromUtf8("æſðđŋħł") 0159 << QByteArray("latin1") 0160 << QByteArray("=?UTF-8?B?w6bFv8OwxJHFi8SnxYI=?="); 0161 /* clang-format on */ 0162 } 0163 0164 void RFC2047Test::testRFC2047encode() 0165 { 0166 QFETCH(QString, input); 0167 QFETCH(QByteArray, encoding); 0168 QFETCH(QByteArray, expectedResult); 0169 0170 const QByteArray result = KCodecs::encodeRFC2047String(input, encoding); 0171 0172 // expected value is probably wrong, libkmime will choose 'B' instead of 'Q' encoding 0173 QEXPECT_FAIL("QP", "KCodecs will choose 'B' instead of 'Q' encoding", Continue); 0174 QCOMPARE(result, expectedResult); 0175 } 0176 0177 #include "moc_rfc2047test.cpp"