Warning, file /frameworks/kcodecs/autotests/kcharsetstest.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* 0002 SPDX-FileCopyrightText: 2011 Romain Perier <bambi@kubuntu.org> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 #include "kcharsetstest.h" 0008 0009 #include "kcharsets_p.h" 0010 #include <QDebug> 0011 #include <QString> 0012 #include <QTest> 0013 #include <QTextCodec> 0014 #include <kcharsets.h> 0015 0016 static bool encodingNameHasADescription(const QString &encodingName, const QStringList &descriptions) 0017 { 0018 return std::any_of(descriptions.cbegin(), descriptions.cend(), [&encodingName](const QString &description) { 0019 return description.contains(encodingName); 0020 }); 0021 } 0022 0023 void KCharsetsTest::testSingleton() 0024 { 0025 QVERIFY(KCharsets::charsets() != nullptr); 0026 QCOMPARE(KCharsets::charsets(), KCharsets::charsets()); 0027 } 0028 0029 void KCharsetsTest::testCodecForName_data() 0030 { 0031 QTest::addColumn<QString>("codec"); 0032 QTest::addColumn<QString>("expectedCodecFromKDE"); 0033 QTest::addColumn<QString>("expectedCodecFromQt"); 0034 0035 QTest::newRow("utf8") << "utf8" 0036 << "UTF-8" 0037 << "UTF-8"; 0038 QTest::newRow("utf-8") << "utf-8" 0039 << "UTF-8" 0040 << "UTF-8"; 0041 QTest::newRow("UTF8") << "UTF8" 0042 << "UTF-8" 0043 << "UTF-8"; 0044 QTest::newRow("UTF-8") << "UTF-8" 0045 << "UTF-8" 0046 << "UTF-8"; 0047 0048 QTest::newRow("Big5") << "Big5" 0049 << "Big5" 0050 << "Big5"; 0051 QTest::newRow("Big5-HKSCS") << "Big5-HKSCS" 0052 << "Big5-HKSCS" 0053 << "Big5-HKSCS"; 0054 QTest::newRow("EUC-JP") << "EUC-JP" 0055 << "EUC-JP" 0056 << "EUC-JP"; 0057 QTest::newRow("EUC-KR") << "EUC-KR" 0058 << "EUC-KR" 0059 << "EUC-KR"; 0060 QTest::newRow("CP 949") << "CP 949" 0061 << "CP 949" 0062 << "CP 949"; 0063 QTest::newRow("GB18030") << "GB18030" 0064 << "GB18030" 0065 << "GB18030"; 0066 QTest::newRow("GB2312") << "GB2312" 0067 << "GB2312" 0068 << "GB2312"; 0069 QTest::newRow("GBK") << "GBK" 0070 << "GBK" 0071 << "GBK"; 0072 QTest::newRow("IBM850") << "IBM850" 0073 << "IBM850" 0074 << "IBM850"; 0075 QTest::newRow("IBM866") << "IBM866" 0076 << "IBM866" 0077 << "IBM866"; 0078 QTest::newRow("IBM874") << "IBM874" 0079 << "IBM874" 0080 << "IBM874"; 0081 QTest::newRow("ISO 10646-UCS-2") << "ISO 10646-UCS-2" 0082 << "ISO 10646-UCS-2" 0083 << "ISO 10646-UCS-2"; 0084 QTest::newRow("ISO 8859-1") << "ISO 8859-1" 0085 << "ISO 8859-1" 0086 << "ISO 8859-1"; 0087 QTest::newRow("ISO 8859-11") << "ISO 8859-11" 0088 << "ISO 8859-11" 0089 << "ISO 8859-11"; 0090 QTest::newRow("ISO 8859-13") << "ISO 8859-13" 0091 << "ISO 8859-13" 0092 << "ISO 8859-13"; 0093 QTest::newRow("ISO 8859-14") << "ISO 8859-14" 0094 << "ISO 8859-14" 0095 << "ISO 8859-14"; 0096 QTest::newRow("ISO 8859-15") << "ISO 8859-15" 0097 << "ISO 8859-15" 0098 << "ISO 8859-15"; 0099 QTest::newRow("ISO 8859-16") << "ISO 8859-16" 0100 << "ISO 8859-16" 0101 << "ISO 8859-16"; 0102 QTest::newRow("ISO 8859-2") << "ISO 8859-2" 0103 << "ISO 8859-2" 0104 << "ISO 8859-2"; 0105 QTest::newRow("ISO 8859-3") << "ISO 8859-3" 0106 << "ISO 8859-3" 0107 << "ISO 8859-3"; 0108 QTest::newRow("ISO 8859-4") << "ISO 8859-4" 0109 << "ISO 8859-4" 0110 << "ISO 8859-4"; 0111 QTest::newRow("ISO 8859-5") << "ISO 8859-5" 0112 << "ISO 8859-5" 0113 << "ISO 8859-5"; 0114 QTest::newRow("ISO 8859-6") << "ISO 8859-6" 0115 << "ISO 8859-6" 0116 << "ISO 8859-6"; 0117 QTest::newRow("ISO 8859-7") << "ISO 8859-7" 0118 << "ISO 8859-7" 0119 << "ISO 8859-7"; 0120 QTest::newRow("ISO 8859-8") << "ISO 8859-8" 0121 << "ISO 8859-8" 0122 << "ISO 8859-8"; 0123 QTest::newRow("ISO 8859-8-I") << "ISO 8859-8-I" 0124 << "ISO 8859-8-I" 0125 << "ISO 8859-8-I"; 0126 QTest::newRow("ISO 8859-9") << "ISO 8859-9" 0127 << "ISO 8859-9" 0128 << "ISO 8859-9"; 0129 QTest::newRow("KOI8-R") << "KOI8-R" 0130 << "KOI8-R" 0131 << "KOI8-R"; 0132 QTest::newRow("KOI8-U") << "KOI8-U" 0133 << "KOI8-U" 0134 << "KOI8-U"; 0135 QTest::newRow("TIS620") << "TIS620" 0136 << "TIS620" 0137 << "TIS620"; 0138 QTest::newRow("TSCII") << "TSCII" 0139 << "TSCII" 0140 << "TSCII"; 0141 QTest::newRow("UTF-16") << "UTF-16" 0142 << "UTF-16" 0143 << "UTF-16"; 0144 QTest::newRow("UTF-8") << "UTF-8" 0145 << "UTF-8" 0146 << "UTF-8"; 0147 QTest::newRow("cp 1250") << "cp 1250" 0148 << "cp 1250" 0149 << "cp 1250"; 0150 QTest::newRow("cp 1251") << "cp 1251" 0151 << "cp 1251" 0152 << "cp 1251"; 0153 QTest::newRow("cp 1252") << "cp 1252" 0154 << "cp 1252" 0155 << "cp 1252"; 0156 QTest::newRow("cp 1253") << "cp 1253" 0157 << "cp 1253" 0158 << "cp 1253"; 0159 QTest::newRow("cp 1254") << "cp 1254" 0160 << "cp 1254" 0161 << "cp 1254"; 0162 QTest::newRow("cp 1255") << "cp 1255" 0163 << "cp 1255" 0164 << "cp 1255"; 0165 QTest::newRow("cp 1256") << "cp 1256" 0166 << "cp 1256" 0167 << "cp 1256"; 0168 QTest::newRow("cp 1257") << "cp 1257" 0169 << "cp 1257" 0170 << "cp 1257"; 0171 QTest::newRow("jis7") << "jis7" 0172 << "jis7" 0173 << "jis7"; 0174 QTest::newRow("sjis") << "sjis" 0175 << "sjis" 0176 << "sjis"; 0177 QTest::newRow("ucs2") << "ucs2" 0178 << "ucs2" 0179 << "ucs2"; 0180 QTest::newRow("utf7") << "utf7" 0181 << "utf7" 0182 << "utf7"; 0183 QTest::newRow("windows-1258") << "windows-1258" 0184 << "windows-1258" 0185 << "windows-1258"; 0186 QTest::newRow("winsami2") << "winsami2" 0187 << "winsami2" 0188 << "winsami2"; 0189 QTest::newRow("US-ASCII") << "US-ASCII" 0190 << "US-ASCII" 0191 << "US-ASCII"; 0192 } 0193 0194 void KCharsetsTest::testCodecForName() 0195 { 0196 KCharsets *singleton = KCharsets::charsets(); 0197 0198 QFETCH(QString, codec); 0199 QFETCH(QString, expectedCodecFromKDE); 0200 QFETCH(QString, expectedCodecFromQt); 0201 0202 if (QTextCodec::codecForName(codec.toLocal8Bit()) == nullptr) { 0203 qWarning() << "codec " << codec << "is not supported by QTextCodec !"; 0204 return; 0205 } 0206 0207 QVERIFY(QTextCodec::codecForName(expectedCodecFromKDE.toLocal8Bit()) != nullptr); 0208 QCOMPARE(singleton->d->codecForName(codec)->name(), QTextCodec::codecForName(expectedCodecFromKDE.toLocal8Bit())->name()); 0209 0210 QVERIFY(QTextCodec::codecForName(expectedCodecFromQt.toLocal8Bit()) != nullptr); 0211 QCOMPARE(QTextCodec::codecForName(codec.toLocal8Bit())->name(), QTextCodec::codecForName(expectedCodecFromQt.toLocal8Bit())->name()); 0212 } 0213 0214 void KCharsetsTest::testFromEntity() 0215 { 0216 KCharsets *singleton = KCharsets::charsets(); 0217 0218 QCOMPARE(singleton->fromEntity(QString::fromLatin1("Ӓ")), QChar(1234)); 0219 QCOMPARE(singleton->fromEntity(QString::fromLatin1("ሴ")), QChar(0x1234)); 0220 QCOMPARE(singleton->fromEntity(QString::fromLatin1("lt")), QChar::fromLatin1('<')); 0221 QCOMPARE(singleton->fromEntity(QString::fromLatin1("gt")), QChar::fromLatin1('>')); 0222 QCOMPARE(singleton->fromEntity(QString::fromLatin1("quot")), QChar::fromLatin1('"')); 0223 QCOMPARE(singleton->fromEntity(QString::fromLatin1("amp")), QChar::fromLatin1('&')); 0224 QCOMPARE(singleton->fromEntity(QString::fromLatin1("apos")), QChar::fromLatin1('\'')); 0225 } 0226 0227 void KCharsetsTest::testToEntity() 0228 { 0229 QSKIP("KCharsets::toEntity test not implemented."); 0230 } 0231 0232 void KCharsetsTest::testResolveEntities() 0233 { 0234 KCharsets *singleton = KCharsets::charsets(); 0235 0236 QCOMPARE(singleton->resolveEntities(QString::fromLatin1(""'<Hello &World>'"")), 0237 QString::fromLatin1("\"\'<Hello &World>\'\"")); 0238 } 0239 0240 void KCharsetsTest::testEncodingNames() 0241 { 0242 KCharsets *singleton = KCharsets::charsets(); 0243 0244 QCOMPARE(singleton->availableEncodingNames().count(), singleton->descriptiveEncodingNames().count()); 0245 0246 for (const QString &encodingName : singleton->availableEncodingNames()) { 0247 bool ok = false; 0248 0249 if (encodingName == QLatin1String("ucs2") || encodingName == QLatin1String("ISO 10646-UCS-2")) { 0250 singleton->d->codecForName(QStringLiteral("UTF-16"), ok); 0251 } else if (encodingName == QLatin1String("utf7")) { 0252 continue; 0253 } else { 0254 singleton->d->codecForName(encodingName, ok); 0255 } 0256 // The availability of some of the charsets below depends on whether Qt was built with ICU... 0257 if (!ok) { 0258 if (encodingName == QLatin1String("jis7")) { 0259 QEXPECT_FAIL("", "jis7 is missing in Qt", Continue); 0260 } 0261 if (encodingName == QLatin1String("winsami2")) { 0262 QEXPECT_FAIL("", "winsami2 is missing in Qt", Continue); 0263 } 0264 if (encodingName == QLatin1String("ISO 8859-16")) { // ICU bug? 0265 QEXPECT_FAIL("", "ISO 8859-16 is missing in Qt", Continue); 0266 } 0267 } 0268 0269 if (!ok) { 0270 qDebug() << "Error:" << encodingName << "not found"; 0271 QVERIFY(false); 0272 } 0273 QVERIFY(encodingNameHasADescription(encodingName, singleton->descriptiveEncodingNames())); 0274 QVERIFY(!singleton->descriptionForEncoding(encodingName).isEmpty()); 0275 QCOMPARE(singleton->encodingForName(singleton->descriptionForEncoding(encodingName)), encodingName); 0276 } 0277 } 0278 0279 void KCharsetsTest::testUsAsciiEncoding_data() 0280 { 0281 QTest::addColumn<QString>("codecName"); 0282 0283 QTest::newRow("normal-name") << QStringLiteral("US-ASCII"); 0284 QTest::newRow("alias-name") << QStringLiteral("IBM367"); 0285 } 0286 0287 void KCharsetsTest::testUsAsciiEncoding() 0288 { 0289 QFETCH(QString, codecName); 0290 0291 KCharsets *singleton = KCharsets::charsets(); 0292 0293 bool ok = false; 0294 QTextCodec *codec = singleton->d->codecForName(codecName, ok); 0295 QVERIFY(ok); 0296 0297 // compatible text 0298 const QString successUnicodeText = QStringLiteral("Testname"); 0299 0300 QTextCodec::ConverterState successConverterState; 0301 0302 const QByteArray successEncoded8Bit = codec->fromUnicode(successUnicodeText.constData(), successUnicodeText.length(), &successConverterState); 0303 0304 const QByteArray successExpected8Bit = QByteArrayLiteral("Testname"); 0305 QCOMPARE(successConverterState.invalidChars, 0); 0306 QCOMPARE(successEncoded8Bit, successExpected8Bit); 0307 0308 // incompatible text 0309 const QString failUnicodeText = QStringLiteral("Testnäme"); 0310 0311 QTextCodec::ConverterState failConverterState; 0312 0313 const QByteArray failEncoded8Bit = codec->fromUnicode(failUnicodeText.constData(), failUnicodeText.length(), &failConverterState); 0314 0315 const QByteArray failExpected8Bit = QByteArrayLiteral("Testn?me"); 0316 QCOMPARE(failConverterState.invalidChars, 1); 0317 QCOMPARE(failEncoded8Bit, failExpected8Bit); 0318 } 0319 0320 void KCharsetsTest::testUsAsciiDecoding_data() 0321 { 0322 QTest::addColumn<QString>("codecName"); 0323 0324 QTest::newRow("normal-name") << QStringLiteral("US-ASCII"); 0325 QTest::newRow("alias-name") << QStringLiteral("IBM367"); 0326 } 0327 0328 void KCharsetsTest::testUsAsciiDecoding() 0329 { 0330 QFETCH(QString, codecName); 0331 0332 KCharsets *singleton = KCharsets::charsets(); 0333 0334 bool ok = false; 0335 QTextCodec *codec = singleton->d->codecForName(codecName, ok); 0336 QVERIFY(ok); 0337 0338 // compatible text 0339 const QByteArray success8BitString = QByteArrayLiteral("Testname"); 0340 0341 QTextCodec::ConverterState successConverterState; 0342 0343 const QString successUnicodeString = codec->toUnicode(success8BitString.constData(), success8BitString.length(), &successConverterState); 0344 0345 const QString successExpectedString = QStringLiteral("Testname"); 0346 QCOMPARE(successConverterState.invalidChars, 0); 0347 QCOMPARE(successUnicodeString, successExpectedString); 0348 0349 // incompatible text, with "ä" in latin1 0350 /* clang-format off */ 0351 const QByteArray fail8BitString = QByteArrayLiteral("Testn""\xE4""me"); 0352 /* clang-format on */ 0353 0354 QTextCodec::ConverterState failConverterState; 0355 0356 const QString failUnicodeString = codec->toUnicode(fail8BitString.constData(), fail8BitString.length(), &failConverterState); 0357 0358 const QString failExpectedString = QStringLiteral("Testn?me"); 0359 QCOMPARE(failConverterState.invalidChars, 1); 0360 QCOMPARE(failUnicodeString, failExpectedString); 0361 } 0362 0363 QTEST_MAIN(KCharsetsTest) 0364 0365 #include "moc_kcharsetstest.cpp"