File indexing completed on 2024-03-24 15:25:30

0001 /*
0002     SPDX-FileCopyrightText: 2011 Romain Perier <bambi@kubuntu.org>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #include "kcharsetstest.h"
0008 
0009 #include "kcharsets_p.h"
0010 #include <QDebug>
0011 #include <QString>
0012 #include <QTest>
0013 #include <QTextCodec>
0014 #include <kcharsets.h>
0015 
0016 static bool encodingNameHasADescription(const QString &encodingName, const QStringList &descriptions)
0017 {
0018     return std::any_of(descriptions.cbegin(), descriptions.cend(), [&encodingName](const QString &description) {
0019         return description.contains(encodingName);
0020     });
0021 }
0022 
0023 void KCharsetsTest::testSingleton()
0024 {
0025     QVERIFY(KCharsets::charsets() != nullptr);
0026     QCOMPARE(KCharsets::charsets(), KCharsets::charsets());
0027 }
0028 
0029 void KCharsetsTest::testCodecForName_data()
0030 {
0031     QTest::addColumn<QString>("codec");
0032     QTest::addColumn<QString>("expectedCodecFromKDE");
0033     QTest::addColumn<QString>("expectedCodecFromQt");
0034 
0035     QTest::newRow("utf8") << "utf8"
0036                           << "UTF-8"
0037                           << "UTF-8";
0038     QTest::newRow("utf-8") << "utf-8"
0039                            << "UTF-8"
0040                            << "UTF-8";
0041     QTest::newRow("UTF8") << "UTF8"
0042                           << "UTF-8"
0043                           << "UTF-8";
0044     QTest::newRow("UTF-8") << "UTF-8"
0045                            << "UTF-8"
0046                            << "UTF-8";
0047 
0048     QTest::newRow("Big5") << "Big5"
0049                           << "Big5"
0050                           << "Big5";
0051     QTest::newRow("Big5-HKSCS") << "Big5-HKSCS"
0052                                 << "Big5-HKSCS"
0053                                 << "Big5-HKSCS";
0054     QTest::newRow("EUC-JP") << "EUC-JP"
0055                             << "EUC-JP"
0056                             << "EUC-JP";
0057     QTest::newRow("EUC-KR") << "EUC-KR"
0058                             << "EUC-KR"
0059                             << "EUC-KR";
0060     QTest::newRow("CP 949") << "CP 949"
0061                             << "CP 949"
0062                             << "CP 949";
0063     QTest::newRow("GB18030") << "GB18030"
0064                              << "GB18030"
0065                              << "GB18030";
0066     QTest::newRow("GB2312") << "GB2312"
0067                             << "GB2312"
0068                             << "GB2312";
0069     QTest::newRow("GBK") << "GBK"
0070                          << "GBK"
0071                          << "GBK";
0072     QTest::newRow("IBM850") << "IBM850"
0073                             << "IBM850"
0074                             << "IBM850";
0075     QTest::newRow("IBM866") << "IBM866"
0076                             << "IBM866"
0077                             << "IBM866";
0078     QTest::newRow("IBM874") << "IBM874"
0079                             << "IBM874"
0080                             << "IBM874";
0081     QTest::newRow("ISO 10646-UCS-2") << "ISO 10646-UCS-2"
0082                                      << "ISO 10646-UCS-2"
0083                                      << "ISO 10646-UCS-2";
0084     QTest::newRow("ISO 8859-1") << "ISO 8859-1"
0085                                 << "ISO 8859-1"
0086                                 << "ISO 8859-1";
0087     QTest::newRow("ISO 8859-11") << "ISO 8859-11"
0088                                  << "ISO 8859-11"
0089                                  << "ISO 8859-11";
0090     QTest::newRow("ISO 8859-13") << "ISO 8859-13"
0091                                  << "ISO 8859-13"
0092                                  << "ISO 8859-13";
0093     QTest::newRow("ISO 8859-14") << "ISO 8859-14"
0094                                  << "ISO 8859-14"
0095                                  << "ISO 8859-14";
0096     QTest::newRow("ISO 8859-15") << "ISO 8859-15"
0097                                  << "ISO 8859-15"
0098                                  << "ISO 8859-15";
0099     QTest::newRow("ISO 8859-16") << "ISO 8859-16"
0100                                  << "ISO 8859-16"
0101                                  << "ISO 8859-16";
0102     QTest::newRow("ISO 8859-2") << "ISO 8859-2"
0103                                 << "ISO 8859-2"
0104                                 << "ISO 8859-2";
0105     QTest::newRow("ISO 8859-3") << "ISO 8859-3"
0106                                 << "ISO 8859-3"
0107                                 << "ISO 8859-3";
0108     QTest::newRow("ISO 8859-4") << "ISO 8859-4"
0109                                 << "ISO 8859-4"
0110                                 << "ISO 8859-4";
0111     QTest::newRow("ISO 8859-5") << "ISO 8859-5"
0112                                 << "ISO 8859-5"
0113                                 << "ISO 8859-5";
0114     QTest::newRow("ISO 8859-6") << "ISO 8859-6"
0115                                 << "ISO 8859-6"
0116                                 << "ISO 8859-6";
0117     QTest::newRow("ISO 8859-7") << "ISO 8859-7"
0118                                 << "ISO 8859-7"
0119                                 << "ISO 8859-7";
0120     QTest::newRow("ISO 8859-8") << "ISO 8859-8"
0121                                 << "ISO 8859-8"
0122                                 << "ISO 8859-8";
0123     QTest::newRow("ISO 8859-8-I") << "ISO 8859-8-I"
0124                                   << "ISO 8859-8-I"
0125                                   << "ISO 8859-8-I";
0126     QTest::newRow("ISO 8859-9") << "ISO 8859-9"
0127                                 << "ISO 8859-9"
0128                                 << "ISO 8859-9";
0129     QTest::newRow("KOI8-R") << "KOI8-R"
0130                             << "KOI8-R"
0131                             << "KOI8-R";
0132     QTest::newRow("KOI8-U") << "KOI8-U"
0133                             << "KOI8-U"
0134                             << "KOI8-U";
0135     QTest::newRow("TIS620") << "TIS620"
0136                             << "TIS620"
0137                             << "TIS620";
0138     QTest::newRow("TSCII") << "TSCII"
0139                            << "TSCII"
0140                            << "TSCII";
0141     QTest::newRow("UTF-16") << "UTF-16"
0142                             << "UTF-16"
0143                             << "UTF-16";
0144     QTest::newRow("UTF-8") << "UTF-8"
0145                            << "UTF-8"
0146                            << "UTF-8";
0147     QTest::newRow("cp 1250") << "cp 1250"
0148                              << "cp 1250"
0149                              << "cp 1250";
0150     QTest::newRow("cp 1251") << "cp 1251"
0151                              << "cp 1251"
0152                              << "cp 1251";
0153     QTest::newRow("cp 1252") << "cp 1252"
0154                              << "cp 1252"
0155                              << "cp 1252";
0156     QTest::newRow("cp 1253") << "cp 1253"
0157                              << "cp 1253"
0158                              << "cp 1253";
0159     QTest::newRow("cp 1254") << "cp 1254"
0160                              << "cp 1254"
0161                              << "cp 1254";
0162     QTest::newRow("cp 1255") << "cp 1255"
0163                              << "cp 1255"
0164                              << "cp 1255";
0165     QTest::newRow("cp 1256") << "cp 1256"
0166                              << "cp 1256"
0167                              << "cp 1256";
0168     QTest::newRow("cp 1257") << "cp 1257"
0169                              << "cp 1257"
0170                              << "cp 1257";
0171     QTest::newRow("jis7") << "jis7"
0172                           << "jis7"
0173                           << "jis7";
0174     QTest::newRow("sjis") << "sjis"
0175                           << "sjis"
0176                           << "sjis";
0177     QTest::newRow("ucs2") << "ucs2"
0178                           << "ucs2"
0179                           << "ucs2";
0180     QTest::newRow("utf7") << "utf7"
0181                           << "utf7"
0182                           << "utf7";
0183     QTest::newRow("windows-1258") << "windows-1258"
0184                                   << "windows-1258"
0185                                   << "windows-1258";
0186     QTest::newRow("winsami2") << "winsami2"
0187                               << "winsami2"
0188                               << "winsami2";
0189     QTest::newRow("US-ASCII") << "US-ASCII"
0190                               << "US-ASCII"
0191                               << "US-ASCII";
0192 }
0193 
0194 void KCharsetsTest::testCodecForName()
0195 {
0196     KCharsets *singleton = KCharsets::charsets();
0197 
0198     QFETCH(QString, codec);
0199     QFETCH(QString, expectedCodecFromKDE);
0200     QFETCH(QString, expectedCodecFromQt);
0201 
0202     if (QTextCodec::codecForName(codec.toLocal8Bit()) == nullptr) {
0203         qWarning() << "codec " << codec << "is not supported by QTextCodec !";
0204         return;
0205     }
0206 
0207     QVERIFY(QTextCodec::codecForName(expectedCodecFromKDE.toLocal8Bit()) != nullptr);
0208     QCOMPARE(singleton->d->codecForName(codec)->name(), QTextCodec::codecForName(expectedCodecFromKDE.toLocal8Bit())->name());
0209 
0210     QVERIFY(QTextCodec::codecForName(expectedCodecFromQt.toLocal8Bit()) != nullptr);
0211     QCOMPARE(QTextCodec::codecForName(codec.toLocal8Bit())->name(), QTextCodec::codecForName(expectedCodecFromQt.toLocal8Bit())->name());
0212 }
0213 
0214 void KCharsetsTest::testFromEntity()
0215 {
0216     KCharsets *singleton = KCharsets::charsets();
0217 
0218     QCOMPARE(singleton->fromEntity(QString::fromLatin1("&#1234")), QChar(1234));
0219     QCOMPARE(singleton->fromEntity(QString::fromLatin1("&#x1234")), QChar(0x1234));
0220     QCOMPARE(singleton->fromEntity(QString::fromLatin1("lt")), QChar::fromLatin1('<'));
0221     QCOMPARE(singleton->fromEntity(QString::fromLatin1("gt")), QChar::fromLatin1('>'));
0222     QCOMPARE(singleton->fromEntity(QString::fromLatin1("quot")), QChar::fromLatin1('"'));
0223     QCOMPARE(singleton->fromEntity(QString::fromLatin1("amp")), QChar::fromLatin1('&'));
0224     QCOMPARE(singleton->fromEntity(QString::fromLatin1("apos")), QChar::fromLatin1('\''));
0225 }
0226 
0227 void KCharsetsTest::testToEntity()
0228 {
0229     QSKIP("KCharsets::toEntity test not implemented.");
0230 }
0231 
0232 void KCharsetsTest::testResolveEntities()
0233 {
0234     KCharsets *singleton = KCharsets::charsets();
0235 
0236     QCOMPARE(singleton->resolveEntities(QString::fromLatin1("&quot;&apos;&lt;Hello &amp;World&gt;&apos;&quot;")),
0237              QString::fromLatin1("\"\'<Hello &World>\'\""));
0238 }
0239 
0240 void KCharsetsTest::testEncodingNames()
0241 {
0242     KCharsets *singleton = KCharsets::charsets();
0243 
0244     QCOMPARE(singleton->availableEncodingNames().count(), singleton->descriptiveEncodingNames().count());
0245 
0246     for (const QString &encodingName : singleton->availableEncodingNames()) {
0247         bool ok = false;
0248 
0249         if (encodingName == QLatin1String("ucs2") || encodingName == QLatin1String("ISO 10646-UCS-2")) {
0250             singleton->d->codecForName(QStringLiteral("UTF-16"), ok);
0251         } else if (encodingName == QLatin1String("utf7")) {
0252             continue;
0253         } else {
0254             singleton->d->codecForName(encodingName, ok);
0255         }
0256         // The availability of some of the charsets below depends on whether Qt was built with ICU...
0257         if (!ok) {
0258             if (encodingName == QLatin1String("jis7")) {
0259                 QEXPECT_FAIL("", "jis7 is missing in Qt", Continue);
0260             }
0261             if (encodingName == QLatin1String("winsami2")) {
0262                 QEXPECT_FAIL("", "winsami2 is missing in Qt", Continue);
0263             }
0264             if (encodingName == QLatin1String("ISO 8859-16")) { // ICU bug?
0265                 QEXPECT_FAIL("", "ISO 8859-16 is missing in Qt", Continue);
0266             }
0267         }
0268 
0269         if (!ok) {
0270             qDebug() << "Error:" << encodingName << "not found";
0271             QVERIFY(false);
0272         }
0273         QVERIFY(encodingNameHasADescription(encodingName, singleton->descriptiveEncodingNames()));
0274         QVERIFY(!singleton->descriptionForEncoding(encodingName).isEmpty());
0275         QCOMPARE(singleton->encodingForName(singleton->descriptionForEncoding(encodingName)), encodingName);
0276     }
0277 }
0278 
0279 void KCharsetsTest::testUsAsciiEncoding_data()
0280 {
0281     QTest::addColumn<QString>("codecName");
0282 
0283     QTest::newRow("normal-name") << QStringLiteral("US-ASCII");
0284     QTest::newRow("alias-name") << QStringLiteral("IBM367");
0285 }
0286 
0287 void KCharsetsTest::testUsAsciiEncoding()
0288 {
0289     QFETCH(QString, codecName);
0290 
0291     KCharsets *singleton = KCharsets::charsets();
0292 
0293     bool ok = false;
0294     QTextCodec *codec = singleton->d->codecForName(codecName, ok);
0295     QVERIFY(ok);
0296 
0297     // compatible text
0298     const QString successUnicodeText = QStringLiteral("Testname");
0299 
0300     QTextCodec::ConverterState successConverterState;
0301 
0302     const QByteArray successEncoded8Bit = codec->fromUnicode(successUnicodeText.constData(), successUnicodeText.length(), &successConverterState);
0303 
0304     const QByteArray successExpected8Bit = QByteArrayLiteral("Testname");
0305     QCOMPARE(successConverterState.invalidChars, 0);
0306     QCOMPARE(successEncoded8Bit, successExpected8Bit);
0307 
0308     // incompatible text
0309     const QString failUnicodeText = QStringLiteral("Testnäme");
0310 
0311     QTextCodec::ConverterState failConverterState;
0312 
0313     const QByteArray failEncoded8Bit = codec->fromUnicode(failUnicodeText.constData(), failUnicodeText.length(), &failConverterState);
0314 
0315     const QByteArray failExpected8Bit = QByteArrayLiteral("Testn?me");
0316     QCOMPARE(failConverterState.invalidChars, 1);
0317     QCOMPARE(failEncoded8Bit, failExpected8Bit);
0318 }
0319 
0320 void KCharsetsTest::testUsAsciiDecoding_data()
0321 {
0322     QTest::addColumn<QString>("codecName");
0323 
0324     QTest::newRow("normal-name") << QStringLiteral("US-ASCII");
0325     QTest::newRow("alias-name") << QStringLiteral("IBM367");
0326 }
0327 
0328 void KCharsetsTest::testUsAsciiDecoding()
0329 {
0330     QFETCH(QString, codecName);
0331 
0332     KCharsets *singleton = KCharsets::charsets();
0333 
0334     bool ok = false;
0335     QTextCodec *codec = singleton->d->codecForName(codecName, ok);
0336     QVERIFY(ok);
0337 
0338     // compatible text
0339     const QByteArray success8BitString = QByteArrayLiteral("Testname");
0340 
0341     QTextCodec::ConverterState successConverterState;
0342 
0343     const QString successUnicodeString = codec->toUnicode(success8BitString.constData(), success8BitString.length(), &successConverterState);
0344 
0345     const QString successExpectedString = QStringLiteral("Testname");
0346     QCOMPARE(successConverterState.invalidChars, 0);
0347     QCOMPARE(successUnicodeString, successExpectedString);
0348 
0349     // incompatible text, with "ä" in latin1
0350     /* clang-format off */
0351     const QByteArray fail8BitString = QByteArrayLiteral("Testn""\xE4""me");
0352     /* clang-format on */
0353 
0354     QTextCodec::ConverterState failConverterState;
0355 
0356     const QString failUnicodeString = codec->toUnicode(fail8BitString.constData(), fail8BitString.length(), &failConverterState);
0357 
0358     const QString failExpectedString = QStringLiteral("Testn?me");
0359     QCOMPARE(failConverterState.invalidChars, 1);
0360     QCOMPARE(failUnicodeString, failExpectedString);
0361 }
0362 
0363 QTEST_MAIN(KCharsetsTest)
0364 
0365 #include "moc_kcharsetstest.cpp"