File indexing completed on 2024-12-08 12:24:06
0001 /* 0002 This file is part of the Kate project. 0003 SPDX-FileCopyrightText: 2021 Jan Paul Batrina <jpmbatrina01@gmail.com> 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "encodingtest.h" 0008 #include "katetextbuffer.h" 0009 0010 QTEST_MAIN(KateEncodingTest) 0011 0012 void KateEncodingTest::utfBomTest() 0013 { 0014 // setup stuff 0015 Kate::TextBuffer buffer(nullptr); 0016 buffer.setFallbackTextCodec(QTextCodec::codecForName("UTF-8")); 0017 bool encodingErrors; 0018 bool tooLongLinesWrapped; 0019 bool success; 0020 int longestLineLoaded; 0021 QString prefixText; 0022 0023 // utf8 tests 0024 buffer.setTextCodec(QTextCodec::codecForName("UTF-8")); 0025 success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf8.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true); 0026 QVERIFY(success && !encodingErrors); 0027 QVERIFY(!buffer.generateByteOrderMark()); // file has no bom 0028 // since the utf8 bom is 3 bytes, the first 3 chars should not be the bom 0029 prefixText = buffer.text().left(3); 0030 QCOMPARE(prefixText, QLatin1String("Tes")); 0031 0032 buffer.setTextCodec(QTextCodec::codecForName("UTF-8")); 0033 success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf8-bom-only.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true); 0034 QVERIFY(success && !encodingErrors); 0035 QVERIFY(buffer.generateByteOrderMark()); 0036 // aside from bom, file is empty so there should be no text 0037 prefixText = buffer.text(); 0038 QVERIFY(prefixText.isEmpty()); 0039 0040 // utf16 tests 0041 buffer.setTextCodec(QTextCodec::codecForName("UTF-16")); 0042 success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf16.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true); 0043 QVERIFY(success && !encodingErrors); 0044 QVERIFY(buffer.generateByteOrderMark()); 0045 // since the utf16 bom is 2 bytes, the first 2 chars should not be the bom 0046 prefixText = buffer.text().left(2); 0047 QCOMPARE(prefixText, QLatin1String("Te")); 0048 0049 buffer.setTextCodec(QTextCodec::codecForName("UTF-16")); 0050 success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf16be.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true); 0051 QVERIFY(success && !encodingErrors); 0052 QVERIFY(buffer.generateByteOrderMark()); 0053 // since the utf16 bom is 2 bytes, the first 2 chars should not be the bom 0054 prefixText = buffer.text().left(2); 0055 QCOMPARE(prefixText, QLatin1String("Te")); 0056 0057 // utf32 tests 0058 buffer.setTextCodec(QTextCodec::codecForName("UTF-32")); 0059 success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf32.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true); 0060 QVERIFY(success && !encodingErrors); 0061 QVERIFY(buffer.generateByteOrderMark()); 0062 // since the utf16 bom is 4 bytes, the first 4 chars should not be the bom 0063 prefixText = buffer.text().left(4); 0064 QCOMPARE(prefixText, QLatin1String("Test")); 0065 0066 buffer.setTextCodec(QTextCodec::codecForName("UTF-32")); 0067 success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf32be.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true); 0068 QVERIFY(success && !encodingErrors); 0069 QVERIFY(buffer.generateByteOrderMark()); 0070 // since the utf16 bom is 4 bytes, the first 4 chars should not be the bom 0071 prefixText = buffer.text().left(4); 0072 QCOMPARE(prefixText, QLatin1String("Test")); 0073 0074 // Ensure that a mismatching bom is not processed (e.g. utf8 bom should not be used for utf16) 0075 buffer.setTextCodec(QTextCodec::codecForName("UTF-16")); 0076 success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf8-bom-only.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true); 0077 QVERIFY(success && !encodingErrors); 0078 // even though the file does not have a bom, Kate::TextBuffer::setTextCodec always enables bom generation 0079 // for utf16 and utf32 since the byte order is useful and relevant for reading the file 0080 QVERIFY(buffer.generateByteOrderMark()); 0081 prefixText = buffer.text(); 0082 // 0xFFBBEF is processed as a single char 0xBBEF, which is a hangul character 0083 QCOMPARE(prefixText.front(), QChar(0xBBEF)); 0084 } 0085 0086 void KateEncodingTest::nonUtfNoBomTest() 0087 { 0088 // setup stuff 0089 Kate::TextBuffer buffer(nullptr); 0090 buffer.setFallbackTextCodec(QTextCodec::codecForName("UTF-8")); 0091 bool encodingErrors; 0092 bool tooLongLinesWrapped; 0093 bool success; 0094 int longestLineLoaded; 0095 QString prefixText; 0096 0097 // latin15, should not contain any bom 0098 buffer.setTextCodec(QTextCodec::codecForName("ISO 8859-15")); 0099 success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/latin15.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true); 0100 QVERIFY(success && !encodingErrors); 0101 QVERIFY(!buffer.generateByteOrderMark()); 0102 prefixText = buffer.text().left(4); 0103 QCOMPARE(prefixText, QLatin1String("Test")); 0104 0105 // Even if a bom is somehow found, it should be processed normally as text for non-UTF char sets 0106 buffer.setTextCodec(QTextCodec::codecForName("ISO 8859-15")); 0107 success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/latin15-with-utf8-bom.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true); 0108 QVERIFY(success && !encodingErrors); 0109 QVERIFY(!buffer.generateByteOrderMark()); // utf8 bom shouldn't be processed 0110 // the utf8 bom is 0xEFBBBF, which is "" in Latin15 0111 prefixText = buffer.text().left(3); 0112 QCOMPARE(prefixText, QStringLiteral("")); 0113 } 0114 0115 #include "moc_encodingtest.cpp"