File indexing completed on 2024-09-15 12:05:28
0001 /** 0002 * test_filter.cpp 0003 * 0004 * SPDX-FileCopyrightText: 2004 Zack Rusin <zack@kde.org> 0005 * 0006 * SPDX-License-Identifier: LGPL-2.1-or-later 0007 */ 0008 0009 #include "test_filter.h" 0010 #include "tokenizer_p.h" 0011 0012 #include <QTest> 0013 #include <QVector> 0014 0015 QTEST_GUILESS_MAIN(SonnetFilterTest) 0016 0017 using namespace Sonnet; 0018 0019 struct Hit { 0020 Hit(const QString &w, int s) 0021 : word(w) 0022 , start(s) 0023 { 0024 } 0025 QString word; 0026 int start; 0027 }; 0028 0029 void SonnetFilterTest::testLatin() 0030 { 0031 QString buffer(QStringLiteral(" This is a sample thing. Please test me ... He's don't Le'Clerk.")); 0032 QList<Hit> hits; 0033 hits.append(Hit(QStringLiteral("This"), 2)); 0034 hits.append(Hit(QStringLiteral("is"), 7)); 0035 hits.append(Hit(QStringLiteral("a"), 14)); 0036 hits.append(Hit(QStringLiteral("sample"), 16)); 0037 hits.append(Hit(QStringLiteral("thing"), 23)); 0038 hits.append(Hit(QStringLiteral("Please"), 30)); 0039 hits.append(Hit(QStringLiteral("test"), 37)); 0040 hits.append(Hit(QStringLiteral("me"), 42)); 0041 hits.append(Hit(QStringLiteral("He's"), 53)); 0042 hits.append(Hit(QStringLiteral("don't"), 58)); 0043 hits.append(Hit(QStringLiteral("Le'Clerk"), 64)); 0044 0045 WordTokenizer tokenizer; 0046 tokenizer.setBuffer(buffer); 0047 0048 Token w; 0049 int hitNumber = 0; 0050 while (tokenizer.hasNext()) { 0051 w = tokenizer.next(); 0052 QCOMPARE(w.toString(), hits[hitNumber].word); 0053 QCOMPARE(w.position(), hits[hitNumber].start); 0054 ++hitNumber; 0055 } 0056 QCOMPARE(hitNumber, hits.count()); 0057 } 0058 0059 static QVector<ushort> convertToUnicode(const QString &str) 0060 { 0061 QVector<ushort> unicode; 0062 for (int i = 0; i < str.length(); ++i) { 0063 unicode += str[i].unicode(); 0064 } 0065 return unicode; 0066 } 0067 0068 void SonnetFilterTest::testIndic() 0069 { 0070 QString buffer; 0071 QList<Hit> hits; 0072 hits.append(Hit(QString::fromUtf8("मराठी"), 0)); 0073 hits.append(Hit(QString::fromUtf8("भाषा"), 6)); 0074 hits.append(Hit(QString::fromUtf8("महाराष्ट्र"), 11)); 0075 hits.append(Hit(QString::fromUtf8("व"), 22)); 0076 hits.append(Hit(QString::fromUtf8("गोवा"), 24)); 0077 hits.append(Hit(QString::fromUtf8("राज्याची"), 29)); 0078 hits.append(Hit(QString::fromUtf8("राजभाषा"), 38)); 0079 hits.append(Hit(QString::fromUtf8("असून"), 46)); 0080 hits.append(Hit(QString::fromUtf8("सुमारे"), 51)); 0081 hits.append(Hit(QString::fromUtf8("९"), 58)); // This is the number 9, so we don't spell-check it 0082 hits.append(Hit(QString::fromUtf8("कोटी"), 60)); 0083 hits.append(Hit(QString::fromUtf8("लोकांची"), 65)); 0084 hits.append(Hit(QString::fromUtf8("मातृभाषा"), 73)); 0085 hits.append(Hit(QString::fromUtf8("आहे"), 82)); 0086 hits.append(Hit(QString::fromUtf8("मराठी"), 87)); 0087 hits.append(Hit(QString::fromUtf8("भाषा"), 93)); 0088 hits.append(Hit(QString::fromUtf8("कमीत"), 98)); 0089 hits.append(Hit(QString::fromUtf8("कमी"), 103)); 0090 hits.append(Hit(QString::fromUtf8("१०००"), 107)); // just a number 0091 hits.append(Hit(QString::fromUtf8("वर्षापासून"), 112)); 0092 hits.append(Hit(QString::fromUtf8("अस्तित्वात"), 123)); 0093 hits.append(Hit(QString::fromUtf8("आहे"), 134)); 0094 0095 buffer = QString::fromUtf8("मराठी भाषा महाराष्ट्र व गोवा राज्याची राजभाषा असून सुमारे ९ कोटी लोकांची मातृभाषा आहे. मराठी भाषा कमीत कमी १००० वर्षापासून अस्तित्वात आहे."); 0096 0097 WordTokenizer tokenizer; 0098 tokenizer.setBuffer(buffer); 0099 Token w; 0100 int hitNumber = 0; 0101 while (tokenizer.hasNext()) { 0102 w = tokenizer.next(); 0103 QVector<ushort> unicode = convertToUnicode(w.toString()); 0104 QCOMPARE(w.toString(), hits[hitNumber].word); 0105 QCOMPARE(w.position(), hits[hitNumber].start); 0106 ++hitNumber; 0107 } 0108 QCOMPARE(hitNumber, hits.count()); 0109 } 0110 0111 void SonnetFilterTest::testSentence() 0112 { 0113 QString buffer(QStringLiteral("This is a sample thing. Please test me ... He's don't Le'Clerk.")); 0114 QList<Hit> hits; 0115 hits.append(Hit(QStringLiteral("This is a sample thing. "), 0)); 0116 hits.append(Hit(QStringLiteral("Please test me ... "), 28)); 0117 hits.append(Hit(QStringLiteral("He's don't Le'Clerk."), 51)); 0118 0119 SentenceTokenizer tokenizer; 0120 tokenizer.setBuffer(buffer); 0121 0122 Token w; 0123 int hitNumber = 0; 0124 while (tokenizer.hasNext()) { 0125 w = tokenizer.next(); 0126 QCOMPARE(w.toString(), hits[hitNumber].word); 0127 QCOMPARE(w.position(), hits[hitNumber].start); 0128 ++hitNumber; 0129 } 0130 QCOMPARE(hitNumber, hits.count()); 0131 } 0132 0133 #include "moc_test_filter.cpp"