File indexing completed on 2024-05-12 04:33:30
0001 /* 0002 SPDX-FileCopyrightText: 2013 Albert Astals Cid <aacid@kde.org> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 // clazy:excludeall=qstring-allocations 0008 0009 #include <QMimeDatabase> 0010 #include <QSignalSpy> 0011 #include <QTest> 0012 0013 #include "../core/document.h" 0014 #include "../core/page.h" 0015 #include "../core/textpage.h" 0016 #include "../settings_core.h" 0017 0018 Q_DECLARE_METATYPE(Okular::Document::SearchStatus) 0019 0020 class SearchFinishedReceiver : public QObject 0021 { 0022 Q_OBJECT 0023 0024 public Q_SLOTS: 0025 void searchFinished(int id, Okular::Document::SearchStatus status) 0026 { 0027 m_id = id; 0028 m_status = status; 0029 } 0030 0031 public: 0032 int m_id; 0033 Okular::Document::SearchStatus m_status; 0034 }; 0035 0036 class SearchTest : public QObject 0037 { 0038 Q_OBJECT 0039 0040 private Q_SLOTS: 0041 void initTestCase(); 0042 void testNextAndPrevious(); 0043 void test311232(); 0044 void test323262(); 0045 void test323263(); 0046 void test430243(); 0047 void testDottedI(); 0048 void testHyphenAtEndOfLineWithoutYOverlap(); 0049 void testHyphenWithYOverlap(); 0050 void testHyphenAtEndOfPage(); 0051 void testOneColumn(); 0052 void testTwoColumns(); 0053 }; 0054 0055 void SearchTest::initTestCase() 0056 { 0057 qRegisterMetaType<Okular::Document::SearchStatus>(); 0058 Okular::SettingsCore::instance(QStringLiteral("searchtest")); 0059 } 0060 0061 static void createTextPage(const QVector<QString> &text, const QVector<Okular::NormalizedRect> &rect, Okular::TextPage *&tp, Okular::Page *&page) 0062 { 0063 tp = new Okular::TextPage(); 0064 for (int i = 0; i < text.size(); i++) { 0065 tp->append(text[i], rect[i]); 0066 } 0067 0068 // The Page::setTextPage method invokes the layout analysis algorithms tested by some tests here 0069 // and also sets the tp->d->m_page field (the latter was used in older versions of Okular by 0070 // TextPage::stringLengthAdaptedWithHyphen). 0071 // Note that calling "delete page;" will delete the TextPage as well. 0072 page = new Okular::Page(1, 100, 100, Okular::Rotation0); 0073 page->setTextPage(tp); 0074 } 0075 0076 #define CREATE_PAGE \ 0077 QCOMPARE(text.size(), rect.size()); \ 0078 Okular::Page *page; \ 0079 Okular::TextPage *tp; \ 0080 createTextPage(text, rect, tp, page); 0081 0082 #define TEST_NEXT_PREV(searchType, expectedStatus) \ 0083 { \ 0084 Okular::RegularAreaRect *result = tp->findText(0, searchString, searchType, Qt::CaseSensitive, NULL); \ 0085 QCOMPARE(!!result, expectedStatus); \ 0086 delete result; \ 0087 } 0088 0089 // The test testNextAndPrevious checks that 0090 // a) if one starts a new search, then the first or last match is found, depending on the search direction 0091 // (2 cases: FromTop/FromBottom) 0092 // b) if the last search has found a match, 0093 // then clicking the "Next" button moves to the next occurrence an "Previous" to the previous one 0094 // (if there is any). Altogether there are four combinations of the last search and new search 0095 // direction: Next-Next, Previous-Previous, Next-Previous, Previous-Next; the first two combination 0096 // have two subcases (the new search may give a match or not, so altogether 6 cases to test). 0097 // This gives 8 cases altogether. By taking into account the cases where the last search has given no match, 0098 // we would have 4 more cases (Next (no match)-Next, Next (no match)-Previous, Previous (no match)-Previous, 0099 // Previous (no match)-Next), but those are more the business of Okular::Document::searchText rather than 0100 // Okular::TextPage (at least in the multi-page case). 0101 0102 // We have four test situations: four documents and four corresponding search strings. 0103 // The first situation (document="ababa", search string="b") is a generic one where the 0104 // two matches are not side-by-side and neither the first character nor the last character of 0105 // the document match. The only special thing is that the search string has only length 1. 0106 // The second situation (document="abab", search string="ab") is notable for that the two occurrences 0107 // of the search string are side-by-side with no characters in between, so some off-by-one errors 0108 // would be detected by this test. As the first match starts at the beginning at the document the 0109 // last match ends at the end of the document, it also detects off-by-one errors for finding the first/last match. 0110 // The third situation (document="abababa", search string="aba") is notable for it shows whether 0111 // the next match is allowed to contain letters from the previous one: currently it is not 0112 //(as in the majority of browsers, viewers and editors), and therefore "abababa" is considered to 0113 // contain not three but two occurrences of "aba" (if one starts search from the beginning of the document). 0114 // The fourth situation (document="a ba b", search string="a b") demonstrates the case when one TinyTextEntity 0115 // contains multiple characters that are contained in different matches (namely, the middle "ba" is one TinyTextEntity); 0116 // in particular, since these matches are side-by-side, this test would detect some off-by-one 0117 // offset errors. 0118 0119 void SearchTest::testNextAndPrevious() 0120 { 0121 #define TEST_NEXT_PREV_SITUATION_COUNT 4 0122 0123 QVector<QString> texts[TEST_NEXT_PREV_SITUATION_COUNT] = {QVector<QString>() << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a"), 0124 QVector<QString>() << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a") << QStringLiteral("b"), 0125 QVector<QString>() << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a") << QStringLiteral("b") 0126 << QStringLiteral("a"), 0127 QVector<QString>() << QStringLiteral("a") << QStringLiteral(" ") << QStringLiteral("ba") << QStringLiteral(" ") << QStringLiteral("b")}; 0128 0129 QString searchStrings[TEST_NEXT_PREV_SITUATION_COUNT] = {QStringLiteral("b"), QStringLiteral("ab"), QStringLiteral("aba"), QStringLiteral("a b")}; 0130 0131 for (int i = 0; i < TEST_NEXT_PREV_SITUATION_COUNT; i++) { 0132 const QVector<QString> &text = texts[i]; 0133 const QString &searchString = searchStrings[i]; 0134 0135 QVector<Okular::NormalizedRect> rect; 0136 0137 for (int i = 0; i < text.size(); i++) { 0138 rect << Okular::NormalizedRect(0.1 * i, 0.0, 0.1 * (i + 1), 0.1); 0139 } 0140 0141 CREATE_PAGE; 0142 0143 // Test 3 of the 8 cases listed above: 0144 // FromTop, Next-Next (match) and Next-Next (no match) 0145 TEST_NEXT_PREV(Okular::FromTop, true); 0146 TEST_NEXT_PREV(Okular::NextResult, true); 0147 TEST_NEXT_PREV(Okular::NextResult, false); 0148 0149 // Test 5 cases: FromBottom, Previous-Previous (match), Previous-Next, 0150 // Next-Previous, Previous-Previous (no match) 0151 TEST_NEXT_PREV(Okular::FromBottom, true); 0152 TEST_NEXT_PREV(Okular::PreviousResult, true); 0153 TEST_NEXT_PREV(Okular::NextResult, true); 0154 TEST_NEXT_PREV(Okular::PreviousResult, true); 0155 TEST_NEXT_PREV(Okular::PreviousResult, false); 0156 0157 delete page; 0158 } 0159 } 0160 0161 void SearchTest::test311232() 0162 { 0163 Okular::Document d(nullptr); 0164 SearchFinishedReceiver receiver; 0165 QSignalSpy spy(&d, &Okular::Document::searchFinished); 0166 0167 QObject::connect(&d, &Okular::Document::searchFinished, &receiver, &SearchFinishedReceiver::searchFinished); 0168 0169 const QString testFile = QStringLiteral(KDESRCDIR "data/file1.pdf"); 0170 QMimeDatabase db; 0171 const QMimeType mime = db.mimeTypeForFile(testFile); 0172 d.openDocument(testFile, QUrl(), mime); 0173 0174 const int searchId = 0; 0175 d.searchText(searchId, QStringLiteral(" i "), true, Qt::CaseSensitive, Okular::Document::NextMatch, false, QColor()); 0176 QTRY_COMPARE(spy.count(), 1); 0177 QCOMPARE(receiver.m_id, searchId); 0178 QCOMPARE(receiver.m_status, Okular::Document::MatchFound); 0179 0180 d.continueSearch(searchId, Okular::Document::PreviousMatch); 0181 QTRY_COMPARE(spy.count(), 2); 0182 QCOMPARE(receiver.m_id, searchId); 0183 QCOMPARE(receiver.m_status, Okular::Document::NoMatchFound); 0184 } 0185 0186 void SearchTest::test323262() 0187 { 0188 QVector<QString> text; 0189 text << QStringLiteral("a\n"); 0190 0191 QVector<Okular::NormalizedRect> rect; 0192 rect << Okular::NormalizedRect(1, 2, 3, 4); 0193 0194 CREATE_PAGE; 0195 0196 Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("a"), Okular::FromBottom, Qt::CaseSensitive, nullptr); 0197 QVERIFY(result); 0198 delete result; 0199 0200 delete page; 0201 } 0202 0203 void SearchTest::test323263() 0204 { 0205 QVector<QString> text; 0206 text << QStringLiteral("a") << QStringLiteral("a") << QStringLiteral("b"); 0207 0208 QVector<Okular::NormalizedRect> rect; 0209 rect << Okular::NormalizedRect(0, 0, 1, 1) << Okular::NormalizedRect(1, 0, 2, 1) << Okular::NormalizedRect(2, 0, 3, 1); 0210 0211 CREATE_PAGE; 0212 0213 Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("ab"), Okular::FromTop, Qt::CaseSensitive, nullptr); 0214 QVERIFY(result); 0215 Okular::RegularAreaRect expected; 0216 expected.append(rect[1]); 0217 expected.append(rect[2]); 0218 expected.simplify(); 0219 QCOMPARE(*result, expected); 0220 delete result; 0221 0222 delete page; 0223 } 0224 0225 void SearchTest::test430243() 0226 { 0227 // 778 is COMBINING RING ABOVE 0228 // 197 is LATIN CAPITAL LETTER A WITH RING ABOVE 0229 QVector<QString> text; 0230 text << QStringLiteral("A") << QString(QChar(778)); 0231 0232 QVector<Okular::NormalizedRect> rect; 0233 rect << Okular::NormalizedRect(0, 0, 1, 1) << Okular::NormalizedRect(1, 0, 2, 1); 0234 0235 CREATE_PAGE; 0236 0237 Okular::RegularAreaRect *result = tp->findText(0, QString(QChar(197)), Okular::FromTop, Qt::CaseSensitive, nullptr); 0238 QVERIFY(result); 0239 Okular::RegularAreaRect expected; 0240 expected.append(rect[0] | rect[1]); 0241 QCOMPARE(*result, expected); 0242 delete result; 0243 0244 delete page; 0245 } 0246 0247 void SearchTest::testDottedI() 0248 { 0249 // Earlier versions of okular had the bug that the letter "İ" (capital dotter i) did not match itself 0250 // in case-insensitive mode (this was caused by an unnecessary call of toLower() and the fact that 0251 // QString::fromUtf8("İ").compare(QString::fromUtf8("İ").toLower(), Qt::CaseInsensitive) == FALSE, 0252 // at least in Qt 4.8). 0253 0254 // In the future it would be nice to add support for matching "İ"<->"i" and "I"<->"ı" in case-insensitive 0255 // mode as well (QString::compare does not match them, at least in non-Turkish locales, since it follows 0256 // the Unicode case-folding rules https://www.unicode.org/Public/6.2.0/ucd/CaseFolding.txt). 0257 0258 QVector<QString> text; 0259 text << QStringLiteral("İ"); 0260 0261 QVector<Okular::NormalizedRect> rect; 0262 rect << Okular::NormalizedRect(1, 2, 3, 4); 0263 0264 CREATE_PAGE; 0265 0266 Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("İ"), Okular::FromTop, Qt::CaseInsensitive, nullptr); 0267 QVERIFY(result); 0268 delete result; 0269 0270 delete page; 0271 } 0272 0273 void SearchTest::testHyphenAtEndOfLineWithoutYOverlap() 0274 { 0275 QVector<QString> text; 0276 text << QStringLiteral("super-") << QStringLiteral("cali-\n") << QStringLiteral("fragilistic") << QStringLiteral("-") << QStringLiteral("expiali") << QStringLiteral("-\n") << QStringLiteral("docious"); 0277 0278 QVector<Okular::NormalizedRect> rect; 0279 rect << Okular::NormalizedRect(0.4, 0.0, 0.9, 0.1) << Okular::NormalizedRect(0.0, 0.1, 0.6, 0.2) << Okular::NormalizedRect(0.0, 0.2, 0.8, 0.3) << Okular::NormalizedRect(0.8, 0.2, 0.9, 0.3) << Okular::NormalizedRect(0.0, 0.3, 0.8, 0.4) 0280 << Okular::NormalizedRect(0.8, 0.3, 0.9, 0.4) << Okular::NormalizedRect(0.0, 0.4, 0.7, 0.5); 0281 0282 CREATE_PAGE; 0283 0284 Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("supercalifragilisticexpialidocious"), Okular::FromTop, Qt::CaseSensitive, nullptr); 0285 QVERIFY(result); 0286 Okular::RegularAreaRect expected; 0287 for (int i = 0; i < text.size(); i++) { 0288 expected.append(rect[i]); 0289 } 0290 expected.simplify(); 0291 QCOMPARE(*result, expected); 0292 delete result; 0293 0294 result = tp->findText(0, QStringLiteral("supercalifragilisticexpialidocious"), Okular::FromBottom, Qt::CaseSensitive, nullptr); 0295 QVERIFY(result); 0296 QCOMPARE(*result, expected); 0297 delete result; 0298 0299 // If the user is looking for the text explicitly with the hyphen also find it 0300 result = tp->findText(0, QStringLiteral("super-cali-fragilistic"), Okular::FromTop, Qt::CaseSensitive, nullptr); 0301 QVERIFY(result); 0302 delete result; 0303 0304 // If the user is looking for the text explicitly with the hyphen also find it 0305 result = tp->findText(0, QStringLiteral("super-cali-fragilistic"), Okular::FromBottom, Qt::CaseSensitive, nullptr); 0306 QVERIFY(result); 0307 delete result; 0308 0309 delete page; 0310 } 0311 0312 #define CREATE_PAGE_AND_TEST_SEARCH(searchString, matchExpected) \ 0313 { \ 0314 CREATE_PAGE; \ 0315 \ 0316 Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral(searchString), Okular::FromTop, Qt::CaseSensitive, NULL); \ 0317 \ 0318 QCOMPARE(!!result, matchExpected); \ 0319 \ 0320 delete result; \ 0321 delete page; \ 0322 } 0323 0324 void SearchTest::testHyphenWithYOverlap() 0325 { 0326 QVector<QString> text; 0327 text << QStringLiteral("a-") << QStringLiteral("b"); 0328 0329 QVector<Okular::NormalizedRect> rect(2); 0330 0331 // different lines (50% y-coordinate overlap), first rectangle has larger height 0332 rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.9, 0.35); 0333 rect[1] = Okular::NormalizedRect(0.0, 0.3, 0.2, 0.4); 0334 CREATE_PAGE_AND_TEST_SEARCH("ab", true); 0335 0336 // different lines (50% y-coordinate overlap), second rectangle has larger height 0337 rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.9, 0.1); 0338 rect[1] = Okular::NormalizedRect(0.0, 0.05, 0.2, 0.4); 0339 CREATE_PAGE_AND_TEST_SEARCH("ab", true); 0340 0341 // same line (90% y-coordinate overlap), first rectangle has larger height 0342 rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.4, 0.2); 0343 rect[1] = Okular::NormalizedRect(0.4, 0.11, 0.6, 0.21); 0344 CREATE_PAGE_AND_TEST_SEARCH("ab", false); 0345 CREATE_PAGE_AND_TEST_SEARCH("a-b", true); 0346 0347 // same line (90% y-coordinate overlap), second rectangle has larger height 0348 rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.4, 0.1); 0349 rect[1] = Okular::NormalizedRect(0.4, 0.01, 0.6, 0.2); 0350 CREATE_PAGE_AND_TEST_SEARCH("ab", false); 0351 CREATE_PAGE_AND_TEST_SEARCH("a-b", true); 0352 } 0353 0354 void SearchTest::testHyphenAtEndOfPage() 0355 { 0356 // Tests for segmentation fault that would occur if 0357 // we tried look ahead (for determining whether the 0358 // next character is at the same line) at the end of the page. 0359 0360 QVector<QString> text; 0361 text << QStringLiteral("a-"); 0362 0363 QVector<Okular::NormalizedRect> rect; 0364 rect << Okular::NormalizedRect(0, 0, 1, 1); 0365 0366 CREATE_PAGE; 0367 0368 { 0369 Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("a"), Okular::FromTop, Qt::CaseSensitive, nullptr); 0370 QVERIFY(result); 0371 delete result; 0372 } 0373 0374 { 0375 Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("a"), Okular::FromBottom, Qt::CaseSensitive, nullptr); 0376 QVERIFY(result); 0377 delete result; 0378 } 0379 0380 delete page; 0381 } 0382 0383 void SearchTest::testOneColumn() 0384 { 0385 // Tests that the layout analysis algorithm does not create too many columns. 0386 // Bug 326207 was caused by the fact that if all the horizontal breaks in a line 0387 // had the same length and were smaller than vertical breaks between lines then 0388 // the horizontal breaks were treated as column separators. 0389 //(Note that "same length" means "same length after rounding rectangles to integer pixels". 0390 // The resolution used by the XY Cut algorithm with a square page is 1000 x 1000, 0391 // and the horizontal spaces in the example are 0.1, so they are indeed both exactly 100 pixels.) 0392 0393 QVector<QString> text; 0394 text << QStringLiteral("Only") << QStringLiteral("one") << QStringLiteral("column") << QStringLiteral("here"); 0395 0396 // characters and line breaks have length 0.05, word breaks 0.1 0397 QVector<Okular::NormalizedRect> rect; 0398 rect << Okular::NormalizedRect(0.0, 0.0, 0.2, 0.1) << Okular::NormalizedRect(0.3, 0.0, 0.5, 0.1) << Okular::NormalizedRect(0.6, 0.0, 0.9, 0.1) << Okular::NormalizedRect(0.0, 0.15, 0.2, 0.25); 0399 0400 CREATE_PAGE; 0401 0402 Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("Only one column"), Okular::FromTop, Qt::CaseSensitive, nullptr); 0403 QVERIFY(result); 0404 delete result; 0405 0406 delete page; 0407 } 0408 0409 void SearchTest::testTwoColumns() 0410 { 0411 // Tests that the layout analysis algorithm can detect two columns. 0412 0413 QVector<QString> text; 0414 text << QStringLiteral("This") << QStringLiteral("text") << QStringLiteral("in") << QStringLiteral("two") << QStringLiteral("is") << QStringLiteral("set") << QStringLiteral("columns."); 0415 0416 // characters, word breaks and line breaks have length 0.05 0417 QVector<Okular::NormalizedRect> rect; 0418 rect << Okular::NormalizedRect(0.0, 0.0, 0.20, 0.1) << Okular::NormalizedRect(0.25, 0.0, 0.45, 0.1) << Okular::NormalizedRect(0.6, 0.0, 0.7, 0.1) << Okular::NormalizedRect(0.75, 0.0, 0.9, 0.1) 0419 << Okular::NormalizedRect(0.0, 0.15, 0.1, 0.25) << Okular::NormalizedRect(0.15, 0.15, 0.3, 0.25) << Okular::NormalizedRect(0.6, 0.15, 1.0, 0.25); 0420 0421 CREATE_PAGE; 0422 0423 Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("This text in"), Okular::FromTop, Qt::CaseSensitive, nullptr); 0424 QVERIFY(!result); 0425 delete result; 0426 0427 delete page; 0428 } 0429 0430 QTEST_MAIN(SearchTest) 0431 #include "searchtest.moc"