File indexing completed on 2024-05-12 16:06:05

0001 /*
0002     SPDX-FileCopyrightText: 2013 Albert Astals Cid <aacid@kde.org>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 // clazy:excludeall=qstring-allocations
0008 
0009 #include <QMimeDatabase>
0010 #include <QSignalSpy>
0011 #include <QTest>
0012 
0013 #include "../core/document.h"
0014 #include "../core/page.h"
0015 #include "../core/textpage.h"
0016 #include "../settings_core.h"
0017 
// Make Okular::Document::SearchStatus known to Qt's meta-type system so that values of
// this enum can be stored in a QVariant (QSignalSpy records signal arguments that way).
Q_DECLARE_METATYPE(Okular::Document::SearchStatus)
0019 
0020 class SearchFinishedReceiver : public QObject
0021 {
0022     Q_OBJECT
0023 
0024 public Q_SLOTS:
0025     void searchFinished(int id, Okular::Document::SearchStatus status)
0026     {
0027         m_id = id;
0028         m_status = status;
0029     }
0030 
0031 public:
0032     int m_id;
0033     Okular::Document::SearchStatus m_status;
0034 };
0035 
// Test fixture for text search. Every private slot below is a test function; most of
// them build a page by hand and query Okular::TextPage::findText, while test311232
// drives a search through Okular::Document on a real PDF file.
class SearchTest : public QObject
{
    Q_OBJECT

private Q_SLOTS:
    // One-time setup (meta-type registration and settings instance).
    void initTestCase();
    // Sequences of FromTop/FromBottom/NextResult/PreviousResult searches on one page.
    void testNextAndPrevious();
    // Numbered tests: presumably named after the bug report they guard against.
    void test311232();
    void test323262();
    void test323263();
    void test430243();
    // Case-insensitive search for the capital dotted i.
    void testDottedI();
    // Words split by hyphens within and at the end of lines, or at the end of the page.
    void testHyphenAtEndOfLineWithoutYOverlap();
    void testHyphenWithYOverlap();
    void testHyphenAtEndOfPage();
    // Layout analysis: column detection.
    void testOneColumn();
    void testTwoColumns();
};
0054 
// One-time setup for the whole test class.
void SearchTest::initTestCase()
{
    // Runtime registration of the status enum that is carried by the
    // Okular::Document::searchFinished signal observed in test311232.
    qRegisterMetaType<Okular::Document::SearchStatus>();
    // Obtain the core settings instance; "searchtest" is presumably the name of the
    // configuration used for this test run (verify against SettingsCore::instance).
    Okular::SettingsCore::instance(QStringLiteral("searchtest"));
}
0060 
0061 static void createTextPage(const QVector<QString> &text, const QVector<Okular::NormalizedRect> &rect, Okular::TextPage *&tp, Okular::Page *&page)
0062 {
0063     tp = new Okular::TextPage();
0064     for (int i = 0; i < text.size(); i++) {
0065         tp->append(text[i], rect[i]);
0066     }
0067 
0068     // The Page::setTextPage method invokes the layout analysis algorithms tested by some tests here
0069     // and also sets the tp->d->m_page field (the latter was used in older versions of Okular by
0070     // TextPage::stringLengthAdaptedWithHyphen).
0071     // Note that calling "delete page;" will delete the TextPage as well.
0072     page = new Okular::Page(1, 100, 100, Okular::Rotation0);
0073     page->setTextPage(tp);
0074 }
0075 
// Creates a page from the variables "text" and "rect" that must exist in the expanding
// scope. The macro first checks that both containers have the same size (returning from
// the test function via QCOMPARE otherwise) and then declares two local variables,
// "page" and "tp", which are filled in by createTextPage(). The pointers start out as
// nullptr so that they never hold an indeterminate value. The expanding code is
// responsible for calling "delete page;" (which also deletes the text page).
#define CREATE_PAGE \
    QCOMPARE(text.size(), rect.size()); \
    Okular::Page *page = nullptr; \
    Okular::TextPage *tp = nullptr; \
    createTextPage(text, rect, tp, page);
0081 
// Runs one search for "searchString" on "tp" (both taken from the expanding scope) in
// the given direction and checks whether a match was found (expectedStatus == true) or
// not (expectedStatus == false). The result area is released before the comparison
// because a failing QCOMPARE returns from the test function and would otherwise leak it.
#define TEST_NEXT_PREV(searchType, expectedStatus) \
    { \
        Okular::RegularAreaRect *result = tp->findText(0, searchString, searchType, Qt::CaseSensitive, nullptr); \
        const bool matchFound = (result != nullptr); \
        delete result; \
        QCOMPARE(matchFound, expectedStatus); \
    }
0088 
0089 // The test testNextAndPrevious checks that
0090 // a) if one starts a new search, then the first or last match is found, depending on the search direction
0091 //   (2 cases: FromTop/FromBottom)
0092 // b) if the last search has found a match,
//   then clicking the "Next" button moves to the next occurrence and "Previous" to the previous one
//   (if there is any). Altogether there are four combinations of the last search and new search
//   direction: Next-Next, Previous-Previous, Next-Previous, Previous-Next; the first two combinations
//   have two subcases (the new search may give a match or not, so altogether 6 cases to test).
0097 // This gives 8 cases altogether. By taking into account the cases where the last search has given no match,
0098 // we would have 4 more cases (Next (no match)-Next, Next (no match)-Previous, Previous (no match)-Previous,
0099 // Previous (no match)-Next), but those are more the business of Okular::Document::searchText rather than
0100 // Okular::TextPage (at least in the multi-page case).
0101 
0102 //   We have four test situations: four documents and four corresponding search strings.
0103 //   The first situation (document="ababa", search string="b") is a generic one where the
0104 // two matches are not side-by-side and neither the first character nor the last character of
0105 // the document match. The only special thing is that the search string has only length 1.
//   The second situation (document="abab", search string="ab") is notable in that the two occurrences
// of the search string are side-by-side with no characters in between, so some off-by-one errors
// would be detected by this test. As the first match starts at the beginning of the document and the
// last match ends at the end of the document, it also detects off-by-one errors for finding the first/last match.
//   The third situation (document="abababa", search string="aba") is notable because it shows whether
// the next match is allowed to contain letters from the previous one: currently it is not
// (as in the majority of browsers, viewers and editors), and therefore "abababa" is considered to
// contain not three but two occurrences of "aba" (if one starts the search from the beginning of the document).
0114 //   The fourth situation (document="a ba b", search string="a b") demonstrates the case when one TinyTextEntity
0115 // contains multiple characters that are contained in different matches (namely, the middle "ba" is one TinyTextEntity);
0116 // in particular, since these matches are side-by-side, this test would detect some off-by-one
0117 // offset errors.
0118 
0119 void SearchTest::testNextAndPrevious()
0120 {
0121 #define TEST_NEXT_PREV_SITUATION_COUNT 4
0122 
0123     QVector<QString> texts[TEST_NEXT_PREV_SITUATION_COUNT] = {QVector<QString>() << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a"),
0124                                                               QVector<QString>() << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a") << QStringLiteral("b"),
0125                                                               QVector<QString>() << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a") << QStringLiteral("b") << QStringLiteral("a") << QStringLiteral("b")
0126                                                                                  << QStringLiteral("a"),
0127                                                               QVector<QString>() << QStringLiteral("a") << QStringLiteral(" ") << QStringLiteral("ba") << QStringLiteral(" ") << QStringLiteral("b")};
0128 
0129     QString searchStrings[TEST_NEXT_PREV_SITUATION_COUNT] = {QStringLiteral("b"), QStringLiteral("ab"), QStringLiteral("aba"), QStringLiteral("a b")};
0130 
0131     for (int i = 0; i < TEST_NEXT_PREV_SITUATION_COUNT; i++) {
0132         const QVector<QString> &text = texts[i];
0133         const QString &searchString = searchStrings[i];
0134 
0135         QVector<Okular::NormalizedRect> rect;
0136 
0137         for (int i = 0; i < text.size(); i++) {
0138             rect << Okular::NormalizedRect(0.1 * i, 0.0, 0.1 * (i + 1), 0.1);
0139         }
0140 
0141         CREATE_PAGE;
0142 
0143         // Test 3 of the 8 cases listed above:
0144         // FromTop, Next-Next (match) and Next-Next (no match)
0145         TEST_NEXT_PREV(Okular::FromTop, true);
0146         TEST_NEXT_PREV(Okular::NextResult, true);
0147         TEST_NEXT_PREV(Okular::NextResult, false);
0148 
0149         // Test 5 cases: FromBottom, Previous-Previous (match), Previous-Next,
0150         // Next-Previous, Previous-Previous (no match)
0151         TEST_NEXT_PREV(Okular::FromBottom, true);
0152         TEST_NEXT_PREV(Okular::PreviousResult, true);
0153         TEST_NEXT_PREV(Okular::NextResult, true);
0154         TEST_NEXT_PREV(Okular::PreviousResult, true);
0155         TEST_NEXT_PREV(Okular::PreviousResult, false);
0156 
0157         delete page;
0158     }
0159 }
0160 
0161 void SearchTest::test311232()
0162 {
0163     Okular::Document d(nullptr);
0164     SearchFinishedReceiver receiver;
0165     QSignalSpy spy(&d, &Okular::Document::searchFinished);
0166 
0167     QObject::connect(&d, &Okular::Document::searchFinished, &receiver, &SearchFinishedReceiver::searchFinished);
0168 
0169     const QString testFile = QStringLiteral(KDESRCDIR "data/file1.pdf");
0170     QMimeDatabase db;
0171     const QMimeType mime = db.mimeTypeForFile(testFile);
0172     d.openDocument(testFile, QUrl(), mime);
0173 
0174     const int searchId = 0;
0175     d.searchText(searchId, QStringLiteral(" i "), true, Qt::CaseSensitive, Okular::Document::NextMatch, false, QColor());
0176     QTRY_COMPARE(spy.count(), 1);
0177     QCOMPARE(receiver.m_id, searchId);
0178     QCOMPARE(receiver.m_status, Okular::Document::MatchFound);
0179 
0180     d.continueSearch(searchId, Okular::Document::PreviousMatch);
0181     QTRY_COMPARE(spy.count(), 2);
0182     QCOMPARE(receiver.m_id, searchId);
0183     QCOMPARE(receiver.m_status, Okular::Document::NoMatchFound);
0184 }
0185 
0186 void SearchTest::test323262()
0187 {
0188     QVector<QString> text;
0189     text << QStringLiteral("a\n");
0190 
0191     QVector<Okular::NormalizedRect> rect;
0192     rect << Okular::NormalizedRect(1, 2, 3, 4);
0193 
0194     CREATE_PAGE;
0195 
0196     Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("a"), Okular::FromBottom, Qt::CaseSensitive, nullptr);
0197     QVERIFY(result);
0198     delete result;
0199 
0200     delete page;
0201 }
0202 
0203 void SearchTest::test323263()
0204 {
0205     QVector<QString> text;
0206     text << QStringLiteral("a") << QStringLiteral("a") << QStringLiteral("b");
0207 
0208     QVector<Okular::NormalizedRect> rect;
0209     rect << Okular::NormalizedRect(0, 0, 1, 1) << Okular::NormalizedRect(1, 0, 2, 1) << Okular::NormalizedRect(2, 0, 3, 1);
0210 
0211     CREATE_PAGE;
0212 
0213     Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("ab"), Okular::FromTop, Qt::CaseSensitive, nullptr);
0214     QVERIFY(result);
0215     Okular::RegularAreaRect expected;
0216     expected.append(rect[1]);
0217     expected.append(rect[2]);
0218     expected.simplify();
0219     QCOMPARE(*result, expected);
0220     delete result;
0221 
0222     delete page;
0223 }
0224 
0225 void SearchTest::test430243()
0226 {
0227     // 778 is COMBINING RING ABOVE
0228     // 197 is LATIN CAPITAL LETTER A WITH RING ABOVE
0229     QVector<QString> text;
0230     text << QStringLiteral("A") << QString(QChar(778));
0231 
0232     QVector<Okular::NormalizedRect> rect;
0233     rect << Okular::NormalizedRect(0, 0, 1, 1) << Okular::NormalizedRect(1, 0, 2, 1);
0234 
0235     CREATE_PAGE;
0236 
0237     Okular::RegularAreaRect *result = tp->findText(0, QString(QChar(197)), Okular::FromTop, Qt::CaseSensitive, nullptr);
0238     QVERIFY(result);
0239     Okular::RegularAreaRect expected;
0240     expected.append(rect[0] | rect[1]);
0241     QCOMPARE(*result, expected);
0242     delete result;
0243 
0244     delete page;
0245 }
0246 
0247 void SearchTest::testDottedI()
0248 {
0249     // Earlier versions of okular had the bug that the letter "İ" (capital dotter i) did not match itself
0250     // in case-insensitive mode (this was caused by an unnecessary call of toLower() and the fact that
0251     // QString::fromUtf8("İ").compare(QString::fromUtf8("İ").toLower(), Qt::CaseInsensitive) == FALSE,
0252     // at least in Qt 4.8).
0253 
0254     // In the future it would be nice to add support for matching "İ"<->"i" and "I"<->"ı" in case-insensitive
0255     // mode as well (QString::compare does not match them, at least in non-Turkish locales, since it follows
0256     // the Unicode case-folding rules https://www.unicode.org/Public/6.2.0/ucd/CaseFolding.txt).
0257 
0258     QVector<QString> text;
0259     text << QStringLiteral("İ");
0260 
0261     QVector<Okular::NormalizedRect> rect;
0262     rect << Okular::NormalizedRect(1, 2, 3, 4);
0263 
0264     CREATE_PAGE;
0265 
0266     Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("İ"), Okular::FromTop, Qt::CaseInsensitive, nullptr);
0267     QVERIFY(result);
0268     delete result;
0269 
0270     delete page;
0271 }
0272 
0273 void SearchTest::testHyphenAtEndOfLineWithoutYOverlap()
0274 {
0275     QVector<QString> text;
0276     text << QStringLiteral("super-") << QStringLiteral("cali-\n") << QStringLiteral("fragilistic") << QStringLiteral("-") << QStringLiteral("expiali") << QStringLiteral("-\n") << QStringLiteral("docious");
0277 
0278     QVector<Okular::NormalizedRect> rect;
0279     rect << Okular::NormalizedRect(0.4, 0.0, 0.9, 0.1) << Okular::NormalizedRect(0.0, 0.1, 0.6, 0.2) << Okular::NormalizedRect(0.0, 0.2, 0.8, 0.3) << Okular::NormalizedRect(0.8, 0.2, 0.9, 0.3) << Okular::NormalizedRect(0.0, 0.3, 0.8, 0.4)
0280          << Okular::NormalizedRect(0.8, 0.3, 0.9, 0.4) << Okular::NormalizedRect(0.0, 0.4, 0.7, 0.5);
0281 
0282     CREATE_PAGE;
0283 
0284     Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("supercalifragilisticexpialidocious"), Okular::FromTop, Qt::CaseSensitive, nullptr);
0285     QVERIFY(result);
0286     Okular::RegularAreaRect expected;
0287     for (int i = 0; i < text.size(); i++) {
0288         expected.append(rect[i]);
0289     }
0290     expected.simplify();
0291     QCOMPARE(*result, expected);
0292     delete result;
0293 
0294     result = tp->findText(0, QStringLiteral("supercalifragilisticexpialidocious"), Okular::FromBottom, Qt::CaseSensitive, nullptr);
0295     QVERIFY(result);
0296     QCOMPARE(*result, expected);
0297     delete result;
0298 
0299     // If the user is looking for the text explicitly with the hyphen also find it
0300     result = tp->findText(0, QStringLiteral("super-cali-fragilistic"), Okular::FromTop, Qt::CaseSensitive, nullptr);
0301     QVERIFY(result);
0302     delete result;
0303 
0304     // If the user is looking for the text explicitly with the hyphen also find it
0305     result = tp->findText(0, QStringLiteral("super-cali-fragilistic"), Okular::FromBottom, Qt::CaseSensitive, nullptr);
0306     QVERIFY(result);
0307     delete result;
0308 
0309     delete page;
0310 }
0311 
// Creates a page from the variables "text" and "rect" of the expanding scope (see
// CREATE_PAGE), searches it from the top for the string literal searchString and checks
// that a match was found exactly when matchExpected is true. The result area and the
// page are released before the comparison because a failing QCOMPARE returns from the
// test function and would otherwise leak both.
#define CREATE_PAGE_AND_TEST_SEARCH(searchString, matchExpected) \
    { \
        CREATE_PAGE; \
 \
        Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral(searchString), Okular::FromTop, Qt::CaseSensitive, nullptr); \
        const bool matchFound = (result != nullptr); \
 \
        delete result; \
        delete page; \
 \
        QCOMPARE(matchFound, matchExpected); \
    }
0323 
0324 void SearchTest::testHyphenWithYOverlap()
0325 {
0326     QVector<QString> text;
0327     text << QStringLiteral("a-") << QStringLiteral("b");
0328 
0329     QVector<Okular::NormalizedRect> rect(2);
0330 
0331     // different lines (50% y-coordinate overlap), first rectangle has larger height
0332     rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.9, 0.35);
0333     rect[1] = Okular::NormalizedRect(0.0, 0.3, 0.2, 0.4);
0334     CREATE_PAGE_AND_TEST_SEARCH("ab", true);
0335 
0336     // different lines (50% y-coordinate overlap), second rectangle has larger height
0337     rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.9, 0.1);
0338     rect[1] = Okular::NormalizedRect(0.0, 0.05, 0.2, 0.4);
0339     CREATE_PAGE_AND_TEST_SEARCH("ab", true);
0340 
0341     // same line (90% y-coordinate overlap), first rectangle has larger height
0342     rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.4, 0.2);
0343     rect[1] = Okular::NormalizedRect(0.4, 0.11, 0.6, 0.21);
0344     CREATE_PAGE_AND_TEST_SEARCH("ab", false);
0345     CREATE_PAGE_AND_TEST_SEARCH("a-b", true);
0346 
0347     // same line (90% y-coordinate overlap), second rectangle has larger height
0348     rect[0] = Okular::NormalizedRect(0.0, 0.0, 0.4, 0.1);
0349     rect[1] = Okular::NormalizedRect(0.4, 0.01, 0.6, 0.2);
0350     CREATE_PAGE_AND_TEST_SEARCH("ab", false);
0351     CREATE_PAGE_AND_TEST_SEARCH("a-b", true);
0352 }
0353 
0354 void SearchTest::testHyphenAtEndOfPage()
0355 {
0356     // Tests for segmentation fault that would occur if
0357     // we tried look ahead (for determining whether the
0358     // next character is at the same line) at the end of the page.
0359 
0360     QVector<QString> text;
0361     text << QStringLiteral("a-");
0362 
0363     QVector<Okular::NormalizedRect> rect;
0364     rect << Okular::NormalizedRect(0, 0, 1, 1);
0365 
0366     CREATE_PAGE;
0367 
0368     {
0369         Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("a"), Okular::FromTop, Qt::CaseSensitive, nullptr);
0370         QVERIFY(result);
0371         delete result;
0372     }
0373 
0374     {
0375         Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("a"), Okular::FromBottom, Qt::CaseSensitive, nullptr);
0376         QVERIFY(result);
0377         delete result;
0378     }
0379 
0380     delete page;
0381 }
0382 
0383 void SearchTest::testOneColumn()
0384 {
0385     // Tests that the layout analysis algorithm does not create too many columns.
0386     // Bug 326207 was caused by the fact that if all the horizontal breaks in a line
0387     // had the same length and were smaller than vertical breaks between lines then
0388     // the horizontal breaks were treated as column separators.
0389     //(Note that "same length" means "same length after rounding rectangles to integer pixels".
0390     // The resolution used by the XY Cut algorithm with a square page is 1000 x 1000,
0391     // and the horizontal spaces in the example are 0.1, so they are indeed both exactly 100 pixels.)
0392 
0393     QVector<QString> text;
0394     text << QStringLiteral("Only") << QStringLiteral("one") << QStringLiteral("column") << QStringLiteral("here");
0395 
0396     // characters and line breaks have length 0.05, word breaks 0.1
0397     QVector<Okular::NormalizedRect> rect;
0398     rect << Okular::NormalizedRect(0.0, 0.0, 0.2, 0.1) << Okular::NormalizedRect(0.3, 0.0, 0.5, 0.1) << Okular::NormalizedRect(0.6, 0.0, 0.9, 0.1) << Okular::NormalizedRect(0.0, 0.15, 0.2, 0.25);
0399 
0400     CREATE_PAGE;
0401 
0402     Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("Only one column"), Okular::FromTop, Qt::CaseSensitive, nullptr);
0403     QVERIFY(result);
0404     delete result;
0405 
0406     delete page;
0407 }
0408 
0409 void SearchTest::testTwoColumns()
0410 {
0411     // Tests that the layout analysis algorithm can detect two columns.
0412 
0413     QVector<QString> text;
0414     text << QStringLiteral("This") << QStringLiteral("text") << QStringLiteral("in") << QStringLiteral("two") << QStringLiteral("is") << QStringLiteral("set") << QStringLiteral("columns.");
0415 
0416     // characters, word breaks and line breaks have length 0.05
0417     QVector<Okular::NormalizedRect> rect;
0418     rect << Okular::NormalizedRect(0.0, 0.0, 0.20, 0.1) << Okular::NormalizedRect(0.25, 0.0, 0.45, 0.1) << Okular::NormalizedRect(0.6, 0.0, 0.7, 0.1) << Okular::NormalizedRect(0.75, 0.0, 0.9, 0.1)
0419          << Okular::NormalizedRect(0.0, 0.15, 0.1, 0.25) << Okular::NormalizedRect(0.15, 0.15, 0.3, 0.25) << Okular::NormalizedRect(0.6, 0.15, 1.0, 0.25);
0420 
0421     CREATE_PAGE;
0422 
0423     Okular::RegularAreaRect *result = tp->findText(0, QStringLiteral("This text in"), Okular::FromTop, Qt::CaseSensitive, nullptr);
0424     QVERIFY(!result);
0425     delete result;
0426 
0427     delete page;
0428 }
0429 
// Generates the main() function that runs all test functions of SearchTest.
QTEST_MAIN(SearchTest)
// The classes with Q_OBJECT are declared in this .cpp file, so the moc output
// generated for it has to be included here.
#include "searchtest.moc"