File indexing completed on 2024-04-14 03:49:36

0001 /*
0002     This file is part of the KDE Baloo project.
0003     SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-or-later
0006 */
0007 
0008 #include "database.h"
0009 #include "transaction.h"
0010 #include "document.h"
0011 #include "termgenerator.h"
0012 #include "enginequery.h"
0013 #include "idutils.h"
0014 #include "query.h"
0015 
0016 #include <memory>
0017 #include <QTest>
0018 #include <QTemporaryDir>
0019 
0020 using namespace Baloo;
0021 
0022 class SortedIdVector : public QVector<quint64> {
0023     public:
0024         SortedIdVector(const QVector<quint64>& list)
0025         : QVector<quint64>(list) {
0026             std::sort(begin(), end());
0027         }
0028         SortedIdVector(std::initializer_list<quint64> args)
0029         : SortedIdVector(QVector<quint64>(args)) {}
0030 };
0031 
0032 char *toString(const QVector<quint64> &idlist)
0033 {
0034     QByteArray text("IDs[");
0035     text += QByteArray::number(idlist.size()) + "]:";
0036     for (auto id : idlist) {
0037         text += " " + QByteArray::number(id, 16);
0038     }
0039     return qstrdup(text.data());
0040 }
0041 
0042 namespace {
0043 QVector<quint64> execQuery(const Transaction& tr, const EngineQuery& query)
0044 {
0045     PostingIterator* it = tr.postingIterator(query);
0046     if (!it) {
0047         return {};
0048     }
0049 
0050     QVector<quint64> results;
0051     while (it->next()) {
0052         results << it->docId();
0053     }
0054     return results;
0055 }
0056 } // namespace
0057 
0058 class QueryTest : public QObject
0059 {
0060     Q_OBJECT
0061 private Q_SLOTS:
0062     void initTestCase() {
0063         dir = std::make_unique<QTemporaryDir>();
0064     }
0065 
0066     void init() {
0067         dbDir = std::make_unique<QTemporaryDir>();
0068         db = std::make_unique<Database>(dbDir->path());
0069         db->open(Database::CreateDatabase);
0070         setenv("BALOO_DB_PATH", dbDir->path().toStdString().c_str(), 1);
0071 
0072         m_parentId = filePathToId(QFile::encodeName(dir->path()));
0073         m_id1 = m_parentId + 1;
0074         m_id2 = m_parentId + 2;
0075         m_id3 = m_parentId + 3;
0076         m_id4 = m_parentId + 4;
0077         m_id5 = m_parentId + 5;
0078         m_id6 = m_parentId + 6;
0079         m_id7 = m_parentId + 7;
0080         m_id8 = m_parentId + 8;
0081         m_id9 = m_parentId + 9;
0082 
0083         insertDocuments();
0084         insertTagDocuments();
0085     }
0086 
0087     void cleanup() {
0088         db.reset();
0089         dbDir.reset();
0090     }
0091 
0092     void testTermEqual();
0093     void testTermStartsWith();
0094     void testTermPhrase_data();
0095     void testTermPhrase();
0096 
0097     void testTagTerm_data();
0098     void testTagTerm();
0099     void testTagTermPhrase_data();
0100     void testTagTermPhrase();
0101 
0102     void testSearchstringParser();
0103     void testSearchstringParser_data();
0104 
0105 private:
0106     std::unique_ptr<QTemporaryDir> dir;
0107     std::unique_ptr<QTemporaryDir> dbDir;
0108     std::unique_ptr<Database> db;
0109     quint64 m_parentId = 0;
0110 
0111     void insertDocuments();
0112     void addDocument(Transaction* tr,const QString& text, quint64 id, const QString& fileName)
0113     {
0114         Document doc;
0115         QString url = dir->path() + QLatin1Char('/') + fileName;
0116         doc.setUrl(QFile::encodeName(url));
0117 
0118         TermGenerator tg(doc);
0119         tg.indexText(text);
0120         tg.indexFileNameText(fileName);
0121         doc.setId(id);
0122         doc.setParentId(m_parentId);
0123         doc.setMTime(1);
0124         doc.setCTime(2);
0125 
0126         tr->addDocument(doc);
0127     }
0128 
0129     void renameDocument(Transaction* tr, quint64 id, const QString& newName)
0130     {
0131         Document doc;
0132 
0133         TermGenerator tg(doc);
0134         tg.indexFileNameText(newName);
0135         doc.setId(id);
0136         doc.setParentId(m_parentId);
0137         doc.setUrl(QFile::encodeName(newName));
0138 
0139         tr->replaceDocument(doc, FileNameTerms | DocumentUrl);
0140     }
0141 
0142     void insertTagDocuments();
0143     void addTagDocument(Transaction* tr,const QStringList& tags, quint64 id, const QString& fileName)
0144     {
0145         Document doc;
0146         QString url = dir->path() + QLatin1Char('/') + fileName;
0147         doc.setUrl(QFile::encodeName(url));
0148 
0149         TermGenerator tg(doc);
0150         tg.indexText(QStringLiteral("text/plain"), QByteArray("M"));
0151         for (const QString& tag : tags) {
0152             tg.indexXattrText(tag, QByteArray("TA"));
0153         }
0154         tg.indexFileNameText(fileName);
0155         doc.setId(id);
0156         doc.setParentId(m_parentId);
0157         doc.setMTime(3);
0158         doc.setCTime(4);
0159 
0160         tr->addDocument(doc);
0161     }
0162 
0163     quint64 m_id1;
0164     quint64 m_id2;
0165     quint64 m_id3;
0166     quint64 m_id4;
0167     quint64 m_id5;
0168     quint64 m_id6;
0169     quint64 m_id7;
0170     quint64 m_id8;
0171     quint64 m_id9;
0172 };
0173 
0174 void QueryTest::insertDocuments()
0175 {
0176     Transaction tr(db.get(), Transaction::ReadWrite);
0177     addDocument(&tr, QStringLiteral("The quick brown fox jumped over the crazy dog"), m_id1, QStringLiteral("file1.txt"));
0178     addDocument(&tr, QStringLiteral("The quick brown fox jumped over the lazy dog"), m_id7, QStringLiteral("file7_lazy"));
0179     addDocument(&tr, QStringLiteral("A quick brown fox ran around a easy dog"), m_id8, QStringLiteral("file8_dog"));
0180     addDocument(&tr, QStringLiteral("The night is dark and full of terror"), m_id2, QStringLiteral("file2"));
0181     addDocument(&tr, QStringLiteral("Don't feel sorry for yourself. Only assholes do that"), m_id3, QStringLiteral("file3"));
0182     addDocument(&tr, QStringLiteral("Only the dead stay 17 forever. crazy"), m_id4, QStringLiteral("file4"));
0183     addDocument(&tr, QStringLiteral("Some content with isolated dot . Test it"), m_id9, QStringLiteral("file - with hyphen.txt"));
0184 
0185     renameDocument(&tr, m_id8, QStringLiteral("file8_easy"));
0186     tr.commit();
0187 }
0188 
0189 void QueryTest::insertTagDocuments()
0190 {
0191     Transaction tr(db.get(), Transaction::ReadWrite);
0192     addTagDocument(&tr, {
0193     QStringLiteral("One"),
0194     QStringLiteral("Two"),
0195     QStringLiteral("Three"),
0196     QStringLiteral("Four"),
0197     QStringLiteral("F1")
0198     }, m_id5, QStringLiteral("tagFile1"));
0199     addTagDocument(&tr, {
0200     QStringLiteral("One"),
0201     QStringLiteral("Two-Three"),
0202     QStringLiteral("Four"),
0203     QStringLiteral("F2")
0204     }, m_id6, QStringLiteral("tagFile2"));
0205     tr.commit();
0206 }
0207 
0208 void QueryTest::testTermEqual()
0209 {
0210     EngineQuery q("the");
0211 
0212     QVector<quint64> result = SortedIdVector{m_id1, m_id2, m_id4, m_id7};
0213     Transaction tr(db.get(), Transaction::ReadOnly);
0214     QCOMPARE(execQuery(tr, q), result);
0215 }
0216 
0217 void QueryTest::testTermStartsWith()
0218 {
0219     EngineQuery q("for", EngineQuery::StartsWith);
0220 
0221     QVector<quint64> result = SortedIdVector{m_id3, m_id4};
0222     Transaction tr(db.get(), Transaction::ReadOnly);
0223     QCOMPARE(execQuery(tr, q), result);
0224 }
0225 
0226 void QueryTest::testTermPhrase_data()
0227 {
0228     QTest::addColumn<QByteArrayList>("phrase");
0229     QTest::addColumn<QVector<quint64>>("contentMatches");
0230     QTest::addColumn<QVector<quint64>>("filenameMatches");
0231 
0232     auto addRow = [](const char* name, const QByteArrayList& phrase,
0233                      const QVector<quint64> contentMatches,
0234                      const QVector<quint64> filenameMatches)
0235         { QTest::addRow("%s", name) << phrase << contentMatches << filenameMatches;};
0236 
0237     // Content matches
0238     addRow("Crazy dog",        {QByteArrayLiteral("crazy"), QByteArrayLiteral("dog")},  SortedIdVector{ m_id1 }, {});
0239     addRow("Lazy dog",         {QByteArrayLiteral("lazy"),  QByteArrayLiteral("dog")},  SortedIdVector{ m_id7 }, {});
0240     addRow("Brown fox",        {QByteArrayLiteral("brown"), QByteArrayLiteral("fox")},  SortedIdVector{ m_id1, m_id7, m_id8 }, {});
0241     addRow("Dog",              {QByteArrayLiteral("dog")},                              SortedIdVector{ m_id1, m_id7, m_id8 }, {});
0242     // Filename matches
0243     addRow("Crazy dog file 1", {QByteArrayLiteral("file1")},                            {}, SortedIdVector{ m_id1 });
0244     addRow("Crazy dog file 2", {QByteArrayLiteral("file1"), QByteArrayLiteral("txt")},  {}, SortedIdVector{ m_id1 });
0245     addRow("Lazy dog file 1",  {QByteArrayLiteral("file7")},                            {}, SortedIdVector{ m_id7 });
0246     addRow("Lazy dog file 2",  {QByteArrayLiteral("file7"), QByteArrayLiteral("lazy")}, {}, SortedIdVector{ m_id7 });
0247     // Matches content and filename
0248     addRow("Lazy both",        {QByteArrayLiteral("lazy")},                             { m_id7 }, { m_id7 });
0249     addRow("Easy both",        {QByteArrayLiteral("easy")},                             { m_id8 }, { m_id8 });
0250 }
0251 
0252 void QueryTest::testTermPhrase()
0253 {
0254     QFETCH(QByteArrayList, phrase);
0255     QFETCH(QVector<quint64>, contentMatches);
0256     QFETCH(QVector<quint64>, filenameMatches);
0257 
0258     QVector<EngineQuery> queries;
0259     for (const QByteArray& term : phrase) {
0260         queries << EngineQuery(term);
0261     }
0262     EngineQuery q(queries);
0263 
0264     Transaction tr(db.get(), Transaction::ReadOnly);
0265     QCOMPARE(execQuery(tr, q), contentMatches);
0266 
0267     queries.clear();
0268     const QByteArray fPrefix = QByteArrayLiteral("F");
0269     for (QByteArray term : phrase) {
0270         term = fPrefix + term;
0271         queries << EngineQuery(term);
0272     }
0273     EngineQuery qf(queries);
0274     QCOMPARE(execQuery(tr, qf), filenameMatches);
0275 }
0276 
0277 void QueryTest::testTagTerm_data()
0278 {
0279     QTest::addColumn<QByteArray>("term");
0280     QTest::addColumn<QVector<quint64>>("matchIds");
0281 
0282     QTest::addRow("Simple match") << QByteArray("one")
0283         << QVector<quint64> { m_id5, m_id6 };
0284     QTest::addRow("Only one") << QByteArray("f1")
0285         << QVector<quint64> { m_id5 };
0286     QTest::addRow("Also from phrase") << QByteArray("three")
0287         << QVector<quint64> { m_id5, m_id6 };
0288 }
0289 
0290 void QueryTest::testTagTerm()
0291 {
0292     QFETCH(QByteArray, term);
0293     QFETCH(QVector<quint64>, matchIds);
0294 
0295     QByteArray prefix{"TA"};
0296     EngineQuery q(prefix + term);
0297 
0298     Transaction tr(db.get(), Transaction::ReadOnly);
0299     QCOMPARE(execQuery(tr, q), matchIds);
0300 }
0301 
0302 void QueryTest::testTagTermPhrase_data()
0303 {
0304     QTest::addColumn<QByteArrayList>("terms");
0305     QTest::addColumn<QVector<quint64>>("matchIds");
0306 
0307     QTest::addRow("Simple match") << QByteArrayList({"one"})
0308         << QVector<quint64> { m_id5, m_id6 };
0309     QTest::addRow("Apart") << QByteArrayList({"two", "four"})
0310         << QVector<quint64> { };
0311     QTest::addRow("Adjacent") << QByteArrayList({"three", "four"})
0312         << QVector<quint64> { };
0313     QTest::addRow("Only phrase") << QByteArrayList({"two", "three"})
0314         << QVector<quint64> { m_id6 };
0315 }
0316 
0317 void QueryTest::testTagTermPhrase()
0318 {
0319     QFETCH(QByteArrayList, terms);
0320     QFETCH(QVector<quint64>, matchIds);
0321 
0322     QByteArray prefix{"TA"};
0323     QVector<EngineQuery> queries;
0324     for (const QByteArray& term : terms) {
0325         queries << EngineQuery(prefix + term);
0326     }
0327 
0328     EngineQuery q(queries);
0329 
0330     Transaction tr(db.get(), Transaction::ReadOnly);
0331     auto res = execQuery(tr, q);
0332     QCOMPARE(res, matchIds);
0333 }
0334 
0335 void QueryTest::testSearchstringParser()
0336 {
0337     QFETCH(QString, searchString);
0338     QFETCH(QStringList, expectedFiles);
0339 
0340     Query q;
0341     q.setSearchString(searchString);
0342 
0343     auto res = q.exec();
0344     QStringList matches;
0345     while (res.next()) {
0346         auto path = res.filePath();
0347         auto name = path.section(QLatin1Char('/'), -1, -1);
0348         matches.append(name);
0349     }
0350     QEXPECT_FAIL("Match 'dot . Test'", "Bug 407664: Tries to match isolated dot", Continue);
0351     QEXPECT_FAIL("Match 'file - with hyphen.txt'", "Bug 407664: Tries to match hyphen", Continue);
0352     QCOMPARE(matches, expectedFiles);
0353 }
0354 
0355 void QueryTest::testSearchstringParser_data()
0356 {
0357     QTest::addColumn<QString>("searchString");
0358     QTest::addColumn<QStringList>("expectedFiles");
0359 
0360     auto addRow = [](const QString& searchString,
0361                      const QStringList& filenameMatches)
0362     {
0363         QTest::addRow("Match '%s'", qPrintable(searchString)) << searchString << filenameMatches;
0364     };
0365 
0366     addRow(QStringLiteral("crazy"), { QStringLiteral("file1.txt"), QStringLiteral("file4") });
0367     addRow(QStringLiteral("content:crazy"), { QStringLiteral("file1.txt"), QStringLiteral("file4") });
0368     addRow(QStringLiteral("content:dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy"), QStringLiteral("file8_easy") });
0369     addRow(QStringLiteral("filename:dog"), {});
0370     addRow(QStringLiteral("filename:easy"), { QStringLiteral("file8_easy") });
0371     addRow(QStringLiteral("content:for"), { QStringLiteral("file3"), QStringLiteral("file4") });
0372     addRow(QStringLiteral("content=for"), { QStringLiteral("file3") });
0373     addRow(QStringLiteral("content=don't"), { QStringLiteral("file3") });
0374     addRow(QStringLiteral("content=yourself"), { QStringLiteral("file3") });
0375     addRow(QStringLiteral("content=\"over the\""), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy") });
0376     addRow(QStringLiteral("content=\"over the crazy dog\""), { QStringLiteral("file1.txt") });
0377     addRow(QStringLiteral("content=\"over the dog\""), {});
0378     addRow(QStringLiteral("quick AND crazy AND dog"), { QStringLiteral("file1.txt") });
0379     addRow(QStringLiteral("quick crazy dog"), { QStringLiteral("file1.txt") });
0380     addRow(QStringLiteral("\"quick brown\" dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy"), QStringLiteral("file8_easy") });
0381     addRow(QStringLiteral("\"quick brown\" the dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy") });
0382     addRow(QStringLiteral("\"quick brown\" content=\"the dog\""), {});
0383     addRow(QStringLiteral("\"quick brown\" content=\"'the dog'\""), {});
0384     addRow(QStringLiteral("\"quick brown\" content:\"the dog\""), {});
0385     addRow(QStringLiteral("\"quick brown\" content:\"'the dog'\""), {});
0386     addRow(QStringLiteral("\"quick brown\" \"the crazy dog\""), { QStringLiteral("file1.txt") });
0387     addRow(QStringLiteral("content=for OR filename:eas"), { QStringLiteral("file3"), QStringLiteral("file8_easy") });
0388     addRow(QStringLiteral("for sorry"), { QStringLiteral("file3") });
0389     addRow(QStringLiteral("over OR terror"), {  QStringLiteral("file1.txt"), QStringLiteral("file2"), QStringLiteral("file7_lazy") });
0390 
0391     addRow(QStringLiteral("tag:f1"), {  QStringLiteral("tagFile1") });
0392     addRow(QStringLiteral("tag:f2"), {  QStringLiteral("tagFile2") });
0393     addRow(QStringLiteral("tag:one"), {  QStringLiteral("tagFile1"), QStringLiteral("tagFile2") });
0394     addRow(QStringLiteral("tag:two"), {  QStringLiteral("tagFile1"), QStringLiteral("tagFile2") });
0395     addRow(QStringLiteral("tag:two AND tag:three"), {  QStringLiteral("tagFile1"), QStringLiteral("tagFile2") });
0396     addRow(QStringLiteral("tag:two-three"), { QStringLiteral("tagFile2") });
0397 
0398     addRow(QStringLiteral("filename:hyphen"), { QStringLiteral("file - with hyphen.txt") });
0399     addRow(QStringLiteral("file with hyphen.txt"), { QStringLiteral("file - with hyphen.txt") });
0400     addRow(QStringLiteral("file - with hyphen.txt"), { QStringLiteral("file - with hyphen.txt") });
0401     addRow(QStringLiteral("\"file - with hyphen.txt\""), { QStringLiteral("file - with hyphen.txt") });
0402     addRow(QStringLiteral("content:dot"), { QStringLiteral("file - with hyphen.txt") });
0403     addRow(QStringLiteral("dot . Test"), { QStringLiteral("file - with hyphen.txt") });
0404     addRow(QStringLiteral("\"dot . Test\""), { QStringLiteral("file - with hyphen.txt") });
0405 }
0406 
0407 QTEST_MAIN(QueryTest)
0408 
0409 #include "querytest.moc"