File indexing completed on 2023-09-24 04:01:54

0001 /*
0002     This file is part of the KDE Baloo project.
0003     SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-or-later
0006 */
0007 
0008 #include "database.h"
0009 #include "transaction.h"
0010 #include "document.h"
0011 #include "termgenerator.h"
0012 #include "enginequery.h"
0013 #include "idutils.h"
0014 #include "query.h"
0015 
0016 #include <memory>
0017 #include <QTest>
0018 #include <QTemporaryDir>
0019 
0020 using namespace Baloo;
0021 
0022 class SortedIdVector : public QVector<quint64> {
0023     public:
0024         SortedIdVector(const QVector<quint64>& list)
0025         : QVector<quint64>(list) {
0026             std::sort(begin(), end());
0027         }
0028         SortedIdVector(std::initializer_list<quint64> args)
0029         : SortedIdVector(QVector<quint64>(args)) {}
0030 };
0031 
0032 char *toString(const QVector<quint64> &idlist)
0033 {
0034     QByteArray text("IDs[");
0035     text += QByteArray::number(idlist.size()) + "]:";
0036     for (auto id : idlist) {
0037         text += " " + QByteArray::number(id, 16);
0038     }
0039     return qstrdup(text.data());
0040 }
0041 
0042 namespace {
0043 QVector<quint64> execQuery(const Transaction& tr, const EngineQuery& query)
0044 {
0045     PostingIterator* it = tr.postingIterator(query);
0046     if (!it) {
0047         return {};
0048     }
0049 
0050     QVector<quint64> results;
0051     while (it->next()) {
0052         results << it->docId();
0053     }
0054     return results;
0055 }
0056 } // namespace
0057 
0058 class QueryTest : public QObject
0059 {
0060     Q_OBJECT
0061 private Q_SLOTS:
0062     void initTestCase() {
0063         dir = std::make_unique<QTemporaryDir>();
0064     }
0065 
0066     void init() {
0067         dbDir = std::make_unique<QTemporaryDir>();
0068         db = std::make_unique<Database>(dbDir->path());
0069         db->open(Database::CreateDatabase);
0070         setenv("BALOO_DB_PATH", dbDir->path().toStdString().c_str(), 1);
0071 
0072         m_parentId = filePathToId(QFile::encodeName(dir->path()));
0073         m_id1 = m_parentId + 1;
0074         m_id2 = m_parentId + 2;
0075         m_id3 = m_parentId + 3;
0076         m_id4 = m_parentId + 4;
0077         m_id5 = m_parentId + 5;
0078         m_id6 = m_parentId + 6;
0079         m_id7 = m_parentId + 7;
0080         m_id8 = m_parentId + 8;
0081 
0082         insertDocuments();
0083     }
0084 
0085     void cleanup() {
0086         db.reset();
0087         dbDir.reset();
0088     }
0089 
0090     void testTermEqual();
0091     void testTermStartsWith();
0092     void testTermAnd();
0093     void testTermOr();
0094     void testTermPhrase_data();
0095     void testTermPhrase();
0096 
0097     void testTagTermAnd_data();
0098     void testTagTermAnd();
0099     void testTagTermPhrase_data();
0100     void testTagTermPhrase();
0101 
0102     void testSearchstringParser();
0103     void testSearchstringParser_data();
0104 
0105 private:
0106     std::unique_ptr<QTemporaryDir> dir;
0107     std::unique_ptr<QTemporaryDir> dbDir;
0108     std::unique_ptr<Database> db;
0109     quint64 m_parentId = 0;
0110 
0111     void insertDocuments();
0112     void addDocument(Transaction* tr,const QString& text, quint64 id, const QString& fileName)
0113     {
0114         Document doc;
0115         QString url = dir->path() + QLatin1Char('/') + fileName;
0116         doc.setUrl(QFile::encodeName(url));
0117 
0118         TermGenerator tg(doc);
0119         tg.indexText(text);
0120         tg.indexFileNameText(fileName);
0121         doc.setId(id);
0122         doc.setParentId(m_parentId);
0123         doc.setMTime(1);
0124         doc.setCTime(2);
0125 
0126         tr->addDocument(doc);
0127     }
0128 
0129     void renameDocument(Transaction* tr, quint64 id, const QString& newName)
0130     {
0131         Document doc;
0132 
0133         TermGenerator tg(doc);
0134         tg.indexFileNameText(newName);
0135         doc.setId(id);
0136         doc.setParentId(m_parentId);
0137         doc.setUrl(QFile::encodeName(newName));
0138 
0139         tr->replaceDocument(doc, FileNameTerms | DocumentUrl);
0140     }
0141 
0142     void insertTagDocuments();
0143     void addTagDocument(Transaction* tr,const QStringList& tags, quint64 id, const QString& fileName)
0144     {
0145         Document doc;
0146         QString url = dir->path() + QLatin1Char('/') + fileName;
0147         doc.setUrl(QFile::encodeName(url));
0148 
0149         TermGenerator tg(doc);
0150         tg.indexText(QStringLiteral("text/plain"), QByteArray("M"));
0151         for (const QString& tag : tags) {
0152             tg.indexXattrText(tag, QByteArray("TA"));
0153         }
0154         tg.indexFileNameText(fileName);
0155         doc.setId(id);
0156         doc.setParentId(m_parentId);
0157         doc.setMTime(3);
0158         doc.setCTime(4);
0159 
0160         tr->addDocument(doc);
0161     }
0162 
0163     quint64 m_id1;
0164     quint64 m_id2;
0165     quint64 m_id3;
0166     quint64 m_id4;
0167     quint64 m_id5;
0168     quint64 m_id6;
0169     quint64 m_id7;
0170     quint64 m_id8;
0171 };
0172 
0173 
0174 void QueryTest::insertDocuments()
0175 {
0176     Transaction tr(db.get(), Transaction::ReadWrite);
0177     addDocument(&tr, QStringLiteral("The quick brown fox jumped over the crazy dog"), m_id1, QStringLiteral("file1.txt"));
0178     addDocument(&tr, QStringLiteral("The quick brown fox jumped over the lazy dog"), m_id7, QStringLiteral("file7_lazy"));
0179     addDocument(&tr, QStringLiteral("A quick brown fox ran around a easy dog"), m_id8, QStringLiteral("file8_dog"));
0180     addDocument(&tr, QStringLiteral("The night is dark and full of terror"), m_id2, QStringLiteral("file2"));
0181     addDocument(&tr, QStringLiteral("Don't feel sorry for yourself. Only assholes do that"), m_id3, QStringLiteral("file3"));
0182     addDocument(&tr, QStringLiteral("Only the dead stay 17 forever. crazy"), m_id4, QStringLiteral("file4"));
0183 
0184     renameDocument(&tr, m_id8, QStringLiteral("file8_easy"));
0185     tr.commit();
0186 }
0187 
0188 void QueryTest::insertTagDocuments()
0189 {
0190     Transaction tr(db.get(), Transaction::ReadWrite);
0191     addTagDocument(&tr, {
0192     QStringLiteral("One"),
0193     QStringLiteral("Two"),
0194     QStringLiteral("Three"),
0195     QStringLiteral("Four"),
0196     QStringLiteral("F1")
0197     }, m_id5, QStringLiteral("tagFile1"));
0198     addTagDocument(&tr, {
0199     QStringLiteral("One"),
0200     QStringLiteral("Two-Three"),
0201     QStringLiteral("Four"),
0202     QStringLiteral("F2")
0203     }, m_id6, QStringLiteral("tagFile2"));
0204     tr.commit();
0205 }
0206 
0207 void QueryTest::testTermEqual()
0208 {
0209     EngineQuery q("the");
0210 
0211     QVector<quint64> result = SortedIdVector{m_id1, m_id2, m_id4, m_id7};
0212     Transaction tr(db.get(), Transaction::ReadOnly);
0213     QCOMPARE(execQuery(tr, q), result);
0214 }
0215 
0216 void QueryTest::testTermStartsWith()
0217 {
0218     EngineQuery q("for", EngineQuery::StartsWith);
0219 
0220     QVector<quint64> result = SortedIdVector{m_id3, m_id4};
0221     Transaction tr(db.get(), Transaction::ReadOnly);
0222     QCOMPARE(execQuery(tr, q), result);
0223 }
0224 
0225 void QueryTest::testTermAnd()
0226 {
0227     QVector<EngineQuery> queries;
0228     queries << EngineQuery("for");
0229     queries << EngineQuery("sorry");
0230 
0231     EngineQuery q(queries, EngineQuery::And);
0232 
0233     QVector<quint64> result = {m_id3};
0234     Transaction tr(db.get(), Transaction::ReadOnly);
0235     QCOMPARE(execQuery(tr, q), result);
0236 }
0237 
0238 void QueryTest::testTermOr()
0239 {
0240     QVector<EngineQuery> queries;
0241     queries << EngineQuery("over");
0242     queries << EngineQuery("terror");
0243 
0244     EngineQuery q(queries, EngineQuery::Or);
0245 
0246     QVector<quint64> result = SortedIdVector{m_id1, m_id2, m_id7};
0247     Transaction tr(db.get(), Transaction::ReadOnly);
0248     QCOMPARE(execQuery(tr, q), result);
0249 }
0250 
0251 void QueryTest::testTermPhrase_data()
0252 {
0253     QTest::addColumn<QByteArrayList>("phrase");
0254     QTest::addColumn<QVector<quint64>>("contentMatches");
0255     QTest::addColumn<QVector<quint64>>("filenameMatches");
0256 
0257     auto addRow = [](const char* name, const QByteArrayList& phrase,
0258                      const QVector<quint64> contentMatches,
0259                      const QVector<quint64> filenameMatches)
0260         { QTest::addRow("%s", name) << phrase << contentMatches << filenameMatches;};
0261 
0262     // Content matches
0263     addRow("Crazy dog",        {QByteArrayLiteral("crazy"), QByteArrayLiteral("dog")},  SortedIdVector{ m_id1 }, {});
0264     addRow("Lazy dog",         {QByteArrayLiteral("lazy"),  QByteArrayLiteral("dog")},  SortedIdVector{ m_id7 }, {});
0265     addRow("Brown fox",        {QByteArrayLiteral("brown"), QByteArrayLiteral("fox")},  SortedIdVector{ m_id1, m_id7, m_id8 }, {});
0266     addRow("Dog",              {QByteArrayLiteral("dog")},                              SortedIdVector{ m_id1, m_id7, m_id8 }, {});
0267     // Filename matches
0268     addRow("Crazy dog file 1", {QByteArrayLiteral("file1")},                            {}, SortedIdVector{ m_id1 });
0269     addRow("Crazy dog file 2", {QByteArrayLiteral("file1"), QByteArrayLiteral("txt")},  {}, SortedIdVector{ m_id1 });
0270     addRow("Lazy dog file 1",  {QByteArrayLiteral("file7")},                            {}, SortedIdVector{ m_id7 });
0271     addRow("Lazy dog file 2",  {QByteArrayLiteral("file7"), QByteArrayLiteral("lazy")}, {}, SortedIdVector{ m_id7 });
0272     // Matches content and filename
0273     addRow("Lazy both",        {QByteArrayLiteral("lazy")},                             { m_id7 }, { m_id7 });
0274     addRow("Easy both",        {QByteArrayLiteral("easy")},                             { m_id8 }, { m_id8 });
0275 }
0276 
0277 void QueryTest::testTermPhrase()
0278 {
0279     QFETCH(QByteArrayList, phrase);
0280     QFETCH(QVector<quint64>, contentMatches);
0281     QFETCH(QVector<quint64>, filenameMatches);
0282 
0283     QVector<EngineQuery> queries;
0284     for (const QByteArray& term : phrase) {
0285         queries << EngineQuery(term);
0286     }
0287     EngineQuery q(queries, EngineQuery::Phrase);
0288 
0289     Transaction tr(db.get(), Transaction::ReadOnly);
0290     QCOMPARE(execQuery(tr, q), contentMatches);
0291 
0292     queries.clear();
0293     const QByteArray fPrefix = QByteArrayLiteral("F");
0294     for (QByteArray term : phrase) {
0295         term = fPrefix + term;
0296         queries << EngineQuery(term);
0297     }
0298     EngineQuery qf(queries, EngineQuery::Phrase);
0299     QCOMPARE(execQuery(tr, qf), filenameMatches);
0300 }
0301 
0302 void QueryTest::testTagTermAnd_data()
0303 {
0304     QTest::addColumn<QByteArrayList>("terms");
0305     QTest::addColumn<QVector<quint64>>("matchIds");
0306 
0307     QTest::addRow("Simple match") << QByteArrayList({"one", "four"})
0308         << QVector<quint64> { m_id5, m_id6 };
0309     QTest::addRow("Only one") << QByteArrayList({"one", "f1"})
0310         << QVector<quint64> { m_id5 };
0311     QTest::addRow("Also from phrase") << QByteArrayList({"two", "three"})
0312         << QVector<quint64> { m_id5, m_id6 };
0313 }
0314 
0315 void QueryTest::testTagTermAnd()
0316 {
0317     insertTagDocuments();
0318     QFETCH(QByteArrayList, terms);
0319     QFETCH(QVector<quint64>, matchIds);
0320 
0321     QByteArray prefix{"TA"};
0322     QVector<EngineQuery> queries;
0323     for (const QByteArray& term : terms) {
0324         queries << EngineQuery(prefix + term);
0325     }
0326 
0327     EngineQuery q(queries, EngineQuery::And);
0328 
0329     Transaction tr(db.get(), Transaction::ReadOnly);
0330     QCOMPARE(execQuery(tr, q), matchIds);
0331 }
0332 
0333 void QueryTest::testTagTermPhrase_data()
0334 {
0335     QTest::addColumn<QByteArrayList>("terms");
0336     QTest::addColumn<QVector<quint64>>("matchIds");
0337 
0338     QTest::addRow("Simple match") << QByteArrayList({"one"})
0339         << QVector<quint64> { m_id5, m_id6 };
0340     QTest::addRow("Apart") << QByteArrayList({"two", "four"})
0341         << QVector<quint64> { };
0342     QTest::addRow("Adjacent") << QByteArrayList({"three", "four"})
0343         << QVector<quint64> { };
0344     QTest::addRow("Only phrase") << QByteArrayList({"two", "three"})
0345         << QVector<quint64> { m_id6 };
0346 }
0347 
0348 void QueryTest::testTagTermPhrase()
0349 {
0350     insertTagDocuments();
0351     QFETCH(QByteArrayList, terms);
0352     QFETCH(QVector<quint64>, matchIds);
0353 
0354     QByteArray prefix{"TA"};
0355     QVector<EngineQuery> queries;
0356     for (const QByteArray& term : terms) {
0357         queries << EngineQuery(prefix + term);
0358     }
0359 
0360     EngineQuery q(queries, EngineQuery::Phrase);
0361 
0362     Transaction tr(db.get(), Transaction::ReadOnly);
0363     auto res = execQuery(tr, q);
0364     QCOMPARE(res, matchIds);
0365 }
0366 
0367 void QueryTest::testSearchstringParser()
0368 {
0369     QFETCH(QString, searchString);
0370     QFETCH(QStringList, expectedFiles);
0371 
0372     Query q;
0373     q.setSearchString(searchString);
0374 
0375     auto res = q.exec();
0376     QStringList matches;
0377     while (res.next()) {
0378         auto path = res.filePath();
0379         auto name = path.section(QLatin1Char('/'), -1, -1);
0380         matches.append(name);
0381     }
0382     QEXPECT_FAIL("Match '\"quick brown\" content:\"the dog\"'", "Broken quoting", Continue);
0383     QCOMPARE(matches, expectedFiles);
0384 }
0385 
0386 void QueryTest::testSearchstringParser_data()
0387 {
0388     QTest::addColumn<QString>("searchString");
0389     QTest::addColumn<QStringList>("expectedFiles");
0390 
0391     auto addRow = [](const QString& searchString,
0392                      const QStringList& filenameMatches)
0393     {
0394         QTest::addRow("Match '%s'", qPrintable(searchString)) << searchString << filenameMatches;
0395     };
0396 
0397     addRow(QStringLiteral("crazy"), { QStringLiteral("file1.txt"), QStringLiteral("file4") });
0398     addRow(QStringLiteral("content:crazy"), { QStringLiteral("file1.txt"), QStringLiteral("file4") });
0399     addRow(QStringLiteral("content:dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy"), QStringLiteral("file8_easy") });
0400     addRow(QStringLiteral("filename:dog"), {});
0401     addRow(QStringLiteral("filename:easy"), { QStringLiteral("file8_easy") });
0402     addRow(QStringLiteral("content:for"), { QStringLiteral("file3"), QStringLiteral("file4") });
0403     addRow(QStringLiteral("content=for"), { QStringLiteral("file3") });
0404     addRow(QStringLiteral("content=\"over the\""), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy") });
0405     addRow(QStringLiteral("content=\"over the crazy dog\""), { QStringLiteral("file1.txt") });
0406     addRow(QStringLiteral("content=\"over the dog\""), {});
0407     addRow(QStringLiteral("quick AND crazy AND dog"), { QStringLiteral("file1.txt") });
0408     addRow(QStringLiteral("quick crazy dog"), { QStringLiteral("file1.txt") });
0409     addRow(QStringLiteral("\"quick brown\" dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy"), QStringLiteral("file8_easy") });
0410     addRow(QStringLiteral("\"quick brown\" the dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy") });
0411     addRow(QStringLiteral("\"quick brown\" content=\"the dog\""), {});
0412     addRow(QStringLiteral("\"quick brown\" content=\"'the dog'\""), {});
0413     addRow(QStringLiteral("\"quick brown\" content:\"the dog\""), {});
0414     addRow(QStringLiteral("\"quick brown\" content:\"'the dog'\""), {});
0415     addRow(QStringLiteral("\"quick brown\" \"the crazy dog\""), { QStringLiteral("file1.txt") });
0416     addRow(QStringLiteral("content=for OR filename:eas"), { QStringLiteral("file3"), QStringLiteral("file8_easy") });
0417 }
0418 
0419 QTEST_MAIN(QueryTest)
0420 
0421 #include "querytest.moc"