File indexing completed on 2023-09-24 04:01:54
0001 /* 0002 This file is part of the KDE Baloo project. 0003 SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.1-or-later 0006 */ 0007 0008 #include "database.h" 0009 #include "transaction.h" 0010 #include "document.h" 0011 #include "termgenerator.h" 0012 #include "enginequery.h" 0013 #include "idutils.h" 0014 #include "query.h" 0015 0016 #include <memory> 0017 #include <QTest> 0018 #include <QTemporaryDir> 0019 0020 using namespace Baloo; 0021 0022 class SortedIdVector : public QVector<quint64> { 0023 public: 0024 SortedIdVector(const QVector<quint64>& list) 0025 : QVector<quint64>(list) { 0026 std::sort(begin(), end()); 0027 } 0028 SortedIdVector(std::initializer_list<quint64> args) 0029 : SortedIdVector(QVector<quint64>(args)) {} 0030 }; 0031 0032 char *toString(const QVector<quint64> &idlist) 0033 { 0034 QByteArray text("IDs["); 0035 text += QByteArray::number(idlist.size()) + "]:"; 0036 for (auto id : idlist) { 0037 text += " " + QByteArray::number(id, 16); 0038 } 0039 return qstrdup(text.data()); 0040 } 0041 0042 namespace { 0043 QVector<quint64> execQuery(const Transaction& tr, const EngineQuery& query) 0044 { 0045 PostingIterator* it = tr.postingIterator(query); 0046 if (!it) { 0047 return {}; 0048 } 0049 0050 QVector<quint64> results; 0051 while (it->next()) { 0052 results << it->docId(); 0053 } 0054 return results; 0055 } 0056 } // namespace 0057 0058 class QueryTest : public QObject 0059 { 0060 Q_OBJECT 0061 private Q_SLOTS: 0062 void initTestCase() { 0063 dir = std::make_unique<QTemporaryDir>(); 0064 } 0065 0066 void init() { 0067 dbDir = std::make_unique<QTemporaryDir>(); 0068 db = std::make_unique<Database>(dbDir->path()); 0069 db->open(Database::CreateDatabase); 0070 setenv("BALOO_DB_PATH", dbDir->path().toStdString().c_str(), 1); 0071 0072 m_parentId = filePathToId(QFile::encodeName(dir->path())); 0073 m_id1 = m_parentId + 1; 0074 m_id2 = m_parentId + 2; 0075 m_id3 = m_parentId + 3; 0076 m_id4 = m_parentId + 4; 0077 m_id5 = m_parentId + 5; 0078 m_id6 = m_parentId + 6; 0079 m_id7 = m_parentId + 7; 0080 m_id8 = m_parentId + 8; 0081 0082 insertDocuments(); 0083 } 0084 0085 void cleanup() { 0086 db.reset(); 0087 dbDir.reset(); 0088 } 0089 0090 void testTermEqual(); 0091 void testTermStartsWith(); 0092 void testTermAnd(); 0093 void testTermOr(); 0094 void testTermPhrase_data(); 0095 void testTermPhrase(); 0096 0097 void testTagTermAnd_data(); 0098 void testTagTermAnd(); 0099 void testTagTermPhrase_data(); 0100 void testTagTermPhrase(); 0101 0102 void testSearchstringParser(); 0103 void testSearchstringParser_data(); 0104 0105 private: 0106 std::unique_ptr<QTemporaryDir> dir; 0107 std::unique_ptr<QTemporaryDir> dbDir; 0108 std::unique_ptr<Database> db; 0109 quint64 m_parentId = 0; 0110 0111 void insertDocuments(); 0112 void addDocument(Transaction* tr,const QString& text, quint64 id, const QString& fileName) 0113 { 0114 Document doc; 0115 QString url = dir->path() + QLatin1Char('/') + fileName; 0116 doc.setUrl(QFile::encodeName(url)); 0117 0118 TermGenerator tg(doc); 0119 tg.indexText(text); 0120 tg.indexFileNameText(fileName); 0121 doc.setId(id); 0122 doc.setParentId(m_parentId); 0123 doc.setMTime(1); 0124 doc.setCTime(2); 0125 0126 tr->addDocument(doc); 0127 } 0128 0129 void renameDocument(Transaction* tr, quint64 id, const QString& newName) 0130 { 0131 Document doc; 0132 0133 TermGenerator tg(doc); 0134 tg.indexFileNameText(newName); 0135 doc.setId(id); 0136 doc.setParentId(m_parentId); 0137 doc.setUrl(QFile::encodeName(newName)); 0138 0139 tr->replaceDocument(doc, FileNameTerms | DocumentUrl); 0140 } 0141 0142 void insertTagDocuments(); 0143 void addTagDocument(Transaction* tr,const QStringList& tags, quint64 id, const QString& fileName) 0144 { 0145 Document doc; 0146 QString url = dir->path() + QLatin1Char('/') + fileName; 0147 doc.setUrl(QFile::encodeName(url)); 0148 0149 TermGenerator tg(doc); 0150 tg.indexText(QStringLiteral("text/plain"), QByteArray("M")); 0151 for (const QString& tag : tags) { 0152 tg.indexXattrText(tag, QByteArray("TA")); 0153 } 0154 tg.indexFileNameText(fileName); 0155 doc.setId(id); 0156 doc.setParentId(m_parentId); 0157 doc.setMTime(3); 0158 doc.setCTime(4); 0159 0160 tr->addDocument(doc); 0161 } 0162 0163 quint64 m_id1; 0164 quint64 m_id2; 0165 quint64 m_id3; 0166 quint64 m_id4; 0167 quint64 m_id5; 0168 quint64 m_id6; 0169 quint64 m_id7; 0170 quint64 m_id8; 0171 }; 0172 0173 0174 void QueryTest::insertDocuments() 0175 { 0176 Transaction tr(db.get(), Transaction::ReadWrite); 0177 addDocument(&tr, QStringLiteral("The quick brown fox jumped over the crazy dog"), m_id1, QStringLiteral("file1.txt")); 0178 addDocument(&tr, QStringLiteral("The quick brown fox jumped over the lazy dog"), m_id7, QStringLiteral("file7_lazy")); 0179 addDocument(&tr, QStringLiteral("A quick brown fox ran around a easy dog"), m_id8, QStringLiteral("file8_dog")); 0180 addDocument(&tr, QStringLiteral("The night is dark and full of terror"), m_id2, QStringLiteral("file2")); 0181 addDocument(&tr, QStringLiteral("Don't feel sorry for yourself. Only assholes do that"), m_id3, QStringLiteral("file3")); 0182 addDocument(&tr, QStringLiteral("Only the dead stay 17 forever. crazy"), m_id4, QStringLiteral("file4")); 0183 0184 renameDocument(&tr, m_id8, QStringLiteral("file8_easy")); 0185 tr.commit(); 0186 } 0187 0188 void QueryTest::insertTagDocuments() 0189 { 0190 Transaction tr(db.get(), Transaction::ReadWrite); 0191 addTagDocument(&tr, { 0192 QStringLiteral("One"), 0193 QStringLiteral("Two"), 0194 QStringLiteral("Three"), 0195 QStringLiteral("Four"), 0196 QStringLiteral("F1") 0197 }, m_id5, QStringLiteral("tagFile1")); 0198 addTagDocument(&tr, { 0199 QStringLiteral("One"), 0200 QStringLiteral("Two-Three"), 0201 QStringLiteral("Four"), 0202 QStringLiteral("F2") 0203 }, m_id6, QStringLiteral("tagFile2")); 0204 tr.commit(); 0205 } 0206 0207 void QueryTest::testTermEqual() 0208 { 0209 EngineQuery q("the"); 0210 0211 QVector<quint64> result = SortedIdVector{m_id1, m_id2, m_id4, m_id7}; 0212 Transaction tr(db.get(), Transaction::ReadOnly); 0213 QCOMPARE(execQuery(tr, q), result); 0214 } 0215 0216 void QueryTest::testTermStartsWith() 0217 { 0218 EngineQuery q("for", EngineQuery::StartsWith); 0219 0220 QVector<quint64> result = SortedIdVector{m_id3, m_id4}; 0221 Transaction tr(db.get(), Transaction::ReadOnly); 0222 QCOMPARE(execQuery(tr, q), result); 0223 } 0224 0225 void QueryTest::testTermAnd() 0226 { 0227 QVector<EngineQuery> queries; 0228 queries << EngineQuery("for"); 0229 queries << EngineQuery("sorry"); 0230 0231 EngineQuery q(queries, EngineQuery::And); 0232 0233 QVector<quint64> result = {m_id3}; 0234 Transaction tr(db.get(), Transaction::ReadOnly); 0235 QCOMPARE(execQuery(tr, q), result); 0236 } 0237 0238 void QueryTest::testTermOr() 0239 { 0240 QVector<EngineQuery> queries; 0241 queries << EngineQuery("over"); 0242 queries << EngineQuery("terror"); 0243 0244 EngineQuery q(queries, EngineQuery::Or); 0245 0246 QVector<quint64> result = SortedIdVector{m_id1, m_id2, m_id7}; 0247 Transaction tr(db.get(), Transaction::ReadOnly); 0248 QCOMPARE(execQuery(tr, q), result); 0249 } 0250 0251 void QueryTest::testTermPhrase_data() 0252 { 0253 QTest::addColumn<QByteArrayList>("phrase"); 0254 QTest::addColumn<QVector<quint64>>("contentMatches"); 0255 QTest::addColumn<QVector<quint64>>("filenameMatches"); 0256 0257 auto addRow = [](const char* name, const QByteArrayList& phrase, 0258 const QVector<quint64> contentMatches, 0259 const QVector<quint64> filenameMatches) 0260 { QTest::addRow("%s", name) << phrase << contentMatches << filenameMatches;}; 0261 0262 // Content matches 0263 addRow("Crazy dog", {QByteArrayLiteral("crazy"), QByteArrayLiteral("dog")}, SortedIdVector{ m_id1 }, {}); 0264 addRow("Lazy dog", {QByteArrayLiteral("lazy"), QByteArrayLiteral("dog")}, SortedIdVector{ m_id7 }, {}); 0265 addRow("Brown fox", {QByteArrayLiteral("brown"), QByteArrayLiteral("fox")}, SortedIdVector{ m_id1, m_id7, m_id8 }, {}); 0266 addRow("Dog", {QByteArrayLiteral("dog")}, SortedIdVector{ m_id1, m_id7, m_id8 }, {}); 0267 // Filename matches 0268 addRow("Crazy dog file 1", {QByteArrayLiteral("file1")}, {}, SortedIdVector{ m_id1 }); 0269 addRow("Crazy dog file 2", {QByteArrayLiteral("file1"), QByteArrayLiteral("txt")}, {}, SortedIdVector{ m_id1 }); 0270 addRow("Lazy dog file 1", {QByteArrayLiteral("file7")}, {}, SortedIdVector{ m_id7 }); 0271 addRow("Lazy dog file 2", {QByteArrayLiteral("file7"), QByteArrayLiteral("lazy")}, {}, SortedIdVector{ m_id7 }); 0272 // Matches content and filename 0273 addRow("Lazy both", {QByteArrayLiteral("lazy")}, { m_id7 }, { m_id7 }); 0274 addRow("Easy both", {QByteArrayLiteral("easy")}, { m_id8 }, { m_id8 }); 0275 } 0276 0277 void QueryTest::testTermPhrase() 0278 { 0279 QFETCH(QByteArrayList, phrase); 0280 QFETCH(QVector<quint64>, contentMatches); 0281 QFETCH(QVector<quint64>, filenameMatches); 0282 0283 QVector<EngineQuery> queries; 0284 for (const QByteArray& term : phrase) { 0285 queries << EngineQuery(term); 0286 } 0287 EngineQuery q(queries, EngineQuery::Phrase); 0288 0289 Transaction tr(db.get(), Transaction::ReadOnly); 0290 QCOMPARE(execQuery(tr, q), contentMatches); 0291 0292 queries.clear(); 0293 const QByteArray fPrefix = QByteArrayLiteral("F"); 0294 for (QByteArray term : phrase) { 0295 term = fPrefix + term; 0296 queries << EngineQuery(term); 0297 } 0298 EngineQuery qf(queries, EngineQuery::Phrase); 0299 QCOMPARE(execQuery(tr, qf), filenameMatches); 0300 } 0301 0302 void QueryTest::testTagTermAnd_data() 0303 { 0304 QTest::addColumn<QByteArrayList>("terms"); 0305 QTest::addColumn<QVector<quint64>>("matchIds"); 0306 0307 QTest::addRow("Simple match") << QByteArrayList({"one", "four"}) 0308 << QVector<quint64> { m_id5, m_id6 }; 0309 QTest::addRow("Only one") << QByteArrayList({"one", "f1"}) 0310 << QVector<quint64> { m_id5 }; 0311 QTest::addRow("Also from phrase") << QByteArrayList({"two", "three"}) 0312 << QVector<quint64> { m_id5, m_id6 }; 0313 } 0314 0315 void QueryTest::testTagTermAnd() 0316 { 0317 insertTagDocuments(); 0318 QFETCH(QByteArrayList, terms); 0319 QFETCH(QVector<quint64>, matchIds); 0320 0321 QByteArray prefix{"TA"}; 0322 QVector<EngineQuery> queries; 0323 for (const QByteArray& term : terms) { 0324 queries << EngineQuery(prefix + term); 0325 } 0326 0327 EngineQuery q(queries, EngineQuery::And); 0328 0329 Transaction tr(db.get(), Transaction::ReadOnly); 0330 QCOMPARE(execQuery(tr, q), matchIds); 0331 } 0332 0333 void QueryTest::testTagTermPhrase_data() 0334 { 0335 QTest::addColumn<QByteArrayList>("terms"); 0336 QTest::addColumn<QVector<quint64>>("matchIds"); 0337 0338 QTest::addRow("Simple match") << QByteArrayList({"one"}) 0339 << QVector<quint64> { m_id5, m_id6 }; 0340 QTest::addRow("Apart") << QByteArrayList({"two", "four"}) 0341 << QVector<quint64> { }; 0342 QTest::addRow("Adjacent") << QByteArrayList({"three", "four"}) 0343 << QVector<quint64> { }; 0344 QTest::addRow("Only phrase") << QByteArrayList({"two", "three"}) 0345 << QVector<quint64> { m_id6 }; 0346 } 0347 0348 void QueryTest::testTagTermPhrase() 0349 { 0350 insertTagDocuments(); 0351 QFETCH(QByteArrayList, terms); 0352 QFETCH(QVector<quint64>, matchIds); 0353 0354 QByteArray prefix{"TA"}; 0355 QVector<EngineQuery> queries; 0356 for (const QByteArray& term : terms) { 0357 queries << EngineQuery(prefix + term); 0358 } 0359 0360 EngineQuery q(queries, EngineQuery::Phrase); 0361 0362 Transaction tr(db.get(), Transaction::ReadOnly); 0363 auto res = execQuery(tr, q); 0364 QCOMPARE(res, matchIds); 0365 } 0366 0367 void QueryTest::testSearchstringParser() 0368 { 0369 QFETCH(QString, searchString); 0370 QFETCH(QStringList, expectedFiles); 0371 0372 Query q; 0373 q.setSearchString(searchString); 0374 0375 auto res = q.exec(); 0376 QStringList matches; 0377 while (res.next()) { 0378 auto path = res.filePath(); 0379 auto name = path.section(QLatin1Char('/'), -1, -1); 0380 matches.append(name); 0381 } 0382 QEXPECT_FAIL("Match '\"quick brown\" content:\"the dog\"'", "Broken quoting", Continue); 0383 QCOMPARE(matches, expectedFiles); 0384 } 0385 0386 void QueryTest::testSearchstringParser_data() 0387 { 0388 QTest::addColumn<QString>("searchString"); 0389 QTest::addColumn<QStringList>("expectedFiles"); 0390 0391 auto addRow = [](const QString& searchString, 0392 const QStringList& filenameMatches) 0393 { 0394 QTest::addRow("Match '%s'", qPrintable(searchString)) << searchString << filenameMatches; 0395 }; 0396 0397 addRow(QStringLiteral("crazy"), { QStringLiteral("file1.txt"), QStringLiteral("file4") }); 0398 addRow(QStringLiteral("content:crazy"), { QStringLiteral("file1.txt"), QStringLiteral("file4") }); 0399 addRow(QStringLiteral("content:dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy"), QStringLiteral("file8_easy") }); 0400 addRow(QStringLiteral("filename:dog"), {}); 0401 addRow(QStringLiteral("filename:easy"), { QStringLiteral("file8_easy") }); 0402 addRow(QStringLiteral("content:for"), { QStringLiteral("file3"), QStringLiteral("file4") }); 0403 addRow(QStringLiteral("content=for"), { QStringLiteral("file3") }); 0404 addRow(QStringLiteral("content=\"over the\""), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy") }); 0405 addRow(QStringLiteral("content=\"over the crazy dog\""), { QStringLiteral("file1.txt") }); 0406 addRow(QStringLiteral("content=\"over the dog\""), {}); 0407 addRow(QStringLiteral("quick AND crazy AND dog"), { QStringLiteral("file1.txt") }); 0408 addRow(QStringLiteral("quick crazy dog"), { QStringLiteral("file1.txt") }); 0409 addRow(QStringLiteral("\"quick brown\" dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy"), QStringLiteral("file8_easy") }); 0410 addRow(QStringLiteral("\"quick brown\" the dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy") }); 0411 addRow(QStringLiteral("\"quick brown\" content=\"the dog\""), {}); 0412 addRow(QStringLiteral("\"quick brown\" content=\"'the dog'\""), {}); 0413 addRow(QStringLiteral("\"quick brown\" content:\"the dog\""), {}); 0414 addRow(QStringLiteral("\"quick brown\" content:\"'the dog'\""), {}); 0415 addRow(QStringLiteral("\"quick brown\" \"the crazy dog\""), { QStringLiteral("file1.txt") }); 0416 addRow(QStringLiteral("content=for OR filename:eas"), { QStringLiteral("file3"), QStringLiteral("file8_easy") }); 0417 } 0418 0419 QTEST_MAIN(QueryTest) 0420 0421 #include "querytest.moc"