File indexing completed on 2024-04-14 03:49:36
0001 /* 0002 This file is part of the KDE Baloo project. 0003 SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.1-or-later 0006 */ 0007 0008 #include "database.h" 0009 #include "transaction.h" 0010 #include "document.h" 0011 #include "termgenerator.h" 0012 #include "enginequery.h" 0013 #include "idutils.h" 0014 #include "query.h" 0015 0016 #include <memory> 0017 #include <QTest> 0018 #include <QTemporaryDir> 0019 0020 using namespace Baloo; 0021 0022 class SortedIdVector : public QVector<quint64> { 0023 public: 0024 SortedIdVector(const QVector<quint64>& list) 0025 : QVector<quint64>(list) { 0026 std::sort(begin(), end()); 0027 } 0028 SortedIdVector(std::initializer_list<quint64> args) 0029 : SortedIdVector(QVector<quint64>(args)) {} 0030 }; 0031 0032 char *toString(const QVector<quint64> &idlist) 0033 { 0034 QByteArray text("IDs["); 0035 text += QByteArray::number(idlist.size()) + "]:"; 0036 for (auto id : idlist) { 0037 text += " " + QByteArray::number(id, 16); 0038 } 0039 return qstrdup(text.data()); 0040 } 0041 0042 namespace { 0043 QVector<quint64> execQuery(const Transaction& tr, const EngineQuery& query) 0044 { 0045 PostingIterator* it = tr.postingIterator(query); 0046 if (!it) { 0047 return {}; 0048 } 0049 0050 QVector<quint64> results; 0051 while (it->next()) { 0052 results << it->docId(); 0053 } 0054 return results; 0055 } 0056 } // namespace 0057 0058 class QueryTest : public QObject 0059 { 0060 Q_OBJECT 0061 private Q_SLOTS: 0062 void initTestCase() { 0063 dir = std::make_unique<QTemporaryDir>(); 0064 } 0065 0066 void init() { 0067 dbDir = std::make_unique<QTemporaryDir>(); 0068 db = std::make_unique<Database>(dbDir->path()); 0069 db->open(Database::CreateDatabase); 0070 setenv("BALOO_DB_PATH", dbDir->path().toStdString().c_str(), 1); 0071 0072 m_parentId = filePathToId(QFile::encodeName(dir->path())); 0073 m_id1 = m_parentId + 1; 0074 m_id2 = m_parentId + 2; 0075 m_id3 = m_parentId + 3; 0076 m_id4 = m_parentId + 4; 0077 m_id5 = m_parentId + 5; 0078 m_id6 = m_parentId + 6; 0079 m_id7 = m_parentId + 7; 0080 m_id8 = m_parentId + 8; 0081 m_id9 = m_parentId + 9; 0082 0083 insertDocuments(); 0084 insertTagDocuments(); 0085 } 0086 0087 void cleanup() { 0088 db.reset(); 0089 dbDir.reset(); 0090 } 0091 0092 void testTermEqual(); 0093 void testTermStartsWith(); 0094 void testTermPhrase_data(); 0095 void testTermPhrase(); 0096 0097 void testTagTerm_data(); 0098 void testTagTerm(); 0099 void testTagTermPhrase_data(); 0100 void testTagTermPhrase(); 0101 0102 void testSearchstringParser(); 0103 void testSearchstringParser_data(); 0104 0105 private: 0106 std::unique_ptr<QTemporaryDir> dir; 0107 std::unique_ptr<QTemporaryDir> dbDir; 0108 std::unique_ptr<Database> db; 0109 quint64 m_parentId = 0; 0110 0111 void insertDocuments(); 0112 void addDocument(Transaction* tr,const QString& text, quint64 id, const QString& fileName) 0113 { 0114 Document doc; 0115 QString url = dir->path() + QLatin1Char('/') + fileName; 0116 doc.setUrl(QFile::encodeName(url)); 0117 0118 TermGenerator tg(doc); 0119 tg.indexText(text); 0120 tg.indexFileNameText(fileName); 0121 doc.setId(id); 0122 doc.setParentId(m_parentId); 0123 doc.setMTime(1); 0124 doc.setCTime(2); 0125 0126 tr->addDocument(doc); 0127 } 0128 0129 void renameDocument(Transaction* tr, quint64 id, const QString& newName) 0130 { 0131 Document doc; 0132 0133 TermGenerator tg(doc); 0134 tg.indexFileNameText(newName); 0135 doc.setId(id); 0136 doc.setParentId(m_parentId); 0137 doc.setUrl(QFile::encodeName(newName)); 0138 0139 tr->replaceDocument(doc, FileNameTerms | DocumentUrl); 0140 } 0141 0142 void insertTagDocuments(); 0143 void addTagDocument(Transaction* tr,const QStringList& tags, quint64 id, const QString& fileName) 0144 { 0145 Document doc; 0146 QString url = dir->path() + QLatin1Char('/') + fileName; 0147 doc.setUrl(QFile::encodeName(url)); 0148 0149 TermGenerator tg(doc); 0150 tg.indexText(QStringLiteral("text/plain"), QByteArray("M")); 0151 for (const QString& tag : tags) { 0152 tg.indexXattrText(tag, QByteArray("TA")); 0153 } 0154 tg.indexFileNameText(fileName); 0155 doc.setId(id); 0156 doc.setParentId(m_parentId); 0157 doc.setMTime(3); 0158 doc.setCTime(4); 0159 0160 tr->addDocument(doc); 0161 } 0162 0163 quint64 m_id1; 0164 quint64 m_id2; 0165 quint64 m_id3; 0166 quint64 m_id4; 0167 quint64 m_id5; 0168 quint64 m_id6; 0169 quint64 m_id7; 0170 quint64 m_id8; 0171 quint64 m_id9; 0172 }; 0173 0174 void QueryTest::insertDocuments() 0175 { 0176 Transaction tr(db.get(), Transaction::ReadWrite); 0177 addDocument(&tr, QStringLiteral("The quick brown fox jumped over the crazy dog"), m_id1, QStringLiteral("file1.txt")); 0178 addDocument(&tr, QStringLiteral("The quick brown fox jumped over the lazy dog"), m_id7, QStringLiteral("file7_lazy")); 0179 addDocument(&tr, QStringLiteral("A quick brown fox ran around a easy dog"), m_id8, QStringLiteral("file8_dog")); 0180 addDocument(&tr, QStringLiteral("The night is dark and full of terror"), m_id2, QStringLiteral("file2")); 0181 addDocument(&tr, QStringLiteral("Don't feel sorry for yourself. Only assholes do that"), m_id3, QStringLiteral("file3")); 0182 addDocument(&tr, QStringLiteral("Only the dead stay 17 forever. crazy"), m_id4, QStringLiteral("file4")); 0183 addDocument(&tr, QStringLiteral("Some content with isolated dot . Test it"), m_id9, QStringLiteral("file - with hyphen.txt")); 0184 0185 renameDocument(&tr, m_id8, QStringLiteral("file8_easy")); 0186 tr.commit(); 0187 } 0188 0189 void QueryTest::insertTagDocuments() 0190 { 0191 Transaction tr(db.get(), Transaction::ReadWrite); 0192 addTagDocument(&tr, { 0193 QStringLiteral("One"), 0194 QStringLiteral("Two"), 0195 QStringLiteral("Three"), 0196 QStringLiteral("Four"), 0197 QStringLiteral("F1") 0198 }, m_id5, QStringLiteral("tagFile1")); 0199 addTagDocument(&tr, { 0200 QStringLiteral("One"), 0201 QStringLiteral("Two-Three"), 0202 QStringLiteral("Four"), 0203 QStringLiteral("F2") 0204 }, m_id6, QStringLiteral("tagFile2")); 0205 tr.commit(); 0206 } 0207 0208 void QueryTest::testTermEqual() 0209 { 0210 EngineQuery q("the"); 0211 0212 QVector<quint64> result = SortedIdVector{m_id1, m_id2, m_id4, m_id7}; 0213 Transaction tr(db.get(), Transaction::ReadOnly); 0214 QCOMPARE(execQuery(tr, q), result); 0215 } 0216 0217 void QueryTest::testTermStartsWith() 0218 { 0219 EngineQuery q("for", EngineQuery::StartsWith); 0220 0221 QVector<quint64> result = SortedIdVector{m_id3, m_id4}; 0222 Transaction tr(db.get(), Transaction::ReadOnly); 0223 QCOMPARE(execQuery(tr, q), result); 0224 } 0225 0226 void QueryTest::testTermPhrase_data() 0227 { 0228 QTest::addColumn<QByteArrayList>("phrase"); 0229 QTest::addColumn<QVector<quint64>>("contentMatches"); 0230 QTest::addColumn<QVector<quint64>>("filenameMatches"); 0231 0232 auto addRow = [](const char* name, const QByteArrayList& phrase, 0233 const QVector<quint64> contentMatches, 0234 const QVector<quint64> filenameMatches) 0235 { QTest::addRow("%s", name) << phrase << contentMatches << filenameMatches;}; 0236 0237 // Content matches 0238 addRow("Crazy dog", {QByteArrayLiteral("crazy"), QByteArrayLiteral("dog")}, SortedIdVector{ m_id1 }, {}); 0239 addRow("Lazy dog", {QByteArrayLiteral("lazy"), QByteArrayLiteral("dog")}, SortedIdVector{ m_id7 }, {}); 0240 addRow("Brown fox", {QByteArrayLiteral("brown"), QByteArrayLiteral("fox")}, SortedIdVector{ m_id1, m_id7, m_id8 }, {}); 0241 addRow("Dog", {QByteArrayLiteral("dog")}, SortedIdVector{ m_id1, m_id7, m_id8 }, {}); 0242 // Filename matches 0243 addRow("Crazy dog file 1", {QByteArrayLiteral("file1")}, {}, SortedIdVector{ m_id1 }); 0244 addRow("Crazy dog file 2", {QByteArrayLiteral("file1"), QByteArrayLiteral("txt")}, {}, SortedIdVector{ m_id1 }); 0245 addRow("Lazy dog file 1", {QByteArrayLiteral("file7")}, {}, SortedIdVector{ m_id7 }); 0246 addRow("Lazy dog file 2", {QByteArrayLiteral("file7"), QByteArrayLiteral("lazy")}, {}, SortedIdVector{ m_id7 }); 0247 // Matches content and filename 0248 addRow("Lazy both", {QByteArrayLiteral("lazy")}, { m_id7 }, { m_id7 }); 0249 addRow("Easy both", {QByteArrayLiteral("easy")}, { m_id8 }, { m_id8 }); 0250 } 0251 0252 void QueryTest::testTermPhrase() 0253 { 0254 QFETCH(QByteArrayList, phrase); 0255 QFETCH(QVector<quint64>, contentMatches); 0256 QFETCH(QVector<quint64>, filenameMatches); 0257 0258 QVector<EngineQuery> queries; 0259 for (const QByteArray& term : phrase) { 0260 queries << EngineQuery(term); 0261 } 0262 EngineQuery q(queries); 0263 0264 Transaction tr(db.get(), Transaction::ReadOnly); 0265 QCOMPARE(execQuery(tr, q), contentMatches); 0266 0267 queries.clear(); 0268 const QByteArray fPrefix = QByteArrayLiteral("F"); 0269 for (QByteArray term : phrase) { 0270 term = fPrefix + term; 0271 queries << EngineQuery(term); 0272 } 0273 EngineQuery qf(queries); 0274 QCOMPARE(execQuery(tr, qf), filenameMatches); 0275 } 0276 0277 void QueryTest::testTagTerm_data() 0278 { 0279 QTest::addColumn<QByteArray>("term"); 0280 QTest::addColumn<QVector<quint64>>("matchIds"); 0281 0282 QTest::addRow("Simple match") << QByteArray("one") 0283 << QVector<quint64> { m_id5, m_id6 }; 0284 QTest::addRow("Only one") << QByteArray("f1") 0285 << QVector<quint64> { m_id5 }; 0286 QTest::addRow("Also from phrase") << QByteArray("three") 0287 << QVector<quint64> { m_id5, m_id6 }; 0288 } 0289 0290 void QueryTest::testTagTerm() 0291 { 0292 QFETCH(QByteArray, term); 0293 QFETCH(QVector<quint64>, matchIds); 0294 0295 QByteArray prefix{"TA"}; 0296 EngineQuery q(prefix + term); 0297 0298 Transaction tr(db.get(), Transaction::ReadOnly); 0299 QCOMPARE(execQuery(tr, q), matchIds); 0300 } 0301 0302 void QueryTest::testTagTermPhrase_data() 0303 { 0304 QTest::addColumn<QByteArrayList>("terms"); 0305 QTest::addColumn<QVector<quint64>>("matchIds"); 0306 0307 QTest::addRow("Simple match") << QByteArrayList({"one"}) 0308 << QVector<quint64> { m_id5, m_id6 }; 0309 QTest::addRow("Apart") << QByteArrayList({"two", "four"}) 0310 << QVector<quint64> { }; 0311 QTest::addRow("Adjacent") << QByteArrayList({"three", "four"}) 0312 << QVector<quint64> { }; 0313 QTest::addRow("Only phrase") << QByteArrayList({"two", "three"}) 0314 << QVector<quint64> { m_id6 }; 0315 } 0316 0317 void QueryTest::testTagTermPhrase() 0318 { 0319 QFETCH(QByteArrayList, terms); 0320 QFETCH(QVector<quint64>, matchIds); 0321 0322 QByteArray prefix{"TA"}; 0323 QVector<EngineQuery> queries; 0324 for (const QByteArray& term : terms) { 0325 queries << EngineQuery(prefix + term); 0326 } 0327 0328 EngineQuery q(queries); 0329 0330 Transaction tr(db.get(), Transaction::ReadOnly); 0331 auto res = execQuery(tr, q); 0332 QCOMPARE(res, matchIds); 0333 } 0334 0335 void QueryTest::testSearchstringParser() 0336 { 0337 QFETCH(QString, searchString); 0338 QFETCH(QStringList, expectedFiles); 0339 0340 Query q; 0341 q.setSearchString(searchString); 0342 0343 auto res = q.exec(); 0344 QStringList matches; 0345 while (res.next()) { 0346 auto path = res.filePath(); 0347 auto name = path.section(QLatin1Char('/'), -1, -1); 0348 matches.append(name); 0349 } 0350 QEXPECT_FAIL("Match 'dot . Test'", "Bug 407664: Tries to match isolated dot", Continue); 0351 QEXPECT_FAIL("Match 'file - with hyphen.txt'", "Bug 407664: Tries to match hyphen", Continue); 0352 QCOMPARE(matches, expectedFiles); 0353 } 0354 0355 void QueryTest::testSearchstringParser_data() 0356 { 0357 QTest::addColumn<QString>("searchString"); 0358 QTest::addColumn<QStringList>("expectedFiles"); 0359 0360 auto addRow = [](const QString& searchString, 0361 const QStringList& filenameMatches) 0362 { 0363 QTest::addRow("Match '%s'", qPrintable(searchString)) << searchString << filenameMatches; 0364 }; 0365 0366 addRow(QStringLiteral("crazy"), { QStringLiteral("file1.txt"), QStringLiteral("file4") }); 0367 addRow(QStringLiteral("content:crazy"), { QStringLiteral("file1.txt"), QStringLiteral("file4") }); 0368 addRow(QStringLiteral("content:dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy"), QStringLiteral("file8_easy") }); 0369 addRow(QStringLiteral("filename:dog"), {}); 0370 addRow(QStringLiteral("filename:easy"), { QStringLiteral("file8_easy") }); 0371 addRow(QStringLiteral("content:for"), { QStringLiteral("file3"), QStringLiteral("file4") }); 0372 addRow(QStringLiteral("content=for"), { QStringLiteral("file3") }); 0373 addRow(QStringLiteral("content=don't"), { QStringLiteral("file3") }); 0374 addRow(QStringLiteral("content=yourself"), { QStringLiteral("file3") }); 0375 addRow(QStringLiteral("content=\"over the\""), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy") }); 0376 addRow(QStringLiteral("content=\"over the crazy dog\""), { QStringLiteral("file1.txt") }); 0377 addRow(QStringLiteral("content=\"over the dog\""), {}); 0378 addRow(QStringLiteral("quick AND crazy AND dog"), { QStringLiteral("file1.txt") }); 0379 addRow(QStringLiteral("quick crazy dog"), { QStringLiteral("file1.txt") }); 0380 addRow(QStringLiteral("\"quick brown\" dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy"), QStringLiteral("file8_easy") }); 0381 addRow(QStringLiteral("\"quick brown\" the dog"), { QStringLiteral("file1.txt"), QStringLiteral("file7_lazy") }); 0382 addRow(QStringLiteral("\"quick brown\" content=\"the dog\""), {}); 0383 addRow(QStringLiteral("\"quick brown\" content=\"'the dog'\""), {}); 0384 addRow(QStringLiteral("\"quick brown\" content:\"the dog\""), {}); 0385 addRow(QStringLiteral("\"quick brown\" content:\"'the dog'\""), {}); 0386 addRow(QStringLiteral("\"quick brown\" \"the crazy dog\""), { QStringLiteral("file1.txt") }); 0387 addRow(QStringLiteral("content=for OR filename:eas"), { QStringLiteral("file3"), QStringLiteral("file8_easy") }); 0388 addRow(QStringLiteral("for sorry"), { QStringLiteral("file3") }); 0389 addRow(QStringLiteral("over OR terror"), { QStringLiteral("file1.txt"), QStringLiteral("file2"), QStringLiteral("file7_lazy") }); 0390 0391 addRow(QStringLiteral("tag:f1"), { QStringLiteral("tagFile1") }); 0392 addRow(QStringLiteral("tag:f2"), { QStringLiteral("tagFile2") }); 0393 addRow(QStringLiteral("tag:one"), { QStringLiteral("tagFile1"), QStringLiteral("tagFile2") }); 0394 addRow(QStringLiteral("tag:two"), { QStringLiteral("tagFile1"), QStringLiteral("tagFile2") }); 0395 addRow(QStringLiteral("tag:two AND tag:three"), { QStringLiteral("tagFile1"), QStringLiteral("tagFile2") }); 0396 addRow(QStringLiteral("tag:two-three"), { QStringLiteral("tagFile2") }); 0397 0398 addRow(QStringLiteral("filename:hyphen"), { QStringLiteral("file - with hyphen.txt") }); 0399 addRow(QStringLiteral("file with hyphen.txt"), { QStringLiteral("file - with hyphen.txt") }); 0400 addRow(QStringLiteral("file - with hyphen.txt"), { QStringLiteral("file - with hyphen.txt") }); 0401 addRow(QStringLiteral("\"file - with hyphen.txt\""), { QStringLiteral("file - with hyphen.txt") }); 0402 addRow(QStringLiteral("content:dot"), { QStringLiteral("file - with hyphen.txt") }); 0403 addRow(QStringLiteral("dot . Test"), { QStringLiteral("file - with hyphen.txt") }); 0404 addRow(QStringLiteral("\"dot . Test\""), { QStringLiteral("file - with hyphen.txt") }); 0405 } 0406 0407 QTEST_MAIN(QueryTest) 0408 0409 #include "querytest.moc"