File indexing completed on 2024-04-21 03:51:48
0001 /* 0002 This file is part of the KDE Baloo Project 0003 SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <vhanda@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL 0006 */ 0007 0008 #include "baloodebug.h" 0009 #include "searchstore.h" 0010 #include "global.h" 0011 0012 #include "database.h" 0013 #include "term.h" 0014 #include "transaction.h" 0015 #include "enginequery.h" 0016 #include "termgenerator.h" 0017 #include "andpostingiterator.h" 0018 #include "orpostingiterator.h" 0019 0020 #include <QDateTime> 0021 0022 #include <KFileMetaData/PropertyInfo> 0023 #include <KFileMetaData/TypeInfo> 0024 #include <KFileMetaData/Types> 0025 0026 #include <algorithm> 0027 #include <array> 0028 #include <tuple> 0029 0030 namespace Baloo { 0031 0032 namespace { 0033 QPair<quint32, quint32> calculateTimeRange(const QDateTime& dt, Term::Comparator com) 0034 { 0035 Q_ASSERT(dt.isValid()); 0036 0037 if (com == Term::Equal) { 0038 // Timestamps in DB are quint32 relative to Epoch (1970...2106) 0039 auto start = static_cast<quint32>(dt.date().startOfDay().toSecsSinceEpoch()); 0040 auto end = static_cast<quint32>(dt.date().endOfDay().toSecsSinceEpoch()); 0041 return {start, end}; 0042 } 0043 0044 quint32 timet = dt.toSecsSinceEpoch(); 0045 if (com == Term::LessEqual) { 0046 return {0, timet}; 0047 } 0048 if (com == Term::Less) { 0049 return {0, timet - 1}; 0050 } 0051 if (com == Term::GreaterEqual) { 0052 return {timet, std::numeric_limits<quint32>::max()}; 0053 } 0054 if (com == Term::Greater) { 0055 return {timet + 1, std::numeric_limits<quint32>::max()}; 0056 } 0057 0058 Q_ASSERT_X(0, __func__, "mtime query must contain a valid comparator"); 0059 return {0, 0}; 0060 } 0061 0062 struct InternalProperty { 0063 const char* propertyName; 0064 const char* prefix; 0065 QMetaType::Type valueType; 0066 }; 0067 constexpr std::array<InternalProperty, 7> internalProperties{{{"content", "", QMetaType::QString}, 0068 {"filename", "F", QMetaType::QString}, 0069 {"mimetype", "M", QMetaType::QString}, 0070 {"rating", "R", QMetaType::Int}, 0071 {"tag", "TAG-", QMetaType::QString}, 0072 {"tags", "TA", QMetaType::QString}, 0073 {"usercomment", "C", QMetaType::QString}}}; 0074 0075 std::pair<QByteArray, QMetaType::Type> propertyInfo(const QByteArray &property) 0076 { 0077 auto it = std::find_if(std::begin(internalProperties), std::end(internalProperties), 0078 [&property] (const InternalProperty& entry) { return property == entry.propertyName; }); 0079 if (it != std::end(internalProperties)) { 0080 return { (*it).prefix, (*it).valueType }; 0081 } else { 0082 KFileMetaData::PropertyInfo pi = KFileMetaData::PropertyInfo::fromName(QString::fromUtf8(property)); 0083 if (pi.property() == KFileMetaData::Property::Empty) { 0084 return {QByteArray(), QMetaType::UnknownType}; 0085 } 0086 int propPrefix = static_cast<int>(pi.property()); 0087 return {QByteArray('X' + QByteArray::number(propPrefix) + '-'), pi.valueType()}; 0088 } 0089 } 0090 0091 EngineQuery constructEqualsQuery(const QByteArray& prefix, const QString& value) 0092 { 0093 // We use the TermGenerator to normalize the words in the value and to 0094 // split it into other words. If we split the words, we then add them as a 0095 // phrase query. 0096 const QByteArrayList terms = TermGenerator::termList(value); 0097 0098 QVector<EngineQuery> queries; 0099 queries.reserve(terms.size()); 0100 for (const QByteArray& term : terms) { 0101 QByteArray arr = prefix + term; 0102 // FIXME - compatibility hack, to find truncated terms with old 0103 // DBs, remove on next DB bump 0104 if (arr.size() > 25) { 0105 queries << EngineQuery(arr.left(25), EngineQuery::StartsWith); 0106 } else { 0107 queries << EngineQuery(arr); 0108 } 0109 } 0110 0111 if (queries.isEmpty()) { 0112 return EngineQuery(); 0113 } else if (queries.size() == 1) { 0114 return queries.first(); 0115 } else { 0116 return EngineQuery(queries); 0117 } 0118 } 0119 0120 EngineQuery constructContainsQuery(const QByteArray& prefix, const QString& value) 0121 { 0122 auto query = constructEqualsQuery(prefix, value); 0123 if (query.op() == EngineQuery::Equal) { 0124 if (query.term().size() >= 3) { 0125 query.setOp(EngineQuery::StartsWith); 0126 } 0127 } 0128 return query; 0129 } 0130 0131 EngineQuery constructTypeQuery(const QString& value) 0132 { 0133 Q_ASSERT(!value.isEmpty()); 0134 0135 KFileMetaData::TypeInfo ti = KFileMetaData::TypeInfo::fromName(value); 0136 if (ti == KFileMetaData::Type::Empty) { 0137 qCDebug(BALOO) << "Type" << value << "does not exist"; 0138 return EngineQuery(); 0139 } 0140 int num = static_cast<int>(ti.type()); 0141 0142 return EngineQuery('T' + QByteArray::number(num)); 0143 } 0144 } // namespace 0145 0146 SearchStore::SearchStore() 0147 : m_db(nullptr) 0148 { 0149 m_db = globalDatabaseInstance(); 0150 if (!m_db->open(Database::ReadOnlyDatabase)) { 0151 m_db = nullptr; 0152 } 0153 } 0154 0155 SearchStore::~SearchStore() 0156 { 0157 } 0158 0159 // Return the result with-in [offset, offset + limit) 0160 ResultList SearchStore::exec(const Term& term, uint offset, int limit, bool sortResults) 0161 { 0162 if (!m_db || !m_db->isOpen()) { 0163 return ResultList(); 0164 } 0165 0166 Transaction tr(m_db, Transaction::ReadOnly); 0167 std::unique_ptr<PostingIterator> it(constructQuery(&tr, term)); 0168 if (!it) { 0169 return ResultList(); 0170 } 0171 0172 if (sortResults) { 0173 QVector<std::pair<quint64, quint32>> resultIds; 0174 while (it->next()) { 0175 quint64 id = it->docId(); 0176 quint32 mtime = tr.documentTimeInfo(id).mTime; 0177 resultIds << std::pair<quint64, quint32>{id, mtime}; 0178 0179 Q_ASSERT(id > 0); 0180 } 0181 0182 // Not enough results within range, no need to sort. 0183 if (offset >= static_cast<uint>(resultIds.size())) { 0184 return ResultList(); 0185 } 0186 0187 auto compFunc = [](const std::pair<quint64, quint32>& lhs, 0188 const std::pair<quint64, quint32>& rhs) { 0189 return lhs.second > rhs.second; 0190 }; 0191 0192 std::sort(resultIds.begin(), resultIds.end(), compFunc); 0193 if (limit < 0) { 0194 limit = resultIds.size(); 0195 } 0196 0197 ResultList results; 0198 const uint end = qMin(static_cast<uint>(resultIds.size()), offset + static_cast<uint>(limit)); 0199 results.reserve(end - offset); 0200 for (uint i = offset; i < end; i++) { 0201 const quint64 id = resultIds[i].first; 0202 Result res{tr.documentUrl(id), id}; 0203 0204 results.emplace_back(res); 0205 } 0206 0207 return results; 0208 } 0209 else { 0210 ResultList results; 0211 uint ulimit = limit < 0 ? UINT_MAX : limit; 0212 0213 while (offset && it->next()) { 0214 offset--; 0215 } 0216 0217 while (ulimit && it->next()) { 0218 const quint64 id = it->docId(); 0219 Q_ASSERT(id > 0); 0220 Result res{tr.documentUrl(id), id}; 0221 Q_ASSERT(!res.filePath.isEmpty()); 0222 0223 results.emplace_back(res); 0224 0225 ulimit--; 0226 } 0227 0228 return results; 0229 } 0230 } 0231 0232 PostingIterator* SearchStore::constructQuery(Transaction* tr, const Term& term) 0233 { 0234 Q_ASSERT(tr); 0235 0236 if (term.operation() == Term::And || term.operation() == Term::Or) { 0237 const QList<Term> subTerms = term.subTerms(); 0238 QVector<PostingIterator*> vec; 0239 vec.reserve(subTerms.size()); 0240 0241 for (const Term& t : subTerms) { 0242 auto iterator = constructQuery(tr, t); 0243 // constructQuery returns a nullptr to signal an empty list 0244 if (iterator) { 0245 vec << iterator; 0246 } else if (term.operation() == Term::And) { 0247 return nullptr; 0248 } 0249 } 0250 0251 if (vec.isEmpty()) { 0252 return nullptr; 0253 } else if (vec.size() == 1) { 0254 return vec.takeFirst(); 0255 } 0256 0257 if (term.operation() == Term::And) { 0258 return new AndPostingIterator(vec); 0259 } else { 0260 return new OrPostingIterator(vec); 0261 } 0262 } 0263 0264 if (term.value().isNull()) { 0265 return nullptr; 0266 } 0267 Q_ASSERT(term.value().isValid()); 0268 Q_ASSERT(term.comparator() != Term::Auto); 0269 Q_ASSERT(term.comparator() == Term::Contains ? term.value().typeId() == QMetaType::QString : true); 0270 0271 const QVariant value = term.value(); 0272 const QByteArray property = term.property().toLower().toUtf8(); 0273 0274 if (property == "type" || property == "kind") { 0275 EngineQuery q = constructTypeQuery(value.toString()); 0276 return tr->postingIterator(q); 0277 } 0278 else if (property == "includefolder") { 0279 const QByteArray folder = value.toString().toUtf8(); 0280 0281 if (folder.isEmpty()) { 0282 return nullptr; 0283 } 0284 if (!folder.startsWith('/')) { 0285 return nullptr; 0286 } 0287 0288 quint64 id = tr->documentId(folder); 0289 if (!id) { 0290 qCDebug(BALOO) << "Folder" << value.toString() << "not indexed"; 0291 return nullptr; 0292 } 0293 0294 return tr->docUrlIter(id); 0295 } 0296 else if (property == "modified" || property == "mtime") { 0297 if (value.typeId() == QMetaType::QByteArray) { 0298 // Used by Baloo::Query 0299 QByteArray ba = value.toByteArray(); 0300 Q_ASSERT(ba.size() >= 4); 0301 0302 int year = ba.mid(0, 4).toInt(); 0303 int month = ba.mid(4, 2).toInt(); 0304 int day = ba.mid(6, 2).toInt(); 0305 0306 Q_ASSERT(year); 0307 0308 // uses 0 to represent whole month or whole year 0309 month = month >= 0 && month <= 12 ? month : 0; 0310 day = day >= 0 && day <= 31 ? day : 0; 0311 0312 QDate startDate(year, month ? month : 1, day ? day : 1); 0313 QDate endDate(startDate); 0314 0315 if (month == 0) { 0316 endDate.setDate(endDate.year(), 12, 31); 0317 } else if (day == 0) { 0318 endDate.setDate(endDate.year(), endDate.month(), endDate.daysInMonth()); 0319 } 0320 0321 return tr->mTimeRangeIter(startDate.startOfDay().toSecsSinceEpoch(), endDate.endOfDay().toSecsSinceEpoch()); 0322 } else if (value.typeId() == QMetaType::QString) { 0323 const QDateTime dt = value.toDateTime(); 0324 QPair<quint32, quint32> timerange = calculateTimeRange(dt, term.comparator()); 0325 if ((timerange.first == 0) && (timerange.second == 0)) { 0326 return nullptr; 0327 } 0328 return tr->mTimeRangeIter(timerange.first, timerange.second); 0329 } else { 0330 Q_ASSERT_X(0, "SearchStore::constructQuery", "modified property must contain date/datetime values"); 0331 return nullptr; 0332 } 0333 } else if (property == "tag") { 0334 if (term.comparator() == Term::Equal) { 0335 const QByteArray prefix = "TAG-"; 0336 EngineQuery q = EngineQuery(prefix + value.toByteArray()); 0337 return tr->postingIterator(q); 0338 } else if (term.comparator() == Term::Contains) { 0339 const QByteArray prefix = "TA"; 0340 EngineQuery q = constructEqualsQuery(prefix, value.toString()); 0341 return tr->postingIterator(q); 0342 } else { 0343 Q_ASSERT(0); 0344 return nullptr; 0345 } 0346 } else if (property == "") { 0347 Term cterm(QStringLiteral("content"), term.value(), term.comparator()); 0348 Term fterm(QStringLiteral("filename"), term.value(), term.comparator()); 0349 return constructQuery(tr, Term{cterm, Term::Operation::Or, fterm}); 0350 } 0351 0352 QByteArray prefix; 0353 QMetaType::Type valueType = QMetaType::QString; 0354 if (!property.isEmpty()) { 0355 std::tie(prefix, valueType) = propertyInfo(property); 0356 if (valueType == QMetaType::UnknownType) { 0357 return nullptr; 0358 } 0359 } 0360 0361 auto com = term.comparator(); 0362 if (com == Term::Contains && valueType == QMetaType::Int) { 0363 com = Term::Equal; 0364 } 0365 if (com == Term::Contains) { 0366 EngineQuery q = constructContainsQuery(prefix, value.toString()); 0367 return tr->postingIterator(q); 0368 } 0369 0370 if (com == Term::Equal) { 0371 EngineQuery q = constructEqualsQuery(prefix, value.toString()); 0372 return tr->postingIterator(q); 0373 } 0374 0375 PostingDB::Comparator pcom; 0376 if (com == Term::Greater || com == Term::GreaterEqual) { 0377 pcom = PostingDB::GreaterEqual; 0378 } else if (com == Term::Less || com == Term::LessEqual) { 0379 pcom = PostingDB::LessEqual; 0380 } 0381 0382 // FIXME -- has to be kept in sync with the code from 0383 // Baloo::Result::add 0384 if (valueType == QMetaType::Int) { 0385 qlonglong intVal = value.toLongLong(); 0386 0387 if (term.comparator() == Term::Greater) { 0388 intVal++; 0389 } else if (term.comparator() == Term::Less) { 0390 intVal--; 0391 } 0392 0393 return tr->postingCompIterator(prefix, intVal, pcom); 0394 0395 } else if (valueType == QMetaType::Double) { 0396 double dVal = value.toDouble(); 0397 return tr->postingCompIterator(prefix, dVal, pcom); 0398 0399 } else if (valueType == QMetaType::QDateTime) { 0400 QDateTime dt = value.toDateTime(); 0401 const QByteArray ba = dt.toString(Qt::ISODate).toUtf8(); 0402 return tr->postingCompIterator(prefix, ba, pcom); 0403 0404 } else { 0405 qCDebug(BALOO) << "Comparison must be with an integer"; 0406 } 0407 0408 return nullptr; 0409 } 0410 0411 } // namespace Baloo