File indexing completed on 2024-04-28 15:17:40
0001 /* 0002 This file is part of the KDE Baloo Project 0003 SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <vhanda@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL 0006 */ 0007 0008 #include "baloodebug.h" 0009 #include "searchstore.h" 0010 #include "global.h" 0011 0012 #include "database.h" 0013 #include "term.h" 0014 #include "transaction.h" 0015 #include "enginequery.h" 0016 #include "termgenerator.h" 0017 #include "andpostingiterator.h" 0018 #include "orpostingiterator.h" 0019 0020 #include <QDateTime> 0021 0022 #include <KFileMetaData/PropertyInfo> 0023 #include <KFileMetaData/TypeInfo> 0024 #include <KFileMetaData/Types> 0025 0026 #include <algorithm> 0027 #include <array> 0028 #include <tuple> 0029 0030 namespace Baloo { 0031 0032 namespace { 0033 QPair<quint32, quint32> calculateTimeRange(const QDateTime& dt, Term::Comparator com) 0034 { 0035 Q_ASSERT(dt.isValid()); 0036 0037 if (com == Term::Equal) { 0038 // Timestamps in DB are quint32 relative to Epoch (1970...2106) 0039 auto start = static_cast<quint32>(dt.date().startOfDay().toSecsSinceEpoch()); 0040 auto end = static_cast<quint32>(dt.date().endOfDay().toSecsSinceEpoch()); 0041 return {start, end}; 0042 } 0043 0044 quint32 timet = dt.toSecsSinceEpoch(); 0045 if (com == Term::LessEqual) { 0046 return {0, timet}; 0047 } 0048 if (com == Term::Less) { 0049 return {0, timet - 1}; 0050 } 0051 if (com == Term::GreaterEqual) { 0052 return {timet, std::numeric_limits<quint32>::max()}; 0053 } 0054 if (com == Term::Greater) { 0055 return {timet + 1, std::numeric_limits<quint32>::max()}; 0056 } 0057 0058 Q_ASSERT_X(0, __func__, "mtime query must contain a valid comparator"); 0059 return {0, 0}; 0060 } 0061 0062 struct InternalProperty { 0063 const char* propertyName; 0064 const char* prefix; 0065 QVariant::Type valueType; 0066 }; 0067 constexpr std::array<InternalProperty, 7> internalProperties {{ 0068 { "content", "", QVariant::String }, 0069 { "filename", "F", QVariant::String }, 0070 { "mimetype", "M", QVariant::String }, 0071 { "rating", "R", QVariant::Int }, 0072 { "tag", "TAG-", QVariant::String }, 0073 { "tags", "TA", QVariant::String }, 0074 { "usercomment", "C", QVariant::String } 0075 }}; 0076 0077 std::pair<QByteArray, QVariant::Type> propertyInfo(const QByteArray& property) 0078 { 0079 auto it = std::find_if(std::begin(internalProperties), std::end(internalProperties), 0080 [&property] (const InternalProperty& entry) { return property == entry.propertyName; }); 0081 if (it != std::end(internalProperties)) { 0082 return { (*it).prefix, (*it).valueType }; 0083 } else { 0084 KFileMetaData::PropertyInfo pi = KFileMetaData::PropertyInfo::fromName(QString::fromUtf8(property)); 0085 if (pi.property() == KFileMetaData::Property::Empty) { 0086 return { QByteArray(), QVariant::Invalid }; 0087 } 0088 int propPrefix = static_cast<int>(pi.property()); 0089 return { 'X' + QByteArray::number(propPrefix) + '-', pi.valueType() }; 0090 } 0091 } 0092 0093 EngineQuery constructEqualsQuery(const QByteArray& prefix, const QString& value) 0094 { 0095 // We use the TermGenerator to normalize the words in the value and to 0096 // split it into other words. If we split the words, we then add them as a 0097 // phrase query. 0098 const QByteArrayList terms = TermGenerator::termList(value); 0099 0100 QVector<EngineQuery> queries; 0101 queries.reserve(terms.size()); 0102 for (const QByteArray& term : terms) { 0103 QByteArray arr = prefix + term; 0104 // FIXME - compatibility hack, to find truncated terms with old 0105 // DBs, remove on next DB bump 0106 if (arr.size() > 25) { 0107 queries << EngineQuery(arr.left(25), EngineQuery::StartsWith); 0108 } else { 0109 queries << EngineQuery(arr); 0110 } 0111 } 0112 0113 if (queries.isEmpty()) { 0114 return EngineQuery(); 0115 } else if (queries.size() == 1) { 0116 return queries.first(); 0117 } else { 0118 return EngineQuery(queries); 0119 } 0120 } 0121 0122 EngineQuery constructContainsQuery(const QByteArray& prefix, const QString& value) 0123 { 0124 auto query = constructEqualsQuery(prefix, value); 0125 if (query.op() == EngineQuery::Equal) { 0126 if (query.term().size() >= 3) { 0127 query.setOp(EngineQuery::StartsWith); 0128 } 0129 } 0130 return query; 0131 } 0132 0133 EngineQuery constructTypeQuery(const QString& value) 0134 { 0135 Q_ASSERT(!value.isEmpty()); 0136 0137 KFileMetaData::TypeInfo ti = KFileMetaData::TypeInfo::fromName(value); 0138 if (ti == KFileMetaData::Type::Empty) { 0139 qCDebug(BALOO) << "Type" << value << "does not exist"; 0140 return EngineQuery(); 0141 } 0142 int num = static_cast<int>(ti.type()); 0143 0144 return EngineQuery('T' + QByteArray::number(num)); 0145 } 0146 } // namespace 0147 0148 SearchStore::SearchStore() 0149 : m_db(nullptr) 0150 { 0151 m_db = globalDatabaseInstance(); 0152 if (!m_db->open(Database::ReadOnlyDatabase)) { 0153 m_db = nullptr; 0154 } 0155 } 0156 0157 SearchStore::~SearchStore() 0158 { 0159 } 0160 0161 // Return the result with-in [offset, offset + limit) 0162 ResultList SearchStore::exec(const Term& term, uint offset, int limit, bool sortResults) 0163 { 0164 if (!m_db || !m_db->isOpen()) { 0165 return ResultList(); 0166 } 0167 0168 Transaction tr(m_db, Transaction::ReadOnly); 0169 std::unique_ptr<PostingIterator> it(constructQuery(&tr, term)); 0170 if (!it) { 0171 return ResultList(); 0172 } 0173 0174 if (sortResults) { 0175 QVector<std::pair<quint64, quint32>> resultIds; 0176 while (it->next()) { 0177 quint64 id = it->docId(); 0178 quint32 mtime = tr.documentTimeInfo(id).mTime; 0179 resultIds << std::pair<quint64, quint32>{id, mtime}; 0180 0181 Q_ASSERT(id > 0); 0182 } 0183 0184 // Not enough results within range, no need to sort. 0185 if (offset >= static_cast<uint>(resultIds.size())) { 0186 return ResultList(); 0187 } 0188 0189 auto compFunc = [](const std::pair<quint64, quint32>& lhs, 0190 const std::pair<quint64, quint32>& rhs) { 0191 return lhs.second > rhs.second; 0192 }; 0193 0194 std::sort(resultIds.begin(), resultIds.end(), compFunc); 0195 if (limit < 0) { 0196 limit = resultIds.size(); 0197 } 0198 0199 ResultList results; 0200 const uint end = qMin(static_cast<uint>(resultIds.size()), offset + static_cast<uint>(limit)); 0201 results.reserve(end - offset); 0202 for (uint i = offset; i < end; i++) { 0203 const quint64 id = resultIds[i].first; 0204 Result res{tr.documentUrl(id), id}; 0205 0206 results.emplace_back(res); 0207 } 0208 0209 return results; 0210 } 0211 else { 0212 ResultList results; 0213 uint ulimit = limit < 0 ? UINT_MAX : limit; 0214 0215 while (offset && it->next()) { 0216 offset--; 0217 } 0218 0219 while (ulimit && it->next()) { 0220 const quint64 id = it->docId(); 0221 Q_ASSERT(id > 0); 0222 Result res{tr.documentUrl(id), id}; 0223 Q_ASSERT(!res.filePath.isEmpty()); 0224 0225 results.emplace_back(res); 0226 0227 ulimit--; 0228 } 0229 0230 return results; 0231 } 0232 } 0233 0234 PostingIterator* SearchStore::constructQuery(Transaction* tr, const Term& term) 0235 { 0236 Q_ASSERT(tr); 0237 0238 if (term.operation() == Term::And || term.operation() == Term::Or) { 0239 const QList<Term> subTerms = term.subTerms(); 0240 QVector<PostingIterator*> vec; 0241 vec.reserve(subTerms.size()); 0242 0243 for (const Term& t : subTerms) { 0244 auto iterator = constructQuery(tr, t); 0245 // constructQuery returns a nullptr to signal an empty list 0246 if (iterator) { 0247 vec << iterator; 0248 } else if (term.operation() == Term::And) { 0249 return nullptr; 0250 } 0251 } 0252 0253 if (vec.isEmpty()) { 0254 return nullptr; 0255 } else if (vec.size() == 1) { 0256 return vec.takeFirst(); 0257 } 0258 0259 if (term.operation() == Term::And) { 0260 return new AndPostingIterator(vec); 0261 } else { 0262 return new OrPostingIterator(vec); 0263 } 0264 } 0265 0266 if (term.value().isNull()) { 0267 return nullptr; 0268 } 0269 Q_ASSERT(term.value().isValid()); 0270 Q_ASSERT(term.comparator() != Term::Auto); 0271 Q_ASSERT(term.comparator() == Term::Contains ? term.value().type() == QVariant::String : true); 0272 0273 const QVariant value = term.value(); 0274 const QByteArray property = term.property().toLower().toUtf8(); 0275 0276 if (property == "type" || property == "kind") { 0277 EngineQuery q = constructTypeQuery(value.toString()); 0278 return tr->postingIterator(q); 0279 } 0280 else if (property == "includefolder") { 0281 const QByteArray folder = value.toString().toUtf8(); 0282 0283 if (folder.isEmpty()) { 0284 return nullptr; 0285 } 0286 if (!folder.startsWith('/')) { 0287 return nullptr; 0288 } 0289 0290 quint64 id = tr->documentId(folder); 0291 if (!id) { 0292 qCDebug(BALOO) << "Folder" << value.toString() << "not indexed"; 0293 return nullptr; 0294 } 0295 0296 return tr->docUrlIter(id); 0297 } 0298 else if (property == "modified" || property == "mtime") { 0299 if (value.type() == QVariant::ByteArray) { 0300 // Used by Baloo::Query 0301 QByteArray ba = value.toByteArray(); 0302 Q_ASSERT(ba.size() >= 4); 0303 0304 int year = ba.mid(0, 4).toInt(); 0305 int month = ba.mid(4, 2).toInt(); 0306 int day = ba.mid(6, 2).toInt(); 0307 0308 Q_ASSERT(year); 0309 0310 // uses 0 to represent whole month or whole year 0311 month = month >= 0 && month <= 12 ? month : 0; 0312 day = day >= 0 && day <= 31 ? day : 0; 0313 0314 QDate startDate(year, month ? month : 1, day ? day : 1); 0315 QDate endDate(startDate); 0316 0317 if (month == 0) { 0318 endDate.setDate(endDate.year(), 12, 31); 0319 } else if (day == 0) { 0320 endDate.setDate(endDate.year(), endDate.month(), endDate.daysInMonth()); 0321 } 0322 0323 return tr->mTimeRangeIter(startDate.startOfDay().toSecsSinceEpoch(), endDate.endOfDay().toSecsSinceEpoch()); 0324 } 0325 else if (value.type() == QVariant::String) { 0326 const QDateTime dt = value.toDateTime(); 0327 QPair<quint32, quint32> timerange = calculateTimeRange(dt, term.comparator()); 0328 if ((timerange.first == 0) && (timerange.second == 0)) { 0329 return nullptr; 0330 } 0331 return tr->mTimeRangeIter(timerange.first, timerange.second); 0332 } 0333 else { 0334 Q_ASSERT_X(0, "SearchStore::constructQuery", "modified property must contain date/datetime values"); 0335 return nullptr; 0336 } 0337 } else if (property == "tag") { 0338 if (term.comparator() == Term::Equal) { 0339 const QByteArray prefix = "TAG-"; 0340 EngineQuery q = EngineQuery(prefix + value.toByteArray()); 0341 return tr->postingIterator(q); 0342 } else if (term.comparator() == Term::Contains) { 0343 const QByteArray prefix = "TA"; 0344 EngineQuery q = constructEqualsQuery(prefix, value.toString()); 0345 return tr->postingIterator(q); 0346 } else { 0347 Q_ASSERT(0); 0348 return nullptr; 0349 } 0350 } else if (property == "") { 0351 Term cterm(QStringLiteral("content"), term.value(), term.comparator()); 0352 Term fterm(QStringLiteral("filename"), term.value(), term.comparator()); 0353 return constructQuery(tr, Term{cterm, Term::Operation::Or, fterm}); 0354 } 0355 0356 QByteArray prefix; 0357 QVariant::Type valueType = QVariant::String; 0358 if (!property.isEmpty()) { 0359 std::tie(prefix, valueType) = propertyInfo(property); 0360 if (valueType == QVariant::Invalid) { 0361 return nullptr; 0362 } 0363 } 0364 0365 auto com = term.comparator(); 0366 if (com == Term::Contains && valueType == QVariant::Int) { 0367 com = Term::Equal; 0368 } 0369 if (com == Term::Contains) { 0370 EngineQuery q = constructContainsQuery(prefix, value.toString()); 0371 return tr->postingIterator(q); 0372 } 0373 0374 if (com == Term::Equal) { 0375 EngineQuery q = constructEqualsQuery(prefix, value.toString()); 0376 return tr->postingIterator(q); 0377 } 0378 0379 PostingDB::Comparator pcom; 0380 if (com == Term::Greater || com == Term::GreaterEqual) { 0381 pcom = PostingDB::GreaterEqual; 0382 } else if (com == Term::Less || com == Term::LessEqual) { 0383 pcom = PostingDB::LessEqual; 0384 } 0385 0386 // FIXME -- has to be kept in sync with the code from 0387 // Baloo::Result::add 0388 if (valueType == QVariant::Int) { 0389 qlonglong intVal = value.toLongLong(); 0390 0391 if (term.comparator() == Term::Greater) { 0392 intVal++; 0393 } else if (term.comparator() == Term::Less) { 0394 intVal--; 0395 } 0396 0397 return tr->postingCompIterator(prefix, intVal, pcom); 0398 0399 } else if (valueType == QVariant::Double) { 0400 double dVal = value.toDouble(); 0401 return tr->postingCompIterator(prefix, dVal, pcom); 0402 0403 } else if (valueType == QVariant::DateTime) { 0404 QDateTime dt = value.toDateTime(); 0405 const QByteArray ba = dt.toString(Qt::ISODate).toUtf8(); 0406 return tr->postingCompIterator(prefix, ba, pcom); 0407 0408 } else { 0409 qCDebug(BALOO) << "Comparison must be with an integer"; 0410 } 0411 0412 return nullptr; 0413 } 0414 0415 } // namespace Baloo