File indexing completed on 2024-04-28 15:17:40

0001 /*
0002     This file is part of the KDE Baloo Project
0003     SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <vhanda@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0006 */
0007 
0008 #include "baloodebug.h"
0009 #include "searchstore.h"
0010 #include "global.h"
0011 
0012 #include "database.h"
0013 #include "term.h"
0014 #include "transaction.h"
0015 #include "enginequery.h"
0016 #include "termgenerator.h"
0017 #include "andpostingiterator.h"
0018 #include "orpostingiterator.h"
0019 
0020 #include <QDateTime>
0021 
0022 #include <KFileMetaData/PropertyInfo>
0023 #include <KFileMetaData/TypeInfo>
0024 #include <KFileMetaData/Types>
0025 
0026 #include <algorithm>
0027 #include <array>
0028 #include <tuple>
0029 
0030 namespace Baloo {
0031 
0032 namespace {
0033 QPair<quint32, quint32> calculateTimeRange(const QDateTime& dt, Term::Comparator com)
0034 {
0035     Q_ASSERT(dt.isValid());
0036 
0037     if (com == Term::Equal) {
0038         // Timestamps in DB are quint32 relative to Epoch (1970...2106)
0039         auto start = static_cast<quint32>(dt.date().startOfDay().toSecsSinceEpoch());
0040         auto end = static_cast<quint32>(dt.date().endOfDay().toSecsSinceEpoch());
0041         return {start, end};
0042     }
0043 
0044     quint32 timet = dt.toSecsSinceEpoch();
0045     if (com == Term::LessEqual) {
0046         return {0, timet};
0047     }
0048     if (com == Term::Less) {
0049         return {0, timet - 1};
0050     }
0051     if (com == Term::GreaterEqual) {
0052         return {timet, std::numeric_limits<quint32>::max()};
0053     }
0054     if (com == Term::Greater) {
0055         return {timet + 1, std::numeric_limits<quint32>::max()};
0056     }
0057 
0058     Q_ASSERT_X(0, __func__, "mtime query must contain a valid comparator");
0059     return {0, 0};
0060 }
0061 
0062 struct InternalProperty {
0063     const char* propertyName;
0064     const char* prefix;
0065     QVariant::Type valueType;
0066 };
0067 constexpr std::array<InternalProperty, 7> internalProperties {{
0068     { "content",     "",     QVariant::String },
0069     { "filename",    "F",    QVariant::String },
0070     { "mimetype",    "M",    QVariant::String },
0071     { "rating",      "R",    QVariant::Int    },
0072     { "tag",         "TAG-", QVariant::String },
0073     { "tags",        "TA",   QVariant::String },
0074     { "usercomment", "C",    QVariant::String }
0075 }};
0076 
0077 std::pair<QByteArray, QVariant::Type> propertyInfo(const QByteArray& property)
0078 {
0079     auto it = std::find_if(std::begin(internalProperties), std::end(internalProperties),
0080         [&property] (const InternalProperty& entry) { return property == entry.propertyName; });
0081     if (it != std::end(internalProperties)) {
0082         return { (*it).prefix, (*it).valueType };
0083     } else {
0084         KFileMetaData::PropertyInfo pi = KFileMetaData::PropertyInfo::fromName(QString::fromUtf8(property));
0085         if (pi.property() == KFileMetaData::Property::Empty) {
0086             return { QByteArray(), QVariant::Invalid };
0087         }
0088         int propPrefix = static_cast<int>(pi.property());
0089         return { 'X' + QByteArray::number(propPrefix) + '-', pi.valueType() };
0090     }
0091 }
0092 
0093 EngineQuery constructEqualsQuery(const QByteArray& prefix, const QString& value)
0094 {
0095     // We use the TermGenerator to normalize the words in the value and to
0096     // split it into other words. If we split the words, we then add them as a
0097     // phrase query.
0098     const QByteArrayList terms = TermGenerator::termList(value);
0099 
0100     QVector<EngineQuery> queries;
0101     queries.reserve(terms.size());
0102     for (const QByteArray& term : terms) {
0103         QByteArray arr = prefix + term;
0104         // FIXME - compatibility hack, to find truncated terms with old
0105         // DBs, remove on next DB bump
0106         if (arr.size() > 25) {
0107             queries << EngineQuery(arr.left(25), EngineQuery::StartsWith);
0108         } else {
0109             queries << EngineQuery(arr);
0110         }
0111     }
0112 
0113     if (queries.isEmpty()) {
0114         return EngineQuery();
0115     } else if (queries.size() == 1) {
0116         return queries.first();
0117     } else {
0118         return EngineQuery(queries);
0119     }
0120 }
0121 
0122 EngineQuery constructContainsQuery(const QByteArray& prefix, const QString& value)
0123 {
0124     auto query = constructEqualsQuery(prefix, value);
0125     if (query.op() == EngineQuery::Equal) {
0126         if (query.term().size() >= 3) {
0127             query.setOp(EngineQuery::StartsWith);
0128     }
0129     }
0130     return query;
0131 }
0132 
0133 EngineQuery constructTypeQuery(const QString& value)
0134 {
0135     Q_ASSERT(!value.isEmpty());
0136 
0137     KFileMetaData::TypeInfo ti = KFileMetaData::TypeInfo::fromName(value);
0138     if (ti == KFileMetaData::Type::Empty) {
0139         qCDebug(BALOO) << "Type" << value << "does not exist";
0140         return EngineQuery();
0141     }
0142     int num = static_cast<int>(ti.type());
0143 
0144     return EngineQuery('T' + QByteArray::number(num));
0145 }
0146 } // namespace
0147 
0148 SearchStore::SearchStore()
0149     : m_db(nullptr)
0150 {
0151     m_db = globalDatabaseInstance();
0152     if (!m_db->open(Database::ReadOnlyDatabase)) {
0153         m_db = nullptr;
0154     }
0155 }
0156 
0157 SearchStore::~SearchStore()
0158 {
0159 }
0160 
0161 // Return the result with-in [offset, offset + limit)
0162 ResultList SearchStore::exec(const Term& term, uint offset, int limit, bool sortResults)
0163 {
0164     if (!m_db || !m_db->isOpen()) {
0165         return ResultList();
0166     }
0167 
0168     Transaction tr(m_db, Transaction::ReadOnly);
0169     std::unique_ptr<PostingIterator> it(constructQuery(&tr, term));
0170     if (!it) {
0171         return ResultList();
0172     }
0173 
0174     if (sortResults) {
0175         QVector<std::pair<quint64, quint32>> resultIds;
0176         while (it->next()) {
0177             quint64 id = it->docId();
0178             quint32 mtime = tr.documentTimeInfo(id).mTime;
0179             resultIds << std::pair<quint64, quint32>{id, mtime};
0180 
0181             Q_ASSERT(id > 0);
0182         }
0183 
0184         // Not enough results within range, no need to sort.
0185         if (offset >= static_cast<uint>(resultIds.size())) {
0186             return ResultList();
0187         }
0188 
0189         auto compFunc = [](const std::pair<quint64, quint32>& lhs,
0190                            const std::pair<quint64, quint32>& rhs) {
0191             return lhs.second > rhs.second;
0192         };
0193 
0194         std::sort(resultIds.begin(), resultIds.end(), compFunc);
0195         if (limit < 0) {
0196             limit = resultIds.size();
0197         }
0198 
0199         ResultList results;
0200         const uint end = qMin(static_cast<uint>(resultIds.size()), offset + static_cast<uint>(limit));
0201         results.reserve(end - offset);
0202         for (uint i = offset; i < end; i++) {
0203             const quint64 id = resultIds[i].first;
0204             Result res{tr.documentUrl(id), id};
0205 
0206             results.emplace_back(res);
0207         }
0208 
0209         return results;
0210     }
0211     else {
0212         ResultList results;
0213         uint ulimit = limit < 0 ? UINT_MAX : limit;
0214 
0215         while (offset && it->next()) {
0216             offset--;
0217         }
0218 
0219         while (ulimit && it->next()) {
0220             const quint64 id = it->docId();
0221             Q_ASSERT(id > 0);
0222             Result res{tr.documentUrl(id), id};
0223             Q_ASSERT(!res.filePath.isEmpty());
0224 
0225             results.emplace_back(res);
0226 
0227             ulimit--;
0228         }
0229 
0230         return results;
0231     }
0232 }
0233 
0234 PostingIterator* SearchStore::constructQuery(Transaction* tr, const Term& term)
0235 {
0236     Q_ASSERT(tr);
0237 
0238     if (term.operation() == Term::And || term.operation() == Term::Or) {
0239         const QList<Term> subTerms = term.subTerms();
0240         QVector<PostingIterator*> vec;
0241         vec.reserve(subTerms.size());
0242 
0243         for (const Term& t : subTerms) {
0244             auto iterator = constructQuery(tr, t);
0245             // constructQuery returns a nullptr to signal an empty list
0246             if (iterator) {
0247                 vec << iterator;
0248             } else if (term.operation() == Term::And) {
0249                 return nullptr;
0250             }
0251         }
0252 
0253         if (vec.isEmpty()) {
0254             return nullptr;
0255         } else if (vec.size() == 1) {
0256             return vec.takeFirst();
0257         }
0258 
0259         if (term.operation() == Term::And) {
0260             return new AndPostingIterator(vec);
0261         } else {
0262             return new OrPostingIterator(vec);
0263         }
0264     }
0265 
0266     if (term.value().isNull()) {
0267         return nullptr;
0268     }
0269     Q_ASSERT(term.value().isValid());
0270     Q_ASSERT(term.comparator() != Term::Auto);
0271     Q_ASSERT(term.comparator() == Term::Contains ? term.value().type() == QVariant::String : true);
0272 
0273     const QVariant value = term.value();
0274     const QByteArray property = term.property().toLower().toUtf8();
0275 
0276     if (property == "type" || property == "kind") {
0277         EngineQuery q = constructTypeQuery(value.toString());
0278         return tr->postingIterator(q);
0279     }
0280     else if (property == "includefolder") {
0281         const QByteArray folder = value.toString().toUtf8();
0282 
0283         if (folder.isEmpty()) {
0284             return nullptr;
0285         }
0286         if (!folder.startsWith('/')) {
0287             return nullptr;
0288         }
0289 
0290         quint64 id = tr->documentId(folder);
0291         if (!id) {
0292             qCDebug(BALOO) << "Folder" << value.toString() << "not indexed";
0293             return nullptr;
0294         }
0295 
0296         return tr->docUrlIter(id);
0297     }
0298     else if (property == "modified" || property == "mtime") {
0299         if (value.type() == QVariant::ByteArray) {
0300             // Used by Baloo::Query
0301             QByteArray ba = value.toByteArray();
0302             Q_ASSERT(ba.size() >= 4);
0303 
0304             int year = ba.mid(0, 4).toInt();
0305             int month = ba.mid(4, 2).toInt();
0306             int day = ba.mid(6, 2).toInt();
0307 
0308             Q_ASSERT(year);
0309 
0310             // uses 0 to represent whole month or whole year
0311             month = month >= 0 && month <= 12 ? month : 0;
0312             day = day >= 0 && day <= 31 ? day : 0;
0313 
0314             QDate startDate(year, month ? month : 1, day ? day : 1);
0315             QDate endDate(startDate);
0316 
0317             if (month == 0) {
0318                 endDate.setDate(endDate.year(), 12, 31);
0319             } else if (day == 0) {
0320                 endDate.setDate(endDate.year(), endDate.month(), endDate.daysInMonth());
0321             }
0322 
0323             return tr->mTimeRangeIter(startDate.startOfDay().toSecsSinceEpoch(), endDate.endOfDay().toSecsSinceEpoch());
0324         }
0325         else if (value.type() == QVariant::String) {
0326             const QDateTime dt = value.toDateTime();
0327             QPair<quint32, quint32> timerange = calculateTimeRange(dt, term.comparator());
0328             if ((timerange.first == 0) && (timerange.second == 0)) {
0329                 return nullptr;
0330             }
0331             return tr->mTimeRangeIter(timerange.first, timerange.second);
0332         }
0333         else {
0334             Q_ASSERT_X(0, "SearchStore::constructQuery", "modified property must contain date/datetime values");
0335             return nullptr;
0336         }
0337     } else if (property == "tag") {
0338         if (term.comparator() == Term::Equal) {
0339             const QByteArray prefix = "TAG-";
0340             EngineQuery q = EngineQuery(prefix + value.toByteArray());
0341             return tr->postingIterator(q);
0342         } else if (term.comparator() == Term::Contains) {
0343             const QByteArray prefix = "TA";
0344             EngineQuery q = constructEqualsQuery(prefix, value.toString());
0345             return tr->postingIterator(q);
0346         } else {
0347             Q_ASSERT(0);
0348             return nullptr;
0349         }
0350     } else if (property == "") {
0351         Term cterm(QStringLiteral("content"), term.value(), term.comparator());
0352         Term fterm(QStringLiteral("filename"), term.value(), term.comparator());
0353         return constructQuery(tr, Term{cterm, Term::Operation::Or, fterm});
0354     }
0355 
0356     QByteArray prefix;
0357     QVariant::Type valueType = QVariant::String;
0358     if (!property.isEmpty()) {
0359         std::tie(prefix, valueType) = propertyInfo(property);
0360         if (valueType == QVariant::Invalid) {
0361             return nullptr;
0362         }
0363     }
0364 
0365     auto com = term.comparator();
0366     if (com == Term::Contains && valueType == QVariant::Int) {
0367         com = Term::Equal;
0368     }
0369     if (com == Term::Contains) {
0370         EngineQuery q = constructContainsQuery(prefix, value.toString());
0371         return tr->postingIterator(q);
0372     }
0373 
0374     if (com == Term::Equal) {
0375         EngineQuery q = constructEqualsQuery(prefix, value.toString());
0376         return tr->postingIterator(q);
0377     }
0378 
0379     PostingDB::Comparator pcom;
0380     if (com == Term::Greater || com == Term::GreaterEqual) {
0381         pcom = PostingDB::GreaterEqual;
0382     } else if (com == Term::Less || com == Term::LessEqual) {
0383         pcom = PostingDB::LessEqual;
0384     }
0385 
0386     // FIXME -- has to be kept in sync with the code from
0387     // Baloo::Result::add
0388     if (valueType == QVariant::Int) {
0389         qlonglong intVal = value.toLongLong();
0390 
0391         if (term.comparator() == Term::Greater) {
0392             intVal++;
0393         } else if (term.comparator() == Term::Less) {
0394             intVal--;
0395         }
0396 
0397         return tr->postingCompIterator(prefix, intVal, pcom);
0398 
0399     } else if (valueType == QVariant::Double) {
0400         double dVal = value.toDouble();
0401         return tr->postingCompIterator(prefix, dVal, pcom);
0402 
0403     } else if (valueType == QVariant::DateTime) {
0404         QDateTime dt = value.toDateTime();
0405         const QByteArray ba = dt.toString(Qt::ISODate).toUtf8();
0406         return tr->postingCompIterator(prefix, ba, pcom);
0407 
0408     } else {
0409         qCDebug(BALOO) << "Comparison must be with an integer";
0410     }
0411 
0412     return nullptr;
0413 }
0414 
0415 } // namespace Baloo