File indexing completed on 2024-04-21 03:51:48

0001 /*
0002     This file is part of the KDE Baloo Project
0003     SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <vhanda@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0006 */
0007 
0008 #include "baloodebug.h"
0009 #include "searchstore.h"
0010 #include "global.h"
0011 
0012 #include "database.h"
0013 #include "term.h"
0014 #include "transaction.h"
0015 #include "enginequery.h"
0016 #include "termgenerator.h"
0017 #include "andpostingiterator.h"
0018 #include "orpostingiterator.h"
0019 
0020 #include <QDateTime>
0021 
0022 #include <KFileMetaData/PropertyInfo>
0023 #include <KFileMetaData/TypeInfo>
0024 #include <KFileMetaData/Types>
0025 
0026 #include <algorithm>
0027 #include <array>
0028 #include <tuple>
0029 
0030 namespace Baloo {
0031 
0032 namespace {
0033 QPair<quint32, quint32> calculateTimeRange(const QDateTime& dt, Term::Comparator com)
0034 {
0035     Q_ASSERT(dt.isValid());
0036 
0037     if (com == Term::Equal) {
0038         // Timestamps in DB are quint32 relative to Epoch (1970...2106)
0039         auto start = static_cast<quint32>(dt.date().startOfDay().toSecsSinceEpoch());
0040         auto end = static_cast<quint32>(dt.date().endOfDay().toSecsSinceEpoch());
0041         return {start, end};
0042     }
0043 
0044     quint32 timet = dt.toSecsSinceEpoch();
0045     if (com == Term::LessEqual) {
0046         return {0, timet};
0047     }
0048     if (com == Term::Less) {
0049         return {0, timet - 1};
0050     }
0051     if (com == Term::GreaterEqual) {
0052         return {timet, std::numeric_limits<quint32>::max()};
0053     }
0054     if (com == Term::Greater) {
0055         return {timet + 1, std::numeric_limits<quint32>::max()};
0056     }
0057 
0058     Q_ASSERT_X(0, __func__, "mtime query must contain a valid comparator");
0059     return {0, 0};
0060 }
0061 
0062 struct InternalProperty {
0063     const char* propertyName;
0064     const char* prefix;
0065     QMetaType::Type valueType;
0066 };
0067 constexpr std::array<InternalProperty, 7> internalProperties{{{"content", "", QMetaType::QString},
0068                                                               {"filename", "F", QMetaType::QString},
0069                                                               {"mimetype", "M", QMetaType::QString},
0070                                                               {"rating", "R", QMetaType::Int},
0071                                                               {"tag", "TAG-", QMetaType::QString},
0072                                                               {"tags", "TA", QMetaType::QString},
0073                                                               {"usercomment", "C", QMetaType::QString}}};
0074 
0075 std::pair<QByteArray, QMetaType::Type> propertyInfo(const QByteArray &property)
0076 {
0077     auto it = std::find_if(std::begin(internalProperties), std::end(internalProperties),
0078         [&property] (const InternalProperty& entry) { return property == entry.propertyName; });
0079     if (it != std::end(internalProperties)) {
0080         return { (*it).prefix, (*it).valueType };
0081     } else {
0082         KFileMetaData::PropertyInfo pi = KFileMetaData::PropertyInfo::fromName(QString::fromUtf8(property));
0083         if (pi.property() == KFileMetaData::Property::Empty) {
0084             return {QByteArray(), QMetaType::UnknownType};
0085         }
0086         int propPrefix = static_cast<int>(pi.property());
0087         return {QByteArray('X' + QByteArray::number(propPrefix) + '-'), pi.valueType()};
0088     }
0089 }
0090 
0091 EngineQuery constructEqualsQuery(const QByteArray& prefix, const QString& value)
0092 {
0093     // We use the TermGenerator to normalize the words in the value and to
0094     // split it into other words. If we split the words, we then add them as a
0095     // phrase query.
0096     const QByteArrayList terms = TermGenerator::termList(value);
0097 
0098     QVector<EngineQuery> queries;
0099     queries.reserve(terms.size());
0100     for (const QByteArray& term : terms) {
0101         QByteArray arr = prefix + term;
0102         // FIXME - compatibility hack, to find truncated terms with old
0103         // DBs, remove on next DB bump
0104         if (arr.size() > 25) {
0105             queries << EngineQuery(arr.left(25), EngineQuery::StartsWith);
0106         } else {
0107             queries << EngineQuery(arr);
0108         }
0109     }
0110 
0111     if (queries.isEmpty()) {
0112         return EngineQuery();
0113     } else if (queries.size() == 1) {
0114         return queries.first();
0115     } else {
0116         return EngineQuery(queries);
0117     }
0118 }
0119 
0120 EngineQuery constructContainsQuery(const QByteArray& prefix, const QString& value)
0121 {
0122     auto query = constructEqualsQuery(prefix, value);
0123     if (query.op() == EngineQuery::Equal) {
0124         if (query.term().size() >= 3) {
0125             query.setOp(EngineQuery::StartsWith);
0126     }
0127     }
0128     return query;
0129 }
0130 
0131 EngineQuery constructTypeQuery(const QString& value)
0132 {
0133     Q_ASSERT(!value.isEmpty());
0134 
0135     KFileMetaData::TypeInfo ti = KFileMetaData::TypeInfo::fromName(value);
0136     if (ti == KFileMetaData::Type::Empty) {
0137         qCDebug(BALOO) << "Type" << value << "does not exist";
0138         return EngineQuery();
0139     }
0140     int num = static_cast<int>(ti.type());
0141 
0142     return EngineQuery('T' + QByteArray::number(num));
0143 }
0144 } // namespace
0145 
0146 SearchStore::SearchStore()
0147     : m_db(nullptr)
0148 {
0149     m_db = globalDatabaseInstance();
0150     if (!m_db->open(Database::ReadOnlyDatabase)) {
0151         m_db = nullptr;
0152     }
0153 }
0154 
0155 SearchStore::~SearchStore()
0156 {
0157 }
0158 
0159 // Return the result with-in [offset, offset + limit)
0160 ResultList SearchStore::exec(const Term& term, uint offset, int limit, bool sortResults)
0161 {
0162     if (!m_db || !m_db->isOpen()) {
0163         return ResultList();
0164     }
0165 
0166     Transaction tr(m_db, Transaction::ReadOnly);
0167     std::unique_ptr<PostingIterator> it(constructQuery(&tr, term));
0168     if (!it) {
0169         return ResultList();
0170     }
0171 
0172     if (sortResults) {
0173         QVector<std::pair<quint64, quint32>> resultIds;
0174         while (it->next()) {
0175             quint64 id = it->docId();
0176             quint32 mtime = tr.documentTimeInfo(id).mTime;
0177             resultIds << std::pair<quint64, quint32>{id, mtime};
0178 
0179             Q_ASSERT(id > 0);
0180         }
0181 
0182         // Not enough results within range, no need to sort.
0183         if (offset >= static_cast<uint>(resultIds.size())) {
0184             return ResultList();
0185         }
0186 
0187         auto compFunc = [](const std::pair<quint64, quint32>& lhs,
0188                            const std::pair<quint64, quint32>& rhs) {
0189             return lhs.second > rhs.second;
0190         };
0191 
0192         std::sort(resultIds.begin(), resultIds.end(), compFunc);
0193         if (limit < 0) {
0194             limit = resultIds.size();
0195         }
0196 
0197         ResultList results;
0198         const uint end = qMin(static_cast<uint>(resultIds.size()), offset + static_cast<uint>(limit));
0199         results.reserve(end - offset);
0200         for (uint i = offset; i < end; i++) {
0201             const quint64 id = resultIds[i].first;
0202             Result res{tr.documentUrl(id), id};
0203 
0204             results.emplace_back(res);
0205         }
0206 
0207         return results;
0208     }
0209     else {
0210         ResultList results;
0211         uint ulimit = limit < 0 ? UINT_MAX : limit;
0212 
0213         while (offset && it->next()) {
0214             offset--;
0215         }
0216 
0217         while (ulimit && it->next()) {
0218             const quint64 id = it->docId();
0219             Q_ASSERT(id > 0);
0220             Result res{tr.documentUrl(id), id};
0221             Q_ASSERT(!res.filePath.isEmpty());
0222 
0223             results.emplace_back(res);
0224 
0225             ulimit--;
0226         }
0227 
0228         return results;
0229     }
0230 }
0231 
0232 PostingIterator* SearchStore::constructQuery(Transaction* tr, const Term& term)
0233 {
0234     Q_ASSERT(tr);
0235 
0236     if (term.operation() == Term::And || term.operation() == Term::Or) {
0237         const QList<Term> subTerms = term.subTerms();
0238         QVector<PostingIterator*> vec;
0239         vec.reserve(subTerms.size());
0240 
0241         for (const Term& t : subTerms) {
0242             auto iterator = constructQuery(tr, t);
0243             // constructQuery returns a nullptr to signal an empty list
0244             if (iterator) {
0245                 vec << iterator;
0246             } else if (term.operation() == Term::And) {
0247                 return nullptr;
0248             }
0249         }
0250 
0251         if (vec.isEmpty()) {
0252             return nullptr;
0253         } else if (vec.size() == 1) {
0254             return vec.takeFirst();
0255         }
0256 
0257         if (term.operation() == Term::And) {
0258             return new AndPostingIterator(vec);
0259         } else {
0260             return new OrPostingIterator(vec);
0261         }
0262     }
0263 
0264     if (term.value().isNull()) {
0265         return nullptr;
0266     }
0267     Q_ASSERT(term.value().isValid());
0268     Q_ASSERT(term.comparator() != Term::Auto);
0269     Q_ASSERT(term.comparator() == Term::Contains ? term.value().typeId() == QMetaType::QString : true);
0270 
0271     const QVariant value = term.value();
0272     const QByteArray property = term.property().toLower().toUtf8();
0273 
0274     if (property == "type" || property == "kind") {
0275         EngineQuery q = constructTypeQuery(value.toString());
0276         return tr->postingIterator(q);
0277     }
0278     else if (property == "includefolder") {
0279         const QByteArray folder = value.toString().toUtf8();
0280 
0281         if (folder.isEmpty()) {
0282             return nullptr;
0283         }
0284         if (!folder.startsWith('/')) {
0285             return nullptr;
0286         }
0287 
0288         quint64 id = tr->documentId(folder);
0289         if (!id) {
0290             qCDebug(BALOO) << "Folder" << value.toString() << "not indexed";
0291             return nullptr;
0292         }
0293 
0294         return tr->docUrlIter(id);
0295     }
0296     else if (property == "modified" || property == "mtime") {
0297         if (value.typeId() == QMetaType::QByteArray) {
0298             // Used by Baloo::Query
0299             QByteArray ba = value.toByteArray();
0300             Q_ASSERT(ba.size() >= 4);
0301 
0302             int year = ba.mid(0, 4).toInt();
0303             int month = ba.mid(4, 2).toInt();
0304             int day = ba.mid(6, 2).toInt();
0305 
0306             Q_ASSERT(year);
0307 
0308             // uses 0 to represent whole month or whole year
0309             month = month >= 0 && month <= 12 ? month : 0;
0310             day = day >= 0 && day <= 31 ? day : 0;
0311 
0312             QDate startDate(year, month ? month : 1, day ? day : 1);
0313             QDate endDate(startDate);
0314 
0315             if (month == 0) {
0316                 endDate.setDate(endDate.year(), 12, 31);
0317             } else if (day == 0) {
0318                 endDate.setDate(endDate.year(), endDate.month(), endDate.daysInMonth());
0319             }
0320 
0321             return tr->mTimeRangeIter(startDate.startOfDay().toSecsSinceEpoch(), endDate.endOfDay().toSecsSinceEpoch());
0322         } else if (value.typeId() == QMetaType::QString) {
0323             const QDateTime dt = value.toDateTime();
0324             QPair<quint32, quint32> timerange = calculateTimeRange(dt, term.comparator());
0325             if ((timerange.first == 0) && (timerange.second == 0)) {
0326                 return nullptr;
0327             }
0328             return tr->mTimeRangeIter(timerange.first, timerange.second);
0329         } else {
0330             Q_ASSERT_X(0, "SearchStore::constructQuery", "modified property must contain date/datetime values");
0331             return nullptr;
0332         }
0333     } else if (property == "tag") {
0334         if (term.comparator() == Term::Equal) {
0335             const QByteArray prefix = "TAG-";
0336             EngineQuery q = EngineQuery(prefix + value.toByteArray());
0337             return tr->postingIterator(q);
0338         } else if (term.comparator() == Term::Contains) {
0339             const QByteArray prefix = "TA";
0340             EngineQuery q = constructEqualsQuery(prefix, value.toString());
0341             return tr->postingIterator(q);
0342         } else {
0343             Q_ASSERT(0);
0344             return nullptr;
0345         }
0346     } else if (property == "") {
0347         Term cterm(QStringLiteral("content"), term.value(), term.comparator());
0348         Term fterm(QStringLiteral("filename"), term.value(), term.comparator());
0349         return constructQuery(tr, Term{cterm, Term::Operation::Or, fterm});
0350     }
0351 
0352     QByteArray prefix;
0353     QMetaType::Type valueType = QMetaType::QString;
0354     if (!property.isEmpty()) {
0355         std::tie(prefix, valueType) = propertyInfo(property);
0356         if (valueType == QMetaType::UnknownType) {
0357             return nullptr;
0358         }
0359     }
0360 
0361     auto com = term.comparator();
0362     if (com == Term::Contains && valueType == QMetaType::Int) {
0363         com = Term::Equal;
0364     }
0365     if (com == Term::Contains) {
0366         EngineQuery q = constructContainsQuery(prefix, value.toString());
0367         return tr->postingIterator(q);
0368     }
0369 
0370     if (com == Term::Equal) {
0371         EngineQuery q = constructEqualsQuery(prefix, value.toString());
0372         return tr->postingIterator(q);
0373     }
0374 
0375     PostingDB::Comparator pcom;
0376     if (com == Term::Greater || com == Term::GreaterEqual) {
0377         pcom = PostingDB::GreaterEqual;
0378     } else if (com == Term::Less || com == Term::LessEqual) {
0379         pcom = PostingDB::LessEqual;
0380     }
0381 
0382     // FIXME -- has to be kept in sync with the code from
0383     // Baloo::Result::add
0384     if (valueType == QMetaType::Int) {
0385         qlonglong intVal = value.toLongLong();
0386 
0387         if (term.comparator() == Term::Greater) {
0388             intVal++;
0389         } else if (term.comparator() == Term::Less) {
0390             intVal--;
0391         }
0392 
0393         return tr->postingCompIterator(prefix, intVal, pcom);
0394 
0395     } else if (valueType == QMetaType::Double) {
0396         double dVal = value.toDouble();
0397         return tr->postingCompIterator(prefix, dVal, pcom);
0398 
0399     } else if (valueType == QMetaType::QDateTime) {
0400         QDateTime dt = value.toDateTime();
0401         const QByteArray ba = dt.toString(Qt::ISODate).toUtf8();
0402         return tr->postingCompIterator(prefix, ba, pcom);
0403 
0404     } else {
0405         qCDebug(BALOO) << "Comparison must be with an integer";
0406     }
0407 
0408     return nullptr;
0409 }
0410 
0411 } // namespace Baloo