File indexing completed on 2024-04-28 07:40:04

0001 /*
0002     This file is part of the KDE Baloo project.
0003     SPDX-FileCopyrightText: 2015 Vishesh Handa <me@vhanda.in>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-or-later
0006 */
0007 
0008 #include "enginedebug.h"
0009 #include "postingdb.h"
0010 #include "orpostingiterator.h"
0011 #include "postingcodec.h"
0012 
0013 using namespace Baloo;
0014 
0015 PostingDB::PostingDB(MDB_dbi dbi, MDB_txn* txn)
0016     : m_txn(txn)
0017     , m_dbi(dbi)
0018 {
0019     Q_ASSERT(txn != nullptr);
0020     Q_ASSERT(dbi != 0);
0021 }
0022 
0023 PostingDB::~PostingDB()
0024 {
0025 }
0026 
0027 MDB_dbi PostingDB::create(MDB_txn* txn)
0028 {
0029     MDB_dbi dbi = 0;
0030     int rc = mdb_dbi_open(txn, "postingdb", MDB_CREATE, &dbi);
0031     if (rc) {
0032         qCWarning(ENGINE) << "PostingDB::create" << mdb_strerror(rc);
0033         return 0;
0034     }
0035 
0036     return dbi;
0037 }
0038 
0039 MDB_dbi PostingDB::open(MDB_txn* txn)
0040 {
0041     MDB_dbi dbi = 0;
0042     int rc = mdb_dbi_open(txn, "postingdb", 0, &dbi);
0043     if (rc) {
0044         qCWarning(ENGINE) << "PostingDB::open" << mdb_strerror(rc);
0045         return 0;
0046     }
0047 
0048     return dbi;
0049 }
0050 
0051 void PostingDB::put(const QByteArray& term, const PostingList& list)
0052 {
0053     Q_ASSERT(!term.isEmpty());
0054     Q_ASSERT(!list.isEmpty());
0055 
0056     MDB_val key;
0057     key.mv_size = term.size();
0058     key.mv_data = static_cast<void*>(const_cast<char*>(term.constData()));
0059 
0060     QByteArray arr = PostingCodec::encode(list);
0061 
0062     MDB_val val;
0063     val.mv_size = arr.size();
0064     val.mv_data = static_cast<void*>(arr.data());
0065 
0066     int rc = mdb_put(m_txn, m_dbi, &key, &val, 0);
0067     if (rc) {
0068         qCWarning(ENGINE) << "PostingDB::put" << mdb_strerror(rc);
0069     }
0070 }
0071 
0072 PostingList PostingDB::get(const QByteArray& term)
0073 {
0074     Q_ASSERT(!term.isEmpty());
0075 
0076     MDB_val key;
0077     key.mv_size = term.size();
0078     key.mv_data = static_cast<void*>(const_cast<char*>(term.constData()));
0079 
0080     MDB_val val{0, nullptr};
0081     int rc = mdb_get(m_txn, m_dbi, &key, &val);
0082     if (rc) {
0083         if (rc != MDB_NOTFOUND) {
0084             qCDebug(ENGINE) << "PostingDB::get" << term << mdb_strerror(rc);
0085         }
0086         return PostingList();
0087     }
0088 
0089     QByteArray arr = QByteArray::fromRawData(static_cast<char*>(val.mv_data), val.mv_size);
0090 
0091     return PostingCodec::decode(arr);
0092 }
0093 
0094 void PostingDB::del(const QByteArray& term)
0095 {
0096     Q_ASSERT(!term.isEmpty());
0097 
0098     MDB_val key;
0099     key.mv_size = term.size();
0100     key.mv_data = static_cast<void*>(const_cast<char*>(term.constData()));
0101 
0102     int rc = mdb_del(m_txn, m_dbi, &key, nullptr);
0103     if (rc != 0 && rc != MDB_NOTFOUND) {
0104         qCDebug(ENGINE) << "PostingDB::del" << term << mdb_strerror(rc);
0105     }
0106 }
0107 
0108 QVector< QByteArray > PostingDB::fetchTermsStartingWith(const QByteArray& term)
0109 {
0110     MDB_val key;
0111     key.mv_size = term.size();
0112     key.mv_data = static_cast<void*>(const_cast<char*>(term.constData()));
0113 
0114     MDB_cursor* cursor;
0115     int rc = mdb_cursor_open(m_txn, m_dbi, &cursor);
0116     if (rc) {
0117         qCWarning(ENGINE) << "PostingDB::fetchTermsStartingWith" << mdb_strerror(rc);
0118         return {};
0119     }
0120 
0121     QVector<QByteArray> terms;
0122     rc = mdb_cursor_get(cursor, &key, nullptr, MDB_SET_RANGE);
0123     while (rc == 0) {
0124         const QByteArray arr(static_cast<char*>(key.mv_data), key.mv_size);
0125         if (!arr.startsWith(term)) {
0126             break;
0127         }
0128         terms << arr;
0129         rc = mdb_cursor_get(cursor, &key, nullptr, MDB_NEXT);
0130     }
0131     if (rc != MDB_NOTFOUND) {
0132         qCDebug(ENGINE) << "PostingDB::fetchTermsStartingWith" << mdb_strerror(rc);
0133     }
0134 
0135     mdb_cursor_close(cursor);
0136     return terms;
0137 }
0138 
// PostingIterator over a single posting list that is decoded once, at
// construction time, from a raw encoded buffer.
class DBPostingIterator : public PostingIterator {
public:
    // data/size describe the encoded posting list; the constructor decodes
    // it into m_vec.
    DBPostingIterator(void* data, uint size);
    // Current document id, or 0 when the iterator is not positioned on an
    // element.
    quint64 docId() const override;
    // Advances by one element and returns it, or 0 once the list is exhausted.
    quint64 next() override;

private:
    // Decoded document ids; never modified after construction.
    const QVector<quint64> m_vec;
    // Index into m_vec; the constructor sets it to -1, i.e. before the first
    // element.
    int m_pos;
};
0149 
0150 PostingIterator* PostingDB::iter(const QByteArray& term)
0151 {
0152     MDB_val key;
0153     key.mv_size = term.size();
0154     key.mv_data = static_cast<void*>(const_cast<char*>(term.constData()));
0155 
0156     MDB_val val;
0157     int rc = mdb_get(m_txn, m_dbi, &key, &val);
0158     if (rc) {
0159         qCDebug(ENGINE) << "PostingDB::iter" << term << mdb_strerror(rc);
0160         return nullptr;
0161     }
0162 
0163     return new DBPostingIterator(val.mv_data, val.mv_size);
0164 }
0165 
0166 //
0167 // Posting Iterator
0168 //
0169 DBPostingIterator::DBPostingIterator(void* data, uint size)
0170     : m_vec(PostingCodec().decode(QByteArray(static_cast<char*>(data), size)))
0171     , m_pos(-1)
0172 {
0173 }
0174 
0175 quint64 DBPostingIterator::docId() const
0176 {
0177     if (m_pos < 0 || m_pos >= m_vec.size()) {
0178         return 0;
0179     }
0180 
0181     return m_vec[m_pos];
0182 }
0183 
0184 quint64 DBPostingIterator::next()
0185 {
0186     if (m_pos >= m_vec.size() - 1) {
0187         m_pos = m_vec.size();
0188         return 0;
0189     }
0190 
0191     m_pos++;
0192     return m_vec[m_pos];
0193 }
0194 
0195 template <typename Validator>
0196 PostingIterator* PostingDB::iter(const QByteArray& prefix, Validator validate)
0197 {
0198     Q_ASSERT(!prefix.isEmpty());
0199 
0200     MDB_val key;
0201     key.mv_size = prefix.size();
0202     key.mv_data = static_cast<void*>(const_cast<char*>(prefix.constData()));
0203 
0204     MDB_cursor* cursor;
0205     int rc = mdb_cursor_open(m_txn, m_dbi, &cursor);
0206 
0207     if (rc) {
0208         qCWarning(ENGINE) << "PostingDB::regexpIter" << mdb_strerror(rc);
0209         return nullptr;
0210     }
0211 
0212     QVector<PostingIterator*> termIterators;
0213 
0214     MDB_val val;
0215     rc = mdb_cursor_get(cursor, &key, &val, MDB_SET_RANGE);
0216     while (rc == 0) {
0217         const QByteArray arr(static_cast<char*>(key.mv_data), key.mv_size);
0218         if (!arr.startsWith(prefix)) {
0219             break;
0220         }
0221         if (validate(arr)) {
0222             termIterators << new DBPostingIterator(val.mv_data, val.mv_size);
0223         }
0224         rc = mdb_cursor_get(cursor, &key, &val, MDB_NEXT);
0225     }
0226 
0227     if (rc != 0 && rc != MDB_NOTFOUND) {
0228         qCWarning(ENGINE) << "PostingDB::regexpIter" << mdb_strerror(rc);
0229     }
0230 
0231     mdb_cursor_close(cursor);
0232     if (termIterators.isEmpty()) {
0233         return nullptr;
0234     }
0235     return new OrPostingIterator(termIterators);
0236 }
0237 
0238 PostingIterator* PostingDB::prefixIter(const QByteArray& prefix)
0239 {
0240     auto validate = [] (const QByteArray& arr) {
0241         Q_UNUSED(arr);
0242         return true;
0243     };
0244     return iter(prefix, validate);
0245 }
0246 
0247 PostingIterator* PostingDB::regexpIter(const QRegularExpression& regexp, const QByteArray& prefix)
0248 {
0249     int prefixLen = prefix.length();
0250     auto validate = [&regexp, prefixLen] (const QByteArray& arr) {
0251         QString term = QString::fromUtf8(arr.mid(prefixLen));
0252         return regexp.match(term).hasMatch();
0253     };
0254 
0255     return iter(prefix, validate);
0256 }
0257 
0258 PostingIterator* PostingDB::compIter(const QByteArray& prefix, qlonglong comVal, PostingDB::Comparator com)
0259 {
0260     int prefixLen = prefix.length();
0261     auto validate = [prefixLen, comVal, com] (const QByteArray& arr) {
0262         bool ok = false;
0263         auto val = QByteArray::fromRawData(arr.constData() + prefixLen, arr.length() - prefixLen).toLongLong(&ok);
0264         return ok && ((com == LessEqual && val <= comVal) || (com == GreaterEqual && val >= comVal));
0265     };
0266     return iter(prefix, validate);
0267 }
0268 
0269 PostingIterator* PostingDB::compIter(const QByteArray& prefix, double comVal, PostingDB::Comparator com)
0270 {
0271     int prefixLen = prefix.length();
0272     auto validate = [prefixLen, comVal, com] (const QByteArray& arr) {
0273         bool ok = false;
0274         auto val = QByteArray::fromRawData(arr.constData() + prefixLen, arr.length() - prefixLen).toDouble(&ok);
0275         return ok && ((com == LessEqual && val <= comVal) ||
0276                       (com == GreaterEqual && val >= comVal));
0277     };
0278     return iter(prefix, validate);
0279 }
0280 
0281 PostingIterator* PostingDB::compIter(const QByteArray& prefix, const QByteArray& comVal, PostingDB::Comparator com)
0282 {
0283     int prefixLen = prefix.length();
0284     auto validate = [prefixLen, comVal, com] (const QByteArray& arr) {
0285         auto val = QByteArray::fromRawData(arr.constData() + prefixLen, arr.length() - prefixLen);
0286         return ((com == LessEqual && val <= comVal) ||
0287                 (com == GreaterEqual && val >= comVal));
0288     };
0289     return iter(prefix, validate);
0290 }
0291 
0292 QMap<QByteArray, PostingList> PostingDB::toTestMap() const
0293 {
0294     MDB_cursor* cursor;
0295     mdb_cursor_open(m_txn, m_dbi, &cursor);
0296 
0297     MDB_val key = {0, nullptr};
0298     MDB_val val;
0299 
0300     QMap<QByteArray, PostingList> map;
0301     while (1) {
0302         int rc = mdb_cursor_get(cursor, &key, &val, MDB_NEXT);
0303         if (rc == MDB_NOTFOUND) {
0304             break;
0305         }
0306         if (rc) {
0307             qCDebug(ENGINE) << "PostingDB::toTestMap" << mdb_strerror(rc);
0308             break;
0309         }
0310 
0311         const QByteArray ba(static_cast<char*>(key.mv_data), key.mv_size);
0312         const PostingList plist = PostingCodec::decode(QByteArray(static_cast<char*>(val.mv_data), val.mv_size));
0313         map.insert(ba, plist);
0314     }
0315 
0316     mdb_cursor_close(cursor);
0317     return map;
0318 }