File indexing completed on 2024-03-24 03:54:29
0001 /* 0002 This file is part of the KDE Baloo project. 0003 SPDX-FileCopyrightText: 2015 Vishesh Handa <me@vhanda.in> 0004 0005 SPDX-License-Identifier: LGPL-2.1-or-later 0006 */ 0007 0008 #include "enginedebug.h" 0009 #include "postingdb.h" 0010 #include "orpostingiterator.h" 0011 #include "postingcodec.h" 0012 0013 using namespace Baloo; 0014 0015 PostingDB::PostingDB(MDB_dbi dbi, MDB_txn* txn) 0016 : m_txn(txn) 0017 , m_dbi(dbi) 0018 { 0019 Q_ASSERT(txn != nullptr); 0020 Q_ASSERT(dbi != 0); 0021 } 0022 0023 PostingDB::~PostingDB() 0024 { 0025 } 0026 0027 MDB_dbi PostingDB::create(MDB_txn* txn) 0028 { 0029 MDB_dbi dbi = 0; 0030 int rc = mdb_dbi_open(txn, "postingdb", MDB_CREATE, &dbi); 0031 if (rc) { 0032 qCWarning(ENGINE) << "PostingDB::create" << mdb_strerror(rc); 0033 return 0; 0034 } 0035 0036 return dbi; 0037 } 0038 0039 MDB_dbi PostingDB::open(MDB_txn* txn) 0040 { 0041 MDB_dbi dbi = 0; 0042 int rc = mdb_dbi_open(txn, "postingdb", 0, &dbi); 0043 if (rc) { 0044 qCWarning(ENGINE) << "PostingDB::open" << mdb_strerror(rc); 0045 return 0; 0046 } 0047 0048 return dbi; 0049 } 0050 0051 void PostingDB::put(const QByteArray& term, const PostingList& list) 0052 { 0053 Q_ASSERT(!term.isEmpty()); 0054 Q_ASSERT(!list.isEmpty()); 0055 0056 MDB_val key; 0057 key.mv_size = term.size(); 0058 key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); 0059 0060 QByteArray arr = PostingCodec::encode(list); 0061 0062 MDB_val val; 0063 val.mv_size = arr.size(); 0064 val.mv_data = static_cast<void*>(arr.data()); 0065 0066 int rc = mdb_put(m_txn, m_dbi, &key, &val, 0); 0067 if (rc) { 0068 qCWarning(ENGINE) << "PostingDB::put" << mdb_strerror(rc); 0069 } 0070 } 0071 0072 PostingList PostingDB::get(const QByteArray& term) 0073 { 0074 Q_ASSERT(!term.isEmpty()); 0075 0076 MDB_val key; 0077 key.mv_size = term.size(); 0078 key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); 0079 0080 MDB_val val{0, nullptr}; 0081 int rc = mdb_get(m_txn, m_dbi, &key, &val); 0082 if (rc) { 0083 if (rc != MDB_NOTFOUND) { 0084 qCDebug(ENGINE) << "PostingDB::get" << term << mdb_strerror(rc); 0085 } 0086 return PostingList(); 0087 } 0088 0089 QByteArray arr = QByteArray::fromRawData(static_cast<char*>(val.mv_data), val.mv_size); 0090 0091 return PostingCodec::decode(arr); 0092 } 0093 0094 void PostingDB::del(const QByteArray& term) 0095 { 0096 Q_ASSERT(!term.isEmpty()); 0097 0098 MDB_val key; 0099 key.mv_size = term.size(); 0100 key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); 0101 0102 int rc = mdb_del(m_txn, m_dbi, &key, nullptr); 0103 if (rc != 0 && rc != MDB_NOTFOUND) { 0104 qCDebug(ENGINE) << "PostingDB::del" << term << mdb_strerror(rc); 0105 } 0106 } 0107 0108 QVector< QByteArray > PostingDB::fetchTermsStartingWith(const QByteArray& term) 0109 { 0110 MDB_val key; 0111 key.mv_size = term.size(); 0112 key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); 0113 0114 MDB_cursor* cursor; 0115 int rc = mdb_cursor_open(m_txn, m_dbi, &cursor); 0116 if (rc) { 0117 qCWarning(ENGINE) << "PostingDB::fetchTermsStartingWith" << mdb_strerror(rc); 0118 return {}; 0119 } 0120 0121 QVector<QByteArray> terms; 0122 rc = mdb_cursor_get(cursor, &key, nullptr, MDB_SET_RANGE); 0123 while (rc == 0) { 0124 const QByteArray arr(static_cast<char*>(key.mv_data), key.mv_size); 0125 if (!arr.startsWith(term)) { 0126 break; 0127 } 0128 terms << arr; 0129 rc = mdb_cursor_get(cursor, &key, nullptr, MDB_NEXT); 0130 } 0131 if (rc != MDB_NOTFOUND) { 0132 qCDebug(ENGINE) << "PostingDB::fetchTermsStartingWith" << mdb_strerror(rc); 0133 } 0134 0135 mdb_cursor_close(cursor); 0136 return terms; 0137 } 0138 0139 class DBPostingIterator : public PostingIterator { 0140 public: 0141 DBPostingIterator(void* data, uint size); 0142 quint64 docId() const override; 0143 quint64 next() override; 0144 0145 private: 0146 const QVector<quint64> m_vec; 0147 int m_pos; 0148 }; 0149 0150 PostingIterator* PostingDB::iter(const QByteArray& term) 0151 { 0152 MDB_val key; 0153 key.mv_size = term.size(); 0154 key.mv_data = static_cast<void*>(const_cast<char*>(term.constData())); 0155 0156 MDB_val val; 0157 int rc = mdb_get(m_txn, m_dbi, &key, &val); 0158 if (rc) { 0159 qCDebug(ENGINE) << "PostingDB::iter" << term << mdb_strerror(rc); 0160 return nullptr; 0161 } 0162 0163 return new DBPostingIterator(val.mv_data, val.mv_size); 0164 } 0165 0166 // 0167 // Posting Iterator 0168 // 0169 DBPostingIterator::DBPostingIterator(void* data, uint size) 0170 : m_vec(PostingCodec().decode(QByteArray(static_cast<char*>(data), size))) 0171 , m_pos(-1) 0172 { 0173 } 0174 0175 quint64 DBPostingIterator::docId() const 0176 { 0177 if (m_pos < 0 || m_pos >= m_vec.size()) { 0178 return 0; 0179 } 0180 0181 return m_vec[m_pos]; 0182 } 0183 0184 quint64 DBPostingIterator::next() 0185 { 0186 if (m_pos >= m_vec.size() - 1) { 0187 m_pos = m_vec.size(); 0188 return 0; 0189 } 0190 0191 m_pos++; 0192 return m_vec[m_pos]; 0193 } 0194 0195 template <typename Validator> 0196 PostingIterator* PostingDB::iter(const QByteArray& prefix, Validator validate) 0197 { 0198 Q_ASSERT(!prefix.isEmpty()); 0199 0200 MDB_val key; 0201 key.mv_size = prefix.size(); 0202 key.mv_data = static_cast<void*>(const_cast<char*>(prefix.constData())); 0203 0204 MDB_cursor* cursor; 0205 int rc = mdb_cursor_open(m_txn, m_dbi, &cursor); 0206 0207 if (rc) { 0208 qCWarning(ENGINE) << "PostingDB::regexpIter" << mdb_strerror(rc); 0209 return nullptr; 0210 } 0211 0212 QVector<PostingIterator*> termIterators; 0213 0214 MDB_val val; 0215 rc = mdb_cursor_get(cursor, &key, &val, MDB_SET_RANGE); 0216 while (rc == 0) { 0217 const QByteArray arr(static_cast<char*>(key.mv_data), key.mv_size); 0218 if (!arr.startsWith(prefix)) { 0219 break; 0220 } 0221 if (validate(arr)) { 0222 termIterators << new DBPostingIterator(val.mv_data, val.mv_size); 0223 } 0224 rc = mdb_cursor_get(cursor, &key, &val, MDB_NEXT); 0225 } 0226 0227 if (rc != 0 && rc != MDB_NOTFOUND) { 0228 qCWarning(ENGINE) << "PostingDB::regexpIter" << mdb_strerror(rc); 0229 } 0230 0231 mdb_cursor_close(cursor); 0232 if (termIterators.isEmpty()) { 0233 return nullptr; 0234 } 0235 return new OrPostingIterator(termIterators); 0236 } 0237 0238 PostingIterator* PostingDB::prefixIter(const QByteArray& prefix) 0239 { 0240 auto validate = [] (const QByteArray& arr) { 0241 Q_UNUSED(arr); 0242 return true; 0243 }; 0244 return iter(prefix, validate); 0245 } 0246 0247 PostingIterator* PostingDB::regexpIter(const QRegularExpression& regexp, const QByteArray& prefix) 0248 { 0249 int prefixLen = prefix.length(); 0250 auto validate = [®exp, prefixLen] (const QByteArray& arr) { 0251 QString term = QString::fromUtf8(arr.mid(prefixLen)); 0252 return regexp.match(term).hasMatch(); 0253 }; 0254 0255 return iter(prefix, validate); 0256 } 0257 0258 PostingIterator* PostingDB::compIter(const QByteArray& prefix, qlonglong comVal, PostingDB::Comparator com) 0259 { 0260 int prefixLen = prefix.length(); 0261 auto validate = [prefixLen, comVal, com] (const QByteArray& arr) { 0262 bool ok = false; 0263 auto val = QByteArray::fromRawData(arr.constData() + prefixLen, arr.length() - prefixLen).toLongLong(&ok); 0264 return ok && ((com == LessEqual && val <= comVal) || (com == GreaterEqual && val >= comVal)); 0265 }; 0266 return iter(prefix, validate); 0267 } 0268 0269 PostingIterator* PostingDB::compIter(const QByteArray& prefix, double comVal, PostingDB::Comparator com) 0270 { 0271 int prefixLen = prefix.length(); 0272 auto validate = [prefixLen, comVal, com] (const QByteArray& arr) { 0273 bool ok = false; 0274 auto val = QByteArray::fromRawData(arr.constData() + prefixLen, arr.length() - prefixLen).toDouble(&ok); 0275 return ok && ((com == LessEqual && val <= comVal) || 0276 (com == GreaterEqual && val >= comVal)); 0277 }; 0278 return iter(prefix, validate); 0279 } 0280 0281 PostingIterator* PostingDB::compIter(const QByteArray& prefix, const QByteArray& comVal, PostingDB::Comparator com) 0282 { 0283 int prefixLen = prefix.length(); 0284 auto validate = [prefixLen, comVal, com] (const QByteArray& arr) { 0285 auto val = QByteArray::fromRawData(arr.constData() + prefixLen, arr.length() - prefixLen); 0286 return ((com == LessEqual && val <= comVal) || 0287 (com == GreaterEqual && val >= comVal)); 0288 }; 0289 return iter(prefix, validate); 0290 } 0291 0292 QMap<QByteArray, PostingList> PostingDB::toTestMap() const 0293 { 0294 MDB_cursor* cursor; 0295 mdb_cursor_open(m_txn, m_dbi, &cursor); 0296 0297 MDB_val key = {0, nullptr}; 0298 MDB_val val; 0299 0300 QMap<QByteArray, PostingList> map; 0301 while (1) { 0302 int rc = mdb_cursor_get(cursor, &key, &val, MDB_NEXT); 0303 if (rc == MDB_NOTFOUND) { 0304 break; 0305 } 0306 if (rc) { 0307 qCDebug(ENGINE) << "PostingDB::toTestMap" << mdb_strerror(rc); 0308 break; 0309 } 0310 0311 const QByteArray ba(static_cast<char*>(key.mv_data), key.mv_size); 0312 const PostingList plist = PostingCodec::decode(QByteArray(static_cast<char*>(val.mv_data), val.mv_size)); 0313 map.insert(ba, plist); 0314 } 0315 0316 mdb_cursor_close(cursor); 0317 return map; 0318 }