// File indexing completed on 2024-04-21 03:51:43
0001 /* 0002 This file is part of the KDE Baloo project. 0003 SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.1-or-later 0006 */ 0007 0008 #include "writetransaction.h" 0009 #include "transaction.h" 0010 0011 #include "postingdb.h" 0012 #include "documentdb.h" 0013 #include "documentiddb.h" 0014 #include "positiondb.h" 0015 #include "documenttimedb.h" 0016 #include "documentdatadb.h" 0017 #include "mtimedb.h" 0018 #include "idutils.h" 0019 0020 using namespace Baloo; 0021 0022 void WriteTransaction::addDocument(const Document& doc) 0023 { 0024 quint64 id = doc.id(); 0025 0026 DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); 0027 DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); 0028 DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); 0029 DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); 0030 DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); 0031 DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); 0032 MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); 0033 DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); 0034 0035 Q_ASSERT(!documentTermsDB.contains(id)); 0036 Q_ASSERT(!documentXattrTermsDB.contains(id)); 0037 Q_ASSERT(!documentFileNameTermsDB.contains(id)); 0038 Q_ASSERT(!docTimeDB.contains(id)); 0039 Q_ASSERT(!docDataDB.contains(id)); 0040 Q_ASSERT(!contentIndexingDB.contains(id)); 0041 Q_ASSERT(doc.parentId()); 0042 0043 { 0044 auto url = doc.url(); 0045 int pos = url.lastIndexOf('/'); 0046 auto filename = url.mid(pos + 1); 0047 auto parentId = doc.parentId(); 0048 0049 if (pos > 0) { 0050 docUrlDB.addPath(url.left(pos)); 0051 } else { 0052 parentId = 0; 0053 } 0054 0055 if (!docUrlDB.put(id, parentId, filename)) { 0056 return; 0057 } 0058 } 0059 0060 QVector<QByteArray> docTerms = addTerms(id, doc.m_terms); 0061 Q_ASSERT(!docTerms.empty()); 0062 documentTermsDB.put(id, docTerms); 0063 0064 QVector<QByteArray> docXattrTerms = addTerms(id, 
doc.m_xattrTerms); 0065 if (!docXattrTerms.isEmpty()) { 0066 documentXattrTermsDB.put(id, docXattrTerms); 0067 } 0068 0069 QVector<QByteArray> docFileNameTerms = addTerms(id, doc.m_fileNameTerms); 0070 if (!docFileNameTerms.isEmpty()) { 0071 documentFileNameTermsDB.put(id, docFileNameTerms); 0072 } 0073 0074 if (doc.contentIndexing()) { 0075 contentIndexingDB.put(doc.id()); 0076 } 0077 0078 DocumentTimeDB::TimeInfo info; 0079 info.mTime = doc.m_mTime; 0080 info.cTime = doc.m_cTime; 0081 0082 docTimeDB.put(id, info); 0083 mtimeDB.put(doc.m_mTime, id); 0084 0085 if (!doc.m_data.isEmpty()) { 0086 docDataDB.put(id, doc.m_data); 0087 } 0088 } 0089 0090 QVector<QByteArray> WriteTransaction::addTerms(quint64 id, const QMap<QByteArray, Document::TermData>& terms) 0091 { 0092 QVector<QByteArray> termList; 0093 termList.reserve(terms.size()); 0094 m_pendingOperations.reserve(m_pendingOperations.size() + terms.size()); 0095 0096 for (auto it = terms.cbegin(), end = terms.cend(); it != end; ++it) { 0097 const QByteArray& term = it.key(); 0098 termList.append(term); 0099 0100 Operation op; 0101 op.type = AddId; 0102 op.data.docId = id; 0103 op.data.positions = it.value().positions; 0104 0105 m_pendingOperations[term].append(op); 0106 } 0107 0108 return termList; 0109 } 0110 0111 void WriteTransaction::removeDocument(quint64 id) 0112 { 0113 DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); 0114 DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); 0115 DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); 0116 DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); 0117 DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); 0118 DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); 0119 DocumentIdDB failedIndexingDB(m_dbis.failedIdDbi, m_txn); 0120 MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); 0121 DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); 0122 0123 removeTerms(id, documentTermsDB.get(id)); 0124 removeTerms(id, 
documentFileNameTermsDB.get(id)); 0125 if (documentXattrTermsDB.contains(id)) { 0126 removeTerms(id, documentXattrTermsDB.get(id)); 0127 } 0128 0129 documentTermsDB.del(id); 0130 documentXattrTermsDB.del(id); 0131 documentFileNameTermsDB.del(id); 0132 0133 docUrlDB.del(id); 0134 0135 contentIndexingDB.del(id); 0136 failedIndexingDB.del(id); 0137 0138 DocumentTimeDB::TimeInfo info = docTimeDB.get(id); 0139 docTimeDB.del(id); 0140 mtimeDB.del(info.mTime, id); 0141 0142 docDataDB.del(id); 0143 } 0144 0145 void WriteTransaction::removeTerms(quint64 id, const QVector<QByteArray>& terms) 0146 { 0147 for (const QByteArray& term : terms) { 0148 Operation op; 0149 op.type = RemoveId; 0150 op.data.docId = id; 0151 0152 m_pendingOperations[term].append(op); 0153 } 0154 } 0155 0156 void WriteTransaction::removeRecursively(quint64 parentId) 0157 { 0158 DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); 0159 0160 const QVector<quint64> children = docUrlDB.getChildren(parentId); 0161 for (quint64 id : children) { 0162 if (id) { 0163 removeRecursively(id); 0164 } 0165 } 0166 removeDocument(parentId); 0167 } 0168 0169 bool WriteTransaction::removeRecursively(quint64 parentId, const std::function<bool(quint64)> &shouldDelete) 0170 { 0171 DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); 0172 0173 if (parentId && !shouldDelete(parentId)) { 0174 return false; 0175 } 0176 0177 bool isEmpty = true; 0178 const QVector<quint64> children = docUrlDB.getChildren(parentId); 0179 for (quint64 id : children) { 0180 isEmpty &= removeRecursively(id, shouldDelete); 0181 } 0182 // refetch 0183 if (isEmpty && docUrlDB.getChildren(parentId).isEmpty()) { 0184 removeDocument(parentId); 0185 return true; 0186 } 0187 return false; 0188 } 0189 0190 void WriteTransaction::replaceDocument(const Document& doc, DocumentOperations operations) 0191 { 0192 DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); 0193 DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, 
m_txn); 0194 DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); 0195 DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); 0196 DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); 0197 DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); 0198 MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); 0199 DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); 0200 0201 const quint64 id = doc.id(); 0202 0203 if (operations & DocumentTerms) { 0204 Q_ASSERT(!doc.m_terms.isEmpty()); 0205 QVector<QByteArray> prevTerms = documentTermsDB.get(id); 0206 QVector<QByteArray> docTerms = replaceTerms(id, prevTerms, doc.m_terms); 0207 0208 if (docTerms != prevTerms) { 0209 documentTermsDB.put(id, docTerms); 0210 } 0211 } 0212 0213 if (operations & XAttrTerms) { 0214 QVector<QByteArray> prevTerms = documentXattrTermsDB.get(id); 0215 QVector<QByteArray> docXattrTerms = replaceTerms(id, prevTerms, doc.m_xattrTerms); 0216 0217 if (docXattrTerms != prevTerms) { 0218 if (!docXattrTerms.isEmpty()) { 0219 documentXattrTermsDB.put(id, docXattrTerms); 0220 } else { 0221 documentXattrTermsDB.del(id); 0222 } 0223 } 0224 } 0225 0226 if (operations & FileNameTerms) { 0227 QVector<QByteArray> prevTerms = documentFileNameTermsDB.get(id); 0228 QVector<QByteArray> docFileNameTerms = replaceTerms(id, prevTerms, doc.m_fileNameTerms); 0229 0230 if (docFileNameTerms != prevTerms) { 0231 if (!docFileNameTerms.isEmpty()) { 0232 documentFileNameTermsDB.put(id, docFileNameTerms); 0233 } else { 0234 documentFileNameTermsDB.del(id); 0235 } 0236 } 0237 } 0238 0239 if (doc.contentIndexing()) { 0240 contentIndexingDB.put(doc.id()); 0241 } 0242 0243 if (operations & DocumentTime) { 0244 DocumentTimeDB::TimeInfo info = docTimeDB.get(id); 0245 if (info.mTime != doc.m_mTime) { 0246 mtimeDB.del(info.mTime, id); 0247 mtimeDB.put(doc.m_mTime, id); 0248 } 0249 0250 info.mTime = doc.m_mTime; 0251 info.cTime = doc.m_cTime; 0252 docTimeDB.put(id, info); 0253 } 0254 0255 if (operations & 
DocumentData) { 0256 if (!doc.m_data.isEmpty()) { 0257 docDataDB.put(id, doc.m_data); 0258 } else { 0259 docDataDB.del(id); 0260 } 0261 } 0262 0263 if (operations & DocumentUrl) { 0264 auto url = doc.url(); 0265 int pos = url.lastIndexOf('/'); 0266 auto newname = url.mid(pos + 1); 0267 docUrlDB.updateUrl(doc.id(), doc.parentId(), newname); 0268 } 0269 } 0270 0271 QVector< QByteArray > WriteTransaction::replaceTerms(quint64 id, const QVector<QByteArray>& prevTerms, 0272 const QMap<QByteArray, Document::TermData>& terms) 0273 { 0274 m_pendingOperations.reserve(m_pendingOperations.size() + prevTerms.size() + terms.size()); 0275 for (const QByteArray& term : prevTerms) { 0276 Operation op; 0277 op.type = RemoveId; 0278 op.data.docId = id; 0279 0280 m_pendingOperations[term].append(op); 0281 } 0282 0283 return addTerms(id, terms); 0284 } 0285 0286 void WriteTransaction::commit() 0287 { 0288 PostingDB postingDB(m_dbis.postingDbi, m_txn); 0289 PositionDB positionDB(m_dbis.positionDBi, m_txn); 0290 0291 QHashIterator<QByteArray, QVector<Operation> > iter(m_pendingOperations); 0292 while (iter.hasNext()) { 0293 iter.next(); 0294 0295 const QByteArray& term = iter.key(); 0296 const QVector<Operation> operations = iter.value(); 0297 0298 PostingList list = postingDB.get(term); 0299 0300 bool fetchedPositionList = false; 0301 QVector<PositionInfo> positionList; 0302 0303 for (const Operation& op : operations) { 0304 quint64 id = op.data.docId; 0305 0306 if (op.type == AddId) { 0307 sortedIdInsert(list, id); 0308 0309 if (!op.data.positions.isEmpty()) { 0310 if (!fetchedPositionList) { 0311 positionList = positionDB.get(term); 0312 fetchedPositionList = true; 0313 } 0314 sortedIdInsert(positionList, op.data); 0315 } 0316 } 0317 else { 0318 sortedIdRemove(list, id); 0319 if (!fetchedPositionList) { 0320 positionList = positionDB.get(term); 0321 fetchedPositionList = true; 0322 } 0323 sortedIdRemove(positionList, PositionInfo(id)); 0324 } 0325 } 0326 0327 if (!list.isEmpty()) { 
0328 postingDB.put(term, list); 0329 } else { 0330 postingDB.del(term); 0331 } 0332 0333 if (fetchedPositionList) { 0334 if (!positionList.isEmpty()) { 0335 positionDB.put(term, positionList); 0336 } else { 0337 positionDB.del(term); 0338 } 0339 } 0340 } 0341 0342 m_pendingOperations.clear(); 0343 }