File indexing completed on 2024-04-21 03:51:43

0001 /*
0002     This file is part of the KDE Baloo project.
0003     SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-or-later
0006 */
0007 
0008 #include "writetransaction.h"
0009 #include "transaction.h"
0010 
0011 #include "postingdb.h"
0012 #include "documentdb.h"
0013 #include "documentiddb.h"
0014 #include "positiondb.h"
0015 #include "documenttimedb.h"
0016 #include "documentdatadb.h"
0017 #include "mtimedb.h"
0018 #include "idutils.h"
0019 
0020 using namespace Baloo;
0021 
0022 void WriteTransaction::addDocument(const Document& doc)
0023 {
0024     quint64 id = doc.id();
0025 
0026     DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
0027     DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
0028     DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
0029     DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn);
0030     DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn);
0031     DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn);
0032     MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn);
0033     DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
0034 
0035     Q_ASSERT(!documentTermsDB.contains(id));
0036     Q_ASSERT(!documentXattrTermsDB.contains(id));
0037     Q_ASSERT(!documentFileNameTermsDB.contains(id));
0038     Q_ASSERT(!docTimeDB.contains(id));
0039     Q_ASSERT(!docDataDB.contains(id));
0040     Q_ASSERT(!contentIndexingDB.contains(id));
0041     Q_ASSERT(doc.parentId());
0042 
0043     {
0044         auto url = doc.url();
0045         int pos = url.lastIndexOf('/');
0046         auto filename = url.mid(pos + 1);
0047         auto parentId = doc.parentId();
0048 
0049         if (pos > 0) {
0050             docUrlDB.addPath(url.left(pos));
0051         } else {
0052             parentId = 0;
0053         }
0054 
0055         if (!docUrlDB.put(id, parentId, filename)) {
0056             return;
0057         }
0058     }
0059 
0060     QVector<QByteArray> docTerms = addTerms(id, doc.m_terms);
0061     Q_ASSERT(!docTerms.empty());
0062     documentTermsDB.put(id, docTerms);
0063 
0064     QVector<QByteArray> docXattrTerms = addTerms(id, doc.m_xattrTerms);
0065     if (!docXattrTerms.isEmpty()) {
0066         documentXattrTermsDB.put(id, docXattrTerms);
0067     }
0068 
0069     QVector<QByteArray> docFileNameTerms = addTerms(id, doc.m_fileNameTerms);
0070     if (!docFileNameTerms.isEmpty()) {
0071         documentFileNameTermsDB.put(id, docFileNameTerms);
0072     }
0073 
0074     if (doc.contentIndexing()) {
0075         contentIndexingDB.put(doc.id());
0076     }
0077 
0078     DocumentTimeDB::TimeInfo info;
0079     info.mTime = doc.m_mTime;
0080     info.cTime = doc.m_cTime;
0081 
0082     docTimeDB.put(id, info);
0083     mtimeDB.put(doc.m_mTime, id);
0084 
0085     if (!doc.m_data.isEmpty()) {
0086         docDataDB.put(id, doc.m_data);
0087     }
0088 }
0089 
0090 QVector<QByteArray> WriteTransaction::addTerms(quint64 id, const QMap<QByteArray, Document::TermData>& terms)
0091 {
0092     QVector<QByteArray> termList;
0093     termList.reserve(terms.size());
0094     m_pendingOperations.reserve(m_pendingOperations.size() + terms.size());
0095 
0096     for (auto it = terms.cbegin(), end = terms.cend(); it != end; ++it) {
0097         const QByteArray& term = it.key();
0098         termList.append(term);
0099 
0100         Operation op;
0101         op.type = AddId;
0102         op.data.docId = id;
0103         op.data.positions = it.value().positions;
0104 
0105         m_pendingOperations[term].append(op);
0106     }
0107 
0108     return termList;
0109 }
0110 
0111 void WriteTransaction::removeDocument(quint64 id)
0112 {
0113     DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
0114     DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
0115     DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
0116     DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn);
0117     DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn);
0118     DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn);
0119     DocumentIdDB failedIndexingDB(m_dbis.failedIdDbi, m_txn);
0120     MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn);
0121     DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
0122 
0123     removeTerms(id, documentTermsDB.get(id));
0124     removeTerms(id, documentFileNameTermsDB.get(id));
0125     if (documentXattrTermsDB.contains(id)) {
0126         removeTerms(id, documentXattrTermsDB.get(id));
0127     }
0128 
0129     documentTermsDB.del(id);
0130     documentXattrTermsDB.del(id);
0131     documentFileNameTermsDB.del(id);
0132 
0133     docUrlDB.del(id);
0134 
0135     contentIndexingDB.del(id);
0136     failedIndexingDB.del(id);
0137 
0138     DocumentTimeDB::TimeInfo info = docTimeDB.get(id);
0139     docTimeDB.del(id);
0140     mtimeDB.del(info.mTime, id);
0141 
0142     docDataDB.del(id);
0143 }
0144 
0145 void WriteTransaction::removeTerms(quint64 id, const QVector<QByteArray>& terms)
0146 {
0147     for (const QByteArray& term : terms) {
0148         Operation op;
0149         op.type = RemoveId;
0150         op.data.docId = id;
0151 
0152         m_pendingOperations[term].append(op);
0153     }
0154 }
0155 
0156 void WriteTransaction::removeRecursively(quint64 parentId)
0157 {
0158     DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
0159 
0160     const QVector<quint64> children = docUrlDB.getChildren(parentId);
0161     for (quint64 id : children) {
0162         if (id) {
0163             removeRecursively(id);
0164         }
0165     }
0166     removeDocument(parentId);
0167 }
0168 
0169 bool WriteTransaction::removeRecursively(quint64 parentId, const std::function<bool(quint64)> &shouldDelete)
0170 {
0171     DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
0172 
0173     if (parentId && !shouldDelete(parentId)) {
0174         return false;
0175     }
0176 
0177     bool isEmpty = true;
0178     const QVector<quint64> children = docUrlDB.getChildren(parentId);
0179     for (quint64 id : children) {
0180         isEmpty &= removeRecursively(id, shouldDelete);
0181     }
0182     // refetch
0183     if (isEmpty && docUrlDB.getChildren(parentId).isEmpty()) {
0184         removeDocument(parentId);
0185         return true;
0186     }
0187     return false;
0188 }
0189 
0190 void WriteTransaction::replaceDocument(const Document& doc, DocumentOperations operations)
0191 {
0192     DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
0193     DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
0194     DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
0195     DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn);
0196     DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn);
0197     DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn);
0198     MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn);
0199     DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
0200 
0201     const quint64 id = doc.id();
0202 
0203     if (operations & DocumentTerms) {
0204         Q_ASSERT(!doc.m_terms.isEmpty());
0205         QVector<QByteArray> prevTerms = documentTermsDB.get(id);
0206         QVector<QByteArray> docTerms = replaceTerms(id, prevTerms, doc.m_terms);
0207 
0208         if (docTerms != prevTerms) {
0209             documentTermsDB.put(id, docTerms);
0210         }
0211     }
0212 
0213     if (operations & XAttrTerms) {
0214         QVector<QByteArray> prevTerms = documentXattrTermsDB.get(id);
0215         QVector<QByteArray> docXattrTerms = replaceTerms(id, prevTerms, doc.m_xattrTerms);
0216 
0217         if (docXattrTerms != prevTerms) {
0218             if (!docXattrTerms.isEmpty()) {
0219                 documentXattrTermsDB.put(id, docXattrTerms);
0220             } else {
0221                 documentXattrTermsDB.del(id);
0222             }
0223         }
0224     }
0225 
0226     if (operations & FileNameTerms) {
0227         QVector<QByteArray> prevTerms = documentFileNameTermsDB.get(id);
0228         QVector<QByteArray> docFileNameTerms = replaceTerms(id, prevTerms, doc.m_fileNameTerms);
0229 
0230         if (docFileNameTerms != prevTerms) {
0231             if (!docFileNameTerms.isEmpty()) {
0232                 documentFileNameTermsDB.put(id, docFileNameTerms);
0233             } else {
0234                 documentFileNameTermsDB.del(id);
0235             }
0236         }
0237     }
0238 
0239     if (doc.contentIndexing()) {
0240         contentIndexingDB.put(doc.id());
0241     }
0242 
0243     if (operations & DocumentTime) {
0244         DocumentTimeDB::TimeInfo info = docTimeDB.get(id);
0245         if (info.mTime != doc.m_mTime) {
0246             mtimeDB.del(info.mTime, id);
0247             mtimeDB.put(doc.m_mTime, id);
0248         }
0249 
0250         info.mTime = doc.m_mTime;
0251         info.cTime = doc.m_cTime;
0252         docTimeDB.put(id, info);
0253     }
0254 
0255     if (operations & DocumentData) {
0256         if (!doc.m_data.isEmpty()) {
0257             docDataDB.put(id, doc.m_data);
0258         } else {
0259             docDataDB.del(id);
0260         }
0261     }
0262 
0263     if (operations & DocumentUrl) {
0264         auto url = doc.url();
0265         int pos = url.lastIndexOf('/');
0266         auto newname = url.mid(pos + 1);
0267         docUrlDB.updateUrl(doc.id(), doc.parentId(), newname);
0268     }
0269 }
0270 
0271 QVector< QByteArray > WriteTransaction::replaceTerms(quint64 id, const QVector<QByteArray>& prevTerms,
0272                                                      const QMap<QByteArray, Document::TermData>& terms)
0273 {
0274     m_pendingOperations.reserve(m_pendingOperations.size() + prevTerms.size() + terms.size());
0275     for (const QByteArray& term : prevTerms) {
0276         Operation op;
0277         op.type = RemoveId;
0278         op.data.docId = id;
0279 
0280         m_pendingOperations[term].append(op);
0281     }
0282 
0283     return addTerms(id, terms);
0284 }
0285 
0286 void WriteTransaction::commit()
0287 {
0288     PostingDB postingDB(m_dbis.postingDbi, m_txn);
0289     PositionDB positionDB(m_dbis.positionDBi, m_txn);
0290 
0291     QHashIterator<QByteArray, QVector<Operation> > iter(m_pendingOperations);
0292     while (iter.hasNext()) {
0293         iter.next();
0294 
0295         const QByteArray& term = iter.key();
0296         const QVector<Operation> operations = iter.value();
0297 
0298         PostingList list = postingDB.get(term);
0299 
0300         bool fetchedPositionList = false;
0301         QVector<PositionInfo> positionList;
0302 
0303         for (const Operation& op : operations) {
0304             quint64 id = op.data.docId;
0305 
0306             if (op.type == AddId) {
0307                 sortedIdInsert(list, id);
0308 
0309                 if (!op.data.positions.isEmpty()) {
0310                     if (!fetchedPositionList) {
0311                         positionList = positionDB.get(term);
0312                         fetchedPositionList = true;
0313                     }
0314                     sortedIdInsert(positionList, op.data);
0315                 }
0316             }
0317             else {
0318                 sortedIdRemove(list, id);
0319                 if (!fetchedPositionList) {
0320                     positionList = positionDB.get(term);
0321                     fetchedPositionList = true;
0322                 }
0323                 sortedIdRemove(positionList, PositionInfo(id));
0324             }
0325         }
0326 
0327         if (!list.isEmpty()) {
0328             postingDB.put(term, list);
0329         } else {
0330             postingDB.del(term);
0331         }
0332 
0333         if (fetchedPositionList) {
0334             if (!positionList.isEmpty()) {
0335                 positionDB.put(term, positionList);
0336             } else {
0337                 positionDB.del(term);
0338             }
0339         }
0340     }
0341 
0342     m_pendingOperations.clear();
0343 }