File indexing completed on 2024-05-12 05:25:59

0001 /*
0002  *   Copyright (C) 2018 Christian Mollekopf <mollekopf@kolabsys.com>
0003  *
0004  *   This program is free software; you can redistribute it and/or modify
0005  *   it under the terms of the GNU General Public License as published by
0006  *   the Free Software Foundation; either version 2 of the License, or
0007  *   (at your option) any later version.
0008  *
0009  *   This program is distributed in the hope that it will be useful,
0010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
0011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0012  *   GNU General Public License for more details.
0013  *
0014  *   You should have received a copy of the GNU General Public License
0015  *   along with this program; if not, write to the
0016  *   Free Software Foundation, Inc.,
0017  *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
0018  */
0019 //xapian.h needs to be included first to build
0020 #include <xapian.h>
0021 #include "fulltextindex.h"
0022 
0023 #include <QFile>
0024 #include <QElapsedTimer>
0025 #include <QDir>
0026 #include <QDateTime>
0027 
0028 #include "log.h"
0029 #include "definitions.h"
0030 
0031 using Sink::Storage::Identifier;
0032 
/**
 * Returns the table of searchable field names and the Xapian term prefix
 * that is used for each of them, both when indexing and when parsing queries.
 */
static std::map<std::string, std::string> prefixes()
{
    std::map<std::string, std::string> table;
    table.emplace("subject", "S");
    table.emplace("recipients", "R");
    table.emplace("sender", "F");
    return table;
}
0041 
0042 FulltextIndex::FulltextIndex(const QByteArray &resourceInstanceIdentifier, Sink::Storage::DataStore::AccessMode accessMode)
0043     : mName("fulltext"),
0044     mDbPath{QFile::encodeName(Sink::resourceStorageLocation(resourceInstanceIdentifier) + '/' + "fulltext")}
0045 {
0046     try {
0047         if (QDir{}.mkpath(mDbPath)) {
0048             if (accessMode == Sink::Storage::DataStore::ReadWrite) {
0049                 mDb = new Xapian::WritableDatabase(mDbPath.toStdString(), Xapian::DB_CREATE_OR_OPEN);
0050             } else {
0051                 mDb = new Xapian::Database(mDbPath.toStdString(), Xapian::DB_OPEN);
0052             }
0053         } else {
0054             SinkError() << "Failed to open database" << mDbPath;
0055         }
0056     } catch (const Xapian::DatabaseError& e) {
0057         SinkError() << "Failed to open database" << mDbPath << ":" << QString::fromStdString(e.get_msg());
0058     }
0059 }
0060 
0061 FulltextIndex::~FulltextIndex()
0062 {
0063     delete mDb;
0064 }
0065 
0066 bool FulltextIndex::exists(const QByteArray &resourceInstanceIdentifier)
0067 {
0068     return QFile{QFile::encodeName(Sink::resourceStorageLocation(resourceInstanceIdentifier) + '/' + "fulltext/iamglass")}.exists();
0069 }
0070 
0071 static std::string idTerm(const Identifier &key)
0072 {
0073     return "Q" + key.toInternalByteArray().toStdString();
0074 }
0075 
0076 void FulltextIndex::add(const Identifier &key, const QString &value, const QDateTime &date)
0077 {
0078     add(key, {{{}, value}}, date);
0079 }
0080 
0081 void FulltextIndex::add(const Identifier &key, const QList<QPair<QString, QString>> &values, const QDateTime &date)
0082 {
0083     if (!mDb) {
0084         return;
0085     }
0086     try {
0087         Xapian::TermGenerator generator;
0088         Xapian::Document document;
0089         generator.set_document(document);
0090 
0091         const auto prefixMap = prefixes();
0092         for (const auto &entry : values) {
0093             if (!entry.second.isEmpty()) {
0094                 const auto prefix = prefixMap.find(entry.first.toStdString());
0095                 if (prefix != prefixMap.end()) {
0096                     generator.index_text(entry.second.toStdString(), 1, prefix->second);
0097                 } else {
0098                     generator.index_text(entry.second.toStdString(), 1);
0099                 }
0100                 //Prevent phrase searches from spanning different indexed parts
0101                 generator.increase_termpos();
0102             }
0103         }
0104         document.add_value(0, key.toInternalByteArray().toStdString());
0105         document.add_value(1, Xapian::sortable_serialise((double)date.toSecsSinceEpoch()));
0106 
0107         const auto idterm = idTerm(key);
0108         document.add_boolean_term(idterm);
0109 
0110         writableDatabase()->replace_document(idterm, document);
0111     }
0112     catch (const Xapian::Error &error) {
0113         SinkError() << "Exception during Xapian commit_transaction:" << error.get_msg().c_str();
0114         //FIXME we should somehow retry the transaction...
0115         Q_ASSERT(false);
0116     }
0117 }
0118 
0119 void FulltextIndex::commitTransaction()
0120 {
0121     if (mHasTransactionOpen) {
0122         Q_ASSERT(mDb);
0123         try {
0124             writableDatabase()->commit_transaction();
0125             mHasTransactionOpen = false;
0126         }
0127         catch (const Xapian::Error &error) {
0128             SinkError() << "Exception during Xapian commit_transaction:" << error.get_msg().c_str();
0129             //FIXME we should somehow retry the transaction...
0130             Q_ASSERT(false);
0131         }
0132     }
0133 }
0134 
0135 void FulltextIndex::abortTransaction()
0136 {
0137     if (mHasTransactionOpen) {
0138         Q_ASSERT(mDb);
0139         try {
0140             writableDatabase()->cancel_transaction();
0141             mHasTransactionOpen = false;
0142         }
0143         catch (const Xapian::Error &error) {
0144             SinkError() << "Exception during Xapian cancel_transaction:" << error.get_msg().c_str();
0145             //FIXME we should somehow retry the transaction...
0146             Q_ASSERT(false);
0147         }
0148     }
0149 }
0150 
0151 Xapian::WritableDatabase* FulltextIndex::writableDatabase()
0152 {
0153     Q_ASSERT(dynamic_cast<Xapian::WritableDatabase*>(mDb));
0154     auto db = static_cast<Xapian::WritableDatabase*>(mDb);
0155     if (!mHasTransactionOpen) {
0156         try {
0157             db->begin_transaction();
0158             mHasTransactionOpen = true;
0159         }
0160         catch (const Xapian::Error &error) {
0161             SinkError() << "Exception during Xapian begin_transaction:" << error.get_msg().c_str();
0162             //FIXME we should somehow retry the transaction...
0163             Q_ASSERT(false);
0164         }
0165     }
0166     return db;
0167 }
0168 
0169 void FulltextIndex::remove(const Identifier &key)
0170 {
0171     if (!mDb) {
0172         return;
0173     }
0174     try {
0175         writableDatabase()->delete_document(idTerm(key));
0176     }
0177     catch (const Xapian::Error &error) {
0178         SinkError() << "Exception during Xapian delete_document:" << error.get_msg().c_str();
0179         //FIXME we should somehow retry the transaction...
0180         Q_ASSERT(false);
0181     }
0182 }
0183 
0184 QVector<Identifier> FulltextIndex::lookup(const QString &searchTerm, const Identifier &entity)
0185 {
0186     if (!mDb) {
0187         return {};
0188     }
0189     QVector<Identifier> results;
0190 
0191     try {
0192         QElapsedTimer time;
0193         time.start();
0194         Xapian::QueryParser parser;
0195         for (const auto& [name, prefix] : prefixes()) {
0196             parser.add_prefix(name, prefix);
0197             //Search through all prefixes by default
0198             parser.add_prefix("", prefix);
0199         }
0200         //Also search through the empty prefix by default
0201         parser.add_prefix("", "");
0202         parser.add_boolean_prefix("identifier", "Q");
0203         parser.set_default_op(Xapian::Query::OP_AND);
0204         parser.set_database(*mDb);
0205         parser.set_max_expansion(100, Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT, Xapian::QueryParser::FLAG_PARTIAL);
0206         const auto mainQuery = parser.parse_query(searchTerm.toStdString(), Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_PARTIAL);
0207         const auto query = [&] {
0208             if (!entity.isNull()) {
0209                 return Xapian::Query{Xapian::Query::OP_AND, Xapian::Query{idTerm(entity)}, mainQuery};
0210             }
0211             return mainQuery;
0212         }();
0213         SinkTrace() << "Running xapian query: " << QString::fromStdString(query.get_description());
0214         Xapian::Enquire enquire(*mDb);
0215         enquire.set_query(query);
0216         enquire.set_sort_by_value_then_relevance(1, true);
0217 
0218         const Xapian::doccount limit = [&] {
0219             switch (searchTerm.size()) {
0220                 case 1:
0221                 case 2:
0222                 case 3:
0223                     return 500;
0224                 default:
0225                     return 20000;
0226             }
0227         }();
0228         Xapian::MSet mset = enquire.get_mset(0, limit);
0229         results.reserve(mset.size());
0230         for (Xapian::MSetIterator it = mset.begin(); it != mset.end(); it++) {
0231             auto doc = it.get_document();
0232             const auto data = doc.get_value(0);
0233             results << Identifier::fromInternalByteArray({data.c_str(), int(data.length())});
0234         }
0235 
0236         SinkTrace() << "Found " << mset.size() << " results, limited to " << limit << " in " << Sink::Log::TraceTime(time.elapsed());
0237         //Print a hint why a query could lack some expected results (Not for small limits because that becomes noisy)
0238         if (searchTerm.size() >= 4 && mset.size() >= limit) {
0239             SinkLog() << "Result set exceeding limit of " << limit << QString::fromStdString(query.get_description());
0240         }
0241     }
0242     catch (const Xapian::Error &) {
0243         // Nothing to do, move along
0244     }
0245     return results;
0246 }
0247 
0248 qint64 FulltextIndex::getDoccount() const
0249 {
0250     if (!mDb) {
0251         return -1;
0252     }
0253     try {
0254         return mDb->get_doccount();
0255     } catch (const Xapian::Error &) {
0256         // Nothing to do, move along
0257     }
0258     return  -1;
0259 }
0260 
0261 FulltextIndex::Result FulltextIndex::getIndexContent(const Identifier &identifier) const
0262 {
0263     if (!mDb) {
0264         {};
0265     }
0266     try {
0267         const auto id = idTerm(identifier);
0268         Xapian::PostingIterator p = mDb->postlist_begin(id);
0269         if (p != mDb->postlist_end(id)) {
0270             auto document = mDb->get_document(*p);
0271             QStringList terms;
0272             for (auto it = document.termlist_begin(); it != document.termlist_end(); it++) {
0273                 terms << QString::fromStdString(*it);
0274             }
0275             return {true, terms};
0276         }
0277     } catch (const Xapian::Error &) {
0278         // Nothing to do, move along
0279     }
0280     return {};
0281 }