File indexing completed on 2024-05-12 05:25:59
0001 /* 0002 * Copyright (C) 2018 Christian Mollekopf <mollekopf@kolabsys.com> 0003 * 0004 * This program is free software; you can redistribute it and/or modify 0005 * it under the terms of the GNU General Public License as published by 0006 * the Free Software Foundation; either version 2 of the License, or 0007 * (at your option) any later version. 0008 * 0009 * This program is distributed in the hope that it will be useful, 0010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0012 * GNU General Public License for more details. 0013 * 0014 * You should have received a copy of the GNU General Public License 0015 * along with this program; if not, write to the 0016 * Free Software Foundation, Inc., 0017 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 0018 */ 0019 //xapian.h needs to be included first to build 0020 #include <xapian.h> 0021 #include "fulltextindex.h" 0022 0023 #include <QFile> 0024 #include <QElapsedTimer> 0025 #include <QDir> 0026 #include <QDateTime> 0027 0028 #include "log.h" 0029 #include "definitions.h" 0030 0031 using Sink::Storage::Identifier; 0032 0033 static std::map<std::string, std::string> prefixes() 0034 { 0035 return { 0036 {{"subject"}, {"S"}}, 0037 {{"recipients"}, {"R"}}, 0038 {{"sender"}, {"F"}} 0039 }; 0040 } 0041 0042 FulltextIndex::FulltextIndex(const QByteArray &resourceInstanceIdentifier, Sink::Storage::DataStore::AccessMode accessMode) 0043 : mName("fulltext"), 0044 mDbPath{QFile::encodeName(Sink::resourceStorageLocation(resourceInstanceIdentifier) + '/' + "fulltext")} 0045 { 0046 try { 0047 if (QDir{}.mkpath(mDbPath)) { 0048 if (accessMode == Sink::Storage::DataStore::ReadWrite) { 0049 mDb = new Xapian::WritableDatabase(mDbPath.toStdString(), Xapian::DB_CREATE_OR_OPEN); 0050 } else { 0051 mDb = new Xapian::Database(mDbPath.toStdString(), Xapian::DB_OPEN); 0052 } 0053 } else { 0054 SinkError() << "Failed to open database" << mDbPath; 0055 } 0056 } catch (const Xapian::DatabaseError& e) { 0057 SinkError() << "Failed to open database" << mDbPath << ":" << QString::fromStdString(e.get_msg()); 0058 } 0059 } 0060 0061 FulltextIndex::~FulltextIndex() 0062 { 0063 delete mDb; 0064 } 0065 0066 bool FulltextIndex::exists(const QByteArray &resourceInstanceIdentifier) 0067 { 0068 return QFile{QFile::encodeName(Sink::resourceStorageLocation(resourceInstanceIdentifier) + '/' + "fulltext/iamglass")}.exists(); 0069 } 0070 0071 static std::string idTerm(const Identifier &key) 0072 { 0073 return "Q" + key.toInternalByteArray().toStdString(); 0074 } 0075 0076 void FulltextIndex::add(const Identifier &key, const QString &value, const QDateTime &date) 0077 { 0078 add(key, {{{}, value}}, date); 0079 } 0080 0081 void FulltextIndex::add(const Identifier &key, const QList<QPair<QString, QString>> &values, const QDateTime &date) 0082 { 0083 if (!mDb) { 0084 return; 0085 } 0086 try { 0087 Xapian::TermGenerator generator; 0088 Xapian::Document document; 0089 generator.set_document(document); 0090 0091 const auto prefixMap = prefixes(); 0092 for (const auto &entry : values) { 0093 if (!entry.second.isEmpty()) { 0094 const auto prefix = prefixMap.find(entry.first.toStdString()); 0095 if (prefix != prefixMap.end()) { 0096 generator.index_text(entry.second.toStdString(), 1, prefix->second); 0097 } else { 0098 generator.index_text(entry.second.toStdString(), 1); 0099 } 0100 //Prevent phrase searches from spanning different indexed parts 0101 generator.increase_termpos(); 0102 } 0103 } 0104 document.add_value(0, key.toInternalByteArray().toStdString()); 0105 document.add_value(1, Xapian::sortable_serialise((double)date.toSecsSinceEpoch())); 0106 0107 const auto idterm = idTerm(key); 0108 document.add_boolean_term(idterm); 0109 0110 writableDatabase()->replace_document(idterm, document); 0111 } 0112 catch (const Xapian::Error &error) { 0113 SinkError() << "Exception during Xapian commit_transaction:" << error.get_msg().c_str(); 0114 //FIXME we should somehow retry the transaction... 0115 Q_ASSERT(false); 0116 } 0117 } 0118 0119 void FulltextIndex::commitTransaction() 0120 { 0121 if (mHasTransactionOpen) { 0122 Q_ASSERT(mDb); 0123 try { 0124 writableDatabase()->commit_transaction(); 0125 mHasTransactionOpen = false; 0126 } 0127 catch (const Xapian::Error &error) { 0128 SinkError() << "Exception during Xapian commit_transaction:" << error.get_msg().c_str(); 0129 //FIXME we should somehow retry the transaction... 0130 Q_ASSERT(false); 0131 } 0132 } 0133 } 0134 0135 void FulltextIndex::abortTransaction() 0136 { 0137 if (mHasTransactionOpen) { 0138 Q_ASSERT(mDb); 0139 try { 0140 writableDatabase()->cancel_transaction(); 0141 mHasTransactionOpen = false; 0142 } 0143 catch (const Xapian::Error &error) { 0144 SinkError() << "Exception during Xapian cancel_transaction:" << error.get_msg().c_str(); 0145 //FIXME we should somehow retry the transaction... 0146 Q_ASSERT(false); 0147 } 0148 } 0149 } 0150 0151 Xapian::WritableDatabase* FulltextIndex::writableDatabase() 0152 { 0153 Q_ASSERT(dynamic_cast<Xapian::WritableDatabase*>(mDb)); 0154 auto db = static_cast<Xapian::WritableDatabase*>(mDb); 0155 if (!mHasTransactionOpen) { 0156 try { 0157 db->begin_transaction(); 0158 mHasTransactionOpen = true; 0159 } 0160 catch (const Xapian::Error &error) { 0161 SinkError() << "Exception during Xapian begin_transaction:" << error.get_msg().c_str(); 0162 //FIXME we should somehow retry the transaction... 0163 Q_ASSERT(false); 0164 } 0165 } 0166 return db; 0167 } 0168 0169 void FulltextIndex::remove(const Identifier &key) 0170 { 0171 if (!mDb) { 0172 return; 0173 } 0174 try { 0175 writableDatabase()->delete_document(idTerm(key)); 0176 } 0177 catch (const Xapian::Error &error) { 0178 SinkError() << "Exception during Xapian delete_document:" << error.get_msg().c_str(); 0179 //FIXME we should somehow retry the transaction... 0180 Q_ASSERT(false); 0181 } 0182 } 0183 0184 QVector<Identifier> FulltextIndex::lookup(const QString &searchTerm, const Identifier &entity) 0185 { 0186 if (!mDb) { 0187 return {}; 0188 } 0189 QVector<Identifier> results; 0190 0191 try { 0192 QElapsedTimer time; 0193 time.start(); 0194 Xapian::QueryParser parser; 0195 for (const auto& [name, prefix] : prefixes()) { 0196 parser.add_prefix(name, prefix); 0197 //Search through all prefixes by default 0198 parser.add_prefix("", prefix); 0199 } 0200 //Also search through the empty prefix by default 0201 parser.add_prefix("", ""); 0202 parser.add_boolean_prefix("identifier", "Q"); 0203 parser.set_default_op(Xapian::Query::OP_AND); 0204 parser.set_database(*mDb); 0205 parser.set_max_expansion(100, Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT, Xapian::QueryParser::FLAG_PARTIAL); 0206 const auto mainQuery = parser.parse_query(searchTerm.toStdString(), Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_PARTIAL); 0207 const auto query = [&] { 0208 if (!entity.isNull()) { 0209 return Xapian::Query{Xapian::Query::OP_AND, Xapian::Query{idTerm(entity)}, mainQuery}; 0210 } 0211 return mainQuery; 0212 }(); 0213 SinkTrace() << "Running xapian query: " << QString::fromStdString(query.get_description()); 0214 Xapian::Enquire enquire(*mDb); 0215 enquire.set_query(query); 0216 enquire.set_sort_by_value_then_relevance(1, true); 0217 0218 const Xapian::doccount limit = [&] { 0219 switch (searchTerm.size()) { 0220 case 1: 0221 case 2: 0222 case 3: 0223 return 500; 0224 default: 0225 return 20000; 0226 } 0227 }(); 0228 Xapian::MSet mset = enquire.get_mset(0, limit); 0229 results.reserve(mset.size()); 0230 for (Xapian::MSetIterator it = mset.begin(); it != mset.end(); it++) { 0231 auto doc = it.get_document(); 0232 const auto data = doc.get_value(0); 0233 results << Identifier::fromInternalByteArray({data.c_str(), int(data.length())}); 0234 } 0235 0236 SinkTrace() << "Found " << mset.size() << " results, limited to " << limit << " in " << Sink::Log::TraceTime(time.elapsed()); 0237 //Print a hint why a query could lack some expected results (Not for small limits because that becomes noisy) 0238 if (searchTerm.size() >= 4 && mset.size() >= limit) { 0239 SinkLog() << "Result set exceeding limit of " << limit << QString::fromStdString(query.get_description()); 0240 } 0241 } 0242 catch (const Xapian::Error &) { 0243 // Nothing to do, move along 0244 } 0245 return results; 0246 } 0247 0248 qint64 FulltextIndex::getDoccount() const 0249 { 0250 if (!mDb) { 0251 return -1; 0252 } 0253 try { 0254 return mDb->get_doccount(); 0255 } catch (const Xapian::Error &) { 0256 // Nothing to do, move along 0257 } 0258 return -1; 0259 } 0260 0261 FulltextIndex::Result FulltextIndex::getIndexContent(const Identifier &identifier) const 0262 { 0263 if (!mDb) { 0264 {}; 0265 } 0266 try { 0267 const auto id = idTerm(identifier); 0268 Xapian::PostingIterator p = mDb->postlist_begin(id); 0269 if (p != mDb->postlist_end(id)) { 0270 auto document = mDb->get_document(*p); 0271 QStringList terms; 0272 for (auto it = document.termlist_begin(); it != document.termlist_end(); it++) { 0273 terms << QString::fromStdString(*it); 0274 } 0275 return {true, terms}; 0276 } 0277 } catch (const Xapian::Error &) { 0278 // Nothing to do, move along 0279 } 0280 return {}; 0281 }