File indexing completed on 2024-05-19 05:11:52
0001 /* 0002 * This file is part of the KDE Akonadi Search Project 0003 * SPDX-FileCopyrightText: 2013 Vishesh Handa <me@vhanda.in> 0004 * 0005 * SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL 0006 * 0007 */ 0008 0009 #include <xapian.h> 0010 0011 #include "akonadi_search_pim_debug.h" 0012 #include "contactcompleter.h" 0013 #include "query.h" 0014 0015 #include <QElapsedTimer> 0016 #include <QFile> 0017 #include <QStandardPaths> 0018 0019 using namespace Akonadi::Search::PIM; 0020 0021 ContactCompleter::ContactCompleter(const QString &prefix, int limit) 0022 : m_prefix(prefix.toLower()) 0023 , m_limit(limit) 0024 { 0025 } 0026 0027 static QStringList processEnquire(Xapian::Enquire &enq, int limit) 0028 { 0029 QElapsedTimer timer; 0030 timer.start(); 0031 0032 // Retrieves no results but provides statistics - it's very quick 0033 auto statsmset = enq.get_mset(0, 0); 0034 qCDebug(AKONADI_SEARCH_PIM_LOG) << "Query:" << QString::fromStdString(enq.get_query().get_description()); 0035 qCDebug(AKONADI_SEARCH_PIM_LOG) << "Estimated matches:" << statsmset.get_matches_estimated(); 0036 const int matchEstimate = statsmset.get_matches_estimated(); 0037 0038 QStringList list; 0039 list.reserve(std::min(limit, matchEstimate)); 0040 int duplicates = 0; 0041 int firstItem = 0; 0042 // We run the query multiple times, since we may discard some results as duplicates. 0043 while (list.size() < limit) { 0044 // Always query the "limit"-count of results: 0045 // * if estimate is less than limit, we make sure we don't miss results any due to wrong estimate 0046 // * if estimate is more than limit, we don't want to query more documents than needed 0047 Xapian::MSet mset = enq.get_mset(firstItem, limit); 0048 if (mset.empty()) { // there are no more non-duplicate results 0049 break; 0050 } 0051 0052 for (auto it = mset.begin(), end = mset.end(); it != end && list.size() < limit; ++it) { 0053 const auto entry = QString::fromStdString(it.get_document().get_data()); 0054 // TODO: Be smarter about the deduplication by fixing the indexing code: 0055 // If we store mailbox name and address as separate named terms then we could deduplicate 0056 // purely based on the email address. 0057 if (!list.contains(entry, Qt::CaseInsensitive)) { 0058 qCDebug(AKONADI_SEARCH_PIM_LOG, "Match: \"%s\" (%d%%), docid %u", qUtf8Printable(entry), it.get_percent(), *it); 0059 list.push_back(entry); 0060 } else { 0061 ++duplicates; 0062 qCDebug(AKONADI_SEARCH_PIM_LOG, "Skipped duplicate match \"%s\" (%d%%) docid %u", qUtf8Printable(entry), it.get_percent(), *it); 0063 } 0064 ++firstItem; 0065 } 0066 } 0067 0068 qCDebug(AKONADI_SEARCH_PIM_LOG) << "Collected" << list.size() << "results in" << timer.elapsed() << "ms, skipped" << duplicates << "duplicates."; 0069 return list; 0070 } 0071 0072 QStringList ContactCompleter::complete() 0073 { 0074 const QString dir = Query::defaultLocation(QStringLiteral("emailContacts")); 0075 Xapian::Database db; 0076 try { 0077 db = Xapian::Database(QFile::encodeName(dir).toStdString()); 0078 } catch (const Xapian::DatabaseOpeningError &) { 0079 qCWarning(AKONADI_SEARCH_PIM_LOG) << "Xapian Database does not exist at " << dir; 0080 return {}; 0081 } catch (const Xapian::DatabaseCorruptError &) { 0082 qCWarning(AKONADI_SEARCH_PIM_LOG) << "Xapian Database corrupted"; 0083 return {}; 0084 } catch (const Xapian::DatabaseError &e) { 0085 qCWarning(AKONADI_SEARCH_PIM_LOG) << QString::fromStdString(e.get_type()) << QString::fromStdString(e.get_description()); 0086 return {}; 0087 } catch (...) { 0088 qCWarning(AKONADI_SEARCH_PIM_LOG) << "Random exception, but we do not want to crash"; 0089 return {}; 0090 } 0091 0092 Xapian::QueryParser parser; 0093 parser.set_database(db); 0094 0095 const int flags = Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_PARTIAL; 0096 const Xapian::Query q = parser.parse_query(m_prefix.toStdString(), flags); 0097 0098 Xapian::Enquire enq(db); 0099 enq.set_query(q); 0100 enq.set_sort_by_relevance(); 0101 // TODO: extend the indexer to use value slots for the normalized email address so that 0102 // duplicates can be collapsed by Xapian::Enquire::set_collapse_key() 0103 0104 int retryCount = 0; 0105 for (;;) { 0106 try { 0107 return processEnquire(enq, m_limit); 0108 } catch (const Xapian::DatabaseCorruptError &e) { 0109 qCWarning(AKONADI_SEARCH_PIM_LOG) << "The emailContacts Xapian database is corrupted:" << QString::fromStdString(e.get_description()); 0110 return {}; 0111 } catch (const Xapian::DatabaseModifiedError &e) { 0112 db.reopen(); 0113 retryCount++; 0114 if (retryCount > 3) { 0115 qCWarning(AKONADI_SEARCH_PIM_LOG) << "The emailContacts Xapian database seems broken:" << QString::fromStdString(e.get_description()); 0116 return {}; 0117 } 0118 continue; // try again 0119 } 0120 } 0121 }