File indexing completed on 2024-05-19 05:11:52

0001 /*
0002  * This file is part of the KDE Akonadi Search Project
0003  * SPDX-FileCopyrightText: 2013 Vishesh Handa <me@vhanda.in>
0004  *
0005  * SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0006  *
0007  */
0008 
0009 #include <xapian.h>
0010 
0011 #include "akonadi_search_pim_debug.h"
0012 #include "contactcompleter.h"
0013 #include "query.h"
0014 
0015 #include <QElapsedTimer>
0016 #include <QFile>
0017 #include <QStandardPaths>
0018 
0019 using namespace Akonadi::Search::PIM;
0020 
0021 ContactCompleter::ContactCompleter(const QString &prefix, int limit)
0022     : m_prefix(prefix.toLower())
0023     , m_limit(limit)
0024 {
0025 }
0026 
0027 static QStringList processEnquire(Xapian::Enquire &enq, int limit)
0028 {
0029     QElapsedTimer timer;
0030     timer.start();
0031 
0032     // Retrieves no results but provides statistics - it's very quick
0033     auto statsmset = enq.get_mset(0, 0);
0034     qCDebug(AKONADI_SEARCH_PIM_LOG) << "Query:" << QString::fromStdString(enq.get_query().get_description());
0035     qCDebug(AKONADI_SEARCH_PIM_LOG) << "Estimated matches:" << statsmset.get_matches_estimated();
0036     const int matchEstimate = statsmset.get_matches_estimated();
0037 
0038     QStringList list;
0039     list.reserve(std::min(limit, matchEstimate));
0040     int duplicates = 0;
0041     int firstItem = 0;
0042     // We run the query multiple times, since we may discard some results as duplicates.
0043     while (list.size() < limit) {
0044         // Always query the "limit"-count of results:
0045         //  * if estimate is less than limit, we make sure we don't miss results any due to wrong estimate
0046         //  * if estimate is more than limit, we don't want to query more documents than needed
0047         Xapian::MSet mset = enq.get_mset(firstItem, limit);
0048         if (mset.empty()) { // there are no more non-duplicate results
0049             break;
0050         }
0051 
0052         for (auto it = mset.begin(), end = mset.end(); it != end && list.size() < limit; ++it) {
0053             const auto entry = QString::fromStdString(it.get_document().get_data());
0054             // TODO: Be smarter about the deduplication by fixing the indexing code:
0055             // If we store mailbox name and address as separate named terms then we could deduplicate
0056             // purely based on the email address.
0057             if (!list.contains(entry, Qt::CaseInsensitive)) {
0058                 qCDebug(AKONADI_SEARCH_PIM_LOG, "Match: \"%s\" (%d%%), docid %u", qUtf8Printable(entry), it.get_percent(), *it);
0059                 list.push_back(entry);
0060             } else {
0061                 ++duplicates;
0062                 qCDebug(AKONADI_SEARCH_PIM_LOG, "Skipped duplicate match \"%s\" (%d%%) docid %u", qUtf8Printable(entry), it.get_percent(), *it);
0063             }
0064             ++firstItem;
0065         }
0066     }
0067 
0068     qCDebug(AKONADI_SEARCH_PIM_LOG) << "Collected" << list.size() << "results in" << timer.elapsed() << "ms, skipped" << duplicates << "duplicates.";
0069     return list;
0070 }
0071 
0072 QStringList ContactCompleter::complete()
0073 {
0074     const QString dir = Query::defaultLocation(QStringLiteral("emailContacts"));
0075     Xapian::Database db;
0076     try {
0077         db = Xapian::Database(QFile::encodeName(dir).toStdString());
0078     } catch (const Xapian::DatabaseOpeningError &) {
0079         qCWarning(AKONADI_SEARCH_PIM_LOG) << "Xapian Database does not exist at " << dir;
0080         return {};
0081     } catch (const Xapian::DatabaseCorruptError &) {
0082         qCWarning(AKONADI_SEARCH_PIM_LOG) << "Xapian Database corrupted";
0083         return {};
0084     } catch (const Xapian::DatabaseError &e) {
0085         qCWarning(AKONADI_SEARCH_PIM_LOG) << QString::fromStdString(e.get_type()) << QString::fromStdString(e.get_description());
0086         return {};
0087     } catch (...) {
0088         qCWarning(AKONADI_SEARCH_PIM_LOG) << "Random exception, but we do not want to crash";
0089         return {};
0090     }
0091 
0092     Xapian::QueryParser parser;
0093     parser.set_database(db);
0094 
0095     const int flags = Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_PARTIAL;
0096     const Xapian::Query q = parser.parse_query(m_prefix.toStdString(), flags);
0097 
0098     Xapian::Enquire enq(db);
0099     enq.set_query(q);
0100     enq.set_sort_by_relevance();
0101     // TODO: extend the indexer to use value slots for the normalized email address so that
0102     // duplicates can be collapsed by Xapian::Enquire::set_collapse_key()
0103 
0104     int retryCount = 0;
0105     for (;;) {
0106         try {
0107             return processEnquire(enq, m_limit);
0108         } catch (const Xapian::DatabaseCorruptError &e) {
0109             qCWarning(AKONADI_SEARCH_PIM_LOG) << "The emailContacts Xapian database is corrupted:" << QString::fromStdString(e.get_description());
0110             return {};
0111         } catch (const Xapian::DatabaseModifiedError &e) {
0112             db.reopen();
0113             retryCount++;
0114             if (retryCount > 3) {
0115                 qCWarning(AKONADI_SEARCH_PIM_LOG) << "The emailContacts Xapian database seems broken:" << QString::fromStdString(e.get_description());
0116                 return {};
0117             }
0118             continue; // try again
0119         }
0120     }
0121 }