File indexing completed on 2024-05-12 05:11:14

0001 /*
0002  * This file is part of the KDE Akonadi Search Project
0003  * SPDX-FileCopyrightText: 2014-2024 Laurent Montel <montel@kde.org>
0004  *
0005  * SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0006  *
0007  */
0008 
0009 #include "akonotesindexer.h"
0010 #include "akonadi_indexer_agent_debug.h"
0011 #ifdef HAS_HTMLPARSER
0012 #include <lib.rs.h>
0013 #else
0014 #include <QTextDocument>
0015 #endif
0016 
0017 AkonotesIndexer::AkonotesIndexer(const QString &path)
0018     : AbstractIndexer()
0019 {
0020     try {
0021         m_db = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN);
0022     } catch (const Xapian::DatabaseCorruptError &err) {
0023         qCWarning(AKONADI_INDEXER_AGENT_LOG) << "Database Corrupted - What did you do?";
0024         qCWarning(AKONADI_INDEXER_AGENT_LOG) << err.get_error_string();
0025         m_db = nullptr;
0026     } catch (const Xapian::Error &e) {
0027         qCWarning(AKONADI_INDEXER_AGENT_LOG) << QString::fromStdString(e.get_type()) << QString::fromStdString(e.get_description());
0028         m_db = nullptr;
0029     }
0030 }
0031 
0032 AkonotesIndexer::~AkonotesIndexer()
0033 {
0034     commit();
0035     delete m_db;
0036 }
0037 
0038 QStringList AkonotesIndexer::mimeTypes() const
0039 {
0040     return {QStringLiteral("text/x-vnd.akonadi.note")};
0041 }
0042 
0043 void AkonotesIndexer::index(const Akonadi::Item &item)
0044 {
0045     if (!m_db) {
0046         return;
0047     }
0048     KMime::Message::Ptr msg;
0049     try {
0050         msg = item.payload<KMime::Message::Ptr>();
0051     } catch (const Akonadi::PayloadException &) {
0052         return;
0053     }
0054     m_doc = new Xapian::Document();
0055     m_termGen = new Xapian::TermGenerator();
0056     m_termGen->set_document(*m_doc);
0057     m_termGen->set_database(*m_db);
0058 
0059     process(msg);
0060 
0061     const Akonadi::Collection::Id colId = item.parentCollection().id();
0062     const QByteArray term = 'C' + QByteArray::number(colId);
0063     m_doc->add_boolean_term(term.data());
0064 
0065     m_db->replace_document(item.id(), *m_doc);
0066 
0067     delete m_doc;
0068     delete m_termGen;
0069 
0070     m_doc = nullptr;
0071     m_termGen = nullptr;
0072 }
0073 
0074 void AkonotesIndexer::process(const KMime::Message::Ptr &msg)
0075 {
0076     //
0077     // Process Headers
0078     // (Give the subject a higher priority)
0079     KMime::Headers::Subject *subject = msg->subject(false);
0080     if (subject) {
0081         const std::string str(normalizeString(subject->asUnicodeString()).toStdString());
0082         qCDebug(AKONADI_INDEXER_AGENT_LOG) << "Indexing" << str.c_str();
0083         m_termGen->index_text_without_positions(str, 1, "SU");
0084         m_termGen->index_text_without_positions(str, 100);
0085         m_doc->set_data(str);
0086     }
0087 
0088     KMime::Content *mainBody = msg->mainBodyPart("text/plain");
0089     if (mainBody) {
0090         const std::string text(normalizeString(mainBody->decodedText()).toStdString());
0091         m_termGen->index_text_without_positions(text);
0092         m_termGen->index_text_without_positions(text, 1, "BO");
0093     } else {
0094         processPart(msg.data(), nullptr);
0095     }
0096 }
0097 
0098 void AkonotesIndexer::processPart(KMime::Content *content, KMime::Content *mainContent)
0099 {
0100     if (content == mainContent) {
0101         return;
0102     }
0103 
0104     KMime::Headers::ContentType *type = content->contentType(false);
0105     if (type) {
0106         if (type->isMultipart()) {
0107             if (type->isSubtype("encrypted")) {
0108                 return;
0109             }
0110 
0111             const auto contents = content->contents();
0112             for (KMime::Content *c : contents) {
0113                 processPart(c, mainContent);
0114             }
0115         }
0116 
0117         // Only get HTML content, if no plain text content
0118         if (!mainContent && type->isHTMLText()) {
0119 #ifdef HAS_HTMLPARSER
0120             const auto html = content->decodedText().toStdString();
0121             const auto text = std::string(convert_to_text(rust::String(html)));
0122 #else
0123             QTextDocument doc;
0124             doc.setHtml(content->decodedText());
0125             const std::string text(normalizeString(doc.toPlainText()).toStdString());
0126 #endif
0127             m_termGen->index_text_without_positions(text);
0128         }
0129     }
0130 }
0131 
0132 void AkonotesIndexer::commit()
0133 {
0134     if (!m_db) {
0135         return;
0136     }
0137 
0138     try {
0139         m_db->commit();
0140     } catch (const Xapian::Error &err) {
0141         qCWarning(AKONADI_INDEXER_AGENT_LOG) << err.get_error_string();
0142     }
0143     qCDebug(AKONADI_INDEXER_AGENT_LOG) << "Xapian Committed";
0144 }
0145 
0146 void AkonotesIndexer::remove(const Akonadi::Item &item)
0147 {
0148     if (!m_db) {
0149         return;
0150     }
0151     try {
0152         m_db->delete_document(item.id());
0153     } catch (const Xapian::DocNotFoundError &) {
0154         return;
0155     }
0156 }
0157 
0158 void AkonotesIndexer::remove(const Akonadi::Collection &collection)
0159 {
0160     if (!m_db) {
0161         return;
0162     }
0163     try {
0164         const Xapian::Query query('C' + QString::number(collection.id()).toStdString());
0165         Xapian::Enquire enquire(*m_db);
0166         enquire.set_query(query);
0167 
0168         Xapian::MSet mset = enquire.get_mset(0, m_db->get_doccount());
0169         Xapian::MSetIterator end(mset.end());
0170         for (Xapian::MSetIterator it = mset.begin(); it != end; ++it) {
0171             const qint64 id = *it;
0172             remove(Akonadi::Item(id));
0173         }
0174     } catch (const Xapian::DocNotFoundError &) {
0175         return;
0176     }
0177 }
0178 
0179 void AkonotesIndexer::move(Akonadi::Item::Id itemId, Akonadi::Collection::Id from, Akonadi::Collection::Id to)
0180 {
0181     if (!m_db) {
0182         return;
0183     }
0184     Xapian::Document doc;
0185     try {
0186         doc = m_db->get_document(itemId);
0187     } catch (const Xapian::DocNotFoundError &) {
0188         return;
0189     }
0190 
0191     const QByteArray ft = 'C' + QByteArray::number(from);
0192     const QByteArray tt = 'C' + QByteArray::number(to);
0193 
0194     doc.remove_term(ft.data());
0195     doc.add_boolean_term(tt.data());
0196     m_db->replace_document(doc.get_docid(), doc);
0197 }