File indexing completed on 2024-05-12 05:11:14
0001 /* 0002 * This file is part of the KDE Akonadi Search Project 0003 * SPDX-FileCopyrightText: 2014-2024 Laurent Montel <montel@kde.org> 0004 * 0005 * SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL 0006 * 0007 */ 0008 0009 #include "akonotesindexer.h" 0010 #include "akonadi_indexer_agent_debug.h" 0011 #ifdef HAS_HTMLPARSER 0012 #include <lib.rs.h> 0013 #else 0014 #include <QTextDocument> 0015 #endif 0016 0017 AkonotesIndexer::AkonotesIndexer(const QString &path) 0018 : AbstractIndexer() 0019 { 0020 try { 0021 m_db = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN); 0022 } catch (const Xapian::DatabaseCorruptError &err) { 0023 qCWarning(AKONADI_INDEXER_AGENT_LOG) << "Database Corrupted - What did you do?"; 0024 qCWarning(AKONADI_INDEXER_AGENT_LOG) << err.get_error_string(); 0025 m_db = nullptr; 0026 } catch (const Xapian::Error &e) { 0027 qCWarning(AKONADI_INDEXER_AGENT_LOG) << QString::fromStdString(e.get_type()) << QString::fromStdString(e.get_description()); 0028 m_db = nullptr; 0029 } 0030 } 0031 0032 AkonotesIndexer::~AkonotesIndexer() 0033 { 0034 commit(); 0035 delete m_db; 0036 } 0037 0038 QStringList AkonotesIndexer::mimeTypes() const 0039 { 0040 return {QStringLiteral("text/x-vnd.akonadi.note")}; 0041 } 0042 0043 void AkonotesIndexer::index(const Akonadi::Item &item) 0044 { 0045 if (!m_db) { 0046 return; 0047 } 0048 KMime::Message::Ptr msg; 0049 try { 0050 msg = item.payload<KMime::Message::Ptr>(); 0051 } catch (const Akonadi::PayloadException &) { 0052 return; 0053 } 0054 m_doc = new Xapian::Document(); 0055 m_termGen = new Xapian::TermGenerator(); 0056 m_termGen->set_document(*m_doc); 0057 m_termGen->set_database(*m_db); 0058 0059 process(msg); 0060 0061 const Akonadi::Collection::Id colId = item.parentCollection().id(); 0062 const QByteArray term = 'C' + QByteArray::number(colId); 0063 m_doc->add_boolean_term(term.data()); 0064 0065 m_db->replace_document(item.id(), *m_doc); 0066 0067 delete m_doc; 0068 delete m_termGen; 0069 0070 m_doc = nullptr; 0071 m_termGen = nullptr; 0072 } 0073 0074 void AkonotesIndexer::process(const KMime::Message::Ptr &msg) 0075 { 0076 // 0077 // Process Headers 0078 // (Give the subject a higher priority) 0079 KMime::Headers::Subject *subject = msg->subject(false); 0080 if (subject) { 0081 const std::string str(normalizeString(subject->asUnicodeString()).toStdString()); 0082 qCDebug(AKONADI_INDEXER_AGENT_LOG) << "Indexing" << str.c_str(); 0083 m_termGen->index_text_without_positions(str, 1, "SU"); 0084 m_termGen->index_text_without_positions(str, 100); 0085 m_doc->set_data(str); 0086 } 0087 0088 KMime::Content *mainBody = msg->mainBodyPart("text/plain"); 0089 if (mainBody) { 0090 const std::string text(normalizeString(mainBody->decodedText()).toStdString()); 0091 m_termGen->index_text_without_positions(text); 0092 m_termGen->index_text_without_positions(text, 1, "BO"); 0093 } else { 0094 processPart(msg.data(), nullptr); 0095 } 0096 } 0097 0098 void AkonotesIndexer::processPart(KMime::Content *content, KMime::Content *mainContent) 0099 { 0100 if (content == mainContent) { 0101 return; 0102 } 0103 0104 KMime::Headers::ContentType *type = content->contentType(false); 0105 if (type) { 0106 if (type->isMultipart()) { 0107 if (type->isSubtype("encrypted")) { 0108 return; 0109 } 0110 0111 const auto contents = content->contents(); 0112 for (KMime::Content *c : contents) { 0113 processPart(c, mainContent); 0114 } 0115 } 0116 0117 // Only get HTML content, if no plain text content 0118 if (!mainContent && type->isHTMLText()) { 0119 #ifdef HAS_HTMLPARSER 0120 const auto html = content->decodedText().toStdString(); 0121 const auto text = std::string(convert_to_text(rust::String(html))); 0122 #else 0123 QTextDocument doc; 0124 doc.setHtml(content->decodedText()); 0125 const std::string text(normalizeString(doc.toPlainText()).toStdString()); 0126 #endif 0127 m_termGen->index_text_without_positions(text); 0128 } 0129 } 0130 } 0131 0132 void AkonotesIndexer::commit() 0133 { 0134 if (!m_db) { 0135 return; 0136 } 0137 0138 try { 0139 m_db->commit(); 0140 } catch (const Xapian::Error &err) { 0141 qCWarning(AKONADI_INDEXER_AGENT_LOG) << err.get_error_string(); 0142 } 0143 qCDebug(AKONADI_INDEXER_AGENT_LOG) << "Xapian Committed"; 0144 } 0145 0146 void AkonotesIndexer::remove(const Akonadi::Item &item) 0147 { 0148 if (!m_db) { 0149 return; 0150 } 0151 try { 0152 m_db->delete_document(item.id()); 0153 } catch (const Xapian::DocNotFoundError &) { 0154 return; 0155 } 0156 } 0157 0158 void AkonotesIndexer::remove(const Akonadi::Collection &collection) 0159 { 0160 if (!m_db) { 0161 return; 0162 } 0163 try { 0164 const Xapian::Query query('C' + QString::number(collection.id()).toStdString()); 0165 Xapian::Enquire enquire(*m_db); 0166 enquire.set_query(query); 0167 0168 Xapian::MSet mset = enquire.get_mset(0, m_db->get_doccount()); 0169 Xapian::MSetIterator end(mset.end()); 0170 for (Xapian::MSetIterator it = mset.begin(); it != end; ++it) { 0171 const qint64 id = *it; 0172 remove(Akonadi::Item(id)); 0173 } 0174 } catch (const Xapian::DocNotFoundError &) { 0175 return; 0176 } 0177 } 0178 0179 void AkonotesIndexer::move(Akonadi::Item::Id itemId, Akonadi::Collection::Id from, Akonadi::Collection::Id to) 0180 { 0181 if (!m_db) { 0182 return; 0183 } 0184 Xapian::Document doc; 0185 try { 0186 doc = m_db->get_document(itemId); 0187 } catch (const Xapian::DocNotFoundError &) { 0188 return; 0189 } 0190 0191 const QByteArray ft = 'C' + QByteArray::number(from); 0192 const QByteArray tt = 'C' + QByteArray::number(to); 0193 0194 doc.remove_term(ft.data()); 0195 doc.add_boolean_term(tt.data()); 0196 m_db->replace_document(doc.get_docid(), doc); 0197 }