Warning, file /pim/akonadi-search/agent/emailindexer.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* 0002 * This file is part of the KDE Akonadi Search Project 0003 * SPDX-FileCopyrightText: 2013 Vishesh Handa <me@vhanda.in> 0004 * 0005 * SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL 0006 * 0007 */ 0008 0009 #include "emailindexer.h" 0010 #include "akonadi_indexer_agent_email_debug.h" 0011 0012 #include <Akonadi/Collection> 0013 #include <Akonadi/MessageFlags> 0014 #include <KEmailAddress> 0015 0016 #include <QProcess> 0017 0018 EmailIndexer::EmailIndexer(const QString &path, const QString &contactDbPath) 0019 : AbstractIndexer() 0020 { 0021 try { 0022 m_db = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN); 0023 } catch (const Xapian::DatabaseCorruptError &err) { 0024 qCWarning(AKONADI_INDEXER_AGENT_EMAIL_LOG) << "Database Corrupted - What did you do?"; 0025 qCWarning(AKONADI_INDEXER_AGENT_EMAIL_LOG) << err.get_error_string(); 0026 m_db = nullptr; 0027 } catch (const Xapian::Error &e) { 0028 qCWarning(AKONADI_INDEXER_AGENT_EMAIL_LOG) << QString::fromStdString(e.get_type()) << QString::fromStdString(e.get_description()); 0029 m_db = nullptr; 0030 } 0031 0032 try { 0033 m_contactDb = new Xapian::WritableDatabase(contactDbPath.toStdString(), Xapian::DB_CREATE_OR_OPEN); 0034 } catch (const Xapian::DatabaseCorruptError &err) { 0035 qCWarning(AKONADI_INDEXER_AGENT_EMAIL_LOG) << "Database Corrupted - What did you do?"; 0036 qCWarning(AKONADI_INDEXER_AGENT_EMAIL_LOG) << err.get_error_string(); 0037 m_contactDb = nullptr; 0038 } catch (const Xapian::Error &e) { 0039 qCWarning(AKONADI_INDEXER_AGENT_EMAIL_LOG) << QString::fromStdString(e.get_type()) << QString::fromStdString(e.get_description()); 0040 m_contactDb = nullptr; 0041 } 0042 } 0043 0044 EmailIndexer::~EmailIndexer() 0045 { 0046 commit(); 0047 delete m_db; 0048 delete m_contactDb; 0049 } 0050 0051 QStringList EmailIndexer::mimeTypes() const 0052 { 0053 return QStringList() << KMime::Message::mimeType(); 0054 } 0055 0056 void EmailIndexer::index(const Akonadi::Item &item) 0057 { 0058 qCDebug(AKONADI_INDEXER_AGENT_EMAIL_LOG) << "Indexing item" << item.id(); 0059 if (!m_db) { 0060 return; 0061 } 0062 Akonadi::MessageStatus status; 0063 status.setStatusFromFlags(item.flags()); 0064 if (status.isSpam()) { 0065 return; 0066 } 0067 0068 KMime::Message::Ptr msg; 0069 try { 0070 msg = item.payload<KMime::Message::Ptr>(); 0071 } catch (const Akonadi::PayloadException &) { 0072 return; 0073 } 0074 0075 m_doc = new Xapian::Document(); 0076 m_termGen = new Xapian::TermGenerator(); 0077 m_termGen->set_document(*m_doc); 0078 m_termGen->set_database(*m_db); 0079 0080 processMessageStatus(status); 0081 process(msg); 0082 0083 // Size 0084 m_doc->add_value(1, QString::number(item.size()).toStdString()); 0085 0086 // Parent collection 0087 Q_ASSERT_X(item.parentCollection().isValid(), "Akonadi::Search::EmailIndexer::index", "Item does not have a valid parent collection"); 0088 0089 const Akonadi::Collection::Id colId = item.parentCollection().id(); 0090 const QByteArray term = 'C' + QByteArray::number(colId); 0091 m_doc->add_boolean_term(term.data()); 0092 0093 m_db->replace_document(item.id(), *m_doc); 0094 0095 delete m_doc; 0096 delete m_termGen; 0097 0098 m_doc = nullptr; 0099 m_termGen = nullptr; 0100 qCDebug(AKONADI_INDEXER_AGENT_EMAIL_LOG) << "DONE Indexing item" << item.id(); 0101 } 0102 0103 void EmailIndexer::insert(const QByteArray &key, KMime::Headers::Base *unstructured) 0104 { 0105 if (unstructured) { 0106 m_termGen->index_text_without_positions(unstructured->asUnicodeString().toStdString(), 1, key.data()); 0107 } 0108 } 0109 0110 void EmailIndexer::insert(const QByteArray &key, KMime::Headers::Generics::MailboxList *mlist) 0111 { 0112 if (mlist) { 0113 insert(key, mlist->mailboxes()); 0114 } 0115 } 0116 0117 void EmailIndexer::insert(const QByteArray &key, KMime::Headers::Generics::AddressList *alist) 0118 { 0119 if (alist) { 0120 insert(key, alist->mailboxes()); 0121 } 0122 } 0123 0124 namespace 0125 { 0126 // Does some extra stuff such as lower casing the email, removing all quotes 0127 // and removing extra spaces 0128 // TODO: Move this into KMime? 0129 // TODO: If name is all upper/lower then try to captialize it? 0130 QString prettyAddress(const KMime::Types::Mailbox &mbox) 0131 { 0132 const QString name = mbox.name().simplified(); 0133 const QByteArray email = mbox.address().simplified().toLower(); 0134 return KEmailAddress::normalizedAddress(name, QString::fromUtf8(email)); 0135 } 0136 } 0137 0138 // Add once with a prefix and once without 0139 void EmailIndexer::insert(const QByteArray &key, const KMime::Types::Mailbox::List &list) 0140 { 0141 if (!m_contactDb) { 0142 return; 0143 } 0144 for (const KMime::Types::Mailbox &mbox : list) { 0145 const auto name(mbox.name().toStdString()); 0146 m_termGen->index_text_without_positions(name, 1, key.data()); 0147 m_termGen->index_text_without_positions(name, 1); 0148 m_termGen->index_text_without_positions(mbox.address().data(), 1, key.data()); 0149 m_termGen->index_text_without_positions(mbox.address().data(), 1); 0150 0151 m_doc->add_term(QByteArray(key + mbox.address()).data()); 0152 m_doc->add_term(mbox.address().data()); 0153 0154 // 0155 // Add emails for email auto-completion 0156 // 0157 const auto pa = prettyAddress(mbox); 0158 const auto id = qHash(pa); 0159 try { 0160 const auto doc = m_contactDb->get_document(id); 0161 Q_UNUSED(doc); 0162 continue; 0163 } catch (const Xapian::DocNotFoundError &) { 0164 Xapian::Document doc; 0165 const auto pretty(pa.toStdString()); 0166 doc.set_data(pretty); 0167 0168 Xapian::TermGenerator termGen; 0169 termGen.set_document(doc); 0170 termGen.index_text(pretty); 0171 0172 doc.add_term(mbox.address().data()); 0173 m_contactDb->replace_document(id, doc); 0174 } 0175 } 0176 } 0177 0178 // FIXME: Only index properties that are actually searched! 0179 void EmailIndexer::process(const KMime::Message::Ptr &msg) 0180 { 0181 // 0182 // Process Headers 0183 // (Give the subject a higher priority) 0184 KMime::Headers::Subject *subject = msg->subject(false); 0185 if (subject) { 0186 const std::string str{normalizeString(subject->asUnicodeString()).toStdString()}; 0187 qCDebug(AKONADI_INDEXER_AGENT_EMAIL_LOG) << "Indexing" << str.c_str(); 0188 m_termGen->index_text_without_positions(str, 1, "SU"); 0189 m_termGen->index_text_without_positions(str, 100); 0190 m_doc->set_data(str); 0191 } 0192 0193 KMime::Headers::Date *date = msg->date(false); 0194 if (date) { 0195 const QString str = QString::number(date->dateTime().toSecsSinceEpoch()); 0196 m_doc->add_value(0, str.toStdString()); 0197 const QString julianDay = QString::number(date->dateTime().date().toJulianDay()); 0198 m_doc->add_value(2, julianDay.toStdString()); 0199 } 0200 0201 insert("F", msg->from(false)); 0202 insert("T", msg->to(false)); 0203 insert("CC", msg->cc(false)); 0204 insert("BC", msg->bcc(false)); 0205 insert("O", msg->organization(false)); 0206 insert("RT", msg->replyTo(false)); 0207 insert("RF", msg->headerByType("Resent-From")); 0208 insert("LI", msg->headerByType("List-Id")); 0209 insert("XL", msg->headerByType("X-Loop")); 0210 insert("XML", msg->headerByType("X-Mailing-List")); 0211 insert("XSF", msg->headerByType("X-Spam-Flag")); 0212 0213 // 0214 // Process Plain Text Content 0215 // 0216 0217 // Index all headers 0218 m_termGen->index_text_without_positions(std::string(msg->head().constData()), 1, "HE"); 0219 0220 KMime::Content *mainBody = msg->mainBodyPart("text/plain"); 0221 if (mainBody) { 0222 const std::string text(normalizeString(mainBody->decodedText()).toStdString()); 0223 m_termGen->index_text_without_positions(text); 0224 m_termGen->index_text_without_positions(text, 1, "BO"); 0225 } else { 0226 processPart(msg.data(), nullptr); 0227 } 0228 } 0229 0230 void EmailIndexer::processPart(KMime::Content *content, KMime::Content *mainContent) 0231 { 0232 if (content == mainContent) { 0233 return; 0234 } 0235 0236 KMime::Headers::ContentType *type = content->contentType(false); 0237 if (type) { 0238 if (type->isMultipart()) { 0239 if (type->isSubtype("encrypted")) { 0240 return; 0241 } 0242 0243 for (KMime::Content *c : content->contents()) { 0244 processPart(c, mainContent); 0245 } 0246 } 0247 0248 // Only get HTML content, if no plain text content 0249 if (!mainContent && type->isHTMLText()) { 0250 QProcess converter; 0251 converter.start(QStringLiteral("akonadi_html_to_text")); 0252 if (!converter.waitForStarted()) { 0253 return; 0254 } 0255 0256 converter.write(content->decodedText().toUtf8()); 0257 converter.closeWriteChannel(); 0258 0259 if (!converter.waitForFinished()) { 0260 return; 0261 } 0262 0263 const auto text = converter.readAll().toStdString(); 0264 0265 m_termGen->index_text_without_positions(text); 0266 } 0267 } 0268 0269 // FIXME: Handle attachments? 0270 } 0271 0272 void EmailIndexer::processMessageStatus(Akonadi::MessageStatus status) 0273 { 0274 insertBool('R', status.isRead()); 0275 insertBool('A', status.hasAttachment()); 0276 insertBool('I', status.isImportant()); 0277 insertBool('W', status.isWatched()); 0278 insertBool('T', status.isToAct()); 0279 insertBool('D', status.isDeleted()); 0280 insertBool('S', status.isSpam()); 0281 insertBool('E', status.isReplied()); 0282 insertBool('G', status.isIgnored()); 0283 insertBool('F', status.isForwarded()); 0284 insertBool('N', status.isSent()); 0285 insertBool('Q', status.isQueued()); 0286 insertBool('H', status.isHam()); 0287 insertBool('C', status.isEncrypted()); 0288 insertBool('V', status.hasInvitation()); 0289 } 0290 0291 void EmailIndexer::insertBool(char key, bool value) 0292 { 0293 QByteArray term("B"); 0294 if (value) { 0295 term.append(key); 0296 } else { 0297 term.append('N'); 0298 term.append(key); 0299 } 0300 0301 m_doc->add_boolean_term(term.data()); 0302 } 0303 0304 void EmailIndexer::toggleFlag(Xapian::Document &doc, const char *remove, const char *add) 0305 { 0306 try { 0307 doc.remove_term(remove); 0308 } catch (const Xapian::InvalidArgumentError &e) { 0309 // The previous flag state was not indexed, continue 0310 } 0311 0312 doc.add_term(add); 0313 } 0314 0315 void EmailIndexer::updateFlags(const Akonadi::Item &item, const QSet<QByteArray> &added, const QSet<QByteArray> &removed) 0316 { 0317 if (!m_db) { 0318 return; 0319 } 0320 Xapian::Document doc; 0321 try { 0322 doc = m_db->get_document(item.id()); 0323 } catch (const Xapian::DocNotFoundError &) { 0324 return; 0325 } 0326 0327 for (const QByteArray &flag : removed) { 0328 if (flag == Akonadi::MessageFlags::Seen) { 0329 toggleFlag(doc, "BR", "BNR"); 0330 } else if (flag == Akonadi::MessageFlags::Flagged) { 0331 toggleFlag(doc, "BI", "BNI"); 0332 } else if (flag == Akonadi::MessageFlags::Watched) { 0333 toggleFlag(doc, "BW", "BNW"); 0334 } 0335 } 0336 0337 for (const QByteArray &flag : added) { 0338 if (flag == Akonadi::MessageFlags::Seen) { 0339 toggleFlag(doc, "BNR", "BR"); 0340 } else if (flag == Akonadi::MessageFlags::Flagged) { 0341 toggleFlag(doc, "BNI", "BI"); 0342 } else if (flag == Akonadi::MessageFlags::Watched) { 0343 toggleFlag(doc, "BNW", "BW"); 0344 } 0345 } 0346 0347 m_db->replace_document(doc.get_docid(), doc); 0348 } 0349 0350 void EmailIndexer::remove(const Akonadi::Item &item) 0351 { 0352 if (!m_db) { 0353 return; 0354 } 0355 try { 0356 m_db->delete_document(item.id()); 0357 // TODO remove contacts from contact db? 0358 } catch (const Xapian::DocNotFoundError &) { 0359 return; 0360 } 0361 } 0362 0363 void EmailIndexer::remove(const Akonadi::Collection &collection) 0364 { 0365 if (!m_db) { 0366 return; 0367 } 0368 try { 0369 Xapian::Query query('C' + QString::number(collection.id()).toStdString()); 0370 Xapian::Enquire enquire(*m_db); 0371 enquire.set_query(query); 0372 0373 Xapian::MSet mset = enquire.get_mset(0, m_db->get_doccount()); 0374 Xapian::MSetIterator end = mset.end(); 0375 for (Xapian::MSetIterator it = mset.begin(); it != end; ++it) { 0376 const qint64 id = *it; 0377 remove(Akonadi::Item(id)); 0378 } 0379 } catch (const Xapian::DocNotFoundError &) { 0380 return; 0381 } 0382 } 0383 0384 void EmailIndexer::move(Akonadi::Item::Id itemId, Akonadi::Collection::Id from, Akonadi::Collection::Id to) 0385 { 0386 if (!m_db) { 0387 return; 0388 } 0389 Xapian::Document doc; 0390 try { 0391 doc = m_db->get_document(itemId); 0392 } catch (const Xapian::DocNotFoundError &) { 0393 return; 0394 } 0395 0396 const QByteArray ft = 'C' + QByteArray::number(from); 0397 const QByteArray tt = 'C' + QByteArray::number(to); 0398 0399 doc.remove_term(ft.data()); 0400 doc.add_boolean_term(tt.data()); 0401 m_db->replace_document(doc.get_docid(), doc); 0402 } 0403 0404 void EmailIndexer::commit() 0405 { 0406 if (m_db) { 0407 try { 0408 m_db->commit(); 0409 } catch (const Xapian::Error &err) { 0410 qCWarning(AKONADI_INDEXER_AGENT_EMAIL_LOG) << err.get_error_string(); 0411 } 0412 qCDebug(AKONADI_INDEXER_AGENT_EMAIL_LOG) << "Xapian Committed"; 0413 } 0414 0415 if (m_contactDb) { 0416 try { 0417 m_contactDb->commit(); 0418 } catch (const Xapian::Error &err) { 0419 qCWarning(AKONADI_INDEXER_AGENT_EMAIL_LOG) << err.get_error_string(); 0420 } 0421 qCDebug(AKONADI_INDEXER_AGENT_EMAIL_LOG) << "Xapian Committed"; 0422 } 0423 }