File indexing completed on 2024-05-12 05:26:00
0001 /* 0002 * Copyright (C) 2015 Christian Mollekopf <chrigi_1@fastmail.fm> 0003 * 0004 * This program is free software; you can redistribute it and/or modify 0005 * it under the terms of the GNU General Public License as published by 0006 * the Free Software Foundation; either version 2 of the License, or 0007 * (at your option) any later version. 0008 * 0009 * This program is distributed in the hope that it will be useful, 0010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0012 * GNU General Public License for more details. 0013 * 0014 * You should have received a copy of the GNU General Public License 0015 * along with this program; if not, write to the 0016 * Free Software Foundation, Inc., 0017 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 0018 */ 0019 0020 #include "mailpreprocessor.h" 0021 0022 #include <QFile> 0023 #include <QDir> 0024 #include <QTextDocument> 0025 #include <QGuiApplication> 0026 #include <QUuid> 0027 #include <KMime/KMimeMessage> 0028 #include <KMime/Headers> 0029 #include <mime/mimetreeparser/objecttreeparser.h> 0030 0031 #include "pipeline.h" 0032 #include "definitions.h" 0033 #include "applicationdomaintype.h" 0034 0035 using namespace Sink; 0036 0037 static QString getString(const KMime::Headers::Base *header, const QString &defaultValue = {}) 0038 { 0039 if (!header) { 0040 return defaultValue; 0041 } 0042 return header->asUnicodeString() ; 0043 } 0044 0045 static QDateTime getDate(const KMime::Headers::Base *header) 0046 { 0047 if (!header) { 0048 return QDateTime::currentDateTimeUtc(); 0049 } 0050 return static_cast<const KMime::Headers::Date*>(header)->dateTime(); 0051 } 0052 0053 static Sink::ApplicationDomain::Mail::Contact fromMailbox(const KMime::Types::Mailbox &mb) 0054 { 0055 return Sink::ApplicationDomain::Mail::Contact{mb.name(), mb.address()}; 0056 } 0057 0058 static Sink::ApplicationDomain::Mail::Contact getContact(const KMime::Headers::Base *h) 0059 { 0060 if (!h) { 0061 return {}; 0062 } 0063 const auto header = static_cast<const KMime::Headers::Generics::MailboxList*>(h); 0064 const auto mb = header->mailboxes().isEmpty() ? KMime::Types::Mailbox{} : header->mailboxes().first(); 0065 return fromMailbox(mb); 0066 } 0067 0068 static QList<Sink::ApplicationDomain::Mail::Contact> getContactList(const KMime::Headers::Base *h) 0069 { 0070 if (!h) { 0071 return {}; 0072 } 0073 const auto header = static_cast<const KMime::Headers::Generics::AddressList*>(h); 0074 QList<Sink::ApplicationDomain::Mail::Contact> list; 0075 for (const auto &mb : header->mailboxes()) { 0076 list << fromMailbox(mb); 0077 } 0078 return list; 0079 } 0080 0081 static QVector<QByteArray> getIdentifiers(const KMime::Headers::Base *h) 0082 { 0083 if (!h) { 0084 return {}; 0085 } 0086 return static_cast<const KMime::Headers::Generics::Ident*>(h)->identifiers(); 0087 } 0088 0089 static QByteArray getIdentifier(const KMime::Headers::Base *h) 0090 { 0091 if (!h) { 0092 return {}; 0093 } 0094 return static_cast<const KMime::Headers::Generics::SingleIdent*>(h)->identifier(); 0095 } 0096 0097 static QByteArray normalizeMessageId(const QByteArray &id) 0098 { 0099 return id; 0100 } 0101 0102 static QString toPlain(const QString &html) 0103 { 0104 //QTextDocument has an implicit runtime dependency on QGuiApplication via the color palette. 0105 //If the QGuiApplication is not available we will crash (if the html contains colors). 0106 Q_ASSERT(QGuiApplication::instance()); 0107 // Only get HTML content, if no plain text content 0108 QTextDocument doc; 0109 doc.setHtml(html); 0110 return doc.toPlainText(); 0111 } 0112 0113 void MailPropertyExtractor::updatedIndexedProperties(Sink::ApplicationDomain::Mail &mail, const QByteArray &data) 0114 { 0115 if (data.isEmpty()) { 0116 //Always set a dummy subject and date, so we can find the message 0117 //In test we sometimes pre-set the extracted date though, so we check that first. 0118 if (mail.getSubject().isEmpty()) { 0119 mail.setExtractedSubject("Error: Empty message"); 0120 } 0121 if (!mail.getDate().isValid()) { 0122 mail.setExtractedDate(QDateTime::currentDateTimeUtc()); 0123 } 0124 return; 0125 } 0126 MimeTreeParser::ObjectTreeParser otp; 0127 otp.parseObjectTree(data); 0128 otp.decryptAndVerify(); 0129 0130 const auto partList = otp.collectContentParts(); 0131 const auto part = [&] () -> MimeTreeParser::MessagePartPtr { 0132 if (!partList.isEmpty()) { 0133 return partList[0]; 0134 } 0135 //Extract headers also if there are only attachment parts. 0136 return otp.parsedPart(); 0137 }(); 0138 Q_ASSERT(part); 0139 0140 mail.setExtractedSubject(getString(part->header(KMime::Headers::Subject::staticType()), "Error: No subject")); 0141 mail.setExtractedSender(getContact(part->header(KMime::Headers::From::staticType()))); 0142 mail.setExtractedTo(getContactList(part->header(KMime::Headers::To::staticType()))); 0143 mail.setExtractedCc(getContactList(part->header(KMime::Headers::Cc::staticType()))); 0144 mail.setExtractedBcc(getContactList(part->header(KMime::Headers::Bcc::staticType()))); 0145 mail.setExtractedDate(getDate(part->header(KMime::Headers::Date::staticType()))); 0146 0147 const auto parentMessageIds = [&] { 0148 //The last is the parent 0149 const auto references = getIdentifiers(part->header(KMime::Headers::References::staticType())); 0150 0151 if (!references.isEmpty()) { 0152 QByteArrayList list; 0153 std::transform(references.constBegin(), references.constEnd(), std::back_inserter(list), [] (const QByteArray &id) { return normalizeMessageId(id); }); 0154 return list; 0155 } else { 0156 const auto inReplyTo = getIdentifiers(part->header(KMime::Headers::InReplyTo::staticType())); 0157 if (!inReplyTo.isEmpty()) { 0158 //According to RFC5256 we should ignore all but the first 0159 return QByteArrayList{normalizeMessageId(inReplyTo.first())}; 0160 } 0161 } 0162 return QByteArrayList{}; 0163 }(); 0164 0165 //The rest should never change, unless we didn't have the headers available initially. 0166 auto messageId = normalizeMessageId(getIdentifier(part->header(KMime::Headers::MessageID::staticType()))); 0167 if (messageId.isEmpty()) { 0168 //reuse an existing messageid (on modification) 0169 const auto existing = mail.getMessageId(); 0170 if (existing.isEmpty()) { 0171 auto tmp = KMime::Message::Ptr::create(); 0172 //Genereate a globally unique messageid that doesn't leak the local hostname 0173 messageId = QString{"<" + QUuid::createUuid().toString().mid(1, 36).remove('-') + "@sink>"}.toLatin1(); 0174 tmp->messageID(true)->fromUnicodeString(messageId, "utf-8"); 0175 SinkWarning() << "Message id is empty, generating one: " << messageId; 0176 } else { 0177 messageId = existing; 0178 } 0179 } 0180 0181 mail.setExtractedMessageId(messageId); 0182 if (!parentMessageIds.isEmpty()) { 0183 mail.setExtractedParentMessageIds(parentMessageIds); 0184 } 0185 QList<QPair<QString, QString>> contentToIndex; 0186 const auto subject = getString(part->header(KMime::Headers::Subject::staticType())); 0187 contentToIndex.append({{"subject"}, subject}); 0188 0189 const auto plainTextContent = otp.plainTextContent(); 0190 if (plainTextContent.isEmpty()) { 0191 contentToIndex.append({{}, toPlain(otp.htmlContent())}); 0192 } else { 0193 contentToIndex.append({{}, plainTextContent}); 0194 } 0195 0196 const auto sender = mail.getSender(); 0197 contentToIndex.append({{"sender"}, sender.name}); 0198 contentToIndex.append({{"sender"}, sender.emailAddress}); 0199 for (const auto &c : mail.getTo()) { 0200 contentToIndex.append({{"recipients"}, c.name}); 0201 contentToIndex.append({{"recipients"}, c.emailAddress}); 0202 } 0203 for (const auto &c : mail.getCc()) { 0204 contentToIndex.append({{"recipients"}, c.name}); 0205 contentToIndex.append({{"recipients"}, c.emailAddress}); 0206 } 0207 for (const auto &c : mail.getBcc()) { 0208 contentToIndex.append({{"recipients"}, c.name}); 0209 contentToIndex.append({{"recipients"}, c.emailAddress}); 0210 } 0211 0212 //Prepare content for indexing; 0213 mail.setProperty("index", QVariant::fromValue(contentToIndex)); 0214 mail.setProperty("indexDate", QVariant::fromValue(mail.getDate())); 0215 } 0216 0217 void MailPropertyExtractor::newEntity(Sink::ApplicationDomain::Mail &mail) 0218 { 0219 updatedIndexedProperties(mail, mail.getMimeMessage()); 0220 } 0221 0222 void MailPropertyExtractor::modifiedEntity(const Sink::ApplicationDomain::Mail &oldMail, Sink::ApplicationDomain::Mail &newMail) 0223 { 0224 updatedIndexedProperties(newMail, newMail.getMimeMessage()); 0225 }