File indexing completed on 2024-12-22 05:05:20
0001 // This file is part of KMail, the KDE mail client. 0002 // SPDX-FileCopyrightText: 2003 Marc Mutz <mutz@kde.org> 0003 // SPDX-FileCopyrightText: 2002-2004 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.net 0004 // SPDX-FileCopyrightText: 2009 Andras Mantia <andras@kdab.net> 0005 // SPDX-FileCopyrightText: 2015 Sandro Knauß <sknauss@kde.org> 0006 // SPDX-FileCopyrightText: 2017 Christian Mollekopf <mollekopf@kolabsystems.com> 0007 // SPDX-License-Identifier: GPL-2.0-or-later 0008 0009 #include "objecttreeparser.h" 0010 0011 #include "bodypartformatterbasefactory.h" 0012 0013 #include "bodypartformatter.h" 0014 0015 #include <KMime/Message> 0016 0017 #include <KCharsets> 0018 #include <QByteArray> 0019 #include <QDebug> 0020 #include <QMimeDatabase> 0021 #include <QRegularExpression> 0022 #include <QStringDecoder> 0023 #include <QTextStream> 0024 #include <QUrl> 0025 0026 using namespace MimeTreeParser; 0027 0028 /* 0029 * Collect message parts bottom up. 0030 * Filter to avoid evaluating a subtree. 0031 * Select parts to include it in the result set. Selecting a part in a branch will keep any parent parts from being selected. 0032 */ 0033 static QList<MessagePart::Ptr> collect(MessagePart::Ptr start, 0034 const std::function<bool(const MessagePart::Ptr &)> &evaluateSubtree, 0035 const std::function<bool(const MessagePart::Ptr &)> &select) 0036 { 0037 auto ptr = start.dynamicCast<MessagePart>(); 0038 Q_ASSERT(ptr); 0039 MessagePart::List list; 0040 if (evaluateSubtree(ptr)) { 0041 for (const auto &p : ptr->subParts()) { 0042 list << ::collect(p, evaluateSubtree, select); 0043 } 0044 } 0045 0046 // Don't consider this part if we already selected a subpart 0047 if (list.isEmpty()) { 0048 if (select(ptr)) { 0049 list << start; 0050 } 0051 } 0052 return list; 0053 } 0054 0055 QString ObjectTreeParser::plainTextContent() 0056 { 0057 QString content; 0058 if (mParsedPart) { 0059 auto plainParts = ::collect( 0060 mParsedPart, 0061 [](const MessagePart::Ptr &) { 0062 return true; 0063 }, 0064 [](const MessagePart::Ptr &part) { 0065 if (part->isAttachment()) { 0066 return false; 0067 } 0068 if (dynamic_cast<MimeTreeParser::TextMessagePart *>(part.data())) { 0069 return true; 0070 } 0071 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) { 0072 return true; 0073 } 0074 return false; 0075 }); 0076 for (const auto &part : plainParts) { 0077 content += part->text(); 0078 } 0079 } 0080 return content; 0081 } 0082 0083 QString ObjectTreeParser::htmlContent() 0084 { 0085 QString content; 0086 if (mParsedPart) { 0087 MessagePart::List contentParts = ::collect( 0088 mParsedPart, 0089 [](const MessagePart::Ptr &) { 0090 return true; 0091 }, 0092 [](const MessagePart::Ptr &part) { 0093 if (dynamic_cast<MimeTreeParser::HtmlMessagePart *>(part.data())) { 0094 return true; 0095 } 0096 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) { 0097 return true; 0098 } 0099 return false; 0100 }); 0101 for (const auto &part : contentParts) { 0102 if (auto p = dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) { 0103 content += p->htmlContent(); 0104 } else { 0105 content += part->text(); 0106 } 0107 } 0108 } 0109 return content; 0110 } 0111 0112 bool ObjectTreeParser::hasEncryptedParts() const 0113 { 0114 bool result = false; 0115 0116 ::collect( 0117 mParsedPart, 0118 [](const MessagePart::Ptr &) { 0119 return true; 0120 }, 0121 [&result](const MessagePart::Ptr &part) { 0122 if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(part.data())) { 0123 result = true; 0124 } 0125 return false; 0126 }); 0127 0128 return result; 0129 } 0130 0131 bool ObjectTreeParser::hasSignedParts() const 0132 { 0133 bool result = false; 0134 0135 ::collect( 0136 mParsedPart, 0137 [](const MessagePart::Ptr &) { 0138 return true; 0139 }, 0140 [&result](const MessagePart::Ptr &part) { 0141 if (const auto enc = dynamic_cast<MimeTreeParser::SignedMessagePart *>(part.data())) { 0142 result = true; 0143 } 0144 return false; 0145 }); 0146 0147 return result; 0148 } 0149 0150 static void print(QTextStream &stream, KMime::Content *node, const QString prefix = {}) 0151 { 0152 QByteArray mediaType("text"); 0153 QByteArray subType("plain"); 0154 if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && !node->contentType()->subType().isEmpty()) { 0155 mediaType = node->contentType()->mediaType(); 0156 subType = node->contentType()->subType(); 0157 } 0158 stream << prefix << "! " << mediaType << subType << " isAttachment: " << KMime::isAttachment(node) << "\n"; 0159 const auto contents = node->contents(); 0160 for (const auto nodeContent : contents) { 0161 print(stream, nodeContent, prefix + QLatin1StringView(" ")); 0162 } 0163 } 0164 0165 static void print(QTextStream &stream, const MessagePart &messagePart, const QByteArray pre = {}) 0166 { 0167 stream << pre << "# " << messagePart.metaObject()->className() << " isAttachment: " << messagePart.isAttachment() << "\n"; 0168 const auto subParts = messagePart.subParts(); 0169 for (const auto &subPart : subParts) { 0170 print(stream, *subPart, pre + " "); 0171 } 0172 } 0173 0174 QString ObjectTreeParser::structureAsString() const 0175 { 0176 QString string; 0177 QTextStream stream{&string}; 0178 0179 if (mTopLevelContent) { 0180 ::print(stream, mTopLevelContent); 0181 } 0182 if (mParsedPart) { 0183 ::print(stream, *mParsedPart); 0184 } 0185 return string; 0186 } 0187 0188 void ObjectTreeParser::print() 0189 { 0190 qInfo().noquote() << structureAsString(); 0191 } 0192 0193 static KMime::Content *find(KMime::Content *node, const std::function<bool(KMime::Content *)> &select) 0194 { 0195 QByteArray mediaType("text"); 0196 QByteArray subType("plain"); 0197 if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && !node->contentType()->subType().isEmpty()) { 0198 mediaType = node->contentType()->mediaType(); 0199 subType = node->contentType()->subType(); 0200 } 0201 if (select(node)) { 0202 return node; 0203 } 0204 const auto contents = node->contents(); 0205 for (const auto nodeContent : contents) { 0206 if (const auto content = find(nodeContent, select)) { 0207 return content; 0208 } 0209 } 0210 return nullptr; 0211 } 0212 0213 KMime::Content *ObjectTreeParser::find(const std::function<bool(KMime::Content *)> &select) 0214 { 0215 return ::find(mTopLevelContent, select); 0216 } 0217 0218 MessagePart::List ObjectTreeParser::collectContentParts() 0219 { 0220 return collectContentParts(mParsedPart); 0221 } 0222 0223 MessagePart::List ObjectTreeParser::collectContentParts(MessagePart::Ptr start) 0224 { 0225 return ::collect( 0226 start, 0227 [start](const MessagePart::Ptr &part) { 0228 // Ignore the top-level 0229 if (start.data() == part.data()) { 0230 return true; 0231 } 0232 if (auto encapsulatedPart = part.dynamicCast<MimeTreeParser::EncapsulatedRfc822MessagePart>()) { 0233 return false; 0234 } 0235 return true; 0236 }, 0237 [start](const MessagePart::Ptr &part) { 0238 if (const auto attachment = dynamic_cast<MimeTreeParser::AttachmentMessagePart *>(part.data())) { 0239 return attachment->mimeType() == "text/calendar"; 0240 } else if (const auto text = dynamic_cast<MimeTreeParser::TextMessagePart *>(part.data())) { 0241 auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(text->parentPart()); 0242 if (enc && enc->error()) { 0243 return false; 0244 } 0245 0246 return true; 0247 } else if (dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) { 0248 return true; 0249 } else if (dynamic_cast<MimeTreeParser::HtmlMessagePart *>(part.data())) { 0250 // Don't if we have an alternative part as parent 0251 return true; 0252 } else if (dynamic_cast<MimeTreeParser::EncapsulatedRfc822MessagePart *>(part.data())) { 0253 if (start.data() == part.data()) { 0254 return false; 0255 } 0256 return true; 0257 } else if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(part.data())) { 0258 if (enc->error()) { 0259 return true; 0260 } 0261 // If we have a textpart with encrypted and unencrypted subparts we want to return the textpart 0262 if (dynamic_cast<MimeTreeParser::TextMessagePart *>(enc->parentPart())) { 0263 return false; 0264 } 0265 } else if (const auto sig = dynamic_cast<MimeTreeParser::SignedMessagePart *>(part.data())) { 0266 // Signatures without subparts already contain the text 0267 return !sig->hasSubParts(); 0268 } 0269 return false; 0270 }); 0271 } 0272 0273 MessagePart::List ObjectTreeParser::collectAttachmentParts() 0274 { 0275 MessagePart::List contentParts = ::collect( 0276 mParsedPart, 0277 [](const MessagePart::Ptr &) { 0278 return true; 0279 }, 0280 [](const MessagePart::Ptr &part) { 0281 return part->isAttachment(); 0282 }); 0283 return contentParts; 0284 } 0285 0286 /* 0287 * This naive implementation assumes that there is an encrypted part wrapping a signature. 0288 * For other cases we would have to process both recursively (I think?) 0289 */ 0290 void ObjectTreeParser::decryptAndVerify() 0291 { 0292 // We first decrypt 0293 ::collect( 0294 mParsedPart, 0295 [](const MessagePart::Ptr &) { 0296 return true; 0297 }, 0298 [](const MessagePart::Ptr &part) { 0299 if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(part.data())) { 0300 enc->startDecryption(); 0301 } 0302 return false; 0303 }); 0304 // And then verify the available signatures 0305 ::collect( 0306 mParsedPart, 0307 [](const MessagePart::Ptr &) { 0308 return true; 0309 }, 0310 [](const MessagePart::Ptr &part) { 0311 if (const auto enc = dynamic_cast<MimeTreeParser::SignedMessagePart *>(part.data())) { 0312 enc->startVerification(); 0313 } 0314 return false; 0315 }); 0316 } 0317 0318 QString ObjectTreeParser::resolveCidLinks(const QString &html) 0319 { 0320 auto text = html; 0321 static const auto regex = QRegularExpression(QLatin1StringView("(src)\\s*=\\s*(\"|')(cid:[^\"']+)\\2")); 0322 auto it = regex.globalMatch(text); 0323 while (it.hasNext()) { 0324 const auto match = it.next(); 0325 const auto link = QUrl(match.captured(3)); 0326 auto cid = link.path(); 0327 auto mailMime = const_cast<KMime::Content *>(find([=](KMime::Content *content) { 0328 if (!content || !content->contentID(false)) { 0329 return false; 0330 } 0331 return QString::fromLatin1(content->contentID(false)->identifier()) == cid; 0332 })); 0333 if (mailMime) { 0334 const auto contentType = mailMime->contentType(false); 0335 if (!contentType) { 0336 qWarning() << "No content type, skipping"; 0337 continue; 0338 } 0339 QMimeDatabase mimeDb; 0340 const auto mimetype = mimeDb.mimeTypeForName(QString::fromLatin1(contentType->mimeType())).name(); 0341 if (mimetype.startsWith(QLatin1StringView("image/"))) { 0342 // We reencode to base64 below. 0343 const auto data = mailMime->decodedContent(); 0344 if (data.isEmpty()) { 0345 qWarning() << "Attachment is empty."; 0346 continue; 0347 } 0348 text.replace(match.captured(0), QString::fromLatin1("src=\"data:%1;base64,%2\"").arg(mimetype, QString::fromLatin1(data.toBase64()))); 0349 } 0350 } else { 0351 qWarning() << "Failed to find referenced attachment: " << cid; 0352 } 0353 } 0354 return text; 0355 } 0356 0357 //----------------------------------------------------------------------------- 0358 0359 void ObjectTreeParser::parseObjectTree(const QByteArray &mimeMessage) 0360 { 0361 const auto mailData = KMime::CRLFtoLF(mimeMessage); 0362 mMsg = KMime::Message::Ptr(new KMime::Message); 0363 mMsg->setContent(mailData); 0364 mMsg->parse(); 0365 // We avoid using mMsg->contentType()->charset(), because that will just return kmime's defaultCharset(), ISO-8859-1 0366 const auto charset = mMsg->contentType()->parameter(QStringLiteral("charset")).toLatin1(); 0367 if (charset.isEmpty()) { 0368 mMsg->contentType()->setCharset("us-ascii"); 0369 } 0370 parseObjectTree(mMsg.data()); 0371 } 0372 0373 void ObjectTreeParser::parseObjectTree(KMime::Content *node) 0374 { 0375 mTopLevelContent = node; 0376 mParsedPart = parseObjectTreeInternal(node, false); 0377 } 0378 0379 MessagePart::Ptr ObjectTreeParser::parsedPart() const 0380 { 0381 return mParsedPart; 0382 } 0383 0384 /* 0385 * This will lookup suitable formatters based on the type, 0386 * and let them generate a list of parts. 0387 * If the formatter generated a list of parts, then those are taken, otherwise we move on to the next match. 0388 */ 0389 MessagePart::List ObjectTreeParser::processType(KMime::Content *node, const QByteArray &mediaType, const QByteArray &subType) 0390 { 0391 static MimeTreeParser::BodyPartFormatterBaseFactory factory; 0392 const auto sub = factory.subtypeRegistry(mediaType.constData()); 0393 const auto range = sub.equal_range(subType.constData()); 0394 for (auto it = range.first; it != range.second; ++it) { 0395 const auto formatter = it->second; 0396 if (!formatter) { 0397 continue; 0398 } 0399 const auto list = formatter->processList(this, node); 0400 if (!list.isEmpty()) { 0401 return list; 0402 } 0403 } 0404 return {}; 0405 } 0406 0407 MessagePart::Ptr ObjectTreeParser::parseObjectTreeInternal(KMime::Content *node, bool onlyOneMimePart) 0408 { 0409 if (!node) { 0410 return MessagePart::Ptr(); 0411 } 0412 0413 auto parsedPart = MessagePart::Ptr(new MessagePartList(this, node)); 0414 parsedPart->setIsRoot(node->isTopLevel()); 0415 const auto contents = node->parent() ? node->parent()->contents() : KMime::Content::List{node}; 0416 for (int i = contents.indexOf(node); i < contents.size(); ++i) { 0417 node = contents.at(i); 0418 0419 QByteArray mediaType("text"); 0420 QByteArray subType("plain"); 0421 if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && !node->contentType()->subType().isEmpty()) { 0422 mediaType = node->contentType()->mediaType(); 0423 subType = node->contentType()->subType(); 0424 } 0425 0426 auto messageParts = [&] { 0427 // Try the specific type handler 0428 { 0429 auto list = processType(node, mediaType, subType); 0430 if (!list.isEmpty()) { 0431 return list; 0432 } 0433 } 0434 // Fallback to the generic handler 0435 { 0436 auto list = processType(node, mediaType, "*"); 0437 if (!list.isEmpty()) { 0438 return list; 0439 } 0440 } 0441 // Fallback to the default handler 0442 return defaultHandling(node); 0443 }(); 0444 0445 for (const auto &part : messageParts) { 0446 parsedPart->appendSubPart(part); 0447 } 0448 0449 if (onlyOneMimePart) { 0450 break; 0451 } 0452 } 0453 0454 return parsedPart; 0455 } 0456 0457 QList<MessagePart::Ptr> ObjectTreeParser::defaultHandling(KMime::Content *node) 0458 { 0459 if (node->contentType()->mimeType() == QByteArrayLiteral("application/octet-stream") 0460 && (node->contentType()->name().endsWith(QLatin1StringView("p7m")) || node->contentType()->name().endsWith(QLatin1StringView("p7s")) 0461 || node->contentType()->name().endsWith(QLatin1StringView("p7c")))) { 0462 auto list = processType(node, "application", "pkcs7-mime"); 0463 if (!list.isEmpty()) { 0464 return list; 0465 } 0466 } 0467 0468 return {AttachmentMessagePart::Ptr(new AttachmentMessagePart(this, node))}; 0469 } 0470 0471 QByteArray ObjectTreeParser::codecNameFor(KMime::Content *node) const 0472 { 0473 if (!node) { 0474 return QByteArrayLiteral("UTF-8"); 0475 } 0476 0477 QByteArray charset = node->contentType()->charset().toLower(); 0478 0479 // utf-8 is a superset of us-ascii, so we don't lose anything if we use it instead 0480 // utf-8 is used so widely nowadays that it is a good idea to use it to fix issues with broken clients. 0481 if (charset == "us-ascii") { 0482 charset = "utf-8"; 0483 } 0484 if (!charset.isEmpty()) { 0485 if (const QStringDecoder c(charset.constData()); c.isValid()) { 0486 return charset; 0487 } 0488 } 0489 // no charset means us-ascii (RFC 2045), so using local encoding should 0490 // be okay 0491 return QByteArrayLiteral("UTF-8"); 0492 }