File indexing completed on 2024-12-22 05:05:20

0001 // This file is part of KMail, the KDE mail client.
0002 // SPDX-FileCopyrightText: 2003      Marc Mutz <mutz@kde.org>
0003 // SPDX-FileCopyrightText: 2002-2004 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.net
0004 // SPDX-FileCopyrightText: 2009 Andras Mantia <andras@kdab.net>
0005 // SPDX-FileCopyrightText: 2015 Sandro Knauß <sknauss@kde.org>
0006 // SPDX-FileCopyrightText: 2017 Christian Mollekopf <mollekopf@kolabsystems.com>
0007 // SPDX-License-Identifier: GPL-2.0-or-later
0008 
0009 #include "objecttreeparser.h"
0010 
0011 #include "bodypartformatterbasefactory.h"
0012 
0013 #include "bodypartformatter.h"
0014 
0015 #include <KMime/Message>
0016 
0017 #include <KCharsets>
0018 #include <QByteArray>
0019 #include <QDebug>
0020 #include <QMimeDatabase>
0021 #include <QRegularExpression>
0022 #include <QStringDecoder>
0023 #include <QTextStream>
0024 #include <QUrl>
0025 
0026 using namespace MimeTreeParser;
0027 
0028 /*
0029  * Collect message parts bottom up.
0030  * Filter to avoid evaluating a subtree.
0031  * Select parts to include it in the result set. Selecting a part in a branch will keep any parent parts from being selected.
0032  */
0033 static QList<MessagePart::Ptr> collect(MessagePart::Ptr start,
0034                                        const std::function<bool(const MessagePart::Ptr &)> &evaluateSubtree,
0035                                        const std::function<bool(const MessagePart::Ptr &)> &select)
0036 {
0037     auto ptr = start.dynamicCast<MessagePart>();
0038     Q_ASSERT(ptr);
0039     MessagePart::List list;
0040     if (evaluateSubtree(ptr)) {
0041         for (const auto &p : ptr->subParts()) {
0042             list << ::collect(p, evaluateSubtree, select);
0043         }
0044     }
0045 
0046     // Don't consider this part if we already selected a subpart
0047     if (list.isEmpty()) {
0048         if (select(ptr)) {
0049             list << start;
0050         }
0051     }
0052     return list;
0053 }
0054 
0055 QString ObjectTreeParser::plainTextContent()
0056 {
0057     QString content;
0058     if (mParsedPart) {
0059         auto plainParts = ::collect(
0060             mParsedPart,
0061             [](const MessagePart::Ptr &) {
0062                 return true;
0063             },
0064             [](const MessagePart::Ptr &part) {
0065                 if (part->isAttachment()) {
0066                     return false;
0067                 }
0068                 if (dynamic_cast<MimeTreeParser::TextMessagePart *>(part.data())) {
0069                     return true;
0070                 }
0071                 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) {
0072                     return true;
0073                 }
0074                 return false;
0075             });
0076         for (const auto &part : plainParts) {
0077             content += part->text();
0078         }
0079     }
0080     return content;
0081 }
0082 
0083 QString ObjectTreeParser::htmlContent()
0084 {
0085     QString content;
0086     if (mParsedPart) {
0087         MessagePart::List contentParts = ::collect(
0088             mParsedPart,
0089             [](const MessagePart::Ptr &) {
0090                 return true;
0091             },
0092             [](const MessagePart::Ptr &part) {
0093                 if (dynamic_cast<MimeTreeParser::HtmlMessagePart *>(part.data())) {
0094                     return true;
0095                 }
0096                 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) {
0097                     return true;
0098                 }
0099                 return false;
0100             });
0101         for (const auto &part : contentParts) {
0102             if (auto p = dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) {
0103                 content += p->htmlContent();
0104             } else {
0105                 content += part->text();
0106             }
0107         }
0108     }
0109     return content;
0110 }
0111 
0112 bool ObjectTreeParser::hasEncryptedParts() const
0113 {
0114     bool result = false;
0115 
0116     ::collect(
0117         mParsedPart,
0118         [](const MessagePart::Ptr &) {
0119             return true;
0120         },
0121         [&result](const MessagePart::Ptr &part) {
0122             if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(part.data())) {
0123                 result = true;
0124             }
0125             return false;
0126         });
0127 
0128     return result;
0129 }
0130 
0131 bool ObjectTreeParser::hasSignedParts() const
0132 {
0133     bool result = false;
0134 
0135     ::collect(
0136         mParsedPart,
0137         [](const MessagePart::Ptr &) {
0138             return true;
0139         },
0140         [&result](const MessagePart::Ptr &part) {
0141             if (const auto enc = dynamic_cast<MimeTreeParser::SignedMessagePart *>(part.data())) {
0142                 result = true;
0143             }
0144             return false;
0145         });
0146 
0147     return result;
0148 }
0149 
0150 static void print(QTextStream &stream, KMime::Content *node, const QString prefix = {})
0151 {
0152     QByteArray mediaType("text");
0153     QByteArray subType("plain");
0154     if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && !node->contentType()->subType().isEmpty()) {
0155         mediaType = node->contentType()->mediaType();
0156         subType = node->contentType()->subType();
0157     }
0158     stream << prefix << "! " << mediaType << subType << " isAttachment: " << KMime::isAttachment(node) << "\n";
0159     const auto contents = node->contents();
0160     for (const auto nodeContent : contents) {
0161         print(stream, nodeContent, prefix + QLatin1StringView(" "));
0162     }
0163 }
0164 
0165 static void print(QTextStream &stream, const MessagePart &messagePart, const QByteArray pre = {})
0166 {
0167     stream << pre << "# " << messagePart.metaObject()->className() << " isAttachment: " << messagePart.isAttachment() << "\n";
0168     const auto subParts = messagePart.subParts();
0169     for (const auto &subPart : subParts) {
0170         print(stream, *subPart, pre + " ");
0171     }
0172 }
0173 
0174 QString ObjectTreeParser::structureAsString() const
0175 {
0176     QString string;
0177     QTextStream stream{&string};
0178 
0179     if (mTopLevelContent) {
0180         ::print(stream, mTopLevelContent);
0181     }
0182     if (mParsedPart) {
0183         ::print(stream, *mParsedPart);
0184     }
0185     return string;
0186 }
0187 
0188 void ObjectTreeParser::print()
0189 {
0190     qInfo().noquote() << structureAsString();
0191 }
0192 
0193 static KMime::Content *find(KMime::Content *node, const std::function<bool(KMime::Content *)> &select)
0194 {
0195     QByteArray mediaType("text");
0196     QByteArray subType("plain");
0197     if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && !node->contentType()->subType().isEmpty()) {
0198         mediaType = node->contentType()->mediaType();
0199         subType = node->contentType()->subType();
0200     }
0201     if (select(node)) {
0202         return node;
0203     }
0204     const auto contents = node->contents();
0205     for (const auto nodeContent : contents) {
0206         if (const auto content = find(nodeContent, select)) {
0207             return content;
0208         }
0209     }
0210     return nullptr;
0211 }
0212 
0213 KMime::Content *ObjectTreeParser::find(const std::function<bool(KMime::Content *)> &select)
0214 {
0215     return ::find(mTopLevelContent, select);
0216 }
0217 
0218 MessagePart::List ObjectTreeParser::collectContentParts()
0219 {
0220     return collectContentParts(mParsedPart);
0221 }
0222 
0223 MessagePart::List ObjectTreeParser::collectContentParts(MessagePart::Ptr start)
0224 {
0225     return ::collect(
0226         start,
0227         [start](const MessagePart::Ptr &part) {
0228             // Ignore the top-level
0229             if (start.data() == part.data()) {
0230                 return true;
0231             }
0232             if (auto encapsulatedPart = part.dynamicCast<MimeTreeParser::EncapsulatedRfc822MessagePart>()) {
0233                 return false;
0234             }
0235             return true;
0236         },
0237         [start](const MessagePart::Ptr &part) {
0238             if (const auto attachment = dynamic_cast<MimeTreeParser::AttachmentMessagePart *>(part.data())) {
0239                 return attachment->mimeType() == "text/calendar";
0240             } else if (const auto text = dynamic_cast<MimeTreeParser::TextMessagePart *>(part.data())) {
0241                 auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(text->parentPart());
0242                 if (enc && enc->error()) {
0243                     return false;
0244                 }
0245 
0246                 return true;
0247             } else if (dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) {
0248                 return true;
0249             } else if (dynamic_cast<MimeTreeParser::HtmlMessagePart *>(part.data())) {
0250                 // Don't if we have an alternative part as parent
0251                 return true;
0252             } else if (dynamic_cast<MimeTreeParser::EncapsulatedRfc822MessagePart *>(part.data())) {
0253                 if (start.data() == part.data()) {
0254                     return false;
0255                 }
0256                 return true;
0257             } else if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(part.data())) {
0258                 if (enc->error()) {
0259                     return true;
0260                 }
0261                 // If we have a textpart with encrypted and unencrypted subparts we want to return the textpart
0262                 if (dynamic_cast<MimeTreeParser::TextMessagePart *>(enc->parentPart())) {
0263                     return false;
0264                 }
0265             } else if (const auto sig = dynamic_cast<MimeTreeParser::SignedMessagePart *>(part.data())) {
0266                 // Signatures without subparts already contain the text
0267                 return !sig->hasSubParts();
0268             }
0269             return false;
0270         });
0271 }
0272 
0273 MessagePart::List ObjectTreeParser::collectAttachmentParts()
0274 {
0275     MessagePart::List contentParts = ::collect(
0276         mParsedPart,
0277         [](const MessagePart::Ptr &) {
0278             return true;
0279         },
0280         [](const MessagePart::Ptr &part) {
0281             return part->isAttachment();
0282         });
0283     return contentParts;
0284 }
0285 
0286 /*
0287  * This naive implementation assumes that there is an encrypted part wrapping a signature.
0288  * For other cases we would have to process both recursively (I think?)
0289  */
0290 void ObjectTreeParser::decryptAndVerify()
0291 {
0292     // We first decrypt
0293     ::collect(
0294         mParsedPart,
0295         [](const MessagePart::Ptr &) {
0296             return true;
0297         },
0298         [](const MessagePart::Ptr &part) {
0299             if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(part.data())) {
0300                 enc->startDecryption();
0301             }
0302             return false;
0303         });
0304     // And then verify the available signatures
0305     ::collect(
0306         mParsedPart,
0307         [](const MessagePart::Ptr &) {
0308             return true;
0309         },
0310         [](const MessagePart::Ptr &part) {
0311             if (const auto enc = dynamic_cast<MimeTreeParser::SignedMessagePart *>(part.data())) {
0312                 enc->startVerification();
0313             }
0314             return false;
0315         });
0316 }
0317 
0318 QString ObjectTreeParser::resolveCidLinks(const QString &html)
0319 {
0320     auto text = html;
0321     static const auto regex = QRegularExpression(QLatin1StringView("(src)\\s*=\\s*(\"|')(cid:[^\"']+)\\2"));
0322     auto it = regex.globalMatch(text);
0323     while (it.hasNext()) {
0324         const auto match = it.next();
0325         const auto link = QUrl(match.captured(3));
0326         auto cid = link.path();
0327         auto mailMime = const_cast<KMime::Content *>(find([=](KMime::Content *content) {
0328             if (!content || !content->contentID(false)) {
0329                 return false;
0330             }
0331             return QString::fromLatin1(content->contentID(false)->identifier()) == cid;
0332         }));
0333         if (mailMime) {
0334             const auto contentType = mailMime->contentType(false);
0335             if (!contentType) {
0336                 qWarning() << "No content type, skipping";
0337                 continue;
0338             }
0339             QMimeDatabase mimeDb;
0340             const auto mimetype = mimeDb.mimeTypeForName(QString::fromLatin1(contentType->mimeType())).name();
0341             if (mimetype.startsWith(QLatin1StringView("image/"))) {
0342                 // We reencode to base64 below.
0343                 const auto data = mailMime->decodedContent();
0344                 if (data.isEmpty()) {
0345                     qWarning() << "Attachment is empty.";
0346                     continue;
0347                 }
0348                 text.replace(match.captured(0), QString::fromLatin1("src=\"data:%1;base64,%2\"").arg(mimetype, QString::fromLatin1(data.toBase64())));
0349             }
0350         } else {
0351             qWarning() << "Failed to find referenced attachment: " << cid;
0352         }
0353     }
0354     return text;
0355 }
0356 
0357 //-----------------------------------------------------------------------------
0358 
0359 void ObjectTreeParser::parseObjectTree(const QByteArray &mimeMessage)
0360 {
0361     const auto mailData = KMime::CRLFtoLF(mimeMessage);
0362     mMsg = KMime::Message::Ptr(new KMime::Message);
0363     mMsg->setContent(mailData);
0364     mMsg->parse();
0365     // We avoid using mMsg->contentType()->charset(), because that will just return kmime's defaultCharset(), ISO-8859-1
0366     const auto charset = mMsg->contentType()->parameter(QStringLiteral("charset")).toLatin1();
0367     if (charset.isEmpty()) {
0368         mMsg->contentType()->setCharset("us-ascii");
0369     }
0370     parseObjectTree(mMsg.data());
0371 }
0372 
0373 void ObjectTreeParser::parseObjectTree(KMime::Content *node)
0374 {
0375     mTopLevelContent = node;
0376     mParsedPart = parseObjectTreeInternal(node, false);
0377 }
0378 
0379 MessagePart::Ptr ObjectTreeParser::parsedPart() const
0380 {
0381     return mParsedPart;
0382 }
0383 
0384 /*
0385  * This will lookup suitable formatters based on the type,
0386  * and let them generate a list of parts.
0387  * If the formatter generated a list of parts, then those are taken, otherwise we move on to the next match.
0388  */
0389 MessagePart::List ObjectTreeParser::processType(KMime::Content *node, const QByteArray &mediaType, const QByteArray &subType)
0390 {
0391     static MimeTreeParser::BodyPartFormatterBaseFactory factory;
0392     const auto sub = factory.subtypeRegistry(mediaType.constData());
0393     const auto range = sub.equal_range(subType.constData());
0394     for (auto it = range.first; it != range.second; ++it) {
0395         const auto formatter = it->second;
0396         if (!formatter) {
0397             continue;
0398         }
0399         const auto list = formatter->processList(this, node);
0400         if (!list.isEmpty()) {
0401             return list;
0402         }
0403     }
0404     return {};
0405 }
0406 
0407 MessagePart::Ptr ObjectTreeParser::parseObjectTreeInternal(KMime::Content *node, bool onlyOneMimePart)
0408 {
0409     if (!node) {
0410         return MessagePart::Ptr();
0411     }
0412 
0413     auto parsedPart = MessagePart::Ptr(new MessagePartList(this, node));
0414     parsedPart->setIsRoot(node->isTopLevel());
0415     const auto contents = node->parent() ? node->parent()->contents() : KMime::Content::List{node};
0416     for (int i = contents.indexOf(node); i < contents.size(); ++i) {
0417         node = contents.at(i);
0418 
0419         QByteArray mediaType("text");
0420         QByteArray subType("plain");
0421         if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && !node->contentType()->subType().isEmpty()) {
0422             mediaType = node->contentType()->mediaType();
0423             subType = node->contentType()->subType();
0424         }
0425 
0426         auto messageParts = [&] {
0427             // Try the specific type handler
0428             {
0429                 auto list = processType(node, mediaType, subType);
0430                 if (!list.isEmpty()) {
0431                     return list;
0432                 }
0433             }
0434             // Fallback to the generic handler
0435             {
0436                 auto list = processType(node, mediaType, "*");
0437                 if (!list.isEmpty()) {
0438                     return list;
0439                 }
0440             }
0441             // Fallback to the default handler
0442             return defaultHandling(node);
0443         }();
0444 
0445         for (const auto &part : messageParts) {
0446             parsedPart->appendSubPart(part);
0447         }
0448 
0449         if (onlyOneMimePart) {
0450             break;
0451         }
0452     }
0453 
0454     return parsedPart;
0455 }
0456 
0457 QList<MessagePart::Ptr> ObjectTreeParser::defaultHandling(KMime::Content *node)
0458 {
0459     if (node->contentType()->mimeType() == QByteArrayLiteral("application/octet-stream")
0460         && (node->contentType()->name().endsWith(QLatin1StringView("p7m")) || node->contentType()->name().endsWith(QLatin1StringView("p7s"))
0461             || node->contentType()->name().endsWith(QLatin1StringView("p7c")))) {
0462         auto list = processType(node, "application", "pkcs7-mime");
0463         if (!list.isEmpty()) {
0464             return list;
0465         }
0466     }
0467 
0468     return {AttachmentMessagePart::Ptr(new AttachmentMessagePart(this, node))};
0469 }
0470 
0471 QByteArray ObjectTreeParser::codecNameFor(KMime::Content *node) const
0472 {
0473     if (!node) {
0474         return QByteArrayLiteral("UTF-8");
0475     }
0476 
0477     QByteArray charset = node->contentType()->charset().toLower();
0478 
0479     // utf-8 is a superset of us-ascii, so we don't lose anything if we use it instead
0480     // utf-8 is used so widely nowadays that it is a good idea to use it to fix issues with broken clients.
0481     if (charset == "us-ascii") {
0482         charset = "utf-8";
0483     }
0484     if (!charset.isEmpty()) {
0485         if (const QStringDecoder c(charset.constData()); c.isValid()) {
0486             return charset;
0487         }
0488     }
0489     // no charset means us-ascii (RFC 2045), so using local encoding should
0490     // be okay
0491     return QByteArrayLiteral("UTF-8");
0492 }