File indexing completed on 2024-05-12 05:28:19

0001 // This file is part of KMail, the KDE mail client.
0002 // SPDX-FileCopyrightText: 2003      Marc Mutz <mutz@kde.org>
0003 // SPDX-FileCopyrightText: 2002-2004 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.net
0004 // SPDX-FileCopyrightText: 2009 Andras Mantia <andras@kdab.net>
0005 // SPDX-FileCopyrightText: 2015 Sandro Knauß <sknauss@kde.org>
0006 // SPDX-FileCopyrightText: 2017 Christian Mollekopf <mollekopf@kolabsystems.com>
0007 // SPDX-License-Identifier: GPL-2.0-or-later
0008 
0009 #include "objecttreeparser.h"
0010 
0011 #include "bodypartformatterbasefactory.h"
0012 #include "messagepart.h"
0013 
0014 #include "mimetreeparser_debug.h"
0015 
0016 #include "bodypartformatter.h"
0017 #include "utils.h"
0018 
0019 #include <KMime/Message>
0020 
0021 #include <KCharsets>
0022 #include <QByteArray>
0023 #include <QMimeDatabase>
0024 #include <QRegularExpression>
0025 #include <QTextCodec>
0026 #include <QTextStream>
0027 #include <QUrl>
0028 
0029 using namespace MimeTreeParser;
0030 
0031 /*
0032  * Collect message parts bottom up.
0033  * Filter to avoid evaluating a subtree.
0034  * Select parts to include it in the result set. Selecting a part in a branch will keep any parent parts from being selected.
0035  */
0036 static QVector<MessagePart::Ptr>
0037 collect(MessagePart::Ptr start, const std::function<bool(const MessagePartPtr &)> &evaluateSubtree, const std::function<bool(const MessagePartPtr &)> &select)
0038 {
0039     MessagePartPtr ptr = start.dynamicCast<MessagePart>();
0040     Q_ASSERT(ptr);
0041     QVector<MessagePart::Ptr> list;
0042     if (evaluateSubtree(ptr)) {
0043         for (const auto &p : ptr->subParts()) {
0044             list << ::collect(p, evaluateSubtree, select);
0045         }
0046     }
0047 
0048     // Don't consider this part if we already selected a subpart
0049     if (list.isEmpty()) {
0050         if (select(ptr)) {
0051             list << start;
0052         }
0053     }
0054     return list;
0055 }
0056 
0057 QString ObjectTreeParser::plainTextContent()
0058 {
0059     QString content;
0060     if (mParsedPart) {
0061         auto plainParts = ::collect(
0062             mParsedPart,
0063             [](const MessagePartPtr &) {
0064                 return true;
0065             },
0066             [](const MessagePartPtr &part) {
0067                 if (part->isAttachment()) {
0068                     return false;
0069                 }
0070                 if (dynamic_cast<MimeTreeParser::TextMessagePart *>(part.data())) {
0071                     return true;
0072                 }
0073                 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) {
0074                     return true;
0075                 }
0076                 return false;
0077             });
0078         for (const auto &part : plainParts) {
0079             content += part->text();
0080         }
0081     }
0082     return content;
0083 }
0084 
0085 QString ObjectTreeParser::htmlContent()
0086 {
0087     QString content;
0088     if (mParsedPart) {
0089         QVector<MessagePart::Ptr> contentParts = ::collect(
0090             mParsedPart,
0091             [](const MessagePartPtr &) {
0092                 return true;
0093             },
0094             [](const MessagePartPtr &part) {
0095                 if (dynamic_cast<MimeTreeParser::HtmlMessagePart *>(part.data())) {
0096                     return true;
0097                 }
0098                 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) {
0099                     return true;
0100                 }
0101                 return false;
0102             });
0103         for (const auto &part : contentParts) {
0104             if (auto p = dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) {
0105                 content += p->htmlContent();
0106             } else {
0107                 content += part->text();
0108             }
0109         }
0110     }
0111     return content;
0112 }
0113 
0114 static void print(QTextStream &stream, KMime::Content *node, const QString prefix = {})
0115 {
0116     QByteArray mediaType("text");
0117     QByteArray subType("plain");
0118     if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && !node->contentType()->subType().isEmpty()) {
0119         mediaType = node->contentType()->mediaType();
0120         subType = node->contentType()->subType();
0121     }
0122     stream << prefix << "! " << mediaType << subType << " isAttachment: " << KMime::isAttachment(node) << "\n";
0123     for (const auto nodeContent : node->contents()) {
0124         print(stream, nodeContent, prefix + QLatin1String(" "));
0125     }
0126 }
0127 
0128 static void print(QTextStream &stream, const MessagePart &messagePart, const QByteArray pre = {})
0129 {
0130     stream << pre << "# " << messagePart.metaObject()->className() << " isAttachment: " << messagePart.isAttachment() << "\n";
0131     for (const auto &subPart : messagePart.subParts()) {
0132         print(stream, *subPart, pre + " ");
0133     }
0134 }
0135 
0136 QString ObjectTreeParser::structureAsString() const
0137 {
0138     QString string;
0139     QTextStream stream{&string};
0140 
0141     if (mTopLevelContent) {
0142         ::print(stream, mTopLevelContent);
0143     }
0144     if (mParsedPart) {
0145         ::print(stream, *mParsedPart);
0146     }
0147     return string;
0148 }
0149 
0150 void ObjectTreeParser::print()
0151 {
0152     qInfo().noquote() << structureAsString();
0153 }
0154 
0155 static KMime::Content *find(KMime::Content *node, const std::function<bool(KMime::Content *)> &select)
0156 {
0157     QByteArray mediaType("text");
0158     QByteArray subType("plain");
0159     if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && !node->contentType()->subType().isEmpty()) {
0160         mediaType = node->contentType()->mediaType();
0161         subType = node->contentType()->subType();
0162     }
0163     if (select(node)) {
0164         return node;
0165     }
0166     for (const auto nodeContent : node->contents()) {
0167         if (const auto content = find(nodeContent, select)) {
0168             return content;
0169         }
0170     }
0171     return nullptr;
0172 }
0173 
0174 KMime::Content *ObjectTreeParser::find(const std::function<bool(KMime::Content *)> &select)
0175 {
0176     return ::find(mTopLevelContent, select);
0177 }
0178 
0179 QVector<MessagePartPtr> ObjectTreeParser::collectContentParts()
0180 {
0181     return collectContentParts(mParsedPart);
0182 }
0183 
0184 QVector<MessagePart::Ptr> ObjectTreeParser::collectContentParts(MessagePart::Ptr start)
0185 {
0186     return ::collect(
0187         start,
0188         [start](const MessagePartPtr &part) {
0189             // Ignore the top-level
0190             if (start.data() == part.data()) {
0191                 return true;
0192             }
0193             if (auto encapsulatedPart = part.dynamicCast<MimeTreeParser::EncapsulatedRfc822MessagePart>()) {
0194                 return false;
0195             }
0196             return true;
0197         },
0198         [start](const MessagePartPtr &part) {
0199             if (const auto attachment = dynamic_cast<MimeTreeParser::AttachmentMessagePart *>(part.data())) {
0200                 return attachment->mimeType() == "text/calendar";
0201             } else if (const auto text = dynamic_cast<MimeTreeParser::TextMessagePart *>(part.data())) {
0202                 auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(text->parentPart());
0203                 if (enc && enc->error()) {
0204                     return false;
0205                 }
0206                 return true;
0207             } else if (dynamic_cast<MimeTreeParser::AlternativeMessagePart *>(part.data())) {
0208                 return true;
0209             } else if (dynamic_cast<MimeTreeParser::HtmlMessagePart *>(part.data())) {
0210                 // Don't if we have an alternative part as parent
0211                 return true;
0212             } else if (dynamic_cast<MimeTreeParser::EncapsulatedRfc822MessagePart *>(part.data())) {
0213                 if (start.data() == part.data()) {
0214                     return false;
0215                 }
0216                 return true;
0217             } else if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(part.data())) {
0218                 if (enc->error()) {
0219                     return true;
0220                 }
0221                 // If we have a textpart with encrypted and unencrypted subparts we want to return the textpart
0222                 if (dynamic_cast<MimeTreeParser::TextMessagePart *>(enc->parentPart())) {
0223                     return false;
0224                 }
0225             } else if (const auto sig = dynamic_cast<MimeTreeParser::SignedMessagePart *>(part.data())) {
0226                 // Signatures without subparts already contain the text
0227                 return !sig->hasSubParts();
0228             }
0229             return false;
0230         });
0231 }
0232 
0233 QVector<MessagePart::Ptr> ObjectTreeParser::collectAttachmentParts()
0234 {
0235     QVector<MessagePart::Ptr> contentParts = ::collect(
0236         mParsedPart,
0237         [](const MessagePartPtr &) {
0238             return true;
0239         },
0240         [](const MessagePartPtr &part) {
0241             return part->isAttachment();
0242         });
0243     return contentParts;
0244 }
0245 
0246 void ObjectTreeParser::decryptParts()
0247 {
0248     decryptAndVerify();
0249 }
0250 
0251 /*
0252  * This naive implementation assumes that there is an encrypted part wrapping a signature.
0253  * For other cases we would have to process both recursively (I think?)
0254  */
0255 void ObjectTreeParser::decryptAndVerify()
0256 {
0257     // We first decrypt
0258     ::collect(
0259         mParsedPart,
0260         [](const MessagePartPtr &) {
0261             return true;
0262         },
0263         [](const MessagePartPtr &part) {
0264             if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart *>(part.data())) {
0265                 enc->startDecryption();
0266             }
0267             return false;
0268         });
0269     // And then verify the available signatures
0270     ::collect(
0271         mParsedPart,
0272         [](const MessagePartPtr &) {
0273             return true;
0274         },
0275         [](const MessagePartPtr &part) {
0276             if (const auto enc = dynamic_cast<MimeTreeParser::SignedMessagePart *>(part.data())) {
0277                 enc->startVerification();
0278             }
0279             return false;
0280         });
0281 }
0282 
0283 void ObjectTreeParser::importCertificates()
0284 {
0285     QVector<MessagePart::Ptr> contentParts = ::collect(
0286         mParsedPart,
0287         [](const MessagePartPtr &) {
0288             return true;
0289         },
0290         [](const MessagePartPtr &part) {
0291             if (const auto cert = dynamic_cast<MimeTreeParser::CertMessagePart *>(part.data())) {
0292                 cert->import();
0293             }
0294             return false;
0295         });
0296 }
0297 
0298 QString ObjectTreeParser::resolveCidLinks(const QString &html)
0299 {
0300     auto text = html;
0301     static const auto regex = QRegularExpression(QLatin1String("(src)\\s*=\\s*(\"|')(cid:[^\"']+)\\2"));
0302     auto it = regex.globalMatch(text);
0303     while (it.hasNext()) {
0304         const auto match = it.next();
0305         const auto link = QUrl(match.captured(3));
0306         auto cid = link.path();
0307         auto mailMime = const_cast<KMime::Content *>(find([=](KMime::Content *content) {
0308             if (!content || !content->contentID(false)) {
0309                 return false;
0310             }
0311             return QString::fromLatin1(content->contentID(false)->identifier()) == cid;
0312         }));
0313         if (mailMime) {
0314             const auto contentType = mailMime->contentType(false);
0315             if (!contentType) {
0316                 qWarning() << "No content type, skipping";
0317                 continue;
0318             }
0319             QMimeDatabase mimeDb;
0320             const auto mimetype = mimeDb.mimeTypeForName(QString::fromLatin1(contentType->mimeType())).name();
0321             if (mimetype.startsWith(QLatin1String("image/"))) {
0322                 // We reencode to base64 below.
0323                 const auto data = mailMime->decodedContent();
0324                 if (data.isEmpty()) {
0325                     qWarning() << "Attachment is empty.";
0326                     continue;
0327                 }
0328                 text.replace(match.captured(0), QString::fromLatin1("src=\"data:%1;base64,%2\"").arg(mimetype, QString::fromLatin1(data.toBase64())));
0329             }
0330         } else {
0331             qWarning() << "Failed to find referenced attachment: " << cid;
0332         }
0333     }
0334     return text;
0335 }
0336 
0337 //-----------------------------------------------------------------------------
0338 
0339 void ObjectTreeParser::parseObjectTree(const QByteArray &mimeMessage)
0340 {
0341     const auto mailData = KMime::CRLFtoLF(mimeMessage);
0342     mMsg = KMime::Message::Ptr(new KMime::Message);
0343     mMsg->setContent(mailData);
0344     mMsg->parse();
0345     // We avoid using mMsg->contentType()->charset(), because that will just return kmime's defaultCharset(), ISO-8859-1
0346     const auto charset = mMsg->contentType()->parameter(QStringLiteral("charset")).toLatin1();
0347     if (charset.isEmpty()) {
0348         mMsg->contentType()->setCharset("us-ascii");
0349     }
0350     parseObjectTree(mMsg.data());
0351 }
0352 
0353 void ObjectTreeParser::parseObjectTree(KMime::Content *node)
0354 {
0355     mTopLevelContent = node;
0356     mParsedPart = parseObjectTreeInternal(node, false);
0357 }
0358 
0359 MessagePartPtr ObjectTreeParser::parsedPart() const
0360 {
0361     return mParsedPart;
0362 }
0363 
0364 /*
0365  * This will lookup suitable formatters based on the type,
0366  * and let them generate a list of parts.
0367  * If the formatter generated a list of parts, then those are taken, otherwise we move on to the next match.
0368  */
0369 QVector<MessagePartPtr> ObjectTreeParser::processType(KMime::Content *node, const QByteArray &mediaType, const QByteArray &subType)
0370 {
0371     static MimeTreeParser::BodyPartFormatterBaseFactory factory;
0372     const auto sub = factory.subtypeRegistry(mediaType.constData());
0373     const auto range = sub.equal_range(subType.constData());
0374     for (auto it = range.first; it != range.second; ++it) {
0375         const auto formatter = it->second;
0376         if (!formatter) {
0377             continue;
0378         }
0379         const auto list = formatter->processList(this, node);
0380         if (!list.isEmpty()) {
0381             return list;
0382         }
0383     }
0384     return {};
0385 }
0386 
0387 MessagePart::Ptr ObjectTreeParser::parseObjectTreeInternal(KMime::Content *node, bool onlyOneMimePart)
0388 {
0389     if (!node) {
0390         return MessagePart::Ptr();
0391     }
0392 
0393     auto parsedPart = MessagePart::Ptr(new MessagePartList(this, node));
0394     parsedPart->setIsRoot(node->isTopLevel());
0395     const auto contents = node->parent() ? node->parent()->contents() : KMime::Content::List{node};
0396     for (int i = contents.indexOf(node); i < contents.size(); ++i) {
0397         node = contents.at(i);
0398 
0399         QByteArray mediaType("text");
0400         QByteArray subType("plain");
0401         if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && !node->contentType()->subType().isEmpty()) {
0402             mediaType = node->contentType()->mediaType();
0403             subType = node->contentType()->subType();
0404         }
0405 
0406         auto messageParts = [&] {
0407             // Try the specific type handler
0408             {
0409                 auto list = processType(node, mediaType, subType);
0410                 if (!list.isEmpty()) {
0411                     return list;
0412                 }
0413             }
0414             // Fallback to the generic handler
0415             {
0416                 auto list = processType(node, mediaType, "*");
0417                 if (!list.isEmpty()) {
0418                     return list;
0419                 }
0420             }
0421             // Fallback to the default handler
0422             return defaultHandling(node);
0423         }();
0424 
0425         for (const auto &part : messageParts) {
0426             parsedPart->appendSubPart(part);
0427         }
0428 
0429         if (onlyOneMimePart) {
0430             break;
0431         }
0432     }
0433 
0434     return parsedPart;
0435 }
0436 
0437 QVector<MessagePart::Ptr> ObjectTreeParser::defaultHandling(KMime::Content *node)
0438 {
0439     if (node->contentType()->mimeType() == QByteArrayLiteral("application/octet-stream")
0440         && (node->contentType()->name().endsWith(QLatin1String("p7m")) || node->contentType()->name().endsWith(QLatin1String("p7s"))
0441             || node->contentType()->name().endsWith(QLatin1String("p7c")))) {
0442         auto list = processType(node, "application", "pkcs7-mime");
0443         if (!list.isEmpty()) {
0444             return list;
0445         }
0446     }
0447 
0448     return {AttachmentMessagePart::Ptr(new AttachmentMessagePart(this, node))};
0449 }
0450 
0451 static QTextCodec *getLocalCodec()
0452 {
0453     auto codec = QTextCodec::codecForLocale();
0454 
0455     // In the case of Japan. Japanese locale name is "eucjp" but
0456     // The Japanese mail systems normally used "iso-2022-jp" of locale name.
0457     // We want to change locale name from eucjp to iso-2022-jp at KMail only.
0458 
0459     // (Introduction to i18n, 6.6 Limit of Locale technology):
0460     // EUC-JP is the de-facto standard for UNIX systems, ISO 2022-JP
0461     // is the standard for Internet, and Shift-JIS is the encoding
0462     // for Windows and Macintosh.
0463     if (codec) {
0464         const QByteArray codecNameLower = codec->name().toLower();
0465         if (codecNameLower == "eucjp"
0466 #if defined Q_OS_WIN || defined Q_OS_MACX
0467             || codecNameLower == "shift-jis" // OK?
0468 #endif
0469         ) {
0470             codec = QTextCodec::codecForName("jis7");
0471             // QTextCodec *cdc = QTextCodec::codecForName("jis7");
0472             // QTextCodec::setCodecForLocale(cdc);
0473             // KLocale::global()->setEncoding(cdc->mibEnum());
0474         }
0475     }
0476     return codec;
0477 }
0478 
0479 const QTextCodec *ObjectTreeParser::codecFor(KMime::Content *node) const
0480 {
0481     static auto localCodec = getLocalCodec();
0482     if (!node) {
0483         return localCodec;
0484     }
0485 
0486     QByteArray charset = node->contentType()->charset().toLower();
0487 
0488     // utf-8 is a superset of us-ascii, so we don't lose anything if we use it instead
0489     // utf-8 is used so widely nowadays that it is a good idea to use it to fix issues with broken clients.
0490     if (charset == "us-ascii") {
0491         charset = "utf-8";
0492     }
0493     if (!charset.isEmpty()) {
0494         if (auto c = KCharsets::charsets()->codecForName(QLatin1String(charset))) {
0495             return c;
0496         };
0497     }
0498     // no charset means us-ascii (RFC 2045), so using local encoding should
0499     // be okay
0500     return localCodec;
0501 }