File indexing completed on 2025-01-26 04:58:14

0001 /*
0002     objecttreeparser.cpp
0003 
0004     This file is part of KMail, the KDE mail client.
0005     Copyright (c) 2003      Marc Mutz <mutz@kde.org>
0006     Copyright (C) 2002-2004 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.net
0007     Copyright (c) 2009 Andras Mantia <andras@kdab.net>
0008     Copyright (c) 2015 Sandro Knauß <sknauss@kde.org>
0009     Copyright (c) 2017 Christian Mollekopf <mollekopf@kolabsystems.com>
0010 
0011     KMail is free software; you can redistribute it and/or modify it
0012     under the terms of the GNU General Public License, version 2, as
0013     published by the Free Software Foundation.
0014 
0015     KMail is distributed in the hope that it will be useful, but
0016     WITHOUT ANY WARRANTY; without even the implied warranty of
0017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0018     General Public License for more details.
0019 
0020     You should have received a copy of the GNU General Public License
0021     along with this program; if not, write to the Free Software
0022     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
0023 
0024     In addition, as a special exception, the copyright holders give
0025     permission to link the code of this program with any edition of
0026     the Qt library by Trolltech AS, Norway (or with modified versions
0027     of Qt that use the same license as Qt), and distribute linked
0028     combinations including the two.  You must obey the GNU General
0029     Public License in all respects for all of the code used other than
0030     Qt.  If you modify this file, you may extend this exception to
0031     your version of the file, but you are not obligated to do so.  If
0032     you do not wish to do so, delete this exception statement from
0033     your version.
0034 */
0035 
0036 // MessageViewer includes
0037 
0038 #include "objecttreeparser.h"
0039 
0040 #include "bodypartformatterbasefactory.h"
0041 #include "messagepart.h"
0042 
0043 #include "mimetreeparser_debug.h"
0044 
0045 #include "utils.h"
0046 #include "bodypartformatter.h"
0047 
0048 #include <KMime/Message>
0049 
0050 #include <QByteArray>
0051 #include <QUrl>
0052 #include <QMimeDatabase>
0053 #include <QTextStream>
0054 #include <KCharsets>
0055 #include <QTextCodec>
0056 
0057 using namespace MimeTreeParser;
0058 
0059 /*
0060  * Collect message parts bottom up.
0061  * Filter to avoid evaluating a subtree.
0062  * Select parts to include it in the result set. Selecting a part in a branch will keep any parent parts from being selected.
0063  */
0064 static QVector<MessagePart::Ptr> collect(MessagePart::Ptr start, const std::function<bool(const MessagePartPtr &)> &evaluateSubtree, const std::function<bool(const MessagePartPtr &)> &select)
0065 {
0066     MessagePartPtr ptr = start.dynamicCast<MessagePart>();
0067     Q_ASSERT(ptr);
0068     QVector<MessagePart::Ptr> list;
0069     if (evaluateSubtree(ptr)) {
0070         for (const auto &p: ptr->subParts()) {
0071             list << ::collect(p, evaluateSubtree, select);
0072         }
0073     }
0074 
0075     //Don't consider this part if we already selected a subpart
0076     if (list.isEmpty()) {
0077         if (select(ptr)) {
0078             list << start;
0079         }
0080     }
0081     return list;
0082 }
0083 
0084 QString ObjectTreeParser::plainTextContent()
0085 {
0086     QString content;
0087     if (mParsedPart) {
0088         auto plainParts = ::collect(mParsedPart,
0089             [] (const MessagePartPtr &) {
0090                 return true;
0091             },
0092             [] (const MessagePartPtr &part) {
0093                 if (part->isAttachment()) {
0094                     return false;
0095                 }
0096                 if (dynamic_cast<MimeTreeParser::TextMessagePart*>(part.data())) {
0097                     return true;
0098                 }
0099                 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart*>(part.data())) {
0100                     return true;
0101                 }
0102                 return false;
0103             });
0104         for (const auto &part : plainParts) {
0105             content += part->text();
0106         }
0107     }
0108     return content;
0109 }
0110 
0111 QString ObjectTreeParser::htmlContent()
0112 {
0113     QString content;
0114     if (mParsedPart) {
0115         QVector<MessagePart::Ptr> contentParts = ::collect(mParsedPart,
0116             [] (const MessagePartPtr &) {
0117                 return true;
0118             },
0119             [] (const MessagePartPtr &part) {
0120                 if (dynamic_cast<MimeTreeParser::HtmlMessagePart*>(part.data())) {
0121                     return true;
0122                 }
0123                 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart*>(part.data())) {
0124                     return true;
0125                 }
0126                 return false;
0127             });
0128         for (const auto &part : contentParts) {
0129             if (auto p = dynamic_cast<MimeTreeParser::AlternativeMessagePart*>(part.data())) {
0130                 content += p->htmlContent();
0131             } else {
0132                 content += part->text();
0133             }
0134         }
0135     }
0136     return content;
0137 }
0138 
0139 static void print(QTextStream &s, KMime::Content *node, const QString prefix = {})
0140 {
0141     QByteArray mediaType("text");
0142     QByteArray subType("plain");
0143     if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() &&
0144             !node->contentType()->subType().isEmpty()) {
0145         mediaType = node->contentType()->mediaType();
0146         subType = node->contentType()->subType();
0147     }
0148     s << prefix << "! " << mediaType << subType << " isAttachment: " << KMime::isAttachment(node) << "\n";
0149     for (const auto c: node->contents()) {
0150         print(s, c, prefix + QLatin1String(" "));
0151     }
0152 }
0153 
0154 static void print(QTextStream &s, const MessagePart &messagePart, const QByteArray pre = {})
0155 {
0156     s << pre << "# " << messagePart.metaObject()->className() << " isAttachment: " << messagePart.isAttachment() << "\n";
0157     for (const auto &p: messagePart.subParts()) {
0158         print(s, *p, pre + " ");
0159     }
0160 }
0161 
0162 QString ObjectTreeParser::structureAsString() const
0163 {
0164     QString string;
0165     QTextStream s{&string};
0166 
0167     if (mTopLevelContent) {
0168         ::print(s, mTopLevelContent);
0169     }
0170     if (mParsedPart) {
0171         ::print(s, *mParsedPart);
0172     }
0173     return string;
0174 }
0175 
0176 void ObjectTreeParser::print()
0177 {
0178     qInfo().noquote() << structureAsString();
0179 }
0180 
0181 static KMime::Content *find(KMime::Content *node, const std::function<bool(KMime::Content *)> &select)
0182 {
0183     QByteArray mediaType("text");
0184     QByteArray subType("plain");
0185     if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() &&
0186             !node->contentType()->subType().isEmpty()) {
0187         mediaType = node->contentType()->mediaType();
0188         subType = node->contentType()->subType();
0189     }
0190     if (select(node)) {
0191         return node;
0192     }
0193     for (const auto c: node->contents()) {
0194         if (const auto n = find(c, select)) {
0195             return n;
0196         }
0197     }
0198     return nullptr;
0199 }
0200 
0201 
0202 KMime::Content *ObjectTreeParser::find(const std::function<bool(KMime::Content *)> &select)
0203 {
0204     return ::find(mTopLevelContent, select);
0205 }
0206 
0207 QVector<MessagePartPtr> ObjectTreeParser::collectContentParts()
0208 {
0209     return collectContentParts(mParsedPart);
0210 }
0211 
0212 QVector<MessagePart::Ptr> ObjectTreeParser::collectContentParts(MessagePart::Ptr start)
0213 {
0214     return ::collect(start,
0215         [start] (const MessagePartPtr &part) {
0216             //Ignore the top-level
0217             if (start.data() == part.data()) {
0218                 return true;
0219             }
0220             if (auto e = part.dynamicCast<MimeTreeParser::EncapsulatedRfc822MessagePart>()) {
0221                 return false;
0222             }
0223             return true;
0224         },
0225         [start] (const MessagePartPtr &part) {
0226             if (dynamic_cast<MimeTreeParser::AttachmentMessagePart*>(part.data())) {
0227                 return false;
0228             } else if (const auto text = dynamic_cast<MimeTreeParser::TextMessagePart*>(part.data())) {
0229                 auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart*>(text->parentPart());
0230                 if (enc && enc->error()) {
0231                     return false;
0232                 }
0233                 return true;
0234             } else if (dynamic_cast<MimeTreeParser::AlternativeMessagePart*>(part.data())) {
0235                 return true;
0236             } else if (dynamic_cast<MimeTreeParser::HtmlMessagePart*>(part.data())) {
0237                 //Don't if we have an alternative part as parent
0238                 return true;
0239             } else if (dynamic_cast<MimeTreeParser::EncapsulatedRfc822MessagePart*>(part.data())) {
0240                 if (start.data() == part.data()) {
0241                     return false;
0242                 }
0243                 return true;
0244             } else if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart*>(part.data())) {
0245                 if (enc->error()) {
0246                     return true;
0247                 }
0248                 //If we have a textpart with encrypted and unencrypted subparts we want to return the textpart
0249                 if (dynamic_cast<MimeTreeParser::TextMessagePart*>(enc->parentPart())) {
0250                     return false;
0251                 }
0252             } else if (const auto sig = dynamic_cast<MimeTreeParser::SignedMessagePart*>(part.data())) {
0253                 //Signatures without subparts already contain the text
0254                 return !sig->hasSubParts();
0255             }
0256             return false;
0257         });
0258 }
0259 
0260 QVector<MessagePart::Ptr> ObjectTreeParser::collectAttachmentParts()
0261 {
0262     QVector<MessagePart::Ptr> contentParts = ::collect(mParsedPart,
0263         [] (const MessagePartPtr &) {
0264             return true;
0265         },
0266         [] (const MessagePartPtr &part) {
0267             return part->isAttachment();
0268         });
0269     return contentParts;
0270 }
0271 
0272 void ObjectTreeParser::decryptParts()
0273 {
0274     decryptAndVerify();
0275 }
0276 
0277 /*
0278  * This naive implementation assumes that there is an encrypted part wrapping a signature.
0279  * For other cases we would have to process both recursively (I think?)
0280  */
0281 void ObjectTreeParser::decryptAndVerify()
0282 {
0283     //We first decrypt
0284     ::collect(mParsedPart,
0285         [] (const MessagePartPtr &) { return true; },
0286         [] (const MessagePartPtr &part) {
0287             if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart*>(part.data())) {
0288                 enc->startDecryption();
0289             }
0290             return false;
0291         });
0292     //And then verify the available signatures
0293     ::collect(mParsedPart,
0294         [] (const MessagePartPtr &) { return true; },
0295         [] (const MessagePartPtr &part) {
0296             if (const auto enc = dynamic_cast<MimeTreeParser::SignedMessagePart*>(part.data())) {
0297                 enc->startVerification();
0298             }
0299             return false;
0300         });
0301 }
0302 
0303 void ObjectTreeParser::importCertificates()
0304 {
0305     QVector<MessagePart::Ptr> contentParts = ::collect(mParsedPart,
0306         [] (const MessagePartPtr &) { return true; },
0307         [] (const MessagePartPtr &part) {
0308             if (const auto cert = dynamic_cast<MimeTreeParser::CertMessagePart*>(part.data())) {
0309                 cert->import();
0310             }
0311             return false;
0312         });
0313 }
0314 
0315 
0316 QString ObjectTreeParser::resolveCidLinks(const QString &html)
0317 {
0318     auto text = html;
0319     const auto rx = QRegExp(QLatin1String("(src)\\s*=\\s*(\"|')(cid:[^\"']+)\\2"));
0320     int pos = 0;
0321     while ((pos = rx.indexIn(text, pos)) != -1) {
0322         const auto link = QUrl(rx.cap(3));
0323         pos += rx.matchedLength();
0324         auto cid = link.path();
0325         auto mailMime = const_cast<KMime::Content *>(find([=] (KMime::Content *c) {
0326             if (!c || !c->contentID(false)) {
0327                 return false;
0328             }
0329             return QString::fromLatin1(c->contentID(false)->identifier()) == cid;
0330         }));
0331         if (mailMime) {
0332             const auto ct = mailMime->contentType(false);
0333             if (!ct) {
0334                 qWarning() << "No content type, skipping";
0335                 continue;
0336             }
0337             QMimeDatabase mimeDb;
0338             const auto mimetype = mimeDb.mimeTypeForName(QString::fromLatin1(ct->mimeType())).name();
0339             if (mimetype.startsWith(QLatin1String("image/"))) {
0340                 //We reencode to base64 below.
0341                 const auto data = mailMime->decodedContent();
0342                 if (data.isEmpty()) {
0343                     qWarning() << "Attachment is empty.";
0344                     continue;
0345                 }
0346                 text.replace(rx.cap(0), QString::fromLatin1("src=\"data:%1;base64,%2\"").arg(mimetype, QString::fromLatin1(data.toBase64())));
0347             }
0348         } else {
0349             qWarning() << "Failed to find referenced attachment: " << cid;
0350         }
0351     }
0352     return text;
0353 }
0354 
0355 //-----------------------------------------------------------------------------
0356 
0357 void ObjectTreeParser::parseObjectTree(const QByteArray &mimeMessage)
0358 {
0359     const auto mailData = KMime::CRLFtoLF(mimeMessage);
0360     mMsg = KMime::Message::Ptr(new KMime::Message);
0361     mMsg->setContent(mailData);
0362     mMsg->parse();
0363     //We avoid using mMsg->contentType()->charset(), because that will just return kmime's defaultCharset(), ISO-8859-1
0364     const auto charset = mMsg->contentType()->parameter(QStringLiteral("charset")).toLatin1();
0365     if (charset.isEmpty()) {
0366         mMsg->contentType()->setCharset("us-ascii");
0367     }
0368     parseObjectTree(mMsg.data());
0369 }
0370 
0371 void ObjectTreeParser::parseObjectTree(KMime::Content *node)
0372 {
0373     mTopLevelContent = node;
0374     mParsedPart = parseObjectTreeInternal(node, false);
0375 }
0376 
0377 MessagePartPtr ObjectTreeParser::parsedPart() const
0378 {
0379     return mParsedPart;
0380 }
0381 
0382 /*
0383  * This will lookup suitable formatters based on the type,
0384  * and let them generate a list of parts.
0385  * If the formatter generated a list of parts, then those are taken, otherwise we move on to the next match.
0386  */
0387 QVector<MessagePartPtr> ObjectTreeParser::processType(KMime::Content *node, const QByteArray &mediaType, const QByteArray &subType)
0388 {
0389     static MimeTreeParser::BodyPartFormatterBaseFactory factory;
0390     const auto sub = factory.subtypeRegistry(mediaType.constData());
0391     auto range =  sub.equal_range(subType.constData());
0392     for (auto it = range.first; it != range.second; ++it) {
0393         const auto formatter = (*it).second;
0394         if (!formatter) {
0395             continue;
0396         }
0397         const auto list = formatter->processList(this, node);
0398         if (!list.isEmpty()) {
0399             return list;
0400         }
0401     }
0402     return {};
0403 }
0404 
0405 MessagePart::Ptr ObjectTreeParser::parseObjectTreeInternal(KMime::Content *node, bool onlyOneMimePart)
0406 {
0407     if (!node) {
0408         return MessagePart::Ptr();
0409     }
0410 
0411     auto parsedPart = MessagePart::Ptr(new MessagePartList(this, node));
0412     parsedPart->setIsRoot(node->isTopLevel());
0413     const auto contents = node->parent() ? node->parent()->contents() : KMime::Content::List{node};
0414     for (int i = contents.indexOf(node); i < contents.size(); ++i) {
0415         node = contents.at(i);
0416 
0417         QByteArray mediaType("text");
0418         QByteArray subType("plain");
0419         if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() &&
0420                 !node->contentType()->subType().isEmpty()) {
0421             mediaType = node->contentType()->mediaType();
0422             subType = node->contentType()->subType();
0423         }
0424 
0425         auto mp = [&] {
0426             //Try the specific type handler
0427             {
0428                 auto list = processType(node, mediaType, subType);
0429                 if (!list.isEmpty()) {
0430                     return list;
0431                 }
0432             }
0433             //Fallback to the generic handler
0434             {
0435                 auto list = processType(node, mediaType, "*");
0436                 if (!list.isEmpty()) {
0437                     return list;
0438                 }
0439             }
0440             //Fallback to the default handler
0441             return defaultHandling(node);
0442         }();
0443 
0444         for (const auto &p : mp) {
0445             parsedPart->appendSubPart(p);
0446         }
0447 
0448         if (onlyOneMimePart) {
0449             break;
0450         }
0451     }
0452 
0453     return parsedPart;
0454 }
0455 
0456 QVector<MessagePart::Ptr> ObjectTreeParser::defaultHandling(KMime::Content *node)
0457 {
0458     if (node->contentType()->mimeType() == QByteArrayLiteral("application/octet-stream") &&
0459             (node->contentType()->name().endsWith(QLatin1String("p7m")) ||
0460              node->contentType()->name().endsWith(QLatin1String("p7s")) ||
0461              node->contentType()->name().endsWith(QLatin1String("p7c"))
0462             )) {
0463         auto list = processType(node, "application", "pkcs7-mime");
0464         if (!list.isEmpty()) {
0465             return list;
0466         }
0467     }
0468 
0469     return {AttachmentMessagePart::Ptr(new AttachmentMessagePart(this, node))};
0470 }
0471 
0472 
0473 static QTextCodec *getLocalCodec()
0474 {
0475     auto codec = QTextCodec::codecForLocale();
0476 
0477     // In the case of Japan. Japanese locale name is "eucjp" but
0478     // The Japanese mail systems normally used "iso-2022-jp" of locale name.
0479     // We want to change locale name from eucjp to iso-2022-jp at KMail only.
0480 
0481     // (Introduction to i18n, 6.6 Limit of Locale technology):
0482     // EUC-JP is the de-facto standard for UNIX systems, ISO 2022-JP
0483     // is the standard for Internet, and Shift-JIS is the encoding
0484     // for Windows and Macintosh.
0485     if (codec) {
0486         const QByteArray codecNameLower = codec->name().toLower();
0487         if (codecNameLower == "eucjp"
0488 #if defined Q_OS_WIN || defined Q_OS_MACX
0489                 || codecNameLower == "shift-jis" // OK?
0490 #endif
0491            ) {
0492             codec = QTextCodec::codecForName("jis7");
0493             // QTextCodec *cdc = QTextCodec::codecForName("jis7");
0494             // QTextCodec::setCodecForLocale(cdc);
0495             // KLocale::global()->setEncoding(cdc->mibEnum());
0496         }
0497     }
0498     return codec;
0499 }
0500 
0501 const QTextCodec *ObjectTreeParser::codecFor(KMime::Content *node) const
0502 {
0503     static auto localCodec = getLocalCodec();
0504     if (!node) {
0505         return localCodec;
0506     }
0507 
0508     QByteArray charset = node->contentType()->charset().toLower();
0509 
0510     // utf-8 is a superset of us-ascii, so we don't loose anything, if we it insead
0511     // utf-8 is nowadays that widely, that it is a good guess to use it to fix issus with broken clients.
0512     if (charset == "us-ascii") {
0513         charset = "utf-8";
0514     }
0515     if (!charset.isEmpty()) {
0516         if (auto c = KCharsets::charsets()->codecForName(QLatin1String(charset))) {
0517             return c;
0518         };
0519     }
0520     // no charset means us-ascii (RFC 2045), so using local encoding should
0521     // be okay
0522     return localCodec;
0523 }