File indexing completed on 2025-01-26 04:58:14
0001 /* 0002 objecttreeparser.cpp 0003 0004 This file is part of KMail, the KDE mail client. 0005 Copyright (c) 2003 Marc Mutz <mutz@kde.org> 0006 Copyright (C) 2002-2004 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.net 0007 Copyright (c) 2009 Andras Mantia <andras@kdab.net> 0008 Copyright (c) 2015 Sandro Knauß <sknauss@kde.org> 0009 Copyright (c) 2017 Christian Mollekopf <mollekopf@kolabsystems.com> 0010 0011 KMail is free software; you can redistribute it and/or modify it 0012 under the terms of the GNU General Public License, version 2, as 0013 published by the Free Software Foundation. 0014 0015 KMail is distributed in the hope that it will be useful, but 0016 WITHOUT ANY WARRANTY; without even the implied warranty of 0017 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0018 General Public License for more details. 0019 0020 You should have received a copy of the GNU General Public License 0021 along with this program; if not, write to the Free Software 0022 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 0023 0024 In addition, as a special exception, the copyright holders give 0025 permission to link the code of this program with any edition of 0026 the Qt library by Trolltech AS, Norway (or with modified versions 0027 of Qt that use the same license as Qt), and distribute linked 0028 combinations including the two. You must obey the GNU General 0029 Public License in all respects for all of the code used other than 0030 Qt. If you modify this file, you may extend this exception to 0031 your version of the file, but you are not obligated to do so. If 0032 you do not wish to do so, delete this exception statement from 0033 your version. 0034 */ 0035 0036 // MessageViewer includes 0037 0038 #include "objecttreeparser.h" 0039 0040 #include "bodypartformatterbasefactory.h" 0041 #include "messagepart.h" 0042 0043 #include "mimetreeparser_debug.h" 0044 0045 #include "utils.h" 0046 #include "bodypartformatter.h" 0047 0048 #include <KMime/Message> 0049 0050 #include <QByteArray> 0051 #include <QUrl> 0052 #include <QMimeDatabase> 0053 #include <QTextStream> 0054 #include <KCharsets> 0055 #include <QTextCodec> 0056 0057 using namespace MimeTreeParser; 0058 0059 /* 0060 * Collect message parts bottom up. 0061 * Filter to avoid evaluating a subtree. 0062 * Select parts to include it in the result set. Selecting a part in a branch will keep any parent parts from being selected. 0063 */ 0064 static QVector<MessagePart::Ptr> collect(MessagePart::Ptr start, const std::function<bool(const MessagePartPtr &)> &evaluateSubtree, const std::function<bool(const MessagePartPtr &)> &select) 0065 { 0066 MessagePartPtr ptr = start.dynamicCast<MessagePart>(); 0067 Q_ASSERT(ptr); 0068 QVector<MessagePart::Ptr> list; 0069 if (evaluateSubtree(ptr)) { 0070 for (const auto &p: ptr->subParts()) { 0071 list << ::collect(p, evaluateSubtree, select); 0072 } 0073 } 0074 0075 //Don't consider this part if we already selected a subpart 0076 if (list.isEmpty()) { 0077 if (select(ptr)) { 0078 list << start; 0079 } 0080 } 0081 return list; 0082 } 0083 0084 QString ObjectTreeParser::plainTextContent() 0085 { 0086 QString content; 0087 if (mParsedPart) { 0088 auto plainParts = ::collect(mParsedPart, 0089 [] (const MessagePartPtr &) { 0090 return true; 0091 }, 0092 [] (const MessagePartPtr &part) { 0093 if (part->isAttachment()) { 0094 return false; 0095 } 0096 if (dynamic_cast<MimeTreeParser::TextMessagePart*>(part.data())) { 0097 return true; 0098 } 0099 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart*>(part.data())) { 0100 return true; 0101 } 0102 return false; 0103 }); 0104 for (const auto &part : plainParts) { 0105 content += part->text(); 0106 } 0107 } 0108 return content; 0109 } 0110 0111 QString ObjectTreeParser::htmlContent() 0112 { 0113 QString content; 0114 if (mParsedPart) { 0115 QVector<MessagePart::Ptr> contentParts = ::collect(mParsedPart, 0116 [] (const MessagePartPtr &) { 0117 return true; 0118 }, 0119 [] (const MessagePartPtr &part) { 0120 if (dynamic_cast<MimeTreeParser::HtmlMessagePart*>(part.data())) { 0121 return true; 0122 } 0123 if (dynamic_cast<MimeTreeParser::AlternativeMessagePart*>(part.data())) { 0124 return true; 0125 } 0126 return false; 0127 }); 0128 for (const auto &part : contentParts) { 0129 if (auto p = dynamic_cast<MimeTreeParser::AlternativeMessagePart*>(part.data())) { 0130 content += p->htmlContent(); 0131 } else { 0132 content += part->text(); 0133 } 0134 } 0135 } 0136 return content; 0137 } 0138 0139 static void print(QTextStream &s, KMime::Content *node, const QString prefix = {}) 0140 { 0141 QByteArray mediaType("text"); 0142 QByteArray subType("plain"); 0143 if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && 0144 !node->contentType()->subType().isEmpty()) { 0145 mediaType = node->contentType()->mediaType(); 0146 subType = node->contentType()->subType(); 0147 } 0148 s << prefix << "! " << mediaType << subType << " isAttachment: " << KMime::isAttachment(node) << "\n"; 0149 for (const auto c: node->contents()) { 0150 print(s, c, prefix + QLatin1String(" ")); 0151 } 0152 } 0153 0154 static void print(QTextStream &s, const MessagePart &messagePart, const QByteArray pre = {}) 0155 { 0156 s << pre << "# " << messagePart.metaObject()->className() << " isAttachment: " << messagePart.isAttachment() << "\n"; 0157 for (const auto &p: messagePart.subParts()) { 0158 print(s, *p, pre + " "); 0159 } 0160 } 0161 0162 QString ObjectTreeParser::structureAsString() const 0163 { 0164 QString string; 0165 QTextStream s{&string}; 0166 0167 if (mTopLevelContent) { 0168 ::print(s, mTopLevelContent); 0169 } 0170 if (mParsedPart) { 0171 ::print(s, *mParsedPart); 0172 } 0173 return string; 0174 } 0175 0176 void ObjectTreeParser::print() 0177 { 0178 qInfo().noquote() << structureAsString(); 0179 } 0180 0181 static KMime::Content *find(KMime::Content *node, const std::function<bool(KMime::Content *)> &select) 0182 { 0183 QByteArray mediaType("text"); 0184 QByteArray subType("plain"); 0185 if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && 0186 !node->contentType()->subType().isEmpty()) { 0187 mediaType = node->contentType()->mediaType(); 0188 subType = node->contentType()->subType(); 0189 } 0190 if (select(node)) { 0191 return node; 0192 } 0193 for (const auto c: node->contents()) { 0194 if (const auto n = find(c, select)) { 0195 return n; 0196 } 0197 } 0198 return nullptr; 0199 } 0200 0201 0202 KMime::Content *ObjectTreeParser::find(const std::function<bool(KMime::Content *)> &select) 0203 { 0204 return ::find(mTopLevelContent, select); 0205 } 0206 0207 QVector<MessagePartPtr> ObjectTreeParser::collectContentParts() 0208 { 0209 return collectContentParts(mParsedPart); 0210 } 0211 0212 QVector<MessagePart::Ptr> ObjectTreeParser::collectContentParts(MessagePart::Ptr start) 0213 { 0214 return ::collect(start, 0215 [start] (const MessagePartPtr &part) { 0216 //Ignore the top-level 0217 if (start.data() == part.data()) { 0218 return true; 0219 } 0220 if (auto e = part.dynamicCast<MimeTreeParser::EncapsulatedRfc822MessagePart>()) { 0221 return false; 0222 } 0223 return true; 0224 }, 0225 [start] (const MessagePartPtr &part) { 0226 if (dynamic_cast<MimeTreeParser::AttachmentMessagePart*>(part.data())) { 0227 return false; 0228 } else if (const auto text = dynamic_cast<MimeTreeParser::TextMessagePart*>(part.data())) { 0229 auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart*>(text->parentPart()); 0230 if (enc && enc->error()) { 0231 return false; 0232 } 0233 return true; 0234 } else if (dynamic_cast<MimeTreeParser::AlternativeMessagePart*>(part.data())) { 0235 return true; 0236 } else if (dynamic_cast<MimeTreeParser::HtmlMessagePart*>(part.data())) { 0237 //Don't if we have an alternative part as parent 0238 return true; 0239 } else if (dynamic_cast<MimeTreeParser::EncapsulatedRfc822MessagePart*>(part.data())) { 0240 if (start.data() == part.data()) { 0241 return false; 0242 } 0243 return true; 0244 } else if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart*>(part.data())) { 0245 if (enc->error()) { 0246 return true; 0247 } 0248 //If we have a textpart with encrypted and unencrypted subparts we want to return the textpart 0249 if (dynamic_cast<MimeTreeParser::TextMessagePart*>(enc->parentPart())) { 0250 return false; 0251 } 0252 } else if (const auto sig = dynamic_cast<MimeTreeParser::SignedMessagePart*>(part.data())) { 0253 //Signatures without subparts already contain the text 0254 return !sig->hasSubParts(); 0255 } 0256 return false; 0257 }); 0258 } 0259 0260 QVector<MessagePart::Ptr> ObjectTreeParser::collectAttachmentParts() 0261 { 0262 QVector<MessagePart::Ptr> contentParts = ::collect(mParsedPart, 0263 [] (const MessagePartPtr &) { 0264 return true; 0265 }, 0266 [] (const MessagePartPtr &part) { 0267 return part->isAttachment(); 0268 }); 0269 return contentParts; 0270 } 0271 0272 void ObjectTreeParser::decryptParts() 0273 { 0274 decryptAndVerify(); 0275 } 0276 0277 /* 0278 * This naive implementation assumes that there is an encrypted part wrapping a signature. 0279 * For other cases we would have to process both recursively (I think?) 0280 */ 0281 void ObjectTreeParser::decryptAndVerify() 0282 { 0283 //We first decrypt 0284 ::collect(mParsedPart, 0285 [] (const MessagePartPtr &) { return true; }, 0286 [] (const MessagePartPtr &part) { 0287 if (const auto enc = dynamic_cast<MimeTreeParser::EncryptedMessagePart*>(part.data())) { 0288 enc->startDecryption(); 0289 } 0290 return false; 0291 }); 0292 //And then verify the available signatures 0293 ::collect(mParsedPart, 0294 [] (const MessagePartPtr &) { return true; }, 0295 [] (const MessagePartPtr &part) { 0296 if (const auto enc = dynamic_cast<MimeTreeParser::SignedMessagePart*>(part.data())) { 0297 enc->startVerification(); 0298 } 0299 return false; 0300 }); 0301 } 0302 0303 void ObjectTreeParser::importCertificates() 0304 { 0305 QVector<MessagePart::Ptr> contentParts = ::collect(mParsedPart, 0306 [] (const MessagePartPtr &) { return true; }, 0307 [] (const MessagePartPtr &part) { 0308 if (const auto cert = dynamic_cast<MimeTreeParser::CertMessagePart*>(part.data())) { 0309 cert->import(); 0310 } 0311 return false; 0312 }); 0313 } 0314 0315 0316 QString ObjectTreeParser::resolveCidLinks(const QString &html) 0317 { 0318 auto text = html; 0319 const auto rx = QRegExp(QLatin1String("(src)\\s*=\\s*(\"|')(cid:[^\"']+)\\2")); 0320 int pos = 0; 0321 while ((pos = rx.indexIn(text, pos)) != -1) { 0322 const auto link = QUrl(rx.cap(3)); 0323 pos += rx.matchedLength(); 0324 auto cid = link.path(); 0325 auto mailMime = const_cast<KMime::Content *>(find([=] (KMime::Content *c) { 0326 if (!c || !c->contentID(false)) { 0327 return false; 0328 } 0329 return QString::fromLatin1(c->contentID(false)->identifier()) == cid; 0330 })); 0331 if (mailMime) { 0332 const auto ct = mailMime->contentType(false); 0333 if (!ct) { 0334 qWarning() << "No content type, skipping"; 0335 continue; 0336 } 0337 QMimeDatabase mimeDb; 0338 const auto mimetype = mimeDb.mimeTypeForName(QString::fromLatin1(ct->mimeType())).name(); 0339 if (mimetype.startsWith(QLatin1String("image/"))) { 0340 //We reencode to base64 below. 0341 const auto data = mailMime->decodedContent(); 0342 if (data.isEmpty()) { 0343 qWarning() << "Attachment is empty."; 0344 continue; 0345 } 0346 text.replace(rx.cap(0), QString::fromLatin1("src=\"data:%1;base64,%2\"").arg(mimetype, QString::fromLatin1(data.toBase64()))); 0347 } 0348 } else { 0349 qWarning() << "Failed to find referenced attachment: " << cid; 0350 } 0351 } 0352 return text; 0353 } 0354 0355 //----------------------------------------------------------------------------- 0356 0357 void ObjectTreeParser::parseObjectTree(const QByteArray &mimeMessage) 0358 { 0359 const auto mailData = KMime::CRLFtoLF(mimeMessage); 0360 mMsg = KMime::Message::Ptr(new KMime::Message); 0361 mMsg->setContent(mailData); 0362 mMsg->parse(); 0363 //We avoid using mMsg->contentType()->charset(), because that will just return kmime's defaultCharset(), ISO-8859-1 0364 const auto charset = mMsg->contentType()->parameter(QStringLiteral("charset")).toLatin1(); 0365 if (charset.isEmpty()) { 0366 mMsg->contentType()->setCharset("us-ascii"); 0367 } 0368 parseObjectTree(mMsg.data()); 0369 } 0370 0371 void ObjectTreeParser::parseObjectTree(KMime::Content *node) 0372 { 0373 mTopLevelContent = node; 0374 mParsedPart = parseObjectTreeInternal(node, false); 0375 } 0376 0377 MessagePartPtr ObjectTreeParser::parsedPart() const 0378 { 0379 return mParsedPart; 0380 } 0381 0382 /* 0383 * This will lookup suitable formatters based on the type, 0384 * and let them generate a list of parts. 0385 * If the formatter generated a list of parts, then those are taken, otherwise we move on to the next match. 0386 */ 0387 QVector<MessagePartPtr> ObjectTreeParser::processType(KMime::Content *node, const QByteArray &mediaType, const QByteArray &subType) 0388 { 0389 static MimeTreeParser::BodyPartFormatterBaseFactory factory; 0390 const auto sub = factory.subtypeRegistry(mediaType.constData()); 0391 auto range = sub.equal_range(subType.constData()); 0392 for (auto it = range.first; it != range.second; ++it) { 0393 const auto formatter = (*it).second; 0394 if (!formatter) { 0395 continue; 0396 } 0397 const auto list = formatter->processList(this, node); 0398 if (!list.isEmpty()) { 0399 return list; 0400 } 0401 } 0402 return {}; 0403 } 0404 0405 MessagePart::Ptr ObjectTreeParser::parseObjectTreeInternal(KMime::Content *node, bool onlyOneMimePart) 0406 { 0407 if (!node) { 0408 return MessagePart::Ptr(); 0409 } 0410 0411 auto parsedPart = MessagePart::Ptr(new MessagePartList(this, node)); 0412 parsedPart->setIsRoot(node->isTopLevel()); 0413 const auto contents = node->parent() ? node->parent()->contents() : KMime::Content::List{node}; 0414 for (int i = contents.indexOf(node); i < contents.size(); ++i) { 0415 node = contents.at(i); 0416 0417 QByteArray mediaType("text"); 0418 QByteArray subType("plain"); 0419 if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && 0420 !node->contentType()->subType().isEmpty()) { 0421 mediaType = node->contentType()->mediaType(); 0422 subType = node->contentType()->subType(); 0423 } 0424 0425 auto mp = [&] { 0426 //Try the specific type handler 0427 { 0428 auto list = processType(node, mediaType, subType); 0429 if (!list.isEmpty()) { 0430 return list; 0431 } 0432 } 0433 //Fallback to the generic handler 0434 { 0435 auto list = processType(node, mediaType, "*"); 0436 if (!list.isEmpty()) { 0437 return list; 0438 } 0439 } 0440 //Fallback to the default handler 0441 return defaultHandling(node); 0442 }(); 0443 0444 for (const auto &p : mp) { 0445 parsedPart->appendSubPart(p); 0446 } 0447 0448 if (onlyOneMimePart) { 0449 break; 0450 } 0451 } 0452 0453 return parsedPart; 0454 } 0455 0456 QVector<MessagePart::Ptr> ObjectTreeParser::defaultHandling(KMime::Content *node) 0457 { 0458 if (node->contentType()->mimeType() == QByteArrayLiteral("application/octet-stream") && 0459 (node->contentType()->name().endsWith(QLatin1String("p7m")) || 0460 node->contentType()->name().endsWith(QLatin1String("p7s")) || 0461 node->contentType()->name().endsWith(QLatin1String("p7c")) 0462 )) { 0463 auto list = processType(node, "application", "pkcs7-mime"); 0464 if (!list.isEmpty()) { 0465 return list; 0466 } 0467 } 0468 0469 return {AttachmentMessagePart::Ptr(new AttachmentMessagePart(this, node))}; 0470 } 0471 0472 0473 static QTextCodec *getLocalCodec() 0474 { 0475 auto codec = QTextCodec::codecForLocale(); 0476 0477 // In the case of Japan. Japanese locale name is "eucjp" but 0478 // The Japanese mail systems normally used "iso-2022-jp" of locale name. 0479 // We want to change locale name from eucjp to iso-2022-jp at KMail only. 0480 0481 // (Introduction to i18n, 6.6 Limit of Locale technology): 0482 // EUC-JP is the de-facto standard for UNIX systems, ISO 2022-JP 0483 // is the standard for Internet, and Shift-JIS is the encoding 0484 // for Windows and Macintosh. 0485 if (codec) { 0486 const QByteArray codecNameLower = codec->name().toLower(); 0487 if (codecNameLower == "eucjp" 0488 #if defined Q_OS_WIN || defined Q_OS_MACX 0489 || codecNameLower == "shift-jis" // OK? 0490 #endif 0491 ) { 0492 codec = QTextCodec::codecForName("jis7"); 0493 // QTextCodec *cdc = QTextCodec::codecForName("jis7"); 0494 // QTextCodec::setCodecForLocale(cdc); 0495 // KLocale::global()->setEncoding(cdc->mibEnum()); 0496 } 0497 } 0498 return codec; 0499 } 0500 0501 const QTextCodec *ObjectTreeParser::codecFor(KMime::Content *node) const 0502 { 0503 static auto localCodec = getLocalCodec(); 0504 if (!node) { 0505 return localCodec; 0506 } 0507 0508 QByteArray charset = node->contentType()->charset().toLower(); 0509 0510 // utf-8 is a superset of us-ascii, so we don't loose anything, if we it insead 0511 // utf-8 is nowadays that widely, that it is a good guess to use it to fix issus with broken clients. 0512 if (charset == "us-ascii") { 0513 charset = "utf-8"; 0514 } 0515 if (!charset.isEmpty()) { 0516 if (auto c = KCharsets::charsets()->codecForName(QLatin1String(charset))) { 0517 return c; 0518 }; 0519 } 0520 // no charset means us-ascii (RFC 2045), so using local encoding should 0521 // be okay 0522 return localCodec; 0523 }