File indexing completed on 2024-06-16 05:00:16

0001 /*
0002     objecttreeparser.h
0003 
0004     This file is part of KMail, the KDE mail client.
0005     SPDX-FileCopyrightText: 2003 Marc Mutz <mutz@kde.org>
0006     SPDX-FileCopyrightText: 2002-2003, 2009 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.net
0007     SPDX-FileCopyrightText: 2009 Andras Mantia <andras@kdab.net>
0008 
0009     SPDX-License-Identifier: GPL-2.0-or-later
0010 */
0011 
0012 #pragma once
0013 
0014 #include "mimetreeparser_export.h"
0015 
0016 #include "mimetreeparser/nodehelper.h"
0017 #include "mimetreeparser/objecttreesource.h"
0018 
0019 #include <gpgme++/verificationresult.h>
0020 
0021 class QString;
0022 
0023 namespace KMime
0024 {
0025 class Content;
0026 }
0027 
0028 namespace MimeTreeParser
0029 {
0030 class PartMetaData;
0031 class ViewerPrivate;
0032 class NodeHelper;
0033 class MessagePart;
0034 class MimeMessagePart;
0035 
0036 using MessagePartPtr = QSharedPointer<MessagePart>;
0037 using MimeMessagePartPtr = QSharedPointer<MimeMessagePart>;
0038 /**
0039  * @brief The ProcessResult class
0040  */
0041 class MIMETREEPARSER_EXPORT ProcessResult
0042 {
0043 public:
0044     explicit ProcessResult(NodeHelper *nodeHelper,
0045                            KMMsgSignatureState inlineSignatureState = KMMsgNotSigned,
0046                            KMMsgEncryptionState inlineEncryptionState = KMMsgNotEncrypted,
0047                            bool neverDisplayInline = false)
0048         : mInlineSignatureState(inlineSignatureState)
0049         , mInlineEncryptionState(inlineEncryptionState)
0050         , mNeverDisplayInline(neverDisplayInline)
0051         , mNodeHelper(nodeHelper)
0052     {
0053     }
0054 
0055     [[nodiscard]] KMMsgSignatureState inlineSignatureState() const;
0056     void setInlineSignatureState(KMMsgSignatureState state);
0057 
0058     [[nodiscard]] KMMsgEncryptionState inlineEncryptionState() const;
0059     void setInlineEncryptionState(KMMsgEncryptionState state);
0060 
0061     [[nodiscard]] bool neverDisplayInline() const;
0062     void setNeverDisplayInline(bool display);
0063 
0064     void adjustCryptoStatesOfNode(const KMime::Content *node) const;
0065 
0066 private:
0067     KMMsgSignatureState mInlineSignatureState;
0068     KMMsgEncryptionState mInlineEncryptionState;
0069     bool mNeverDisplayInline : 1;
0070     NodeHelper *mNodeHelper;
0071 };
0072 
0073 /**
0074 \brief Parses messages and generates HTML display code out of them
0075 
0076 \par Introduction
0077 
0078 First, have a look at the documentation in Mainpage.dox and at the documentation of ViewerPrivate
0079 to understand the broader picture.
0080 
0081 Just a note on the terminology: 'Node' refers to a MIME part here, which in KMime is a
0082 KMime::Content.
0083 
0084 \par Basics
0085 
0086 The ObjectTreeParser basically has two modes: Generating the HTML code for the Viewer, or only
0087 extracting the plainTextContent() for situations where only the message text is needed, for example
0088 when inline forwarding a message. The mode depends on the Interface::ObjectTreeSource passed to the
0089 constructor: If Interface::ObjectTreeSource::htmlWriter() is not 0, then the HTML code generation mode is
0090 used.
0091 
0092 Basically, all the ObjectTreeParser does is going through the tree of MIME parts and operating on
0093 those nodes. Operating here means creating the HTML code for the node or extracting the textual
0094 content from it. This process is started with parseObjectTree(), where we loop over the subnodes
0095 of the current root node. For each of those subnodes, we try to find a BodyPartFormatter that can
0096 handle the type of the node. This can either be an internal function, such as
0097 processMultiPartAlternativeSubtype() or processTextHtmlSubtype(), or it can be an external plugin.
0098 More on external plugins later. When no matching formatter is found, defaultHandling() is called
0099 for that node.
0100 
0101 \par Multipart Nodes
0102 
0103 Those nodes that are of type multipart have subnodes. If one of those children needs to be
0104 processed normally, the processMultipartXXX() functions call stdChildHandling() for the node that
0105 should be handled normally. stdChildHandling() creates its own ObjectTreeParser, which is a clone
0106 of the current ObjectTreeParser, and processes the node. stdChildHandling() is not called for all
0107 children of the multipart node, for example processMultiPartAlternativeSubtype() only calls it on
0108 one of the children, as the other one doesn't need to be displayed. Similarly,
0109 processMultiPartSignedSubtype() doesn't call stdChildHandling() for the signature node, only for the
0110 signed node.
0111 
0112 \par Processed and Unprocessed Nodes
0113 
0114 When a BodyPartFormatter has finished processing a node, it is processed. Nodes are set to being
0115 not processed at the beginning of parseObjectTree(). The processed state of a node is saved in a
0116 list in NodeHelper, see NodeHelper::setNodeProcessed(), NodeHelper::nodeProcessed() and the other
0117 related helper functions.
0118 
0119 It is the responsibility of the BodyPartFormatter to correctly call setNodeProcessed() and the
0120 related functions. This is important so that processing the same node twice can be prevented. The
0121 check that prevents duplicate processing is in parseObjectTree().
0122 
0123 An example where duplicate processing would happen if we didn't check for it is in stdChildHandling(),
0124 which is for example called from processMultiPartAlternativeSubtype(). Let's say the setting is to
0125 prefer HTML over plain text. In this case, processMultiPartAlternativeSubtype() would call
0126 stdChildHandling() on the HTML node, which would create a new ObjectTreeParser and call
0127 parseObjectTree() on it. parseObjectTree() processes the node and all its siblings, and one of the
0128 siblings is the plain text node, which shouldn't be processed! Therefore
0129 processMultiPartAlternativeSubtype() marks the plain text node as already processed.
0130 
0131 \par Plain Text Output
0132 
0133 Various nodes have plain text that should be displayed. This plain text is usually processed through
0134 writeBodyString() first. That method checks if the provided text is an inline PGP text and decrypts
0135 it if necessary. It also pushes the text through quotedHTML(), which does a number of things like
0136 coloring quoted lines or detecting links and creating real link tags for them.
0137 
0138 \par Modifying the Message
0139 
0140 The ObjectTreeParser does not only parse its message, in some circumstances it also modifies it
0141 before displaying. This is for example the case when displaying a decrypted message: The original
0142 message only contains a binary blob of crypto data, and processMultiPartEncryptedSubtype() decrypts
0143 that blob. After decryption, the current node is replaced with the decrypted node, which happens
0144 in insertAndParseNewChildNode().
0145 
0146 \par Crypto Operations
0147 
0148 For signature and decryption handling, there are functions which help with generating the HTML code
0149 for the signature header and footer. These are writeDeferredDecryptionBlock(), writeSigstatFooter()
0150 and writeSigstatHeader(). As the name writeDeferredDecryptionBlock() suggests, a setting can cause
0151 the message to not be decrypted unless the user clicks a link. Whether the message should be
0152 decrypted or not can be controlled by Interface::ObjectTreeSource::decryptMessage(). When the user clicks the
0153 decryption link, the URLHandler for 'kmail:' URLs sets that variable to true and triggers an update
0154 of the Viewer, which will cause parseObjectTree() to be called again.
0155 
0156 \par Async Crypto Operations
0157 
0158 The above case describes decrypting the message in place. However, decryption and also verifying of
0159 the signature can take a long time, so synchronous decryption and verifying would cause the Viewer to
0160 block. Therefore it is possible to run these operations in async mode, see allowAsync().
0161 In the first run of the async mode, all the ObjectTreeParser does is starting the decrypt or the
0162 verify job, and informing the user that the operation is in progress with
0163 writeDecryptionInProgressBlock() or with writeSigstatHeader(). Then, it creates and associates a
0164 BodyPartMemento with the current node, for example a VerifyDetachedBodyPartMemento. Each node can
0165 have multiple mementos associated with it, which are differentiated by name.
0166 
0167 NodeHelper::setBodyPartMemento() and NodeHelper::bodyPartMemento() provide means to store and
0168 retrieve these mementos. A memento is basically a thin wrapper around the crypto job, it stores the
0169 job pointer, the job input data and the job result. Mementos can be used for any async situation,
0170 not just for crypto jobs, but I'll describe crypto jobs here.
0171 
0172 So in the first run of decrypting or verifying a message, the BodyPartFormatter only starts the
0173 crypto job, creates the BodyPartMemento and writes the HTML code that tells the user that the
0174 operation is in progress. parseObjectTree() thus finishes without waiting for anything, and the
0175 message is displayed.
0176 
0177 At some point, the crypto jobs then finish, which will cause slotResult() of the BodyPartMemento
0178 to be called. slotResult() then saves the result to some member variable and calls
0179 BodyPartMemento::notify(), which in the end will trigger an update of the Viewer. That update
0180 will, in ViewerPrivate::parseMsg(), create a new ObjectTreeParser and call parseObjectTree() on it.
0181 This is where the second run begins.
0182 
0183 The functions that deal with decrypting or verifying, like processMultiPartSignedSubtype() or
0184 processMultiPartEncryptedSubtype() will look if they find a BodyPartMemento that is associated with
0185 the current node. Now it finds that memento, since it was created in the first run. It checks if the
0186 memento's job has finished, and if so, the result can be written out (either the decrypted data or
0187 the verified signature).
0188 
0189 When dealing with encrypted nodes, new nodes are created with the decrypted data. It is important to
0190 note that the original MIME tree is never modified, and remains the same as the original one. The method
0191 createAndParseTempNode is called with the newly decrypted data, and it generates a new temporary node to
0192 store the decrypted data. When these nodes are created, it is important to keep track of them as otherwise
0193 some mementos that are added to the newly created temporary nodes will be constantly regenerated. As the
0194 regeneration triggers a viewer update when complete, it results in an infinite refresh loop. The function
0195 NodeHelper::linkAsPermanentDecrypted will create a link between the newly created node and the original parent.
0196 Conversely, the function NodeHelper::attachExtraContent will create a link in the other direction, from the parent
0197 node to the newly created temporary node.
0198 
0199 When generating some mementos for nodes that may be temporary nodes (for example, contact photo mementos), the
0200 function NodeHelper::setBodyPartMementoForPermanentParent is used. This will save the given body part memento for
0201 the closest found permanent parent node, rather than the transient node itself. Then when checking for the existence
0202 of a certain memento in a node, NodeHelper::findPermanentParentBodyPartMemento will check to see if any parent of the
0203 given temporary node is a permanent (encrypted) node that has been used to generate the asked-for node.
0204 
0205 To conclude: For async operations, parseObjectTree() is called twice: The first call starts the
0206 crypto operation and creates the BodyPartMemento, the second call sees that the BodyPartMemento is
0207 there and can use its result for writing out the HTML.
0208 
0209 \par PartMetaData and ProcessResult
0210 
0211 For crypto operations, the class PartMetaData is used a lot, mainly to pass around info about the
0212 crypto state of a node. A PartMetaData can also be associated with a node by using
0213 NodeHelper::setPartMetaData(). The only user of that however is MessageAnalyzer::processPart() of
0214 the Nepomuk E-Mail Feeder, which also uses the ObjectTreeParser to analyze the message.
0215 
0216 You'll notice that a ProcessResult is passed to each formatter. The formatter is supposed to modify
0217 the ProcessResult to tell the callers something about the state of the nodes that were processed.
0218 One example for its use is to tell the caller about the crypto state of the node.
0219 
0220 \par BodyPartFormatter Plugins
0221 
0222 As mentioned way earlier, BodyPartFormatters can either be plugins or be internal. bodypartformatter.cpp
0223 contains some trickery so that the processXXX() methods of the ObjectTreeParser are called from
0224 a BodyPartFormatter associated with them, see the CREATE_BODY_PART_FORMATTER macro.
0225 
0226 The BodyPartFormatter code is work in progress, it was supposed to be refactored, but that has not
0227 yet happened at the time of writing. Therefore the code can seem a bit chaotic.
0228 
0229 External plugins are loaded with loadPlugins() in bodypartformatterfactory.cpp. External plugins
0230 can only use the classes in the interfaces/ directory, they include BodyPart, BodyPartMemento,
0231 BodyPartFormatterPlugin, BodyPartFormatter, BodyPartURLHandler and URLHandler. Therefore
0232 external plugins have powerful capabilities, which are needed for example in the iCal formatter or
0233 in the vCard formatter.
0234 
0235 \par Special HTML tags
0236 
0237 As also mentioned in the documentation of ViewerPrivate, the ObjectTreeParser writes out special
0238 links that are only understood by the viewer, for example 'kmail:' URLs or 'attachment:' URLs.
0239 Also, some special HTML tags are created, which the Viewer later uses for post-processing. For
0240 example a div with the id 'attachmentInjectionPoint', or a div with the id 'attachmentDiv', which
0241 is used to mark an attachment in the body with a yellow border when the user clicks the attachment
0242 in the header. Finally, parseObjectTree() creates an anchor with the id 'att%1', which is used in
0243 the Viewer to scroll to the attachment.
0244 */
0245 class MIMETREEPARSER_EXPORT ObjectTreeParser
0246 {
0247     /**
0248      * @internal
0249      * Copies the context of @p other, but not it's rawDecryptedBody, plainTextContent or htmlContent.
0250      */
0251     ObjectTreeParser(const ObjectTreeParser &other);
0252 
0253 public:
0254     explicit ObjectTreeParser(Interface::ObjectTreeSource *source, NodeHelper *nodeHelper = nullptr);
0255 
0256     explicit ObjectTreeParser(const ObjectTreeParser *topLevelParser);
0257     virtual ~ObjectTreeParser();
0258 
0259     void setAllowAsync(bool allow);
0260     bool allowAsync() const;
0261 
0262     bool hasPendingAsyncJobs() const;
0263 
0264     /**
0265      * The text of the message, ie. what would appear in the
0266      * composer's text editor if this was edited or replied to.
0267      * This is usually the content of the first text/plain MIME part.
0268      */
0269     QString plainTextContent() const;
0270 
0271     /**
0272      * Similar to plainTextContent(), but returns the HTML source of the first text/html MIME part.
0273      *
0274      * Not to be confused with the HTML code that the message viewer widget displays, that HTML
0275      * is written out by htmlWriter() and a totally different pair of shoes.
0276      */
0277     QString htmlContent() const;
0278 
0279     /**
0280      * The original charset of MIME part the plain text was extracted from.
0281      *
0282      * If there were more than one text/plain MIME parts in the mail, the this is the charset
0283      * of the last MIME part processed.
0284      */
0285     QByteArray plainTextContentCharset() const;
0286     QByteArray htmlContentCharset() const;
0287 
0288     NodeHelper *nodeHelper() const;
0289 
0290     /** Parse beginning at a given node and recursively parsing
0291       the children of that node and it's next sibling. */
0292     void parseObjectTree(KMime::Content *node, bool parseOnlySingleNode = false);
0293     MessagePartPtr parsedPart() const;
0294 
0295 private:
0296     void extractNodeInfos(KMime::Content *curNode, bool isFirstTextPart);
0297     void setPlainTextContent(const QString &plainTextContent);
0298 
0299     /**
0300      * Does the actual work for parseObjectTree. Unlike parseObjectTree(), this does not change the
0301      * top-level content.
0302      */
0303     MessagePartPtr parseObjectTreeInternal(KMime::Content *node, bool mOnlyOneMimePart);
0304     MessagePartPtr processType(KMime::Content *node, MimeTreeParser::ProcessResult &processResult, const QByteArray &mimeType);
0305 
0306 private:
0307     /** ctor helper */
0308     void init();
0309 
0310     QByteArray codecNameFor(KMime::Content *node) const;
0311 
0312 private:
0313     Interface::ObjectTreeSource *mSource;
0314     NodeHelper *mNodeHelper;
0315     QByteArray mPlainTextContentCharset;
0316     QByteArray mHtmlContentCharset;
0317     QString mPlainTextContent;
0318     QString mHtmlContent;
0319     KMime::Content *mTopLevelContent = nullptr;
0320     MessagePartPtr mParsedPart;
0321 
0322     bool mHasPendingAsyncJobs = false;
0323     bool mAllowAsync = false;
0324     // DataUrl Icons cache
0325     QString mCollapseIcon;
0326     QString mExpandIcon;
0327     bool mDeleteNodeHelper;
0328 
0329     friend class PartNodeBodyPart;
0330     friend class MessagePart;
0331     friend class EncryptedMessagePart;
0332     friend class SignedMessagePart;
0333     friend class TextMessagePart;
0334     friend class HtmlMessagePart;
0335     friend class MultiPartSignedBodyPartFormatter;
0336     friend class ApplicationPkcs7MimeBodyPartFormatter;
0337 };
0338 }