File indexing completed on 2022-11-23 12:03:44

0001 /*
0002     kmime_content.h
0003 
0004     KMime, the KDE Internet mail/usenet news message library.
0005     SPDX-FileCopyrightText: 2001 the KMime authors.
0006     See file AUTHORS for details
0007     SPDX-FileCopyrightText: 2006 Volker Krause <vkrause@kde.org>
0008     SPDX-FileCopyrightText: 2009 Constantin Berzan <exit3219@gmail.com>
0009 
0010     SPDX-License-Identifier: LGPL-2.0-or-later
0011 */
0012 /**
0013   @file
0014   This file is part of the API for handling @ref MIME data and
0015   defines the Content class.
0016 
0017   @brief
0018   Defines the Content class.
0019 
0020   @authors the KMime authors (see AUTHORS file),
0021   Volker Krause \<vkrause@kde.org\>
0022 
0023 TODO: possible glossary terms:
0024  content
0025    encoding, transfer type, disposition, description
0026  header
0027  body
0028  attachment
0029  charset
0030  article
0031  string representation
0032  broken-down object representation
0033 */
0034 
0035 #pragma once
0036 
0037 #include "kmime_export.h"
0038 #include "kmime_contentindex.h"
0039 #include "kmime_util.h"
0040 #include "kmime_headers.h"
0041 
0042 #include <QByteArray>
0043 #include <QVector>
0044 #include <QSharedPointer>
0045 #include <QMetaType>
0046 
0047 
0048 namespace KMime
0049 {
0050 
0051 class ContentPrivate;
0052 class Message;
0053 
0054 /**
0055   @brief
0056   A class that encapsulates @ref MIME encoded Content.
0057 
0058   A Content object holds two representations of a content:
0059   - the string representation: This is the content encoded as a string ready
0060     for transport.  Accessible through the encodedContent() method.
0061   - the broken-down representation: This is the tree of objects (headers,
0062     sub-Contents and (if present) the encapsulated message) that this Content is made of.
0063     Accessible through methods like header(), contents() and bodyAsMessage().
0064 
0065   The parse() function updates the broken-down representation of the Content
0066   from its string representation.  Calling it is necessary to access the
0067   headers, sub-Contents or the encapsulated message of this Content.
0068 
0069   The assemble() function updates the string representation of the Content
0070   from its broken-down representation.  Calling it is necessary for
0071   encodedContent() to reflect any changes made to the broken-down representation of the Content.
0072 
0073   There are two basic types of a Content:
0074   - A leaf Content: This is a content that is neither a multipart content nor an encapsulated
0075                     message. Because of this, it will not have any children, it has no sub-contents
0076                     and is therefore a leaf content.
0077                     Only leaf contents have a body that is not empty, i.e. functions that operate
0078                     on the body, such as body(), size() and decodedContent(), will work only on
0079                     leaf contents.
0080   - A non-leaf Content: This is a content that itself doesn't have any body, but that does have
0081                         sub-contents.
0082                         This is the case for contents that are of mimetype multipart/ or of mimetype
0083                         message/rfc822. In case of a multipart content, contents() will return the
0084                         multipart child contents. In case of an encapsulated message, the message
0085                         can be accessed with bodyAsMessage(), and contents() will have one entry
0086                         that is the message as well.
0087                         On a non-leaf content, body() will have an empty return value and other
0088                         functions working on the body will not work.
0089                         A call to parse() is required before the child multipart contents or the
0090                         encapsulated message is created.
0091 */
0092 /*
0093   KDE5:
0094   * Do not convert singlepart <-> multipart automatically.
0095   * A bunch of methods probably don't need to be virtual (since they're not needed
0096     in either Message or NewsArticle).
0097 */
0098 class KMIME_EXPORT Content
0099 {
0100 public:
0101 
0102     /**
0103       Describes a list of Content objects.
0104     */
0105     typedef QVector<KMime::Content *> List;
0106 
0107     /**
0108       Creates an empty Content object with a specified parent.
0109       @param parent the parent Content object
0110       @since 4.3
0111     */
0112     explicit Content(Content *parent = nullptr);
0113 
0114     /**
0115       Destroys this Content object.
0116     */
0117     virtual ~Content();
0118 
0119     /**
0120       Returns true if this Content object is not empty.
0121     */
0122     Q_REQUIRED_RESULT bool hasContent() const;
0123 
0124     /**
0125       Sets the Content to the given raw data, containing the Content head and
0126       body separated by two linefeeds.
0127 
0128       This method operates on the string representation of the Content. Call
0129       parse() if you want to access individual headers, sub-Contents or the
0130       encapsulated message.
0131 
0132       @note The passed data must not contain any CRLF sequences, only LF.
0133             Use CRLFtoLF for conversion before passing in the data.
0134 
0135       @param s is a QByteArray containing the raw Content data.
0136     */
0137     void setContent(const QByteArray &s);
0138 
0139     /**
0140      * Parses the Content.
0141      *
0142      * This means the broken-down object representation of the Content is
0143      * updated from the string representation of the Content.
0144      *
0145      * Call this if you want to access or change headers, sub-Contents or the encapsulated
0146      * message.
0147      *
0148      * @note Calling parse() twice will not work for multipart contents or for contents of which
0149      *       the body is an encapsulated message. The reason is that the first parse() will delete
0150      *       the body, so there is no body to work on for the second call of parse().
0151      *
0152      * @note Calling this will reset the message returned by bodyAsMessage(), as
0153      *       the message is re-parsed as well.
0154      *       Also, all old sub-contents will be deleted, so any old Content pointer will become
0155      *       invalid.
0156      */
0157     void parse();
0158 
0159     /**
0160       Returns whether this Content is frozen.
0161       A frozen content is immutable, i.e. calling assemble() will never modify
0162       its head or body, and encodedContent() will return the same data before
0163       and after parsing.
0164 
0165       @since 4.4.
0166       @see setFrozen().
0167     */
0168     Q_REQUIRED_RESULT bool isFrozen() const;
0169 
0170     /**
0171       Freezes this Content if @p frozen is true; otherwise unfreezes it.
0172       @param frozen freeze content if @c true, otherwise unfreeze
0173       @since 4.4
0174       @see isFrozen().
0175     */
0176     void setFrozen(bool frozen = true);
0177 
0178     /**
0179       Generates the MIME content.
0180       This means the string representation of this Content is updated from the
0181       broken-down object representation.
0182       Call this if you have made changes to the content, and want
0183       encodedContent() to reflect those changes.
0184 
0185       @note assemble() has no effect if the Content isFrozen().  You may want
0186       to freeze, for instance, signed sub-Contents, to make sure they are kept
0187       unmodified.
0188 
0189       @note If this content is an encapsulated message, i.e. bodyIsMessage() returns true,
0190       then calling assemble() will also assemble the message returned by bodyAsMessage().
0191 
0192       @warning assemble() may change the order of the headers, and other
0193       details such as where folding occurs.  This may break things like
0194       signature verification, so you should *ONLY* call assemble() when you
0195       have actually modified the content.
0196     */
0197     void assemble();
0198 
0199     /**
0200       Clears the content, deleting all headers and sub-Contents.
0201     */
0202     void clear();
0203 
0204     /**
0205       Removes all sub-Contents from this content.  Deletes them if @p del is true.
0206       This is different from calling removeContent() on each sub-Content, because
0207       removeContent() will convert this to a single-part Content if only one
0208       sub-Content is left.  Calling clearContents() does NOT make this Content
0209       single-part.
0210 
0211       @param del Whether to delete the sub-Contents.
0212       @see removeContent()
0213       @since 4.4
0214     */
0215     void clearContents(bool del = true);
0216 
0217     /**
0218       Returns the Content header raw data.
0219 
0220       @see setHead().
0221     */
0222     Q_REQUIRED_RESULT QByteArray head() const;
0223 
0224     /**
0225       Sets the Content header raw data.
0226 
0227       This method operates on the string representation of the Content. Call
0228       parse() if you want to access individual headers.
0229 
0230       @param head is a QByteArray containing the header data.
0231 
0232       @see head().
0233     */
0234     void setHead(const QByteArray &head);
0235 
0236     /**
0237      * Returns all headers.
0238      * @since 5.7
0239      */
0240     Q_REQUIRED_RESULT QVector<Headers::Base*> headers() const;
0241 
0242     /**
0243       Returns the first header of type @p type, if it exists.  Otherwise returns 0.
0244       Note that the returned header may be empty.
0245       @param type the header type to find
0246       @since 4.2
0247     */
0248     Headers::Base *headerByType(const char *type) const;
0249 
0250     /**
0251       Returns the first header of type T, if it exists.
0252       If the header does not exist and @p create is true, creates an empty header
0253       and returns it. Otherwise returns 0.
0254       Note that the returned header may be empty.
0255       @param create Whether to create the header if it does not exist.
0256       @since 4.4.
0257 
0258       KDE5: BIC: FIXME: Why is the default argument false here? That is inconsistent with the
0259                         methods in KMime::Message!
0260     */
0261     template <typename T> T *header(bool create = false);
0262 
0263     /**
0264       Returns all @p type headers in the Content.
0265       Take care that this result is not cached, so could be slow.
0266       @param type the header type to find
0267       @since 4.2
0268     */
0269     Q_REQUIRED_RESULT QVector<Headers::Base*> headersByType(const char *type) const;
0270 
0271     /**
0272       Sets the specified header to this Content.
0273       Any previous header of the same type is removed.
0274       If you need multiple headers of the same type, use appendHeader()
0275       instead.
0276 
0277       @param h The header to set.
0278       @see appendHeader()
0279       @see removeHeader()
0280       @since 4.4
0281     */
0282     void setHeader(Headers::Base *h);
0283 
0284     /**
0285       Appends the specified header to the headers of this Content.
0286       @param h The header to append.
0287       @since 4.4
0288     */
0289     void appendHeader(Headers::Base *h);
0290 
0291     /**
0292       Searches for the first header of type @p type, and deletes it, removing
0293       it from this Content.
0294       @param type The type of the header to look for.
0295       @return true if a header was found and removed.
0296     */
0297     bool removeHeader(const char *type);
0298 
0299     /**
0300       Searches for the first header of type @p T, and deletes it, removing
0301       it from this Content.
0302       @tparam T The type of the header to look for.
0303       @return true if a header was found and removed.
0304     */
0305     template <typename T> bool removeHeader();
0306 
0307 
0308     /**
0309       @return true if this Content has a header of type @p type.
0310       @param type The type of the header to look for.
0311     */
0312     // TODO probably provide hasHeader<T>() too.
0313     Q_REQUIRED_RESULT bool hasHeader(const char *type) const;
0314 
0315     /**
0316       Returns the Content-Type header.
0317 
0318       @param create If true, create the header if it doesn't exist yet.
0319     */
0320     Headers::ContentType *contentType(bool create = true);
0321 
0322     /**
0323       Returns the Content-Transfer-Encoding header.
0324 
0325       @param create If true, create the header if it doesn't exist yet.
0326     */
0327     Headers::ContentTransferEncoding *contentTransferEncoding(bool create = true);
0328 
0329     /**
0330       Returns the Content-Disposition header.
0331 
0332       @param create If true, create the header if it doesn't exist yet.
0333     */
0334     Headers::ContentDisposition *contentDisposition(bool create = true);
0335 
0336     /**
0337       Returns the Content-Description header.
0338 
0339       @param create If true, create the header if it doesn't exist yet.
0340     */
0341     Headers::ContentDescription *contentDescription(bool create = true);
0342 
0343     /**
0344       Returns the Content-Location header.
0345 
0346       @param create If true, create the header if it doesn't exist yet.
0347       @since 4.2
0348     */
0349     Headers::ContentLocation *contentLocation(bool create = true);
0350 
0351     /**
0352       Returns the Content-ID header.
0353       @param create if true, create the header if it does not exist yet.
0354       @since 4.4
0355     */
0356     Headers::ContentID *contentID(bool create = true);
0357 
0358     /**
0359       Returns the size of the Content body after encoding.
0360       (If the encoding is quoted-printable, this is only an approximate size.)
0361       This will return 0 for multipart contents or for encapsulated messages.
0362     */
0363     Q_REQUIRED_RESULT int size();
0364 
0365     /**
0366       Returns the size of this Content and all sub-Contents.
0367     */
0368     Q_REQUIRED_RESULT int storageSize() const;
0369 
0370     /**
0371       Line count of this Content and all sub-Contents.
0372     */
0373     Q_REQUIRED_RESULT int lineCount() const;
0374 
0375     /**
0376       Returns the Content body raw data.
0377 
0378       Note that this will be empty for multipart contents or for encapsulated messages,
0379       after parse() has been called.
0380 
0381       @see setBody().
0382     */
0383     Q_REQUIRED_RESULT QByteArray body() const;
0384 
0385     /**
0386       Sets the Content body raw data.
0387 
0388       This method operates on the string representation of the Content. Call
0389       parse() if you want to access individual sub-Contents or the encapsulated message.
0390 
0391       @param body is a QByteArray containing the body data.
0392 
0393       @see body().
0394     */
0395     void setBody(const QByteArray &body);
0396 
0397     /**
0398       Returns the MIME preamble.
0399 
0400       @return a QByteArray containing the MIME preamble.
0401 
0402       @since 4.9
0403      */
0404     Q_REQUIRED_RESULT QByteArray preamble() const;
0405 
0406     /**
0407       Sets the MIME preamble.
0408 
0409       @param preamble a QByteArray containing what will be used as the
0410       MIME preamble.
0411 
0412       @since 4.9
0413      */
0414 
0415     void setPreamble(const QByteArray &preamble);
0416 
0417     /**
0418       Returns the MIME epilogue.
0419 
0420       @return a QByteArray containing the MIME epilogue.
0421 
0422       @since 4.9
0423      */
0424     Q_REQUIRED_RESULT QByteArray epilogue() const;
0425 
0426     /**
0427       Sets the MIME epilogue.
0428 
0429       @param epilogue a QByteArray containing what will be used as the
0430       MIME epilogue.
0431 
0432       @since 4.9
0433      */
0434     void setEpilogue(const QByteArray &epilogue);
0435 
0436     /**
0437       Returns a QByteArray containing the encoded Content, including the
0438       Content header and all sub-Contents.
0439 
0440       If you make changes to the broken-down representation of the message, be
0441       sure to first call assemble() before calling encodedContent(), otherwise
0442       the result will not be up-to-date.
0443 
0444       If this content is an encapsulated message, i.e. bodyIsMessage() returns true,
0445       then encodedContent() will use the message returned by bodyAsMessage() as the
0446       body of the result, calling encodedContent() on the message.
0447 
0448       @param useCrLf If true, use @ref CRLF instead of @ref LF for linefeeds.
0449     */
0450     Q_REQUIRED_RESULT QByteArray encodedContent(bool useCrLf = false);
0451 
0452     /**
0453      * Like encodedContent(), with the difference that only the body will be returned, i.e. the
0454      * headers are excluded.
0455      *
0456      * @since 4.6
0457      */
0458     Q_REQUIRED_RESULT QByteArray encodedBody();
0459 
0460     /**
0461      * Returns the decoded Content body.
0462      *
0463      * Note that this will be empty for multipart contents or for encapsulated messages,
0464      * after parse() has been called.
0465      */
0466     // TODO: KDE5: BIC: Rename this to decodedBody(), since only the body is returned.
0467     // In contrast, setContent() sets the head and the body!
0468     // Also, try to make this const.
0469     Q_REQUIRED_RESULT QByteArray decodedContent();
0470 
0471     /**
0472       Returns the decoded text. In addition to what decodedContent() does, this
0473       also applies charset decoding. If this is not a text Content, decodedText()
0474       returns an empty QString.
0475 
0476       @param trimText If true, then the decoded text will have all trailing
0477       whitespace removed.
0478       @param removeTrailingNewlines If true, then the decoded text will have
0479       all consecutive trailing newlines removed.
0480 
0481       The last trailing new line of the decoded text is always removed.
0482 
0483     */
0484     // TODO: KDE5: BIC: Convert to enums. Also, what if trimText = true but removeTrailingNewlines
0485     //                  is false?
0486     Q_REQUIRED_RESULT QString decodedText(bool trimText = false,
0487                         bool removeTrailingNewlines = false);
0488 
0489     /**
0490       Sets the Content body to the given string using charset of the content type.
0491 
0492       If the charset can not be found, the system charset is taken and the content type header is
0493       changed to that charset.
0494       The charset of the content type header should be set to a charset that can encode the given
0495       string before calling this method.
0496 
0497       This method does not set the content transfer encoding automatically, it needs to be set
0498       to a suitable value that can encode the given string before calling this method.
0499 
0500       This method only makes sense for single-part contents, do not try to pass a multipart body
0501       or an encapsulated message here, that wouldn't work.
0502 
0503       @param s Unicode-encoded string.
0504     */
0505     void fromUnicodeString(const QString &s);
0506 
0507     /**
0508       Returns the first Content with mimetype text/.
0509     */
0510     Content *textContent();
0511 
0512     /**
0513      * Returns all attachments below this node, recursively.
0514      * This does not include crypto parts, nodes of alternative or related multipart nodes, or
0515      * the primary body part (see textContent()).
0516      * @see KMime::isAttachment(), KMime::hasAttachment()
0517      */
0518     Q_REQUIRED_RESULT QVector<Content*> attachments();
0519 
0520     /**
0521      * For multipart contents, this will return a list of all multipart child contents.
0522      * For contents that are of mimetype message/rfc822, this will return a list with one entry,
0523      * and that entry is the encapsulated message, as it would be returned by bodyAsMessage().
0524      */
0525     Q_REQUIRED_RESULT QVector<Content*> contents() const;
0526 
0527     /**
0528       Adds a new sub-Content. If the sub-Content is already part of another
0529       Content object, it is removed from there and its parent is updated.
0530       If the current Content object is single-part, it is converted to
0531       multipart/mixed first.
0532 
0533       @warning If the single-part to multipart conversion happens, all
0534       pointers you may have into this object (such as headers) will become
0535       invalid!
0536 
0537       @param content The new sub-Content.
0538       @param prepend If true, prepend to the Content list; otherwise append
0539       to the Content list.
0540 
0541       @see removeContent().
0542     */
0543     // KDE5: Do not convert single-part->multipart automatically.
0544     void addContent(Content *content, bool prepend = false);
0545 
0546     void replaceContent(Content *oldContent, Content *newContent);
0547     /**
0548       Removes the given sub-Content. If only one sub-Content is left, the
0549       current Content object is converted into a single-part Content.
0550 
0551       @warning If the multipart to single-part conversion happens, the head
0552       and body of the single remaining sub-Content are copied over, and the
0553       sub-Content is deleted.  All pointers to it or into it (such as headers)
0554       will become invalid!
0555 
0556       @param content The Content to remove.
0557       @param del If true, delete the removed Content object. Otherwise set its
0558       parent to 0.
0559 
0560       @see addContent().
0561       @see clearContents().
0562     */
0563     // KDE5: Do not convert multipart->single-part automatically.
0564     void removeContent(Content *content, bool del = false);
0565 
0566     /**
0567       Changes the encoding of this Content to @p e.  If the Content is binary,
0568       this actually re-encodes the data to use the new encoding.
0569 
0570       @param e The new encoding to use.
0571     */
0572     void changeEncoding(Headers::contentEncoding e);
0573 
0574     /**
0575       Returns the charset that is used to decode RFC2047 strings in all headers and to decode
0576       the body if the charset is not declared explicitly.
0577       It is also used as the charset when encoding RFC2047 strings in headers.
0578     */
0579     // TODO: Split this up into a charset for encoding and one for decoding, and make the one for
0580     //       encoding UTF-8 by default.
0581     static QByteArray defaultCharset();
0582 
0583     /**
0584       Returns the Content specified by the given index.
0585       If the index does not point to a Content, 0 is returned. If the index
0586       is invalid (empty), this Content is returned.
0587 
0588       @param index The Content index.
0589     */
0590     Content *content(const ContentIndex &index) const;
0591 
0592     /**
0593       Returns the ContentIndex for the given Content, or an invalid index
0594       if the Content is not found within the hierarchy.
0595       @param content the Content object to search.
0596     */
0597     Q_REQUIRED_RESULT ContentIndex indexForContent(Content *content) const;
0598 
0599     /**
0600       Returns true if this is the top-level node in the MIME tree. The top-level node is always
0601       a Message or NewsArticle. However, a node can be a Message without being a top-level node when
0602       it is an encapsulated message.
0603     */
0604     Q_REQUIRED_RESULT bool isTopLevel() const;
0605 
0606     /**
0607      * Sets a new parent for this Content and adds this Content to the parent's contents list. If it already had a parent, it is removed from the
0608      * old parent's contents list.
0609      * @param parent the new parent
0610      * @since 4.3
0611      */
0612     void setParent(Content *parent);
0613 
0614     /**
0615      * Returns the parent content object, or 0 if the content doesn't have a parent.
0616      * @since 4.3
0617      */
0618     Content *parent() const;
0619 
0620     /**
0621      * Returns the toplevel content object, 0 if there is no such object.
0622      * @since 4.3
0623      */
0624     Content *topLevel() const;
0625 
0626     /**
0627      * Returns the index of this Content based on the topLevel() object.
0628      * @since 4.3
0629      */
0630     Q_REQUIRED_RESULT ContentIndex index() const;
0631 
0632     /**
0633      * @return true if this content is an encapsulated message, i.e. if it has the mimetype
0634      *         message/rfc822.
0635      *
0636      * @since 4.5
0637      */
0638     //AK_REVIEW: move to MessageViewer/ObjectTreeParser
0639     Q_REQUIRED_RESULT bool bodyIsMessage() const;
0640 
0641     /**
0642      * If this content is an encapsulated message, in which case bodyIsMessage() will return
0643      * true, the message represented by the body of this content will be returned.
0644      * The returned message is already fully parsed.
0645      * Calling this method is the equivalent of calling contents().first() and casting the result
0646      * to a KMime::Message*. bodyAsMessage() has the advantage that it will return a shared pointer
0647      * that will not be destroyed when the container message is destroyed or re-parsed.
0648      *
0649      * The message that is returned here is created when calling parse(), so make sure to call
0650      * parse() first. Since each parse() creates a new message object, a different message object
0651      * will be returned each time you call parse().
0652      *
0653      * If you make changes to the returned message, you need to call assemble() on this content
0654      * or on the message if you want that encodedContent() reflects these changes. This also means
0655      * that calling assemble() on this content will assemble the returned message.
0656      *
0657      * @since 4.5
0658      */
0659     //AK_REVIEW: move to MessageViewer/ObjectTreeParser
0660     Q_REQUIRED_RESULT QSharedPointer<Message> bodyAsMessage() const;
0661 
0662 protected:
0663     /**
0664       Reimplement this method if you need to assemble additional headers in a
0665       derived class. Don't forget to call the implementation of the base class.
0666       @return The raw, assembled headers.
0667     */
0668     virtual QByteArray assembleHeaders();
0669 
0670     //@cond PRIVATE
0671     ContentPrivate *d_ptr;
0672     //@endcond
0673 
0674 private:
0675     Q_DECLARE_PRIVATE(Content)
0676     Q_DISABLE_COPY(Content)
0677 };
0678 
0679 template <typename T> T *Content::header(bool create)
0680 {
0681     if (Headers::Base *found = headerByType(T::staticType())) {
0682         Q_ASSERT(dynamic_cast<T *>(found)); // debug-only check that the stored header really is a T
0683         return static_cast<T *>(found);
0684     }
0685     T *const fresh = create ? new T : nullptr; // allocate only when the caller asked for creation
0686     if (fresh) {
0687         appendHeader(fresh); // no header of this type exists yet, so a plain append is enough
0688     }
0689     return fresh;
0690 }
0691 
0692 template <typename T> bool Content::removeHeader()
0693 {
0694     return this->removeHeader(T::staticType()); // forward to the overload that takes the type name
0695 }
0696 
0697 } // namespace KMime
0698 
0699 Q_DECLARE_METATYPE(KMime::Content*)
0700