File indexing completed on 2024-03-24 16:53:05

0001 /*
0002     kmime_content.h
0003 
0004     KMime, the KDE Internet mail/usenet news message library.
0005     SPDX-FileCopyrightText: 2001 the KMime authors.
0006     See file AUTHORS for details
0007     SPDX-FileCopyrightText: 2006 Volker Krause <vkrause@kde.org>
0008     SPDX-FileCopyrightText: 2009 Constantin Berzan <exit3219@gmail.com>
0009 
0010     SPDX-License-Identifier: LGPL-2.0-or-later
0011 */
0012 /**
0013   @file
0014   This file is part of the API for handling @ref MIME data and
0015   defines the Content class.
0016 
0017   @brief
0018   Defines the Content class.
0019 
0020   @authors the KMime authors (see AUTHORS file),
0021   Volker Krause \<vkrause@kde.org\>
0022 
0023 TODO: possible glossary terms:
0024  content
0025    encoding, transfer type, disposition, description
0026  header
0027  body
0028  attachment
0029  charset
0030  article
0031  string representation
0032  broken-down object representation
0033 */
0034 
0035 #pragma once
0036 
0037 #include "kmime_export.h"
0038 #include "kmime_contentindex.h"
0039 #include "kmime_util.h"
0040 #include "kmime_headers.h"
0041 
0042 #include <QByteArray>
0043 #include <QList>
0044 #include <QMetaType>
0045 #include <QSharedPointer>
0046 
0047 namespace KMime
0048 {
0049 
0050 class ContentPrivate;
0051 class Message;
0052 
0053 /**
0054   @brief
0055   A class that encapsulates @ref MIME encoded Content.
0056 
0057   A Content object holds two representations of a content:
0058   - the string representation: This is the content encoded as a string ready
0059     for transport.  Accessible through the encodedContent() method.
0060   - the broken-down representation: This is the tree of objects (headers,
0061     sub-Contents and (if present) the encapsulated message) that this Content is made of.
0062     Accessible through methods like header(), contents() and bodyAsMessage().
0063 
0064   The parse() function updates the broken-down representation of the Content
0065   from its string representation.  Calling it is necessary to access the
0066   headers, sub-Contents or the encapsulated message of this Content.
0067 
0068   The assemble() function updates the string representation of the Content
0069   from its broken-down representation.  Calling it is necessary for
0070   encodedContent() to reflect any changes made to the broken-down representation of the Content.
0071 
0072   There are two basic types of a Content:
0073   - A leaf Content: This is a content that is neither a multipart content nor an encapsulated
0074                     message. Because of this, it will not have any children, it has no sub-contents
0075                     and is therefore a leaf content.
0076                     Only leaf contents have a body that is not empty, i.e. functions that operate
0077                     on the body, such as body(), size() and decodedContent(), will work only on
0078                     leaf contents.
0079   - A non-leaf Content: This is a content that itself doesn't have any body, but that does have
0080                         sub-contents.
0081                         This is the case for contents that are of mimetype multipart/ or of mimetype
0082                         message/rfc822. In case of a multipart content, contents() will return the
0083                         multipart child contents. In case of an encapsulated message, the message
0084                         can be accessed with bodyAsMessage(), and contents() will have one entry
0085                         that is the message as well.
0086                         On a non-leaf content, body() will have an empty return value and other
0087                         functions working on the body will not work.
0088                         A call to parse() is required before the child multipart contents or the
0089                         encapsulated message is created.
0090 */
0091 /*
0092   KDE5:
0093   * Do not convert singlepart <-> multipart automatically.
0094   * A bunch of methods probably don't need to be virtual (since they're not needed
0095     in either Message or NewsArticle).
0096 */
0097 class KMIME_EXPORT Content
0098 {
0099 public:
0100 
0101     /**
0102       Describes a list of Content objects.
0103     */
0104   typedef QList<KMime::Content *> List;
0105 
0106   /**
0107     Creates an empty Content object with a specified parent.
0108     @param parent the parent Content object
0109     @since 4.3
0110   */
0111   explicit Content(Content *parent = nullptr);
0112 
0113   /**
0114     Destroys this Content object.
0115   */
0116   virtual ~Content();
0117 
0118   /**
0119     Returns true if this Content object is not empty.
0120   */
0121   [[nodiscard]] bool hasContent() const;
0122 
0123   /**
0124     Sets the Content to the given raw data, containing the Content head and
0125     body separated by two linefeeds.
0126 
0127     This method operates on the string representation of the Content. Call
0128     parse() if you want to access individual headers, sub-Contents or the
0129     encapsulated message.
0130 
0131     @note The passed data must not contain any CRLF sequences, only LF.
0132           Use CRLFtoLF for conversion before passing in the data.
0133 
0134     @param s is a QByteArray containing the raw Content data.
0135   */
0136   void setContent(const QByteArray &s);
0137 
0138   /**
0139    * Parses the Content.
0140    *
0141    * This means the broken-down object representation of the Content is
0142    * updated from the string representation of the Content.
0143    *
0144    * Call this if you want to access or change headers, sub-Contents or the
0145    * encapsulated message.
0146    *
0147    * @note Calling parse() twice will not work for multipart contents or for
0148    * contents of which the body is an encapsulated message. The reason is that
0149    * the first parse() will delete the body, so there is no body to work on for
0150    * the second call of parse().
0151    *
0152    * @note Calling this will reset the message returned by bodyAsMessage(), as
0153    *       the message is re-parsed as well.
0154    *       Also, all old sub-contents will be deleted, so any old Content
0155    * pointer will become invalid.
0156    */
0157   void parse();
0158 
0159   /**
0160     Returns whether this Content is frozen.
0161     A frozen content is immutable, i.e. calling assemble() will never modify
0162     its head or body, and encodedContent() will return the same data before
0163     and after parsing.
0164 
0165     @since 4.4.
0166     @see setFrozen().
0167   */
0168   [[nodiscard]] bool isFrozen() const;
0169 
0170   /**
0171     Freezes this Content if @p frozen is true; otherwise unfreezes it.
0172     @param frozen freeze content if @c true, otherwise unfreeze
0173     @since 4.4
0174     @see isFrozen().
0175   */
0176   void setFrozen(bool frozen = true);
0177 
0178   /**
0179     Generates the MIME content.
0180     This means the string representation of this Content is updated from the
0181     broken-down object representation.
0182     Call this if you have made changes to the content, and want
0183     encodedContent() to reflect those changes.
0184 
0185     @note assemble() has no effect if the Content isFrozen().  You may want
0186     to freeze, for instance, signed sub-Contents, to make sure they are kept
0187     unmodified.
0188 
0189     @note If this content is an encapsulated message, i.e. bodyIsMessage()
0190     returns true, then calling assemble() will also assemble the message
0191     returned by bodyAsMessage().
0192 
0193     @warning assemble() may change the order of the headers, and other
0194     details such as where folding occurs.  This may break things like
0195     signature verification, so you should *ONLY* call assemble() when you
0196     have actually modified the content.
0197   */
0198   void assemble();
0199 
0200   /**
0201     Clears the content, deleting all headers and sub-Contents.
0202   */
0203   void clear();
0204 
0205   /**
0206     Removes all sub-Contents from this content.  Deletes them if @p del is true.
0207     This is different from calling removeContent() on each sub-Content, because
0208     removeContent() will convert this to a single-part Content if only one
0209     sub-Content is left.  Calling clearContents() does NOT make this Content
0210     single-part.
0211 
0212     @param del Whether to delete the sub-Contents.
0213     @see removeContent()
0214     @since 4.4
0215   */
0216   void clearContents(bool del = true);
0217 
0218   /**
0219     Returns the Content header raw data.
0220 
0221     @see setHead().
0222   */
0223   [[nodiscard]] QByteArray head() const;
0224 
0225   /**
0226     Sets the Content header raw data.
0227 
0228     This method operates on the string representation of the Content. Call
0229     parse() if you want to access individual headers.
0230 
0231     @param head is a QByteArray containing the header data.
0232 
0233     @see head().
0234   */
0235   void setHead(const QByteArray &head);
0236 
0237   /**
0238    * Returns all headers.
0239    * @since 5.7
0240    */
0241   [[nodiscard]] QList<Headers::Base *> headers() const;
0242 
0243   /**
0244     Returns the first header of type @p type, if it exists.  Otherwise returns
0245     nullptr. Note that the returned header may be empty.
0246     @param type the header type to find
0247     @since 4.2
0248   */
0249   Headers::Base *headerByType(const char *type) const;
0250 
0251   /**
0252     Returns the first header of type T, if it exists.
0253     If the header does not exist and @p create is true, creates an empty header
0254     and returns it. Otherwise returns nullptr.
0255     Note that the returned header may be empty.
0256     @param create Whether to create the header if it does not exist.
0257     @since 4.4.
0258 
0259     KDE5: BIC: FIXME: Why is the default argument false here? That is
0260     inconsistent with the methods in KMime::Message!
0261   */
0262   template <typename T> T *header(bool create = false);
0263 
0264   /**
0265     Returns all @p type headers in the Content.
0266     Take care that this result is not cached, so could be slow.
0267     @param type the header type to find
0268     @since 4.2
0269   */
0270   [[nodiscard]] QList<Headers::Base *> headersByType(const char *type) const;
0271 
0272   /**
0273     Sets the specified header to this Content.
0274     Any previous header of the same type is removed.
0275     If you need multiple headers of the same type, use appendHeader() or
0276     prependHeader().
0277 
0278     @param h The header to set.
0279     @see appendHeader()
0280     @see removeHeader()
0281     @since 4.4
0282   */
0283   void setHeader(Headers::Base *h);
0284 
0285   /**
0286     Appends the specified header to the headers of this Content.
0287     @param h The header to append.
0288     @since 4.4
0289   */
0290   void appendHeader(Headers::Base *h);
0291 
0292   /**
0293     Searches for the first header of type @p type, and deletes it, removing
0294     it from this Content.
0295     @param type The type of the header to look for.
0296     @return true if a header was found and removed.
0297   */
0298   bool removeHeader(const char *type);
0299 
0300   /**
0301     Searches for the first header of type @p T, and deletes it, removing
0302     it from this Content.
0303     @tparam T The type of the header to look for.
0304     @return true if a header was found and removed.
0305   */
0306   template <typename T> bool removeHeader();
0307 
0308   /**
0309     @return true if this Content has a header of type @p type.
0310     @param type The type of the header to look for.
0311   */
0312   // TODO probably provide hasHeader<T>() too.
0313   [[nodiscard]] bool hasHeader(const char *type) const;
0314 
0315   /**
0316     Returns the Content-Type header.
0317 
0318     @param create If true, create the header if it doesn't exist yet.
0319   */
0320   Headers::ContentType *contentType(bool create = true);
0321 
0322   /**
0323     Returns the Content-Transfer-Encoding header.
0324 
0325     @param create If true, create the header if it doesn't exist yet.
0326   */
0327   Headers::ContentTransferEncoding *contentTransferEncoding(bool create = true);
0328 
0329   /**
0330     Returns the Content-Disposition header.
0331 
0332     @param create If true, create the header if it doesn't exist yet.
0333   */
0334   Headers::ContentDisposition *contentDisposition(bool create = true);
0335 
0336   /**
0337     Returns the Content-Description header.
0338 
0339     @param create If true, create the header if it doesn't exist yet.
0340   */
0341   Headers::ContentDescription *contentDescription(bool create = true);
0342 
0343   /**
0344     Returns the Content-Location header.
0345 
0346     @param create If true, create the header if it doesn't exist yet.
0347     @since 4.2
0348   */
0349   Headers::ContentLocation *contentLocation(bool create = true);
0350 
0351   /**
0352     Returns the Content-ID header.
0353     @param create if true, create the header if it does not exist yet.
0354     @since 4.4
0355   */
0356   Headers::ContentID *contentID(bool create = true);
0357 
0358   /**
0359     Returns the size of the Content body after encoding.
0360     (If the encoding is quoted-printable, this is only an approximate size.)
0361     This will return 0 for multipart contents or for encapsulated messages.
0362   */
0363   [[nodiscard]] int size();
0364 
0365   /**
0366     Returns the size of this Content and all sub-Contents.
0367   */
0368   [[nodiscard]] int storageSize() const;
0369 
0370   /**
0371     Returns the Content body raw data.
0372 
0373     Note that this will be empty for multipart contents or for encapsulated
0374     messages, after parse() has been called.
0375 
0376     @see setBody().
0377   */
0378   [[nodiscard]] QByteArray body() const;
0379 
0380   /**
0381     Sets the Content body raw data.
0382 
0383     This method operates on the string representation of the Content. Call
0384     parse() if you want to access individual sub-Contents or the encapsulated
0385     message.
0386 
0387     @param body is a QByteArray containing the body data.
0388 
0389     @see body().
0390   */
0391   void setBody(const QByteArray &body);
0392 
0393   /**
0394     Returns the MIME preamble.
0395 
0396     @return a QByteArray containing the MIME preamble.
0397 
0398     @since 4.9
0399    */
0400   [[nodiscard]] QByteArray preamble() const;
0401 
0402   /**
0403     Sets the MIME preamble.
0404 
0405     @param preamble a QByteArray containing what will be used as the
0406     MIME preamble.
0407 
0408     @since 4.9
0409    */
0410 
0411   void setPreamble(const QByteArray &preamble);
0412 
0413   /**
0414     Returns the MIME epilogue.
0415 
0416     @return a QByteArray containing the MIME epilogue.
0417 
0418     @since 4.9
0419    */
0420   [[nodiscard]] QByteArray epilogue() const;
0421 
0422   /**
0423     Sets the MIME epilogue.
0424 
0425     @param epilogue a QByteArray containing what will be used as the
0426     MIME epilogue.
0427 
0428     @since 4.9
0429    */
0430   void setEpilogue(const QByteArray &epilogue);
0431 
0432   /**
0433     Returns a QByteArray containing the encoded Content, including the
0434     Content header and all sub-Contents.
0435 
0436     If you make changes to the broken-down representation of the message, be
0437     sure to first call assemble() before calling encodedContent(), otherwise
0438     the result will not be up-to-date.
0439 
0440     If this content is an encapsulated message, i.e. bodyIsMessage() returns
0441     true, then encodedContent() will use the message returned by bodyAsMessage()
0442     as the body of the result, calling encodedContent() on the message.
0443 
0444     @param useCrLf If true, use @ref CRLF instead of @ref LF for linefeeds.
0445   */
0446   [[nodiscard]] QByteArray encodedContent(bool useCrLf = false);
0447 
0448   /**
0449    * Like encodedContent(), with the difference that only the body will be
0450    * returned, i.e. the headers are excluded.
0451    *
0452    * @since 4.6
0453    */
0454   [[nodiscard]] QByteArray encodedBody();
0455 
0456   /**
0457    * Returns the decoded Content body.
0458    *
0459    * Note that this will be empty for multipart contents or for encapsulated
0460    * messages, after parse() has been called.
0461    */
0462   // TODO: KDE5: BIC: Rename this to decodedBody(), since only the body is
0463   // returned. In contrast, setContent() sets the head and the body! Also, try
0464   // to make this const.
0465   [[nodiscard]] QByteArray decodedContent();
0466 
0467   /**
0468     Returns the decoded text. In addition to what decodedContent() does, this
0469     also applies charset decoding. If this is not a text Content, decodedText()
0470     returns an empty QString.
0471 
0472     @param trimText If true, then the decoded text will have all trailing
0473     whitespace removed.
0474     @param removeTrailingNewlines If true, then the decoded text will have
0475     all consecutive trailing newlines removed.
0476 
0477     The last trailing new line of the decoded text is always removed.
0478 
0479   */
0480   // TODO: KDE5: BIC: Convert to enums. Also, what if trimText = true but
0481   // removeTrailingNewlines
0482   //                  is false?
0483   [[nodiscard]] QString decodedText(bool trimText = false,
0484                                     bool removeTrailingNewlines = false);
0485 
0486   /**
0487     Sets the Content body to the given string using charset of the content type.
0488 
0489     If the charset can not be found, the system charset is taken and the content
0490     type header is changed to that charset. The charset of the content type
0491     header should be set to a charset that can encode the given string before
0492     calling this method.
0493 
0494     This method does not set the content transfer encoding automatically, it
0495     needs to be set to a suitable value that can encode the given string before
0496     calling this method.
0497 
0498     This method only makes sense for single-part contents, do not try to pass a
0499     multipart body or an encapsulated message here, that wouldn't work.
0500 
0501     @param s Unicode-encoded string.
0502   */
0503   void fromUnicodeString(const QString &s);
0504 
0505   /**
0506     Returns the first Content with mimetype text/.
0507   */
0508   Content *textContent();
0509 
0510   /**
0511    * Returns all attachments below this node, recursively.
0512    * This does not include crypto parts, nodes of alternative or related
0513    * multipart nodes, or the primary body part (see textContent()).
0514    * @see KMime::isAttachment(), KMime::hasAttachment()
0515    */
0516   [[nodiscard]] QList<Content *> attachments();
0517 
0518   /**
0519    * For multipart contents, this will return a list of all multipart child
0520    * contents. For contents that are of mimetype message/rfc822, this will
0521    * return a list with one entry, and that entry is the encapsulated message,
0522    * as it would be returned by bodyAsMessage().
0523    */
0524   [[nodiscard]] QList<Content *> contents() const;
0525 
0526   /**
0527     Adds a new sub-Content. If the sub-Content is already part of another
0528     Content object, it is removed from there and its parent is updated.
0529     If the current Content object is single-part, it is converted to
0530     multipart/mixed first.
0531 
0532     @warning If the single-part to multipart conversion happens, all
0533     pointers you may have into this object (such as headers) will become
0534     invalid!
0535 
0536     @param content The new sub-Content.
0537     @param prepend If true, prepend to the Content list; otherwise append
0538     to the Content list.
0539 
0540     @see removeContent().
0541   */
0542   // KDE5: Do not convert single-part->multipart automatically.
0543   [[deprecated("use append/prependContent instead")]] void addContent(Content *content, bool prepend = false);
0544 
0545   /**
0546     Appends a new sub-Content. If the sub-Content is already part of another
0547     Content object, it is removed from there and its parent is updated.
0548 
0549     @param content The new sub-Content.
0550     @see prependContent()
0551     @see takeContent()
0552     @since 6.0
0553   */
0554   void appendContent(Content *content);
0555   /**
0556     Prepends a new sub-Content. If the sub-Content is already part of another
0557     Content object, it is removed from there and its parent is updated.
0558 
0559     @param content The new sub-Content.
0560     @see appendContent()
0561     @see takeContent()
0562     @since 6.0
0563   */
0564   void prependContent(Content *content);
0565   // NOTE(review): replaceContent() is undocumented in this header; presumably it puts newContent in the place of oldContent among the sub-Contents -- confirm the semantics and who owns oldContent afterwards.
0566   void replaceContent(Content *oldContent, Content *newContent);
0567   /**
0568     Removes the given sub-Content. If only one sub-Content is left, the
0569     current Content object is converted into a single-part Content.
0570 
0571     @warning If the multipart to single-part conversion happens, the head
0572     and body of the single remaining sub-Content are copied over, and the
0573     sub-Content is deleted.  All pointers to it or into it (such as headers)
0574     will become invalid!
0575 
0576     @param content The Content to remove.
0577     @param del If true, delete the removed Content object. Otherwise set its
0578     parent to nullptr.
0579 
0580     @see addContent().
0581     @see clearContents().
0582   */
0583   // KDE5: Do not convert multipart->single-part automatically.
0584   [[deprecated("use takeContent instead")]] void removeContent(Content *content, bool del = false);
0585   /**
0586     Removes the given sub-Content and, if it actually was a sub-Content,
0587     returns it.
0588 
0589     @param content The Content to remove. It is not deleted, ownership goes
0590     back to the caller.
0591 
0592     @see appendContent()
0593     @see prependContent()
0594     @see clearContents()
0595     @since 6.0
0596   */
0597   Content *takeContent(Content *content);
0598 
0599   /**
0600     Changes the encoding of this Content to @p e.  If the Content is binary,
0601     this actually re-encodes the data to use the new encoding.
0602 
0603     @param e The new encoding to use.
0604   */
0605   void changeEncoding(Headers::contentEncoding e);
0606 
0607   /**
0608     Returns the charset that is used to decode RFC2047 strings in all headers
0609     and to decode the body if the charset is not declared explicitly. It is also
0610     used as the charset when encoding RFC2047 strings in headers.
0611   */
0612   // TODO: Split this up into a charset for encoding and one for decoding, and
0613   // make the one for
0614   //       encoding UTF-8 by default.
0615   static QByteArray defaultCharset();
0616 
0617   /**
0618     Returns the Content specified by the given index.
0619     If the index does not point to a Content, nullptr is returned. If the index
0620     is invalid (empty), this Content is returned.
0621 
0622     @param index The Content index.
0623   */
0624   Content *content(const ContentIndex &index) const;
0625 
0626   /**
0627     Returns the ContentIndex for the given Content, or an invalid index
0628     if the Content is not found within the hierarchy.
0629     @param content the Content object to search.
0630   */
0631   [[nodiscard]] ContentIndex indexForContent(Content *content) const;
0632 
0633   /**
0634     Returns true if this is the top-level node in the MIME tree. The top-level
0635     node is always a Message or NewsArticle. However, a node can be a Message
0636     without being a top-level node when it is an encapsulated message.
0637   */
0638   [[nodiscard]] bool isTopLevel() const;
0639 
0640   /**
0641    * Sets a new parent for this Content and adds it to the parent's contents
0642    * list. If it already had a parent, it is removed from the old parent's
0643    * contents list.
0644    * @param parent the new parent
0645    * @since 4.3
0646    */
0647   void setParent(Content *parent);
0648 
0649   /**
0650    * Returns the parent content object, or nullptr if the content doesn't have a
0651    * parent.
0652    * @since 4.3
0653    */
0654   Content *parent() const;
0655 
0656   /**
0657    * Returns the toplevel content object, or nullptr if there is no such object.
0658    * @since 4.3
0659    */
0660   Content *topLevel() const;
0661 
0662   /**
0663    * Returns the index of this Content based on the topLevel() object.
0664    * @since 4.3
0665    */
0666   [[nodiscard]] ContentIndex index() const;
0667 
0668   /**
0669    * @return true if this content is an encapsulated message, i.e. if it has the
0670    * mimetype message/rfc822.
0671    *
0672    * @since 4.5
0673    */
0674   // AK_REVIEW: move to MessageViewer/ObjectTreeParser
0675   [[nodiscard]] bool bodyIsMessage() const;
0676 
0677   /**
0678    * If this content is an encapsulated message, in which case bodyIsMessage()
0679    * will return true, the message represented by the body of this content will
0680    * be returned. The returned message is already fully parsed. Calling this
0681    * method is the equivalent of calling contents().first() and casting the
0682    * result to a KMime::Message*. bodyAsMessage() has the advantage that it will
0683    * return a shared pointer that will not be destroyed when the container
0684    * message is destroyed or re-parsed.
0685    *
0686    * The message that is returned here is created when calling parse(), so make
0687    * sure to call parse() first. Since each parse() creates a new message
0688    * object, a different message object will be returned each time you call
0689    * parse().
0690    *
0691    * If you make changes to the returned message, you need to call assemble() on
0692    * this content or on the message if you want encodedContent() to reflect
0693    * these changes. This also means that calling assemble() on this content will
0694    * assemble the returned message.
0695    *
0696    * @since 4.5
0697    */
0698   // AK_REVIEW: move to MessageViewer/ObjectTreeParser
0699   [[nodiscard]] QSharedPointer<Message> bodyAsMessage() const;
0700 
0701 protected:
0702     /**
0703       Reimplement this method if you need to assemble additional headers in a
0704       derived class. Don't forget to call the implementation of the base class.
0705       @return The raw, assembled headers.
0706     */
0707     virtual QByteArray assembleHeaders();
0708 
0709     //@cond PRIVATE
0710     ContentPrivate *d_ptr;
0711     //@endcond
0712 
0713 private:
0714     Q_DECLARE_PRIVATE(Content)
0715     Q_DISABLE_COPY(Content)
0716 };
0716 
0717 template <typename T> T *Content::header(bool create)
0718 {
0719     Headers::Base *found = headerByType(T::staticType());
0720     if (!found && create) {
0721         found = new T; // absent and requested: start from an empty header
0722         appendHeader(found); // nothing of this type exists yet, so just append
0723     } else {
0724         // An existing header must really be a T for the cast below to be valid.
0725         Q_ASSERT(!found || dynamic_cast<T *>(found));
0726     }
0727     return static_cast<T *>(found); // stays null when nothing was found or created
0728 }
0729 
0730 template <typename T> bool Content::removeHeader()
0731 { // Typed convenience form: takes the header type name from T instead of a string argument.
0732     return removeHeader(T::staticType()); // forwards to the removeHeader(const char *) overload
0733 }
0734 
0735 } // namespace KMime
0736 
0737 Q_DECLARE_METATYPE(KMime::Content*)
0738