File indexing completed on 2024-09-15 04:28:35
0001 // SPDX-FileCopyrightText: 2023 James Graham <james.h.graham@protonmail.com> 0002 // SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL 0003 0004 #pragma once 0005 0006 #include <QHash> 0007 #include <QObject> 0008 #include <QRegularExpression> 0009 #include <QString> 0010 #include <QStringList> 0011 0012 #include "neochatroom.h" 0013 0014 namespace Quotient 0015 { 0016 class RoomMessageEvent; 0017 } 0018 0019 /** 0020 * @class TextHandler 0021 * 0022 * This class is designed to handle the text of both incoming and outgoing messages. 0023 * 0024 * This includes converting markdown to html and removing any html tags that shouldn't 0025 * be present as per the matrix spec 0026 * (https://spec.matrix.org/v1.5/client-server-api/#mroommessage-msgtypes). 0027 */ 0028 class TextHandler : public QObject 0029 { 0030 Q_OBJECT 0031 0032 public: 0033 /** 0034 * @brief List of token types 0035 */ 0036 enum Type { 0037 Text, /*!< Anything not a tag that doesn't have special handling */ 0038 Tag, /*!< For any generic tag that doesn't have special handling */ 0039 TextCode, /*!< Text between code tags */ 0040 End, /*!< End of the input string */ 0041 }; 0042 0043 /** 0044 * @brief Get the string being handled. 0045 * 0046 * Setting new data resets the TextHandler. 0047 */ 0048 QString data() const; 0049 0050 /** 0051 * @brief Set the string being handled. 0052 * 0053 * @note The TextHandler doesn't modify the input data variable so the unhandled 0054 * text can always be retrieved. 0055 */ 0056 void setData(const QString &string); 0057 0058 /** 0059 * @brief Handle the text for a message that is being sent. 0060 */ 0061 QString handleSendText(); 0062 0063 /** 0064 * @brief Handle the text as a rich output for a message being received. 0065 * 0066 * The function does the following: 0067 * - Removes invalid html tags and attributes 0068 * - Strips any reply from the message 0069 * - Formats user mentions 0070 * 0071 * @note In this case the rich text refers to the output format. The input 0072 * can be in either and the parameter inputFormat just needs to be set 0073 * appropriately. 0074 */ 0075 QString handleRecieveRichText(Qt::TextFormat inputFormat = Qt::RichText, 0076 const NeoChatRoom *room = nullptr, 0077 const Quotient::RoomEvent *event = nullptr, 0078 bool stripNewlines = false); 0079 0080 /** 0081 * @brief Handle the text as a plain output for a message being received. 0082 * 0083 * The function does the following: 0084 * - Removes all html tags and attributes (except inside of code tags) 0085 * - Strips any reply from the message 0086 * 0087 * @note In this case the plain text refers to the output format. The input 0088 * can be in either and the parameter inputFormat just needs to be set 0089 * appropriately. 0090 * 0091 * @warning The output of this function should NEVER be input into a rich text 0092 * control. It will try to preserve < and > in the plain string which 0093 * could be malicious tags if the control uses rich text format. 0094 */ 0095 QString handleRecievePlainText(Qt::TextFormat inputFormat = Qt::PlainText, const bool &stripNewlines = false); 0096 0097 private: 0098 QString m_data; 0099 0100 QString m_dataBuffer; 0101 int m_pos; 0102 Type m_nextTokenType = Text; 0103 QString m_nextToken; 0104 0105 void next(); 0106 void nextTokenType(); 0107 0108 QString getTagType() const; 0109 bool isCloseTag() const; 0110 QString getAttributeType(const QString &string); 0111 QString getAttributeData(const QString &string); 0112 bool isAllowedTag(const QString &type); 0113 bool isAllowedAttribute(const QString &tag, const QString &attribute); 0114 bool isAllowedLink(const QString &link, bool isImg = false); 0115 QString cleanAttributes(const QString &tag, const QString &tagString); 0116 0117 QString markdownToHTML(const QString &markdown); 0118 QString escapeHtml(QString stringIn); 0119 QString unescapeHtml(QString stringIn); 0120 QString linkifyUrls(QString stringIn); 0121 };