File indexing completed on 2024-10-06 12:54:08
0001 // SPDX-FileCopyrightText: 2023 James Graham <james.h.graham@protonmail.com> 0002 // SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL 0003 0004 #pragma once 0005 0006 #include <QHash> 0007 #include <QObject> 0008 #include <QRegularExpression> 0009 #include <QString> 0010 #include <QStringList> 0011 0012 #include "neochatroom.h" 0013 0014 namespace TextRegex 0015 { 0016 static const QRegularExpression endTagType{QStringLiteral("(>| )")}; 0017 static const QRegularExpression attributeData{QStringLiteral("['\"](.*?)['\"]")}; 0018 static const QRegularExpression removeReply{QStringLiteral("> <.*?>.*?\\n\\n"), QRegularExpression::DotMatchesEverythingOption}; 0019 static const QRegularExpression removeRichReply{QStringLiteral("<mx-reply>.*?</mx-reply>"), QRegularExpression::DotMatchesEverythingOption}; 0020 static const QRegularExpression codePill{QStringLiteral("<pre><code[^>]*>(.*?)</code></pre>"), QRegularExpression::DotMatchesEverythingOption}; 0021 static const QRegularExpression userPill{QStringLiteral("(<a href=\"https://matrix.to/#/@.*?:.*?\">.*?</a>)"), QRegularExpression::DotMatchesEverythingOption}; 0022 static const QRegularExpression strikethrough{QStringLiteral("<del>(.*?)</del>"), QRegularExpression::DotMatchesEverythingOption}; 0023 static const QRegularExpression mxcImage{QStringLiteral(R"AAA(<img(.*?)src="mxc:\/\/(.*?)\/(.*?)"(.*?)>)AAA")}; 0024 static const QRegularExpression plainUrl( 0025 QStringLiteral( 0026 R"(<a.*?<\/a>(*SKIP)(*F)|\b((www\.(?!\.)(?!(\w|\.|-)+@)|(https?|ftp):(//)?\w|(magnet|matrix):)(&(?![lg]t;)|[^&\s<>'"])+(&(?![lg]t;)|[^&!,.\s<>'"\]):])))"), 0027 QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption); 0028 static const QRegularExpression 0029 url(QStringLiteral(R"(\b((www\.(?!\.)(?!(\w|\.|-)+@)|https?:(//)?\w)(&(?![lg]t;)|[^&\s<>'"])+(&(?![lg]t;)|[^&!,.\s<>'"\]):])))"), 0030 QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption); 0031 static const QRegularExpression emailAddress(QStringLiteral(R"(<a.*?<\/a>(*SKIP)(*F)|\b(mailto:)?((\w|\.|-)+@(\w|\.|-)+\.\w+\b))"), 0032 QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption); 0033 static const QRegularExpression mxId(QStringLiteral(R"((^|[][[:space:](){}`'";])([!#@][-a-z0-9_=#/.]{1,252}:\w(?:\w|\.|-)*\.\w+(?::\d{1,5})?))"), 0034 QRegularExpression::CaseInsensitiveOption | QRegularExpression::UseUnicodePropertiesOption); 0035 } 0036 0037 /** 0038 * @class TextHandler 0039 * 0040 * This class is designed to handle the text of both incoming and outgoing messages. 0041 * 0042 * This includes converting markdown to html and removing any html tags that shouldn't 0043 * be present as per the matrix spec 0044 * (https://spec.matrix.org/v1.5/client-server-api/#mroommessage-msgtypes). 0045 */ 0046 class TextHandler : public QObject 0047 { 0048 Q_OBJECT 0049 0050 public: 0051 /** 0052 * @brief List of token types 0053 */ 0054 enum Type { 0055 Text, /*!< Anything not a tag that doesn't have special handling */ 0056 Tag, /*!< For any generic tag that doesn't have special handling */ 0057 TextCode, /*!< Text between code tags */ 0058 End, /*!< End of the input string */ 0059 }; 0060 0061 /** 0062 * @brief Get the string being handled. 0063 * 0064 * Setting new data resets the TextHandler. 0065 */ 0066 QString data() const; 0067 0068 /** 0069 * @brief Set the string being handled. 0070 * 0071 * @note The TextHandler doesn't modify the input data variable so the unhandled 0072 * text can always be retrieved. 0073 */ 0074 void setData(const QString &string); 0075 0076 /** 0077 * @brief Handle the text for a message that is being sent. 0078 */ 0079 QString handleSendText(); 0080 0081 /** 0082 * @brief Handle the text as a rich output for a message being received. 0083 * 0084 * The function does the following: 0085 * - Removes invalid html tags and attributes 0086 * - Strips any reply from the message 0087 * - Formats user mentions 0088 * 0089 * @note In this case the rich text refers to the output format. The input 0090 * can be in either and the parameter inputFormat just needs to be set 0091 * appropriately. 0092 */ 0093 QString handleRecieveRichText(Qt::TextFormat inputFormat = Qt::RichText, 0094 const NeoChatRoom *room = nullptr, 0095 const Quotient::RoomEvent *event = nullptr, 0096 bool stripNewlines = false); 0097 0098 /** 0099 * @brief Handle the text as a plain output for a message being received. 0100 * 0101 * The function does the following: 0102 * - Removes all html tags and attributes (except inside of code tags) 0103 * - Strips any reply from the message 0104 * 0105 * @note In this case the plain text refers to the output format. The input 0106 * can be in either and the parameter inputFormat just needs to be set 0107 * appropriately. 0108 * 0109 * @warning The output of this function should NEVER be input into a rich text 0110 * control. It will try to preserve < and > in the plain string which 0111 * could be malicious tags if the control uses rich text format. 0112 */ 0113 QString handleRecievePlainText(Qt::TextFormat inputFormat = Qt::PlainText, const bool &stripNewlines = false); 0114 0115 /** 0116 * @brief Return a list of links that can be previewed. 0117 * 0118 * This function is designed to give only links that should be previewed so 0119 * http, https or something starting with www. 0120 */ 0121 QList<QUrl> getLinkPreviews(); 0122 0123 private: 0124 QString m_data; 0125 0126 QString m_dataBuffer; 0127 int m_pos; 0128 Type m_nextTokenType = Text; 0129 QString m_nextToken; 0130 0131 void next(); 0132 void nextTokenType(); 0133 0134 QString getTagType() const; 0135 bool isCloseTag() const; 0136 QString getAttributeType(const QString &string); 0137 QString getAttributeData(const QString &string); 0138 bool isAllowedTag(const QString &type); 0139 bool isAllowedAttribute(const QString &tag, const QString &attribute); 0140 bool isAllowedLink(const QString &link, bool isImg = false); 0141 QString cleanAttributes(const QString &tag, const QString &tagString); 0142 0143 QString markdownToHTML(const QString &markdown); 0144 QString escapeHtml(QString stringIn); 0145 QString unescapeHtml(QString stringIn); 0146 QString linkifyUrls(QString stringIn); 0147 };