// File indexing completed on 2024-04-28 03:53:48
0001 /* 0002 SPDX-FileCopyrightText: 2002-2008 The Kopete developers <kopete-devel@kde.org> 0003 SPDX-FileCopyrightText: 2008 Carlo Segato <brandon.ml@gmail.com> 0004 SPDX-FileCopyrightText: 2002-2003 Stefan Gehn <metz@gehn.net> 0005 SPDX-FileCopyrightText: 2005 Engin AYDOGAN <engin@bzzzt.biz> 0006 0007 SPDX-License-Identifier: LGPL-2.1-or-later 0008 */ 0009 0010 #include "kemoticonsparser_p.h" 0011 0012 #include <QDebug> 0013 #include <QString> 0014 0015 #include <cstring> 0016 0017 // ### keep sorted by first column and HTML entity-encoded! 0018 struct Emoticon { 0019 const char *match; 0020 const char *replacement; 0021 }; 0022 // clang-format off 0023 static constexpr const Emoticon emoticons_map[] = { 0024 {">-(", "đ "}, 0025 {">:(", "đ "}, 0026 {">:)", "đ"}, 0027 {">:-(", "đ "}, 0028 {">w<", "đ"}, 0029 {"<-.->", "đ´"}, 0030 {"<3", "âĨī¸"}, 0031 {"<]:o){", "đ¤Ą"}, 0032 {"<|:^0|", "đ¤Ą"}, 0033 {"()-()", "đ¤"}, 0034 {"(-_o)zzZ", "đ´"}, 0035 {"(:|", "đĨą"}, 0036 {"(@_@)", "đ"}, 0037 {"(c:>*", "đ¤Ą"}, 0038 {"({)", "đ¤"}, 0039 {"(})", "đ¤"}, 0040 {"*<:^)", "đ¤Ą"}, 0041 {"*<:o)", "đ¤Ą"}, 0042 {"*:o)", "đ¤Ą"}, 0043 {"*:oB", "đ¤Ą"}, 0044 {"*:oP", "đ¤Ą"}, 0045 {"+o(", "đ¤ĸ"}, 0046 {",':(", "đ"}, 0047 {"-_-", "đ´"}, 0048 {"-_-+", "đ "}, 0049 {"-o-o-", "đ¤"}, 0050 {"/00\\", "đ"}, 0051 {"0:)", "đ"}, 0052 {"0:-)", "đ"}, 0053 {"0;)", "đ"}, 0054 {"0=)", "đ"}, 0055 {"3:)", "đ"}, 0056 {"8)", "đ"}, 0057 {"8-)", "đ"}, 0058 {"8:::(", "đ"}, 0059 {":\"-(", "đĸ"}, 0060 {":'(", "đĸ"}, 0061 {":'-(", "đĸ"}, 0062 {":'D", "đ"}, 0063 {":(", "đ"}, 0064 {":((", "đĸ"}, 0065 {":)", "đ"}, 0066 {":))", "đ"}, 0067 {":*", "đ"}, 0068 {":*(", "đĸ"}, 0069 {":*)", "đ"}, 0070 {":-$", "đ¯"}, 0071 {":-&", "đ¤ĸ"}, 0072 {":->", "âēī¸"}, 0073 {":->>", "âēī¸"}, 0074 {":-(", "đ"}, 0075 {":-)", "đ"}, 0076 {":-))", "đ"}, 0077 {":-)*", "đ"}, 0078 {":-*", "đ"}, 0079 {":-/", "đ"}, 0080 {":-@", "đ "}, 0081 {":-D", "đ"}, 0082 {":-O", "đŽ"}, 0083 {":-P", "đ"}, 0084 {":-Q", "đ"}, 0085 {":-S", "đ"}, 0086 {":-X", 
"đ¤Ģ"}, 0087 {":-[", "đ¯"}, 0088 {":-o", "đŽ"}, 0089 {":-p", "đ"}, 0090 {":-s", "đ"}, 0091 {":-t", "đ"}, 0092 {":-x", "đ¤Ģ"}, 0093 {":-|", "đ"}, 0094 {":-||", "đ "}, 0095 {":/", "đĢ¤"}, 0096 {":@", "đ "}, 0097 {":C", "âšī¸"}, 0098 {":D", "đ"}, 0099 {":O", "đŽ"}, 0100 {":P", "đ"}, 0101 {":S", "đ"}, 0102 {":X", "đ¤Ģ"}, 0103 {":\\", "đĢ¤"}, 0104 {":_(", "đĸ"}, 0105 {":c", "âšī¸"}, 0106 {":o", "đŽ"}, 0107 {":o)", "đ¤Ą"}, 0108 {":p", "đ"}, 0109 {":s", "đ"}, 0110 {":x", "đ¤Ģ"}, 0111 {":|))", "đ"}, 0112 {";(", "đĸ"}, 0113 {";)", "đ"}, 0114 {";-(!)", "đ"}, 0115 {";-(", "đĸ"}, 0116 {";-)", "đ"}, 0117 {";_;", "đĸ"}, 0118 {"= #", "đ"}, 0119 {"='(", "đĸ"}, 0120 {"=(", "đ"}, 0121 {"=[", "đ"}, 0122 {"=^D", "đ"}, 0123 {"B-)", "đ"}, 0124 {"D:", "đ"}, 0125 {"D=", "đ"}, 0126 {"O-)", "đ"}, 0127 {"O.o", "đ¤"}, 0128 {"O.o?", "đ¤"}, 0129 {"O:)", "đ"}, 0130 {"O:-)", "đ"}, 0131 {"O;", "đ"}, 0132 {"T.T", "đ"}, 0133 {"T_T", "đ"}, 0134 {"X-(", "đ "}, 0135 {"Y_Y", "đ"}, 0136 {"Z_Z", "đ´"}, 0137 {"\\o-o/", "đ¤"}, 0138 {"\\~/", "đ¤"}, 0139 {"]:->", "đ"}, 0140 {"^j^", "đ"}, 0141 {"i_i", "đ"}, 0142 {"t.t", "đ"}, 0143 {"y_y", "đ"}, 0144 {"|-O", "đĨą"}, 0145 {"}:-)", "đ"}, 0146 }; 0147 // clang-format on 0148 0149 static const Emoticon *findEmoticon(QStringView s) 0150 { 0151 auto it = std::lower_bound(std::begin(emoticons_map), std::end(emoticons_map), s, [](const auto &emoticon, auto s) { 0152 return QLatin1String(emoticon.match) < s; 0153 }); 0154 if (it != std::end(emoticons_map) && s.startsWith(QLatin1String((*it).match))) { 0155 return it; 0156 } 0157 // if we don't have an exact match but a prefix, that will be in the item before the one returned by lower_bound 0158 if (it != std::begin(emoticons_map)) { 0159 it = std::prev(it); 0160 if (s.startsWith(QLatin1String((*it).match))) { 0161 return it; 0162 } 0163 } 0164 return nullptr; 0165 } 0166 0167 QString KEmoticonsParser::parseEmoticons(const QString &message) 0168 { 0169 QString result; 0170 0171 /* previous char, in the firs iteration 
assume that it is space since we want 0172 * to let emoticons at the beginning, the very first previous QChar must be a space. */ 0173 QChar p = QLatin1Char(' '); 0174 0175 int pos = 0; 0176 int previousPos = 0; 0177 0178 bool inHTMLTag = false; 0179 bool inHTMLLink = false; 0180 bool inHTMLEntity = false; 0181 0182 for (; pos < message.length(); ++pos) { 0183 const QChar c = message[pos]; 0184 0185 if (!inHTMLTag) { // Are we already in an HTML tag ? 0186 if (c == QLatin1Char('<')) { // If not check if are going into one 0187 inHTMLTag = true; // If we are, change the state to inHTML 0188 p = c; 0189 continue; 0190 } 0191 } else { // We are already in a HTML tag 0192 if (c == QLatin1Char('>')) { // Check if it ends 0193 inHTMLTag = false; // If so, change the state 0194 0195 if (p == QLatin1Char('a')) { 0196 inHTMLLink = false; 0197 } 0198 } else if (c == QLatin1Char('a') && p == QLatin1Char('<')) { // check if we just entered an anchor tag 0199 inHTMLLink = true; // don't put smileys in urls 0200 } 0201 p = c; 0202 continue; 0203 } 0204 0205 if (!inHTMLEntity) { // are we 0206 if (c == QLatin1Char('&')) { 0207 inHTMLEntity = true; 0208 } 0209 } 0210 0211 if (inHTMLLink) { // i can't think of any situation where a link address might need emoticons 0212 p = c; 0213 continue; 0214 } 0215 0216 if (!p.isSpace() && p != QLatin1Char('>')) { // '>' may mark the end of an html tag 0217 p = c; 0218 continue; 0219 } /* strict requires space before the emoticon */ 0220 0221 const auto emoticon = findEmoticon(QStringView(message).mid(pos)); 0222 if (emoticon) { 0223 bool found = true; 0224 /* check if the character after this match is space or end of string*/ 0225 const int matchLen = std::strlen(emoticon->match); 0226 if (message.length() > pos + matchLen) { 0227 const QChar n = message[pos + matchLen]; 0228 //<br/> marks the end of a line 0229 if (n != QLatin1Char('<') && !n.isSpace() && !n.isNull() && n != QLatin1Char('&')) { 0230 found = false; 0231 } 0232 } 0233 0234 if 
(found) { 0235 result += QStringView(message).mid(previousPos, pos - previousPos); 0236 result += QString::fromUtf8(emoticon->replacement); 0237 0238 /* Skip the matched emoticon's matchText */ 0239 pos += matchLen - 1; 0240 previousPos = pos + 1; 0241 } else { 0242 if (inHTMLEntity) { 0243 // If we are in an HTML entity such as > 0244 const int htmlEnd = message.indexOf(QLatin1Char(';'), pos); 0245 // Search for where it ends 0246 if (htmlEnd == -1) { 0247 // Apparently this HTML entity isn't ended, something is wrong, try skip the '&' 0248 // and continue 0249 // qCDebug(KEMOTICONS_CORE) << "Broken HTML entity, trying to recover."; 0250 inHTMLEntity = false; 0251 pos++; 0252 } else { 0253 pos = htmlEnd; 0254 inHTMLEntity = false; 0255 } 0256 } 0257 } 0258 } /* else no emoticons begin with this character, so don't do anything */ 0259 p = c; 0260 } 0261 0262 if (result.isEmpty()) { 0263 return message; 0264 } 0265 if (previousPos < message.length()) { 0266 result += QStringView(message).mid(previousPos); 0267 } 0268 return result; 0269 }