File indexing completed on 2024-04-28 03:53:48

0001 /*
0002     SPDX-FileCopyrightText: 2002-2008 The Kopete developers <kopete-devel@kde.org>
0003     SPDX-FileCopyrightText: 2008 Carlo Segato <brandon.ml@gmail.com>
0004     SPDX-FileCopyrightText: 2002-2003 Stefan Gehn <metz@gehn.net>
0005     SPDX-FileCopyrightText: 2005 Engin AYDOGAN <engin@bzzzt.biz>
0006 
0007     SPDX-License-Identifier: LGPL-2.1-or-later
0008 */
0009 
0010 #include "kemoticonsparser_p.h"
0011 
0012 #include <QDebug>
0013 #include <QString>
0014 
0015 #include <cstring>
0016 
/**
 * One row of the emoticon lookup table.
 *
 * @c match is the textual emoticon exactly as it appears in HTML entity-encoded
 * message text (so '<', '>' and '&' are spelled "&lt;", "&gt;" and "&amp;").
 * @c replacement is the UTF-8 encoded emoji inserted in its place.
 */
struct Emoticon {
    const char *match;
    const char *replacement;
};

// ### keep sorted by first column and HTML entity-encoded!
// The table is searched with std::lower_bound, so rows must be in strictly
// ascending byte order of `match`. In particular a string that is a prefix of
// another one (";-(" vs. ";-(!)") has to come first. The static_assert below
// the table enforces this at compile time.
// clang-format off
static constexpr const Emoticon emoticons_map[] = {
    {"&gt;-(", "😠"},
    {"&gt;:(", "😠"},
    {"&gt;:)", "😈"},
    {"&gt;:-(", "😠"},
    {"&gt;w&lt;", "😟"},
    {"&lt;-.-&gt;", "😴"},
    {"&lt;3", "♥️"},
    {"&lt;]:o){", "🤡"},
    {"&lt;|:^0|", "🤡"},
    {"()-()", "🤓"},
    {"(-_o)zzZ", "😴"},
    {"(:|", "🥱"},
    {"(@_@)", "😕"},
    {"(c:&gt;*", "🤡"},
    {"({)", "🤗"},
    {"(})", "🤗"},
    {"*&lt;:^)", "🤡"},
    {"*&lt;:o)", "🤡"},
    {"*:o)", "🤡"},
    {"*:oB", "🤡"},
    {"*:oP", "🤡"},
    {"+o(", "🤢"},
    {",':(", "😕"},
    {"-_-", "😴"},
    {"-_-+", "😠"},
    {"-o-o-", "🤓"},
    {"/00\\", "😟"},
    {"0:)", "😇"},
    {"0:-)", "😇"},
    {"0;)", "😇"},
    {"0=)", "😇"},
    {"3:)", "😈"},
    {"8)", "😎"},
    {"8-)", "😎"},
    {"8:::(", "😭"},
    {":\"-(", "😢"},
    {":'(", "😢"},
    {":'-(", "😢"},
    {":'D", "😆"},
    {":(", "🙁"},
    {":((", "😢"},
    {":)", "🙂"},
    {":))", "😆"},
    {":*", "😗"},
    {":*(", "😢"},
    {":*)", "😗"},
    {":-$", "😯"},
    {":-&amp;", "🤢"},
    {":-&gt;", "☺️"},
    {":-&gt;&gt;", "☺️"},
    {":-(", "🙁"},
    {":-)", "🙂"},
    {":-))", "😀"},
    {":-)*", "😗"},
    {":-*", "😗"},
    {":-/", "😕"},
    {":-@", "😠"},
    {":-D", "😀"},
    {":-O", "😮"},
    {":-P", "😛"},
    {":-Q", "😕"},
    {":-S", "😕"},
    {":-X", "🤫"},
    {":-[", "😯"},
    {":-o", "😮"},
    {":-p", "😛"},
    {":-s", "😕"},
    {":-t", "😛"},
    {":-x", "🤫"},
    {":-|", "😐"},
    {":-||", "😠"},
    {":/", "🫤"},
    {":@", "😠"},
    {":C", "☹️"},
    {":D", "😀"},
    {":O", "😮"},
    {":P", "😛"},
    {":S", "😕"},
    {":X", "🤫"},
    {":\\", "🫤"},
    {":_(", "😢"},
    {":c", "☹️"},
    {":o", "😮"},
    {":o)", "🤡"},
    {":p", "😛"},
    {":s", "😕"},
    {":x", "🤫"},
    {":|))", "😀"},
    {";(", "😢"},
    {";)", "😉"},
    {";-(", "😢"},
    {";-(!)", "😗"},
    {";-)", "😉"},
    {";_;", "😢"},
    {"= #", "😗"},
    {"='(", "😢"},
    {"=(", "🙁"},
    {"=[", "🙁"},
    {"=^D", "😆"},
    {"B-)", "😎"},
    {"D:", "🙁"},
    {"D=", "🙁"},
    {"O-)", "😇"},
    {"O.o", "🤔"},
    {"O.o?", "🤔"},
    {"O:)", "😇"},
    {"O:-)", "😇"},
    {"O;", "😇"},
    {"T.T", "🙁"},
    {"T_T", "😭"},
    {"X-(", "😠"},
    {"Y_Y", "🙁"},
    {"Z_Z", "😴"},
    {"\\o-o/", "🤓"},
    {"\\~/", "🤓"},
    {"]:-&gt;", "😈"},
    {"^j^", "😇"},
    {"i_i", "😭"},
    {"t.t", "🙁"},
    {"y_y", "🙁"},
    {"|-O", "🥱"},
    {"}:-)", "😈"},
};
// clang-format on

// Compile-time equivalent of `strcmp(a, b) < 0` for NUL-terminated byte strings.
static constexpr bool emoticonMatchLess(const char *a, const char *b)
{
    while (*a != '\0' && *a == *b) {
        ++a;
        ++b;
    }
    return static_cast<unsigned char>(*a) < static_cast<unsigned char>(*b);
}

// Returns true when every row of emoticons_map sorts strictly after its predecessor.
static constexpr bool emoticonsMapIsSorted()
{
    for (std::size_t i = 1; i < sizeof(emoticons_map) / sizeof(emoticons_map[0]); ++i) {
        if (!emoticonMatchLess(emoticons_map[i - 1].match, emoticons_map[i].match)) {
            return false;
        }
    }
    return true;
}
static_assert(emoticonsMapIsSorted(), "emoticons_map must be sorted by its first column (binary search relies on it)");
0148 
0149 static const Emoticon *findEmoticon(QStringView s)
0150 {
0151     auto it = std::lower_bound(std::begin(emoticons_map), std::end(emoticons_map), s, [](const auto &emoticon, auto s) {
0152         return QLatin1String(emoticon.match) < s;
0153     });
0154     if (it != std::end(emoticons_map) && s.startsWith(QLatin1String((*it).match))) {
0155         return it;
0156     }
0157     // if we don't have an exact match but a prefix, that will be in the item before the one returned by lower_bound
0158     if (it != std::begin(emoticons_map)) {
0159         it = std::prev(it);
0160         if (s.startsWith(QLatin1String((*it).match))) {
0161             return it;
0162         }
0163     }
0164     return nullptr;
0165 }
0166 
0167 QString KEmoticonsParser::parseEmoticons(const QString &message)
0168 {
0169     QString result;
0170 
0171     /* previous char, in the firs iteration assume that it is space since we want
0172      * to let emoticons at the beginning, the very first previous QChar must be a space. */
0173     QChar p = QLatin1Char(' ');
0174 
0175     int pos = 0;
0176     int previousPos = 0;
0177 
0178     bool inHTMLTag = false;
0179     bool inHTMLLink = false;
0180     bool inHTMLEntity = false;
0181 
0182     for (; pos < message.length(); ++pos) {
0183         const QChar c = message[pos];
0184 
0185         if (!inHTMLTag) { // Are we already in an HTML tag ?
0186             if (c == QLatin1Char('<')) { // If not check if are going into one
0187                 inHTMLTag = true; // If we are, change the state to inHTML
0188                 p = c;
0189                 continue;
0190             }
0191         } else { // We are already in a HTML tag
0192             if (c == QLatin1Char('>')) { // Check if it ends
0193                 inHTMLTag = false; // If so, change the state
0194 
0195                 if (p == QLatin1Char('a')) {
0196                     inHTMLLink = false;
0197                 }
0198             } else if (c == QLatin1Char('a') && p == QLatin1Char('<')) { // check if we just entered an anchor tag
0199                 inHTMLLink = true; // don't put smileys in urls
0200             }
0201             p = c;
0202             continue;
0203         }
0204 
0205         if (!inHTMLEntity) { // are we
0206             if (c == QLatin1Char('&')) {
0207                 inHTMLEntity = true;
0208             }
0209         }
0210 
0211         if (inHTMLLink) { // i can't think of any situation where a link address might need emoticons
0212             p = c;
0213             continue;
0214         }
0215 
0216         if (!p.isSpace() && p != QLatin1Char('>')) { // '>' may mark the end of an html tag
0217             p = c;
0218             continue;
0219         } /* strict requires space before the emoticon */
0220 
0221         const auto emoticon = findEmoticon(QStringView(message).mid(pos));
0222         if (emoticon) {
0223             bool found = true;
0224             /* check if the character after this match is space or end of string*/
0225             const int matchLen = std::strlen(emoticon->match);
0226             if (message.length() > pos + matchLen) {
0227                 const QChar n = message[pos + matchLen];
0228                 //<br/> marks the end of a line
0229                 if (n != QLatin1Char('<') && !n.isSpace() && !n.isNull() && n != QLatin1Char('&')) {
0230                     found = false;
0231                 }
0232             }
0233 
0234             if (found) {
0235                 result += QStringView(message).mid(previousPos, pos - previousPos);
0236                 result += QString::fromUtf8(emoticon->replacement);
0237 
0238                 /* Skip the matched emoticon's matchText */
0239                 pos += matchLen - 1;
0240                 previousPos = pos + 1;
0241             } else {
0242                 if (inHTMLEntity) {
0243                     // If we are in an HTML entity such as &gt;
0244                     const int htmlEnd = message.indexOf(QLatin1Char(';'), pos);
0245                     // Search for where it ends
0246                     if (htmlEnd == -1) {
0247                         // Apparently this HTML entity isn't ended, something is wrong, try skip the '&'
0248                         // and continue
0249                         // qCDebug(KEMOTICONS_CORE) << "Broken HTML entity, trying to recover.";
0250                         inHTMLEntity = false;
0251                         pos++;
0252                     } else {
0253                         pos = htmlEnd;
0254                         inHTMLEntity = false;
0255                     }
0256                 }
0257             }
0258         } /* else no emoticons begin with this character, so don't do anything */
0259         p = c;
0260     }
0261 
0262     if (result.isEmpty()) {
0263         return message;
0264     }
0265     if (previousPos < message.length()) {
0266         result += QStringView(message).mid(previousPos);
0267     }
0268     return result;
0269 }