File indexing completed on 2024-12-01 13:09:19

0001 /*
0002    SPDX-FileCopyrightText: 2018-2024 Laurent Montel <montel@kde.org>
0003 
0004    SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "textconverter.h"
0008 #include "colorsandmessageviewstyle.h"
0009 #include "emoticons/emojimanager.h"
0010 #include "messagecache.h"
0011 #include "messages/message.h"
0012 #include "ruqola_texttohtml_debug.h"
0013 #include "utils.h"
0014 
0015 #include "ktexttohtmlfork/ruqolaktexttohtml.h"
0016 #include "syntaxhighlightingmanager.h"
0017 #include "texthighlighter.h"
0018 #include <KSyntaxHighlighting/Definition>
0019 #include <KSyntaxHighlighting/Repository>
0020 #include <KSyntaxHighlighting/Theme>
0021 
0022 #include <KColorScheme>
0023 namespace
0024 {
0025 /// check if the @p str contains an uneven number of backslashes before @p pos
0026 bool isEscaped(const QString &str, int pos)
0027 {
0028     int backslashes = 0;
0029     while (pos > 0 && str[pos - 1] == QLatin1Char('\\')) {
0030         ++backslashes;
0031         --pos;
0032     }
0033     // even number of escapes means the
0034     return backslashes % 2 == 1;
0035 }
0036 
0037 int findNonEscaped(const QString &str, const QString &regionMarker, int startFrom)
0038 {
0039     while (true) {
0040         const int index = str.indexOf(regionMarker, startFrom);
0041         if (index == -1) {
0042             return -1;
0043         } else if (isEscaped(str, index)) {
0044             startFrom = index + regionMarker.size();
0045             continue;
0046         }
0047         return index;
0048     }
0049     Q_UNREACHABLE();
0050 }
0051 int findNewLineOrEndLine(const QString &str, const QString &regionMarker, int startFrom)
0052 {
0053     const int index = str.indexOf(regionMarker, startFrom);
0054     if (index == -1) {
0055         return str.length() - 1;
0056     } else {
0057         return index;
0058     }
0059     Q_UNREACHABLE();
0060 }
0061 
0062 template<typename InRegionCallback, typename OutsideRegionCallback>
0063 void iterateOverRegions(const QString &str, const QString &regionMarker, InRegionCallback &&inRegion, OutsideRegionCallback &&outsideRegion)
0064 {
0065     int startFrom = 0;
0066     const auto markerSize = regionMarker.size();
0067     while (true) {
0068         const int startIndex = findNonEscaped(str, regionMarker, startFrom);
0069         if (startIndex == -1) {
0070             break;
0071         }
0072 
0073         const int endIndex = findNonEscaped(str, regionMarker, startIndex + markerSize);
0074         if (endIndex == -1) {
0075             break;
0076         }
0077 
0078         const auto codeBlock = str.mid(startIndex + markerSize, endIndex - startIndex - markerSize).trimmed();
0079 
0080         outsideRegion(str.mid(startFrom, startIndex - startFrom));
0081         startFrom = endIndex + markerSize;
0082 
0083         inRegion(codeBlock);
0084     }
0085     outsideRegion(str.mid(startFrom));
0086 }
0087 
0088 template<typename InRegionCallback, typename OutsideRegionCallback, typename NewLineCallBack>
0089 void iterateOverEndLineRegions(const QString &str,
0090                                const QString &regionMarker,
0091                                InRegionCallback &&inRegion,
0092                                OutsideRegionCallback &&outsideRegion,
0093                                NewLineCallBack &&newLine)
0094 {
0095     // We have quote text if text start with > or we have "\n>"
0096     if (str.startsWith(regionMarker) || str.contains(QStringLiteral("\n") + regionMarker)) {
0097         int startFrom = 0;
0098         const auto markerSize = regionMarker.size();
0099         bool hasCode = false;
0100         while (true) {
0101             const int startIndex = findNonEscaped(str, regionMarker, startFrom);
0102             if (startIndex == -1) {
0103                 break;
0104             }
0105 
0106             const int endIndex = findNewLineOrEndLine(str, QStringLiteral("\n"), startIndex + markerSize);
0107             if (endIndex == -1) {
0108                 break;
0109             }
0110 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0111             QStringView codeBlock = str.midRef(startIndex + markerSize, endIndex - startIndex).trimmed();
0112 #else
0113             QStringView codeBlock = QStringView(str).mid(startIndex + markerSize, endIndex - startIndex).trimmed();
0114 #endif
0115             if (codeBlock.endsWith(regionMarker)) {
0116                 codeBlock.chop(regionMarker.size());
0117             }
0118             if (hasCode) {
0119                 newLine();
0120             }
0121 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0122             const QStringView midCode = str.midRef(startFrom, startIndex - startFrom);
0123 #else
0124             const QStringView midCode = QStringView(str).mid(startFrom, startIndex - startFrom);
0125 #endif
0126             outsideRegion(midCode.toString());
0127             startFrom = endIndex + markerSize;
0128 
0129             inRegion(codeBlock.toString());
0130             if (!codeBlock.isEmpty()) {
0131                 hasCode = true;
0132             }
0133         }
0134 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0135         const auto afterstr = str.midRef(startFrom);
0136         outsideRegion(afterstr.toString());
0137 #else
0138         const QString afterstr = str.mid(startFrom);
0139         outsideRegion(afterstr);
0140 #endif
0141     } else {
0142         outsideRegion(str);
0143     }
0144 }
0145 
0146 QString markdownToRichText(const QString &markDown)
0147 {
0148     if (markDown.isEmpty()) {
0149         return {};
0150     }
0151 
0152     qCDebug(RUQOLA_TEXTTOHTML_LOG) << "BEFORE markdownToRichText " << markDown;
0153     QString str = markDown;
0154 
0155     const RuqolaKTextToHTML::Options convertFlags = RuqolaKTextToHTML::HighlightText | RuqolaKTextToHTML::ConvertPhoneNumbers;
0156     str = RuqolaKTextToHTML::convertToHtml(str, convertFlags);
0157     qCDebug(RUQOLA_TEXTTOHTML_LOG) << " AFTER convertToHtml " << str;
0158     // substitute "[example.com](<a href="...">...</a>)" style urls
0159     str = Utils::convertTextWithUrl(str);
0160     // Substiture "- [ ] foo" and "- [x] foo" to checkmark
0161     str = Utils::convertTextWithCheckMark(str);
0162     // Substiture # header
0163     str = Utils::convertTextHeaders(str);
0164     qCDebug(RUQOLA_TEXTTOHTML_LOG) << " AFTER convertTextWithUrl " << str;
0165 
0166     return str;
0167 }
0168 
0169 QString generateRichText(const QString &str,
0170                          const QString &username,
0171                          const QStringList &highlightWords,
0172                          const QMap<QString, QString> &mentions,
0173                          const QMap<QString, QString> &channels,
0174                          const QString &searchedText)
0175 {
0176     QString newStr = markdownToRichText(str);
0177     static const QRegularExpression regularExpressionAHref(QStringLiteral("(<a href=\'.*\'>|<a href=\".*\">)"));
0178     struct HrefPos {
0179         int start = 0;
0180         int end = 0;
0181     };
0182     QList<HrefPos> lstPos;
0183     {
0184         QRegularExpressionMatchIterator userIteratorHref = regularExpressionAHref.globalMatch(newStr);
0185         while (userIteratorHref.hasNext()) {
0186             const QRegularExpressionMatch match = userIteratorHref.next();
0187             HrefPos pos;
0188             pos.start = match.capturedStart(1);
0189             pos.end = match.capturedEnd(1);
0190             lstPos.append(std::move(pos));
0191         }
0192 
0193         static const QRegularExpression regularExpressionRoom(QStringLiteral("(^|\\s+)#([\\w._-]+)"), QRegularExpression::UseUnicodePropertiesOption);
0194         QRegularExpressionMatchIterator roomIterator = regularExpressionRoom.globalMatch(newStr);
0195         while (roomIterator.hasNext()) {
0196             const QRegularExpressionMatch match = roomIterator.next();
0197 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0198             const QStringRef word = match.capturedRef(2);
0199 #else
0200             const QStringView word = match.capturedView(2);
0201 #endif
0202             bool inAnUrl = false;
0203             const int matchCapturedStart = match.capturedStart(2);
0204             for (const HrefPos &hrefPos : lstPos) {
0205                 if ((matchCapturedStart > hrefPos.start) && (matchCapturedStart < hrefPos.end)) {
0206                     inAnUrl = true;
0207                     break;
0208                 }
0209             }
0210             if (inAnUrl) {
0211                 continue;
0212             }
0213             QString roomIdentifier = channels.value(word.toString());
0214             if (roomIdentifier.isEmpty()) {
0215                 roomIdentifier = word.toString();
0216             }
0217 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0218             newStr.replace(QLatin1Char('#') + word, QStringLiteral("<a href=\'ruqola:/room/%2\'>#%1</a>").arg(word, roomIdentifier));
0219 #else
0220             newStr.replace(QLatin1Char('#') + word.toString(), QStringLiteral("<a href=\'ruqola:/room/%2\'>#%1</a>").arg(word, roomIdentifier));
0221 #endif
0222         }
0223     }
0224 
0225     if (!highlightWords.isEmpty()) {
0226         const auto userHighlightForegroundColor = ColorsAndMessageViewStyle::self().schemeView().foreground(KColorScheme::PositiveText).color().name();
0227         const auto userHighlightBackgroundColor = ColorsAndMessageViewStyle::self().schemeView().background(KColorScheme::PositiveBackground).color().name();
0228         lstPos.clear();
0229         QRegularExpressionMatchIterator userIteratorHref = regularExpressionAHref.globalMatch(newStr);
0230         while (userIteratorHref.hasNext()) {
0231             const QRegularExpressionMatch match = userIteratorHref.next();
0232             HrefPos pos;
0233             pos.start = match.capturedStart(1);
0234             pos.end = match.capturedEnd(1);
0235             lstPos.append(std::move(pos));
0236         }
0237 
0238         for (const QString &word : highlightWords) {
0239             const QRegularExpression exp(QStringLiteral("(\\b%1\\b)").arg(word), QRegularExpression::CaseInsensitiveOption);
0240             QRegularExpressionMatchIterator userIterator = exp.globalMatch(newStr);
0241             int offset = 0;
0242             while (userIterator.hasNext()) {
0243                 const QRegularExpressionMatch match = userIterator.next();
0244                 const QString word = match.captured(1);
0245                 bool inAnUrl = false;
0246                 const int matchCapturedStart = match.capturedStart(1);
0247                 for (const HrefPos &hrefPos : lstPos) {
0248                     if ((matchCapturedStart > hrefPos.start) && (matchCapturedStart < hrefPos.end)) {
0249                         inAnUrl = true;
0250                         break;
0251                     }
0252                 }
0253                 if (inAnUrl) {
0254                     continue;
0255                 }
0256                 const QString replaceStr =
0257                     QStringLiteral("<a style=\"color:%2;background-color:%3;\">%1</a>").arg(word, userHighlightForegroundColor, userHighlightBackgroundColor);
0258                 newStr.replace(matchCapturedStart + offset, word.length(), replaceStr);
0259                 // We added a new string => increase offset
0260                 offset += replaceStr.length() - word.length();
0261             }
0262         }
0263     }
0264 
0265     if (!searchedText.isEmpty()) {
0266         const auto userHighlightForegroundColor = ColorsAndMessageViewStyle::self().schemeView().foreground(KColorScheme::NeutralText).color().name();
0267         const auto userHighlightBackgroundColor = ColorsAndMessageViewStyle::self().schemeView().background(KColorScheme::NeutralBackground).color().name();
0268         lstPos.clear();
0269         QRegularExpressionMatchIterator userIteratorHref = regularExpressionAHref.globalMatch(newStr);
0270         while (userIteratorHref.hasNext()) {
0271             const QRegularExpressionMatch match = userIteratorHref.next();
0272             HrefPos pos;
0273             pos.start = match.capturedStart(1);
0274             pos.end = match.capturedEnd(1);
0275             lstPos.append(std::move(pos));
0276         }
0277 
0278         const QRegularExpression exp(QStringLiteral("(%1)").arg(searchedText), QRegularExpression::CaseInsensitiveOption);
0279         QRegularExpressionMatchIterator userIterator = exp.globalMatch(newStr);
0280         int offset = 0;
0281         while (userIterator.hasNext()) {
0282             const QRegularExpressionMatch match = userIterator.next();
0283             const QString word = match.captured(1);
0284             bool inAnUrl = false;
0285             const int matchCapturedStart = match.capturedStart(1);
0286             for (const HrefPos &hrefPos : lstPos) {
0287                 if ((matchCapturedStart > hrefPos.start) && (matchCapturedStart < hrefPos.end)) {
0288                     inAnUrl = true;
0289                     break;
0290                 }
0291             }
0292             if (inAnUrl) {
0293                 continue;
0294             }
0295             const QString replaceStr =
0296                 QStringLiteral("<a style=\"color:%2;background-color:%3;\">%1</a>").arg(word, userHighlightForegroundColor, userHighlightBackgroundColor);
0297             newStr.replace(matchCapturedStart + offset, word.length(), replaceStr);
0298             // We added a new string => increase offset
0299             offset += replaceStr.length() - word.length();
0300         }
0301     }
0302     static const QRegularExpression regularExpressionUser(QStringLiteral("(^|\\s+)@([\\w._-]+)"), QRegularExpression::UseUnicodePropertiesOption);
0303     QRegularExpressionMatchIterator userIterator = regularExpressionUser.globalMatch(newStr);
0304 
0305     const auto userMentionForegroundColor = ColorsAndMessageViewStyle::self().schemeView().foreground(KColorScheme::NegativeText).color().name();
0306     const auto userMentionBackgroundColor = ColorsAndMessageViewStyle::self().schemeView().background(KColorScheme::NegativeBackground).color().name();
0307     while (userIterator.hasNext()) {
0308         const QRegularExpressionMatch match = userIterator.next();
0309 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0310         const QStringRef word = match.capturedRef(2);
0311 #else
0312         const QStringView word = match.capturedView(2);
0313 #endif
0314         // Highlight only if it's yours
0315 
0316         QString userIdentifier = mentions.value(word.toString());
0317         if (userIdentifier.isEmpty()) {
0318             userIdentifier = word.toString();
0319         }
0320         if (word == username) {
0321 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0322             newStr.replace(QLatin1Char('@') + word,
0323                            QStringLiteral("<a href=\'ruqola:/user/%4\' style=\"color:%2;background-color:%3;font-weight:bold\">@%1</a>")
0324                                .arg(word.toString(), userMentionForegroundColor, userMentionBackgroundColor, userIdentifier));
0325 #else
0326             newStr.replace(QLatin1Char('@') + word.toString(),
0327                            QStringLiteral("<a href=\'ruqola:/user/%4\' style=\"color:%2;background-color:%3;font-weight:bold\">@%1</a>")
0328                                .arg(word.toString(), userMentionForegroundColor, userMentionBackgroundColor, userIdentifier));
0329 
0330 #endif
0331         } else {
0332 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0333             newStr.replace(QLatin1Char('@') + word, QStringLiteral("<a href=\'ruqola:/user/%2\'>@%1</a>").arg(word, userIdentifier));
0334 #else
0335             newStr.replace(QLatin1Char('@') + word.toString(), QStringLiteral("<a href=\'ruqola:/user/%2\'>@%1</a>").arg(word, userIdentifier));
0336 #endif
0337         }
0338     }
0339 
0340     return newStr;
0341 }
0342 }
0343 
0344 QString TextConverter::convertMessageText(const ConvertMessageTextSettings &settings, QString &needUpdateMessageId, int &recusiveIndex)
0345 {
0346     if (!settings.emojiManager) {
0347         qCWarning(RUQOLA_TEXTTOHTML_LOG) << "Emojimanager is null";
0348     }
0349 
0350     QString quotedMessage;
0351 
0352     QString str = settings.str;
0353     // TODO we need to look at room name too as we can have it when we use "direct reply"
0354     if (str.contains(QLatin1String("[ ](http"))
0355         && (settings.maximumRecursiveQuotedText == -1 || (settings.maximumRecursiveQuotedText > recusiveIndex))) { // ## is there a better way?
0356         const int startPos = str.indexOf(QLatin1Char('('));
0357         const int endPos = str.indexOf(QLatin1Char(')'));
0358         const QString url = str.mid(startPos + 1, endPos - startPos - 1);
0359         // URL example https://HOSTNAME/channel/all?msg=3BR34NSG5x7ZfBa22
0360         const QString messageId = url.mid(url.indexOf(QLatin1String("msg=")) + 4);
0361         // qCDebug(RUQOLA_TEXTTOHTML_LOG) << "Extracted messageId" << messageId;
0362         auto it = std::find_if(settings.allMessages.cbegin(), settings.allMessages.cend(), [messageId](const Message &msg) {
0363             return msg.messageId() == messageId;
0364         });
0365         if (it != settings.allMessages.cend()) {
0366             const ConvertMessageTextSettings newSetting(QLatin1Char('@') + (*it).username() + QStringLiteral(": ") + (*it).text(),
0367                                                         settings.userName,
0368                                                         settings.allMessages,
0369                                                         settings.highlightWords,
0370                                                         settings.emojiManager,
0371                                                         settings.messageCache,
0372                                                         (*it).mentions(),
0373                                                         (*it).channels(),
0374                                                         settings.searchedText,
0375                                                         settings.maximumRecursiveQuotedText);
0376             recusiveIndex++;
0377             const QString text = convertMessageText(newSetting, needUpdateMessageId, recusiveIndex);
0378             Utils::QuotedRichTextInfo info;
0379             info.url = url;
0380             info.richText = text;
0381             info.displayTime = (*it).dateTime();
0382             quotedMessage = Utils::formatQuotedRichText(std::move(info));
0383             str = str.left(startPos - 3) + str.mid(endPos + 1);
0384         } else {
0385             if (settings.messageCache) {
0386                 // TODO allow to reload index when we loaded message
0387                 Message *msg = settings.messageCache->messageForId(messageId);
0388                 if (msg) {
0389                     const ConvertMessageTextSettings newSetting(msg->text(),
0390                                                                 settings.userName,
0391                                                                 settings.allMessages,
0392                                                                 settings.highlightWords,
0393                                                                 settings.emojiManager,
0394                                                                 settings.messageCache,
0395                                                                 msg->mentions(),
0396                                                                 msg->channels(),
0397                                                                 settings.searchedText,
0398                                                                 settings.maximumRecursiveQuotedText);
0399                     recusiveIndex++;
0400                     const QString text = convertMessageText(newSetting, needUpdateMessageId, recusiveIndex);
0401                     Utils::QuotedRichTextInfo info;
0402                     info.url = url;
0403                     info.richText = text;
0404                     info.displayTime = msg->dateTime();
0405                     quotedMessage = Utils::formatQuotedRichText(std::move(info));
0406                     str = str.left(startPos - 3) + str.mid(endPos + 1);
0407                 } else {
0408                     qCDebug(RUQOLA_TEXTTOHTML_LOG) << "Quoted message" << messageId << "not found"; // could be a very old one
0409                     needUpdateMessageId = messageId;
0410                 }
0411             }
0412         }
0413     }
0414 
0415     QString richText;
0416     QTextStream richTextStream(&richText);
0417     const auto codeBackgroundColor = ColorsAndMessageViewStyle::self().schemeView().background(KColorScheme::AlternateBackground).color();
0418     const auto codeBorderColor = ColorsAndMessageViewStyle::self().schemeView().foreground(KColorScheme::InactiveText).color().name();
0419 
0420     QString highlighted;
0421     QTextStream stream(&highlighted);
0422     TextHighlighter highlighter(&stream);
0423     const auto useHighlighter = SyntaxHighlightingManager::self()->syntaxHighlightingInitialized();
0424 
0425     if (useHighlighter) {
0426         auto &repo = SyntaxHighlightingManager::self()->repo();
0427         highlighter.setTheme(codeBackgroundColor.lightness() < 128 ? repo.defaultTheme(KSyntaxHighlighting::Repository::DarkTheme)
0428                                                                    : repo.defaultTheme(KSyntaxHighlighting::Repository::LightTheme));
0429     }
0430     auto highlight = [&](const QString &codeBlock) {
0431         if (!useHighlighter) {
0432             return codeBlock;
0433         }
0434         stream.reset();
0435         stream.seek(0);
0436         highlighted.clear();
0437         highlighter.highlight(codeBlock);
0438         return highlighted;
0439     };
0440 
0441     auto addCodeChunk = [&](QString chunk) {
0442         const auto language = [&]() {
0443             const auto newline = chunk.indexOf(QLatin1Char('\n'));
0444             if (newline == -1) {
0445                 return QString();
0446             }
0447             return chunk.left(newline);
0448         }();
0449 
0450         auto definition = SyntaxHighlightingManager::self()->def(language);
0451         if (definition.isValid()) {
0452             chunk.remove(0, language.size() + 1);
0453         } else {
0454             definition = SyntaxHighlightingManager::self()->defaultDef();
0455         }
0456 
0457         highlighter.setDefinition(std::move(definition));
0458         // Qt's support for borders is limited to tables, so we have to jump through some hoops...
0459         richTextStream << QLatin1String("<table><tr><td style='background-color:") << codeBackgroundColor.name()
0460                        << QLatin1String("; padding: 5px; border: 1px solid ") << codeBorderColor << QLatin1String("'>") << highlight(chunk)
0461                        << QLatin1String("</td></tr></table>");
0462     };
0463 
0464     auto addInlineCodeChunk = [&](const QString &chunk) {
0465         richTextStream << QLatin1String("<code style='background-color:") << codeBackgroundColor.name() << QLatin1String("'>") << chunk.toHtmlEscaped()
0466                        << QLatin1String("</code>");
0467     };
0468 
0469     auto addTextChunk = [&](const QString &chunk) {
0470         auto htmlChunk = generateRichText(chunk, settings.userName, settings.highlightWords, settings.mentions, settings.channels, settings.searchedText);
0471         if (settings.emojiManager) {
0472             settings.emojiManager->replaceEmojis(&htmlChunk);
0473         }
0474         richTextStream << htmlChunk;
0475     };
0476     auto addInlineQuoteCodeChunk = [&](const QString &chunk) {
0477         auto htmlChunk = generateRichText(chunk, settings.userName, settings.highlightWords, settings.mentions, settings.channels, settings.searchedText);
0478         if (settings.emojiManager) {
0479             settings.emojiManager->replaceEmojis(&htmlChunk);
0480         }
0481         richTextStream << QLatin1String("<code style='background-color:") << codeBackgroundColor.name() << QLatin1String("'>") << htmlChunk
0482                        << QLatin1String("</code>");
0483     };
0484 
0485     auto addInlineQuoteCodeNewLineChunk = [&]() {
0486         richTextStream << QLatin1String("<br />");
0487     };
0488 
0489     auto addInlineQuoteChunk = [&](const QString &chunk) {
0490         iterateOverEndLineRegions(chunk, QStringLiteral(">"), addInlineQuoteCodeChunk, addTextChunk, addInlineQuoteCodeNewLineChunk);
0491     };
0492     auto addNonCodeChunk = [&](QString chunk) {
0493         chunk = chunk.trimmed();
0494         if (chunk.isEmpty()) {
0495             return;
0496         }
0497 
0498         richTextStream << QLatin1String("<div>");
0499         iterateOverRegions(chunk, QStringLiteral("`"), addInlineCodeChunk, addInlineQuoteChunk);
0500         richTextStream << QLatin1String("</div>");
0501     };
0502 
0503     iterateOverRegions(str, QStringLiteral("```"), addCodeChunk, addNonCodeChunk);
0504 
0505     return QLatin1String("<qt>") + quotedMessage + richText + QLatin1String("</qt>");
0506 }