File indexing completed on 2024-12-22 04:48:10

0001 /*
0002     SPDX-License-Identifier: GPL-2.0-or-later
0003     SPDX-FileCopyrightText: 2023 Louis Schul <schul9louis@gmail.com>
0004 */
0005 
0006 // CREDIT TO ORIGINAL IDEA: https://marked.js.org/
0007 
0008 #include "blockLexer.h"
0009 
0010 #include <QJsonArray>
0011 #include <QMap>
0012 
0013 #include "kleverconfig.h"
0014 #include "parser.h"
0015 
0016 BlockLexer::BlockLexer(Parser *parser)
0017     : m_parser(parser)
0018 {
0019 }
0020 
0021 void BlockLexer::lex(QString &src)
0022 {
0023     src = preprocess(src);
0024     tokenize(src, true);
0025 }
0026 
0027 QString BlockLexer::preprocess(QString &src) const
0028 {
0029     QRegularExpressionMatch cap;
0030 
0031     for (auto &pat : preprocessRegex.toStdMap()) {
0032         cap = pat.second.match(src);
0033         while (cap.hasMatch()) {
0034             src = src.replace(pat.second, pat.first);
0035             cap = pat.second.match(src);
0036         }
0037     }
0038 
0039     return src;
0040 };
0041 
0042 void BlockLexer::tokenize(QString &remaining, const bool top)
0043 {
0044     static const QString emptyStr = QLatin1String();
0045     QRegularExpressionMatch cap;
0046 
0047     static PluginHelper *pluginHelper = m_parser->getPluginHelper();
0048     static NoteMapperParserUtils *mapperParserUtils = pluginHelper->getMapperParserUtils();
0049     static HighlightParserUtils *highlightParserUtils = pluginHelper->getHighlightParserUtils();
0050     static PUMLParserUtils *pumlParserUtils = pluginHelper->getPUMLParserUtils();
0051 
0052     while (!remaining.isEmpty()) {
0053         cap = block_newline.match(remaining);
0054         if (cap.hasMatch()) {
0055             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0056 
0057             if (cap.capturedLength() > 1) {
0058                 static const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("space")}};
0059                 m_parser->tokens.append(tok);
0060             }
0061         }
0062 
0063         cap = block_code.match(remaining);
0064         if (cap.hasMatch()) {
0065             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0066 
0067             QString cap0 = cap.captured(0);
0068             static const QRegularExpression fourSpaceBlockReg = QRegularExpression(QStringLiteral("^ {4}"), QRegularExpression::MultilineOption);
0069             cap0.replace(fourSpaceBlockReg, emptyStr);
0070             static const QRegularExpression newLineReg = QRegularExpression(QStringLiteral("\n+$"));
0071             const QString text = cap0.replace(newLineReg, emptyStr);
0072 
0073             const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("code")}, {QStringLiteral("text"), text}};
0074             m_parser->tokens.append(tok);
0075             continue;
0076         }
0077 
0078         cap = block_fences.match(remaining);
0079         if (cap.hasMatch()) {
0080             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0081 
0082             const QString text = cap.captured(3);
0083             const QString lang = cap.captured(2).trimmed();
0084             const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("code")}, {QStringLiteral("text"), text}, {QStringLiteral("lang"), lang}};
0085             m_parser->tokens.append(tok);
0086             if (KleverConfig::pumlEnabled() && (lang.toLower() == QStringLiteral("puml") || lang.toLower() == QStringLiteral("plantuml"))) {
0087                 pumlParserUtils->addToNotePUMLBlock(text);
0088             } else if (KleverConfig::codeSynthaxHighlightEnabled() && !lang.isEmpty()) { // Send only the value that will be highlighted
0089                 highlightParserUtils->addToNoteCodeBlocks(text);
0090             }
0091             continue;
0092         }
0093 
0094         cap = block_heading.match(remaining);
0095         if (cap.hasMatch()) {
0096             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0097 
0098             if (KleverConfig::noteMapEnabled()) {
0099                 mapperParserUtils->addToNoteHeaders(cap.captured(0).trimmed());
0100             }
0101 
0102             const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("heading")},
0103                                   {QStringLiteral("depth"), cap.capturedLength(1)},
0104                                   {QStringLiteral("text"), cap.captured(2)}};
0105             m_parser->tokens.append(tok);
0106             continue;
0107         }
0108 
0109         cap = block_nptable.match(remaining);
0110         if (top && cap.hasMatch()) {
0111             static const QRegularExpression headerPipeReg = QRegularExpression(QStringLiteral("^ *| *\\| *$"));
0112             const QStringList headerList = splitCells(cap.captured(1).replace(headerPipeReg, emptyStr));
0113 
0114             static const QRegularExpression alignReg = QRegularExpression(QStringLiteral("^ *|\\| *$"));
0115             static const QRegularExpression alignSplitterReg = QRegularExpression(QStringLiteral(" *\\| *"));
0116             QStringList alignList = cap.captured(2).replace(alignReg, emptyStr).split(alignSplitterReg);
0117             if (alignList.last().isEmpty())
0118                 alignList.removeLast();
0119 
0120             static const QRegularExpression endingNewLineReg = QRegularExpression(QStringLiteral("\n$"));
0121             QStringList allCells = cap.captured(3).replace(endingNewLineReg, emptyStr).split(QStringLiteral("\n"));
0122             if (allCells.last().isEmpty())
0123                 allCells.removeLast();
0124 
0125             QJsonArray cells;
0126 
0127             const int headerSize = headerList.size();
0128             const int alignSize = alignList.size();
0129             if (headerSize == alignSize) {
0130                 remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0131 
0132                 for (int i = 0; i < alignSize; i++) {
0133                     static const QRegularExpression rightAlignReg = QRegularExpression(QStringLiteral("^ *-+: *$"));
0134                     static const QRegularExpression centerAlignReg = QRegularExpression(QStringLiteral("^ *:-+: *$"));
0135                     static const QRegularExpression leftAlignReg = QRegularExpression(QStringLiteral("^ *:-+ *$"));
0136                     if (rightAlignReg.match(alignList[i]).hasMatch()) {
0137                         alignList[i] = QStringLiteral("right");
0138                     } else if (centerAlignReg.match(alignList[i]).hasMatch()) {
0139                         alignList[i] = QStringLiteral("center");
0140                     } else if (leftAlignReg.match(alignList[i]).hasMatch()) {
0141                         alignList[i] = QStringLiteral("left");
0142                     } else {
0143                         alignList[i] = emptyStr;
0144                     }
0145                 }
0146                 for (int i = 0; i < allCells.size(); i++) {
0147                     const auto cellsList = QJsonArray::fromStringList(splitCells(allCells[i], headerSize));
0148                     cells.append(QJsonValue(cellsList));
0149                 }
0150                 const QVariantMap item{{QStringLiteral("type"), QStringLiteral("table")},
0151                                        {QStringLiteral("header"), headerList},
0152                                        {QStringLiteral("align"), alignList},
0153                                        {QStringLiteral("cells"), cells}};
0154                 m_parser->tokens.append(item);
0155                 continue;
0156             }
0157         }
0158 
0159         cap = block_hr.match(remaining);
0160         if (cap.hasMatch()) {
0161             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0162 
0163             static const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("hr")}};
0164             m_parser->tokens.append(tok);
0165             continue;
0166         }
0167 
0168         cap = block_blockquote.match(remaining);
0169         if (cap.hasMatch()) {
0170             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0171 
0172             static const QVariantMap startingTok{{QStringLiteral("type"), QStringLiteral("blockquote_start")}};
0173             m_parser->tokens.append(startingTok);
0174 
0175             QString cap0 = cap.captured(0);
0176             static const QRegularExpression quoteBlockReg = QRegularExpression(QStringLiteral("^ *> ?"), QRegularExpression::MultilineOption);
0177             cap0.replace(quoteBlockReg, emptyStr);
0178 
0179             tokenize(cap0, top);
0180 
0181             static const QVariantMap endingTok{{QStringLiteral("type"), QStringLiteral("blockquote_end")}};
0182             m_parser->tokens.append(endingTok);
0183             continue;
0184         }
0185 
0186         cap = block_list.match(remaining);
0187         if (cap.hasMatch()) {
0188             remaining.replace(cap.captured(0), emptyStr);
0189 
0190             QString bull = cap.captured(2);
0191             const bool isOrdered = bull.length() > 1;
0192             static const QString dotStr = QStringLiteral(".");
0193             if (bull.endsWith(dotStr))
0194                 bull.remove(dotStr);
0195 
0196             const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("list_start")},
0197                                   {QStringLiteral("ordered"), isOrdered},
0198                                   {QStringLiteral("start"), isOrdered ? bull : emptyStr}};
0199             m_parser->tokens.append(tok);
0200 
0201             QRegularExpressionMatchIterator globalCap = block_item.globalMatch(cap.captured(0));
0202             bool next = false;
0203 
0204             while (globalCap.hasNext()) {
0205                 const auto matchedItem = globalCap.next();
0206                 QString item = matchedItem.captured();
0207 
0208                 int space = item.length();
0209                 static const QRegularExpression bulletReg = QRegularExpression(QStringLiteral("^ *([*+-]|\\d+\\.) +"));
0210                 const QRegularExpressionMatch firstBulletCatch = bulletReg.match(item);
0211                 item.replace(firstBulletCatch.capturedStart(), firstBulletCatch.capturedLength(), emptyStr);
0212 
0213                 if (item.indexOf(QStringLiteral("\n ")) != -1) {
0214                     space -= item.length();
0215                     static const QRegularExpression multiSpaceBlockReg =
0216                         QRegularExpression(QStringLiteral("^ {1,") + QString::number(space) + QStringLiteral("}"), QRegularExpression::MultilineOption);
0217                     item.replace(multiSpaceBlockReg, emptyStr);
0218                 }
0219 
0220                 static const QRegularExpression looseItemReg = QRegularExpression(QStringLiteral("\n\n(?!\\s*$)"));
0221                 bool loose = next || looseItemReg.match(item).hasMatch();
0222 
0223                 if (globalCap.hasNext()) {
0224                     next = !item.isEmpty() && item[item.length() - 1] == QChar::fromLatin1('\n');
0225                     if (!loose) {
0226                         loose = next;
0227                     }
0228                 }
0229 
0230                 static const QRegularExpression taskCatcherReg = QRegularExpression(QStringLiteral("(^\\[[ xX]\\] )"));
0231                 const QRegularExpressionMatch taskCatcher = taskCatcherReg.match(item);
0232                 const bool istask = taskCatcher.hasMatch();
0233                 bool ischecked = false;
0234                 if (istask) {
0235                     ischecked = item[1] != QChar::fromLatin1(' ');
0236                     item.replace(taskCatcher.capturedStart(1), taskCatcher.capturedLength(1), emptyStr);
0237                 }
0238                 const QVariantMap startingTok{{QStringLiteral("type"), loose ? QStringLiteral("loose_item_start") : QStringLiteral("list_item_start")},
0239                                               {QStringLiteral("task"), istask},
0240                                               {QStringLiteral("checked"), ischecked}};
0241                 m_parser->tokens.append(startingTok);
0242 
0243                 tokenize(item, false);
0244 
0245                 static const QVariantMap endingTok{{QStringLiteral("type"), QStringLiteral("list_item_end")}};
0246                 m_parser->tokens.append(endingTok);
0247             }
0248             static const QVariantMap endingTok{{QStringLiteral("type"), QStringLiteral("list_end")}};
0249             m_parser->tokens.append(endingTok);
0250 
0251             continue;
0252         }
0253 
0254         cap = block_html.match(remaining);
0255         if (cap.hasMatch()) {
0256             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0257 
0258             const bool pre =
0259                 (cap.captured(1) == QStringLiteral("pre") || cap.captured(1) == QStringLiteral("script") || cap.captured(1) == QStringLiteral("type"));
0260 
0261             const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("html")}, {QStringLiteral("pre"), pre}, {QStringLiteral("text"), cap.captured(0)}};
0262             m_parser->tokens.append(tok);
0263             continue;
0264         }
0265 
0266         cap = block_def.match(remaining);
0267         if (top && cap.hasMatch()) {
0268             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0269 
0270             static const QRegularExpression whiteSpaceReg = QRegularExpression(QStringLiteral("\\s+"));
0271             const QString tag = cap.captured(1).toLower().replace(whiteSpaceReg, emptyStr);
0272             if (!m_parser->links.contains(tag)) {
0273                 const QMap<QString, QString> link{{QStringLiteral("href"), cap.captured(2)}, {QStringLiteral("title"), cap.captured(3)}};
0274                 m_parser->links.insert(tag, link);
0275             }
0276             continue;
0277         }
0278 
0279         cap = block_table.match(remaining);
0280         if (top && cap.hasMatch()) {
0281             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0282 
0283             static const QRegularExpression headerPipeReg = QRegularExpression(QStringLiteral("^ *| *$"));
0284             const QStringList headerList = splitCells(cap.captured(1).replace(headerPipeReg, emptyStr));
0285 
0286             static const QRegularExpression alignReg = QRegularExpression(QStringLiteral("^ *|\\| *$"));
0287             static const QRegularExpression alignSplitterReg = QRegularExpression(QStringLiteral(" *\\| *"));
0288             QStringList alignList = cap.captured(2).replace(alignReg, emptyStr).split(alignSplitterReg);
0289             if (alignList.last().isEmpty())
0290                 alignList.removeLast();
0291 
0292             static const QRegularExpression cellReg = QRegularExpression(QStringLiteral("(?: *\\| *)?\n$"));
0293             QStringList allCells = cap.captured(3).replace(cellReg, emptyStr).split(QStringLiteral("\n"));
0294             if (allCells.last().isEmpty())
0295                 allCells.removeLast();
0296 
0297             QJsonArray cells;
0298 
0299             const int headerSize = headerList.size();
0300             const int alignSize = alignList.size();
0301             if (headerSize == alignSize) {
0302                 for (int i = 0; i < alignSize; i++) {
0303                     static const QRegularExpression rightAlignReg = QRegularExpression(QStringLiteral("^ *-+: *$"));
0304                     static const QRegularExpression centerAlignReg = QRegularExpression(QStringLiteral("^ *:-+: *$"));
0305                     static const QRegularExpression leftAlignReg = QRegularExpression(QStringLiteral("^ *:-+ *$"));
0306                     if (rightAlignReg.match(alignList[i]).hasMatch()) {
0307                         alignList[i] = QStringLiteral("right");
0308                     } else if (centerAlignReg.match(alignList[i]).hasMatch()) {
0309                         alignList[i] = QStringLiteral("center");
0310                     } else if (leftAlignReg.match(alignList[i]).hasMatch()) {
0311                         alignList[i] = QStringLiteral("left");
0312                     } else {
0313                         alignList[i] = emptyStr;
0314                     }
0315                 }
0316 
0317                 QJsonArray cellsList;
0318                 const int cellsSize = allCells.size();
0319                 for (int i = 0; i < cellsSize; i++) {
0320                     static const QRegularExpression tableCellReg = QRegularExpression(QStringLiteral("^ *\\| *| *\\| *$"));
0321                     cellsList = QJsonArray::fromStringList(splitCells(allCells[i].replace(tableCellReg, emptyStr), headerSize));
0322                     cells.append(QJsonValue(cellsList));
0323                 }
0324 
0325                 const QVariantMap item{{QStringLiteral("type"), QStringLiteral("table")},
0326                                        {QStringLiteral("header"), headerList},
0327                                        {QStringLiteral("align"), alignList},
0328                                        {QStringLiteral("cells"), cells}};
0329                 m_parser->tokens.append(item);
0330                 continue;
0331             }
0332         }
0333 
0334         cap = block_lheading.match(remaining);
0335         if (cap.hasMatch()) {
0336             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0337 
0338             const int depth = cap.captured(2) == QStringLiteral("=") ? 1 : 2;
0339 
0340             const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("heading")},
0341                                   {QStringLiteral("depth"), depth},
0342                                   {QStringLiteral("text"), cap.captured(1)}};
0343             m_parser->tokens.append(tok);
0344             continue;
0345         }
0346 
0347         cap = block_paragraph.match(remaining);
0348         if (top && cap.hasMatch()) {
0349             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0350 
0351             const QString text = cap.captured(1).endsWith(QStringLiteral("\n")) ? cap.captured(1).left(cap.captured(1).length() - 1) : cap.captured(1);
0352 
0353             const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("paragraph")}, {QStringLiteral("text"), text}};
0354             m_parser->tokens.append(tok);
0355             continue;
0356         }
0357 
0358         cap = block_text.match(remaining);
0359         if (cap.hasMatch()) {
0360             remaining.replace(cap.capturedStart(), cap.capturedLength(), emptyStr);
0361 
0362             const QVariantMap tok{{QStringLiteral("type"), QStringLiteral("text")}, {QStringLiteral("text"), cap.captured(0)}};
0363             m_parser->tokens.append(tok);
0364             continue;
0365         }
0366 
0367         if (!remaining.isEmpty()) {
0368             qFatal("Infinite loop on byte: %d", remaining[0].unicode());
0369         }
0370     }
0371 }
0372 
0373 QStringList BlockLexer::splitCells(QString &tableRow, const int count) const
0374 {
0375     static const QRegularExpression cellReg = QRegularExpression(QStringLiteral("([^\\\\])\\|"));
0376     static const QRegularExpression cellSplitterReg = QRegularExpression(QStringLiteral(" +\\| *"));
0377     QStringList cells = tableRow.replace(cellReg, QStringLiteral("\\1 |")).split(cellSplitterReg);
0378     if (cells.last().isEmpty())
0379         cells.removeLast();
0380 
0381     if (cells.length() > count && count > -1) {
0382         cells.erase(cells.end() - count, cells.end());
0383     } else {
0384         while (cells.length() < count) {
0385             cells.append(QLatin1String());
0386         }
0387     }
0388 
0389     for (int i = 0; i < cells.length(); i++) {
0390         static const QRegularExpression pipeReg = QRegularExpression(QStringLiteral("\\\\\\|"));
0391         cells[i] = cells[i].replace(pipeReg, QStringLiteral("|"));
0392     }
0393 
0394     return cells;
0395 }