File indexing completed on 2024-12-22 04:48:10

0001 /*
0002     SPDX-License-Identifier: GPL-2.0-or-later
0003     SPDX-FileCopyrightText: 2023 Louis Schul <schul9louis@gmail.com>
0004 */
0005 
0006 // CREDIT TO ORIGINAL IDEA: https://marked.js.org/
0007 
0008 #pragma once
0009 
0010 #include <QRegularExpression>
0011 
0012 class Parser;
0013 
0014 class BlockLexer
0015 {
0016 public:
0017     explicit BlockLexer(Parser *parser);
0018 
0019     void lex(QString &src);
0020 
0021 private:
0022     QString preprocess(QString &src) const;
0023     void tokenize(QString &src, const bool top);
0024     QStringList splitCells(QString &tableRow, const int count = -1) const;
0025 
0026     QMap<QString, QRegularExpression> preprocessRegex{{QStringLiteral("\n"), QRegularExpression(QStringLiteral("\r\n|\r|\u2424"))},
0027                                                       {QStringLiteral("    "), QRegularExpression(QStringLiteral("\t"))},
0028                                                       {QStringLiteral(" "), QRegularExpression(QStringLiteral("\u00a0"))},
0029                                                       {QStringLiteral(""), QRegularExpression(QStringLiteral("^ +$"))}};
0030 
0031     inline static const QRegularExpression block_newline = QRegularExpression(QStringLiteral("^\n+"));
0032 
0033     inline static const QRegularExpression block_code = QRegularExpression(QStringLiteral("^( {4}[^\n]+\n*)+"));
0034 
0035     inline static const QRegularExpression block_fences =
0036         QRegularExpression(QStringLiteral("^ *(\\`{3,}|~{3,})[ \\.]*(\\S+)? *\n([\\s\\S]*?)\n? *\\1 *(?:\n+|$)"));
0037 
0038     inline static const QRegularExpression block_heading = QRegularExpression(QStringLiteral("^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)"));
0039 
0040     inline static const QRegularExpression block_nptable =
0041         QRegularExpression(QStringLiteral("^ *([^|\n ].*\\|.*)\n *([-:]+ *\\|[-| :]*)(?:\n((?:.*[^&gt;\n ].*(?:\n|$))*)\n*|$)"));
0042 
0043     inline static const QRegularExpression block_hr = QRegularExpression(QStringLiteral("^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\n+|$)"));
0044 
0045     inline static const QRegularExpression block_blockquote = QRegularExpression(QStringLiteral(
0046         "^( {0,3}> ?(([^\n]+(?:\n(?! {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\n+|$)| *(#{1,6}) *([^\\n]+?) *(?:#+ *)?(?:\n+|$)|([^\\n]+)\n *(=|-){2,} "
0047         "*(?:\n+|$)| "
0048         "{0,3}>|<\\/"
0049         "?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|"
0050         "form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|section|source|"
0051         "summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?: +|\\n|\\/?>)|<(?:script|pre|style|!--))[^\n]+)*)|[^\n]*)(?:\\n|$))+"));
0052 
0053     inline static const QRegularExpression block_list = QRegularExpression(QStringLiteral(
0054         "^( *)((?:[\\*\\+\\-]|\\d+\\.)) [\\s\\S]+?(?:\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\n+|$))|\n+(?= "
0055         "{0,3}\\[((?!\\s*\\])(?:\\\\[\\[\\]]|[^\\[\\]])+)\\]: *\n? *<!--?([^\\s\\-\\->]+)&gt;?(?:(?: +\n? *| *\n "
0056         "*)((?:\"(?:\\\\\"?|[^\"\\\\])*\"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\\([^()]*\\))))? *(?:\n+|$))|\n{2,}(?! )(?!\1(?:[\\*\\+\\-]|\\d+\\.) )\n*|\\s*$)"));
0057 
0058     inline static const QRegularExpression block_item =
0059         QRegularExpression(QStringLiteral("^( *)((?:[*+-]|\\d+\\.)) [^\\n]*(?:\\n(?!\\1(?:[*+-]|\\d+\\.) )[^\\n]*)*"), QRegularExpression::MultilineOption);
0060 
0061     inline static const QRegularExpression block_html = QRegularExpression(
0062         QStringLiteral(
0063             "^ "
0064             "{0,3}(?:&lt;(script|pre|style)[\\s&gt;][\\s\\S]*?(?:&lt;\\/"
0065             "\1&gt;[^\n]*\n+|$)|<!--(?!-?>)[\\s\\S]*?-->[^\n]*(\n+|$)|&lt;\\?[\\s\\S]*?\\?&gt;\n*|<!--[A-Z][\\s\\S]*?-->\n*|<!--\\[CDATA\\[[\\s\\S]*?\\]\\]-->"
0066             "\n*|&"
0067             "lt;\\/"
0068             "?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|"
0069             "footer|"
0070             "form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|section|"
0071             "source|"
0072             "summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?: +|\n|\\/?&gt;)[\\s\\S]*?(?:\n{2,}|$)|&lt;(?!script|pre|style)([a-z][\\w-]*)(?: "
0073             "+[a-zA-Z:_][\\w.:-]*(?: *= *\"[^\"\n]*\"| *= *'[^'\n]*'| *= *[^\\s\"'=&lt;&gt;`]+)?)*? "
0074             "*\\/?&gt;(?=\\h*\n)[\\s\\S]*?(?:\n{2,}|$)|&lt;\\/(?!script|pre|style)[a-z][\\w-]*\\s*&gt;(?=\\h*\n)[\\s\\S]*?(?:\n{2,}|$))"),
0075         QRegularExpression::CaseInsensitiveOption);
0076 
0077     inline static const QRegularExpression block_def =
0078         QRegularExpression(QStringLiteral("^ {0,3}\\[((?!\\s*\\])(?:\\\\[\\[\\]]|[^\\[\\]])+)\\]: *\\n? *<?([^\\s>]+)>?(?:(?: +\\n? *| *\\n "
0079                                           "*)((?:\"(?:\\\\\"?|[^\"\\\\])*\"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\\([^()]*\\))))? *(?:\\n+|$)"));
0080 
0081     inline static const QRegularExpression block_table =
0082         QRegularExpression(QStringLiteral("^ *\\|(.+)\n *\\|?( *[-:]+[-| :]*)(?:\n((?: *[^&gt;\n ].*(?:\n|$))*)\n*|$)"));
0083 
0084     inline static const QRegularExpression block_lheading = QRegularExpression(QStringLiteral("^([^\n]+)\n *(=|-){2,} *(?:\n+|$)"));
0085 
0086     inline static const QRegularExpression block_paragraph = QRegularExpression(QStringLiteral(
0087         "^([^\\n]+(?:\\n(?! *(`{3,}|~{3,})[ \\.]*(\\S+)? *\\n([\\s\\S]*?)\\n? *\\2 *(?:\\n+|$)|( *)((?:[*+-]|\\d+\\.)) [\\s\\S]+?(?:\\n+(?=\\3?(?:(?:- "
0088         "*){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))|\\n+(?= {0,3}\\[((?!\\s*\\])(?:\\\\[\\[\\]]|[^\\[\\]])+)\\]: *\\n? *<?([^\\s>]+)>?(?:(?: +\\n? *| *\\n "
0089         "*)((?:\"(?:\\\\\"?|[^\"\\\\])*\"|'[^'\\n]*(?:\\n[^'\\n]+)*\\n?'|\\([^()]*\\))))? *(?:\\n+|$))|\\n{2,}(?! )(?!\\1(?:[*+-]|\\d+\\.) )\\n*|\\s*$)| "
0090         "{0,3}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)| *(#{1,6}) *([^\\n]+?) *(?:#+ *)?(?:\\n+|$)|([^\\n]+)\\n *(=|-){2,} *(?:\\n+|$)| "
0091         "{0,3}>|<\\/"
0092         "?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|"
0093         "form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|section|source|"
0094         "summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?: +|\\n|\\/?>)|<(?:script|pre|style|!--))[^\\n]+)*)"));
0095 
0096     inline static const QRegularExpression block_text = QRegularExpression(QStringLiteral("^[^\n]+"));
0097 
0098     Parser *m_parser;
0099 };