File indexing completed on 2024-12-22 04:48:10
0001 /* 0002 SPDX-License-Identifier: GPL-2.0-or-later 0003 SPDX-FileCopyrightText: 2023 Louis Schul <schul9louis@gmail.com> 0004 */ 0005 0006 // CREDIT TO ORIGINAL IDEA: https://marked.js.org/ 0007 0008 #pragma once 0009 0010 #include <QRegularExpression> 0011 0012 class Parser; 0013 0014 class BlockLexer 0015 { 0016 public: 0017 explicit BlockLexer(Parser *parser); 0018 0019 void lex(QString &src); 0020 0021 private: 0022 QString preprocess(QString &src) const; 0023 void tokenize(QString &src, const bool top); 0024 QStringList splitCells(QString &tableRow, const int count = -1) const; 0025 0026 QMap<QString, QRegularExpression> preprocessRegex{{QStringLiteral("\n"), QRegularExpression(QStringLiteral("\r\n|\r|\u2424"))}, 0027 {QStringLiteral(" "), QRegularExpression(QStringLiteral("\t"))}, 0028 {QStringLiteral(" "), QRegularExpression(QStringLiteral("\u00a0"))}, 0029 {QStringLiteral(""), QRegularExpression(QStringLiteral("^ +$"))}}; 0030 0031 inline static const QRegularExpression block_newline = QRegularExpression(QStringLiteral("^\n+")); 0032 0033 inline static const QRegularExpression block_code = QRegularExpression(QStringLiteral("^( {4}[^\n]+\n*)+")); 0034 0035 inline static const QRegularExpression block_fences = 0036 QRegularExpression(QStringLiteral("^ *(\\`{3,}|~{3,})[ \\.]*(\\S+)? *\n([\\s\\S]*?)\n? *\\1 *(?:\n+|$)")); 0037 0038 inline static const QRegularExpression block_heading = QRegularExpression(QStringLiteral("^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)")); 0039 0040 inline static const QRegularExpression block_nptable = 0041 QRegularExpression(QStringLiteral("^ *([^|\n ].*\\|.*)\n *([-:]+ *\\|[-| :]*)(?:\n((?:.*[^>\n ].*(?:\n|$))*)\n*|$)")); 0042 0043 inline static const QRegularExpression block_hr = QRegularExpression(QStringLiteral("^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\n+|$)")); 0044 0045 inline static const QRegularExpression block_blockquote = QRegularExpression(QStringLiteral( 0046 "^( {0,3}> ?(([^\n]+(?:\n(?! {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\n+|$)| *(#{1,6}) *([^\\n]+?) *(?:#+ *)?(?:\n+|$)|([^\\n]+)\n *(=|-){2,} " 0047 "*(?:\n+|$)| " 0048 "{0,3}>|<\\/" 0049 "?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|" 0050 "form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|section|source|" 0051 "summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?: +|\\n|\\/?>)|<(?:script|pre|style|!--))[^\n]+)*)|[^\n]*)(?:\\n|$))+")); 0052 0053 inline static const QRegularExpression block_list = QRegularExpression(QStringLiteral( 0054 "^( *)((?:[\\*\\+\\-]|\\d+\\.)) [\\s\\S]+?(?:\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\n+|$))|\n+(?= " 0055 "{0,3}\\[((?!\\s*\\])(?:\\\\[\\[\\]]|[^\\[\\]])+)\\]: *\n? *<!--?([^\\s\\-\\->]+)>?(?:(?: +\n? *| *\n " 0056 "*)((?:\"(?:\\\\\"?|[^\"\\\\])*\"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\\([^()]*\\))))? *(?:\n+|$))|\n{2,}(?! )(?!\1(?:[\\*\\+\\-]|\\d+\\.) )\n*|\\s*$)")); 0057 0058 inline static const QRegularExpression block_item = 0059 QRegularExpression(QStringLiteral("^( *)((?:[*+-]|\\d+\\.)) [^\\n]*(?:\\n(?!\\1(?:[*+-]|\\d+\\.) )[^\\n]*)*"), QRegularExpression::MultilineOption); 0060 0061 inline static const QRegularExpression block_html = QRegularExpression( 0062 QStringLiteral( 0063 "^ " 0064 "{0,3}(?:<(script|pre|style)[\\s>][\\s\\S]*?(?:<\\/" 0065 "\1>[^\n]*\n+|$)|<!--(?!-?>)[\\s\\S]*?-->[^\n]*(\n+|$)|<\\?[\\s\\S]*?\\?>\n*|<!--[A-Z][\\s\\S]*?-->\n*|<!--\\[CDATA\\[[\\s\\S]*?\\]\\]-->" 0066 "\n*|&" 0067 "lt;\\/" 0068 "?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|" 0069 "footer|" 0070 "form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|section|" 0071 "source|" 0072 "summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?: +|\n|\\/?>)[\\s\\S]*?(?:\n{2,}|$)|<(?!script|pre|style)([a-z][\\w-]*)(?: " 0073 "+[a-zA-Z:_][\\w.:-]*(?: *= *\"[^\"\n]*\"| *= *'[^'\n]*'| *= *[^\\s\"'=<>`]+)?)*? " 0074 "*\\/?>(?=\\h*\n)[\\s\\S]*?(?:\n{2,}|$)|<\\/(?!script|pre|style)[a-z][\\w-]*\\s*>(?=\\h*\n)[\\s\\S]*?(?:\n{2,}|$))"), 0075 QRegularExpression::CaseInsensitiveOption); 0076 0077 inline static const QRegularExpression block_def = 0078 QRegularExpression(QStringLiteral("^ {0,3}\\[((?!\\s*\\])(?:\\\\[\\[\\]]|[^\\[\\]])+)\\]: *\\n? *<?([^\\s>]+)>?(?:(?: +\\n? *| *\\n " 0079 "*)((?:\"(?:\\\\\"?|[^\"\\\\])*\"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\\([^()]*\\))))? *(?:\\n+|$)")); 0080 0081 inline static const QRegularExpression block_table = 0082 QRegularExpression(QStringLiteral("^ *\\|(.+)\n *\\|?( *[-:]+[-| :]*)(?:\n((?: *[^>\n ].*(?:\n|$))*)\n*|$)")); 0083 0084 inline static const QRegularExpression block_lheading = QRegularExpression(QStringLiteral("^([^\n]+)\n *(=|-){2,} *(?:\n+|$)")); 0085 0086 inline static const QRegularExpression block_paragraph = QRegularExpression(QStringLiteral( 0087 "^([^\\n]+(?:\\n(?! *(`{3,}|~{3,})[ \\.]*(\\S+)? *\\n([\\s\\S]*?)\\n? *\\2 *(?:\\n+|$)|( *)((?:[*+-]|\\d+\\.)) [\\s\\S]+?(?:\\n+(?=\\3?(?:(?:- " 0088 "*){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))|\\n+(?= {0,3}\\[((?!\\s*\\])(?:\\\\[\\[\\]]|[^\\[\\]])+)\\]: *\\n? *<?([^\\s>]+)>?(?:(?: +\\n? *| *\\n " 0089 "*)((?:\"(?:\\\\\"?|[^\"\\\\])*\"|'[^'\\n]*(?:\\n[^'\\n]+)*\\n?'|\\([^()]*\\))))? *(?:\\n+|$))|\\n{2,}(?! )(?!\\1(?:[*+-]|\\d+\\.) )\\n*|\\s*$)| " 0090 "{0,3}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)| *(#{1,6}) *([^\\n]+?) *(?:#+ *)?(?:\\n+|$)|([^\\n]+)\\n *(=|-){2,} *(?:\\n+|$)| " 0091 "{0,3}>|<\\/" 0092 "?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|" 0093 "form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|section|source|" 0094 "summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?: +|\\n|\\/?>)|<(?:script|pre|style|!--))[^\\n]+)*)")); 0095 0096 inline static const QRegularExpression block_text = QRegularExpression(QStringLiteral("^[^\n]+")); 0097 0098 Parser *m_parser; 0099 };