File indexing completed on 2024-04-21 05:49:06
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2021 Ilia Kats <ilia-kats@gmx.net>
# SPDX-License-Identifier: LGPL-2.0-or-later

"""Generate the C++ header ``completiontable.h`` from the Julia manual.

The script downloads the "Unicode Input" page of the Julia documentation,
extracts the rows of the first table inside the page container and writes

* a ``Completion`` struct definition,
* a ``completiontable`` array with one entry per tab-completion sequence,
  sorted by sequence, and
* a ``QRegularExpression`` matching a backslash followed by characters that
  occur in the sequences, anchored at the end of the input.
"""

from urllib import request
from html.parser import HTMLParser
from string import ascii_letters, digits

JULIA_UNICODE_DOCUMENTATION_URL = "https://docs.julialang.org/en/v1/manual/unicode-input/"
CONTAINER_ID = "documenter-page"
OUTFNAME = "completiontable.h"

# The header writer reads columns 0..3 of every row (code point, characters,
# completion sequence, name), so shorter rows cannot be used.
_MIN_COLUMNS = 4


class JuliaUnicodeCompletionsParser(HTMLParser):
    """Collect the body rows of the first table inside ``CONTAINER_ID``.

    After the document has been fed, ``table`` holds one tuple per completion
    sequence.  A row whose third cell lists several comma-separated sequences
    is expanded into one tuple per sequence; the other cells are repeated
    unchanged.  The first ``<tr>`` of the table is treated as the header and
    skipped.
    """

    def __init__(self):
        super().__init__()
        self.table = []
        self._in_container = False
        self._in_table = False
        self._in_header = False
        self._in_body = False
        self._in_cell = False
        self._finished = False

        self._current_row = None

    def handle_starttag(self, tag, attrs):
        """Track entry into the container, the table, its rows and cells."""
        if self._finished:
            return
        if not self._in_container:
            # Nothing is parsed until the element carrying the id is seen.
            if any(name == "id" and value == CONTAINER_ID for name, value in attrs):
                self._in_container = True
        elif not self._in_table and tag == "table":
            self._in_table = True
        elif self._in_table:
            if tag == "tr":
                if not self._in_header and not self._in_body:
                    self._in_header = True
                else:
                    self._in_body = True
                    self._current_row = []
                    self._in_cell = False
            elif tag == "td" and self._in_body:
                # Every cell gets its own slot, so an empty <td></td> still
                # occupies a column and text nodes cannot shift the indices.
                self._in_cell = True
                self._current_row.append("")

    def handle_data(self, data):
        """Append text to the cell currently being read."""
        if self._finished or not self._in_cell:
            # Text outside of <td> (e.g. whitespace between tags) is not data.
            return
        self._current_row[-1] += data

    def handle_endtag(self, tag):
        """Close cells, emit finished rows and stop at the end of the table."""
        if self._finished or not self._in_body:
            return
        if tag == "td":
            self._in_cell = False
        elif tag == "tr":
            row = self._current_row
            if len(row) >= _MIN_COLUMNS:
                for sequence in row[2].split(","):
                    self.table.append((*row[:2], sequence.strip(), *row[3:]))
            self._current_row = []
            self._in_cell = False
        elif tag == "table":
            self._finished = True


def fetch_completions(url=JULIA_UNICODE_DOCUMENTATION_URL):
    """Download *url* and return its completion rows sorted by sequence."""
    parser = JuliaUnicodeCompletionsParser()
    with request.urlopen(url) as page:
        # get_content_charset() is None when the server declares no charset.
        charset = page.headers.get_content_charset() or "utf-8"
        parser.feed(page.read().decode(charset))
    parser.close()

    parser.table.sort(key=lambda row: row[2])
    return parser.table


def _regex_charclass(chars):
    """Return *chars* as the inside of a regex character class.

    The result is meant to be pasted into a C++ string literal: characters
    are first escaped for the regex (``\\`` and ``]``), then for the literal
    (``\\`` and ``"``).  The characters are sorted so the generated file is
    reproducible, and ``-`` is placed last so it cannot form a range.
    """
    remaining = set(chars)
    have_dash = "-" in remaining
    remaining.discard("-")

    regex = "".join("\\" + ch if ch in "\\]" else ch for ch in sorted(remaining))
    if have_dash:
        regex += "-"
    # Backslashes first, otherwise the one escaping the quote gets doubled.
    return regex.replace("\\", "\\\\").replace('"', '\\"')


def write_header(table, out):
    """Write the C++ header for the completion rows *table* to *out*.

    *table* is a sequence of ``(codepoint, chars, sequence, name)`` tuples and
    *out* a text stream with a ``write`` method.
    """
    out.write(f"""\
#include <QString>
#include <QRegularExpression>
struct Completion {{
    const char16_t *completion;
    const char16_t *codepoint;
    const char16_t *chars;
    const char16_t *name;
    const uint16_t completion_strlen;
}};

static constexpr uint16_t n_completions = {len(table)};

static constexpr Completion completiontable[] = {{
""")

    wordchars = set(ascii_letters + digits + "_")
    completionchars = set()
    for i, completion in enumerate(table):
        sequence = completion[2]
        # The first character (the leading backslash) is matched separately by
        # the regex; \w covers word characters, so only the rest is collected.
        completionchars.update(ch for ch in sequence[1:] if ch not in wordchars)
        # Number of UTF-16 code units (not bytes): two bytes per unit, and the
        # "-le" codec emits no byte order mark.
        latexsymlength = len(sequence.encode("utf-16-le")) // 2
        latexsym = sequence.replace("\\", "\\\\")
        if i > 0:
            out.write(",")
        out.write(f'{{\n u"{latexsym}",\n'
                  f' u"{completion[0]}",\n'
                  f' u"{completion[1]}",\n'
                  f' u"{completion[3]}",\n'
                  f' {latexsymlength}\n}}\n')
    out.write("""\
};
""")

    charclass = _regex_charclass(completionchars)
    out.write(f'static const QRegularExpression latexexpr(QStringLiteral("\\\\\\\\:?[\\\\w{charclass}]+:?$"), QRegularExpression::DontCaptureOption);\n')


def main():
    """Fetch the completion table and write it to ``OUTFNAME``."""
    table = fetch_completions()
    if not table:
        # An empty table would produce a zero-length array in the header.
        raise SystemExit(f"no completions found at {JULIA_UNICODE_DOCUMENTATION_URL}")
    with open(OUTFNAME, "w", encoding="utf-8") as out:
        write_header(table, out)


if __name__ == "__main__":
    main()