File indexing completed on 2024-05-05 04:01:50
0001 #!/usr/bin/env python3 0002 # SPDX-FileCopyrightText: 2023 Jonathan Poelen <jonathan.poelen@gmail.com> 0003 # SPDX-License-Identifier: MIT 0004 0005 from argparse import ArgumentParser 0006 from pathlib import Path 0007 from typing import Iterable, Mapping 0008 from textwrap import wrap 0009 from xml.parsers import expat 0010 0011 import re 0012 0013 0014 parser = ArgumentParser( 0015 prog='generate-dot-file.py', 0016 description=f'''Dot file generator for xml syntax 0017 0018 Example: 0019 generate-dot-file.py data/syntax/lua.xml | dot -T svg -o image.svg && xdg-open image.svg''') 0020 0021 parser.add_argument('-c', '--context-only', action='store_true', 0022 help='Generates contexts without rules') 0023 0024 parser.add_argument('-r', '--resolve-entities', action='store_true', 0025 help='Evaluate xml entities') 0026 0027 parser.add_argument('-i', '--include', action='append', default=[], 0028 help='Include only contexts that respect a pattern') 0029 0030 parser.add_argument('-e', '--exclude', action='append', default=[], 0031 help='Exclude contexts that respect a pattern') 0032 0033 parser.add_argument('syntax', help='XML Syntax Definition Files') 0034 0035 0036 args = parser.parse_args() 0037 0038 excludes = [re.compile(patt) for patt in args.exclude] 0039 includes = [re.compile(patt) for patt in args.include] 0040 context_only = args.context_only 0041 resolve_entities = args.resolve_entities or context_only 0042 0043 0044 global_entities = { 0045 '	': '\\t', 0046 '%': '%', 0047 '&': '&', 0048 '&': '&', 0049 ''': "'", 0050 '&aquot;': "'", 0051 '"': '"', 0052 '"': '"', 0053 '<': '<', 0054 '<': '<', 0055 '>': '>', 0056 '>': '>', 0057 } 0058 entities_finder = re.compile('|'.join(global_entities)) 0059 0060 0061 0062 Outside = 0 0063 Context = 1 0064 Rule = 2 0065 0066 class XMLParser: 0067 depth = Outside 0068 matched = False 0069 ictx = 0 0070 ctx_name = '' 0071 ctx_attrs: dict[str, str] = {} 0072 escaped_ctx_name = '' 0073 ctx_color = '' 0074 irule = 0 0075 0076 resolve_entities = True 0077 reversed_entities: dict[str, str] = {} 0078 resolved_entity_searcher: re.Pattern 0079 0080 def __init__(self, start_ctx, end_ctx, rule_process): 0081 self.start_ctx = start_ctx 0082 self.end_ctx = end_ctx 0083 self.rule_process = rule_process 0084 0085 def start_element(self, tag: str, attrs: dict[str, str]): 0086 if self.depth == Context: 0087 self.depth = Rule 0088 if self.matched: 0089 self.irule += 1 0090 if not self.resolve_entities: 0091 string = attrs.get('String') 0092 if string: 0093 attrs['String'] = self.unresolve_entities(string) 0094 self.rule_process(self, self.irule, tag, attrs) 0095 elif tag == 'context': 0096 name = attrs['name'] 0097 self.depth = Context 0098 self.matched = (not match_patterns(name, excludes) 0099 and (not includes or match_patterns(name, includes))) 0100 if self.matched: 0101 self.irule = 0 0102 self.ctx_name = name 0103 self.ctx_attrs = attrs 0104 self.escaped_ctx_name = escape(name) 0105 self.ctx_color = compute_color(name) 0106 self.start_ctx(self) 0107 0108 def end_element(self, name: str): 0109 if self.depth == Context: 0110 if self.matched: 0111 self.end_ctx(self) 0112 self.ictx += 1 0113 self.depth -= 1 0114 0115 def unresolve_entities(self, s: str) -> str: 0116 """ 0117 expat module converts all entities. This function tries to do the 0118 opposite by replacing pieces of text with entities. 0119 0120 The result may differ from the original text, but will be equivalent. 0121 """ 0122 b = True 0123 def replace(m): 0124 nonlocal b 0125 b = True 0126 return self.reversed_entities[m[0]] 0127 while b: 0128 b = False 0129 s = self.resolved_entity_searcher.sub(replace, s) 0130 return s 0131 0132 def entity_decl(self, name, is_parameter_entity, value, base, system_id, public_id, notation_name): 0133 value = entities_finder.sub(lambda m: global_entities[m[0]], value) 0134 self.reversed_entities[value] = f'&{name};' 0135 0136 def end_doctype(self): 0137 patt = '|'.join(re.escape(value) for value in self.reversed_entities) 0138 self.resolved_entity_searcher = re.compile(patt) 0139 0140 0141 color_map = [ 0142 '"/rdgy4/3"', 0143 '"/set312/1"', 0144 '"lightgoldenrod1"', 0145 '"/set312/3"', 0146 '"/set312/4"', 0147 '"/set312/5"', 0148 '"/set312/6"', 0149 '"/set312/7"', 0150 '"/rdpu3/2"', 0151 '"/purd6/3"', 0152 '"/ylgn4/2"', 0153 '"/set26/6"', 0154 ] 0155 0156 picked_colors: dict[int, str] = {} 0157 0158 def compute_color(name: str) -> str: 0159 """ 0160 returns a color which depends on the first 2 characters 0161 """ 0162 k = ord(name[0]) 0163 if len(name) > 1: 0164 k += ord(name[1]) * 1024 0165 color = color_map[len(picked_colors) % len(color_map)] 0166 return picked_colors.setdefault(k, color) 0167 0168 0169 def match_patterns(name: str, patterns: list[re.Pattern]) -> bool: 0170 return any(patt.search(name) for patt in patterns) 0171 0172 0173 _pop_counter_re = re.compile('^(?:#pop)+') 0174 0175 def labelize(name: str) -> str: 0176 m = _pop_counter_re.match(name) 0177 if m: 0178 n = len(m[0]) // 4 0179 if n > 1: 0180 return f'#pop({n}){name[n * 4:]}' 0181 return name 0182 0183 0184 def stringify_attrs(attr_names: Iterable[str], attrs: Mapping[str, str]) -> str: 0185 s = '' 0186 for name in attr_names: 0187 attr = attrs.get(name) 0188 if attr: 0189 part = '\n'.join(wrap(attr, 40)) 0190 s += f' {v}:{part}' 0191 return s 0192 0193 0194 def escape(s: str) -> str: 0195 return s.replace('\\', '\\\\').replace('"', '\\"') 0196 0197 0198 def jumpctx(s: str) -> str: 0199 i = s.find('!') 0200 return '' if i == -1 else s[i+1:] 0201 0202 0203 def xml_bool(s: str | None) -> bool: 0204 return s == '1' or s == 'true' 0205 0206 0207 def push_context_attr(output: list[str], 0208 escaped_origin: str, escaped_ctx_name: str, escaped_name_attr: str, 0209 style: str, color: str) -> None: 0210 if escaped_name_attr == '#stay': 0211 output.append(f' "{escaped_origin}" -> "{escaped_ctx_name}" [style={style},color={color}];\n') 0212 elif escaped_name_attr.startswith('#'): 0213 ref = f'{escaped_ctx_name}!!{escaped_name_attr}' 0214 output.append( 0215 f' "{escaped_origin}" -> "{ref}" [style={style},color={color}];\n' 0216 f' "{ref}" [label="{labelize(escaped_name_attr)}",color={color}];\n' 0217 ) 0218 0219 0220 def push_last_transition(output: list[str], 0221 escaped_name: str, escaped_ctx_name: str, escaped_name_attr: str, 0222 color: str) -> None: 0223 if escaped_name_attr == '#stay': 0224 return 0225 0226 if escaped_name_attr.startswith('#'): 0227 escaped_last_ctx = jumpctx(escaped_name_attr) 0228 if escaped_last_ctx: 0229 output.append(f' "{escaped_ctx_name}!!{escaped_name_attr}" -> "{escaped_last_ctx}" [style=dashed,color={color}];\n') 0230 else: 0231 output.append(f' "{escaped_name}" -> "{escaped_name_attr}" [style=dashed,color={color}];\n') 0232 0233 0234 output = [ 0235 'digraph G {\n', 0236 ' compound=true;ratio=auto\n' 0237 ] 0238 0239 if context_only: 0240 # avoid multi arrow for ctx1 -> ctx2 0241 krule_contexts: dict[str, int] = {} 0242 # shares #pop... nodes 0243 kpoped_contexts: dict[tuple[str, str], str] = {} 0244 0245 def start_ctx(p: XMLParser): 0246 krule_contexts.clear() 0247 0248 def rule_process(p: XMLParser, irule: int, name: str, attrs: dict[str, str]): 0249 krule_contexts[attrs.get('context') or '#stay'] = irule 0250 0251 def end_ctx(p: XMLParser): 0252 color = p.ctx_color 0253 ctx_name = p.escaped_ctx_name 0254 output.append(f' "{ctx_name}" [style=filled,color={color}]\n') 0255 0256 krule_contexts.setdefault(p.ctx_attrs.get('fallthroughContext') or '#stay', -1) 0257 krule_contexts.setdefault(p.ctx_attrs.get('lineEndContext') or '#stay', -2) 0258 krule_contexts.setdefault(p.ctx_attrs.get('lineEmptyContext') or '#stay', -3) 0259 0260 krule_contexts.pop('#stay') 0261 0262 for rule_context, i in sorted(krule_contexts.items(), key=lambda t: t[1]): 0263 if i >= 0: 0264 style = f'color={color}' 0265 elif i == -1: 0266 style = f'style=dashed,color={color}' 0267 elif i == -2: 0268 style = 'style=dotted,color=blue' 0269 else: # if i == -3: 0270 style = 'style=dotted,color=purple' 0271 0272 escaped_rule_context = escape(rule_context) 0273 labelized_context = labelize(escaped_rule_context) 0274 if rule_context.startswith('#'): 0275 next_context = jumpctx(escaped_rule_context) 0276 if next_context: 0277 k = (labelized_context, next_context) 0278 poped_context = kpoped_contexts.get(k) 0279 if poped_context: 0280 output.append(f' "{ctx_name}" -> "{poped_context}" [{style}];\n') 0281 else: 0282 poped_context = f'{ctx_name}!!{i}' 0283 kpoped_contexts[k] = poped_context 0284 output.append(f' "{ctx_name}" -> "{poped_context}" [{style}];\n' 0285 f' "{poped_context}" [label="{labelized_context}"];\n' 0286 f' "{poped_context}" -> "{next_context}"\n') 0287 else: 0288 poped_context = f'{ctx_name}!!{i}' 0289 output.append(f' "{ctx_name}" -> "{poped_context}" [{style}];\n' 0290 f' "{poped_context}" [label="{labelized_context}"];\n') 0291 else: 0292 output.append(f' "{ctx_name}" -> "{labelized_context}" [{style}]\n') 0293 0294 else: 0295 first_line_attributes = ('attribute', 'String', 'char') # char1 is tranformed into String 0296 second_line_attributes = ('beginRegion', 'endRegion', 'lookAhead', 'firstNonSpace', 'column', 'additionalDeliminator', 'weakDeliminator') 0297 0298 kdot: dict[str, tuple[str, int]] = {} 0299 escaped_name = '' 0300 0301 def start_ctx(p: XMLParser): 0302 global escaped_name 0303 0304 escaped_name = p.escaped_ctx_name 0305 0306 kdot.clear() 0307 output.append( 0308 f' subgraph cluster{p.ictx} {{\n' 0309 f' "{escaped_name}" [shape=box,style=filled,color={p.ctx_color}];\n' 0310 ) 0311 0312 def rule_process(p: XMLParser, irule: int, name: str, attrs: dict[str, str]): 0313 global escaped_name 0314 0315 color = p.ctx_color 0316 escaped_ctx_name = p.escaped_ctx_name 0317 0318 next_name = f'{p.ctx_name}!!{irule}!!{name}' 0319 escaped_next_name = escape(next_name) 0320 rule_context = attrs.get('context', '#stay') 0321 output.append(f' "{escaped_name}" -> "{escaped_next_name}" [style=dashed,color={color}];\n') 0322 0323 escaped_name = escaped_next_name 0324 0325 if name == 'IncludeRules': 0326 label = f' {rule_context}' 0327 else: 0328 if 'attribute' not in attrs: 0329 attrs['attribute'] = p.ctx_attrs['attribute'] 0330 if 'char1' in attrs: 0331 attrs['String'] = attrs.pop('char') + attrs.pop('char1') 0332 label = stringify_attrs(first_line_attributes, attrs) 0333 label2 = stringify_attrs(second_line_attributes, attrs) 0334 if label2: 0335 label = f'{label}\n{label2}' 0336 output.append(f' "{escaped_name}" [label="{name}{escape(label)}"];\n') 0337 0338 if xml_bool(attrs.get('lookAhead')): 0339 output.append(f' "{escaped_name}" [style=dashed];\n') 0340 0341 if rule_context == '#stay': 0342 output.append(f' "{escaped_name}" -> "{escaped_ctx_name}" [color=dodgerblue3];\n') 0343 elif rule_context: 0344 escaped_rule_context = escape(rule_context) 0345 if rule_context.startswith('#'): 0346 escaped_bind_ctx_name = jumpctx(escaped_rule_context) 0347 ref = f'{escaped_ctx_name}!!{escaped_rule_context}' 0348 output.append( 0349 f' "{escaped_name}" -> "{ref}" [color={color}];\n' 0350 f' "{ref}" [label="{labelize(escaped_rule_context)}"];\n' 0351 ) 0352 if escaped_bind_ctx_name: 0353 kdot[f'{ref}!!{escaped_bind_ctx_name}'] = ( 0354 f' "{ref}" -> "{escaped_bind_ctx_name}" [color={color}];\n' 0355 f' "{ref}" [color=red];\n', 0356 irule, 0357 ) 0358 else: 0359 kdot[f'{irule}'] = ( 0360 f' "{escaped_name}" -> "{escaped_rule_context}" [color={color}];\n', 0361 irule, 0362 ) 0363 0364 def end_ctx(p: XMLParser): 0365 color = p.ctx_color 0366 escaped_ctx_name = p.escaped_ctx_name 0367 0368 fallthrough_ctx = p.ctx_attrs.get('fallthroughContext', '#stay') 0369 escaped_fallthrough_ctx = escape(fallthrough_ctx) 0370 push_context_attr(output, escaped_name, escaped_ctx_name, 0371 escaped_fallthrough_ctx, 'dashed', color) 0372 0373 end_ctx = p.ctx_attrs.get('lineEndContext', '#stay') 0374 escaped_end_ctx = escape(end_ctx) 0375 push_context_attr(output, escaped_ctx_name, escaped_ctx_name, 0376 escaped_end_ctx, 'dotted', 'blue') 0377 0378 empty_ctx = p.ctx_attrs.get('lineEmptyContext', '#stay') 0379 escaped_empty_ctx = escape(empty_ctx) 0380 push_context_attr(output, escaped_ctx_name, escaped_ctx_name, 0381 escaped_empty_ctx, 'dotted', 'purple') 0382 0383 output.append(' }\n') 0384 0385 push_last_transition(output, escaped_name, escaped_ctx_name, 0386 escaped_fallthrough_ctx, color) 0387 0388 push_last_transition(output, escaped_name, escaped_ctx_name, 0389 escaped_end_ctx, color) 0390 0391 push_last_transition(output, escaped_name, escaped_ctx_name, 0392 escaped_empty_ctx, color) 0393 0394 output.extend(expr for expr, _ in sorted(kdot.values(), key=lambda t: t[1])) 0395 0396 0397 xml_parser = XMLParser(start_ctx, end_ctx, rule_process) 0398 p = expat.ParserCreate() 0399 p.StartElementHandler = xml_parser.start_element 0400 p.EndElementHandler = xml_parser.end_element 0401 if not resolve_entities: 0402 xml_parser.resolve_entities = False 0403 p.EntityDeclHandler = xml_parser.entity_decl 0404 p.EndDoctypeDeclHandler = xml_parser.end_doctype 0405 0406 # # remove BOM 0407 # if content.startswith('\xef\xbb\xbf'): 0408 # content = content[3:] 0409 p.Parse(Path(args.syntax).read_text()) 0410 0411 output.append('}\n') 0412 0413 print(''.join(output))