File indexing completed on 2024-05-05 04:01:50

0001 #!/usr/bin/env python3
0002 # SPDX-FileCopyrightText: 2023 Jonathan Poelen <jonathan.poelen@gmail.com>
0003 # SPDX-License-Identifier: MIT
0004 
0005 from argparse import ArgumentParser
0006 from pathlib import Path
0007 from typing import Iterable, Mapping
0008 from textwrap import wrap
0009 from xml.parsers import expat
0010 
0011 import re
0012 
0013 
# Command-line interface of the script.
parser = ArgumentParser(
    prog='generate-dot-file.py',
    description='''Dot file generator for xml syntax

Example:
    generate-dot-file.py data/syntax/lua.xml | dot -T svg -o image.svg && xdg-open image.svg''',
)

parser.add_argument(
    '-c', '--context-only',
    action='store_true',
    help='Generates contexts without rules',
)

parser.add_argument(
    '-r', '--resolve-entities',
    action='store_true',
    help='Evaluate xml entities',
)

# -i / -e may be repeated; every occurrence adds one more pattern.
parser.add_argument(
    '-i', '--include',
    action='append',
    default=[],
    help='Include only contexts that respect a pattern',
)

parser.add_argument(
    '-e', '--exclude',
    action='append',
    default=[],
    help='Exclude contexts that respect a pattern',
)

parser.add_argument('syntax', help='XML Syntax Definition Files')


args = parser.parse_args()

# Compile the context-name filters once; they are searched for every context.
excludes = [re.compile(pattern) for pattern in args.exclude]
includes = [re.compile(pattern) for pattern in args.include]
context_only = args.context_only
# Context-only mode always behaves as if --resolve-entities was given.
resolve_entities = args.resolve_entities or context_only
0042 
0043 
# Character references and predefined XML entities that may appear verbatim
# in an entity declaration value, mapped to the text they stand for.
# The tab reference maps to the two characters backslash + 't' rather than to
# a real tab.
global_entities = {
    '&#9;': '\\t',
    '&#37;': '%',
    '&#38;': '&',
    '&amp;': '&',
    '&#39;': "'",
    # Predefined XML entity for the apostrophe (was misspelled '&aquot;').
    '&apos;': "'",
    '&#34;': '"',
    '&quot;': '"',
    '&#60;': '<',
    '&lt;': '<',
    '&#62;': '>',
    '&gt;': '>',
}
# Matches any key of global_entities (none of them contains a regex
# metacharacter, so they can be joined as-is).
entities_finder = re.compile('|'.join(global_entities))
0059 
0060 
0061 
0062 Outside = 0
0063 Context = 1
0064 Rule = 2
0065 
0066 class XMLParser:
0067     depth = Outside
0068     matched = False
0069     ictx = 0
0070     ctx_name = ''
0071     ctx_attrs: dict[str, str] = {}
0072     escaped_ctx_name = ''
0073     ctx_color = ''
0074     irule = 0
0075 
0076     resolve_entities = True
0077     reversed_entities: dict[str, str] = {}
0078     resolved_entity_searcher: re.Pattern
0079 
0080     def __init__(self, start_ctx, end_ctx, rule_process):
0081         self.start_ctx = start_ctx
0082         self.end_ctx = end_ctx
0083         self.rule_process = rule_process
0084 
0085     def start_element(self, tag: str, attrs: dict[str, str]):
0086         if self.depth == Context:
0087             self.depth = Rule
0088             if self.matched:
0089                 self.irule += 1
0090                 if not self.resolve_entities:
0091                     string = attrs.get('String')
0092                     if string:
0093                         attrs['String'] = self.unresolve_entities(string)
0094                 self.rule_process(self, self.irule, tag, attrs)
0095         elif tag == 'context':
0096             name = attrs['name']
0097             self.depth = Context
0098             self.matched = (not match_patterns(name, excludes)
0099                             and (not includes or match_patterns(name, includes)))
0100             if self.matched:
0101                 self.irule = 0
0102                 self.ctx_name = name
0103                 self.ctx_attrs = attrs
0104                 self.escaped_ctx_name = escape(name)
0105                 self.ctx_color = compute_color(name)
0106                 self.start_ctx(self)
0107 
0108     def end_element(self, name: str):
0109         if self.depth == Context:
0110             if self.matched:
0111                 self.end_ctx(self)
0112             self.ictx += 1
0113         self.depth -= 1
0114 
0115     def unresolve_entities(self, s: str) -> str:
0116         """
0117         expat module converts all entities. This function tries to do the
0118         opposite by replacing pieces of text with entities.
0119 
0120         The result may differ from the original text, but will be equivalent.
0121         """
0122         b = True
0123         def replace(m):
0124             nonlocal b
0125             b = True
0126             return self.reversed_entities[m[0]]
0127         while b:
0128             b = False
0129             s = self.resolved_entity_searcher.sub(replace, s)
0130         return s
0131 
0132     def entity_decl(self, name, is_parameter_entity, value, base, system_id, public_id, notation_name):
0133         value = entities_finder.sub(lambda m: global_entities[m[0]], value)
0134         self.reversed_entities[value] = f'&{name};'
0135 
0136     def end_doctype(self):
0137         patt = '|'.join(re.escape(value) for value in self.reversed_entities)
0138         self.resolved_entity_searcher = re.compile(patt)
0139 
0140 
# Graphviz color values (already quoted) handed out in turn by compute_color().
color_map = [
    '"/rdgy4/3"',
    '"/set312/1"',
    '"lightgoldenrod1"',
    '"/set312/3"',
    '"/set312/4"',
    '"/set312/5"',
    '"/set312/6"',
    '"/set312/7"',
    '"/rdpu3/2"',
    '"/purd6/3"',
    '"/ylgn4/2"',
    '"/set26/6"',
]

# key computed from the first characters of a name -> color given to that key
picked_colors: dict[int, str] = {}


def compute_color(name: str) -> str:
    """
    returns a color which depends on the first 2 characters

    Names sharing their first two characters get the same color. A new pair
    of characters takes the next entry of ``color_map``, cycling when the
    map is exhausted. An empty name is accepted and uses the key 0.
    """
    k = ord(name[0]) if name else 0
    if len(name) > 1:
        k += ord(name[1]) * 1024
    # Candidate for a key seen for the first time; ignored by setdefault()
    # when the key already has a color.
    color = color_map[len(picked_colors) % len(color_map)]
    return picked_colors.setdefault(k, color)
0167 
0168 
def match_patterns(name: str, patterns: list[re.Pattern]) -> bool:
    """
    returns True when at least one pattern is found anywhere in name
    """
    for pattern in patterns:
        if pattern.search(name):
            return True
    return False
0171 
0172 
# Leading run of '#pop' at the start of a context reference.
_pop_counter_re = re.compile('^(?:#pop)+')

def labelize(name: str) -> str:
    """
    shortens a run of 2 or more leading '#pop' into '#pop(count)'

    Whatever follows the run is kept; other names are returned unchanged.
    """
    leading_pops = _pop_counter_re.match(name)
    if leading_pops is None:
        return name

    # each '#pop' is 4 characters long
    count = len(leading_pops[0]) // 4
    if count <= 1:
        return name

    remainder = name[count * 4:]
    return f'#pop({count}){remainder}'
0182 
0183 
def stringify_attrs(attr_names: Iterable[str], attrs: Mapping[str, str]) -> str:
    """
    formats the selected attributes as a sequence of '  name:value'

    Attributes are emitted in the order of ``attr_names``; missing or empty
    ones are skipped. Values are wrapped to lines of at most 40 characters
    with textwrap.wrap(). Returns an empty string when nothing is selected.
    """
    parts = []
    for name in attr_names:
        attr = attrs.get(name)
        if attr:
            wrapped = '\n'.join(wrap(attr, 40))
            # the label is the attribute name (an undefined variable was
            # referenced here before, which raised NameError)
            parts.append(f'  {name}:{wrapped}')
    return ''.join(parts)
0192 
0193 
# Backslash and double quote are each prefixed with a backslash.
_ESCAPE_TABLE = str.maketrans({'\\': '\\\\', '"': '\\"'})

def escape(s: str) -> str:
    """
    escapes backslashes and double quotes so s fits in a double-quoted string
    """
    return s.translate(_ESCAPE_TABLE)
0196 
0197 
def jumpctx(s: str) -> str:
    """
    returns what follows the first '!' of s, or '' when s has no '!'
    """
    _, _, after_bang = s.partition('!')
    return after_bang
0201 
0202 
0203 def xml_bool(s: str | None) -> bool:
0204     return s == '1' or s == 'true'
0205 
0206 
def push_context_attr(output: list[str],
                      escaped_origin: str, escaped_ctx_name: str, escaped_name_attr: str,
                      style: str, color: str) -> None:
    """
    appends the edge for a context-level transition attribute

    - '#stay': edge from escaped_origin back to the context itself;
    - any other value starting with '#': edge from escaped_origin to a node
      named '<context>!!<value>', followed by the declaration of that node
      labelled with labelize(value);
    - anything else: nothing is appended.
    """
    if escaped_name_attr == '#stay':
        output.append(f'    "{escaped_origin}" -> "{escaped_ctx_name}" [style={style},color={color}];\n')
        return

    if not escaped_name_attr.startswith('#'):
        return

    ref = f'{escaped_ctx_name}!!{escaped_name_attr}'
    # edge and node declaration are pushed as one single chunk
    output.append(
        f'    "{escaped_origin}" -> "{ref}" [style={style},color={color}];\n'
        f'    "{ref}" [label="{labelize(escaped_name_attr)}",color={color}];\n'
    )
0218 
0219 
def push_last_transition(output: list[str],
                         escaped_name: str, escaped_ctx_name: str, escaped_name_attr: str,
                         color: str) -> None:
    """
    appends the dashed edge that leads to the target of a context attribute

    - '#stay': nothing is appended;
    - value starting with '#': when a context follows the '!', edge from the
      node '<context>!!<value>' to that context;
    - anything else: edge from escaped_name to the named context.
    """
    if escaped_name_attr == '#stay':
        return

    if not escaped_name_attr.startswith('#'):
        output.append(f'  "{escaped_name}" -> "{escaped_name_attr}" [style=dashed,color={color}];\n')
        return

    escaped_last_ctx = jumpctx(escaped_name_attr)
    if escaped_last_ctx:
        output.append(f'  "{escaped_ctx_name}!!{escaped_name_attr}" -> "{escaped_last_ctx}" [style=dashed,color={color}];\n')
0232 
0233 
# Pieces of the generated dot file, in output order.
output = [
    'digraph G {\n',
    '  compound=true;ratio=auto\n'
]

# Two sets of callbacks for XMLParser: one node per context (context_only),
# or one cluster per context containing the chain of its rules.
if context_only:
    # avoid multi arrow for ctx1 -> ctx2
    # (target context -> index of the last rule that goes to it)
    krule_contexts: dict[str, int] = {}
    # shares #pop... nodes
    # ((label, context reached after the pops) -> name of the shared node)
    kpoped_contexts: dict[tuple[str, str], str] = {}

    def start_ctx(p: XMLParser):
        """Forget the targets collected for the previous context."""
        krule_contexts.clear()

    def rule_process(p: XMLParser, irule: int, name: str, attrs: dict[str, str]):
        """Record the target of the rule; a missing or empty one is '#stay'."""
        krule_contexts[attrs.get('context') or '#stay'] = irule

    def end_ctx(p: XMLParser):
        """Emit the node of the context and one edge per distinct target."""
        color = p.ctx_color
        ctx_name = p.escaped_ctx_name
        output.append(f'  "{ctx_name}" [style=filled,color={color}]\n')

        # Context attributes use negative indexes to be told apart from
        # rules. setdefault() keeps the rule index when a rule already goes
        # to the same target.
        krule_contexts.setdefault(p.ctx_attrs.get('fallthroughContext') or '#stay', -1)
        krule_contexts.setdefault(p.ctx_attrs.get('lineEndContext') or '#stay', -2)
        krule_contexts.setdefault(p.ctx_attrs.get('lineEmptyContext') or '#stay', -3)

        # '#stay' is not drawn. It is absent when every rule and the three
        # attributes above name another target, hence the default value.
        krule_contexts.pop('#stay', None)

        for rule_context, i in sorted(krule_contexts.items(), key=lambda t: t[1]):
            if i >= 0:
                style = f'color={color}'
            elif i == -1:
                style = f'style=dashed,color={color}'
            elif i == -2:
                style = 'style=dotted,color=blue'
            else:  # if i == -3:
                style = 'style=dotted,color=purple'

            escaped_rule_context = escape(rule_context)
            labelized_context = labelize(escaped_rule_context)
            if rule_context.startswith('#'):
                next_context = jumpctx(escaped_rule_context)
                if next_context:
                    # '#pop...!ctx': the intermediate node is shared by all
                    # the contexts using the same pops and the same target.
                    k = (labelized_context, next_context)
                    poped_context = kpoped_contexts.get(k)
                    if poped_context:
                        output.append(f'  "{ctx_name}" -> "{poped_context}" [{style}];\n')
                    else:
                        poped_context = f'{ctx_name}!!{i}'
                        kpoped_contexts[k] = poped_context
                        output.append(f'  "{ctx_name}" -> "{poped_context}" [{style}];\n'
                                      f'  "{poped_context}" [label="{labelized_context}"];\n'
                                      f'  "{poped_context}" -> "{next_context}"\n')
                else:
                    # '#...' without target: node private to this context
                    poped_context = f'{ctx_name}!!{i}'
                    output.append(f'  "{ctx_name}" -> "{poped_context}" [{style}];\n'
                                  f'  "{poped_context}" [label="{labelized_context}"];\n')
            else:
                output.append(f'  "{ctx_name}" -> "{labelized_context}" [{style}]\n')

else:
    first_line_attributes = ('attribute', 'String', 'char')  # char1 is transformed into String
    second_line_attributes = ('beginRegion', 'endRegion', 'lookAhead', 'firstNonSpace', 'column', 'additionalDeliminator', 'weakDeliminator')

    # Edges emitted after the cluster of the context is closed:
    # key -> (dot text, index of the rule, used to sort).
    kdot: dict[str, tuple[str, int]] = {}
    # Name of the last node of the chain: the context, then each rule in turn.
    escaped_name = ''

    def start_ctx(p: XMLParser):
        """Open the cluster of the context and emit the node of the context."""
        global escaped_name

        escaped_name = p.escaped_ctx_name

        kdot.clear()
        output.append(
            f'  subgraph cluster{p.ictx} {{\n'
            f'    "{escaped_name}" [shape=box,style=filled,color={p.ctx_color}];\n'
        )

    def rule_process(p: XMLParser, irule: int, name: str, attrs: dict[str, str]):
        """
        Append the rule to the chain of its context and draw its transition.

        ``attrs`` is modified: 'attribute' is filled from the context when
        missing, and 'char' + 'char1' are merged into 'String'.
        """
        global escaped_name

        color = p.ctx_color
        escaped_ctx_name = p.escaped_ctx_name

        next_name = f'{p.ctx_name}!!{irule}!!{name}'
        escaped_next_name = escape(next_name)
        rule_context = attrs.get('context', '#stay')
        # dashed edge from the previous node of the chain to this rule
        output.append(f'    "{escaped_name}" -> "{escaped_next_name}" [style=dashed,color={color}];\n')

        escaped_name = escaped_next_name

        if name == 'IncludeRules':
            label = f'  {rule_context}'
        else:
            if 'attribute' not in attrs:
                attrs['attribute'] = p.ctx_attrs['attribute']
            if 'char1' in attrs:
                attrs['String'] = attrs.pop('char') + attrs.pop('char1')
            label = stringify_attrs(first_line_attributes, attrs)
            label2 = stringify_attrs(second_line_attributes, attrs)
            if label2:
                label = f'{label}\n{label2}'
        output.append(f'    "{escaped_name}" [label="{name}{escape(label)}"];\n')

        if xml_bool(attrs.get('lookAhead')):
            output.append(f'    "{escaped_name}" [style=dashed];\n')

        if rule_context == '#stay':
            output.append(f'    "{escaped_name}" -> "{escaped_ctx_name}" [color=dodgerblue3];\n')
        elif rule_context:
            escaped_rule_context = escape(rule_context)
            if rule_context.startswith('#'):
                escaped_bind_ctx_name = jumpctx(escaped_rule_context)
                ref = f'{escaped_ctx_name}!!{escaped_rule_context}'
                output.append(
                    f'    "{escaped_name}" -> "{ref}" [color={color}];\n'
                    f'    "{ref}" [label="{labelize(escaped_rule_context)}"];\n'
                )
                if escaped_bind_ctx_name:
                    # keyed by reference and target: emitted once per context
                    kdot[f'{ref}!!{escaped_bind_ctx_name}'] = (
                        f'  "{ref}" -> "{escaped_bind_ctx_name}" [color={color}];\n'
                        f'  "{ref}" [color=red];\n',
                        irule,
                    )
            else:
                kdot[f'{irule}'] = (
                    f'  "{escaped_name}" -> "{escaped_rule_context}" [color={color}];\n',
                    irule,
                )

    def end_ctx(p: XMLParser):
        """
        Draw the context attributes, close the cluster, then emit the edges
        that were deferred until after the cluster.
        """
        color = p.ctx_color
        escaped_ctx_name = p.escaped_ctx_name

        # fallthrough starts from the last rule of the chain
        fallthrough_ctx = p.ctx_attrs.get('fallthroughContext', '#stay')
        escaped_fallthrough_ctx = escape(fallthrough_ctx)
        push_context_attr(output, escaped_name, escaped_ctx_name,
                          escaped_fallthrough_ctx, 'dashed', color)

        line_end_ctx = p.ctx_attrs.get('lineEndContext', '#stay')
        escaped_end_ctx = escape(line_end_ctx)
        push_context_attr(output, escaped_ctx_name, escaped_ctx_name,
                          escaped_end_ctx, 'dotted', 'blue')

        empty_ctx = p.ctx_attrs.get('lineEmptyContext', '#stay')
        escaped_empty_ctx = escape(empty_ctx)
        push_context_attr(output, escaped_ctx_name, escaped_ctx_name,
                          escaped_empty_ctx, 'dotted', 'purple')

        # end of the cluster opened by start_ctx()
        output.append('  }\n')

        push_last_transition(output, escaped_name, escaped_ctx_name,
                             escaped_fallthrough_ctx, color)

        push_last_transition(output, escaped_name, escaped_ctx_name,
                             escaped_end_ctx, color)

        push_last_transition(output, escaped_name, escaped_ctx_name,
                             escaped_empty_ctx, color)

        output.extend(expr for expr, _ in sorted(kdot.values(), key=lambda t: t[1]))
0393 
0394         output.extend(expr for expr, _ in sorted(kdot.values(), key=lambda t: t[1]))
0395 
0396 
# Wire the callbacks selected above into an expat parser and run it.
xml_parser = XMLParser(start_ctx, end_ctx, rule_process)
p = expat.ParserCreate()
p.StartElementHandler = xml_parser.start_element
p.EndElementHandler = xml_parser.end_element
if not resolve_entities:
    # Entity declarations are only collected when rule strings must be
    # displayed with their entity references.
    xml_parser.resolve_entities = False
    p.EntityDeclHandler = xml_parser.entity_decl
    p.EndDoctypeDeclHandler = xml_parser.end_doctype

# The file is given as raw bytes so that expat deals with the byte order mark
# and with the encoding declared by the document, instead of decoding it with
# the locale encoding first. The second argument marks the data as final, so
# that a truncated document is reported as an error.
p.Parse(Path(args.syntax).read_bytes(), True)

output.append('}\n')

print(''.join(output))