File indexing completed on 2024-05-19 04:03:42

0001 #!/usr/bin/env python3
0002 # -*- coding: utf-8 -*-
0003 #
0004 # Generate Kate syntax file for CMake
0005 #
0006 # SPDX-FileCopyrightText: 2017-2023 Alex Turbov <i.zaufi@gmail.com>
0007 #
0008 # To install prerequisites:
0009 #
0010 #   $ pip install --user click jinja2 lxml pyyaml
0011 #
0012 # To use:
0013 #
0014 #   $ ./generate-cmake-syntax.py cmake.yaml > ../syntax/cmake.xml
0015 #
0016 
0017 from __future__ import annotations
0018 
0019 import functools
0020 import re
0021 from dataclasses import dataclass, field
0022 
0023 import click
0024 import jinja2
0025 import yaml
0026 import sys
0027 from lxml import etree
0028 
0029 
# Matches one `<PLACEHOLDER>` fragment; names containing such a fragment are
# turned into regular expressions instead of plain keywords.
_TEMPLATED_NAME = re.compile(r'(?:<[^>]+>)')

# Top-level YAML keys holding the per-kind property lists.
_PROPERTY_KEYS = [
    'global-properties',
    'directory-properties',
    'target-properties',
    'source-properties',
    'test-properties',
    'cache-properties',
    'install-properties',
]

# Keys under which a partitioned list is stored: plain keywords and the regex.
_KW_RE_LIST = ['kw', 're']

# Top-level YAML keys holding the variable lists.
_VAR_KIND_LIST = [
    'variables',
    'deprecated-or-internal-variables',
    'environment-variables',
]

# Command names that get the `Control Flow` attribute instead of `Command`.
_CONTROL_FLOW_LIST = {
    'break',
    'continue',
    'elseif',
    'else',
    'endforeach',
    'endif',
    'endwhile',
    'foreach',
    'if',
    'return',
    'while',
}

# XML entity substituted for every `<PLACEHOLDER>` in generated regexes.
_VAR_REF_ENTITY = '&var_ref_re;'
0056 
# Hand-written regex simplifications.
#
# Each entry is `(alternatives, replacement)`: when every string of
# `alternatives` is present in a list of regex alternatives, those strings are
# dropped and the single `replacement` regex is added instead.
#
# Entries for tree-built names spell the variable reference entity with `%` in
# place of `_` (`&var%ref%re;`), which is how it looks while the tree is being
# folded; entries for special cases use the real `&var_ref_re;` spelling.
_HEURISTICS = [
    (
        {'MAX(_(COUNT|MAJOR|MINOR|PATCH|TWEAK))?', 'MIN(_(COUNT|MAJOR|MINOR|PATCH|TWEAK))?'},
        'M(AX|IN)(_(COUNT|MAJOR|MINOR|PATCH|TWEAK))?',
    ),
    ({'OUTPUTS', 'OUTPUT_(HEADER|SOURCE)'}, 'OUTPUT(S|_(HEADER|SOURCE))'),
    ({'PREFIX', 'SUFFIX'}, '(PRE|SUF)FIX'),
    ({'CPPCHECK', 'CPPLINT'}, 'CPP(CHECK|LINT)'),
    ({'DEPENDS', 'PREDEPENDS'}, '(PRE)?DEPENDS'),
    ({'ICON', 'ICONURL'}, 'ICON(URL)?'),
    (
        {
            '&var%ref%re;(_INIT)?',
            'DEBUG(_INIT)?',
            'MINSIZEREL(_INIT)?',
            'RELEASE(_INIT)?',
            'RELWITHDEBINFO(_INIT)?',
        },
        '(DEBUG|MINSIZEREL|REL(EASE|WITHDEBINFO)|&var%ref%re;)(_INIT)?',
    ),
    ({'RELEASE', 'RELWITHDEBINFO'}, 'REL(EASE|WITHDEBINFO)'),
    ({'POST', 'POSTUN', 'PRE', 'PREUN'}, 'P(RE|OST)(UN)?'),
    ({'AUTOPROV', 'AUTOREQ', 'AUTOREQPROV'}, 'AUTO(PROV|REQ(PROV)?)'),
    ({'DEFINITIONS', 'OPTIONS'}, '(DEFINI|OP)TIONS'),
    ({'LIB_NAMES', 'LIBRARY'}, 'LIB(_NAMES|RARY)'),
    ({'EXTENSIONS', 'EXTRA_FLAGS'}, 'EXT(ENSIONS|RA_FLAGS)'),
    ({'DISABLED', 'DISPLAY_NAME'}, 'DIS(ABLED|PLAY_NAME)'),
    ({'LIBRARIES', 'LINK_LIBRARIES', 'STATIC_LINK_LIBRARIES'}, '((STATIC_)?LINK_)?LIBRARIES'),
    ({'INCLUDE_DIRS', 'LIBRARY_DIRS'}, '(INCLUDE|LIBRARY)_DIRS'),
    ({'BINARY_DIR', 'SOURCE_DIR'}, '(BINARY|SOURCE)_DIR'),
    ({'CFLAGS(_OTHER)?', 'LDFLAGS(_OTHER)?'}, '(C|LD)FLAGS(_OTHER)?'),
    ({'INCLUDE_DIRECTORIES', 'LIBRARIES'}, '(INCLUDE_DIRECTO|LIBRA)RIES'),
    (
        {'POSTFLIGHT_&var%ref%re;_SCRIPT', 'PREFLIGHT_&var%ref%re;_SCRIPT'},
        'P(RE|OST)FLIGHT_&var%ref%re;_SCRIPT',
    ),
    ({'DIRECTORIES', 'FRAMEWORK_DIRECTORIES'}, '(FRAMEWORK_)?DIRECTORIES'),
    ({'FILE_FLAG', 'FILE'}, 'FILE(_FLAG)?'),
    ({'DIR_PERMISSIONS', 'FILE_PERMISSIONS'}, '(DIR|FILE)_PERMISSIONS'),
    ({'COMPILER_LAUNCHER', 'LINKER_LAUNCHER'}, '(COMPIL|LINK)ER_LAUNCHER'),
    ({'COMPILER', 'COMPILE_(DEFINI|OP)TIONS'}, 'COMPILE(R|_(DEFINI|OP)TIONS)'),
    ({'LICENSEURL', 'LICENSE_(EXPRESSION|FILE_NAME)'}, 'LICENSE(URL|_(EXPRESSION|FILE_NAME))'),
    ({'NO_SONAME', 'SONAME'}, '(NO_)?SONAME'),
    ({'CODE_SIGN_ON_COPY', 'REMOVE_HEADERS_ON_COPY'}, '(CODE_SIGN|REMOVE_HEADERS)_ON_COPY'),
    (
        {'(REFERENCE|REFERENCEPROP_&var%ref%re;_TAG)_&var%ref%re;'},
        'REFERENCE(PROP_&var%ref%re;_TAG)?_&var%ref%re;',
    ),
    ({'DISABLE_FIND_PACKAGE', 'REQUIRE_FIND_PACKAGE'}, '(DISABLE|REQUIRE)_FIND_PACKAGE'),
    (
        {'GROUP_USING_&var%ref%re;(_SUPPORTED)?', 'LIBRARY_USING_&var%ref%re;(_SUPPORTED)?'},
        '(GROUP|LIBRARY)_USING_&var%ref%re;(_SUPPORTED)?',
    ),
    (
        {
            'EXE_LINKER_FLAGS_&var%ref%re;(_INIT)?',
            'MODULE_LINKER_FLAGS_&var%ref%re;(_INIT)?',
            'SHARED_LINKER_FLAGS_&var%ref%re;(_INIT)?',
            'STATIC_LINKER_FLAGS_&var%ref%re;(_INIT)?',
        },
        '(EXE|MODULE|SHARED|STATIC)_LINKER_FLAGS_&var%ref%re;(_INIT)?',
    ),
    (
        {
            'ARCHIVE_OUTPUT_DIRECTORY',
            'COMPILE_PDB_OUTPUT_DIRECTORY',
            'LIBRARY_OUTPUT_DIRECTORY',
            'PDB_OUTPUT_DIRECTORY',
            'RUNTIME_OUTPUT_DIRECTORY',
        },
        '(ARCHIVE|(COMPILE_)?PDB|LIBRARY|RUNTIME)_OUTPUT_DIRECTORY',
    ),
    (
        {
            'ARCHIVE_OUTPUT_(DIRECTORY|NAME)',
            'LIBRARY_OUTPUT_(DIRECTORY|NAME)',
            'RUNTIME_OUTPUT_(DIRECTORY|NAME)',
        },
        '(ARCHIVE|LIBRARY|RUNTIME)_OUTPUT_(DIRECTORY|NAME)',
    ),
    ({'ASM&var_ref_re;', 'ASM&var_ref_re;FLAGS'}, 'ASM&var_ref_re;(FLAGS)?'),
    (
        {'CMAKE_POLICY_DEFAULT_CMP[0-9]{4}', 'CMAKE_POLICY_WARNING_CMP[0-9]{4}'},
        'CMAKE_POLICY_(DEFAULT|WARNING)_CMP[0-9]{4}',
    ),
    ({'CMAKE_ARGV[0-9]+', 'CMAKE_MATCH_[0-9]+'}, 'CMAKE_(ARGV|MATCH_)[0-9]+'),
]
0141 
@dataclass
class RePartNode:
    """A node of the name tree used to build a regex from templated names.

    Each node stands for one `_`-separated part of a name; the part's text is
    the key under which the node is stored in its parent's mapping.
    """

    # Nodes for the parts that may follow this one, keyed by part text.
    children: dict[str, RePartNode] = field(default_factory=dict, hash=False)
    # True when some complete name ends exactly at this part.
    is_leaf: bool = field(default=False)
0145     is_leaf: bool = False
0146 
0147 
@dataclass
class RegexCollection:
    """Accumulator for the regex alternatives derived from templated names.

    Names are recorded either verbatim as ready-made regexes
    (`special_cases`) or as a path in a tree of `_`-separated parts
    (`re_tree`).  Both mutators return `self` so the object can be threaded
    through `functools.reduce`.
    """

    # Ready-made regex alternatives, in insertion order.
    special_cases: list[str] = field(default_factory=list, hash=False)
    # Roots of the name tree, keyed by the first `_`-separated part.
    re_tree: dict[str, RePartNode] = field(default_factory=dict, hash=False)

    def add_case(self, regex: str) -> RegexCollection:
        """Record `regex` as a ready-made alternative and return `self`."""
        self.special_cases.append(regex)
        return self

    def update_tree(self, name_parts: list[str]) -> RegexCollection:
        """Insert a templated name into the tree and return `self`.

        `name_parts` are the pieces of the name around its placeholders.
        They are joined back with the variable reference entity whose
        underscores are swapped for `%`, so that the following split on `_`
        keeps the entity in one piece.  Two references separated by a single
        `_` are collapsed into one reference.
        """
        var_ref = _VAR_REF_ENTITY.replace('_', '%')
        flattened = var_ref.join(name_parts).replace(f'{var_ref}_{var_ref}', var_ref)

        # Walk down the tree one part at a time, creating missing nodes.
        level = self.re_tree
        node = None
        for part in flattened.split('_'):
            node = level.setdefault(part, RePartNode())
            level = node.children

        # `str.split` always yields at least one part, so `node` is set here.
        node.is_leaf = True
        return self
0168 
0169 
def try_transform_placeholder_string_to_regex(state: RegexCollection, name: str):
    """Fold one templated `name` into `state` and return `state`.

    The name is split around its `<PLACEHOLDER>` fragments.  A few shapes are
    recorded as ready-made regexes; everything else goes into the name tree.

    NOTE Some placeholders are not IDs, but numbers
    (`CMAKE_MATCH_<N>`, for example), hence the digit-only special cases.
    """
    name_parts = _TEMPLATED_NAME.split(name)

    # Every special case has exactly one placeholder, i.e. two parts around it.
    if len(name_parts) == 2:
        head, tail = name_parts

        if tail == '' and head in ('CMAKE_MATCH_', 'CMAKE_ARGV', 'ARGV'):
            return state.add_case(head + '[0-9]+')

        if tail == '' and head in ('CMAKE_POLICY_DEFAULT_CMP', 'CMAKE_POLICY_WARNING_CMP'):
            return state.add_case(head + '[0-9]{4}')

        if head == '' and tail == '__TRYRUN_OUTPUT':
            return state.add_case(f'{_VAR_REF_ENTITY}__TRYRUN_OUTPUT')

        if head == 'ASM' and tail in ('', 'FLAGS'):
            return state.add_case(f'{head}{_VAR_REF_ENTITY}{tail}')

    return state.update_tree(name_parts)
0190 
0191 
def is_first_subset_of_second(first, second):
    """Return True when every distinct item of `first` also occurs in `second`.

    Both arguments may be any iterables of hashable items; duplicates and
    order are ignored.
    """
    return set(first) <= set(second)
0196 
0197 
def try_optimize_known_alt_groups(groups: list[str]) -> list[str]:
    """Collapse known sets of alternatives into their hand-written regex.

    Heuristics are tried in table order.  When all alternatives of an entry
    are present in `groups`, they are removed, the entry's replacement is
    added and the list is re-sorted; later entries see that updated list.
    Returns `groups` itself when no entry applies.
    """
    for known_alternatives, replacement in _HEURISTICS:
        if not is_first_subset_of_second(known_alternatives, groups):
            continue
        kept = [item for item in groups if item not in known_alternatives]
        kept.append(replacement)
        kept.sort()
        groups = kept
    return groups
0203 
0204 
def try_optimize_trailing_var_ref_regex(groups: list[str]) -> list[str]:
    """Factor a shared trailing variable reference out of the alternatives.

    When more than one alternative ends with `_` followed by the variable
    reference entity (in its `%` spelling), those alternatives are replaced
    by a single `(stem|stem|...)_<ref>` alternative whose stems are
    themselves run through the known-group heuristics.  The result is
    sorted; with fewer than two such alternatives `groups` is returned as is.
    """
    suffix = '_' + _VAR_REF_ENTITY.replace('_', '%')
    suffixed = [item for item in groups if item.endswith(suffix)]
    if len(suffixed) <= 1:
        return groups

    stems = try_optimize_known_alt_groups([item[:-len(suffix)] for item in suffixed])
    merged = f'({"|".join(stems)}){suffix}'
    untouched = [item for item in groups if item not in suffixed]
    return sorted([*untouched, merged])
0212 
0213 
def build_regex(state: list[str], kv: tuple[str, RePartNode]) -> list[str]:
    """Reducer turning one `(part, node)` tree item into a regex alternative.

    Returns a new list: `state` plus the regex for the subtree rooted at the
    given node.  A childless leaf contributes just its part text.  Otherwise
    the children are folded recursively and optimized, then appended after
    the part as `_tail`; the tail is wrapped in `(...)` when there are
    several alternatives, and the whole `_tail` is made optional when a name
    may also end at this node.

    Raises `AssertionError` for a childless node that is not a leaf.
    """
    name, value = kv

    if not value.children:
        if value.is_leaf:
            return [*state, name]
        raise AssertionError(f'NOT MATCHED: {name=}→{value=}')

    alternatives = try_optimize_known_alt_groups(
        try_optimize_trailing_var_ref_regex(
            functools.reduce(build_regex, value.children.items(), [])
        )
    )
    if not alternatives:
        raise AssertionError('Zero children?')

    # A lone alternative needs no grouping parentheses.
    tail = alternatives[0] if len(alternatives) == 1 else f'({"|".join(alternatives)})'

    if value.is_leaf:
        return [*state, f'{name}(_{tail})?']
    return [*state, f'{name}_{tail}']
0247 
0248 
def try_placeholders_to_regex(names):
    """Build one word-bounded regex matching all templated `names`.

    Returns `None` when `names` is empty (or otherwise falsy).  Otherwise the
    names are collected into a `RegexCollection`, its tree is folded into
    alternatives (seeded with the special cases), the alternatives are
    optimized and joined with `|` inside `\\b(?:...)\\b`.  The `%` stand-ins
    are turned back into `_` at the very end.
    """
    if not names:
        return None

    collected = RegexCollection()
    for name in names:
        collected = try_transform_placeholder_string_to_regex(collected, name)

    alternatives = collected.special_cases
    for tree_item in collected.re_tree.items():
        alternatives = build_regex(alternatives, tree_item)

    alternatives = try_optimize_known_alt_groups(
        try_optimize_trailing_var_ref_regex(alternatives)
    )

    joined = '|'.join(alternatives).replace('%', '_')
    return '\\b(?:' + joined + ')\\b'
0274 
0275 
def partition_iterable(fn, iterable):
    """Split `iterable` in two lists according to the predicate `fn`.

    Returns a `(matched, rejected)` tuple: items for which `fn(item)` is
    truthy, then the items for which it is falsy, each in original order.

    The predicate's result is only tested for truthiness, so it may return
    any object (e.g. `None` or a match object), not just `bool`.
    """
    matched, rejected = [], []
    for item in iterable:
        (matched if fn(item) else rejected).append(item)
    return matched, rejected
0281 
0282 
def _transform_command_set(cmd, list_name):
    """Replace `cmd[list_name]` with a keyword/regex pair and return `cmd`.

    The list stored under `list_name` is removed and split into plain names
    and templated names (those containing a `<PLACEHOLDER>`).  The result is
    stored under `list_name` with dashes turned into underscores, as a dict
    holding the sorted unique plain names and the regex built from the
    templated ones (`None` when there are none).  `cmd` is modified in place.
    """
    plain, templated = partition_iterable(
        lambda x: _TEMPLATED_NAME.search(x) is None,
        cmd.pop(list_name),
    )
    kw_key, re_key = _KW_RE_LIST

    cmd[list_name.replace('-', '_')] = {
        kw_key: sorted(set(plain)),
        re_key: try_placeholders_to_regex(templated),
    }
    return cmd
0292 
0293 
def transform_command(cmd):
    """Normalize one command description in place and return it.

    Argument lists are converted to keyword/regex pairs, dashed/`?`-suffixed
    keys get underscore aliases, and an `attribute` key is set to
    `Control Flow` or `Command` depending on the command name.

    Raises `RuntimeError` when the command has no `name`, or when it is
    flagged `nulary?` while also declaring arguments or target positions.

    NOTE(review): the flags are only tested for truthiness; if the loader
    yields strings, a literal `false` would count as set -- confirm against
    the YAML input.
    """
    if 'name' not in cmd:
        raise RuntimeError('Command have no name')

    takes_args = False

    for args_key in ('named-args', 'special-args', 'property-args'):
        if args_key in cmd:
            _transform_command_set(cmd, args_key)
            takes_args = True

    cmd['nested_parentheses'] = cmd.get('nested-parentheses?', False)

    # Keys whose presence alone means the command takes arguments.
    target_aliases = (
        ('first-arg-is-target?', 'first_arg_is_target'),
        ('first-args-are-targets?', 'first_args_are_targets'),
        ('has-target-name-after-kw', 'has_target_name_after_kw'),
        ('has-target-names-after-kw', 'has_target_names_after_kw'),
        ('second-arg-is-target?', 'second_arg_is_target'),
    )
    for yaml_key, alias in target_aliases:
        if yaml_key in cmd:
            cmd[alias] = cmd[yaml_key]
            takes_args = True

    if takes_args and cmd.get('nulary?'):
        raise RuntimeError('Command `{}` w/ args declared nulary!?'.format(cmd['name']))

    for yaml_key, alias in (('start-region', 'start_region'), ('end-region', 'end_region')):
        if yaml_key in cmd:
            cmd[alias] = cmd[yaml_key]

    if cmd['name'] in _CONTROL_FLOW_LIST:
        cmd['attribute'] = 'Control Flow'
    else:
        cmd['attribute'] = 'Command'

    return cmd
0349 
0350 
def remove_duplicate_list_nodes(contexts, highlighting):
    """Drop `<list>` elements with identical items and re-point their users.

    The leading run of `<list>` children of `highlighting` is scanned (the
    scan stops at the first child with another tag).  Lists whose item texts
    are equal, in order, to those of an earlier list are removed.  Then every
    `<keyword>` rule found directly inside a child of `contexts` has its
    `String` attribute rewritten from a removed list's name to the name of
    the list that was kept.

    Both arguments are modified in place; nothing is returned.
    """
    remap = {}
    kept_name_by_items = {}

    # Iterate over a snapshot: children are removed from `highlighting` inside
    # the loop, which must not depend on how the container's iterator copes
    # with concurrent removal.
    for items in list(highlighting):
        if items.tag != 'list':
            break
        # '<' cannot occur in parsed text unescaped, so it is a safe separator.
        key = '<'.join(item.text or '' for item in items)
        name = items.attrib['name']
        if key in kept_name_by_items:
            remap[name] = kept_name_by_items[key]
            highlighting.remove(items)
        else:
            kept_name_by_items[key] = name

    # update keyword list name referenced by each rule
    for context in contexts:
        for rule in context:
            if rule.tag == 'keyword':
                name = rule.attrib['String']
                rule.attrib['String'] = remap.get(name, name)
0374 
0375 
def remove_duplicate_context_nodes(contexts):
    """Merge child elements of `contexts` that differ only by their name.

    Two contexts are duplicates when they serialize identically once their
    `name` attribute is masked.  Only the first of each group is kept, and
    every rule whose `context` attribute names a removed duplicate is
    re-pointed to the kept one.  The whole process runs three times, because
    re-pointing rules can make further contexts identical.

    `contexts` is modified in place; nothing is returned.
    """
    # 3 levels: ctx, ctx_op and ctx_op_nested
    for _ in range(3):
        remap = {}
        duplicated = {}

        # remove duplicate nodes
        # (iterate over a snapshot, since children are removed in the loop)
        for context in list(contexts):
            name = context.attrib['name']
            # Mask the name so that it does not take part in the comparison.
            context.attrib['name'] = 'dummy'
            ref = duplicated.setdefault(etree.tostring(context), [])
            if ref:
                contexts.remove(context)
            else:
                context.attrib['name'] = name
                ref.append(name)
            remap[name] = ref[0]

        # update context name referenced by each rule
        for context in contexts:
            for rule in context:
                ref = remap.get(rule.attrib.get('context'))
                if ref:
                    rule.attrib['context'] = ref
0400 
0401 
def remove_duplicate_nodes(xml_string):
    """Deduplicate keyword lists and contexts of a rendered syntax file.

    `xml_string` is parsed, duplicate `<list>` and context elements are
    merged, and the document is serialized again.  Everything before
    `<highlighting` is taken verbatim from the input, and the Kate modeline
    comment is appended at the end.  Returns the resulting text.
    """
    root = etree.fromstring(
        xml_string.encode(),
        parser=etree.XMLParser(resolve_entities=False, collect_ids=False),
    )
    highlighting = root[0]
    contexts = highlighting.find('contexts')

    remove_duplicate_list_nodes(contexts, highlighting)
    remove_duplicate_context_nodes(contexts)

    # Put comments on lines of their own.
    # NOTE(review): `(?=[^\n ])` in the first pattern is a lookahead placed
    # before `<!--`, so it only ever inspects the `<`; it looks like a
    # lookbehind may have been meant -- confirm before changing the output.
    serialized = etree.tostring(root)
    for pattern, replacement in (
        (b'(?=[^\n ])<!--', b'\n<!--'),
        (b'-->(?=[^ \n])', b'-->\n'),
    ):
        serialized = re.sub(pattern, replacement, serialized)

    # The prologue (DOCTYPE and `<language>` tag) comes from the input text;
    # the re-serialized, unformatted one is cut off.
    prologue = xml_string[:xml_string.find('<highlighting')]
    body = serialized[serialized.find(b'<highlighting'):].decode()

    # The trailing modeline comment is not part of the serialized tree.
    modeline = '\n<!-- kate: replace-tabs on; indent-width 2; tab-width 2; -->'

    return f'{prologue}{body}{modeline}'
0427 
0428 
0429 #BEGIN Jinja filters
0430 
def cmd_is_nulary(cmd):
    """Return the command's `nulary?` flag, storing `False` when it is unset.

    Note that a missing flag is written back into `cmd`.
    """
    try:
        return cmd['nulary?']
    except KeyError:
        cmd['nulary?'] = False
        return False
0433 
0434 #END Jinja filters
0435 
0436 
@click.command()
@click.argument('input_yaml', type=click.File('r'))
@click.argument('template', type=click.File('r'), default='./cmake.xml.tpl')
def cli(input_yaml, template):
    """Render the Kate CMake syntax XML from INPUT_YAML and print it.

    TEMPLATE is the Jinja template to render (default: ./cmake.xml.tpl).
    """
    # BaseLoader only builds plain strings, lists and dicts.
    data = yaml.load(input_yaml, Loader=yaml.BaseLoader)

    def is_plain_name(name):
        # A name without any `<PLACEHOLDER>` fragment is a plain keyword.
        return _TEMPLATED_NAME.search(name) is None

    # Partition `variables` and `environment-variables` lists into "pure" (key)words and regexes to match
    for var_key in _VAR_KIND_LIST:
        data[var_key] = {
            k: sorted(set(v))
            for k, v in zip(_KW_RE_LIST, [*partition_iterable(is_plain_name, data[var_key])])
        }
        data[var_key]['re'] = try_placeholders_to_regex(data[var_key]['re'])

    # Transform properties and make all-properties list
    data['properties'] = {}
    for prop in _PROPERTY_KEYS:
        python_prop_list_name = prop.replace('-', '_')
        props, props_re = partition_iterable(is_plain_name, data[prop])
        del data[prop]

        data['properties'][python_prop_list_name] = {
            k: sorted(set(v)) for k, v in zip(_KW_RE_LIST, [props, props_re])
        }
        data['properties'][python_prop_list_name]['re'] = try_placeholders_to_regex(props_re)

    data['properties']['kinds'] = [name.replace('-', '_') for name in _PROPERTY_KEYS]

    # Make all commands list
    data['commands'] = [
        transform_command(cmd)
        for cmd in data['scripting-commands'] + data['project-commands'] + data['ctest-commands']
    ]
    data['standard_module_commands'] = [
        transform_command(cmd) for cmd in data['standard-module-commands']
    ]
    del data['standard-module-commands']

    # Fix node names to be accessible from Jinja template.
    # Both are real lists: a one-shot generator would come out empty if the
    # template iterated it a second time.
    generator_expressions = data.pop('generator-expressions')
    data['generator_expressions'] = [ex for ex in generator_expressions if isinstance(ex, str)]
    data['complex_generator_expressions'] = [ex for ex in generator_expressions if not isinstance(ex, str)]
    data['deprecated_or_internal_variables'] = data.pop('deprecated-or-internal-variables')
    data['environment_variables'] = data.pop('environment-variables')

    # Template directives live inside XML comments, so the template itself
    # stays well-formed XML.
    env = jinja2.Environment(
        keep_trailing_newline=True,
        block_start_string='<!--[',
        block_end_string=']-->',
        variable_start_string='<!--{',
        variable_end_string='}-->',
        comment_start_string='<!--#',
        comment_end_string='#-->',
    )

    # Register convenience tests (used as `is nulary` in the template)
    env.tests['nulary'] = cmd_is_nulary

    tpl = env.from_string(template.read())
    result = tpl.render(data)
    result = remove_duplicate_nodes(result)

    print(result)
0509 
0510 
if __name__ == '__main__':
    cli()
    # TODO Handle exceptions and show errors