# File indexing completed on 2024-05-19 04:03:42
0001 #!/usr/bin/env python3 0002 # -*- coding: utf-8 -*- 0003 # 0004 # Generate Kate syntax file for CMake 0005 # 0006 # SPDX-FileCopyrightText: 2017-2023 Alex Turbov <i.zaufi@gmail.com> 0007 # 0008 # To install prerequisites: 0009 # 0010 # $ pip install --user click jinja2 lxml pyyaml 0011 # 0012 # To use: 0013 # 0014 # $ ./generate-cmake-syntax.py cmake.yaml > ../syntax/cmake.xml 0015 # 0016 0017 from __future__ import annotations 0018 0019 import functools 0020 import re 0021 from dataclasses import dataclass, field 0022 0023 import click 0024 import jinja2 0025 import yaml 0026 import sys 0027 from lxml import etree 0028 0029 0030 _TEMPLATED_NAME = re.compile(r'(?:<[^>]+>)') 0031 _PROPERTY_KEYS = [ 0032 'global-properties' 0033 , 'directory-properties' 0034 , 'target-properties' 0035 , 'source-properties' 0036 , 'test-properties' 0037 , 'cache-properties' 0038 , 'install-properties' 0039 ] 0040 _KW_RE_LIST = ['kw', 're'] 0041 _VAR_KIND_LIST = ['variables', 'deprecated-or-internal-variables', 'environment-variables'] 0042 _CONTROL_FLOW_LIST = { 0043 'break' 0044 , 'continue' 0045 , 'elseif' 0046 , 'else' 0047 , 'endforeach' 0048 , 'endif' 0049 , 'endwhile' 0050 , 'foreach' 0051 , 'if' 0052 , 'return' 0053 , 'while' 0054 } 0055 _VAR_REF_ENTITY = '&var_ref_re;' 0056 0057 _HEURISTICS = [ 0058 ( 0059 {'MAX(_(COUNT|MAJOR|MINOR|PATCH|TWEAK))?', 'MIN(_(COUNT|MAJOR|MINOR|PATCH|TWEAK))?'} 0060 , 'M(AX|IN)(_(COUNT|MAJOR|MINOR|PATCH|TWEAK))?' 0061 ) 0062 , ({'OUTPUTS', 'OUTPUT_(HEADER|SOURCE)'}, 'OUTPUT(S|_(HEADER|SOURCE))') 0063 , ({'PREFIX', 'SUFFIX'}, '(PRE|SUF)FIX') 0064 , ({'CPPCHECK', 'CPPLINT'}, 'CPP(CHECK|LINT)') 0065 , ({'DEPENDS', 'PREDEPENDS'}, '(PRE)?DEPENDS') 0066 , ({'ICON', 'ICONURL'}, 'ICON(URL)?') 0067 , ( 0068 { 0069 '&var%ref%re;(_INIT)?' 0070 , 'DEBUG(_INIT)?' 0071 , 'MINSIZEREL(_INIT)?' 0072 , 'RELEASE(_INIT)?' 0073 , 'RELWITHDEBINFO(_INIT)?' 0074 } 0075 , '(DEBUG|MINSIZEREL|REL(EASE|WITHDEBINFO)|&var%ref%re;)(_INIT)?' 
0076 ) 0077 , ({'RELEASE', 'RELWITHDEBINFO'}, 'REL(EASE|WITHDEBINFO)') 0078 , ({'POST', 'POSTUN', 'PRE', 'PREUN'}, 'P(RE|OST)(UN)?') 0079 , ({'AUTOPROV', 'AUTOREQ', 'AUTOREQPROV'}, 'AUTO(PROV|REQ(PROV)?)') 0080 , ({'DEFINITIONS', 'OPTIONS'}, '(DEFINI|OP)TIONS') 0081 , ({'LIB_NAMES', 'LIBRARY'}, 'LIB(_NAMES|RARY)') 0082 , ({'EXTENSIONS', 'EXTRA_FLAGS'}, 'EXT(ENSIONS|RA_FLAGS)') 0083 , ({'DISABLED', 'DISPLAY_NAME'}, 'DIS(ABLED|PLAY_NAME)') 0084 , ({'LIBRARIES', 'LINK_LIBRARIES', 'STATIC_LINK_LIBRARIES'}, '((STATIC_)?LINK_)?LIBRARIES') 0085 , ({'INCLUDE_DIRS', 'LIBRARY_DIRS'}, '(INCLUDE|LIBRARY)_DIRS') 0086 , ({'BINARY_DIR', 'SOURCE_DIR'}, '(BINARY|SOURCE)_DIR') 0087 , ({'CFLAGS(_OTHER)?', 'LDFLAGS(_OTHER)?'}, '(C|LD)FLAGS(_OTHER)?') 0088 , ({'INCLUDE_DIRECTORIES', 'LIBRARIES'}, '(INCLUDE_DIRECTO|LIBRA)RIES') 0089 , ({'POSTFLIGHT_&var%ref%re;_SCRIPT', 'PREFLIGHT_&var%ref%re;_SCRIPT'}, 'P(RE|OST)FLIGHT_&var%ref%re;_SCRIPT') 0090 , ({'DIRECTORIES', 'FRAMEWORK_DIRECTORIES'}, '(FRAMEWORK_)?DIRECTORIES') 0091 , ({'FILE_FLAG', 'FILE'}, 'FILE(_FLAG)?') 0092 , ({'DIR_PERMISSIONS', 'FILE_PERMISSIONS'}, '(DIR|FILE)_PERMISSIONS') 0093 , ({'COMPILER_LAUNCHER', 'LINKER_LAUNCHER'}, '(COMPIL|LINK)ER_LAUNCHER') 0094 , ({'COMPILER', 'COMPILE_(DEFINI|OP)TIONS'}, 'COMPILE(R|_(DEFINI|OP)TIONS)') 0095 , ({'LICENSEURL', 'LICENSE_(EXPRESSION|FILE_NAME)'}, 'LICENSE(URL|_(EXPRESSION|FILE_NAME))') 0096 , ({'NO_SONAME', 'SONAME'}, '(NO_)?SONAME') 0097 , ({'CODE_SIGN_ON_COPY', 'REMOVE_HEADERS_ON_COPY'}, '(CODE_SIGN|REMOVE_HEADERS)_ON_COPY') 0098 , ({'(REFERENCE|REFERENCEPROP_&var%ref%re;_TAG)_&var%ref%re;'}, 'REFERENCE(PROP_&var%ref%re;_TAG)?_&var%ref%re;') 0099 , ({'DISABLE_FIND_PACKAGE', 'REQUIRE_FIND_PACKAGE'}, '(DISABLE|REQUIRE)_FIND_PACKAGE') 0100 , ( 0101 {'GROUP_USING_&var%ref%re;(_SUPPORTED)?', 'LIBRARY_USING_&var%ref%re;(_SUPPORTED)?'} 0102 , '(GROUP|LIBRARY)_USING_&var%ref%re;(_SUPPORTED)?' 0103 ) 0104 , ( 0105 { 0106 'EXE_LINKER_FLAGS_&var%ref%re;(_INIT)?' 
0107 , 'MODULE_LINKER_FLAGS_&var%ref%re;(_INIT)?' 0108 , 'SHARED_LINKER_FLAGS_&var%ref%re;(_INIT)?' 0109 , 'STATIC_LINKER_FLAGS_&var%ref%re;(_INIT)?' 0110 } 0111 , '(EXE|MODULE|SHARED|STATIC)_LINKER_FLAGS_&var%ref%re;(_INIT)?' 0112 ) 0113 , ( 0114 { 0115 'ARCHIVE_OUTPUT_DIRECTORY' 0116 , 'COMPILE_PDB_OUTPUT_DIRECTORY' 0117 , 'LIBRARY_OUTPUT_DIRECTORY' 0118 , 'PDB_OUTPUT_DIRECTORY' 0119 , 'RUNTIME_OUTPUT_DIRECTORY' 0120 } 0121 , '(ARCHIVE|(COMPILE_)?PDB|LIBRARY|RUNTIME)_OUTPUT_DIRECTORY' 0122 ) 0123 , ( 0124 { 0125 'ARCHIVE_OUTPUT_(DIRECTORY|NAME)' 0126 , 'LIBRARY_OUTPUT_(DIRECTORY|NAME)' 0127 , 'RUNTIME_OUTPUT_(DIRECTORY|NAME)' 0128 } 0129 , '(ARCHIVE|LIBRARY|RUNTIME)_OUTPUT_(DIRECTORY|NAME)' 0130 ) 0131 , ({'ASM&var_ref_re;', 'ASM&var_ref_re;FLAGS'}, 'ASM&var_ref_re;(FLAGS)?') 0132 , ( 0133 { 0134 'CMAKE_POLICY_DEFAULT_CMP[0-9]{4}' 0135 , 'CMAKE_POLICY_WARNING_CMP[0-9]{4}' 0136 } 0137 , 'CMAKE_POLICY_(DEFAULT|WARNING)_CMP[0-9]{4}' 0138 ) 0139 , ({'CMAKE_ARGV[0-9]+', 'CMAKE_MATCH_[0-9]+'}, 'CMAKE_(ARGV|MATCH_)[0-9]+') 0140 ] 0141 0142 @dataclass 0143 class RePartNode: 0144 children: dict[str, RePartNode] = field(default_factory=dict, hash=False) 0145 is_leaf: bool = False 0146 0147 0148 @dataclass 0149 class RegexCollection: 0150 special_cases: list[str] = field(default_factory=list, hash=False) 0151 re_tree: dict[str, RePartNode] = field(default_factory=dict, hash=False) 0152 0153 def add_case(self, regex: str) -> RegexCollection: 0154 self.special_cases.append(regex) 0155 return self 0156 0157 def update_tree(self, name_parts: list[str]) -> RegexCollection: 0158 safe_var_ref = _VAR_REF_ENTITY.replace('_', '%') 0159 current = functools.reduce( 0160 lambda current, part: ( 0161 self.re_tree if current is None else current.children 0162 ).setdefault(part, RePartNode()) 0163 , safe_var_ref.join(name_parts).replace(f'{safe_var_ref}_{safe_var_ref}', safe_var_ref).split('_') 0164 , None 0165 ) 0166 current.is_leaf = True 0167 return self 0168 0169 0170 def 
try_transform_placeholder_string_to_regex(state: RegexCollection, name: str): 0171 ''' 0172 NOTE Some placeholders are not IDs, but numbers... 0173 `CMAKE_MATCH_<N>` 4 example 0174 ''' 0175 name_parts = _TEMPLATED_NAME.split(name) 0176 match name_parts: 0177 case ['CMAKE_MATCH_' as head, ''] | ['CMAKE_ARGV' as head, ''] | ['ARGV' as head, '']: 0178 return state.add_case(head + '[0-9]+') 0179 0180 case ['CMAKE_POLICY_DEFAULT_CMP' as head, ''] | ['CMAKE_POLICY_WARNING_CMP' as head, '']: 0181 return state.add_case(head + '[0-9]{4}') 0182 0183 case ['', '__TRYRUN_OUTPUT']: 0184 return state.add_case(f'{_VAR_REF_ENTITY}__TRYRUN_OUTPUT') 0185 0186 case (['ASM', ''] | ['ASM', 'FLAGS']) as asm_env: 0187 return state.add_case(f'{asm_env[0]}{_VAR_REF_ENTITY}{asm_env[1]}') 0188 0189 return state.update_tree(name_parts) 0190 0191 0192 def is_first_subset_of_second(first, second): 0193 subset = set(first) 0194 fullset = set(second) 0195 return subset.issubset(fullset) 0196 0197 0198 def try_optimize_known_alt_groups(groups: list[str]) -> list[str]: 0199 for case in _HEURISTICS: 0200 if is_first_subset_of_second(case[0], groups): 0201 groups = sorted([*filter(lambda item: item not in case[0], groups), case[1]]) 0202 return groups 0203 0204 0205 def try_optimize_trailing_var_ref_regex(groups: list[str]) -> list[str]: 0206 tail_var_ref_re = '_' + _VAR_REF_ENTITY.replace('_', '%') 0207 candidates = [*filter(lambda s: s.endswith(tail_var_ref_re), groups)] 0208 return sorted([ 0209 *filter(lambda item: item not in candidates, groups) 0210 , f'({"|".join(try_optimize_known_alt_groups([s[:-len(tail_var_ref_re)] for s in candidates]))}){tail_var_ref_re}' 0211 ]) if len(candidates) > 1 else groups 0212 0213 0214 def build_regex(state: list[str], kv: tuple[str, RePartNode]) -> list[str]: 0215 name, value = kv 0216 match (value, len(value.children)): 0217 case (RePartNode(children={}, is_leaf=True), 0): 0218 return [*state, name] 0219 0220 case (node, sz) if sz > 0: 0221 alt_group = 
try_optimize_known_alt_groups( 0222 try_optimize_trailing_var_ref_regex( 0223 functools.reduce(build_regex, node.children.items(), []) 0224 ) 0225 ) 0226 0227 match (len(alt_group), node.is_leaf): 0228 case (1, False): 0229 return [*state, f'{name}_{alt_group[0]}'] 0230 0231 case (1, True): 0232 return [*state, f'{name}(_{alt_group[0]})?'] 0233 0234 case (sz, False) if sz > 0: 0235 return [*state, f'{name}_({"|".join(alt_group)})'] 0236 0237 case (sz, True) if sz > 0: 0238 return [*state, f'{name}(_({"|".join(alt_group)}))?'] 0239 0240 case _: 0241 raise AssertionError('Zero children?') 0242 0243 case _: 0244 raise AssertionError(f'NOT MATCHED: {name=}→{value=}') 0245 0246 return state 0247 0248 0249 def try_placeholders_to_regex(names): 0250 if not names: 0251 return None 0252 0253 data = functools.reduce( 0254 try_transform_placeholder_string_to_regex 0255 , names 0256 , RegexCollection() 0257 ) 0258 0259 return ( 0260 '\\b(?:' 0261 + '|'.join( 0262 try_optimize_known_alt_groups( 0263 try_optimize_trailing_var_ref_regex( 0264 functools.reduce( 0265 build_regex 0266 , data.re_tree.items() 0267 , data.special_cases 0268 ) 0269 ) 0270 ) 0271 ).replace('%', '_') 0272 + ')\\b' 0273 ) 0274 0275 0276 def partition_iterable(fn, iterable): 0277 true, false = [], [] 0278 for i in iterable: 0279 (false, true)[int(fn(i))].append(i) 0280 return true, false 0281 0282 0283 def _transform_command_set(cmd, list_name): 0284 args, args_re = partition_iterable(lambda x: _TEMPLATED_NAME.search(x) is None, cmd[list_name]) 0285 del cmd[list_name] 0286 list_name = list_name.replace('-', '_') 0287 0288 cmd[list_name] = {k: sorted(set(v)) for k, v in zip(_KW_RE_LIST, [args, args_re])} 0289 cmd[list_name]['re'] = try_placeholders_to_regex(args_re) 0290 0291 return cmd 0292 0293 0294 def transform_command(cmd): 0295 can_be_nulary = True 0296 0297 if 'name' not in cmd: 0298 raise RuntimeError('Command have no name') 0299 0300 if 'named-args' in cmd: 0301 new_cmd = _transform_command_set(cmd, 
'named-args') 0302 assert new_cmd == cmd 0303 can_be_nulary = False 0304 0305 if 'special-args' in cmd: 0306 new_cmd = _transform_command_set(cmd, 'special-args') 0307 assert new_cmd == cmd 0308 can_be_nulary = False 0309 0310 if 'property-args' in cmd: 0311 new_cmd = _transform_command_set(cmd, 'property-args') 0312 assert new_cmd == cmd 0313 can_be_nulary = False 0314 0315 cmd['nested_parentheses'] = cmd.get('nested-parentheses?', False) 0316 0317 if 'first-arg-is-target?' in cmd: 0318 cmd['first_arg_is_target'] = cmd['first-arg-is-target?'] 0319 can_be_nulary = False 0320 0321 if 'first-args-are-targets?' in cmd: 0322 cmd['first_args_are_targets'] = cmd['first-args-are-targets?'] 0323 can_be_nulary = False 0324 0325 if 'has-target-name-after-kw' in cmd: 0326 cmd['has_target_name_after_kw'] = cmd['has-target-name-after-kw'] 0327 can_be_nulary = False 0328 0329 if 'has-target-names-after-kw' in cmd: 0330 cmd['has_target_names_after_kw'] = cmd['has-target-names-after-kw'] 0331 can_be_nulary = False 0332 0333 if 'second-arg-is-target?' in cmd: 0334 cmd['second_arg_is_target'] = cmd['second-arg-is-target?'] 0335 can_be_nulary = False 0336 0337 if 'nulary?' 
in cmd and cmd['nulary?'] and not can_be_nulary: 0338 raise RuntimeError('Command `{}` w/ args declared nulary!?'.format(cmd['name'])) 0339 0340 if 'start-region' in cmd: 0341 cmd['start_region'] = cmd['start-region'] 0342 0343 if 'end-region' in cmd: 0344 cmd['end_region'] = cmd['end-region'] 0345 0346 cmd['attribute'] = 'Control Flow' if cmd['name'] in _CONTROL_FLOW_LIST else 'Command' 0347 0348 return cmd 0349 0350 0351 def remove_duplicate_list_nodes(contexts, highlighting): 0352 remap = {} 0353 0354 items_by_kws = {} 0355 # extract duplicate keyword list 0356 for items in highlighting: 0357 if items.tag != 'list': 0358 break 0359 k = '<'.join(item.text for item in items) 0360 name = items.attrib['name'] 0361 rename = items_by_kws.get(k) 0362 if rename: 0363 remap[name] = rename 0364 highlighting.remove(items) 0365 else: 0366 items_by_kws[k] = name 0367 0368 # update keyword list name referenced by each rule 0369 for context in contexts: 0370 for rule in context: 0371 if rule.tag == 'keyword': 0372 name = rule.attrib['String'] 0373 rule.attrib['String'] = remap.get(name, name) 0374 0375 0376 def remove_duplicate_context_nodes(contexts): 0377 # 3 levels: ctx, ctx_op and ctx_op_nested 0378 for _ in range(3): 0379 remap = {} 0380 duplicated = {} 0381 0382 # remove duplicate nodes 0383 for context in contexts: 0384 name = context.attrib['name'] 0385 context.attrib['name'] = 'dummy' 0386 ref = duplicated.setdefault(etree.tostring(context), []) 0387 if ref: 0388 contexts.remove(context) 0389 else: 0390 context.attrib['name'] = name 0391 ref.append(name) 0392 remap[name] = ref[0] 0393 0394 # update context name referenced by each rule 0395 for context in contexts: 0396 for rule in context: 0397 ref = remap.get(rule.attrib.get('context')) 0398 if ref: 0399 rule.attrib['context'] = ref 0400 0401 0402 def remove_duplicate_nodes(xml_string): 0403 parser = etree.XMLParser(resolve_entities=False, collect_ids=False) 0404 root = etree.fromstring(xml_string.encode(), 
parser=parser) 0405 highlighting = root[0] 0406 0407 contexts = highlighting.find('contexts') 0408 0409 remove_duplicate_list_nodes(contexts, highlighting) 0410 remove_duplicate_context_nodes(contexts) 0411 0412 # reformat comments 0413 xml = etree.tostring(root) 0414 xml = re.sub(b'(?=[^\n ])<!--', b'\n<!--', xml) 0415 xml = re.sub(b'-->(?=[^ \n])', b'-->\n', xml) 0416 0417 # extract DOCTYPE removed by etree.fromstring and reformat <language> 0418 doctype = xml_string[:xml_string.find('<highlighting')] 0419 0420 # remove unformatted <language> 0421 xml = xml[xml.find(b'<highlighting'):] 0422 0423 # last comment removed by etree.fromstring 0424 last_comment = '\n<!-- kate: replace-tabs on; indent-width 2; tab-width 2; -->' 0425 0426 return f'{doctype}{xml.decode()}{last_comment}' 0427 0428 0429 #BEGIN Jinja filters 0430 0431 def cmd_is_nulary(cmd): 0432 return cmd.setdefault('nulary?', False) 0433 0434 #END Jinja filters 0435 0436 0437 @click.command() 0438 @click.argument('input_yaml', type=click.File('r')) 0439 @click.argument('template', type=click.File('r'), default='./cmake.xml.tpl') 0440 def cli(input_yaml, template): 0441 data = yaml.load(input_yaml, Loader=yaml.BaseLoader) 0442 0443 # Partition `variables` and `environment-variables` lists into "pure" (key)words and regexes to match 0444 for var_key in _VAR_KIND_LIST: 0445 data[var_key] = { 0446 k: sorted(set(v)) for k, v in zip( 0447 _KW_RE_LIST 0448 , [*partition_iterable(lambda x: _TEMPLATED_NAME.search(x) is None, data[var_key])] 0449 ) 0450 } 0451 data[var_key]['re'] = try_placeholders_to_regex(data[var_key]['re']) 0452 0453 # Transform properties and make all-properties list 0454 data['properties'] = {} 0455 for prop in _PROPERTY_KEYS: 0456 python_prop_list_name = prop.replace('-', '_') 0457 props, props_re = partition_iterable(lambda x: _TEMPLATED_NAME.search(x) is None, data[prop]) 0458 del data[prop] 0459 0460 data['properties'][python_prop_list_name] = { 0461 k: sorted(set(v)) for k, v in 
zip(_KW_RE_LIST, [props, props_re]) 0462 } 0463 data['properties'][python_prop_list_name]['re'] = try_placeholders_to_regex(props_re) 0464 0465 data['properties']['kinds'] = list(map(lambda name: name.replace('-', '_'), _PROPERTY_KEYS)) 0466 0467 # Make all commands list 0468 data['commands'] = list( 0469 map( 0470 transform_command 0471 , data['scripting-commands'] + data['project-commands'] + data['ctest-commands'] 0472 ) 0473 ) 0474 data['standard_module_commands'] = list( 0475 map( 0476 transform_command 0477 , data['standard-module-commands'] 0478 ) 0479 ) 0480 del data['standard-module-commands'] 0481 0482 # Fix node names to be accessible from Jinja template 0483 data['generator_expressions'] = (ex for ex in data['generator-expressions'] if isinstance(ex, str)) 0484 data['complex_generator_expressions'] = [ex for ex in data['generator-expressions'] if not isinstance(ex, str)] 0485 data['deprecated_or_internal_variables'] = data['deprecated-or-internal-variables'] 0486 data['environment_variables'] = data['environment-variables'] 0487 del data['generator-expressions'] 0488 del data['deprecated-or-internal-variables'] 0489 del data['environment-variables'] 0490 0491 env = jinja2.Environment( 0492 keep_trailing_newline=True 0493 ) 0494 env.block_start_string = '<!--[' 0495 env.block_end_string = ']-->' 0496 env.variable_start_string = '<!--{' 0497 env.variable_end_string = '}-->' 0498 env.comment_start_string = '<!--#' 0499 env.comment_end_string = '#-->' 0500 0501 # Register convenience filters 0502 env.tests['nulary'] = cmd_is_nulary 0503 0504 tpl = env.from_string(template.read()) 0505 result = tpl.render(data) 0506 result = remove_duplicate_nodes(result) 0507 0508 print(result) 0509 0510 0511 if __name__ == '__main__': 0512 cli() 0513 # TODO Handle execptions and show errors