# File indexing completed on 2024-05-12 15:49:58
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Generate Kate syntax file for CMake
#
# SPDX-FileCopyrightText: 2017-2020 Alex Turbov <i.zaufi@gmail.com>
#
# To install prerequisites:
#
#   $ pip install --user click jinja2 lxml pyyaml
#
# To use:
#
#   $ ./generate-cmake-syntax.py cmake.yaml > ../syntax/cmake.xml
#
import re

import click
import jinja2
import yaml

from lxml import etree


# Matches a `<PLACEHOLDER>` fragment inside a templated name
_TEMPLATED_NAME = re.compile('<[^>]+>')
_PROPERTY_KEYS = [
    'global-properties'
  , 'directory-properties'
  , 'target-properties'
  , 'source-properties'
  , 'test-properties'
  , 'cache-properties'
  , 'install-properties'
  ]
_KW_RE_LIST = ['kw', 're']
_VAR_KIND_LIST = ['variables', 'deprecated-or-internal-variables', 'environment-variables']
_CONTROL_FLOW_LIST = {
    'break'
  , 'continue'
  , 'elseif'
  , 'else'
  , 'endforeach'
  , 'endif'
  , 'endwhile'
  , 'foreach'
  , 'if'
  , 'return'
  , 'while'
  }

# Name fragments whose placeholder stands for a number rather than an ID,
# paired with the regex to emit. Checked in this order.
_NUMBERED_PLACEHOLDER_REGEXES = (
    ('CMAKE_MATCH_', 'CMAKE_MATCH_[0-9]+')
  , ('CMAKE_ARGV', 'CMAKE_ARGV[0-9]+')
  , ('CMAKE_POLICY_DEFAULT_CMP', 'CMAKE_POLICY_DEFAULT_CMP[0-9]{4}')
  , ('CMAKE_POLICY_WARNING_CMP', 'CMAKE_POLICY_WARNING_CMP[0-9]{4}')
  , ('ARGV', 'ARGV[0-9]+')
  )

# YAML command lists that get split into keywords and a regex
_ARG_LIST_KEYS = ('named-args', 'special-args', 'property-args')

# YAML option -> Jinja-accessible key; presence of any of them means
# the command takes arguments.
_TARGET_OPTION_KEYS = (
    ('first-arg-is-target?', 'first_arg_is_target')
  , ('first-args-are-targets?', 'first_args_are_targets')
  , ('has-target-name-after-kw', 'has_target_name_after_kw')
  , ('has-target-names-after-kw', 'has_target_names_after_kw')
  , ('second-arg-is-target?', 'second_arg_is_target')
  )

# YAML option -> Jinja-accessible key, copied verbatim when present
_REGION_KEYS = (
    ('start-region', 'start_region')
  , ('end-region', 'end_region')
  )

# Spellings of a disabled flag. The input is read with `yaml.BaseLoader`,
# which keeps every scalar as a string, so `false` arrives as `'false'`.
_FALSE_STRINGS = frozenset(('', '0', 'false', 'no', 'n', 'off'))


def _is_enabled(value):
    '''
    Interpret a flag value loaded from YAML as a boolean.

    Strings are compared case-insensitively against the usual "false"
    spellings; any other value falls back to plain truthiness.
    '''
    if isinstance(value, str):
        return value.strip().lower() not in _FALSE_STRINGS
    return bool(value)


def _is_plain_name(name):
    '''Return `True` if `name` has no `<PLACEHOLDER>` fragment.'''
    return _TEMPLATED_NAME.search(name) is None


def try_transform_placeholder_string_to_regex(name):
    '''
    Turn a name with `<PLACEHOLDER>` fragments into a regex string.

    NOTE Some placeholders are not IDs, but numbers...
        `CMAKE_MATCH_<N>` for example

    Names w/o placeholders are returned unchanged; other placeholders
    are replaced with the `&var_ref_re;` entity reference.
    '''
    parts = _TEMPLATED_NAME.split(name)
    for fragment, regex in _NUMBERED_PLACEHOLDER_REGEXES:
        if fragment in parts:
            return regex

    return '&var_ref_re;'.join(parts) if 1 < len(parts) else name


def try_placeholders_to_regex(names):
    '''
    Build one word-bounded alternation regex out of the given names.

    Returns `None` when `names` is empty (or `None`).
    '''
    if not names:
        return None
    # Reverse order; presumably so that a longer alternative precedes
    # its own prefix -- TODO confirm
    alternatives = sorted(
        map(try_transform_placeholder_string_to_regex, names)
      , reverse=True
      )
    return '\\b(?:' + '|'.join(alternatives) + ')\\b'


def partition_iterable(fn, iterable):
    '''
    Split `iterable` into two lists, preserving the order of items.

    Returns a `(matched, unmatched)` tuple: the items for which `fn`
    is true and then the rest.
    '''
    matched, unmatched = [], []
    for item in iterable:
        (matched if fn(item) else unmatched).append(item)
    return matched, unmatched


def _transform_command_set(cmd, list_name):
    '''
    Replace `cmd[list_name]` with a `{'kw': ..., 're': ...}` dict.

    The result is stored under `list_name` with dashes replaced by
    underscores, and the original key is removed. `kw` is the sorted
    list of unique plain names, `re` is a regex matching the templated
    names (or `None` if there are none).

    Modifies `cmd` in place and returns it.
    '''
    args, args_re = partition_iterable(_is_plain_name, cmd[list_name])
    del cmd[list_name]
    list_name = list_name.replace('-', '_')

    cmd[list_name] = {k: sorted(set(v)) for k, v in zip(_KW_RE_LIST, [args, args_re])}
    cmd[list_name]['re'] = try_placeholders_to_regex(args_re)

    return cmd


def transform_command(cmd):
    '''
    Prepare one command description for rendering.

    Splits the argument lists, adds Jinja-accessible aliases (w/
    underscores) for the dashed option keys and sets `attribute`.
    Modifies `cmd` in place and returns it.

    Raises `RuntimeError` if the command has no name, or is declared
    nulary while also describing arguments.
    '''
    if 'name' not in cmd:
        raise RuntimeError('Command have no name')

    can_be_nulary = True

    for list_name in _ARG_LIST_KEYS:
        if list_name in cmd:
            _transform_command_set(cmd, list_name)
            can_be_nulary = False

    cmd['nested_parentheses'] = cmd.get('nested-parentheses?', False)

    for yaml_key, jinja_key in _TARGET_OPTION_KEYS:
        if yaml_key in cmd:
            cmd[jinja_key] = cmd[yaml_key]
            can_be_nulary = False

    # An explicit `nulary?: false` must not trigger the error:
    # the flag comes from YAML as a (truthy) string.
    if _is_enabled(cmd.get('nulary?', False)) and not can_be_nulary:
        raise RuntimeError('Command `{}` w/ args declared nulary!?'.format(cmd['name']))

    for yaml_key, jinja_key in _REGION_KEYS:
        if yaml_key in cmd:
            cmd[jinja_key] = cmd[yaml_key]

    cmd['attribute'] = 'Control Flow' if cmd['name'] in _CONTROL_FLOW_LIST else 'Command'

    return cmd


def remove_duplicate_list_nodes(contexts, highlighting):
    '''
    Drop `<list>` nodes repeating the items of an earlier list and
    point the `keyword` rules at the list which has been kept.
    '''
    remap = {}

    items_by_kws = {}
    # extract duplicate keyword list
    # (iterate over a snapshot, cuz nodes get removed on the way)
    for items in list(highlighting):
        # only the leading run of `<list>` nodes is considered
        if items.tag != 'list':
            break
        k = '<'.join(item.text for item in items)
        name = items.attrib['name']
        rename = items_by_kws.get(k)
        if rename:
            remap[name] = rename
            highlighting.remove(items)
        else:
            items_by_kws[k] = name

    # update keyword list name referenced by each rule
    for context in contexts:
        for rule in context:
            if rule.tag == 'keyword':
                name = rule.attrib['String']
                rule.attrib['String'] = remap.get(name, name)


def remove_duplicate_context_nodes(contexts):
    '''
    Drop contexts which differ from an earlier one only by name and
    redirect the rules referencing them to the one which has been kept.
    '''
    # 3 levels: ctx, ctx_op and ctx_op_nested
    for _ in range(3):
        remap = {}
        duplicated = {}

        # remove duplicate nodes
        # (iterate over a snapshot, cuz nodes get removed on the way)
        for context in list(contexts):
            name = context.attrib['name']
            # serialize w/ a dummy name, so contexts compare by content only
            context.attrib['name'] = 'dummy'
            ref = duplicated.setdefault(etree.tostring(context), [])
            if ref:
                contexts.remove(context)
            else:
                context.attrib['name'] = name
                ref.append(name)
            remap[name] = ref[0]

        # update context name referenced by each rule
        for context in contexts:
            for rule in context:
                ref = remap.get(rule.attrib.get('context'))
                if ref:
                    rule.attrib['context'] = ref


def remove_duplicate_nodes(xml_string):
    '''
    Remove duplicate keyword lists and contexts from the rendered XML.

    Returns the resulting document as a string: the original text
    preceding `<highlighting`, the processed XML and the trailing
    Kate modeline comment.
    '''
    parser = etree.XMLParser(resolve_entities=False, collect_ids=False)
    root = etree.fromstring(xml_string.encode(), parser=parser)
    highlighting = root[0]

    contexts = highlighting.find('contexts')

    remove_duplicate_list_nodes(contexts, highlighting)
    remove_duplicate_context_nodes(contexts)

    # reformat comments
    xml = etree.tostring(root)
    # NOTE(review): the lookahead below is always satisfied (the next
    # character is `<`), so every comment gets a line break before it.
    xml = re.sub(b'(?=[^\n ])<!--', b'\n<!--', xml)
    xml = re.sub(b'-->(?=[^ \n])', b'-->\n', xml)

    # extract DOCTYPE removed by etree.fromstring and reformat <language>
    doctype = xml_string[:xml_string.find('<highlighting')]

    # remove unformatted <language>
    xml = xml[xml.find(b'<highlighting'):]

    # last comment removed by etree.fromstring
    last_comment = '\n<!-- kate: replace-tabs on; indent-width 2; tab-width 2; -->'

    return f'{doctype}{xml.decode()}{last_comment}'


#BEGIN Jinja filters

def cmd_is_nulary(cmd):
    '''
    Jinja test: is the command declared as taking no arguments?

    Also stores `False` under `nulary?` if the key is missing.
    '''
    return _is_enabled(cmd.setdefault('nulary?', False))

#END Jinja filters


@click.command()
@click.argument('input_yaml', type=click.File('r'))
@click.argument('template', type=click.File('r'), default='./cmake.xml.tpl')
def cli(input_yaml, template):
    '''Render the syntax TEMPLATE using data from INPUT_YAML and print the result.'''
    # NOTE `BaseLoader` keeps every scalar as a string
    data = yaml.load(input_yaml, Loader=yaml.BaseLoader)

    # Partition `variables` and `environment-variables` lists into "pure" (key)words and regexes to match
    for var_key in _VAR_KIND_LIST:
        data[var_key] = {
            k: sorted(set(v)) for k, v in zip(
                _KW_RE_LIST
              , partition_iterable(_is_plain_name, data[var_key])
              )
          }
        data[var_key]['re'] = try_placeholders_to_regex(data[var_key]['re'])

    # Transform properties and make all-properties list
    data['properties'] = {}
    for prop in _PROPERTY_KEYS:
        python_prop_list_name = prop.replace('-', '_')
        props, props_re = partition_iterable(_is_plain_name, data[prop])
        del data[prop]

        data['properties'][python_prop_list_name] = {
            k: sorted(set(v)) for k, v in zip(_KW_RE_LIST, [props, props_re])
          }
        data['properties'][python_prop_list_name]['re'] = try_placeholders_to_regex(props_re)

    data['properties']['kinds'] = [name.replace('-', '_') for name in _PROPERTY_KEYS]

    # Make all commands list
    data['commands'] = [
        transform_command(cmd)
        for cmd in data['scripting-commands'] + data['project-commands'] + data['ctest-commands']
      ]
    data['standard_module_commands'] = [
        transform_command(cmd) for cmd in data['standard-module-commands']
      ]
    del data['standard-module-commands']

    # Fix node names to be accessible from Jinja template
    for key in ('generator-expressions', 'deprecated-or-internal-variables', 'environment-variables'):
        data[key.replace('-', '_')] = data.pop(key)

    # Use XML comments as delimiters of the template syntax
    env = jinja2.Environment(
        keep_trailing_newline=True
      , block_start_string='<!--['
      , block_end_string=']-->'
      , variable_start_string='<!--{'
      , variable_end_string='}-->'
      , comment_start_string='<!--#'
      , comment_end_string='#-->'
      )

    # Register convenience filters
    env.tests['nulary'] = cmd_is_nulary

    tpl = env.from_string(template.read())
    result = tpl.render(data)
    result = remove_duplicate_nodes(result)

    print(result)


if __name__ == '__main__':
    cli()
    # TODO Handle exceptions and show errors