data/generators/generate-cmake-syntax.py

0001 #!/usr/bin/env python3
0002 # -*- coding: utf-8 -*-
0003 #
0004 # Generate Kate syntax file for CMake
0005 #
0006 # SPDX-FileCopyrightText: 2017-2020 Alex Turbov <i.zaufi@gmail.com>
0007 #
0008 # To install prerequisites:
0009 #
0010 #   $ pip install --user click jinja2 lxml pyyaml
0011 #
0012 # To use:
0013 #
0014 #   $ ./generate-cmake-syntax.py cmake.yaml > ../syntax/cmake.xml
0015 #
0016 import click
0017 import jinja2
0018 import re
0019 import yaml
0020
0021 from lxml import etree
0022
0023
# Matches one `<placeholder>` segment of a templated name.
_TEMPLATED_NAME = re.compile('<[^>]+>')

# Input keys that hold property names, one list per property kind.
_PROPERTY_KEYS = [
    'global-properties',
    'directory-properties',
    'target-properties',
    'source-properties',
    'test-properties',
    'cache-properties',
    'install-properties',
]

# Keys of the "plain keywords" / "regex" pair every partitioned list is stored under.
_KW_RE_LIST = ['kw', 're']

# Input keys that hold variable names.
_VAR_KIND_LIST = [
    'variables',
    'deprecated-or-internal-variables',
    'environment-variables',
]

# Commands whose `attribute` becomes 'Control Flow' instead of 'Command'.
_CONTROL_FLOW_LIST = {
    'break',
    'continue',
    'elseif',
    'else',
    'endforeach',
    'endif',
    'endwhile',
    'foreach',
    'if',
    'return',
    'while',
}
0049
0050
def try_transform_placeholder_string_to_regex(name):
    '''
        Turn a name containing `<placeholder>` parts into a regex fragment.

        The name is split on its placeholders. If one of the literal pieces
        is a known "numeric" prefix, a fixed digit pattern is returned.
        Otherwise the pieces are glued back together with the
        `&var_ref_re;` entity in place of every placeholder. A name without
        placeholders is returned unchanged.

        NOTE Some placeholders are not IDs, but numbers...
            `CMAKE_MATCH_<N>` for example
    '''
    # Checked in this order; the first literal piece found wins.
    numeric_patterns = (
        ('CMAKE_MATCH_', 'CMAKE_MATCH_[0-9]+'),
        ('CMAKE_ARGV', 'CMAKE_ARGV[0-9]+'),
        ('CMAKE_POLICY_DEFAULT_CMP', 'CMAKE_POLICY_DEFAULT_CMP[0-9]{4}'),
        ('CMAKE_POLICY_WARNING_CMP', 'CMAKE_POLICY_WARNING_CMP[0-9]{4}'),
        ('ARGV', 'ARGV[0-9]+'),
    )

    pieces = _TEMPLATED_NAME.split(name)

    for literal, pattern in numeric_patterns:
        if literal in pieces:
            return pattern

    if len(pieces) > 1:
        return '&var_ref_re;'.join(pieces)
    return name
0073
0074
def try_placeholders_to_regex(names):
    '''
        Build one word-bounded alternation regex out of templated names.

        Returns `None` when `names` is empty (or `None`); otherwise every
        name is converted with `try_transform_placeholder_string_to_regex`
        and the results are joined with `|` in descending order.
    '''
    if not names:
        return None

    alternatives = [try_transform_placeholder_string_to_regex(name) for name in names]
    # presumably descending order keeps a longer alternative ahead of a
    # shorter one that is its prefix -- TODO confirm
    alternatives.sort(reverse=True)
    return '\\b(?:{})\\b'.format('|'.join(alternatives))
0081
0082
def partition_iterable(fn, iterable):
    '''
        Split `iterable` into two lists according to the predicate `fn`.

        Returns a `(matching, rest)` tuple: items for which `fn(item)` is
        truthy go to the first list, all others to the second. The input
        order is kept in both lists.

        The predicate result is used for its truthiness only, so `fn` may
        return any object (`None`, a match object, a count, ...), not just
        `True`/`False`.
    '''
    true, false = [], []
    for i in iterable:
        (true if fn(i) else false).append(i)
    return true, false
0088
0089
def _transform_command_set(cmd, list_name):
    '''
        Replace the argument list `cmd[list_name]` with a `kw`/`re` pair.

        The list is taken out of `cmd` and split into plain names and names
        containing `<placeholders>`. The result is stored under `list_name`
        with dashes turned into underscores: `kw` holds the sorted unique
        plain names, `re` holds the regex built from the templated names
        (`None` when there are none).

        `cmd` is modified in place and also returned.
    '''
    def has_no_placeholder(arg):
        return _TEMPLATED_NAME.search(arg) is None

    plain, templated = partition_iterable(has_no_placeholder, cmd.pop(list_name))

    entry = dict(zip(_KW_RE_LIST, (sorted(set(plain)), sorted(set(templated)))))
    entry['re'] = try_placeholders_to_regex(templated)

    cmd[list_name.replace('-', '_')] = entry
    return cmd
0099
0100
def transform_command(cmd):
    '''
        Prepare one command description (a dict) for the template.

        - the `named-args`, `special-args` and `property-args` lists are
          converted by `_transform_command_set`;
        - option keys spelled with dashes / question marks are copied to
          underscore-only names (the original keys are left in place);
        - `nested_parentheses` is always set (default `False`);
        - `attribute` becomes 'Control Flow' or 'Command'.

        Raises `RuntimeError` if the command has no `name`, or if it is
        declared `nulary?` although any argument-related key is present.

        `cmd` is modified in place and also returned.
    '''
    if 'name' not in cmd:
        raise RuntimeError('Command have no name')

    # Becomes true as soon as any key implying arguments is seen
    takes_args = False

    for args_key in ('named-args', 'special-args', 'property-args'):
        if args_key in cmd:
            transformed = _transform_command_set(cmd, args_key)
            # the helper is expected to hand back the very dict it was given
            assert transformed == cmd
            takes_args = True

    cmd['nested_parentheses'] = cmd.get('nested-parentheses?', False)

    target_options = (
        ('first-arg-is-target?', 'first_arg_is_target'),
        ('first-args-are-targets?', 'first_args_are_targets'),
        ('has-target-name-after-kw', 'has_target_name_after_kw'),
        ('has-target-names-after-kw', 'has_target_names_after_kw'),
        ('second-arg-is-target?', 'second_arg_is_target'),
    )
    for source_key, template_key in target_options:
        if source_key in cmd:
            cmd[template_key] = cmd[source_key]
            takes_args = True

    if cmd.get('nulary?') and takes_args:
        raise RuntimeError('Command `{}` w/ args declared nulary!?'.format(cmd['name']))

    # Region markers imply nothing about arguments, so they are copied after the check
    for source_key, template_key in (('start-region', 'start_region'), ('end-region', 'end_region')):
        if source_key in cmd:
            cmd[template_key] = cmd[source_key]

    if cmd['name'] in _CONTROL_FLOW_LIST:
        cmd['attribute'] = 'Control Flow'
    else:
        cmd['attribute'] = 'Command'

    return cmd
0156
0157
def remove_duplicate_list_nodes(contexts, highlighting):
    '''
        Merge `<list>` nodes that contain exactly the same items.

        The leading `list` children of `highlighting` are scanned (the scan
        stops at the first child with another tag). A list whose item texts
        equal those of an earlier list is removed from `highlighting`, and
        every `keyword` rule found in `contexts` that referenced it through
        its `String` attribute is pointed to the list that was kept.

        Both elements are modified in place; nothing is returned.
    '''
    remap = {}          # removed list name -> name of the list kept instead
    items_by_kws = {}   # joined item texts -> name of the first list having them

    # Iterate over a snapshot: nodes are removed from `highlighting` inside
    # the loop, and walking the live child sequence would make the result
    # depend on how the element iterator copes with concurrent removal.
    for items in list(highlighting):
        if items.tag != 'list':
            break
        k = '<'.join(item.text for item in items)
        name = items.attrib['name']
        rename = items_by_kws.get(k)
        if rename:
            remap[name] = rename
            highlighting.remove(items)
        else:
            items_by_kws[k] = name

    # update keyword list name referenced by each rule
    for context in contexts:
        for rule in context:
            if rule.tag == 'keyword':
                name = rule.attrib['String']
                rule.attrib['String'] = remap.get(name, name)
0181
0182
def remove_duplicate_context_nodes(contexts):
    '''
        Merge context nodes that differ only by their `name`.

        Each pass serializes every child of `contexts` with its name
        replaced by a dummy value. Nodes producing the same serialization
        as an earlier node are removed, and rules whose `context` attribute
        referenced a removed node are pointed to the node that was kept.
        Rewriting the references can make further nodes identical, hence
        the repeated passes.

        `contexts` is modified in place; nothing is returned.
    '''
    # 3 levels: ctx, ctx_op and ctx_op_nested
    for _ in range(3):
        remap = {}        # every context name seen -> name of the surviving twin
        first_seen = {}   # name-less serialization -> name of the first such context

        # remove duplicate nodes
        # Iterate over a snapshot: nodes are removed from `contexts` inside
        # the loop, and walking the live child sequence would make the
        # result depend on how the element iterator copes with removal.
        for context in list(contexts):
            name = context.attrib['name']
            # neutralize the name so that it does not take part in the comparison
            context.attrib['name'] = 'dummy'
            fingerprint = etree.tostring(context)
            kept_name = first_seen.get(fingerprint)
            if kept_name is None:
                # first occurrence: give the name back and remember it
                context.attrib['name'] = name
                first_seen[fingerprint] = kept_name = name
            else:
                contexts.remove(context)
            remap[name] = kept_name

        # update context name referenced by each rule
        for context in contexts:
            for rule in context:
                ref = remap.get(rule.attrib.get('context'))
                if ref:
                    rule.attrib['context'] = ref
0207
0208
def remove_duplicate_nodes(xml_string):
    '''
        Post-process the rendered syntax file: drop duplicated keyword
        lists and contexts, and put comments on lines of their own.

        The result is assembled from three parts:
        - everything of `xml_string` preceding the first `<highlighting`,
          copied verbatim;
        - the re-serialized document starting at `<highlighting`;
        - a fixed trailing Kate modeline comment.
    '''
    # Copied from the input text, because the serialized tree is not used
    # for this part (DOCTYPE removed by etree.fromstring, <language> unformatted)
    head = xml_string[:xml_string.find('<highlighting')]

    xml_parser = etree.XMLParser(resolve_entities=False, collect_ids=False)
    root = etree.fromstring(xml_string.encode(), parser=xml_parser)
    highlighting = root[0]
    contexts = highlighting.find('contexts')

    remove_duplicate_list_nodes(contexts, highlighting)
    remove_duplicate_context_nodes(contexts)

    # reformat comments
    # NOTE(review): the lookahead in the first pattern inspects the `<` of
    # `<!--` itself, so it always succeeds -- a lookbehind may have been
    # intended; confirm before changing, the generated file depends on it.
    serialized = etree.tostring(root)
    serialized = re.sub(b'(?=[^\n ])<!--', b'\n<!--', serialized)
    serialized = re.sub(b'-->(?=[^ \n])', b'-->\n', serialized)

    # keep the serialized text from <highlighting> on
    body = serialized[serialized.find(b'<highlighting'):].decode()

    # last comment removed by etree.fromstring
    modeline = '\n<!-- kate: replace-tabs on; indent-width 2; tab-width 2; -->'

    return head + body + modeline
0234
0235
0236 #BEGIN Jinja filters
0237
def cmd_is_nulary(cmd):
    '''
        Return the value of the command's `nulary?` key.

        When the key is missing, it is first added with the value `False`,
        so the command dict is modified by this check.
    '''
    if 'nulary?' not in cmd:
        cmd['nulary?'] = False
    return cmd['nulary?']
0240
0241 #END Jinja filters
0242
0243
# Command line entry point: load the YAML description, reshape it for the
# Jinja template, render the template, merge duplicated nodes of the result
# and print it to standard output.
#
# NOTE Documented with comments on purpose: click would show a docstring of
# this function as the `--help` text of the command.
@click.command()
@click.argument('input_yaml', type=click.File('r'))
@click.argument('template', type=click.File('r'), default='./cmake.xml.tpl')
def cli(input_yaml, template):
    data = yaml.load(input_yaml, Loader=yaml.BaseLoader)

    def has_no_placeholder(name):
        return _TEMPLATED_NAME.search(name) is None

    # Partition `variables` and `environment-variables` lists into "pure" (key)words and regexes to match
    for var_key in _VAR_KIND_LIST:
        plain, templated = partition_iterable(has_no_placeholder, data[var_key])
        entry = dict(zip(_KW_RE_LIST, (sorted(set(plain)), sorted(set(templated)))))
        # here the regex is built from the sorted, de-duplicated names
        entry['re'] = try_placeholders_to_regex(entry['re'])
        data[var_key] = entry

    # Transform properties and make all-properties list
    properties = data['properties'] = {}
    for prop in _PROPERTY_KEYS:
        plain, templated = partition_iterable(has_no_placeholder, data.pop(prop))
        entry = dict(zip(_KW_RE_LIST, (sorted(set(plain)), sorted(set(templated)))))
        # here the regex is built from the templated names as partitioned
        entry['re'] = try_placeholders_to_regex(templated)
        properties[prop.replace('-', '_')] = entry

    properties['kinds'] = [name.replace('-', '_') for name in _PROPERTY_KEYS]

    # Make all commands list
    builtin_commands = data['scripting-commands'] + data['project-commands'] + data['ctest-commands']
    data['commands'] = [transform_command(cmd) for cmd in builtin_commands]
    data['standard_module_commands'] = [
        transform_command(cmd) for cmd in data.pop('standard-module-commands')
    ]

    # Fix node names to be accessible from Jinja template
    for dashed_key in ('generator-expressions', 'deprecated-or-internal-variables', 'environment-variables'):
        data[dashed_key.replace('-', '_')] = data.pop(dashed_key)

    # All template delimiters are spelled as XML comments
    env = jinja2.Environment(
        keep_trailing_newline=True,
        block_start_string='<!--[',
        block_end_string=']-->',
        variable_start_string='<!--{',
        variable_end_string='}-->',
        comment_start_string='<!--#',
        comment_end_string='#-->',
    )

    # Register convenience filters
    env.tests['nulary'] = cmd_is_nulary

    rendered = env.from_string(template.read()).render(data)

    print(remove_duplicate_nodes(rendered))
0315
0316
0317 if __name__ == '__main__':
0318     cli()
0319     # TODO Handle exceptions and show errors