File indexing completed on 2024-05-19 04:03:43

0001 #!/usr/bin/env python3
0002 # SPDX-FileCopyrightText: 2023 Jonathan Poelen <jonathan.poelen@gmail.com>
0003 # SPDX-License-Identifier: MIT
0004 
0005 from pathlib import Path
0006 from collections import defaultdict
0007 from typing import TextIO
0008 import re
0009 import sys
0010 
0011 
# Front-matter lines that make the scan loop stop reading a page's header
# and leave it without a page type: YAML list entries and unwanted page types.
_excluded_list_entries = ('non-standard', 'experimental', 'deprecated')
_excluded_page_types = (
  'css-combinator',
  'css-selector',
  'css-module',
  'landing-page',
  'guide',
)

exclude_line = {
  *(f'  - {entry}\n' for entry in _excluded_list_entries),
  *(f'page-type: {kind}\n' for kind in _excluded_page_types),
}
0022 
# Every `page-type:` front-matter line the scan loop knows how to handle;
# any other page type met during the scan raises an exception there.
page_type_accepted = {
  f'page-type: {kind}\n'
  for kind in (
    'css-type',
    'css-function',
    'css-property',
    'css-keyword',
    'css-shorthand-property',
    'css-pseudo-element',
    'css-pseudo-class',
    'css-at-rule-descriptor',
    'css-at-rule',
    'css-media-feature',
    'svg-attribute',
  )
}
0036 
# Page titles that the scan loop skips outright. The comparison is made on
# the raw `title:` value, i.e. before surrounding double quotes are stripped,
# which is why '"!important"' keeps its quotes here.
exclude_title = {
  '<alpha-value>',
  '<angle>',
  '<angle-percentage>',
  '<basic-shape>',
  '<calc-constant>',
  '<calc-sum>',
  '<color-interpolation-method>',
  '<color>',
  '<custom-ident>',
  '<dashed-ident>',
  '<display-listitem>',
  '<display-inside>',
  '<dimension>',
  '<easing-function>',
  '<filter-function>',
  '<flex>',
  '<frequency-percentage>',
  '<frequency>',
  '<gradient>',
  '<hex-color>',
  '<hue>',
  '<hue-interpolation-method>',
  '<ident>',
  '<image>',
  '<integer>',
  '<length>',
  '<length-percentage>',
  '<number>',
  '<percentage>',
  '<position>',
  '<ratio>',
  '<resolution>',
  '<string>',
  '<time-percentage>',
  '<time>',
  '<transform-function>',
  '"!important"',
}
0076 
# Properties that are still recorded as properties, but whose pages are not
# passed to css_parse_values() by the scan loop.
properties_ignore_value: tuple[str, ...] = (
  'counter-increment', 'counter-reset', 'counter-set',
  'text-rendering', 'page',
)
0084 
0085 
# Module-level accumulators, all empty at start-up.

# properties and the value keywords collected for them
properties: set[str] = set()
values: set[str] = set()
svg_properties: set[str] = set()
svg_values: set[str] = set()

# functions, at-rules and media features
functions: set[str] = set()
at_rules: set[str] = set()
media_features: set[str] = set()
media_feature_values: set[str] = set()

# selectors
pseudo_classes: set[str] = set()
pseudo_elements: set[str] = set()
experimental_pseudo_classes: set[str] = set()
experimental_pseudo_elements: set[str] = set()

# colors
colors: set[str] = set()
system_colors: set[str] = set()
deprecated_system_colors: set[str] = set()

# units (a list, unlike the other accumulators)
units: list[str] = []
0102 
0103 
_update_version_extractor = re.compile(r' version="(\d+)" ')


def _incremented_version_attr(m: re.Match) -> str:
  """Rebuild the matched ` version="N" ` attribute with N increased by one."""
  bumped = int(m[1]) + 1
  return f' version="{bumped}" '


def update_version(s: str) -> str:
  """Return `s` with its first ` version="N" ` attribute incremented by one.

  Only the first occurrence is touched; a string without such an attribute
  is returned unchanged.
  """
  return _update_version_extractor.sub(_incremented_version_attr, s, count=1)
0108 
0109 
_md_value_extractor = re.compile(r'(?<=[^\w][ /])`([-\w][-\w\d]+(?:<[^>]+>[?+*])?)`')
_html_value_extractor = re.compile(r'<code>([-\w][-\w\d]+)</code>')
_is_md_value = re.compile(r'^\s*- `')
_is_html_table_desc = re.compile(r'^\s+<td><code>')

# Extra headings that do not end the value listing when parsing `display`.
_display_value_headings = (
  '## Grouped values\n',
  '## Outside\n',
  '## Inside\n',
  '## List Item\n',
  '## Internal\n',
  '## Box\n',
  '## Precomposed\n',
)


def _opens_value_section(line: str) -> bool:
  """Tell whether `line` is a heading after which values start being collected."""
  return line.endswith(('## Syntax\n', '## Values\n')) or '## SVG only' in line


def _continues_value_section(line: str, prop: str) -> bool:
  """Tell whether heading `line` still belongs to the value listing of `prop`."""
  if line.endswith('## Values\n') or '## SVG only' in line:
    return True
  return prop == 'display' and line.endswith(_display_value_headings)


def css_parse_values(f: TextIO, prop: str, values: set[str]) -> None:
  """Collect the value keywords documented on a page into `values`.

  `f` is consumed line by line from its current position. Expected layout:

    ## Syntax or ### Syntax

    ```css
    (optional)
    ```
    ## Values or ### Values or not...

    - `ident` or html table <td><code>....</code></td>

    ## SVG only ... (optional)
    ## other title

  `prop` only matters for 'display', whose value listing spans several
  additional headings. Bullet lines containing 'deprecated' are ignored.
  """
  # Skip everything up to the first heading that introduces values.
  for line in f:
    if _opens_value_section(line):
      break
  else:
    return

  for line in f:
    if _is_md_value.match(line):
      if 'deprecated' not in line:
        values.update(_md_value_extractor.findall(line))
    elif line.startswith('#'):
      # Any heading outside the value listing ends the parse.
      if not _continues_value_section(line, prop):
        return
    elif line == '```css\n':
      # Skip the whole code fence, closing line included.
      for fenced in f:
        if fenced.startswith('```\n'):
          break
    elif _is_html_table_desc.match(line):
      values.update(_html_value_extractor.findall(line))
0154 
0155 
def css_parse_named_colors(f: TextIO) -> set[str]:
  """Return the lowercase names found in `<td><code>name</code>` table cells.

  The whole remaining content of `f` is read. A cell is recognised when the
  `<td>` is indented by 6 spaces, with the `<code>` either on the same line
  or on the next line indented by 8 spaces.
  """
  page = f.read()
  cells = re.finditer('\n      <td>(?:\n        )?<code>([a-z]+)</code>', page)
  return {cell[1] for cell in cells}
0158 
0159 
def css_parse_units(f: TextIO) -> list[str]:
  """Return every back-quoted name found in the first cell of markdown table rows.

  Only rows whose first cell starts with a back-quote are considered; names
  are returned in document order, duplicates included.
  """
  page = f.read()
  first_cells = re.findall(r'\n\| (`[^|]+)', page)
  return re.findall(r'`([^`]+)`', ''.join(first_cells))
0162 
0163 
_svg_values_extractor = re.compile(r'<th scope="row">Value</th>\n\s*<td>(.*?)</td>', re.DOTALL)
_svg_value_extractor = re.compile(r'<code>([-\w\d]+)</code>')


def css_parse_svg_attribute(f: TextIO, prop: str, properties: set[str], values: set[str]) -> None:
  """Record an SVG attribute page when it says it is usable as a CSS property.

  Pages lacking the sentence 'can be used as a CSS property' are ignored.
  Otherwise `prop` is added to `properties`, and the `<code>` items of the
  'Value' table row, when there is one, are added to `values`.
  """
  page = f.read()
  if 'can be used as a CSS property' not in page:
    return

  properties.add(prop)
  value_row = _svg_values_extractor.search(page)
  if value_row is None:
    return
  values.update(_svg_value_extractor.findall(value_row[1]))
0174 
0175 
_experimental_selector_extractor = re.compile(r'\n- {{CSSxRef([^}]+)}} {{Experimental_Inline}}')
_selector_extractor = re.compile(r'":+([-\w\d]+)[()]*"')


def css_parse_pseudo_classes_or_elements(f: TextIO) -> tuple[set[str], list[str]]:
  """Extract selector names from an overview page.

  Returns a pair:
    - the set of names whose `{{CSSxRef...}}` bullet is flagged with
      `{{Experimental_Inline}}`;
    - the list of every quoted `:name` / `::name` found on the page, in
      document order, duplicates included and without colons or parentheses.
  """
  page = f.read()
  experimental_refs = ''.join(_experimental_selector_extractor.findall(page))
  experimental = set(_selector_extractor.findall(experimental_refs))
  listed = _selector_extractor.findall(page)
  return (experimental, listed)
0186 
0187 
# Four positional arguments are required; print the usage on stderr and
# stop with a failure status otherwise.
if len(sys.argv) < 5:
  print(f'''{Path(sys.argv[0]).name} content-main-directory syntax/css.xml sass-site-directory syntax/scss.xml

content-main-directory is https://github.com/mdn/content/ (https://github.com/mdn/content/archive/refs/heads/main.zip)
sass-site-directory is https://github.com/sass/sass-site/tree/main (https://github.com/sass/sass-site/archive/refs/heads/main.zip)
''', file=sys.stderr)
  # sys.exit() rather than the bare exit(): the latter is only injected by
  # the `site` module and is meant for interactive sessions.
  sys.exit(1)

css_dir = Path(sys.argv[1])        # checkout of mdn/content
css_filename = Path(sys.argv[2])   # css.xml to update
scss_dir = Path(sys.argv[3])       # checkout of sass/sass-site
scss_filename = Path(sys.argv[4])  # scss.xml to update
0200 
0201 
# Results of css_parse_pseudo_classes_or_elements() for the 'Pseudo-classes'
# and 'Pseudo-elements' pages: (experimental names, every listed name).
# The empty defaults are used when the corresponding page is never met.
tmp_pseudo_classes = (set(), ())
tmp_pseudo_elements = (set(), ())

# Visit every page directory (SVG attributes, then CSS), read the front
# matter of its index.md and dispatch on the page type or, failing that,
# on the page title.
for pattern in (
  'files/en-us/web/svg/attribute/**/',
  'files/en-us/web/css/**/',
):
  for md in css_dir.glob(pattern):
    with open(md/'index.md', encoding='utf8') as f:
      # Pages that do not start with a front-matter block are skipped.
      if f.readline() != '---\n':
        continue

      # The second line is taken to be `title: ...`: drop the 7-character
      # prefix and the trailing newline.
      title = f.readline()[7:-1]
      if title in exclude_title:
        continue

      if title.startswith('"'):
        title = title[1:-1]

      # Read the rest of the front matter. An excluded line resets the page
      # type, so only the title-based branches below can still apply.
      page_type = ''
      for line in f:
        if line in exclude_line:
          page_type = ''
          break

        if line.startswith('page-type: '):
          if line not in page_type_accepted:
            raise Exception(f'Unknown {line[:-1]}')
          page_type = line[11:-1]

        if line == '---\n':
          break

      if page_type == 'css-property' or page_type == 'css-at-rule-descriptor':
        properties.add(title)
        if not title.endswith('-name') and title not in properties_ignore_value:
          css_parse_values(f, title, values)
      elif page_type == 'css-shorthand-property':
        properties.add(title)
      elif page_type == 'css-pseudo-class':
        # title is `:name` or `:name()`
        pseudo_classes.add(title[1:].removesuffix('()'))
      elif page_type == 'css-pseudo-element':
        # title is `::name` or `::name()`
        pseudo_elements.add(title[2:].removesuffix('()'))
      elif page_type == 'css-type':
        if title == '<named-color>':
          # The color table consumes the whole file: nothing is left for a
          # generic value parse, hence the elif chain.
          colors = css_parse_named_colors(f)
        elif title == '<system-color>':
          css_parse_values(f, '', system_colors)
          # whatever css_parse_values() left unread is searched for
          # bullets flagged as deprecated
          deprecated_system_colors = set(re.findall('\n- `([^`]+)` {{deprecated_inline}}', f.read()))
        else:
          css_parse_values(f, '', values)
      elif page_type == 'css-function':
        # title is `name()`
        functions.add(title[:-2])
      elif page_type == 'css-at-rule':
        at_rules.add(title)
      elif page_type == 'css-media-feature':
        media_features.add(title)
        css_parse_values(f, title, media_feature_values)
      elif page_type == 'css-keyword':
        values.add(title)
      elif title == 'CSS values and units':
        units = css_parse_units(f)
      elif title == 'Pseudo-classes':
        tmp_pseudo_classes = css_parse_pseudo_classes_or_elements(f)
      elif title == 'Pseudo-elements':
        tmp_pseudo_elements = css_parse_pseudo_classes_or_elements(f)
      elif page_type == 'svg-attribute':
        css_parse_svg_attribute(f, title, svg_properties, svg_values)
      elif title == 'CSS value functions':
        functions.update(re.findall(r'\n- {{CSSxRef\("[^"]+", "([-\w\d]+)\(\)"\)}}\n', f.read()))
0272 
0273 
# Names flagged experimental on the overview page stay experimental only if
# they have no page of their own; every listed name joins the main set.
experimental_pseudo_classes, _listed_pseudo_classes = tmp_pseudo_classes
experimental_pseudo_classes.difference_update(pseudo_classes)
pseudo_classes.update(_listed_pseudo_classes)

# Same treatment for pseudo-elements.
experimental_pseudo_elements, _listed_pseudo_elements = tmp_pseudo_elements
experimental_pseudo_elements.difference_update(pseudo_elements)
pseudo_elements.update(_listed_pseudo_elements)
0281 
0282 
# Keywords written to their own 'value keywords' list; they are removed from
# the per-property value sets.
global_values = {
  'auto',
  'inherit',
  'initial',
  'revert',
  'revert-layer',
  'unset',
}
for _keyword_set in (values, svg_values):
  _keyword_set.difference_update(global_values)

# Experimental selectors are kept apart from the regular ones.
pseudo_classes.difference_update(experimental_pseudo_classes)
pseudo_elements.difference_update(experimental_pseudo_elements)
0295 
# add values of functions
# repeat()
values |= {'auto-fill', 'auto-fit'}

# move some keyword colors in values
_keyword_colors = ('transparent', 'currentcolor')
values.update(_keyword_colors)
colors.difference_update(_keyword_colors)

# fix not specified value in mdn file
if 'user-invalid' in experimental_pseudo_classes:
  experimental_pseudo_classes.add('user-valid')
  pseudo_classes.discard('user-valid')
media_features |= {'min-width', 'max-width', 'min-height', 'max-height'}

# fix errors in mdn file
_not_experimental = {'has', 'host-context'}
pseudo_classes |= _not_experimental
experimental_pseudo_classes -= _not_experimental

# @font-format functions
functions |= {'format', 'local', 'tech'}
0330 
0331 
0332 # def show(name, values):
0333 #   print(f'{name} ({len(values)}):')
0334 #   print('\n'.join(sorted(values)), end='\n\n')
0335 #
0336 # show('properties', properties)
0337 # show('svg properties', svg_properties)
0338 # show('values', values)
0339 # show('svg values', svg_values)
0340 # show('global values', global_values)
0341 # show('functions', functions)
0342 # show('pseudo-classes', pseudo_classes)
0343 # show('pseudo-elements', pseudo_elements)
0344 # show('experimental pseudo-classes', experimental_pseudo_classes)
0345 # show('experimental pseudo-elements', experimental_pseudo_elements)
0346 # show('at-rules', at_rules)
0347 # show('media-features', media_features)
0348 # show('media-features values', media_feature_values)
0349 # show('colors', colors)
0350 # show('system colors', system_colors)
0351 # show('deprecated system colors', deprecated_system_colors)
0352 # show('units', units)
0353 # print('units reg:', '|'.join(units))
0354 
0355 
0356 #
0357 # Update CSS
0358 #
0359 
sep = '\n            '
_item_joint = f'</item>{sep}<item>'


def _as_items(names) -> str:
  """Join `names`, sorted, with the closing/opening <item> tags between them."""
  return _item_joint.join(sorted(names))


# Replacement body for each <list name="..."> of css.xml. The outer
# `<item>` / `</item>` pair is added when the list is rewritten.
css_replacements = {
  'properties': _as_items(properties),
  'values': _as_items(values),
  'value keywords': _as_items(global_values),
  'functions': _as_items(functions),
  'pseudo-classes': _as_items(pseudo_classes),
  'pseudo-elements': _as_items(pseudo_elements),
  'media features': _as_items(media_features),
}

# SVG-only names are appended to their list under a dedicated comment.
_svg_only = (
  ('properties', svg_properties - properties),
  ('values', svg_values - values),
)
for _list_name, _svg_names in _svg_only:
  if _svg_names:
    css_replacements[_list_name] += f'</item>\n{sep}<!-- SVG only -->\n{sep}<item>{_as_items(_svg_names)}'

# Named colors first, then system colors after a comment.
css_replacements['colors'] = (
  f'{_as_items(colors)}</item>{sep}{sep}<!-- System colors -->{sep}<item>{_as_items(system_colors)}'
)
0381 
item_extractor = re.compile('<item>([^-<][^<]*)')

# items found in the lists of css.xml that are not regenerated
current_at_rules = set()


def _css_update_and_extract_items(m) -> str:
  """re.sub callback handling one `<list name="...">` of css.xml.

  `m[1]` is the list name, `m[2]` its current body and `m[3]` the terminator
  (`</list>` or `<!-- manual list -->`). A list with a non-empty entry in
  css_replacements is rewritten; any other list is returned untouched and
  its items are recorded in current_at_rules.
  """
  replacement = css_replacements.get(m[1])
  if not replacement:
    current_at_rules.update(item_extractor.findall(m[2]))
    return m[0]

  # `</list>` sits one level shallower than the items
  closing_indent = '        ' if m[3] == '</list>' else sep
  return f'<list name="{m[1]}">{sep}<item>{replacement}</item>\n{closing_indent}{m[3]}'
0394 
0395 
# Read and write the XML with an explicit encoding, as is already done for
# the MDN pages, instead of depending on the locale default.
css_content = css_filename.read_text(encoding='utf8')
original_css_content = css_content

# Rewrite every generated list; at-rules lists are only inspected.
names = f"{'|'.join(css_replacements)}|at-rules(?: definitions)?"
css_content = re.sub(rf'<list name="({names})">(.*?)(</list>|<!-- manual list -->)',
                     _css_update_and_extract_items, css_content, flags=re.DOTALL)

_regexpr_unit_prefix = r'(<RegExpr attribute="Unit".*?String="\(%\|\()'
regexpr_unit_extractor = re.compile(fr'{_regexpr_unit_prefix}([^)]+)')

# Replace the unit alternatives of the first Unit rule. Nothing is touched
# when no unit was extracted, so the existing list is never emptied. A
# callable is used so the unit names are not parsed as a replacement template.
if units:
  css_content = regexpr_unit_extractor.sub(lambda m: m[1] + '|'.join(units), css_content, count=1)

# Only bump the version and rewrite the file when something changed.
if original_css_content != css_content:
  css_content = update_version(css_content)
  css_filename.write_text(css_content, encoding='utf8')
0411 
0412 
def show_at_rule_difference(language: str, old_at_rules: set[str], new_at_rules: set[str]) -> None:
  """Print the at-rules added to and removed from `old_at_rules`.

  Nothing is printed when both sets are equal. Names are listed in sorted
  order so that the report is identical from one run to the next.
  """
  at_rule_added = sorted(new_at_rules - old_at_rules)
  at_rule_removed = sorted(old_at_rules - new_at_rules)
  if not (at_rule_added or at_rule_removed):
    return

  nl = '\n  '
  print(f"""\x1b[31m{language} At-rules requires a manual update
New ({len(at_rule_added)}):\x1b[0m
  {nl.join(at_rule_added)}
\x1b[31mRemoved ({len(at_rule_removed)}):\x1b[0m
  {nl.join(at_rule_removed)}""")
0423 
# Compare the items read from the untouched lists of css.xml with the
# at-rule pages found while scanning MDN.
show_at_rule_difference('CSS', current_at_rules, at_rules)
0425 
0426 #
0427 # Extract SCSS data
0428 #
0429 
# XML fragments (comments and <item> elements) later inserted into the
# SCSS 'functions' list.
scss_functions:list[str] = []
# at-rules seeded by hand; the documentation pages add the others
scss_at_rules:set[str] = {'@content', '@return'}

_function_list_extractor = re.compile(r'{% function (.*?) %}')
_function_extractor = re.compile(r"'([-._a-zA-Z0-9]+)\(")
_at_rule_extractor = re.compile(r'@[-a-z0-9]+')

# One comment per module page (in sorted path order), followed by the
# functions it declares that are not already CSS functions.
for md in sorted(scss_dir.glob('source/documentation/modules/**/*.md')):
  # explicit encoding, as for the MDN pages
  func_list = _function_list_extractor.findall(md.read_text(encoding='utf8'))
  func_items = set(_function_extractor.findall(''.join(func_list)))
  scss_functions.append(f'\n{sep}<!-- {md.stem} -->')
  scss_functions.extend(f'{sep}<item>{func}</item>' for func in sorted(func_items - functions))

# At-rule names are taken from the second line of each at-rule page.
for md in scss_dir.glob('source/documentation/at-rules/**/*.md'):
  with open(md, encoding='utf8') as f:
    f.readline()
    scss_at_rules.update(_at_rule_extractor.findall(f.readline()))
0447 
def _split_property(prop: str) -> list[str]:
  """Split a property name on '-', keeping a leading '-' glued to the first part."""
  if prop.startswith('-'):
    # '-aaa-bbb' -> ['-aaa', 'bbb']
    return prop.rsplit('-', prop.count('-') - 1)
  # 'aaa-bbb' -> ['aaa', 'bbb']
  return prop.split('-')


# Every contiguous run of parts of every property name, joined back with '-'.
subproperties = {
  '-'.join(parts[start:stop])
  for parts in map(_split_property, properties)
  for start in range(len(parts))
  for stop in range(start + 1, len(parts) + 1)
}
0457 
0458 #
0459 # Update SCSS
0460 #
0461 
# at-rules currently listed in scss.xml
scss_current_at_rules = set()


def _scss_update_and_extract_items(m) -> str:
  """re.sub callback handling one `<list name="...">` of scss.xml.

  `m[1]` is the list name and `m[2]` the current list body:
    - 'at-rules' is returned untouched, its at-rules being recorded in
      scss_current_at_rules;
    - 'functions' is regenerated from scss_functions;
    - any other list is filled with the sub-properties that are not
      themselves properties.
  """
  list_name = m[1]

  if list_name == 'at-rules':
    scss_current_at_rules.update(_at_rule_extractor.findall(m[2]))
    return m[0]

  if list_name == 'functions':
    function_items = ''.join(scss_functions)
    return f"""<list name="functions">
            <include>functions##CSS</include>

            <!-- https://sass-lang.com/documentation/modules/ -->{function_items}
        </list>"""

  # sub-properties
  sub_only = sorted(subproperties - properties)
  joined = f'</item>{sep}<item>'.join(sub_only)
  return f'<list name="{list_name}">{sep}<item>{joined}</item>\n        </list>'
0481 
# Read and write the XML with an explicit encoding, as is already done for
# the MDN pages, instead of depending on the locale default.
scss_content = scss_filename.read_text(encoding='utf8')
original_scss_content = scss_content

# Regenerate the three lists handled by _scss_update_and_extract_items().
scss_content = re.sub(r'<list name="(sub-properties|functions|at-rules)">(.*?)</list>',
                      _scss_update_and_extract_items, scss_content, count=3, flags=re.DOTALL)

scss_content = re.sub(r'<!ENTITY pseudoclasses "[^"]*">',
                      f'<!ENTITY pseudoclasses "{"|".join(sorted(pseudo_classes))}">',
                      scss_content, count=1)

# Replace the unit alternatives of the first Unit rule. Nothing is touched
# when no unit was extracted, so the existing list is never emptied. A
# callable is used so the unit names are not parsed as a replacement template.
if units:
  scss_content = regexpr_unit_extractor.sub(lambda m: m[1] + '|'.join(units), scss_content, count=1)

# Only bump the version and rewrite the file when something changed.
if original_scss_content != scss_content:
  scss_content = update_version(scss_content)
  scss_filename.write_text(scss_content, encoding='utf8')

show_at_rule_difference('SCSS', scss_current_at_rules, scss_at_rules)