# File indexing completed on 2024-05-19 04:03:43
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2023 Jonathan Poelen <jonathan.poelen@gmail.com>
# SPDX-License-Identifier: MIT

from pathlib import Path
from collections import defaultdict
from typing import TextIO
import re
import sys


# NOTE(review): this file was recovered from a whitespace-collapsed dump, so
# runs of spaces inside string literals (e.g. the leading indent of the
# ' - ...' entries below) may have been shortened -- verify against upstream.

# Front-matter lines that cause a page to be skipped: reading the front matter
# stops and the page type is reset as soon as one of these lines is seen.
exclude_line = {
    ' - non-standard\n',
    ' - experimental\n',
    ' - deprecated\n',
    'page-type: css-combinator\n',
    'page-type: css-selector\n',
    'page-type: css-module\n',
    'page-type: landing-page\n',
    'page-type: guide\n',
}

# Every other `page-type:` front-matter line must be one of these; an unknown
# page type raises an exception in the scanning loop.
page_type_accepted = {
    'page-type: css-type\n',
    'page-type: css-function\n',
    'page-type: css-property\n',
    'page-type: css-keyword\n',
    'page-type: css-shorthand-property\n',
    'page-type: css-pseudo-element\n',
    'page-type: css-pseudo-class\n',
    'page-type: css-at-rule-descriptor\n',
    'page-type: css-at-rule\n',
    'page-type: css-media-feature\n',
    'page-type: svg-attribute\n',
}

# Page titles that are skipped entirely.
exclude_title = {
    '<alpha-value>',
    '<angle>',
    '<angle-percentage>',
    '<basic-shape>',
    '<calc-constant>',
    '<calc-sum>',
    '<color-interpolation-method>',
    '<color>',
    '<custom-ident>',
    '<dashed-ident>',
    '<display-listitem>',
    '<display-inside>',
    '<dimension>',
    # The comma after '<easing-function>' used to be missing, which silently
    # merged it with the next literal into '<easing-function><filter-function>'
    # so neither title was actually excluded.
    '<easing-function>',
    '<filter-function>',
    '<flex>',
    '<frequency-percentage>',
    '<frequency>',
    '<gradient>',
    '<hex-color>',
    '<hue>',
    '<hue-interpolation-method>',
    '<ident>',
    '<image>',
    '<integer>',
    '<length>',
    '<length-percentage>',
    '<number>',
    '<percentage>',
    '<position>',
    '<ratio>',
    '<resolution>',
    '<string>',
    '<time-percentage>',
    '<time>',
    '<transform-function>',
    '"!important"',
}

# Properties that are registered, but whose pages are not scanned for values.
properties_ignore_value = (
    'counter-increment',
    'counter-reset',
    'counter-set',
    'text-rendering',
    'page',
)


# NOTE(review): this section was recovered from a whitespace-collapsed dump.
# Indentation was rebuilt from syntax, and runs of spaces inside string
# literals / regexes (e.g. `sep`, the `<td>` patterns, the usage text) may have
# been shortened -- verify against upstream before relying on exact output.

# Result collections filled while scanning the MDN content tree.
units: list[str] = []
colors: set[str] = set()
system_colors: set[str] = set()
deprecated_system_colors: set[str] = set()
values: set[str] = set()
properties: set[str] = set()
svg_values: set[str] = set()
svg_properties: set[str] = set()
functions: set[str] = set()
pseudo_classes: set[str] = set()
pseudo_elements: set[str] = set()
experimental_pseudo_classes: set[str] = set()
experimental_pseudo_elements: set[str] = set()
at_rules: set[str] = set()
media_features: set[str] = set()
media_feature_values: set[str] = set()


_update_version_extractor = re.compile(r' version="(\d+)" ')


def update_version(s: str) -> str:
    """Return *s* with its first ` version="N" ` occurrence replaced by N+1."""
    return _update_version_extractor.sub(lambda m: f' version="{int(m[1])+1}" ', s, count=1)


_md_value_extractor = re.compile(r'(?<=[^\w][ /])`([-\w][-\w\d]+(?:<[^>]+>[?+*])?)`')
_html_value_extractor = re.compile(r'<code>([-\w][-\w\d]+)</code>')
_is_md_value = re.compile(r'^\s*- `')
_is_html_table_desc = re.compile(r'^\s+<td><code>')

# Headings (matched as line suffixes) that open the section to scan.
_values_section_suffixes = ('## Syntax\n', '## Values\n')
# Extra headings that do not end the scan when parsing the `display` property.
_display_section_suffixes = (
    '## Grouped values\n',
    '## Outside\n',
    '## Inside\n',
    '## List Item\n',
    '## Internal\n',
    '## Box\n',
    '## Precomposed\n',
)


def css_parse_values(f: TextIO, prop: str, values: set[str]) -> None:
    """Collect value keywords from the rest of a markdown page into *values*.

    Lines of *f* are skipped until a heading ending in ``## Syntax`` or
    ``## Values`` (or containing ``## SVG only``) is found.  From there,
    keywords are taken from markdown list items and HTML table cells until an
    unrelated heading is reached, at which point the function returns and
    leaves *f* positioned right after that heading.

    Args:
        f: open text stream positioned somewhere before the section.
        prop: property name; ``'display'`` allows additional sub-headings.
        values: set updated in place with the extracted identifiers.
    """
    # Expected page format:
    # ## Syntax or ### Syntax
    #
    # ```css
    # (optional)
    # ```
    # ## Values or ### Values or not...
    #
    # - `ident` or html table <td><code>....</code></td>
    #
    # ## SVG only ... (optional)
    # ## other title
    for line in f:
        if not (line.endswith(_values_section_suffixes) or '## SVG only' in line):
            continue

        # Same iterator: this inner loop consumes the section body.
        for line in f:
            if _is_md_value.match(line):
                # List items mentioning "deprecated" are ignored.
                if 'deprecated' not in line:
                    values.update(_md_value_extractor.findall(line))
            elif line.startswith('#'):
                still_in_section = (
                    line.endswith('## Values\n')
                    or '## SVG only' in line
                    or (prop == 'display' and line.endswith(_display_section_suffixes))
                )
                if not still_in_section:
                    return
            elif line == '```css\n':
                # Skip the content of the css code fence.
                for line in f:
                    if line.startswith('```\n'):
                        break
            elif _is_html_table_desc.match(line):
                values.update(_html_value_extractor.findall(line))


def css_parse_named_colors(f: TextIO) -> set[str]:
    """Return the lowercase names found in `<td><code>name</code>` cells of *f*."""
    return set(re.findall('\n <td>(?:\n )?<code>([a-z]+)</code>', f.read()))


def css_parse_units(f: TextIO) -> list[str]:
    """Return the backquoted tokens found in the first cell of each table row of *f*."""
    return re.findall(r'`([^`]+)`', ''.join(re.findall(r'\n\| (`[^|]+)', f.read())))


_svg_values_extractor = re.compile(r'<th scope="row">Value</th>\n\s*<td>(.*?)</td>', re.DOTALL)
_svg_value_extractor = re.compile(r'<code>([-\w\d]+)</code>')


def css_parse_svg_attribute(f: TextIO, prop: str, properties: set[str], values: set[str]) -> None:
    """Register an SVG attribute page that states it is usable as a CSS property.

    When the page contains 'can be used as a CSS property', *prop* is added to
    *properties* and the `<code>` items of its "Value" table row (if any) are
    added to *values*.  Otherwise nothing is changed.
    """
    contents = f.read()
    if 'can be used as a CSS property' in contents:
        properties.add(prop)
        m = _svg_values_extractor.search(contents)
        if m:
            values.update(_svg_value_extractor.findall(m[1]))


_experimental_selector_extractor = re.compile(r'\n- {{CSSxRef([^}]+)}} {{Experimental_Inline}}')
_selector_extractor = re.compile(r'":+([-\w\d]+)[()]*"')


def css_parse_pseudo_classes_or_elements(f: TextIO) -> tuple[
    set[str],  # experimental
    list[str]
]:
    """Return ``(experimental selectors, all selectors)`` found in *f*.

    The first item only holds selectors from list entries flagged with
    ``{{Experimental_Inline}}``; the second holds every quoted ``:name`` /
    ``::name`` found in the page (duplicates included, in order of appearance).
    """
    s = f.read()
    experimental_str = ''.join(_experimental_selector_extractor.findall(s))
    return (set(_selector_extractor.findall(experimental_str)), _selector_extractor.findall(s))


if len(sys.argv) < 5:
    print(f'''{Path(sys.argv[0]).name} content-main-directory syntax/css.xml sass-site-directory syntax/scss.xml

content-main-directory is https://github.com/mdn/content/ (https://github.com/mdn/content/archive/refs/heads/main.zip)
sass-site-directory is https://github.com/sass/sass-site/tree/main (https://github.com/sass/sass-site/archive/refs/heads/main.zip)
''', file=sys.stderr)
    # sys.exit instead of the `exit` builtin, which is only injected by `site`.
    sys.exit(1)

css_dir = Path(sys.argv[1])
css_filename = Path(sys.argv[2])
scss_dir = Path(sys.argv[3])
scss_filename = Path(sys.argv[4])


# (experimental, all) pairs, replaced when the matching overview page is found.
tmp_pseudo_classes = (set(), ())
tmp_pseudo_elements = (set(), ())

for pattern in (
    'files/en-us/web/svg/attribute/**/',
    'files/en-us/web/css/**/',
):
    for md in css_dir.glob(pattern):
        with open(md/'index.md', encoding='utf8') as f:
            # Only pages starting with a front-matter block are considered.
            if f.readline() != '---\n':
                continue

            # Drops the first 7 characters and the trailing newline
            # (assumes the second line is `title: ...` -- TODO confirm).
            title = f.readline()[7:-1]
            if title in exclude_title:
                continue

            if title.startswith('"'):
                title = title[1:-1]

            # Read the rest of the front matter to find the page type.
            page_type = ''
            for line in f:
                if line in exclude_line:
                    page_type = ''
                    break

                if line.startswith('page-type: '):
                    if line not in page_type_accepted:
                        raise Exception(f'Unknown {line[:-1]}')
                    # Strip the 'page-type: ' prefix and the newline.
                    page_type = line[11:-1]

                if line == '---\n':
                    break

            if page_type == 'css-property' or page_type == 'css-at-rule-descriptor':
                properties.add(title)
                if not title.endswith('-name') and title not in properties_ignore_value:
                    css_parse_values(f, title, values)
            elif page_type == 'css-shorthand-property':
                properties.add(title)
            elif page_type == 'css-pseudo-class':
                # Drop the leading ':' and an optional '()' suffix.
                pseudo_classes.add(title[1:].removesuffix('()'))
            elif page_type == 'css-pseudo-element':
                # Drop the leading '::' and an optional '()' suffix.
                pseudo_elements.add(title[2:].removesuffix('()'))
            elif page_type == 'css-type':
                if title == '<named-color>':
                    colors = css_parse_named_colors(f)
                # `elif` rather than `if`: otherwise the <named-color> page
                # also fell through to the generic `else` parser below (on an
                # already fully consumed file).
                elif title == '<system-color>':
                    css_parse_values(f, '', system_colors)
                    deprecated_system_colors = set(re.findall('\n- `([^`]+)` {{deprecated_inline}}', f.read()))
                else:
                    css_parse_values(f, '', values)
            elif page_type == 'css-function':
                # Drop the last two characters of the title (presumably '()').
                functions.add(title[:-2])
            elif page_type == 'css-at-rule':
                at_rules.add(title)
            elif page_type == 'css-media-feature':
                media_features.add(title)
                css_parse_values(f, title, media_feature_values)
            elif page_type == 'css-keyword':
                values.add(title)
            elif title == 'CSS values and units':
                units = css_parse_units(f)
            elif title == 'Pseudo-classes':
                tmp_pseudo_classes = css_parse_pseudo_classes_or_elements(f)
            elif title == 'Pseudo-elements':
                tmp_pseudo_elements = css_parse_pseudo_classes_or_elements(f)
            elif page_type == 'svg-attribute':
                css_parse_svg_attribute(f, title, svg_properties, svg_values)
            elif title == 'CSS value functions':
                functions.update(re.findall(r'\n- {{CSSxRef\("[^"]+", "([-\w\d]+)\(\)"\)}}\n', f.read()))


# Selectors that have their own (non-excluded) page are not experimental.
experimental_pseudo_classes = tmp_pseudo_classes[0]
experimental_pseudo_classes -= pseudo_classes
pseudo_classes.update(tmp_pseudo_classes[1])

experimental_pseudo_elements = tmp_pseudo_elements[0]
experimental_pseudo_elements -= pseudo_elements
pseudo_elements.update(tmp_pseudo_elements[1])


global_values = {
    'auto',
    'inherit',
    'initial',
    'revert',
    'revert-layer',
    'unset',
}
values -= global_values
svg_values -= global_values
pseudo_classes -= experimental_pseudo_classes
pseudo_elements -= experimental_pseudo_elements

# add values of functions
values.update((
    # repeat()
    'auto-fill',
    'auto-fit',
))

# move some keyword colors in values
for special_color in ('transparent', 'currentcolor'):
    values.add(special_color)
    colors.discard(special_color)

# fix not specified value in mdn file
if 'user-invalid' in experimental_pseudo_classes:
    pseudo_classes.discard('user-valid')
    experimental_pseudo_classes.add('user-valid')
media_features.update((
    'min-width',
    'max-width',
    'min-height',
    'max-height',
))

# fix errors in mdn file
for e in ('has', 'host-context'):
    pseudo_classes.add(e)
    experimental_pseudo_classes.discard(e)

# @font-format functions
functions.update((
    'format',
    'local',
    'tech',
))


# def show(name, values):
#     print(f'{name} ({len(values)}):')
#     print('\n'.join(sorted(values)), end='\n\n')
#
# show('properties', properties)
# show('svg properties', svg_properties)
# show('values', values)
# show('svg values', svg_values)
# show('global values', global_values)
# show('functions', functions)
# show('pseudo-classes', pseudo_classes)
# show('pseudo-elements', pseudo_elements)
# show('experimental pseudo-classes', experimental_pseudo_classes)
# show('experimental pseudo-elements', experimental_pseudo_elements)
# show('at-rules', at_rules)
# show('media-features', media_features)
# show('media-features values', media_feature_values)
# show('colors', colors)
# show('system colors', system_colors)
# show('deprecated system colors', deprecated_system_colors)
# show('units', units)
# print('units reg:', '|'.join(units))


#
# Update CSS
#

sep = '\n '
# List name -> items joined with '</item>{sep}<item>'; the outermost
# '<item>' / '</item>' pair is added when the list is written.
css_replacements = {
    prop: f'</item>{sep}<item>'.join(sorted(seq))
    for prop, seq in (
        ('properties', properties),
        ('values', values),
        ('value keywords', global_values),
        ('functions', functions),
        ('pseudo-classes', pseudo_classes),
        ('pseudo-elements', pseudo_elements),
        ('media features', media_features)
    )
}
# SVG-only names are appended after the CSS ones, under their own comment.
for prop, seq in (('properties', svg_properties - properties), ('values', svg_values - values)):
    if seq:
        items = f'</item>{sep}<item>'.join(sorted(seq))
        css_replacements[prop] += f'</item>\n{sep}<!-- SVG only -->\n{sep}<item>{items}'

rep1 = f'</item>{sep}<item>'.join(sorted(colors))
rep2 = f'</item>{sep}<item>'.join(sorted(system_colors))
css_replacements['colors'] = f'{rep1}</item>{sep}{sep}<!-- System colors -->{sep}<item>{rep2}'

item_extractor = re.compile('<item>([^-<][^<]*)')

# At-rules currently listed in the CSS syntax file (filled by the callback).
current_at_rules = set()


def _css_update_and_extract_items(m) -> str:
    """`re.sub` callback rebuilding one `<list name="...">` of the CSS file.

    Lists with a non-empty entry in `css_replacements` are regenerated; for
    any other matched list the items (except those starting with '-') are
    recorded in `current_at_rules` and the text is returned unchanged.
    """
    seq = css_replacements.get(m[1])
    if seq:
        end = ' ' if m[3] == '</list>' else sep
        return f'<list name="{m[1]}">{sep}<item>{seq}</item>\n{end}{m[3]}'

    current_at_rules.update(item_extractor.findall(m[2]))
    return m[0]


css_content = css_filename.read_text()
original_css_content = css_content

# Alternation of every list name handled by _css_update_and_extract_items.
names = f"{'|'.join(css_replacements)}|at-rules(?: definitions)?"
css_content = re.sub(rf'<list name="({names})">(.*?)(</list>|<!-- manual list -->)',
                     _css_update_and_extract_items, css_content, flags=re.DOTALL)

# NOTE(review): this section was recovered from a whitespace-collapsed dump.
# Indentation was rebuilt from syntax, and runs of spaces inside string
# literals (e.g. the XML templates below) may have been shortened -- verify
# against upstream before relying on the exact generated layout.

_regexpr_unit_prefix = r'(<RegExpr attribute="Unit".*?String="\(%\|\()'
regexpr_unit_extractor = re.compile(fr'{_regexpr_unit_prefix}([^)]+)')

# `count` is passed by keyword: passing it positionally is deprecated since
# Python 3.13.
css_content = regexpr_unit_extractor.sub('\\1' + "|".join(units), css_content, count=1)

# Only bump the version and rewrite the file when something changed.
if original_css_content != css_content:
    css_content = update_version(css_content)
    css_filename.write_text(css_content)


def show_at_rule_difference(language: str, old_at_rules: set[str], new_at_rules: set[str]) -> None:
    """Print the at-rules added to / removed from *old_at_rules*.

    Nothing is printed when both sets are equal.  Names are listed in sorted
    order so the report is stable between runs.

    Args:
        language: label used in the heading (e.g. 'CSS' or 'SCSS').
        old_at_rules: at-rules currently present in the syntax file.
        new_at_rules: at-rules found in the documentation.
    """
    at_rule_added = new_at_rules - old_at_rules
    at_rule_removed = old_at_rules - new_at_rules
    nl = '\n '
    if at_rule_added or at_rule_removed:
        print(f"""\x1b[31m{language} At-rules requires a manual update
New ({len(at_rule_added)}):\x1b[0m
{nl.join(sorted(at_rule_added))}
\x1b[31mRemoved ({len(at_rule_removed)}):\x1b[0m
{nl.join(sorted(at_rule_removed))}""")


show_at_rule_difference('CSS', current_at_rules, at_rules)

#
# Extract SCSS data
#

# Pre-rendered XML fragments (module comments and <item> lines).
scss_functions: list[str] = []
scss_at_rules: set[str] = {'@content', '@return'}

_function_list_extractor = re.compile(r'{% function (.*?) %}')
_function_extractor = re.compile(r"'([-._a-zA-Z0-9]+)\(")
_at_rule_extractor = re.compile(r'@[-a-z0-9]+')

# Markdown is read as UTF-8 explicitly, like the MDN pages, instead of
# depending on the locale's default encoding.
for md in sorted(scss_dir.glob('source/documentation/modules/**/*.md')):
    func_list = _function_list_extractor.findall(md.read_text(encoding='utf8'))
    func_items = set(_function_extractor.findall(''.join(func_list)))
    scss_functions.append(f'\n{sep}<!-- {md.stem} -->')
    # Functions already known as CSS functions are not repeated.
    scss_functions.extend(f'{sep}<item>{func}</item>' for func in sorted(func_items - functions))

for md in scss_dir.glob('source/documentation/at-rules/**/*.md'):
    with open(md, encoding='utf8') as f:
        # Skip the first line; at-rule names are taken from the second one.
        f.readline()
        scss_at_rules.update(_at_rule_extractor.findall(f.readline()))

# Every contiguous run of '-'-separated parts of every property name.
subproperties = set(
    '-'.join(splitted[i:n])
    for prop in properties
    for splitted in (prop.rsplit('-', prop.count('-') - 1)  # '-aaa-bbb' -> ['-aaa', 'bbb']
                     if prop.startswith('-')
                     else prop.split('-'), )  # 'aaa-bbb' -> ['aaa', 'bbb']
    for i in range(len(splitted))
    for n in range(i+1, len(splitted)+1)
)

#
# Update SCSS
#

# At-rules currently listed in the SCSS syntax file (filled by the callback).
scss_current_at_rules = set()


def _scss_update_and_extract_items(m) -> str:
    """`re.sub` callback for the lists of the SCSS syntax file.

    'functions' is regenerated from `scss_functions`, 'at-rules' is left
    unchanged while its at-rules are recorded in `scss_current_at_rules`, and
    any other matched list (sub-properties) is regenerated from the
    sub-property names that are not themselves properties.
    """
    name = m[1]

    if name == 'functions':
        return f"""<list name="functions">
<include>functions##CSS</include>

<!-- https://sass-lang.com/documentation/modules/ -->{''.join(scss_functions)}
</list>"""

    if name == 'at-rules':
        scss_current_at_rules.update(_at_rule_extractor.findall(m[2]))
        return m[0]

    # sub-properties
    items = f'</item>{sep}<item>'.join(sorted(subproperties - properties))
    return f'<list name="{name}">{sep}<item>{items}</item>\n </list>'


scss_content = scss_filename.read_text()
original_scss_content = scss_content

scss_content = re.sub(r'<list name="(sub-properties|functions|at-rules)">(.*?)</list>',
                      _scss_update_and_extract_items, scss_content, count=3, flags=re.DOTALL)

scss_content = re.sub(r'<!ENTITY pseudoclasses "[^"]*">',
                      f'<!ENTITY pseudoclasses "{"|".join(sorted(pseudo_classes))}">',
                      scss_content, count=1)

scss_content = regexpr_unit_extractor.sub('\\1' + "|".join(units), scss_content, count=1)

# Only bump the version and rewrite the file when something changed.
if original_scss_content != scss_content:
    scss_content = update_version(scss_content)
    scss_filename.write_text(scss_content)

show_at_rule_difference('SCSS', scss_current_at_rules, scss_at_rules)