Warning, file /frameworks/syntax-highlighting/utils/lexers_matcher/match_languages.py was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
#!/usr/bin/env python3

"""
Copyright (c) 2022 Rafał Lalik <rafallalik@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import argparse
import re
import xml.etree.ElementTree as ET
from itertools import combinations

import pygments.lexers as pyglex
import yaml
from colorama import Fore, Back, Style
#from fuzzywuzzy import fuzz

# Everything that is not a lowercase ASCII letter is dropped from lexer names.
_NON_LOWERCASE = re.compile(r"[^a-z]")

# NOTE(review): the leading whitespace of every XML snippet below was
# reconstructed (the indentation is insignificant to XML parsers); confirm
# against the upstream file if byte-exact output matters.

# Per-language contexts. Positional fields: {0} = KDE language name,
# {1} = '{', {2} = '}' (braces are passed as arguments so that str.format
# does not mistake them for replacement fields).
_LANGUAGE_CONTEXTS_TEMPLATE = '''
      <context name="Highlighting{0}CodeEnvS" attribute="Error" lineEndContext="#stay">
        <DetectSpaces/>
        <DetectChar char="{2}" attribute="Normal Text" context="#pop!Highlighting{0}CodeEnvSParam"/>
      </context>
      <context name="Highlighting{0}CodeEnvSParam" attribute="Error" lineEndContext="#stay">
        <DetectSpaces attribute="Normal Text"/>
        <DetectChar char="{1}" attribute="Normal Text" context="Highlighting{0}CodeEnvSParamInside"/>
        <IncludeRules context="FindComments"/>
      </context>
      <context name="Highlighting{0}CodeEnvSParamInside" attribute="Normal Text" lineEndContext="#stay">
        <DetectSpaces/>
        <DetectIdentifier/>
        <DetectChar char="{2}" attribute="Normal Text" context="#pop!HighlightingBegin{0}"/>
        <IncludeRules context="FindComments"/>
        <RegExpr String="\\&envname;" attribute="Macro" context="#stay"/>
      </context>
      <context name="Highlighting{0}CodeEnv" attribute="Normal Text" lineEndContext="#stay">
        <DetectChar char="{2}" context="HighlightingBegin{0}"/>
        <RegExpr String="[^{2}]*" attribute="Normal Text" context="#stay"/>
      </context>
      <context name="HighlightingBegin{0}" attribute="Normal Text" lineEndContext="#pop!Highlighting{0}">
        <DetectSpaces/>
        <RegExpr String=".+" attribute="Error" context="#stay"/>
      </context>
      <context name="Highlighting{0}" attribute="Normal Text" lineEndContext="#stay">
        <IncludeRules context="HighlightingCommon"/>
        <IncludeRules context="##{0}" includeAttrib="true"/>
      </context>
'''


def main():
    """Match KDE syntax definitions with Pygments lexers and write the result.

    Command-line arguments that argparse does not recognise are treated as
    paths of KDE syntax XML files. ``config.yml`` is read from the current
    working directory; its ``minted_mapping`` section maps a KDE language
    name to a Pygments lexer name and ``minted_blacklisted`` lists lexer
    aliases that must not be used.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--output', help='output file', type=str, default="lexers_found.xml")
    parser.add_argument('-v', '--verbose', help='verbose mode', action='store_true', default=False)
    opts, args = parser.parse_known_args()
    if opts.verbose:
        print("Options: ", opts, args)

    with open('config.yml', 'r', encoding='utf-8') as file:
        # An empty YAML document loads as None; treat it as an empty config.
        config = yaml.safe_load(file) or {}
    print(config)

    # Missing or empty sections behave as "nothing mapped / nothing banned".
    minted_mapping = config.get('minted_mapping') or {}
    minted_blacklisted = config.get('minted_blacklisted') or []

    # Each entry is indexed below as [0] = lexer name, [1] = iterable of aliases.
    pygment_lexers = list(pyglex.get_all_lexers())

    kde_langs = []
    for xmlfile in args:
        # NOTE(review): presumably skipped because the generated contexts are
        # meant to be embedded in the LaTeX definition itself - confirm.
        if xmlfile == 'latex.xml':
            continue
        rc = parseXML(xmlfile, verbose=opts.verbose)
        if rc is not None:
            print(f"Found '{rc}' language syntax file")
            kde_langs.append(rc)

    if opts.verbose:
        print("***\nKDE languages: ", kde_langs)
        print("***\nPygments lexers: ", pygment_lexers, f"total = {len(pygment_lexers)}")
        print("\n***\nSearch for KDE lexer")

    matched_lexers = []
    # Shared across languages so that one alias is never assigned twice.
    minted_used_lexers = set()
    for kde_lang in kde_langs:
        # 1) An explicit mapping from the configuration always wins.
        if kde_lang in minted_mapping:
            mapped_name = minted_mapping[kde_lang]
            if opts.verbose:
                print(Fore.YELLOW + f"+ Mapped KDE lang '{kde_lang}' to minted lang '{mapped_name}'" + Style.RESET_ALL)

            lexer = next((lx for lx in pygment_lexers if lx[0] == mapped_name), None)
            if lexer is not None:
                selected_lexers = filter_minted_lexers(lexer, minted_blacklisted, minted_used_lexers)
                matched_lexers.append([kde_lang, mapped_name, selected_lexers])
            elif opts.verbose:
                print(Fore.RED + f"No lexers from {pygment_lexers} and {kde_lang}" + Style.RESET_ALL)
            continue

        # 2) Otherwise fall back to a case-insensitive comparison of names.
        folded_name = kde_lang.casefold()
        lexer = next((lx for lx in pygment_lexers if lx[0].casefold() == folded_name), None)
        if lexer is not None:
            if opts.verbose:
                print(Fore.GREEN + f"+ Matched KDE lang '{kde_lang}' to minted lang '{lexer[0]}'" + Style.RESET_ALL)

            selected_lexers = filter_minted_lexers(lexer, minted_blacklisted, minted_used_lexers)
            matched_lexers.append([kde_lang, lexer[0], selected_lexers])
            continue

        # NOTE: a third, fuzzy-matching stage (fuzzywuzzy's fuzz.ratio compared
        # against config['fuzz_level']) used to be sketched here; it is
        # disabled together with the fuzzywuzzy import.
        if opts.verbose:
            print(Fore.RED + f"- Lexer for KDE lang '{kde_lang}' not found" + Style.RESET_ALL)

    generate_output(matched_lexers, opts.output)


def generate_output(lexers, filename):
    """Write the XML fragment describing the matched lexers to *filename*.

    Args:
        lexers: iterable of ``[kde_lang, minted_lang, names]`` entries, where
            ``names`` is an iterable of environment-name prefixes.
        filename: path of the file to (over)write.

    The fragment consists of the ``MintedCodeLang`` list, the
    ``MintedCodeEnv``, ``HighlightingSelector`` and ``HighlightingCommon``
    contexts, and one group of ``Highlighting<kde_lang>...`` contexts per
    entry of *lexers*.
    """
    print("\n***\nGenerate outputs")

    with open(filename, "w", encoding="utf-8") as f:
        f.write('    <list name="MintedCodeLang">\n')
        for code, _lang, names in lexers:
            f.write(f"      <!-- {code} lexers -->\n")
            for name in names:
                f.write(f"      <item>{name}code*</item>\n")
                f.write(f"      <item>{name}code</item>\n")
        f.write('    </list>\n')

        # Rules of the HighlightingSelector context are collected while the
        # MintedCodeEnv rules are written and emitted afterwards.
        mintenv_list = []
        f.write('\n\n\n\n\n')
        f.write('      <!-- environment type 5: minted environment created with newminted -->\n')
        f.write('      <context name="MintedCodeEnv" attribute="Environment" lineEndContext="#stay" fallthroughContext="#pop#pop#pop#pop">\n')
        for code, _lang, names in lexers:
            for name in names:
                f.write(f'        <WordDetect String="{name}code*" attribute="Environment" context="Highlighting{code}CodeEnvS"/>\n')
                f.write(f'        <WordDetect String="{name}code" attribute="Environment" context="Highlighting{code}CodeEnv"/>\n')

                mintenv_list.append(f'        <WordDetect String="{name}" insensitive="true" context="Highlighting{code}CodeEnv"/>\n')

        # Backslashes are doubled so that the emitted regex keeps a single
        # backslash without relying on invalid string escapes.
        f.write('''        <RegExpr String=".+code\\*" attribute="Environment" context="UnknownHighlightingCodeEnvS"/>
        <RegExpr String=".+code" attribute="Environment" context="UnknownHighlightingCodeEnv"/>
      </context>\n\n''')

        f.write('      <context name="HighlightingSelector" attribute="Normal Text" lineEndContext="#stay">\n')
        f.write(''.join(mintenv_list))
        f.write('''        <AnyChar String="}]" context="#pop!UnknownHighlightingBegin"/>
        <RegExpr String="[^]}]*" context="#stay"/>
      </context>
''')
        f.write('\n')
        # The emitted pattern starts with two backslashes followed by "end".
        f.write('''      <context name="HighlightingCommon" attribute="Normal Text" lineEndContext="#stay">
        <RegExpr String="\\\\end\\s*\\{(?:lstlisting|minted|[a-zA-Z]+code)\\*?\\}" attribute="Structure" lookAhead="true" context="#pop#pop#pop#pop#pop#pop"/>
      </context>\n''')

        for code, _lang, _names in lexers:
            f.write(_LANGUAGE_CONTEXTS_TEMPLATE.format(code, '{', '}'))

        f.write('      <!-- end of mintedcode environment -->\n')


def filter_minted_lexers(lexers_list, blacklist, used_lexers):
    """Pick the usable, simplified and not yet used lexer names.

    Args:
        lexers_list: a lexer entry whose element ``[1]`` is the iterable of
            lexer aliases to examine.
        blacklist: container of aliases that must be ignored.
        used_lexers: set of names handed out by earlier calls; it is updated
            in place with the names selected here.

    Returns:
        Sorted list of the newly selected names.

    Each alias is simplified by replacing ``++`` with ``pp`` and ``#`` with
    ``sharp`` and then dropping every character outside ``a-z``.
    """
    selected = set()

    for alias in lexers_list[1]:
        if alias in blacklist:
            # Report the alias itself (indexing the string would print a
            # single character and fail for one-letter aliases).
            print(f"    Ignore blacklisted '{alias}' lexer")
            continue

        new_name = alias.replace("++", "pp").replace("#", "sharp")
        new_name = _NON_LOWERCASE.sub("", new_name)

        if new_name != alias:
            print(f"    {alias} => {new_name}" + Fore.YELLOW + " - Replaced" + Style.RESET_ALL)

        # An alias without any lowercase letter collapses to an empty name,
        # which cannot be used as an environment-name prefix.
        if not new_name:
            print(f"    {alias} => {new_name}" + Fore.RED + " - Removed" + Style.RESET_ALL)
            continue

        if new_name in used_lexers:
            print(f"    {alias} => {new_name}" + Fore.RED + " - Ignored" + Style.RESET_ALL)
            continue

        print(f"    {alias} => {new_name}" + Fore.GREEN + " - Added" + Style.RESET_ALL)
        selected.add(new_name)

    used_lexers.update(selected)  # update set of all lexers
    return sorted(selected)


def search_lexer(langname, pygment_lexers):
    """Return the name of the first lexer having *langname* among its aliases.

    The comparison ignores case. ``None`` is returned when nothing matches.
    """
    wanted = langname.lower()
    for lexer in pygment_lexers:
        if any(alias.lower() == wanted for alias in lexer[1]):
            return lexer[0]

    return None


def split_lexers(lexers):
    """Flatten lexer entries into ``[alias, lexer_name, False]`` records."""
    return [[alias, lexer[0], False] for lexer in lexers for alias in lexer[1]]


def parseXML(xmlfile, verbose=False):
    """Return the language name declared by a KDE syntax definition file.

    Args:
        xmlfile: path of the XML file to parse.
        verbose: when true, report files that lack a ``highlighting`` element.

    Returns:
        The ``name`` attribute of the root ``language`` element, or ``None``
        when the root element is not ``language`` or it has no
        ``highlighting`` child.
    """
    root = ET.parse(xmlfile).getroot()
    if root.tag != 'language':
        return None

    langname = root.attrib['name']
    if root.find('highlighting') is None:
        if verbose:
            print(f"Language {langname} has no highlightng")
        return None

    return langname


if __name__ == '__main__':
    main()