File indexing completed on 2024-04-28 13:40:39
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""Add syntax highlighting to annotated ``<pre>`` blocks in HTML files.

Every file named on the command line is rewritten in place.  A block is
highlighted when its opening ``<pre ...>`` tag is immediately followed by
a comment of the form ``<!-- language: NAME -->``; the comment is removed
and the block content is replaced by Pygments HTML markup for NAME.
"""

import os
import re
import sys

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexer import RegexLexer, bygroups, include
from pygments.lexers import CppLexer, GettextLexer, PythonLexer
from pygments.lexers import get_lexer_by_name
from pygments.token import (
    Keyword, Comment, Name, String, Text, Number, Generic,
)
from pygments.util import ClassNotFound


# Name of the running script, used as prefix in diagnostics.
_cmd = os.path.basename(sys.argv[0])


def main():
    """Highlight every HTML file given as a command line argument."""

    for infile in sys.argv[1:]:
        add_html_highlight(infile)


# Groups: opening <pre ...> tag, language name, snippet body, closing tag.
# The language comment itself is outside any group, so it gets dropped.
_pre_rx = re.compile(r"(<pre .*?>)"
                     r"\s*<!--\s*language:\s*(\S*)\s*-->\s*"
                     r"(.*?)"
                     r"(</pre>)",
                     re.S|re.U)


def add_html_highlight(infile):
    """Highlight the language-annotated ``<pre>`` blocks of one HTML file.

    The file at path ``infile`` is read, each annotated block is run
    through the matching lexer (custom lexers of this module take
    precedence over stock Pygments ones), and the result is written back
    to the same path.  Blocks naming a language no lexer is known for are
    left unhighlighted and a warning is issued; their language comment is
    removed all the same.
    """

    with open(infile) as ifh:
        htmlstr = ifh.read()

    # nowrap: the original <pre> tags are kept, so the formatter
    # must not emit a wrapper of its own.
    formatter = HtmlFormatter(nowrap=True)

    segs = []
    p = 0
    for m in _pre_rx.finditer(htmlstr):
        p1, p2 = m.span()
        segs.append(htmlstr[p:p1])
        otag, language, snippet, ctag = m.groups()

        lexer = get_custom_lexer_by_name(language)
        if lexer is None:
            try:
                lexer = get_lexer_by_name(language)
            except ClassNotFound:
                warning("Unknown language '%s'." % language)

        if lexer is None:
            seg = snippet
        else:
            # Markup already inside the snippet must survive highlighting
            # verbatim, so it is swapped for placeholders first; the rest
            # is unescaped because the formatter escapes its output again.
            snippet, tags = hide_tags(snippet)
            snippet = unescape_xml(snippet)
            seg = highlight(snippet, lexer, formatter)
            seg = unhide_tags(seg, tags)

        segs.extend((otag, seg, ctag))
        p = p2
    segs.append(htmlstr[p:])
    htmlstr_mod = "".join(segs)

    with open(infile, "w") as ofh:
        ofh.write(htmlstr_mod)


def warning(msg):
    """Write a warning message, prefixed by script name, to standard error."""

    sys.stderr.write("%s: [warning] %s\n" % (_cmd, msg))


def unescape_xml(s):
    """Return ``s`` with the five predefined XML entities resolved."""

    s = s.replace("&lt;", "<")
    s = s.replace("&gt;", ">")
    s = s.replace("&apos;", "'")
    s = s.replace("&quot;", '"')
    # Ampersand must go last, or "&amp;lt;" would end up as "<".
    s = s.replace("&amp;", "&")
    return s


_hide_tags_rx = re.compile(r"<.*?>", re.S|re.U)
# Placeholder standing in for a hidden tag.
_hide_tags_rseq = "⌒"


def hide_tags(s):
    """Replace each tag in ``s`` with a placeholder.

    Returns the modified string and the list of removed tags, in order
    of appearance, for later use with ``unhide_tags``.
    """

    tags = _hide_tags_rx.findall(s)
    s = _hide_tags_rx.sub(_hide_tags_rseq, s)
    return s, tags


def unhide_tags(s, tags):
    """Put ``tags`` back in place of the placeholders in ``s``.

    The number of placeholders in ``s`` must equal the number of tags;
    this is asserted.
    """

    chunks = s.split(_hide_tags_rseq)
    assert len(chunks) - 1 == len(tags)
    segs = [chunks[0]]
    for tag, chunk in zip(tags, chunks[1:]):
        segs.append(tag)
        segs.append(chunk)
    return "".join(segs)


# Lexer classes defined in this module; consulted before stock lexers.
_custom_lexers = set()


def get_custom_lexer_by_name(language):
    """Return a new instance of the custom lexer having ``language``
    among its aliases, or ``None`` if there is no such lexer."""

    for lexer_type in _custom_lexers:
        if language in lexer_type.aliases:
            return lexer_type()
    return None


class GettextXLexer(GettextLexer):
    """Gettext PO lexer which also marks up the content of strings
    and of previous-string comments."""

    tokens = {
        'root': [
            (r'^#,\s.*?$', Name.Decorator),
            (r'^#:\s.*?$', Name.Label),
            (r'^#\|\s*(msgid_plural|msgid)\s*"', Comment.Single, 'prevstring'),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(msgstr\[)(\d)(\])',
             bygroups(Name.Variable, Number.Integer, Name.Variable)),
            (r'^(msgctxt|msgid_plural|msgid|msgstr|msgscr)',
             bygroups(Name.Variable)),
            (r'"', String, 'string'),
            (r'^\.\.\.$', Text), # for cutting out intermediate messages
            (r'\u2060', Text), # for not splitting on empty line in POT extraction
            (r'\s+', Text),
        ],
        'string': [
            (r'\\.', String.Escape),
            (r'\{\{|\}\}', String.Escape),
            (r'\{-.*?-\}', Generic.Deleted),
            (r'\{\+.*?\+\}', Generic.Inserted),
            (r'\{([a-z].*?|)\}', String.Interpol),
            # The hyphen is last in the flag class so that it is literal;
            # written as "[ -+]" it forms a range which excludes "-".
            (r'%[ +-]?\d*\.?\d*[idufFgGeEcs%]', String.Interpol),
            (r'<(?=[\w/])', String.Other, 'tag'),
            (r'~~', String.Escape),
            (r'~', String.Other),
            (r'\$\[', String.Symbol, 'script'),
            (r'"', String, '#pop'),
            (r'.', String),
        ],
        'prevstring': [
            (r'\{-.*?-\}', Generic.Deleted),
            (r'\{\+.*?\+\}', Generic.Inserted),
            (r'"', Comment.Single, '#pop'),
            (r'.', Comment.Single),
        ],
        'tag': [
            (r'>', String.Other, '#pop'),
            (r'.', String.Other),
        ],
        'script': [
            (r'\]', String.Symbol, '#pop'),
            (r"''", String.Escape),
            (r"'", String.Symbol, 'scriptquote'),
            include('string'),
        ],
        'scriptquote': [
            (r"''", String.Escape),
            (r"'", String.Symbol, '#pop'),
            include('string'),
        ],
    }

_custom_lexers.add(GettextXLexer)


class CppXLexer(CppLexer):
    """C++ lexer which also marks up format directives in strings."""

    # Work on a copy, so that the token table of the base lexer
    # is not modified.
    tokens = dict(CppLexer.tokens, string=[
        (r'"', String, '#pop'),
        (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
        (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
         r'[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
        (r'\{\{|\}\}', String.Escape),
        (r'\{.*?\}', String.Interpol),
        (r'%\d+', String.Interpol),
        (r'[^\\"\n%{}]+', String), # all other characters
        (r'\\\n', String), # line continuation
        (r'\\', String), # stray backslash
        (r'[%{}]', String),
    ])

_custom_lexers.add(CppXLexer)


class PythonXLexer(PythonLexer):
    """Python lexer which also marks up format directives in strings."""

    # Work on a copy, so that the token table of the base lexer
    # is not modified (updating the inherited dictionary in place
    # would change the stock Python lexer as well).
    tokens = dict(PythonLexer.tokens, strings=[
        (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
         r'[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
        (r'\{\{|\}\}', String.Escape),
        (r'\{.*?\}', String.Interpol),
        (r'[^\\\'"%{}\n]+', String),
        (r'[\'"\\]', String),
        (r'[%{}]', String),
    ])

_custom_lexers.add(PythonXLexer)


if __name__ == "__main__":
    main()