Warning, /education/step/step/data/extractxml is written in an unsupported language. File is not indexed.
#!/usr/bin/env python3
#
# This file is part of Step.
# SPDX-FileCopyrightText: 2009 Vladimir Kuznetsov <ks.vladimir@gmail.com>
#
# SPDX-License-Identifier: GPL-2.0-or-later

"""Extract i18n strings from XML files, or write translated copies of them.

In ``--extract`` mode the text content of the selected tags is written out as
``i18n(...)`` / ``i18nc(...)`` pseudo-calls (optionally piped through
xgettext).  In ``--translate`` mode each input file is copied to the output
directory with the content of the selected tags replaced by its translation
from a .po/.mo catalog.
"""

import xml.parsers.expat
import xml.sax.saxutils
from io import BytesIO
import optparse
import subprocess
import tempfile
import gettext
import locale
import shlex
import copy
import sys
import re
import os

# Names of the options whose values are strings or lists of strings.
_STR_OPTIONS = ('tag', 'tag_regex', 'context', 'ectx')


# Context lookup is implemented here so the script does not depend on
# gettext.GNUTranslations.pgettext (only available since Python 3.8).
class GNUTranslations(gettext.GNUTranslations):
    """GNU .mo catalog with support for message contexts (msgctxt)."""

    # The encoding of a msgctxt and a msgid in a .mo file is
    # msgctxt + "\x04" + msgid (gettext version >= 0.15)
    CONTEXT_ENCODING = "%s\x04%s"

    def upgettext(self, context, message):
        """Return the translation of `message` in `context`.

        Falls back to the fallback catalog (if any) and finally to the
        untranslated message itself.
        """
        ctxt_message_id = self.CONTEXT_ENCODING % (context, message)
        missing = object()
        # _catalog only exists once a .mo file has been parsed.
        tmsg = getattr(self, '_catalog', {}).get(ctxt_message_id, missing)
        if tmsg is not missing:
            return tmsg

        fallback = self._fallback
        if not fallback:
            return str(message)
        # The fallback may be a plain gettext catalog without upgettext.
        for method_name in ('upgettext', 'pgettext'):
            lookup = getattr(fallback, method_name, None)
            if lookup is not None:
                return lookup(context, message)
        return fallback.gettext(message)


class XmlFileTranslator(object):
    """Expat-based filter that extracts or translates the text of XML tags.

    `opt` is the parsed option object; the attributes read here are `tag`,
    `tag_regex`, `recursive`, `strip`, `unquote`, `parse_unquoted`,
    `context`, `ectx`, `cstart` and `extract`.
    """

    def __init__(self, opt):
        self.opt = opt
        self.tag_regex = [re.compile(r) for r in self.opt.tag_regex]

    def init_parser(self):
        """Create a fresh expat parser wired to this object's handlers."""
        self.parser = xml.parsers.expat.ParserCreate()
        # Attributes arrive as a flat [name, value, name, value, ...] list.
        self.parser.ordered_attributes = 1
        self.parser.DefaultHandler = self.default_handler
        self.parser.StartElementHandler = self.start_element_handler
        self.parser.EndElementHandler = self.end_element_handler

    def parse(self, infile):
        """Parse the binary file object `infile`.

        Raises xml.parsers.expat.ExpatError on malformed input.
        """
        self.parser.ParseFile(infile)

    def _start(self, infile_name, infile, outfile, translation,
               i18n_stack_base, i18n_line_base):
        # Reset all per-document state, then run the parser over infile.
        self.i18n_file = infile_name
        self.outfile = outfile
        self.translation = translation

        self.i18n_line_base = i18n_line_base
        self.i18n_stack_base = \
            [] if i18n_stack_base is None else i18n_stack_base
        self.i18n_stack = []
        self.i18n_save = False
        self.i18n_string = ''

        self.init_parser()
        self.parse(infile)

    def translate(self, infile_name, infile, outfile,
                  translation, i18n_stack_base=None, i18n_line_base=0):
        """Copy `infile` to `outfile`, translating the selected tags.

        `translation` must provide gettext() and upgettext();
        `i18n_line_base` is added to every reported line number.
        """
        self._start(infile_name, infile, outfile, translation,
                    i18n_stack_base, i18n_line_base)

    def extract(self, infile_name, infile, outfile,
                i18n_stack_base=None, i18n_line_base=0):
        """Write i18n()/i18nc() calls for the selected tags to `outfile`."""
        self._start(infile_name, infile, outfile, None,
                    i18n_stack_base, i18n_line_base)

    def parse_unquoted_substring(self, string):
        """Process an unquoted markup fragment with the nested options."""
        infile = BytesIO(string.encode('UTF-8'))
        translator1 = XmlFileTranslator(self.opt.parse_unquoted)
        base_line = self.i18n_stack[-1]['line']
        if self.opt.extract:
            translator1.extract(self.i18n_file, infile,
                                self.outfile, self.i18n_stack, base_line)
        else:
            outfile = BytesIO()
            translator1.translate(self.i18n_file, infile, outfile,
                                  self.translation, self.i18n_stack,
                                  base_line)
            string = outfile.getvalue().decode()
            if self.opt.unquote:
                string = self.quote_str(string)
            self.outfile.write(self.encode_utf8(string))

    def quote_str(self, s):
        """Escape XML special characters ('&' first, to avoid re-escaping)."""
        return s.replace('&', '&amp;').replace('"', '&quot;') \
                .replace('>', '&gt;').replace('<', '&lt;')

    def unquote_str(self, s):
        """Inverse of quote_str ('&amp;' last, to avoid double unescaping)."""
        return s.replace('&lt;', '<').replace('&gt;', '>') \
                .replace('&quot;', '"').replace('&amp;', '&')

    def encode_str(self, s):
        """Return `s` as a C string literal, split after each newline."""
        return '"' + s.replace('\\', '\\\\').replace('\"', '\\"') \
                      .replace('\r', '\\r').replace('\n', '\\n"\n"') + '"'

    def encode_utf8(self, s):
        """Return `s` as bytes; str is encoded as UTF-8, bytes pass through."""
        if isinstance(s, str):
            return s.encode()
        return s

    def select_context(self, patterns, attr):
        """Return the first pattern that formats with `attr`, else None."""
        for pattern in patterns:
            try:
                return pattern % attr
            except (KeyError, ValueError):
                # The pattern references a key this element does not have.
                pass
        return None

    def write_data(self, data):
        """Buffer `data` inside an i18n tag, else copy it when translating."""
        if self.i18n_save:
            self.i18n_string += data
        elif self.translation is not None:
            self.outfile.write(self.encode_utf8(data))

    def _context_info(self):
        # Mapping used to expand --context/--ectx patterns: keys for the
        # innermost i18n tag, and the same keys prefixed with '../' for
        # its parent.
        info = {'file': self.i18n_file,
                'filename': os.path.basename(self.i18n_file)}
        for depth, frame in enumerate(reversed(self.i18n_stack[-2:])):
            prefix = '../' * depth
            info[prefix + 'tag'] = frame['name']
            info[prefix + 'line'] = frame['line']
            for aname, avalue in frame['attr'].items():
                info[prefix + 'attr/' + aname] = avalue
        return info

    def _write_extracted(self, string, context, ectx, line):
        # Emit the location comment, optional ectx comment and the
        # i18n()/i18nc() call for one extracted message.
        lines = ['%s i18n: file: %s:%d\n' %
                 (self.opt.cstart, self.i18n_file, line)]
        if ectx:
            lines.append('%s i18n: ectx: %s\n' % (self.opt.cstart, ectx))
        if context:
            lines.append('i18nc(%s, %s)\n' %
                         (self.encode_str(context), self.encode_str(string)))
        else:
            lines.append('i18n(%s)\n' % (self.encode_str(string),))
        self.outfile.write(self.encode_utf8(''.join(lines)))

    def write_i18n(self):
        """Extract or translate the text buffered for the current i18n tag."""
        string = self.i18n_string
        if self.opt.unquote:
            string = self.unquote_str(string)

        if self.opt.unquote and self.opt.parse_unquoted \
                and string.lstrip().startswith('<'):
            self.parse_unquoted_substring(string)
            return

        begin_string = end_string = ''
        if self.opt.strip:
            # Split into leading whitespace, message and trailing whitespace.
            # Lengths are used rather than a negative slice, which would
            # yield '' for whitespace-only content.
            left_stripped = string.lstrip()
            begin_string = string[:len(string) - len(left_stripped)]
            stripped = left_stripped.rstrip()
            end_string = left_stripped[len(stripped):]
            string = stripped

        if not string:
            # Never look up '': gettext maps it to the catalog header.
            # Keep the whitespace so the translated file stays faithful.
            if self.translation is not None and begin_string:
                self.outfile.write(self.encode_utf8(begin_string))
            return

        info = self._context_info()
        ectx = self.select_context(self.opt.ectx, info)
        context = self.select_context(self.opt.context, info)

        if self.translation is None:
            self._write_extracted(string, context, ectx, info['line'])
            return

        if context:
            string = self.translation.upgettext(context, string)
        else:
            string = self.translation.gettext(string)
        if self.opt.unquote:
            string = self.quote_str(string)
        self.outfile.write(
            self.encode_utf8(begin_string + string + end_string))

    def default_handler(self, data):
        """Expat handler for everything without a dedicated handler."""
        self.write_data(data)

    def start_element_handler(self, name, attr):
        """Expat start-tag handler: re-emit the tag, open an i18n scope."""
        attr_dict = {}
        pieces = ['<' + name]
        for aname, avalue in zip(attr[::2], attr[1::2]):
            attr_dict[aname] = avalue
            pieces.append(' %s=%s' %
                          (aname, xml.sax.saxutils.quoteattr(avalue)))
        pieces.append('>')
        data = ''.join(pieces)

        match = name in self.opt.tag or \
            any(regex.search(name) for regex in self.tag_regex)

        if match and self.i18n_stack and self.opt.recursive:
            # Flush the parent's text collected so far, so that the child
            # is handled as a separate message.
            self.write_i18n()
            self.i18n_string = ''
            self.i18n_save = False

        self.write_data(data)

        if match:
            self.i18n_stack.append(dict(
                name=name, attr=attr_dict,
                line=self.i18n_line_base + self.parser.CurrentLineNumber))
            self.i18n_save = True

    def end_element_handler(self, name):
        """Expat end-tag handler: close an i18n scope, re-emit the tag."""
        if self.i18n_stack and self.i18n_stack[-1]['name'] == name:
            if self.opt.recursive or len(self.i18n_stack) == 1:
                self.write_i18n()
                self.i18n_string = ''
                self.i18n_save = False
            self.i18n_stack.pop()

        self.write_data('</%s>' % (name,))

        if self.i18n_stack:
            # Still inside an outer i18n tag: resume collecting its text.
            self.i18n_stack[-1]['line'] = \
                self.i18n_line_base + self.parser.CurrentLineNumber
            self.i18n_save = True


def safe_remove(fname):
    """Remove `fname`, ignoring any OS-level failure (best effort)."""
    try:
        os.remove(fname)
    except OSError:
        pass


def open_mo_file(opt, mo_file_name, remove=False):
    """Load `mo_file_name` and return it as a GNUTranslations catalog.

    On failure a message is written to stderr and the process exits with
    status 1.  With `remove` set, the file is deleted afterwards in every
    case.  `opt` is accepted for interface compatibility and not used.
    """
    try:
        try:
            mo_file = open(mo_file_name, 'rb')
        except IOError as e:
            sys.stderr.write('Cannot open .mo file: %s\n' % (str(e),))
            sys.exit(1)

        with mo_file:
            try:
                return GNUTranslations(mo_file)
            except IOError as e:
                sys.stderr.write('Cannot parse .mo file: %s\n' % (str(e),))
                sys.exit(1)
    finally:
        if remove:
            safe_remove(mo_file_name)


def compile_po_file(opt, po_file_name):
    """Compile `po_file_name` with msgfmt and return the loaded catalog.

    Exits with status 1 if msgfmt cannot be run or reports an error.
    """
    (mo_file_id, mo_file_name) = tempfile.mkstemp(suffix='.mo')
    os.close(mo_file_id)

    # An argument list (no shell) keeps file names with quotes or spaces
    # from being misinterpreted.
    try:
        failed = subprocess.call(
            ['msgfmt', po_file_name, '-o', mo_file_name]) != 0
    except OSError:
        failed = True

    if failed:
        safe_remove(mo_file_name)
        sys.stderr.write('Error running msgfmt\n')
        sys.exit(1)

    return open_mo_file(opt, mo_file_name, remove=True)


def decode_options(options, str_options):
    """Give `options` its own copy of every list-valued option named.

    Command-line arguments are already text in Python 3, so no decoding is
    needed; string values are left untouched.
    """
    for name in str_options:
        value = getattr(options, name)
        if isinstance(value, list):
            setattr(options, name, list(value))


def _make_format_options():
    # Options that may also be given inside --parse-unquoted.
    return [
        optparse.make_option('-n', '--tag', action='append', default=[],
            help='Extract TAG constants as i18n string. ' + \
                 'Repeat this option to specify multiple tags'),
        optparse.make_option('-x', '--tag-regex', action='append', default=[],
            help='Extract contents of all tags matching TAG_REGEX as i18n string. ' + \
                 'Repeat this option to specify multiple regex'),
        optparse.make_option('-r', '--recursive', action='store_true', default=False,
            help='Recursively pass i18n tags. This means that children tags ' + \
                 'will be extracted separately even if parent is also i18n-enabled'),
        optparse.make_option('-s', '--strip', action='store_true', default=False,
            help='Strip leading and trailing whitespaces of i18n strings'),
        optparse.make_option('-q', '--unquote', action='store_true', default=False,
            help='Unquote XML-quoted entities on extraction ' + \
                 'and quote them back when translating'),
        optparse.make_option('--parse-unquoted', default=None, metavar='PARSE_UNQUOTED_OPTIONS',
            help='Parse unquoted strings using PARSE_UNQUOTED_OPTIONS as options. '
                 'This option is useful when XML file contains quoted HTML fragments')
    ]


def _make_context_options():
    # Options controlling msgctxt / ectx generation.
    return [
        optparse.make_option('--context', action='append', default=[],
            help='Pattern to generate context. ' + \
                 'TODO: pattern syntax. ' + \
                 'If specified multiple times, the first matching pattern will be used'),
        optparse.make_option('--ectx', action='append', default=[],
            help='Pattern to generate ectx. Format is the same as in --context')
    ]


def _make_option_parser(format_options, context_options):
    # Build the top-level command-line parser.
    optparser = optparse.OptionParser(usage='\n\t%prog --extract [options] XML_FILE...\n' + \
                                            '\t%prog --translate [options] XML_FILE...')

    optparser.add_option('-e', '--extract', action='store_true', default=False,
                         help='Extract i18n strings from xml files')
    optparser.add_option('-t', '--translate', action='store_true', default=False,
                         help='Translate i18n strings in xml files')

    optgroup_format = optparse.OptionGroup(optparser, 'Formatting options')
    # Deep copies: the same option templates are reused for the nested
    # --parse-unquoted parser.
    for option in copy.deepcopy(format_options):
        optgroup_format.add_option(option)
    for option in copy.deepcopy(context_options):
        optgroup_format.add_option(option)
    optparser.add_option_group(optgroup_format)

    optgroup_extract = optparse.OptionGroup(optparser, 'Options for extracting messages')
    optgroup_extract.add_option('--cstart', default='//',
                                help='A string to used to start the comment')
    optgroup_extract.add_option('--output', help='Output file for extracted messages')
    optgroup_extract.add_option('--xgettext', action='store_true', help='Execute xgettext after extracting messages')
    optgroup_extract.add_option('--xgettext-args',
                                default='-ki18n -ki18nc:1c,2 -ci18n --no-location --from-code=UTF-8',
                                help='Arguments for xgettext (overrides the defaults)')
    optgroup_extract.add_option('--xgettext-extra-args', default='',
                                help='Additional arguments for xgettext (appends to the defaults)')
    optparser.add_option_group(optgroup_extract)

    optgroup_translate = optparse.OptionGroup(optparser, 'Options for translating messages')
    optgroup_translate.add_option('--po-file', help='A file with translations')
    optgroup_translate.add_option('--mo-file', help='A file with translations')
    optgroup_translate.add_option('--output-dir', default='./i18n',
                                  help='A directory to output translated files')
    optparser.add_option_group(optgroup_translate)
    return optparser


def _nested_options(opt, option_templates):
    # Parse the --parse-unquoted string and return a copy of opt in which
    # the format/context options are replaced by the nested values.
    optparser1 = optparse.OptionParser(usage='%prog --parse-unquoted="[options]"')
    options = copy.deepcopy(option_templates)
    for option in options:
        optparser1.add_option(option)
    # Split on any whitespace run so doubled spaces do not yield empty
    # arguments.  No shell-style unquoting: regex backslashes must survive.
    opt1, args1 = optparser1.parse_args(opt.parse_unquoted.split())
    decode_options(opt1, _STR_OPTIONS)
    if args1:
        optparser1.error('unexpected argument')

    nested = copy.deepcopy(opt)
    for option in options:
        setattr(nested, option.dest, getattr(opt1, option.dest))
    nested.parse_unquoted = None
    return nested


def _open_input(fname):
    # Open an input XML file or exit with an error message.
    try:
        return open(fname, 'rb')
    except IOError as e:
        sys.stderr.write('can not open input file: %s\n' % (str(e),))
        sys.exit(1)


def _run_xgettext(opt, source_name):
    # Run xgettext over the generated pseudo-source; exit on failure.
    xgettext_cmd = ['xgettext']
    xgettext_cmd += shlex.split(opt.xgettext_args)
    xgettext_cmd += shlex.split(opt.xgettext_extra_args)
    xgettext_cmd.append('--output=' + (opt.output or '-'))
    xgettext_cmd.append(source_name)
    try:
        ret = subprocess.call(xgettext_cmd)
    except OSError as e:
        sys.stderr.write('error running xgettext: %s\n' % (str(e),))
        sys.exit(1)
    if ret != 0:
        sys.stderr.write('error running xgettext: exit code = %d\n' % (ret,))
        sys.exit(1)


def _extract_files(optparser, opt, translator, fnames):
    # --extract mode: all inputs are written to a single output stream.
    tmp_fname = None
    if opt.xgettext:
        (tmp_id, tmp_fname) = tempfile.mkstemp(suffix='.cc')
        os.close(tmp_id)

    try:
        if tmp_fname is not None:
            outfile = open(tmp_fname, 'wb')
        elif opt.output:
            try:
                outfile = open(opt.output, 'wb')
            except IOError as e:
                optparser.error('can not open output file: ' + str(e))
        else:
            outfile = sys.stdout.buffer

        try:
            for fname in fnames:
                with _open_input(fname) as infile:
                    try:
                        translator.extract(fname, infile, outfile)
                    except xml.parsers.expat.ExpatError as e:
                        sys.stderr.write('cannot parse file %s: %s\n' % (fname, str(e)))
                        sys.exit(1)
        finally:
            if outfile is sys.stdout.buffer:
                # stdout is not ours to close.
                outfile.flush()
            else:
                outfile.close()

        if tmp_fname is not None:
            _run_xgettext(opt, tmp_fname)
    finally:
        if tmp_fname is not None:
            safe_remove(tmp_fname)


def _translate_files(optparser, opt, translator, fnames):
    # --translate mode: one translated copy per input in opt.output_dir.
    if not opt.po_file and not opt.mo_file:
        optparser.error('please specify either --po-file or --mo-file option for translation')

    if opt.po_file:
        gnutranslation = compile_po_file(opt, opt.po_file)
    else:
        gnutranslation = open_mo_file(opt, opt.mo_file)

    if not os.path.isdir(opt.output_dir):
        try:
            os.mkdir(opt.output_dir)
        except OSError as e:
            sys.stderr.write('Cannot create output directory: %s\n' % (str(e),))
            sys.exit(1)

    for fname in fnames:
        with _open_input(fname) as infile:
            outfile_name = os.path.join(opt.output_dir, os.path.basename(fname))
            try:
                outfile = open(outfile_name, 'wb')
            except IOError as e:
                sys.stderr.write('cannot open output file: %s\n' % (str(e),))
                sys.exit(1)

            # Close every output file, not only the last one.
            with outfile:
                try:
                    translator.translate(fname, infile, outfile, gnutranslation)
                except xml.parsers.expat.ExpatError as e:
                    sys.stderr.write('can not parse file %s: %s\n' % (fname, str(e)))
                    sys.exit(1)


def main():
    """Command-line entry point; see --help for the accepted options."""
    format_options = _make_format_options()
    context_options = _make_context_options()
    optparser = _make_option_parser(format_options, context_options)

    opt, args = optparser.parse_args()
    decode_options(opt, _STR_OPTIONS)

    if not args:
        optparser.error('no xml files was specified')

    if opt.extract and opt.translate:
        optparser.error('options --extract and --translate are mutually exclusive')

    if not opt.extract and not opt.translate:
        optparser.error('please specify either --extract or --translate option')

    if opt.parse_unquoted is not None:
        opt.parse_unquoted = _nested_options(
            opt, format_options + context_options)

    translator = XmlFileTranslator(opt)
    if opt.extract:
        _extract_files(optparser, opt, translator, args)
    else:
        _translate_files(optparser, opt, translator, args)


if __name__ == '__main__':
    main()