Warning, file /sdk/kde-dev-scripts/kf5/resolve_kuit.py was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

# Resolve KUIT markup in i18n strings into plain or rich text,
# or switch them to xi18n calls.
#
# Usage:
#   resolve_kuit.py [OPTIONS] FILE_OR_DIRECTORY...
#
# By default, KUIT markup is resolved into plain or rich text.
# To switch strings containing any KUIT markup to xi18n calls instead,
# use -x option; to switch all strings to xi18n calls, use -X option.
# For non-code files (.ui, .rc, etc.) -x behaves the same as -X,
# since there is no way to specify by string whether it is to be
# passed through i18n or xi18n call at runtime. Instead this is specified
# on the top level (per file, but normally for all such files in a project),
# as described in the "Connecting Calls to Catalogs" section
# of the ki18n Programmer's Guide.
#
# Files are modified in-place. Modified file paths are written to stdout.
# If an argument is a directory, files from it are recursively collected.
# Only files with known extensions are processed (even if file with unknown
# extension is given directly in the command line, it will be ignored).
# The list of known extensions by resolution type can be listed with
# -k option. Option -s RESTYPE:EXT1[,EXT2...] can be used to register
# additional extensions (without leading dot, case ignored) for given
# resolution type. One extension may have several resolution types.
# Files in version control bookkeeping directories are skipped.
#
# In C-like function call files (resolution type 'ccall'),
# i18n strings are detected as arguments in calls with
# *i18n, *i18nc, *i18np, and *i18ncp function names.
# By default detection considers string arguments to be either single or
# double quoted, call arguments can be split into several lines, and
# strings are concatenated when separated only by whitespace.
# Default set of quotes can be replaced by repeating the -q QUOTE option.
#
# In XML-like markup files (resolution type 'xml'),
# i18n strings are detected as element texts, for a certain set of tags.
# i18n contexts are detected as attributes to those elements, for a certain
# set of attributes. These sets can be expanded using -T TAG1[,TAG2...]
# and -A ATTR1[,ATTR2...] options. Case is ignored for both.
# Markup inside the element text is expected to be XML-escaped (&lt;, etc.),
# i.e. the element text is first unescaped before resolution.
#
# In PO files (resolution type 'po'), i18n strings are detected
# according to PO format.
# To process PO files, the Pology library must be ready for use.
# In msgstr fields, KUIT markup transformations for given language
# are looked up in its kdelibs4.po. The pattern path to kdelibs4.po files,
# which contains @lang@ placeholder, is given with -t PATTERN option.
# This can be a local path or an HTTP URL (e.g.
# https://websvn.kde.org/*checkout*/trunk/l10n-kde4/@lang@/messages/kdelibs/kdelibs4.po ).
# Language of processed PO file is determined from its Language: header field.
# If only PO files of one language are processed and they do not reliably
# contain this field, the language can be forced with -l LANG option.
# By default both the original and the translation fields are resolved,
# which is appropriate when the PO file is being resolved before
# it has been merged with new template resulting from the resolved code.
# If an unresolved PO file has been merged with new template first,
# then option -m should be issued to resolve only the translation fields.
# In this case, on fuzzy messages, if previous original fields (which are
# also resolved) and current original fields match after resolution,
# the message is unfuzzied.
#
# For a given i18n string, the decision of whether to resolve KUIT markup
# into plain or Qt rich text is made based on the context marker,
# as described in KUIT documentation at
# https://techbase.kde.org/Development/Tutorials/Localization/i18n_Semantics .
# Target formats can also be manually specified for certain context markers
# by repeating the -f option. E.g. -f @info:progress=rich would override
# the default resolution into plain text for @info:progress i18n strings.
#
# NOTE: [INTERNAL]
# If <html> tags are added on rich text (see top_tag_res variable),
# then resolution must not be run over already resolved files.
# Context markers will remain but format modifiers will be removed from them,
# which may cause further modification in the second run.
#
# NOTE: [INTERNAL]
# If <numid> tags are simply removed (see numid_tag_res variable),
# a warning is issued on each removal to do something manually with
# its associated argument, e.g. wrap it in QString::number().
# It is probably best to look for <numid> tags and handle their arguments
# before running the resolution.

import locale
import optparse
import os
import re
import sys


def main ():
    """
    Command line entry point.

    Parses the options, pushes them into the module-level option holders
    (_kuit_spec, _ccall_options, _xml_options, _po_options and the
    resolution type and format maps), collects the files named on
    the command line (directories are walked recursively, paths having
    a VCS bookkeeping directory as a component are dropped), and runs
    every resolver registered for each file's extension.
    Paths of modified files are reported to stdout.

    Raises StandardError on invalid command line input.
    """

    opars = optparse.OptionParser(
        usage="%prog FILE_OR_DIRECTORY...",
        description="Resolve KUIT markup in i18n strings. "
                    "Files are recursively searched for, "
                    "and modified in place. "
                    "C-like i18n calls are looked for in all files, "
                    "except in PO files which are specially treated. "
                    "WARNING: Do not run twice over same files.")
    opars.add_option(
        "-x",
        dest="switch_to_xi18n", action="store_const", default=0, const=1,
        help="Instead of resolving markup, switch i18n calls having "
             "some markup to xi18n calls.")
    opars.add_option(
        "-X",
        dest="switch_to_xi18n", action="store_const", default=0, const=2,
        help="Instead of resolving markup, switch all i18n calls "
             "to xi18n calls.")
    opars.add_option(
        "-f",
        dest="formats", action="append", default=[],
        metavar="MARKER=FORMAT",
        help="Set resolution into given target format for "
             "strings with this context marker. "
             "Target format can be one of: plain, rich. "
             "Option can be repeated.")
    opars.add_option(
        "-q",
        dest="quotes", action="append", default=[],
        metavar="QUOTE",
        help="Set opening and closing quote for string arguments "
             "in '%s' resolution type. "
             "Default is single and double quote. "
             "Option can be repeated." % "ccall")
    opars.add_option(
        "-s",
        dest="add_restype_exts", action="append", default=[],
        metavar="RESTYPE:EXT1[,EXT2...]",
        help="Set additional file name extension for given resolution type. "
             "Option can be repeated.")
    opars.add_option(
        "-T",
        dest="add_xml_texttags", action="store", default=None,
        metavar="TAG1[,TAG2...]",
        help="Set additional tags from which to collect text "
             "in '%s' resolution type." % "xml")
    opars.add_option(
        "-A",
        dest="add_xml_ctxtattrs", action="store", default=None,
        metavar="ATTR1[,ATTR2...]",
        help="Set additional attributes to consider as containing "
             "context in '%s' resolution type." % "xml")
    opars.add_option(
        "-t",
        dest="kdelibs4_path_pattern", action="store", default=None,
        metavar="PATH_PATTERN",
        help="The path pattern to kdelibs4.po files, "
             "which contains @lang@ placeholder. "
             "It can be a local path or HTTP URL. "
             "Needed only when processing PO files.")
    opars.add_option(
        "-l",
        dest="kdelibs4_lang", action="store", default=None,
        metavar="LANG",
        help="The language code of translated text in processed PO files, "
             "if it cannot be determined reliably from PO headers. "
             "When this option is in effect, PO files of exactly "
             "one language of translation must be processed.")
    opars.add_option(
        "-w",
        dest="msgfmt_wrap", action="store_true", default=False,
        help="Apply Gettext tools wrapping to PO files after resolving them.")
    opars.add_option(
        "-m",
        dest="post_merge", action="store_true", default=False,
        help="Resolve only translation fields in PO files. "
             "This is to be used when PO file is being resolved "
             "after it has been merged with template resulting "
             "from resolved code.")
    opars.add_option(
        "-I",
        dest="interface_wrap", action="store", default=None,
        metavar="HEAD_SEP",
        help="[undocumented]",
    )
    opars.add_option(
        "-k",
        dest="list_restypes", action="store_true", default=False,
        help="List known resolution types and associated file extensions. "
             "It will include additions by '%s' option." % "-s",
    )

    options, args = opars.parse_args()

    # Set additional resolution types.
    for rtextspec in options.add_restype_exts:
        lst = rtextspec.split(":", 1)
        if len(lst) != 2:
            raise StandardError(
                "Resolution specification '%s' given in command line "
                "is not valid." % rtextspec)
        rt, extspec = lst
        if rt not in _map_restype_ext:
            raise StandardError(
                "Unknown resolution type '%s' in resolution specification '%s' "
                "given in command line." % (rt, rtextspec))
        exts = [e.lower() for e in extspec.split(",")]
        _map_restype_ext[rt][0].update(exts)
    if options.list_restypes:
        for rt, (exts, rf, ons) in _map_restype_ext.items():
            report("%s: %s" % (rt, " ".join(sorted(exts))))
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist; the exit status is kept as it was.
        sys.exit(1)

    # Update target format by context marker specification.
    for fmtspec in options.formats:
        try:
            cmk, fmt = fmtspec.split("=", 1)
        except ValueError:
            # No "=" in the specification, so nothing to unpack into.
            raise StandardError(
                "Invalid target format specification '%s' "
                "given in command line." % fmtspec)
        if fmt not in _known_formats.values():
            raise StandardError(
                "Unknown target format '%s' given in command line." % fmt)
        _cmarker_to_format[cmk] = fmt

    # Set KUIT resolving options.
    if options.kdelibs4_path_pattern:
        if "@lang@" not in options.kdelibs4_path_pattern:
            raise StandardError(
                "Path pattern for kdelibs4.po files given in command line "
                "does not contain %s placeholder." % "@lang@")
        _kuit_spec.kdelibs4_path_pattern = options.kdelibs4_path_pattern
    _kuit_spec.force_lang = options.kdelibs4_lang
    _kuit_spec.interface_wrap = options.interface_wrap

    # Set C-call resolving options.
    _ccall_options.switch_to_xi18n = options.switch_to_xi18n
    if options.quotes:
        # Reverse lexicographic order puts every quote before any quote
        # that is its own prefix, i.e. longest first among related quotes.
        squotes = list(reversed(sorted(options.quotes)))
        _ccall_options.quotes[:] = squotes

    # Set XML resolving options.
    _xml_options.switch_to_xi18n = options.switch_to_xi18n
    if options.add_xml_texttags:
        tags = options.add_xml_texttags.split(",")
        _xml_options.text_tags.update(tags)
    if options.add_xml_ctxtattrs:
        attrs = options.add_xml_ctxtattrs.split(",")
        _xml_options.ctxt_attrs[:0] = attrs # higher priority

    # Set PO resolving options.
    _po_options.switch_to_xi18n = options.switch_to_xi18n
    _po_options.msgfmt_wrap = options.msgfmt_wrap
    _po_options.post_merge = options.post_merge

    # Collect all files.
    file_paths = []
    for path in args:
        if os.path.isdir(path):
            for root, dirns, filens in os.walk(path):
                for filen in filens:
                    file_paths.append(os.path.join(root, filen))
        elif os.path.isfile(path):
            file_paths.append(path)
        else:
            raise StandardError(
                "Command line argument '%s' is neither a file "
                "nor a directory." % path)

    # Filter out VCS bookkeeping.
    vcs_dirns = set(["CVS", ".svn", ".git"])
    file_paths = [fp for fp in file_paths
                  if not vcs_dirns.intersection(fp.split(os.path.sep))]

    # Resolve files.
    file_paths.sort()
    test_encs = ["utf8", "iso8859-1", "iso8859-15", "cp1252"]
    for fp in file_paths:
        rspecs = get_resolvers_for_file(fp)
        modified = False
        for restype, resolvef, onstring in rspecs:
            if onstring:
                # Resolver works on file content: read, decode, resolve,
                # and write back only if something changed.
                with open(fp, "rb") as fh:
                    fstr = fh.read()
                # badpos remembers where the first tried encoding failed;
                # it is reset as soon as any encoding succeeds.
                badpos = -1
                for fenc in test_encs:
                    try:
                        fstr = fstr.decode(fenc)
                    except UnicodeDecodeError as e:
                        if badpos < 0:
                            badpos = e.start
                    else:
                        badpos = -1
                        break
                if badpos < 0:
                    res_fstr = resolvef(fstr, fp)
                    if res_fstr != fstr:
                        # Write to a temporary sibling and rename over
                        # the original, so a failed write leaves it intact.
                        tmpfp = fp + "~tmp"
                        with open(tmpfp, "wb") as fh:
                            fh.write(res_fstr.encode("utf8"))
                        os.rename(tmpfp, fp)
                        modified = True
                else:
                    warning("%s: Cannot decode file using any of "
                            "test encodings (UTF-8 try produces problem "
                            "in line %d, column %d), skipping it."
                            % (fp, lno_to(fstr, badpos), cno_to(fstr, badpos)))
            else:
                # Resolver works on the file path and reports modification.
                if resolvef(fp):
                    modified = True
        if modified:
            report(fp)


def report (msg):
    """Write msg and a newline to stdout, in the locale's preferred encoding."""

    lenc = locale.getpreferredencoding()
    emsg = ("%s\n" % msg).encode(lenc)
    sys.stdout.write(emsg)


def warning (msg):
    """Write msg as a "[warning]" line to stderr, in the locale's preferred encoding."""

    lenc = locale.getpreferredencoding()
    emsg = ("[warning] %s\n" % msg).encode(lenc)
    sys.stderr.write(emsg)


# Plain attribute holder, used for option sets and per-language data.
class Data: pass
_kuit_spec = Data()
_kuit_spec.kdelibs4_path_pattern = None
_kuit_spec.force_lang = None
_kuit_spec.interface_wrap = None
_kuit_spec.langdata = {} # cache of get_language_data() results by language


_space_in_place_tag_rx = re.compile(r"(<[^>]*\S)(/\s*>)", re.U | re.S)

def _kl4_translation (kl4cat, msgctxt, msgid):
    """
    Return the first msgstr of the translated kl4cat message with the key
    (msgctxt, msgid); return msgid itself when kl4cat is None or
    no such translated message is found.
    """

    if kl4cat is not None:
        msgs = kl4cat.select_by_key(msgctxt, msgid)
        if msgs and msgs[0].translated:
            return msgs[0].msgstr[0]
    return msgid


def get_language_data (lang):
    """
    Return the KUIT resolution data for language lang.

    The result is a Data object with the attributes transform, shcdelim,
    keyname, guidelim (dictionaries) and ifacewrap. It is built once
    per language and then served from _kuit_spec.langdata.
    For any language other than "en_US", patterns are looked up in that
    language's kdelibs4.po, opened through the -t path pattern; messages
    which are missing or not translated there fall back to
    the original (msgid) text.

    Raises StandardError if a kdelibs4.po is needed but the path pattern
    has not been set.
    """

    langdata = _kuit_spec.langdata.get(lang)
    if langdata:
        return langdata

    kl4cat = None
    if lang != "en_US":
        # Fetch kdelibs4.po for this catalog's language.
        if not _kuit_spec.kdelibs4_path_pattern:
            raise StandardError(
                "Path pattern for kdelibs4.po not set (-t option).")
        kl4path = _kuit_spec.kdelibs4_path_pattern.replace("@lang@", lang)
        from urllib import urlopen
        kl4fh = urlopen(kl4path)
        from pology.catalog import Catalog
        kl4cat = Catalog("kdelibs4.po", readfh=kl4fh)

    langdata = Data()

    langdata.transform = {}
    for spec in _kuit_transforms.items():
        ktrkey, (msgctxt, msgid, subsmap, prepend, postpend, textmodf) = spec
        pattern = _kl4_translation(kl4cat, msgctxt, msgid)
        fmt = ktrkey[2]
        if fmt == "rich":
            # Add space before /> in in-place closed rich-text tags,
            # as Qt may fail to guess format as rich-text otherwise.
            pattern = _space_in_place_tag_rx.sub(r"\1 \2", pattern)
        tr = Data()
        tr.pattern = pattern
        tr.subsmap = subsmap
        tr.prepend = prepend
        tr.postpend = postpend
        tr.textmodf = textmodf
        langdata.transform[ktrkey] = tr

    langdata.shcdelim = {}
    for fmt, (msgctxt, msgid) in _kuit_shortcut_delimiters.items():
        langdata.shcdelim[fmt] = _kl4_translation(kl4cat, msgctxt, msgid)

    langdata.keyname = {}
    for msgctxt, msgid in _kuit_key_names:
        langdata.keyname[msgid] = _kl4_translation(kl4cat, msgctxt, msgid)

    langdata.guidelim = {}
    for fmt, (msgctxt, msgid) in _kuit_guipath_delimiters.items():
        langdata.guidelim[fmt] = _kl4_translation(kl4cat, msgctxt, msgid)

    langdata.ifacewrap = None
    if _kuit_spec.interface_wrap:
        langdata.ifacewrap = _kuit_spec.interface_wrap

    _kuit_spec.langdata[lang] = langdata
    return langdata


def lno_to (fstr, p):
    """Return the 1-based line number of offset p in fstr."""
    lno = fstr.count("\n", 0, p) + 1
    return lno


def cno_to (fstr, p):
    """Return the distance of offset p from the last newline before it."""
    pb = fstr.rfind("\n", 0, p)
    # If no \n found, -1 is exactly what's needed below.
    cno = p - pb
    return cno


_ccall_options = Data()

# Call specification.
_ccall_options.calls = {
    # "callname": (ctxt_pos, text_pos, plural_pos)
    "i18n": (-1, 0, -1),
    "i18nc": (0, 1, -1),
    "i18np": (-1, 0, 1),
    "i18ncp": (0, 1, 2),
    "ki18n": (-1, 0, -1),
    "ki18nc": (0, 1, -1),
    "ki18np": (-1, 0, 1),
    "ki18ncp": (0, 1, 2),
    "I18N_NOOP": (-1, 0, -1),
    "I18N_NOOP2": (0, 1, -1),
    "I18N_NOOP2_NOSTRIP": (0, 1, -1),
}
# Equip with total number of strings.
_ccall_options.calls = dict([(cn, inds + (len([i for i in inds if i >= 0]),))
                             for cn, inds in _ccall_options.calls.items()])

# Default string quotes (must be sorted from longest to shortest).
_ccall_options.quotes = list(reversed(sorted([
    "\"",
    "'",
])))

# To-EOL and delimited comments which may mingle with
# concatenated string literals.
_ccall_options.midcstr_eolcmnts = set([
    "//", "#",
])
_ccall_options.midcstr_delimcmnts = set([
    ("/*", "*/"),
])

_ccall_head_rx = re.compile(r"([\w\d_]+)\s*\(", re.U | re.S)
_mask_chr = "\x04"
_print_mask_chr = u"¬"

def resolve_ccall (fstr, path):
    """
    Resolve KUIT markup in string arguments of i18n calls in fstr.

    fstr is the decoded file content and path its file path (used in
    messages). Every "name(" whose name is listed in _ccall_options.calls
    is examined; when all of its expected string arguments are parsed as
    literals, each is passed through resolve_kuit() and, if that reports
    a switch to xi18n, the call name is changed to its x-variant.
    Everything else is copied through unchanged.

    Returns the resulting file content.
    """

    showparse = False
    if showparse:
        report("%s: >>>>> start >>>>>" % path)

    langdata = get_language_data("en_US")
    toxi18n = _ccall_options.switch_to_xi18n

    segs = []
    p1 = 0
    while True:
        m = _ccall_head_rx.search(fstr, p1)
        if not m:
            segs.append(fstr[p1:])
            break
        p2, p3 = m.span()
        callname = m.group(1)
        callspec = _ccall_options.calls.get(callname)
        if callspec:
            ictxt, itext, iplural, total = callspec
            # Parse up to the expected number of string arguments;
            # p1a tracks the position after the last parsed argument.
            p1a = p3
            argspecs = []
            all_strings = True
            end_call = False
            for k in range(total):
                if showparse:
                    report("%s:%d: iarg=%d spos=%d"
                           % (path, lno_to(fstr, p1a), k, p1a))
                ret = _parse_cstr(fstr, p1a, (",", ")"), path,
                                  _ccall_options.midcstr_eolcmnts,
                                  _ccall_options.midcstr_delimcmnts)
                if not ret:
                    all_strings = False
                    break
                p2a, msarg, quote, outs = ret
                argspecs.append((msarg, quote, outs))
                p1a = p2a
                if outs[-1].endswith(")"):
                    end_call = True
                    break
            if len(argspecs) == total:
                if showparse:
                    report("%s:%d: call=[%s]%s"
                           % (path, lno_to(fstr, p3), callname,
                              "".join("{%s||%s}" % (_ppmasked(s[0]), s[1])
                                      for s in argspecs)))
                csegs = []
                lno = lno_to(fstr, p3)
                mctxt = argspecs[ictxt][0] if ictxt >= 0 else None
                res_callname = None
                for iarg, (msarg, quote, outs) in enumerate(argspecs):
                    if iarg != ictxt:
                        ret = resolve_kuit(mctxt, msarg, quote,
                                           langdata, path, lno,
                                           toxi18n=toxi18n)
                        res_mctxt, res_msarg, xi18n = ret[:3]
                        if xi18n and not res_callname:
                            if callname.startswith("i"):
                                res_callname = "x" + callname
                            elif callname.startswith("k"):
                                res_callname = "kx" + callname[1:]
                        res_sarg = _unmask(res_msarg, outs)
                        csegs.append(res_sarg)
                    else:
                        # Placeholder, the resolved context is filled in
                        # below, once a text argument has been resolved.
                        csegs.append("")
                if not res_callname:
                    res_callname = callname
                if ictxt >= 0:
                    outs_ctxt = argspecs[ictxt][2]
                    res_ctxt = _unmask(res_mctxt, outs_ctxt)
                    csegs[ictxt] = res_ctxt
                if showparse:
                    report("%s:%d: res-segs=%s"
                           % (path, lno_to(fstr, p3),
                              "".join("{%s}" % s for s in csegs)))
                segs.append(fstr[p1:p2])
                segs.append(res_callname)
                segs.append(fstr[p2 + len(callname):p3])
                segs.append("".join(csegs))
                p3 = p1a
            elif all_strings and end_call:
                if showparse:
                    report("%s:%d: bad-call" % (path, lno_to(fstr, p3)))
                warning("%s:%d: Too little string arguments to call "
                        "(expected %d, got %d)."
                        % (path, lno_to(fstr, p3), total, len(argspecs)))
                # Copy the call through verbatim, including the arguments
                # already parsed (up to p1a); copying only up to the call
                # head would drop them from the output.
                segs.append(fstr[p1:p1a])
                p3 = p1a
            else:
                if showparse:
                    report("%s:%d: not-literal-call" % (path, lno_to(fstr, p3)))
                segs.append(fstr[p1:p3])
        else:
            segs.append(fstr[p1:p3])
        p1 = p3
    res_fstr = "".join(segs)
    if showparse:
        report("%s: <<<<< end <<<<<" % path)
    return res_fstr


def _ppmasked (s):
    """Return s with mask characters replaced by a printable stand-in."""

    return s.replace(_mask_chr, _print_mask_chr)


def _unmask (ms, outs):
    """
    Return ms with each mask character replaced, in order of appearance,
    by the next element of outs.
    """

    segs = []
    p1 = 0
    io = 0
    while True:
        p2 = ms.find(_mask_chr, p1)
        if p2 < 0:
            segs.append(ms[p1:])
            break
        segs.append(ms[p1:p2])
        segs.append(outs[io])
        io += 1
        p1 = p2 + len(_mask_chr)
    s = "".join(segs)
    return s


def _parse_cstr (fstr, spos, ends, path=None, eolcmnts=(), delimcmnts=()):
    """
    Parse a possibly concatenated string literal in fstr, starting at spos.

    Quoted literals using one and the same quote (the first one found
    from _ccall_options.quotes) are collected verbatim. Whatever sits
    around them (whitespace, to-EOL comments starting with one of
    eolcmnts, comments delimited by one of the delimcmnts pairs, and
    the terminator from ends) is replaced by a mask character, with
    the masked text appended to the outs list, so that _unmask() can
    restore it. Parsing stops after a terminator, at the end of fstr,
    or before any other text.

    Returns (end_position, masked_string, quote, outs), or None if no
    literal was found or a literal or a delimited comment is unterminated.
    Warnings are issued only if path is given.
    """

    showparse = False

    l = len(fstr)
    p = spos
    if showparse:
        report("parse-cstr-start %d" % p)
    segs = []
    outs = []
    quote = None
    while True:
        pp = p
        while p < l and fstr[p].isspace():
            p += 1
        segs.append(_mask_chr)
        outs.append(fstr[pp:p])
        if p == l:
            break
        at_quote = False
        if quote is None:
            # First literal decides the quote for the whole argument.
            for q in _ccall_options.quotes:
                if fstr[p:p + len(q)] == q:
                    at_quote = True
                    quote = q
                    lq = len(quote)
                    break
        else:
            if fstr[p:p + lq] == quote:
                at_quote = True
        if at_quote:
            pp = p
            p += lq
            p = find_esc(fstr, quote, "\\", p)
            if p < 0:
                if path:
                    warning("%s:%d: Unterminated string literal."
                            % (path, lno_to(fstr, pp)))
                return None
            p += lq
            segs.append(fstr[pp:p])
            if showparse:
                report("parse-cstr-quote-end %d" % p)
            continue
        at_end = False
        for end in ends:
            if fstr[p:p + len(end)] == end:
                pp = p
                p += len(end)
                at_end = True
                segs.append(_mask_chr)
                outs.append(fstr[pp:p])
                if showparse:
                    report("parse-cstr-end-end %d" % p)
                break
        if at_end:
            break
        cmnt_end = False
        for ec in eolcmnts:
            if fstr[p:p + len(ec)] == ec:
                pp = p
                p += len(ec)
                while p < l and fstr[p] != "\n":
                    p += 1
                if p < l:
                    p += 1
                cmnt_end = True
                segs.append(_mask_chr)
                outs.append(fstr[pp:p])
                if showparse:
                    report("parse-cstr-eol-cmnt-end %d" % p)
                break
        if cmnt_end:
            continue
        for dc1, dc2 in delimcmnts:
            if fstr[p:p + len(dc1)] == dc1:
                pp = p
                p += len(dc1)
                while p < l and fstr[p:p + len(dc2)] != dc2:
                    p += 1
                if p == l:
                    # Same policy as for unterminated literals:
                    # stay silent when the caller gave no path.
                    if path:
                        warning("%s:%d: Unterminated comment."
                                % (path, lno_to(fstr, pp)))
                    return None
                p += len(dc2)
                cmnt_end = True
                segs.append(_mask_chr)
                outs.append(fstr[pp:p])
                if showparse:
                    report("parse-cstr-delim-cmnt-end %d" % p)
                break
        if cmnt_end:
            continue
        # Neither literal, terminator nor comment: not a plain string
        # argument from here on, stop parsing.
        break
    if quote is None:
        return None

    mstr = "".join(segs)
    return p, mstr, quote, outs


_xml_options = Data()

# Default tags and attributes to extract from.
# Ordering of attributes is significant, first found is taken as context.
# According to extractrc from kdesdk/scripts/.
_xml_options.text_tags = set([
    "text", "title", "string", "whatsthis", "tooltip", "label",
])
_xml_options.ctxt_attrs = [
    "context", "comment",
]

_xml_rx = Data()
_xml_rx.inited = False
def _init_xml_regexes ():
    """
    Compile the element and context attribute regexes from the current
    _xml_options sets, once; later calls are no-ops.
    """
    if _xml_rx.inited:
        return
    tagins = "|".join(sorted(_xml_options.text_tags))
    rx = re.compile(r"<\s*(%s)\b([^>]*)>([^<]*)<\s*/\s*\1\s*>" % tagins,
                    re.U | re.S | re.I)
    _xml_rx.i18n_el = rx
    attrins = "|".join(_xml_options.ctxt_attrs)
    rx = re.compile(r"""^(.*\b(?:%s)\s*=\s*['"])(.*?)(['"].*)$""" % attrins,
                    re.U | re.S | re.I)
    _xml_rx.ctxt_attr = rx
    _xml_rx.inited = True


def resolve_xml (fstr, path):
    """
    Resolve KUIT markup in texts of i18n elements in XML-like content fstr.

    Elements are matched by tag names from _xml_options.text_tags and
    their context is taken from an attribute named in
    _xml_options.ctxt_attrs, if present. Text and context are
    XML-unescaped, passed through resolve_kuit(), escaped again and
    the element is written back. path is used in messages.

    Returns the resulting file content.
    """

    showparse = False
    if showparse:
        report("%s: >>>>> start >>>>>" % path)

    _init_xml_regexes()
    langdata = get_language_data("en_US")
    toxi18n = _xml_options.switch_to_xi18n

    segs = []
    p1 = 0
    while True:
        m = _xml_rx.i18n_el.search(fstr, p1)
        if not m:
            segs.append(fstr[p1:])
            break
        p2, p3 = m.span()
        lno = lno_to(fstr, p2)
        segs.append(fstr[p1:p2])
        tag, attr_str, etext = m.groups()
        ctxt = None
        m_attr = _xml_rx.ctxt_attr.search(attr_str)
        if m_attr:
            attr_head, ectxt, attr_tail = m_attr.groups()
            ctxt, noesc_ctxt = unescape_xml(ectxt, testnoesc=True)
        text, noesc_text = unescape_xml(etext, testnoesc=True)
        if showparse:
            if ctxt is not None:
                report("%s:%d: ctxt-text={%s}{%s}" % (path, lno, ectxt, etext))
            else:
                report("%s:%d: text={%s}" % (path, lno, etext))
        ret = resolve_kuit(ctxt, text, None, langdata, path, lno,
                           toxi18n=toxi18n)
        res_ctxt, res_text = ret[:2]
        res_etext = escape_xml(res_text, noesc=noesc_text)
        if ctxt is not None:
            res_ectxt = escape_xml(res_ctxt, noesc=noesc_ctxt)
            seg = ("<%s%s%s%s>%s</%s>"
                   % (tag, attr_head, res_ectxt, attr_tail, res_etext, tag))
        else:
            seg = "<%s%s>%s</%s>" % (tag, attr_str, res_etext, tag)
        if showparse:
            if ctxt is not None:
                report("%s:%d: res-ctxt-text={%s}{%s}"
                       % (path, lno, res_ectxt, res_etext))
            else:
                report("%s:%d: res-text={%s}" % (path, lno, res_etext))
        segs.append(seg)
        p1 = p3
    res_fstr = "".join(segs)

    if showparse:
        report("%s: <<<<< end <<<<<" % path)
    return res_fstr


_po_options = Data()
_po_options.msgfmt_wrap = False

def resolve_po (path):
    """
    Resolve KUIT markup in the PO file at path, in place.

    Original fields are resolved with "en_US" language data and
    translation fields with the data of the catalog's language
    (_kuit_spec.force_lang if set, otherwise as reported by the catalog).
    In post-merge mode (_po_options.post_merge) current original fields
    are left as they are and only inspected to decide how the remaining
    fields are to be resolved; fuzzy messages whose previous and current
    original fields then coincide are unfuzzied.
    Messages which end up with the key of an earlier message are removed.

    Returns what the catalog's sync() returns (treated by the caller as
    the "file was modified" indicator).
    Raises StandardError if the language cannot be determined.
    """

    from pology.catalog import Catalog
    from pology.gtxtools import msgfilter

    cat = Catalog(path)

    langdata_src = get_language_data("en_US")
    lang = _kuit_spec.force_lang or cat.language()
    if not lang:
        raise StandardError(
            "%s: Cannot determine language of PO file." % path)
    langdata_trn = get_language_data(lang)
    toxi18n_global = _po_options.switch_to_xi18n

    seen_keys = set()
    for ind, msg in enumerate(cat):
        toxi18n = toxi18n_global
        # Override resolution setting by message xi18n flag.
        if "kde-kuit-format" in msg.flag:
            toxi18n = 2
        # Original fields.
        ctxt = msg.msgctxt
        forcerich = False
        if not _po_options.post_merge:
            ret = resolve_kuit(ctxt, msg.msgid, None,
                               langdata_src, path, msg.refline,
                               toxi18n=toxi18n)
            msg.msgid = ret[1]
            if ctxt is not None:
                msg.msgctxt = ret[0]
            if msg.msgid_plural is not None:
                ret = resolve_kuit(ctxt, msg.msgid_plural, None,
                                   langdata_src, path, msg.refline,
                                   toxi18n=toxi18n)
                msg.msgid_plural = ret[1]
        else:
            # Check if to not touch existing KUIT or
            # to force rich text in non-original fields.
            if not forcerich:
                ret = resolve_kuit(ctxt, msg.msgid, None,
                                   langdata_src, path, msg.refline,
                                   toxi18n=toxi18n)
                has_any_html_tag, has_any_kuit_tag = ret[3:5]
                if has_any_kuit_tag:
                    toxi18n = 2
                else:
                    forcerich = has_any_html_tag
            if not forcerich:
                ret = resolve_entities(msg.msgid, path, msg.refline)
                any_entity_resolved = ret[1]
                forcerich = any_entity_resolved
        # Previous original fields.
        ctxt_prev = msg.msgctxt_previous
        has_previous = False
        if msg.msgid_previous is not None:
            has_previous = True
            ret = resolve_kuit(ctxt_prev, msg.msgid_previous, None,
                               langdata_src, path, msg.refline,
                               toxi18n=toxi18n, forcerich=forcerich)
            msg.msgid_previous = ret[1]
            if ctxt_prev is not None:
                msg.msgctxt_previous = ret[0]
            if msg.msgid_plural_previous is not None:
                ret = resolve_kuit(ctxt_prev, msg.msgid_plural_previous, None,
                                   langdata_src, path, msg.refline,
                                   toxi18n=toxi18n, forcerich=forcerich)
                msg.msgid_plural_previous = ret[1]
        # Translation fields.
        ctxt_trn = ctxt if (not msg.fuzzy or not has_previous) else ctxt_prev
        for i in range(len(msg.msgstr)):
            ret = resolve_kuit(ctxt_trn, msg.msgstr[i], None,
                               langdata_trn, path, msg.refline,
                               toxi18n=toxi18n, forcerich=forcerich)
            msg.msgstr[i] = ret[1]
            if msg.translated:
                # Keep the trailing newline of translation in agreement
                # with that of the original.
                if msg.msgid.endswith("\n") and not msg.msgstr[i].endswith("\n"):
                    msg.msgstr[i] += "\n"
                elif not msg.msgid.endswith("\n") and msg.msgstr[i].endswith("\n"):
                    msg.msgstr[i] = msg.msgstr[i][:-1]
        # In post-merge mode, maybe it can be unfuzzied now.
        if _po_options.post_merge and msg.fuzzy and all(list(msg.msgstr)):
            if (    msg.msgctxt == msg.msgctxt_previous
                and msg.msgid == msg.msgid_previous
                and msg.msgid_plural == msg.msgid_plural_previous
            ):
                msg.unfuzzy()
        # Conversion may make a message with same key as a previous one,
        # remove the current message in that case.
        if msg.key in seen_keys:
            cat.remove_on_sync(ind)
        else:
            seen_keys.add(msg.key)

    modified = cat.sync()
    if modified and _po_options.msgfmt_wrap:
        msgfilter(["cat"])(cat.filename)

    return modified


# Resolution types, as
# "restype": (extensions, resolver function, resolver works on content).
_map_restype_ext = {
    "ccall": (set([
        "cpp", "cxx", "cc", "c",
        "h", "hpp", "hxx", "hh",
        "py", "js", "rb", "qml",
        #"kcfg", won't work due to XML escaping; but there is
        # no existing case of embedded i18n() with KUIT in KDE repos.
    ]), resolve_ccall, True),

    "xml": (set([
        "ui", "rc", "kcfg",
    ]), resolve_xml, True),

    "po": (set([
        "po", "pot",
    ]), resolve_po, False),
}
# Inverted resolution types by extension.
_map_ext_restype = {}
def _init_map_ext_restype ():
    """Fill _map_ext_restype from _map_restype_ext, once."""
    if _map_ext_restype:
        return
    for rt, (exts, rf, ons) in _map_restype_ext.items():
        for ext in exts:
            if ext not in _map_ext_restype:
                _map_ext_restype[ext] = []
            _map_ext_restype[ext].append((rt, rf, ons))


def get_resolvers_for_file (path):
    """
    Return the list of (restype, resolver, onstring) triplets registered
    for the extension of path; empty list if there are none.

    The extension is whatever follows the last dot in path, compared
    case-insensitively (registered extensions are kept in lower case).
    """

    _init_map_ext_restype()
    p = path.rfind(".")
    if p >= 0:
        ext = path[p + 1:].lower()
    else:
        ext = ""
    rspecs = _map_ext_restype.get(ext, [])
    return rspecs


# KUIT keyboard shortcut delimiters and lookup key in PO files, as
# format: (msgctxt, msgid).
# According to kuitsemantics.cpp from kdecore.
_kuit_raw_shortcut_delimiter_rx = re.compile(r"\+|-", re.U)
_kuit_shortcut_delimiters = {
    "plain": (u"shortcut-key-delimiter/plain", u"+"),
    "rich": (u"shortcut-key-delimiter/rich", u"+"),
}
# Add delimiters for term format, same as plain.
_kuit_shortcut_delimiters["term"] = _kuit_shortcut_delimiters["plain"]

# KUIT keyboard key names and lookup in PO files,
# as set((msgctxt, msgid)). F%1 is special.
_kuit_key_names_raw = set([
    u"Alt", u"AltGr", u"Backspace", u"CapsLock", u"Control", u"Ctrl",
    u"Del", u"Delete", u"Down", u"End", u"Enter", u"Esc", u"Escape",
    u"Home", u"Hyper", u"Ins", u"Insert", u"Left", u"Menu", u"Meta",
    u"NumLock", u"PageDown", u"PageUp", u"PgDown", u"PgUp", u"PauseBreak",
    u"PrintScreen", u"PrtScr", u"Return", u"Right", u"ScrollLock", u"Shift",
    u"Space", u"Super", u"SysReq", u"Tab", u"Up", u"Win", u"F%1",
])
_kuit_key_names = set((u"keyboard-key-name", kn) for kn in _kuit_key_names_raw)

def textmod_shortcut (text, quote, fmt, langdata):
    """
    Convert a raw keyboard shortcut text into its localized form.

    text is split on the raw delimiters ("+" or "-"), each key name is
    stripped of surrounding whitespace and replaced through
    langdata.keyname (function keys, "F" followed by digits, go through
    the "F%1" pattern; unknown names stay as they are), and the names
    are joined again by langdata.shcdelim[fmt].
    If quote is given, the result is passed through escape_c() with it.

    Returns the converted text.
    """

    lkeynames = []
    for rawname in _kuit_raw_shortcut_delimiter_rx.split(text):
        keyname = rawname.strip()
        if keyname[:1] == "F" and keyname[1:].isdigit():
            lkeypattern = langdata.keyname.get(u"F%1", u"F%1")
            lkeyname = lkeypattern.replace("%1", keyname[1:])
        else:
            lkeyname = langdata.keyname.get(keyname, keyname)
        lkeynames.append(lkeyname)
    if len(lkeynames) > 1:
        res_text = langdata.shcdelim[fmt].join(lkeynames)
    else:
        # Single key: the delimiter for this format is not needed,
        # and is therefore not looked up either.
        res_text = lkeynames[0]
    if quote:
        res_text = escape_c(res_text, quote)
    return res_text


# KUIT UI path delimiters and lookup key in PO files, as
# format: (msgctxt, msgid).
# According to kuitsemantics.cpp from kdecore.
_kuit_raw_guipath_delimiter_rx = re.compile(r"->", re.U)
_kuit_guipath_delimiters = {
    "plain": (u"gui-path-delimiter/plain", u"→"),
    "rich": (u"gui-path-delimiter/rich", u"→"),
}
# Add delimiters for term format, same as plain.
_kuit_guipath_delimiters["term"] = _kuit_guipath_delimiters["plain"]

def textmod_interface (text, quote, fmt, langdata):
    """
    Localize the text of a GUI path.

    The text is split on the raw '->' delimiter and each path element
    is stripped of surrounding whitespace. If langdata.ifacewrap is
    non-empty, every element is wrapped with it: all characters but
    the last go in front, the last character goes behind. The elements
    are joined with langdata.guidelim[fmt]. If quote is non-empty,
    the result is passed through escape_c.
    """

    wrap = langdata.ifacewrap
    elements = []
    for raw_element in _kuit_raw_guipath_delimiter_rx.split(text):
        element = raw_element.strip()
        if wrap:
            element = "%s%s%s" % (wrap[:-1], element, wrap[-1:])
        elements.append(element)
    if len(elements) > 1:
        # The delimiter is looked up only when there is something to join.
        res_text = langdata.guidelim[fmt].join(elements)
    else:
        res_text = "".join(elements)
    if quote:
        res_text = escape_c(res_text, quote)
    return res_text


# KUIT transformation patterns and lookup key in PO files, as
# (tag, attributes, format): (msgctxt, msgid, subsmap, prepend, postpend, textmodf).
# According to kuitsemantics.cpp from kdecore.
# Each value is the tuple
# (msgctxt, msgid pattern, placeholder -> tag/attribute name map,
#  prepend, postpend, text modification function or None).
# The msgid patterns double as lookup keys in PO files, so they must be
# kept character-for-character; in particular the literal angle brackets
# of the placeholder and email patterns are written as XML entities.
_kuit_transforms = {
    (u"title", frozenset([]), "plain"):
        (u"@title/plain",
         u"== %1 ==",
         {"%1": "title"},
         "", "\n",
         None),
    (u"title", frozenset([]), "rich"):
        (u"@title/rich",
         u"<h2>%1</h2>",
         {"%1": "title"},
         "", "",
         None),
    (u"subtitle", frozenset([]), "plain"):
        (u"@subtitle/plain",
         u"~ %1 ~",
         {"%1": "subtitle"},
         "", "\n",
         None),
    (u"subtitle", frozenset([]), "rich"):
        (u"@subtitle/rich",
         u"<h3>%1</h3>",
         {"%1": "subtitle"},
         "", "",
         None),
    (u"para", frozenset([]), "plain"):
        (u"@para/plain",
         u"%1",
         {"%1": "para"},
         "", "\n",
         None),
    (u"para", frozenset([]), "rich"):
        (u"@para/rich",
         u"<p>%1</p>",
         {"%1": "para"},
         "", "",
         None),
    (u"list", frozenset([]), "plain"):
        (u"@list/plain",
         u"%1",
         {"%1": "list"},
         "\n", "",
         None),
    (u"list", frozenset([]), "rich"):
        (u"@list/rich",
         u"<ul>%1</ul>",
         {"%1": "list"},
         "", "",
         None),
    (u"item", frozenset([]), "plain"):
        (u"@item/plain",
         u"  * %1",
         {"%1": "item"},
         "", "\n",
         None),
    (u"item", frozenset([]), "rich"):
        (u"@item/rich",
         u"<li>%1</li>",
         {"%1": "item"},
         "", "",
         None),
    (u"note", frozenset([]), "plain"):
        (u"@note/plain",
         u"Note: %1",
         {"%1": "note"},
         "", "",
         None),
    (u"note", frozenset([]), "rich"):
        (u"@note/rich",
         u"<i>Note</i>: %1",
         {"%1": "note"},
         "", "",
         None),
    (u"note", frozenset([u"label"]), "plain"):
        (u"@note-with-label/plain\n"
         u"%1 is the note label, %2 is the text",
         u"%1: %2",
         {"%1": "label", "%2": "note"},
         "", "",
         None),
    (u"note", frozenset([u"label"]), "rich"):
        (u"@note-with-label/rich\n"
         u"%1 is the note label, %2 is the text",
         u"<i>%1</i>: %2",
         {"%1": "label", "%2": "note"},
         "", "",
         None),
    (u"warning", frozenset([]), "plain"):
        (u"@warning/plain",
         u"WARNING: %1",
         {"%1": "warning"},
         "", "",
         None),
    (u"warning", frozenset([]), "rich"):
        (u"@warning/rich",
         u"<b>Warning</b>: %1",
         {"%1": "warning"},
         "", "",
         None),
    (u"warning", frozenset([u"label"]), "plain"):
        (u"@warning-with-label/plain\n"
         u"%1 is the warning label, %2 is the text",
         u"%1: %2",
         {"%1": "label", "%2": "warning"},
         "", "",
         None),
    (u"warning", frozenset([u"label"]), "rich"):
        (u"@warning-with-label/rich\n"
         u"%1 is the warning label, %2 is the text",
         u"<b>%1</b>: %2",
         {"%1": "label", "%2": "warning"},
         "", "",
         None),
    (u"link", frozenset([]), "plain"):
        (u"@link/plain",
         u"%1",
         {"%1": "link"},
         "", "",
         None),
    (u"link", frozenset([]), "rich"):
        (u"@link/rich",
         u"<a href=\"%1\">%1</a>",
         {"%1": "link"},
         "", "",
         None),
    (u"link", frozenset([u"url"]), "plain"):
        (u"@link-with-description/plain\n"
         u"%1 is the URL, %2 is the descriptive text",
         u"%2 (%1)",
         {"%2": "link", "%1": "url"},
         "", "",
         None),
    (u"link", frozenset([u"url"]), "rich"):
        (u"@link-with-description/rich\n"
         u"%1 is the URL, %2 is the descriptive text",
         u"<a href=\"%1\">%2</a>",
         {"%2": "link", "%1": "url"},
         "", "",
         None),
    (u"filename", frozenset([]), "plain"):
        (u"@filename/plain",
         u"‘%1’",
         {"%1": "filename"},
         "", "",
         None),
    (u"filename", frozenset([]), "rich"):
        (u"@filename/rich",
         u"<tt>%1</tt>",
         {"%1": "filename"},
         "", "",
         None),
    (u"application", frozenset([]), "plain"):
        (u"@application/plain",
         u"%1",
         {"%1": "application"},
         "", "",
         None),
    (u"application", frozenset([]), "rich"):
        (u"@application/rich",
         u"%1",
         {"%1": "application"},
         "", "",
         None),
    (u"command", frozenset([]), "plain"):
        (u"@command/plain",
         u"%1",
         {"%1": "command"},
         "", "",
         None),
    (u"command", frozenset([]), "rich"):
        (u"@command/rich",
         u"<tt>%1</tt>",
         {"%1": "command"},
         "", "",
         None),
    (u"command", frozenset([u"section"]), "plain"):
        (u"@command-with-section/plain\n"
         u"%1 is the command name, %2 is its man section",
         u"%1(%2)",
         {"%1": "command", "%2": "section"},
         "", "",
         None),
    (u"command", frozenset([u"section"]), "rich"):
        (u"@command-with-section/rich\n"
         u"%1 is the command name, %2 is its man section",
         u"<tt>%1(%2)</tt>",
         {"%1": "command", "%2": "section"},
         "", "",
         None),
    (u"resource", frozenset([]), "plain"):
        (u"@resource/plain",
         u"“%1”",
         {"%1": "resource"},
         "", "",
         None),
    (u"resource", frozenset([]), "rich"):
        (u"@resource/rich",
         u"“%1”",
         {"%1": "resource"},
         "", "",
         None),
    (u"icode", frozenset([]), "plain"):
        (u"@icode/plain",
         u"“%1”",
         {"%1": "icode"},
         "", "",
         None),
    (u"icode", frozenset([]), "rich"):
        (u"@icode/rich",
         u"<tt>%1</tt>",
         {"%1": "icode"},
         "", "",
         None),
    (u"bcode", frozenset([]), "plain"):
        (u"@bcode/plain",
         u"\n%1\n",
         {"%1": "bcode"},
         "", "",
         None),
    (u"bcode", frozenset([]), "rich"):
        (u"@bcode/rich",
         u"<pre>%1</pre>",
         {"%1": "bcode"},
         "", "",
         None),
    (u"shortcut", frozenset([]), "plain"):
        (u"@shortcut/plain",
         u"%1",
         {"%1": "shortcut"},
         "", "",
         textmod_shortcut),
    (u"shortcut", frozenset([]), "rich"):
        (u"@shortcut/rich",
         u"<b>%1</b>",
         {"%1": "shortcut"},
         "", "",
         textmod_shortcut),
    (u"interface", frozenset([]), "plain"):
        (u"@interface/plain",
         u"|%1|",
         {"%1": "interface"},
         "", "",
         textmod_interface),
    (u"interface", frozenset([]), "rich"):
        (u"@interface/rich",
         u"<i>%1</i>",
         {"%1": "interface"},
         "", "",
         textmod_interface),
    (u"emphasis", frozenset([]), "plain"):
        (u"@emphasis/plain",
         u"*%1*",
         {"%1": "emphasis"},
         "", "",
         None),
    (u"emphasis", frozenset([]), "rich"):
        (u"@emphasis/rich",
         u"<i>%1</i>",
         {"%1": "emphasis"},
         "", "",
         None),
    (u"emphasis", frozenset([u"strong"]), "plain"):
        (u"@emphasis-strong/plain",
         u"**%1**",
         {"%1": "emphasis"},
         "", "",
         None),
    (u"emphasis", frozenset([u"strong"]), "rich"):
        (u"@emphasis-strong/rich",
         u"<b>%1</b>",
         {"%1": "emphasis"},
         "", "",
         None),
    # The angle brackets around placeholders and e-mail addresses are
    # literal text, not markup, hence entities rather than bare < and >.
    (u"placeholder", frozenset([]), "plain"):
        (u"@placeholder/plain",
         u"&lt;%1&gt;",
         {"%1": "placeholder"},
         "", "",
         None),
    (u"placeholder", frozenset([]), "rich"):
        (u"@placeholder/rich",
         u"&lt;<i>%1</i>&gt;",
         {"%1": "placeholder"},
         "", "",
         None),
    (u"email", frozenset([]), "plain"):
        (u"@email/plain",
         u"&lt;%1&gt;",
         {"%1": "email"},
         "", "",
         None),
    (u"email", frozenset([]), "rich"):
        (u"@email/rich",
         u"&lt;<a href=\"mailto:%1\">%1</a>&gt;",
         {"%1": "email"},
         "", "",
         None),
    (u"email", frozenset([u"address"]), "plain"):
        (u"@email-with-name/plain\n"
         u"%1 is name, %2 is address",
         u"%1 &lt;%2&gt;",
         {"%1": "email", "%2": "address"},
         "", "",
         None),
    (u"email", frozenset([u"address"]), "rich"):
        (u"@email-with-name/rich\n"
         u"%1 is name, %2 is address",
         u"<a href=\"mailto:%2\">%1</a>",
         {"%1": "email", "%2": "address"},
         "", "",
         None),
    (u"envar", frozenset([]), "plain"):
        (u"@envar/plain",
         u"$%1",
         {"%1": "envar"},
         "", "",
         None),
    (u"envar", frozenset([]), "rich"):
        (u"@envar/rich",
         u"<tt>$%1</tt>",
         {"%1": "envar"},
         "", "",
         None),
    (u"message", frozenset([]), "plain"):
        (u"@message/plain",
         u"/%1/",
         {"%1": "message"},
         "", "",
         None),
    (u"message", frozenset([]), "rich"):
        (u"@message/rich",
         u"<i>%1</i>",
         {"%1": "message"},
         "", "",
         None),
    (u"nl", frozenset([]), "plain"):
        (u"@nl/plain",
         u"%1\n",
         {"%1": "nl"},
         "", "",
         None),
    (u"nl", frozenset([]), "rich"):
        (u"@nl/rich",
         u"%1<br/>",
         {"%1": "nl"},
         "", "",
         None),
}

# Add patterns for term format, same as plain.
# Iterate over a snapshot, because the dictionary grows inside the loop
# (a live items view would raise RuntimeError on Python 3).
for (tag, attrs, fmt), trspec in list(_kuit_transforms.items()):
    if fmt == "plain":
        _kuit_transforms[(tag, attrs, "term")] = trspec

# Collect all known tags and formats.
_kuit_tags = set()
_known_formats = set()
for (tag, attrs, fmt), trspec in _kuit_transforms.items():
    _kuit_tags.add(tag)
    _known_formats.add(fmt)

# Qt rich text tags (used for implicit determination of rich format).
# "cita" is kept only for backward compatibility; it looks like
# a misspelling of "cite", which is the tag Qt actually supports.
_html_tags = set([
    "a", "address", "b", "big", "blockquote", "body", "br",
    "center", "cita", "cite", "code", "dd", "dfn", "div", "dl", "dt", "em",
    "font", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html",
    "i", "img", "kbd", "meta", "li", "nobr", "ol", "p", "pre",
    "qt", "s", "samp", "small", "span", "strong", "sup", "sub",
    "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "tt",
    "u", "ul", "var",
])

# Default target formats by context marker.
# According to kuitsemantics.cpp from kdecore.
_cmarker_to_format = {
    "@action": "plain",
    "@title": "plain",
    "@label": "plain",
    "@option": "plain",
    "@item": "plain",
    "@info": "rich",
    "@info:progress": "plain",
    "@info:status": "plain",
    "@info:credit": "plain",
    "@info:shell": "plain",
}

# Aliases for names that exist only on Python 2, so that the code below
# behaves the same on either interpreter line.
try:
    _unichr = unichr
except NameError:
    _unichr = chr
try:
    _StandardError = StandardError
except NameError:
    _StandardError = Exception

# A complete top-level <qt>...</qt> or <html>...</html> wrapper.
# The closing tag must repeat the opening one (case-insensitively).
_top_tag_rx = re.compile(r"<\s*(qt|html)\b[^>]*>(.*)<\s*/\s*\1\s*>",
                         re.U | re.S | re.I)

def resolve_kuit (ctxt, text, quote, langdata, path, lno,
                  toxi18n=0, forcerich=False):
    """
    Resolve KUIT markup in one i18n string.

    ctxt is the message context (or None) and text the message text,
    both possibly still wrapped in quote characters; quote is the quote
    string of the surrounding literal, or a false value if there is none.
    langdata supplies the transformation patterns. path and lno are used
    only in warnings.

    toxi18n selects switching to an xi18n call instead of resolving:
    with 1 the string is switched if it contains any KUIT tag,
    with 2 it is always switched. forcerich forces the rich format
    regardless of the context marker.

    Returns the tuple
    (context, text, xi18n, has_any_html_tag, has_any_kuit_tag),
    where xi18n tells whether the call should become an xi18n call.
    When nothing is to be resolved, the original context and text
    are returned.
    """

    xi18n = False

    fmt_cm, fmt_rc, res_ctxt, has_cmarker = format_from_cmarker(ctxt, quote)
    if forcerich:
        fmt_cm = "rich"
        fmt_rc = "rich"
    if fmt_cm and fmt_cm not in _known_formats:
        warning("%s:%d: Unknown format modifier '%s' in context marker. "
                "The string will not be resolved until this is fixed."
                % (path, lno, fmt_cm))
        has_any_html_tag = False
        has_any_kuit_tag = False
        return ctxt, text, xi18n, has_any_html_tag, has_any_kuit_tag
    if toxi18n in (1, 2) and fmt_cm != fmt_rc and not path.endswith(".po"):
        warning("%s:%d: Manual format modifier '%s' does not match "
                "the implicit format modifier '%s' based on context marker. "
                "Manual format modifiers are no longer supported, "
                "replace them with another format selection method."
                % (path, lno, fmt_cm, fmt_rc))
        # Recover original context with modifier still inside.
        res_ctxt = ctxt
    fmt = fmt_cm or format_from_tags(text, quote) or "plain"

    ret = _resolve_kuit_r(text, quote, fmt, langdata, path, lno)
    res_text, has_any_kuit_tag, has_any_html_tag, has_top_tag = ret

    if (toxi18n == 1 and has_any_kuit_tag) or toxi18n == 2:
        if has_any_html_tag:
            warning("%s:%d: Mixed KUIT and HTML tags. "
                    "This should be changed to all-KUIT tags."
                    % (path, lno))
        xi18n = True
        # The text stays unresolved, xi18n will process it at runtime.
        return res_ctxt, text, xi18n, has_any_html_tag, has_any_kuit_tag

    if fmt_cm != "rich" and not has_any_html_tag:
        ret = resolve_entities(res_text, path, lno)
        res_text, any_entity_resolved = ret
    else:
        any_entity_resolved = False

    if not has_cmarker and not has_any_kuit_tag and not any_entity_resolved:
        # In this case the resolution should have been no-op,
        # so return the original input just in case.
        return ctxt, text, xi18n, has_any_html_tag, has_any_kuit_tag

    if has_top_tag or fmt_cm == "rich":
        # What to do with top tag in rich text.
        # 0 - As in KUIT processing in kdecore. But this would cause
        #     <html> tags to appear in otherwise plain text which happens
        #     to be sent to rich-text capable output. People may not like it.
        #     (It would also cause that running resolution over already
        #     resolved files leads to spurious addition of <html> tags,
        #     e.g. 1st resolution @info/plain -> @info and no <html> tag,
        #     2nd resolution @info -> @info and <html> tag.)
        # 1 - Original top tag is removed and then <html> tag added only if
        #     there is another tag or entity in the text.
        # 2 - Top tag is neither added nor removed, but left as it is
        #     in the literal text.
        top_tag_res = 2
        if top_tag_res in (0, 1):
            if has_top_tag:
                res_text = _top_tag_rx.sub(r"\2", res_text)
            if top_tag_res == 0 or ("<" in res_text or "&" in res_text):
                p1 = 0
                p2 = len(res_text)
                if quote:
                    # Keep the new top tag inside the outermost quotes.
                    p1 = res_text.find(quote) + len(quote)
                    p2 = res_text.rfind(quote)
                res_text = ("%s<html>%s</html>%s"
                            % (res_text[:p1], res_text[p1:p2], res_text[p2:]))
        elif top_tag_res == 2:
            pass
        else:
            raise _StandardError(
                "Unknown top tag resolution choice '%d'." % top_tag_res)

    return res_ctxt, res_text, xi18n, has_any_html_tag, has_any_kuit_tag


# An element with content (<tag attrs>text</tag>) or an empty element
# (<tag/>); for the latter the attribute and text groups are None.
_element_rx = re.compile(r"<\s*(\w+)(?:([^>]*)>(.*?)<\s*/\s*\1|\s*/)\s*>",
                         re.U | re.S)
_attribute_rx = re.compile(r"""\b(\w+)\s*=\s*["'](.*?)["']""")

def _resolve_kuit_r (text, quote, fmt, langdata, path, lno):
    """
    Recursively resolve the elements found in text for the format fmt.

    KUIT elements are replaced according to langdata.transform,
    the numid element is removed, HTML elements are kept with their
    content resolved, and unknown elements are left as they are.

    Returns the tuple
    (resolved text, has_any_kuit_tag, has_any_html_tag, has_top_tag).
    """

    segs = []
    p1 = 0
    has_any_kuit_tag = False
    has_any_html_tag = False
    has_top_tag = False
    while True:
        m = _element_rx.search(text, p1)
        if not m:
            segs.append(text[p1:])
            break
        p2, p3 = m.span()
        segs.append(text[p1:p2])
        tag, attrstr, etext = m.groups()
        if etext is None:
            in_place = True
            attrstr, etext = "", ""
        else:
            in_place = False
        ret = _resolve_kuit_r(etext, quote, fmt, langdata, path, lno)
        res_etext, has_any_kuit_tag_1, has_any_html_tag_1, has_top_tag_1 = ret
        has_any_html_tag = has_any_html_tag or has_any_html_tag_1
        has_any_kuit_tag = has_any_kuit_tag or has_any_kuit_tag_1
        res_span = text[p2:p3] # in case no other resolution
        if tag in _kuit_tags:
            has_any_kuit_tag = True
            attrmap = dict(_attribute_rx.findall(attrstr))
            has_top_tag = has_top_tag or has_top_tag_1
            trkey = (tag, frozenset(attrmap.keys()), fmt)
            tr = langdata.transform.get(trkey)
            if tr is not None:
                if tr.textmodf:
                    res_etext = tr.textmodf(res_etext, quote, fmt, langdata)
                res_span = tr.pattern
                if quote:
                    res_span = escape_c(res_span, quote)
                # Replacements by name: attribute values, and the element
                # text under the name of the tag itself.
                replmap = attrmap
                replmap[tag] = res_etext
                # Replace in one pass, because replacement might contain %N.
                p1a = 0
                csegs = []
                seen_pls = set()
                while True:
                    p2a = res_span.find("%", p1a)
                    if p2a < 0:
                        csegs.append(res_span[p1a:])
                        break
                    csegs.append(res_span[p1a:p2a])
                    if res_span[p2a + 1:p2a + 2].isdigit():
                        pl = res_span[p2a:p2a + 2]
                        nm = tr.subsmap[pl]
                        cseg = replmap[nm] # cannot fail
                        if quote and pl in seen_pls:
                            # If placeholder was already replaced once,
                            # further replacements have to eliminate
                            # masking chars and quotes, because
                            # total number of masking chars must not change.
                            cseg = join_quoted(cseg, quote,
                                               invert=True, strip=True)
                        seen_pls.add(pl)
                        csegs.append(cseg)
                        p1a = p2a + 2
                    else:
                        csegs.append("%")
                        p1a = p2a + 1
                res_span = "".join(csegs)
                res_span = tr.prepend + res_span + tr.postpend
            else:
                warning("%s:%d: No transformation for tag '%s' and format '%s'."
                        % (path, lno, tag, fmt))
        elif tag == "numid":
            has_any_kuit_tag = True
            # What to do with numid tag.
            # 0 - Simply remove numid tag, with a warning to manually convert
            #     associated argument into digit string.
            # 1 - Modify all placeholders in the text wrapped with numid
            #     to %I<N> form, which indicates numeric identifier formatting.
            numid_tag_res = 0
            if numid_tag_res == 0:
                if not path.endswith((".po", ".pot")):
                    warning("%s:%d: A '%s' tag has been removed, do something "
                            "manually with the affected argument "
                            "(e.g. wrap it in QString::number())."
                            % (path, lno, tag))
                res_span = res_etext
            elif numid_tag_res == 1:
                nisegs = []
                p1b = 0
                while True:
                    p2b = res_etext.find("%", p1b)
                    if p2b < 0:
                        nisegs.append(res_etext[p1b:])
                        break
                    nisegs.append(res_etext[p1b:p2b])
                    if res_etext[p2b + 1:p2b + 2].isdigit():
                        p3b = p2b + 1
                        while p3b < len(res_etext) and res_etext[p3b].isdigit():
                            p3b += 1
                        nisegs.append("%I" + res_etext[p2b + 1:p3b])
                        p1b = p3b
                    else:
                        nisegs.append("%")
                        # Continue right after this percent sign, or else
                        # the text before it would be emitted again.
                        p1b = p2b + 1
                res_span = "".join(nisegs)
            else:
                raise _StandardError(
                    "Unknown '%s' tag resolution choice '%d'."
                    % ("numid", numid_tag_res))
        elif tag in _html_tags:
            has_any_html_tag = True
            if tag.lower() in ("qt", "html"):
                has_top_tag = True
            if not in_place:
                res_span = "<%s%s>%s</%s>" % (tag, attrstr, res_etext, tag)
        segs.append(res_span)
        p1 = p3
    res_text = "".join(segs)
    return res_text, has_any_kuit_tag, has_any_html_tag, has_top_tag


_entity_rx = re.compile(r"&([a-z]+|#[0-9]+|#x[0-9a-fA-F]+);", re.U | re.S)

_xml_entities = {
    "lt": "<",
    "gt": ">",
    "amp": "&",
    "apos": "'",
    "quot": "\"",
}

def resolve_entities (text, path, lno):
    """
    Replace XML entities in text with the characters they stand for.

    The five predefined XML entities and numeric character references
    (decimal and hexadecimal) are resolved. Other named entities are
    left untouched without a warning, as they may be HTML entities.
    A numeric reference which cannot be converted to a character is
    reported with a warning and left untouched as well.

    Returns the tuple (resolved text, any_entity_resolved).
    """

    any_entity_resolved = False
    segs = []
    p1 = 0
    while True:
        m = _entity_rx.search(text, p1)
        if not m:
            segs.append(text[p1:])
            break
        p2, p3 = m.span()
        segs.append(text[p1:p2])
        span = text[p2:p3]
        ent = m.group(1)
        if ent.startswith("#"): # numeric character
            try:
                if ent[1] == "x":
                    c = _unichr(int(ent[2:], 16))
                else:
                    c = _unichr(int(ent[1:], 10))
            except (ValueError, OverflowError):
                # Code point out of range for this interpreter.
                warning("%s:%d: Invalid numeric XML entity '%s'."
                        % (path, lno, ent))
                segs.append(span)
            else:
                segs.append(c)
                any_entity_resolved = True
        elif ent in _xml_entities:
            segs.append(_xml_entities[ent])
            any_entity_resolved = True
        else:
            # Don't warn, may be some HTML entity.
            segs.append(span)
        p1 = p3
    res_text = "".join(segs)
    return res_text, any_entity_resolved


# Context marker: @role, optional :cue, optional /format.
_cmarker_rx = re.compile(r"@(\w+):?(\w+)?/?(\w+)?", re.U | re.S)

def format_from_cmarker (ctxt, quote):
    """
    Determine the target format from the context marker in ctxt.

    The marker is expected at the very start of the context, or right
    after the first occurrence of quote if quote is non-empty.

    Returns the tuple (fmt, fmt_rc, res_ctxt, has_cmarker):
    fmt is the explicit format modifier if there is one and otherwise
    the implicit format; fmt_rc is the implicit format by role and cue
    (None if the role is unknown); res_ctxt is the context with
    the explicit format modifier removed; has_cmarker tells whether
    a context marker was found at all.
    """

    fmt = None
    fmt_rc = None
    res_ctxt = ctxt
    has_cmarker = False
    if ctxt is not None:
        p1 = 0
        if quote:
            p1 = ctxt.find(quote) + len(quote)
        m = _cmarker_rx.match(ctxt, p1)
        if m:
            has_cmarker = True
            role, cue, fmt = m.groups()
            if role and cue: # implicit format by role and cue
                fmt_rc = _cmarker_to_format.get("@%s:%s" % (role, cue))
            if not fmt_rc: # implicit format by role alone
                fmt_rc = _cmarker_to_format.get("@%s" % role)
            if fmt: # explicit format modifier
                # Cut out the slash and the modifier following it.
                p2 = ctxt.find("/", p1)
                res_ctxt = ctxt[:p2] + ctxt[p2 + 1 + len(fmt):]
            else:
                fmt = fmt_rc
    return fmt, fmt_rc, res_ctxt, has_cmarker


_opentag_rx = re.compile(r"<\s*(\w+)[^>]*>", re.U | re.S)

def format_from_tags (text, quote):
    """
    Return "rich" if text contains an opening Qt rich text tag,
    and None otherwise. The quote argument is not used.
    """

    fmt = None
    for tag in _opentag_rx.findall(text):
        if tag in _html_tags:
            fmt = "rich"
            break
    return fmt


def escape_c (text, quote):
    """
    Escape text for embedding into a C-like string literal delimited
    by quote: backslashes, the quote string itself (if non-empty),
    tabs and newlines are escaped.
    """

    text = text.replace("\\", "\\\\") # must be first
    if quote:
        text = text.replace(quote, "\\" + quote)
    text = text.replace("\t", "\\t")
    text = text.replace("\n", "\\n")
    return text


def join_quoted (s, quote, invert=False, strip=False):
    """
    Join the pieces of a concatenated string literal.

    s is scanned for unescaped occurrences of quote. The pieces found
    between an opening and a closing quote are collected separately
    from the pieces outside of quotes. By default the joined inner
    pieces are returned; with invert set, the joined outer pieces.
    Unless strip is set, the result is wrapped in quote again.

    Raises an error if an opening quote has no closing counterpart.
    """

    inner_segs = []
    outer_segs = []
    p1 = 0
    lq = len(quote)
    while True:
        p2 = find_esc(s, quote, "\\", p1)
        if p2 < 0:
            outer_segs.append(s[p1:])
            break
        outer_segs.append(s[p1:p2])
        p2 += lq
        # NOTE(review): find_skip_esc is defined elsewhere in the file;
        # presumably it finds the closing quote, skipping escaped ones.
        p3 = find_skip_esc(s, quote, "\\", p2)
        if p3 < 0:
            raise _StandardError(
                "Malformed concatenated string literal '%s'." % s)
        inner_segs.append(s[p2:p3])
        p1 = p3 + lq
    js = "".join(outer_segs if invert else inner_segs)
    if not strip:
        js = quote + js + quote
    return js


def find_esc (s, f, e, p=0):
    """
    Find the first occurrence of f in s at or after position p,
    skipping over every occurrence of the escape string e together
    with the single character following it. Returns -1 if not found.
    """

    ls = len(s)
    le = len(e)
    while p < ls:
        if s.startswith(e, p):
            p += le + 1
        elif s.startswith(f, p):
            break
        else:
            p += 1
    if p >= ls:
        p = -1
    return p


# (character, entity) pairs in the order in which they have to be escaped,
# built from _xml_entities so that the two tables cannot diverge.
_xml_entities_escape_ordered = [
    (_xml_entities[_ename], "&%s;" % _ename)
    for _ename in ("amp", "lt", "gt", "quot", "apos") # "amp" must be first
]
# (entity, character) pairs in the order for unescaping, ampersand last.
_xml_entities_unescape_ordered = [
    tuple(reversed(x)) for x in reversed(_xml_entities_escape_ordered)]

def unescape_xml (es, testnoesc=False):
    """
    Replace the five predefined XML entities in es with their characters.

    If testnoesc is set, the tuple (text, noesc) is returned instead of
    the text alone, where noesc is the set of entities whose character
    was found unescaped in the input. It can be passed to escape_xml
    to reproduce the original escaping.
    """

    s = es
    if testnoesc:
        noesc = set()
    for ent, val in _xml_entities_unescape_ordered:
        if testnoesc:
            p = s.find(val)
            # The startswith test is for the ampersand, which is also
            # the first character of its own entity.
            if p >= 0 and not s.startswith(ent, p):
                noesc.add(ent)
        s = s.replace(ent, val)
    if testnoesc:
        return s, noesc
    else:
        return s

def escape_xml (s, noesc=None):
    """
    Replace the characters reserved in XML with their entities,
    except for those whose entity is listed in noesc.
    """

    es = s
    for val, ent in _xml_entities_escape_ordered:
        if not noesc or ent not in noesc:
            es = es.replace(val, ent)
    return es


if __name__ == "__main__":
    main()