File indexing completed on 2024-11-03 11:24:02
# -*- coding: utf-8 -*-

"""
Remove special substrings from text.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import re

from pology import _, n_
from pology.comments import manc_parse_field_values, manc_parse_list
import pology.markup as M
from pology.msgreport import warning_on_msg
from pology.resolve import remove_accelerator as _rm_accel_in_text
from pology.resolve import remove_fmtdirs as _rm_fmtd_in_text_single
from pology.resolve import remove_literals as _rm_lit_in_text_single
from pology.resolve import resolve_entities_simple


def _map_msg_texts(msg, transform):
    """
    Apply a text transformation to the original and translation fields
    of the message, in place.

    The fields processed are C{msgid}, C{msgid_plural}, every element of
    C{msgstr}, C{msgid_previous} and C{msgid_plural_previous}.
    Except for C{msgid} and C{msgstr} elements, a field is only processed
    if it is set to a non-empty value.

    @param msg: message to modify
    @param transform: the transformation to apply
    @type transform: C{(text) -> text}

    @return: number of errors (always 0)
    """

    msg.msgid = transform(msg.msgid)
    if msg.msgid_plural:
        msg.msgid_plural = transform(msg.msgid_plural)
    for i, text in enumerate(msg.msgstr):
        msg.msgstr[i] = transform(text)

    if msg.msgid_previous:
        msg.msgid_previous = transform(msg.msgid_previous)
    if msg.msgid_plural_previous:
        msg.msgid_plural_previous = transform(msg.msgid_plural_previous)

    return 0


def _rm_accel_in_msg(msg, accels, greedy=False):
    # Remove accelerator markers from all text fields of the message.

    return _map_msg_texts(
        msg, lambda text: _rm_accel_in_text(text, accels, greedy))


def _get_accel_marker(msg, cat):
    # Accelerator markers stated by the message itself (through manual
    # comments) take precedence over those reported by the catalog.

    accels = manc_parse_field_values(msg, "accelerator-marker")
    if not accels:
        accels = cat.accelerator()
    return accels


def remove_accel_text(text, msg, cat):
    """
    Remove accelerator marker from one of the text fields of the message
    [type F3A hook].

    Accelerator marker is determined from the catalog, by calling its
    L{accelerator()<pology.catalog.Catalog.accelerator>} method.
    Use L{set_accelerator()<pology.catalog.Catalog.set_accelerator>}
    to set possible accelerator markers after the catalog has been opened,
    in case it does not specify any on its own.
    If catalog reports C{None} for accelerators, text is not touched.

    Accelerator marker can also be specified for a particular message,
    by embedded C{accelerator-marker} field in manual comments::

        # accelerator-marker: _

    This overrides accelerator marker reported by the catalog.

    @return: text

    @see: L{pology.resolve.remove_accelerator}
    """

    accels = _get_accel_marker(msg, cat)
    return _rm_accel_in_text(text, accels)


def remove_accel_text_greedy(text, msg, cat):
    """
    Like L{remove_accel_text}, except that if catalog reports C{None}
    for accelerators, some frequent marker characters are removed
    [type F3A hook].

    @return: text

    @see: L{pology.resolve.remove_accelerator}
    """

    accels = _get_accel_marker(msg, cat)
    return _rm_accel_in_text(text, accels, greedy=True)


def remove_accel_msg(msg, cat):
    """
    Remove accelerator marker from all applicable text fields in the message,
    as if L{remove_accel_text} was applied to each [type F4A hook].

    @return: number of errors

    @see: L{pology.resolve.remove_accelerator}
    """

    accels = _get_accel_marker(msg, cat)
    return _rm_accel_in_msg(msg, accels)


def remove_accel_msg_greedy(msg, cat):
    """
    Like L{remove_accel_msg}, except that if catalog reports C{None}
    for accelerators, some frequent marker characters are removed
    [type F4A hook].

    @return: number of errors

    @see: L{pology.resolve.remove_accelerator}
    """

    accels = _get_accel_marker(msg, cat)
    return _rm_accel_in_msg(msg, accels, greedy=True)


def _rm_markup_in_text(text, mtypes):
    # Convert the text to plain for each of the given markup types in turn.
    # Markup type names are matched case-insensitively; unknown types
    # are silently skipped.

    if mtypes is None:
        return text

    for mtype in mtypes:
        mtype = mtype.lower()
        if mtype == "html":
            text = M.html_to_plain(text)
        elif mtype == "kde4":
            text = M.kde4_to_plain(text)
        elif mtype == "qtrich":
            text = M.qtrich_to_plain(text)
        elif mtype == "kuit":
            text = M.kuit_to_plain(text)
        elif mtype in ("docbook4", "docbook"):
            text = M.docbook4_to_plain(text)
        elif mtype == "xml":
            text = M.xml_to_plain(text)
        elif mtype == "xmlents":
            # FIXME: Only default XML entities can be handled as-is;
            # perhaps markup remover should also take entity mapping
            # as argument, and pass it here?
            text = resolve_entities_simple(text, M.xml_entities)

    return text


def _rm_markup_in_msg(msg, mtypes):
    # Remove markup from all text fields of the message.

    return _map_msg_texts(msg, lambda text: _rm_markup_in_text(text, mtypes))


def remove_markup_text(text, msg, cat):
    """
    Remove markup from one of the text fields of the message [type F3A hook].

    Expected markup types are determined from the catalog, by calling its
    L{markup()<pology.catalog.Catalog.markup>} method.
    Use L{set_markup()<pology.catalog.Catalog.set_markup>}
    to set expected markup types after the catalog has been opened,
    in case it does not specify any on its own.
    If catalog reports C{None} for markup types, text is not touched.

    @return: text
    """

    mtypes = cat.markup()
    return _rm_markup_in_text(text, mtypes)


def remove_markup_msg(msg, cat):
    """
    Remove markup from all applicable text fields in the message,
    as if L{remove_markup_text} was applied to each [type F4A hook].

    @return: number of errors
    """

    mtypes = cat.markup()
    return _rm_markup_in_msg(msg, mtypes)


def _format_flags(msg):
    # Message flags which declare a format, e.g. "c-format".

    return [x for x in msg.flag if x.endswith("-format")]


def _rm_fmtd_in_text(text, formats, subs=""):
    # Remove (or replace with subs) directives of each given format in turn.

    for format_ in formats:
        text = _rm_fmtd_in_text_single(text, format_, subs=subs)

    return text


def _rm_fmtd_in_msg(msg, subs=""):
    # Remove format directives from all text fields of the message,
    # according to format flags of the message.

    formats = _format_flags(msg)
    return _map_msg_texts(
        msg, lambda text: _rm_fmtd_in_text(text, formats, subs))


def remove_fmtdirs_text(text, msg, cat):
    """
    Remove format directives from one of the text fields of the message
    [type F3A hook].

    The type of format directives is determined from message format flags.

    @return: text

    @see: L{pology.resolve.remove_fmtdirs}
    """

    return _rm_fmtd_in_text(text, _format_flags(msg))


def remove_fmtdirs_text_tick(tick):
    """
    Like L{remove_fmtdirs_text}, except that each format directive is
    replaced by a non-whitespace "tick" instead of plainly removed
    [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F3A hook
    @rtype: C{(text, msg, cat) -> text}
    """

    def hook(text, msg, cat):
        return _rm_fmtd_in_text(text, _format_flags(msg), tick)

    return hook


def remove_fmtdirs_msg(msg, cat):
    """
    Remove format directives from all applicable text fields in the message,
    as if L{remove_fmtdirs_text} was applied to each [type F4A hook].

    @return: number of errors
    """

    return _rm_fmtd_in_msg(msg)


def remove_fmtdirs_msg_tick(tick):
    """
    Remove format directives from all applicable text fields in the message,
    as if L{remove_fmtdirs_text_tick} was applied to each [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F4A hook
    @rtype: C{(msg, cat) -> numerr}
    """

    def hook(msg, cat):
        return _rm_fmtd_in_msg(msg, tick)

    return hook


def _literals_spec(msg, cat):
    # Collect the literal-removal specification for the message, as the
    # tuple (substrings, compiled regexes, whether to remove heuristically).

    fname = "literal-segment"
    rx_strs = manc_parse_field_values(msg, fname)

    # Compile regexes.
    # Empty regex indicates not to do any heuristic removal.
    rxs = []
    heuristic = True
    for rx_str in rx_strs:
        if not rx_str:
            heuristic = False
            continue
        try:
            rx = re.compile(rx_str, re.U|re.S)
        except Exception:
            # The pattern comes from a comment in the catalog, so any
            # failure to compile it is reported on the message rather
            # than propagated. Interpreter-exit exceptions
            # (KeyboardInterrupt, SystemExit) are deliberately not caught.
            warning_on_msg(_("@info",
                             "Field %(field)s states "
                             "malformed regex '%(re)s'.",
                             field=fname, re=rx_str),
                           msg, cat)
        else:
            rxs.append(rx)

    return [], rxs, heuristic


def _rm_lit_in_text(text, substrs, regexes, heuristic, subs=""):
    # Positional-argument adapter over pology.resolve.remove_literals.

    return _rm_lit_in_text_single(text, subs=subs, substrs=substrs,
                                  regexes=regexes, heuristic=heuristic)


def _rm_lit_in_msg(msg, cat, strs, rxs, heu, subs=""):
    # Remove literal segments from all text fields of the message.
    # The catalog argument is not used.

    return _map_msg_texts(
        msg, lambda text: _rm_lit_in_text(text, strs, rxs, heu, subs))


def remove_literals_text(text, msg, cat):
    r"""
    Remove literal segments from one of the text fields of the message
    [type F3A hook].

    Literal segments are URLs, email addresses, command line options, etc.
    anything symbolic that the machine, rather than human alone, should parse.
    Note format directives are excluded here, see L{remove_fmtdirs_text}
    for removing them.

    By default, literals are removed heuristically, but this can be influenced
    by embedded C{literal-segment} fields in manual comments. For example::

        # literal-segment: foobar

    states that all C{foobar} segments are literals. The field value is
    actually a regular expression, and there may be several such fields::

        # literal-segment: \w+bar
        # literal-segment: foo[&=] ### a sub comment

    To prevent any heuristic removal of literals, add a C{literal-segment}
    field with empty value.

    @return: text

    @see: L{pology.resolve.remove_literals}
    """

    strs, rxs, heu = _literals_spec(msg, cat)
    return _rm_lit_in_text(text, strs, rxs, heu)


def remove_literals_text_tick(tick):
    """
    Like L{remove_literals_text}, except that each literal segment is
    replaced by a non-whitespace "tick" instead of plainly removed
    [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F3A hook
    @rtype: C{(text, msg, cat) -> text}
    """

    def hook(text, msg, cat):
        strs, rxs, heu = _literals_spec(msg, cat)
        return _rm_lit_in_text(text, strs, rxs, heu, tick)

    return hook


def remove_literals_msg(msg, cat):
    """
    Remove literal segments from all applicable text fields in the message,
    as if L{remove_literals_text} was applied to each [type F4A hook].

    @return: number of errors
    """

    strs, rxs, heu = _literals_spec(msg, cat)
    return _rm_lit_in_msg(msg, cat, strs, rxs, heu)


def remove_literals_msg_tick(tick):
    """
    Remove literal segments from all applicable text fields in the message,
    as if L{remove_literals_text_tick} was applied to each [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F4A hook
    @rtype: C{(msg, cat) -> numerr}
    """

    def hook(msg, cat):
        strs, rxs, heu = _literals_spec(msg, cat)
        return _rm_lit_in_msg(msg, cat, strs, rxs, heu, tick)

    return hook


def remove_marlits_text(text, msg, cat):
    """
    Remove literals by markup from one of the text fields of the message
    [type F3A hook].

    An "intersection" of L{remove_markup_text} and L{remove_literals_text},
    where literals segments are determined by markup, and both the segment text
    and its markup is removed. See documentation of these hooks for notes
    on what is considered literal and how markup type is determined.

    @return: text
    """

    strs, rxs, heu = [], _marlit_rxs(msg, cat), False
    return _rm_lit_in_text(text, strs, rxs, heu)


def remove_marlits_msg(msg, cat):
    """
    Remove literal segments by markup from all applicable text fields in
    the message, as if L{remove_marlits_text} was applied to each
    [type F4A hook].

    @return: number of errors
    """

    strs, rxs, heu = [], _marlit_rxs(msg, cat), False
    return _rm_lit_in_msg(msg, cat, strs, rxs, heu)


# Module-level cache of literal-by-markup data, rebuilt whenever a catalog
# reports different markup types than those seen on the previous call.
class _Cache: pass
_marlit_cache = _Cache()
# Markup types for which the cache is currently valid.
_marlit_cache.mtypes = None
# Names of tags considered to wrap literal segments.
_marlit_cache.tags = set()
# Compiled regexes matching segments wrapped in literal tags.
_marlit_cache.rxs = []
# Matches an automatic comment of the form "tag: <name>".
_marlit_cache.acmnt_tag_rx = re.compile(r"^\s*tag:\s*(\w+)\s*$", re.I)
# Used when the whole message is literal: matches any text in full.
_marlit_cache.rxs_all = [re.compile(r".*", re.S)]

def _marlit_rxs(msg, cat):
    # Return regexes which match the literal segments of the message,
    # as determined by markup types of the catalog.

    # Update regex cache due to markup type.
    mtypes = cat.markup()
    if _marlit_cache.mtypes != mtypes:
        _marlit_cache.mtypes = mtypes
        _marlit_cache.tags = set()
        _marlit_cache.rxs = []
        for mtype in mtypes or []:
            _marlit_cache.tags.update(_marlit_tags(mtype))
            # With no tags collected yet the alternation would be empty,
            # and the regex could match segments like "<x>...</>";
            # build the regex only when there is something to match.
            if _marlit_cache.tags:
                rx = _build_tagged_rx(_marlit_cache.tags)
                _marlit_cache.rxs.append(rx)

    # Check if the whole message is under a literal tag.
    for acmnt in msg.auto_comment:
        m = _marlit_cache.acmnt_tag_rx.search(acmnt)
        if m:
            tag = m.group(1).strip().lower()
            if tag in _marlit_cache.tags:
                return _marlit_cache.rxs_all

    return _marlit_cache.rxs


# Whitespace-separated names of literal-wrapping tags, by markup type.
_marlit_tags_by_mtype = {
    "html": "tt code",
    "kde4": "icode bcode filename envar command numid tt code",
    "qtrich": "tt code",
    "kuit": "icode bcode filename envar command numid",
    "docbook4": ("literal filename envar command option function markup "
                 "varname screen programlisting userinput computeroutput"),
    "xml": "",
}
_marlit_tags_by_mtype["docbook"] = _marlit_tags_by_mtype["docbook4"]

def _marlit_tags(mtype):
    # Return the set of literal-wrapping tag names for the markup type
    # (empty set if the type is unknown or defines no such tags).
    # The type name is matched case-insensitively, as it is when
    # removing markup.

    return set(_marlit_tags_by_mtype.get(mtype.lower(), "").split())


def _build_tagged_rx(tags):
    # Build a regex which matches any segment wrapped in one of the tags,
    # opening and closing tag included. Tags may be given as a sequence
    # of names or as a whitespace-separated string.

    if isinstance(tags, str):
        tags = tags.split()
    # For proper regex matching, tags that begin with a substring
    # equal to another full tag must come before that full tag.
    # So sort tags first by length, then by alphabet.
    tags = sorted(tags, key=lambda x: (-len(x), x))
    basetagged_rxsub = r"<\s*(%s)\b[^<]*>.*?<\s*/\s*\1\s*>"
    tagged_rx = re.compile(basetagged_rxsub % "|".join(tags), re.I|re.S)

    return tagged_rx


def remove_ignored_entities_msg(msg, cat):
    """
    Remove locally ignored entities from all applicable text fields in
    the message [type F4A hook].

    Entities are ignored by listing them in the embedded C{ignore-entity}
    fields in manual comments. For example::

        # ignore-entity: foobar, froobaz

    will remove entities C{&foobar;} and C{&froobaz;} from all text fields.

    @return: number of errors
    """

    locally_ignored = manc_parse_list(msg, "ignore-entity:", ",")
    if not locally_ignored:
        return 0

    # Unlike in other removal hooks, previous fields are not processed.
    msg.msgid = _rm_ent_in_text(msg.msgid, locally_ignored)
    if msg.msgid_plural:
        msg.msgid_plural = _rm_ent_in_text(msg.msgid_plural, locally_ignored)
    for i, text in enumerate(msg.msgstr):
        msg.msgstr[i] = _rm_ent_in_text(text, locally_ignored)

    return 0


def _rm_ent_in_text(text, entities):
    # Remove all references (&name;) to the given entity names.

    for entity in entities:
        text = text.replace("&%s;" % entity, "")

    return text


def rewrite_msgid(msg, cat):
    """
    Rewrite parts of C{msgid} based on translator comments [type F4A hook].

    Translator comments may issue C{rewrite-msgid} directives
    to modify parts of C{msgid} (as well as C{msgid_plural}) fields
    by applying a search regular expression and replace pattern.
    The search and replace pattern are wrapped and separated
    by any character consistently used, such as slashes.
    Examples::

        # rewrite-msgid: /foo/bar/
        # rewrite-msgid: /foo (\\w+) fam/bar \\1 bam/
        # rewrite-msgid: :foo/bar:foo/bam:

    If a search pattern is not valid, a warning on message is issued.

    Search pattern is case-sensitive.

    @return: number of errors
    """

    nerrors = 0

    # Collect and compile regular expressions.
    fname = "rewrite-msgid"
    rwspecs = manc_parse_field_values(msg, fname)
    rwrxs = []
    for rwspec in rwspecs:
        # The first character of the directive is the separator.
        sep = rwspec[0:1]
        if not sep:
            warning_on_msg(_("@info",
                             "No patterns in rewrite directive."), msg, cat)
            nerrors += 1
            continue
        # A well-formed directive splits into exactly
        # ["", search, replace, ""].
        lst = rwspec.split(sep)
        if len(lst) != 4 or lst[0] or lst[3]:
            warning_on_msg(_("@info",
                             "Wrongly separated patterns in "
                             "rewrite directive '%(dir)s'.", dir=rwspec),
                           msg, cat)
            nerrors += 1
            continue
        srch, repl = lst[1], lst[2]
        try:
            rx = re.compile(srch, re.U)
        except Exception:
            # Report any compilation failure on the message,
            # but let interpreter-exit exceptions through.
            warning_on_msg(_("@info",
                             "Invalid search pattern in "
                             "rewrite directive '%(dir)s'.", dir=rwspec),
                           msg, cat)
            nerrors += 1
            continue
        rwrxs.append((rx, repl, rwspec))

    for rx, repl, rwspec in rwrxs:
        try:
            msg.msgid = rx.sub(repl, msg.msgid)
            if msg.msgid_plural is not None:
                msg.msgid_plural = rx.sub(repl, msg.msgid_plural)
        except Exception:
            # E.g. the replace pattern refers to a non-existent group.
            warning_on_msg(_("@info",
                             "Error in application of "
                             "rewrite directive '%(dir)s'.", dir=rwspec),
                           msg, cat)
            nerrors += 1

    return nerrors


def rewrite_inverse(msg, cat):
    """
    Rewrite message by replacing all its elements with that of another message
    which has the same C{msgstr[0]} [type F4A hook].

    Translator comments may issue C{rewrite-inverse} directives
    to replace all message parts with those from another message
    having the same C{msgstr[0]} field.
    The argument to the directive is a regular expression search pattern
    on C{msgid} and C{msgctxt} (leading and trailing whitespace get stripped)
    which is used to select the particular message if more than
    one other messages have same C{msgstr[0]}.
    Examples::

        # rewrite-inverse:
        # rewrite-inverse: Foo

    If the pattern does not match or it matches more than one other message,
    current message is not touched; also if the pattern
    is left empty and there is more than one other message.
    Search pattern is applied to C{msgctxt} and C{msgid} in turn,
    and the message is matched if any matches.
    Search pattern is case-sensitive.

    If more than one C{rewrite-inverse} directive is seen,
    or the search pattern is not valid, a warning on message is issued
    and current message is not touched.

    This hook is then executed again on the resulting message,
    in case the new translator comments contain another
    C{rewrite-inverse} directive.

    @return: number of errors
    """

    # Collect and compile regular expressions.
    fname = "rewrite-inverse"
    rwspecs = manc_parse_field_values(msg, fname)
    if not rwspecs:
        return 0
    if len(rwspecs) > 1:
        warning_on_msg(_("@info",
                         "More than one inverse rewrite directive "
                         "encountered."),
                       msg, cat)
        return 1

    srch = rwspecs[0]
    try:
        rx = re.compile(srch, re.U)
    except Exception:
        # Report any compilation failure on the message,
        # but let interpreter-exit exceptions through.
        warning_on_msg(_("@info",
                         "Invalid search pattern '%(pattern)s' in "
                         "inverse rewrite directive.", pattern=srch),
                       msg, cat)
        return 1

    msgs = cat.select_by_msgstr(msg.msgstr[0], lazy=True)
    msgs = [x for x in msgs if x.key != msg.key] # remove current
    if not msgs:
        warning_on_msg(_("@info",
                         "There are no other messages with same translation, "
                         "needed by inverse rewrite directive."),
                       msg, cat)
        return 1

    def match(x):
        return (   (x.msgctxt is not None and rx.search(x.msgctxt))
                or rx.search(x.msgid))

    sel_msgs = [x for x in msgs if match(x)] # remove non-matched
    if not sel_msgs:
        warning_on_msg(_("@info",
                         "Inverse rewrite directive matches none of "
                         "the other messages with same translation."),
                       msg, cat)
        return 1
    if len(sel_msgs) > 1:
        warning_on_msg(_("@info",
                         "Inverse rewrite directive matches more than "
                         "one other message with same translation."),
                       msg, cat)
        return 1

    # Copy all parts of the other message.
    omsg = sel_msgs[0]
    msg.msgid = omsg.msgid
    if msg.msgid_plural is not None and omsg.msgid_plural is not None:
        msg.msgid_plural = omsg.msgid_plural

    # Copy comments and recurse.
    msg.set(omsg)
    nerrors = rewrite_inverse(msg, cat)

    return nerrors


# Matches an XML-like entity reference, e.g. "&foo;" or "&foo.bar-baz;".
_ent_rx = re.compile(r"&[\w.:-]+;", re.U)

def remove_paired_ents(msg, cat):
    """
    Remove all XML-like entities from original, and from translation
    all that are also found in original [type F4A hook].

    To remove all entities from original, and all entities from translation
    that also exist in original, may be useful prior to markup checks,
    when list of known entities is not available.

    @return: number of errors
    """

    return _rm_paired_ents(msg, cat)


def remove_paired_ents_tick(tick):
    """
    Like L{remove_paired_ents}, except that each XML-like entity is
    replaced by a non-whitespace "tick" instead of plainly removed
    [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F4A hook
    @rtype: C{(msg, cat) -> numerr}
    """

    def hook(msg, cat):
        return _rm_paired_ents(msg, cat, tick)

    return hook


def _rm_paired_ents(msg, cat, tick=''):
    # Replace with tick all entities in the original text, and those
    # entities in the translation which are also found in the original.
    # The catalog argument is not used.

    ents_orig = set()
    ents_orig.update(_ent_rx.findall(msg.msgid))
    for ent in ents_orig:
        msg.msgid = msg.msgid.replace(ent, tick)

    if msg.msgid_plural:
        # Entities of the singular are removed from the plural too.
        ents_orig.update(_ent_rx.findall(msg.msgid_plural))
        for ent in ents_orig:
            msg.msgid_plural = msg.msgid_plural.replace(ent, tick)

    for i in range(len(msg.msgstr)):
        ents_trans = set(_ent_rx.findall(msg.msgstr[i]))
        for ent in ents_trans.intersection(ents_orig):
            msg.msgstr[i] = msg.msgstr[i].replace(ent, tick)

    return 0