File indexing completed on 2024-11-03 11:24:02

0001 # -*- coding: utf-8 -*-
0002 
0003 """
0004 Remove special substrings from text.
0005 
0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0007 @license: GPLv3
0008 """
0009 
0010 import re
0011 
0012 from pology import _, n_
0013 from pology.comments import manc_parse_field_values, manc_parse_list
0014 import pology.markup as M
0015 from pology.msgreport import warning_on_msg
0016 from pology.resolve import remove_accelerator as _rm_accel_in_text
0017 from pology.resolve import remove_fmtdirs as _rm_fmtd_in_text_single
0018 from pology.resolve import remove_literals as _rm_lit_in_text_single
0019 from pology.resolve import resolve_entities_simple
0020 
0021 
def _rm_accel_in_msg(msg, accels, greedy=False):
    """
    Strip accelerator markers from the text fields of the message, in place.

    C{msgid} and every C{msgstr} entry are always processed;
    C{msgid_plural}, C{msgid_previous} and C{msgid_plural_previous}
    only when they are non-empty.

    @param msg: message to modify
    @param accels: accelerator markers, handed to the text-level remover
    @param greedy: handed to the text-level remover

    @return: number of errors (always 0)
    """

    def strip(text):
        return _rm_accel_in_text(text, accels, greedy)

    msg.msgid = strip(msg.msgid)
    if msg.msgid_plural:
        msg.msgid_plural = strip(msg.msgid_plural)
    for pos in range(len(msg.msgstr)):
        msg.msgstr[pos] = strip(msg.msgstr[pos])

    # The "previous" originals are optional fields.
    for field in ("msgid_previous", "msgid_plural_previous"):
        previous = getattr(msg, field)
        if previous:
            setattr(msg, field, strip(previous))

    return 0
0037 
0038 
def _get_accel_marker(msg, cat):
    """
    Determine the accelerator markers to use for the message.

    Values of C{accelerator-marker} fields in manual comments take
    precedence; if there are none, the result of the catalog's
    C{accelerator()} method is returned instead.
    """

    return manc_parse_field_values(msg, "accelerator-marker") or cat.accelerator()
0045 
0046 
def remove_accel_text(text, msg, cat):
    """
    Strip the accelerator marker from one text field of the message
    [type F3A hook].

    The marker is normally taken from the catalog, through its
    L{accelerator()<pology.catalog.Catalog.accelerator>} method.
    If the catalog does not specify markers on its own, they can be set
    after opening it with
    L{set_accelerator()<pology.catalog.Catalog.set_accelerator>}.
    When the catalog reports C{None} for accelerators, the text is
    left as it is.

    A single message can override the catalog's marker with an embedded
    C{accelerator-marker} field in its manual comments::

        # accelerator-marker: _

    @return: text

    @see: L{pology.resolve.remove_accelerator}
    """

    return _rm_accel_in_text(text, _get_accel_marker(msg, cat))
0073 
0074 
def remove_accel_text_greedy(text, msg, cat):
    """
    Greedy variant of L{remove_accel_text}: when the catalog reports
    C{None} for accelerators, some frequently used marker characters
    are removed anyway [type F3A hook].

    @return: text

    @see: L{pology.resolve.remove_accelerator}
    """

    return _rm_accel_in_text(text, _get_accel_marker(msg, cat), greedy=True)
0088 
0089 
def remove_accel_msg(msg, cat):
    """
    Strip the accelerator marker from all applicable text fields of
    the message, as L{remove_accel_text} would do for each of them
    [type F4A hook].

    @return: number of errors

    @see: L{pology.resolve.remove_accelerator}
    """

    return _rm_accel_in_msg(msg, _get_accel_marker(msg, cat))
0102 
0103 
def remove_accel_msg_greedy(msg, cat):
    """
    Greedy variant of L{remove_accel_msg}: when the catalog reports
    C{None} for accelerators, some frequently used marker characters
    are removed anyway [type F4A hook].

    @return: number of errors

    @see: L{pology.resolve.remove_accelerator}
    """

    return _rm_accel_in_msg(msg, _get_accel_marker(msg, cat), greedy=True)
0117 
0118 
def _rm_markup_in_text(text, mtypes):
    """
    Convert the text to plain by removing markup of each given type in turn.

    Markup type names are matched case-insensitively; unrecognized
    types are skipped. If C{mtypes} is C{None}, the text is returned
    untouched.

    @param text: text to process
    @param mtypes: sequence of markup type names, or C{None}

    @return: text
    """

    if mtypes is None:
        return text

    # Markup type -> name of the to-plain converter in the markup module.
    converter_names = {
        "html": "html_to_plain",
        "kde4": "kde4_to_plain",
        "qtrich": "qtrich_to_plain",
        "kuit": "kuit_to_plain",
        "docbook4": "docbook4_to_plain",
        "docbook": "docbook4_to_plain",
        "xml": "xml_to_plain",
    }

    for mtype in mtypes:
        kind = mtype.lower()
        if kind == "xmlents":
            # FIXME: Only default XML entities can be handled as-is;
            # perhaps markup remover should also take entity mapping
            # as argument, and pass it here?
            text = resolve_entities_simple(text, M.xml_entities)
        elif kind in converter_names:
            to_plain = getattr(M, converter_names[kind])
            text = to_plain(text)

    return text
0146 
0147 
def _rm_markup_in_msg(msg, mtypes):
    """
    Remove markup of the given types from the text fields of the message,
    in place.

    C{msgid} and every C{msgstr} entry are always processed;
    C{msgid_plural}, C{msgid_previous} and C{msgid_plural_previous}
    only when they are non-empty.

    @return: number of errors (always 0)
    """

    def to_plain(text):
        return _rm_markup_in_text(text, mtypes)

    msg.msgid = to_plain(msg.msgid)
    if msg.msgid_plural:
        msg.msgid_plural = to_plain(msg.msgid_plural)
    for pos in range(len(msg.msgstr)):
        msg.msgstr[pos] = to_plain(msg.msgstr[pos])

    # The "previous" originals are optional fields.
    for field in ("msgid_previous", "msgid_plural_previous"):
        previous = getattr(msg, field)
        if previous:
            setattr(msg, field, to_plain(previous))

    return 0
0162 
0163 
def remove_markup_text(text, msg, cat):
    """
    Strip markup from one text field of the message [type F3A hook].

    Which markup types to expect is asked from the catalog, through its
    L{markup()<pology.catalog.Catalog.markup>} method.
    If the catalog does not specify markup types on its own, they can be
    set after opening it with
    L{set_markup()<pology.catalog.Catalog.set_markup>}.
    When the catalog reports C{None} for markup types, the text is
    left as it is.

    @return: text
    """

    return _rm_markup_in_text(text, cat.markup())
0180 
0181 
def remove_markup_msg(msg, cat):
    """
    Strip markup from all applicable text fields of the message,
    as L{remove_markup_text} would do for each of them [type F4A hook].

    @return: number of errors
    """

    return _rm_markup_in_msg(msg, cat.markup())
0192 
0193 
def _format_flags(msg):
    """
    Collect, as a list, those flags of the message that end in C{-format}.
    """

    suffix = "-format"
    return list(filter(lambda flag: flag.endswith(suffix), msg.flag))
0197 
0198 
def _rm_fmtd_in_text(text, formats, subs=""):
    """
    Remove format directives of every listed format from the text.

    @param text: text to process
    @param formats: format flags naming the kinds of directives
    @param subs: replacement for each directive, handed to the remover

    @return: text
    """

    stripped = text
    for fmt in formats:
        stripped = _rm_fmtd_in_text_single(stripped, fmt, subs=subs)

    return stripped
0205 
0206 
def _rm_fmtd_in_msg(msg, subs=""):
    """
    Remove format directives from the text fields of the message, in place.

    The kinds of directives are taken from the C{*-format} flags of
    the message. C{msgid} and every C{msgstr} entry are always processed;
    C{msgid_plural}, C{msgid_previous} and C{msgid_plural_previous}
    only when they are non-empty.

    @param subs: replacement for each removed directive

    @return: number of errors (always 0)
    """

    formats = _format_flags(msg)

    def strip(text):
        return _rm_fmtd_in_text(text, formats, subs)

    msg.msgid = strip(msg.msgid)
    if msg.msgid_plural:
        msg.msgid_plural = strip(msg.msgid_plural)
    for pos in range(len(msg.msgstr)):
        msg.msgstr[pos] = strip(msg.msgstr[pos])

    # The "previous" originals are optional fields.
    for field in ("msgid_previous", "msgid_plural_previous"):
        previous = getattr(msg, field)
        if previous:
            setattr(msg, field, strip(previous))

    return 0
0223 
0224 
def remove_fmtdirs_text(text, msg, cat):
    """
    Strip format directives from one text field of the message
    [type F3A hook].

    The format flags of the message decide which kinds of
    directives are looked for.

    @return: text

    @see: L{pology.resolve.remove_fmtdirs}
    """

    formats = _format_flags(msg)
    return _rm_fmtd_in_text(text, formats)
0238 
0239 
def remove_fmtdirs_text_tick(tick):
    """
    Build a hook working like L{remove_fmtdirs_text}, but which puts
    a non-whitespace "tick" in place of each format directive instead
    of removing it outright [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F3A hook
    @rtype: C{(text, msg, cat) -> text}
    """

    def hook(text, msg, cat):
        formats = _format_flags(msg)
        return _rm_fmtd_in_text(text, formats, tick)

    return hook
0257 
0258 
def remove_fmtdirs_msg(msg, cat):
    """
    Strip format directives from all applicable text fields of the message,
    as L{remove_fmtdirs_text} would do for each of them [type F4A hook].

    @return: number of errors
    """

    nerrors = _rm_fmtd_in_msg(msg)
    return nerrors
0268 
0269 
def remove_fmtdirs_msg_tick(tick):
    """
    Build a hook which replaces format directives with a "tick" in all
    applicable text fields of the message, as a hook made by
    L{remove_fmtdirs_text_tick} would do for each of them [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F4A hook
    @rtype: C{(msg, cat) -> numerr}
    """

    def hook(msg, cat):
        nerrors = _rm_fmtd_in_msg(msg, tick)
        return nerrors

    return hook
0286 
0287 
def _literals_spec(msg, cat):
    """
    Read the literal-removal specification from manual comments
    of the message.

    Each non-empty value of a C{literal-segment} field is compiled as
    a regular expression; a value which fails to compile is reported by
    a warning on the message and skipped. An empty value switches off
    heuristic removal.

    @return: explicit substrings (always an empty list), compiled regexes,
        and whether heuristic removal should be applied
    @rtype: C{([string], [regex], bool)}
    """

    fname = "literal-segment"
    rx_strs = manc_parse_field_values(msg, fname)

    rxs = []
    heuristic = True
    for rx_str in rx_strs:
        if not rx_str:
            # Empty regex indicates not to do any heuristic removal.
            heuristic = False
            continue
        try:
            rxs.append(re.compile(rx_str, re.U|re.S))
        except Exception:
            # Any failure to compile a translator-supplied pattern is
            # reported, but KeyboardInterrupt and SystemExit must not
            # be swallowed (as a bare except would do).
            warning_on_msg(_("@info",
                             "Field %(field)s states "
                             "malformed regex '%(re)s'.",
                             field=fname, re=rx_str),
                             msg, cat)

    return [], rxs, heuristic
0311 
0312 
def _rm_lit_in_text(text, substrs, regexes, heuristic, subs=""):
    """
    Remove literal segments from the text, delegating to the
    text-level remover with all options passed by keyword.

    @return: text
    """

    options = dict(subs=subs, substrs=substrs,
                   regexes=regexes, heuristic=heuristic)
    return _rm_lit_in_text_single(text, **options)
0317 
0318 
def _rm_lit_in_msg(msg, cat, strs, rxs, heu, subs=""):
    """
    Remove literal segments from the text fields of the message, in place.

    C{msgid} and every C{msgstr} entry are always processed;
    C{msgid_plural}, C{msgid_previous} and C{msgid_plural_previous}
    only when they are non-empty. The C{cat} argument is not used.

    @param strs: explicit literal substrings
    @param rxs: compiled regexes matching literals
    @param heu: whether to apply heuristic removal
    @param subs: replacement for each removed literal

    @return: number of errors (always 0)
    """

    def strip(text):
        return _rm_lit_in_text(text, strs, rxs, heu, subs)

    msg.msgid = strip(msg.msgid)
    if msg.msgid_plural:
        msg.msgid_plural = strip(msg.msgid_plural)
    for pos in range(len(msg.msgstr)):
        msg.msgstr[pos] = strip(msg.msgstr[pos])

    # The "previous" originals are optional fields.
    for field in ("msgid_previous", "msgid_plural_previous"):
        previous = getattr(msg, field)
        if previous:
            setattr(msg, field, strip(previous))

    return 0
0335 
0336 
def remove_literals_text (text, msg, cat):
    # Raw docstring: the example below contains a regex backslash escape,
    # which is an invalid escape sequence in an ordinary string literal.
    r"""
    Remove literal segments from one of the text fields of the message
    [type F3A hook].

    Literal segments are URLs, email addresses, command line options, etc.
    anything symbolic that the machine, rather than human alone, should parse.
    Note format directives are excluded here, see L{remove_fmtdirs_text}
    for removing them.

    By default, literals are removed heuristically, but this can be influenced
    by embedded C{literal-segment} fields in manual comments. For example::

        # literal-segment: foobar

    states that all C{foobar} segments are literals. The field value is
    actually a regular expression, and there may be several such fields::

        # literal-segment: \w+bar
        # literal-segment: foo[&=] ### a sub comment

    To prevent any heuristic removal of literals, add a C{literal-segment}
    field with empty value.

    @return: text

    @see: L{pology.resolve.remove_literals}
    """

    strs, rxs, heu = _literals_spec(msg, cat)
    return _rm_lit_in_text(text, strs, rxs, heu)
0368 
0369 
def remove_literals_text_tick(tick):
    """
    Build a hook working like L{remove_literals_text}, but which puts
    a non-whitespace "tick" in place of each literal segment instead
    of removing it outright [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F3A hook
    @rtype: C{(text, msg, cat) -> text}
    """

    def hook(text, msg, cat):
        substrs, regexes, heuristic = _literals_spec(msg, cat)
        return _rm_lit_in_text(text, substrs, regexes, heuristic, tick)

    return hook
0388 
0389 
def remove_literals_msg(msg, cat):
    """
    Strip literal segments from all applicable text fields of the message,
    as L{remove_literals_text} would do for each of them [type F4A hook].

    @return: number of errors
    """

    substrs, regexes, heuristic = _literals_spec(msg, cat)
    return _rm_lit_in_msg(msg, cat, substrs, regexes, heuristic)
0400 
0401 
def remove_literals_msg_tick(tick):
    """
    Build a hook which replaces literal segments with a "tick" in all
    applicable text fields of the message, as a hook made by
    L{remove_literals_text_tick} would do for each of them [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F4A hook
    @rtype: C{(msg, cat) -> numerr}
    """

    def hook(msg, cat):
        substrs, regexes, heuristic = _literals_spec(msg, cat)
        return _rm_lit_in_msg(msg, cat, substrs, regexes, heuristic, tick)

    return hook
0419 
0420 
def remove_marlits_text(text, msg, cat):
    """
    Strip markup-designated literals from one text field of the message
    [type F3A hook].

    This is an "intersection" of L{remove_markup_text} and
    L{remove_literals_text}: literal segments are recognized by their
    markup, and the segment text is removed together with the markup.
    See those hooks for what counts as a literal and for how
    the markup type is determined.

    @return: text
    """

    # Only markup-derived regexes apply: no explicit substrings,
    # no heuristic removal.
    regexes = _marlit_rxs(msg, cat)
    return _rm_lit_in_text(text, [], regexes, False)
0436 
0437 
def remove_marlits_msg(msg, cat):
    """
    Strip markup-designated literals from all applicable text fields of
    the message, as L{remove_marlits_text} would do for each of them
    [type F4A hook].

    @return: number of errors
    """

    # Only markup-derived regexes apply: no explicit substrings,
    # no heuristic removal.
    regexes = _marlit_rxs(msg, cat)
    return _rm_lit_in_msg(msg, cat, [], regexes, False)
0449 
0450 
class _Cache:
    """Bare attribute holder for module-level cached state."""


# State shared between calls of _marlit_rxs().
_marlit_cache = _Cache()
vars(_marlit_cache).update(
    # Markup types for which the tags and regexes below were built.
    mtypes=None,
    # Names of tags which designate literal content.
    tags=set(),
    # Regexes matching literal-tagged segments.
    rxs=[],
    # Matches an extracted comment of the form "tag: <name>".
    acmnt_tag_rx=re.compile(r"^\s*tag:\s*(\w+)\s*$", re.I),
    # Matches the whole text, for messages entirely under a literal tag.
    rxs_all=[re.compile(r".*", re.S)],
)
0458 
def _marlit_rxs (msg, cat):
    """
    Return regexes matching segments which the markup designates as literal.

    The set of literal tags and the regex built from it are cached at
    module level, and rebuilt only when the markup types reported by
    C{cat.markup()} differ from those seen on the previous call.

    If an extracted comment of the message has the form C{tag: <name>}
    and the name is one of the literal tags, a regex matching the whole
    text is returned instead.

    @return: compiled regexes
    @rtype: [regex]
    """

    # Update regex cache due to markup type.
    mtypes = cat.markup()
    if _marlit_cache.mtypes != mtypes:
        tags = set()
        for mtype in mtypes or []:
            tags.update(_marlit_tags(mtype))
        _marlit_cache.mtypes = mtypes
        _marlit_cache.tags = tags
        # A single regex over the union of tags is enough; building one
        # per markup type from the accumulated tags only produced
        # overlapping duplicates. With no tags at all there must be
        # no regex, as an empty alternation would make a degenerate one.
        _marlit_cache.rxs = [_build_tagged_rx(tags)] if tags else []

    # Check if the whole message is under a literal tag.
    for acmnt in msg.auto_comment:
        m = _marlit_cache.acmnt_tag_rx.search(acmnt)
        if m and m.group(1).strip().lower() in _marlit_cache.tags:
            return _marlit_cache.rxs_all

    return _marlit_cache.rxs
0481 
0482 
def _marlit_tags (mtype):
    """
    Return names of tags which designate literal content
    in the given markup type.

    The markup type is matched case-insensitively, same as when
    removing markup. Types without literal tags, and unknown types,
    yield an empty set.

    @param mtype: markup type name
    @type mtype: string

    @return: tag names
    @rtype: set of strings
    """

    kde_semantic = "icode bcode filename envar command numid"
    docbook = ("literal filename envar command option function markup "
               "varname screen programlisting userinput computeroutput")
    tags_by_mtype = {
        "html": "tt code",
        "kde4": kde_semantic + " tt code",
        "qtrich": "tt code",
        "kuit": kde_semantic,
        "docbook4": docbook,
        "docbook": docbook,
        "xml": "",
    }

    tags = tags_by_mtype.get(mtype.lower(), "")
    return set(tags.split())
0513 
0514 
def _build_tagged_rx(tags):
    """
    Compile a regex matching any segment wrapped in one of the given tags,
    from the opening tag through the matching closing tag.

    Matching is case-insensitive and spans newlines.

    @param tags: tag names, as a sequence or a whitespace-separated string

    @return: compiled regex
    """

    names = tags.split() if isinstance(tags, str) else list(tags)
    # For proper regex matching, tags that begin with a substring
    # equal to another full tag must come before that full tag:
    # order by decreasing length, then alphabetically.
    names.sort(key=lambda name: (-len(name), name))
    alternation = "|".join(names)
    pattern = r"<\s*(%s)\b[^<]*>.*?<\s*/\s*\1\s*>" % alternation

    return re.compile(pattern, re.I|re.S)
0527 
0528 
def remove_ignored_entities_msg(msg, cat):
    """
    Strip locally ignored entities from C{msgid}, C{msgid_plural} and
    all C{msgstr} fields of the message [type F4A hook].

    Entities to ignore are listed in embedded C{ignore-entity} fields
    in manual comments, separated by commas. For example::

        # ignore-entity: foobar, froobaz

    removes entities C{&foobar;} and C{&froobaz;} from the text fields.

    @return: number of errors
    """

    ignored = manc_parse_list(msg, "ignore-entity:", ",")
    if ignored:
        msg.msgid = _rm_ent_in_text(msg.msgid, ignored)
        if msg.msgid_plural:
            msg.msgid_plural = _rm_ent_in_text(msg.msgid_plural, ignored)
        for pos in range(len(msg.msgstr)):
            msg.msgstr[pos] = _rm_ent_in_text(msg.msgstr[pos], ignored)

    return 0
0555 
0556 
def _rm_ent_in_text(text, entities):
    """
    Delete every occurrence of C{&name;} from the text,
    for each of the given entity names.

    @return: text
    """

    stripped = text
    for name in entities:
        reference = "&%s;" % name
        stripped = stripped.replace(reference, "")

    return stripped
0563 
0564 
def rewrite_msgid (msg, cat):
    """
    Rewrite parts of C{msgid} based on translator comments [type F4A hook].

    Translator comments may issue C{rewrite-msgid} directives
    to modify parts of C{msgid} (as well as C{msgid_plural}) fields
    by applying a search regular expression and replace pattern.
    The search and replace pattern are wrapped and separated
    by any character consistently used, such as slashes.
    Examples::

        # rewrite-msgid: /foo/bar/
        # rewrite-msgid: /foo (\\w+) fam/bar \\1 bam/
        # rewrite-msgid: :foo/bar:foo/bam:

    If a directive is empty, wrongly separated, has an invalid search
    pattern, or fails on application, a warning on message is issued
    and the directive is counted as an error.

    Search pattern is case-sensitive.

    @return: number of errors
    """

    nerrors = 0

    # Collect and compile regular expressions.
    fname = "rewrite-msgid"
    rwspecs = manc_parse_field_values(msg, fname)
    rwrxs = []
    for rwspec in rwspecs:
        # The first character of the directive is the separator.
        sep = rwspec[0:1]
        if not sep:
            warning_on_msg(_("@info",
                             "No patterns in rewrite directive."), msg, cat)
            nerrors += 1
            continue
        # A proper directive splits into exactly: empty, search,
        # replace, empty.
        lst = rwspec.split(sep)
        if len(lst) != 4 or lst[0] or lst[3]:
            warning_on_msg(_("@info",
                             "Wrongly separated patterns in "
                             "rewrite directive '%(dir)s'.", dir=rwspec),
                             msg, cat)
            nerrors += 1
            continue
        srch, repl = lst[1], lst[2]
        try:
            rx = re.compile(srch, re.U)
        except Exception:
            # Report any failure on a translator-supplied pattern,
            # but let KeyboardInterrupt and SystemExit through
            # (a bare except would swallow them too).
            warning_on_msg(_("@info",
                             "Invalid search pattern in "
                             "rewrite directive '%(dir)s'.", dir=rwspec),
                             msg, cat)
            nerrors += 1
            continue
        rwrxs.append((rx, repl, rwspec))

    # Apply valid directives in order of appearance.
    for rx, repl, rwspec in rwrxs:
        try:
            msg.msgid = rx.sub(repl, msg.msgid)
            if msg.msgid_plural is not None:
                msg.msgid_plural = rx.sub(repl, msg.msgid_plural)
        except Exception:
            # E.g. replace pattern referring to a non-existing group.
            warning_on_msg(_("@info",
                             "Error in application of "
                             "rewrite directive '%(dir)s'.", dir=rwspec),
                             msg, cat)
            nerrors += 1

    return nerrors
0633 
0634 
def rewrite_inverse (msg, cat):
    """
    Rewrite message by replacing all its elements with that of another message
    which has the same C{msgstr[0]} [type F4A hook].

    Translator comments may issue C{rewrite-inverse} directives
    to replace all message parts with those from another message
    having the same C{msgstr[0]} field.
    The argument to the directive is a regular expression search pattern
    on C{msgid} and C{msgctxt} (leading and trailing whitespace get stripped)
    which is used to select the particular message if more than
    one other messages have same C{msgstr[0]}.
    Examples::

        # rewrite-inverse:
        # rewrite-inverse: Foo

    If the pattern does not match or it matches more than one other message,
    current message is not touched; also if the pattern
    is left empty and there is more than one other message.
    Search pattern is applied to C{msgctxt} and C{msgid} in turn,
    and the message is matched if any matches.
    Search pattern is case-sensitive.

    If more than one C{rewrite-inverse} directive is seen,
    or the search pattern is not valid, a warning on message is issued
    and current message is not touched.

    This hook is then executed again on the resulting message,
    in case the new translator comments contain another
    C{rewrite-inverse} directive.

    @return: number of errors
    """

    # Collect and compile regular expressions.
    fname = "rewrite-inverse"
    rwspecs = manc_parse_field_values(msg, fname)
    if not rwspecs:
        return 0
    if len(rwspecs) > 1:
        warning_on_msg(_("@info",
                         "More than one inverse rewrite directive "
                         "encountered."),
                       msg, cat)
        return 1

    srch = rwspecs[0]
    try:
        rx = re.compile(srch, re.U)
    except Exception:
        # Report any failure on a translator-supplied pattern,
        # but let KeyboardInterrupt and SystemExit through
        # (a bare except would swallow them too).
        warning_on_msg(_("@info",
                         "Invalid search pattern '%(pattern)s' in "
                         "inverse rewrite directive.", pattern=srch),
                       msg, cat)
        return 1

    msgs = cat.select_by_msgstr(msg.msgstr[0], lazy=True)
    msgs = [x for x in msgs if x.key != msg.key] # remove current
    if not msgs:
        warning_on_msg(_("@info",
                         "There are no other messages with same translation, "
                         "needed by inverse rewrite directive."),
                       msg, cat)
        return 1

    def match (x):
        # Context is tried first (when present), then the original text.
        return (   (x.msgctxt is not None and rx.search(x.msgctxt))
                or rx.search(x.msgid))

    sel_msgs = [x for x in msgs if match(x)] # remove non-matched
    if not sel_msgs:
        warning_on_msg(_("@info",
                         "Inverse rewrite directive matches none of "
                         "the other messages with same translation."),
                       msg, cat)
        return 1
    if len(sel_msgs) > 1:
        warning_on_msg(_("@info",
                         "Inverse rewrite directive matches more than "
                         "one other message with same translation."),
                       msg, cat)
        return 1

    # Copy all parts of the other message.
    omsg = sel_msgs[0]
    msg.msgid = omsg.msgid
    if msg.msgid_plural is not None and omsg.msgid_plural is not None:
        msg.msgid_plural = omsg.msgid_plural

    # Copy comments and recurse.
    # NOTE(review): nothing here guards against directives which refer
    # to each other in a cycle -- confirm whether that can occur.
    msg.set(omsg)
    nerrors = rewrite_inverse(msg, cat)

    return nerrors
0728 
0729 
# Matches an XML-like entity reference, such as "&foo;" or "&foo.bar-baz;".
_ent_rx = re.compile(r"&[\w.:-]+;", re.U)


def remove_paired_ents(msg, cat):
    """
    Strip all XML-like entities from the original text, and from
    the translation those which are also present in the original
    [type F4A hook].

    This can be useful before markup checks, when the list of
    known entities is not available.

    @return: number of errors
    """

    nerrors = _rm_paired_ents(msg, cat)
    return nerrors
0745 
0746 
def remove_paired_ents_tick(tick):
    """
    Build a hook working like L{remove_paired_ents}, but which puts
    a non-whitespace "tick" in place of each XML-like entity instead
    of removing it outright [hook factory].

    @param tick: the tick sequence
    @type tick: string

    @return: type F4A hook
    @rtype: C{(msg, cat) -> numerr}
    """

    def hook(msg, cat):
        nerrors = _rm_paired_ents(msg, cat, tick)
        return nerrors

    return hook
0764 
0765 
def _rm_paired_ents(msg, cat, tick=''):
    """
    Replace XML-like entities in the message with the tick, in place.

    Every entity found in C{msgid} and C{msgid_plural} is replaced there;
    in each C{msgstr} only the entities which were also found in
    the original are replaced. The C{cat} argument is not used.

    @param tick: replacement for each entity (empty string removes it)

    @return: number of errors (always 0)
    """

    # Entities seen in the original text so far.
    known = set(_ent_rx.findall(msg.msgid))
    for entity in known:
        msg.msgid = msg.msgid.replace(entity, tick)

    if msg.msgid_plural:
        known.update(_ent_rx.findall(msg.msgid_plural))
        for entity in known:
            msg.msgid_plural = msg.msgid_plural.replace(entity, tick)

    for pos in range(len(msg.msgstr)):
        translated = set(_ent_rx.findall(msg.msgstr[pos]))
        for entity in translated.intersection(known):
            msg.msgstr[pos] = msg.msgstr[pos].replace(entity, tick)

    return 0
0784