File indexing completed on 2024-11-10 11:08:53

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Find messages in catalogs.
0005 
0006 Documented in C{doc/user/sieving.docbook}.
0007 
0008 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0009 @license: GPLv3
0010 """
0011 
0012 import locale
0013 import os
0014 import re
0015 import sys
0016 
0017 from pology import _, n_
0018 from pology.message import MessageUnsafe
0019 from pology.remove import remove_accel_msg
0020 from pology.fsops import str_to_unicode
0021 from pology.getfunc import get_hook_ireq
0022 from pology.match import make_msg_matcher, make_matcher, make_filtered_msg
0023 from pology.match import ExprError
0024 from pology.msgreport import report_msg_content
0025 from pology.msgreport import report_msg_to_lokalize
0026 from pology.report import report, error, warning, format_item_list
0027 from pology.sieve import SieveError
0028 from pology.sieve import add_param_poeditors
0029 from functools import reduce
0030 
0031 
def setup_sieve (p):
    """
    Declare the description and the parameters of this sieve.

    The sieve description is set through C{p.set_desc}, and every
    parameter is registered through C{p.add_param}; at the end,
    C{add_param_poeditors} is called on C{p} as well.

    @param p: the object on which the description and parameters are set
    """

    p.set_desc(_("@info sieve discription",
    "Find messages in catalogs."
    "\n\n"
    "Each message is matched according to one or several criteria, "
    "and if it matches as whole, it is displayed to standard output, "
    "along with the catalog path and referent line and entry number."
    "\n\n"
    "When several matching parameters are given, by default a message "
    "is matched if all of them match (AND-relation). "
    "This can be changed to OR-relation for matching in text fields "
    "(%(fieldlist)s) using the '%(par)s' parameter. "
    "Any matching parameter can be repeated when it makes sense "
    "(e.g. two matches on msgid).",
    fieldlist=format_item_list(["msgctxt", "msgid", "msgstr", "comment"]),
    par="or"
    ))

    # NOTE: Do not add default values for matchers,
    # we need None to see if they were issued or not.

    # Matchers come in pairs: the direct one, and its negation
    # under the same name prefixed with "n".

    # - regular expression matchers on text fields
    p.add_param("msgid", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if the '%(field)s' field matches the regular expression.",
    field="msgid"
    ))
    p.add_param("nmsgid", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if the '%(field)s' field does not match the regular expression.",
    field="msgid"
    ))
    p.add_param("msgstr", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if the '%(field)s' field matches the regular expression.",
    field="msgstr"
    ))
    p.add_param("nmsgstr", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if the '%(field)s' field does not match the regular expression.",
    field="msgstr"
    ))
    p.add_param("msgctxt", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if the '%(field)s' field matches the regular expression.",
    field="msgctxt"
    ))
    p.add_param("nmsgctxt", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if the '%(field)s' field does not match the regular expression.",
    field="msgctxt"
    ))
    p.add_param("comment", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if a comment line (extracted or translator) "
    "matches the regular expression."
    ))
    p.add_param("ncomment", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if a comment line (extracted or translator) "
    "does not match the regular expression."
    ))

    # - matchers on message state
    p.add_param("transl", bool,
                desc=_("@info sieve parameter discription",
    "Matches if the message is translated."
    ))
    p.add_param("ntransl", bool,
                desc=_("@info sieve parameter discription",
    "Matches if the message is not translated."
    ))
    p.add_param("obsol", bool,
                desc=_("@info sieve parameter discription",
    "Matches if the message is obsolete."
    ))
    p.add_param("nobsol", bool,
                desc=_("@info sieve parameter discription",
    "Matches if the message is not obsolete."
    ))
    p.add_param("active", bool,
                desc=_("@info sieve parameter discription",
    "Matches if the message is active (translated and not obsolete)."
    ))
    p.add_param("nactive", bool,
                desc=_("@info sieve parameter discription",
    "Matches if the message is not active (not translated or obsolete)."
    ))
    p.add_param("flag", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if one of the flags matches the regular expression."
    ))
    p.add_param("nflag", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Matches if none of the flags matches the regular expression."
    ))
    p.add_param("plural", bool,
                desc=_("@info sieve parameter discription",
    "Matches if the message is plural."
    ))
    p.add_param("nplural", bool,
                desc=_("@info sieve parameter discription",
    "Matches if the message is not plural."
    ))

    # - matchers on text length and on position in the catalog
    p.add_param("maxchar", int,
                metavar=_("@info sieve parameter value placeholder", "NUM"),
                desc=_("@info sieve parameter discription",
    "Matches if both the '%(field1)s' and '%(field2)s' field "
    "have at most this many characters "
    "(0 or less means any number of characters).",
    field1="msgid", field2="msgstr"
    ))
    p.add_param("nmaxchar", int,
                metavar=_("@info sieve parameter value placeholder", "NUM"),
                desc=_("@info sieve parameter discription",
    "Matches if either the '%(field1)s' or '%(field2)s' field "
    "have more than this many characters "
    "(0 or less means any number of characters).",
    field1="msgid", field2="msgstr"
    ))
    p.add_param("lspan", str,
                metavar=_("@info sieve parameter value placeholder",
                          "START:END"),
                desc=_("@info sieve parameter discription",
    "Matches if the message line number is in the given range "
    "(including starting line, excluding ending line)."
    ))
    p.add_param("nlspan", str,
                metavar=_("@info sieve parameter value placeholder",
                          "START:END"),
                desc=_("@info sieve parameter discription",
    "Matches if the message line number is not in the given range "
    "(including starting line, excluding ending line)."
    ))
    p.add_param("espan", str,
                metavar=_("@info sieve parameter value placeholder",
                          "START:END"),
                desc=_("@info sieve parameter discription",
    "Matches if the message entry number is in the given range "
    "(including starting entry, excluding ending entry)."
    ))
    p.add_param("nespan", str,
                metavar=_("@info sieve parameter value placeholder",
                          "START:END"),
                desc=_("@info sieve parameter discription",
    "Matches if the message entry number is not in the given range "
    "(including starting entry, excluding ending entry)."
    ))

    # - matchers on summit branches
    p.add_param("branch", str, seplist=True,
                metavar=_("@info sieve parameter value placeholder", "BRANCH"),
                desc=_("@info sieve parameter discription",
    "In summit catalogs, match only messages belonging to given branch. "
    "Several branches can be given as comma-separated list."
    ))
    p.add_param("nbranch", str, seplist=True,
                metavar=_("@info sieve parameter value placeholder", "BRANCH"),
                desc=_("@info sieve parameter discription",
    "Match only messages not belonging to given branch."
    ))

    # - matchers by logical expression
    p.add_param("fexpr", str,
                metavar=_("@info sieve parameter value placeholder",
                          "EXPRESSION"),
                desc=_("@info sieve parameter discription",
    "Matches if the logical expression matches. "
    "The expression is composed of direct matchers (not starting with n*), "
    "explicitly linked with AND, OR, and NOT operators, and parenthesis. "
    "Base matchers taking parameters are given as MATCHER/VALUE/, "
    "where slash can be replaced consistently with any other character. "
    "Global matching modifiers can be overriden using MATCHER/VALUE/MODS, or "
    "MATCHER/MODS for parameterless matchers "
    "(currently available: c/i for case-sensitive/insensitive). "
    "Examples:"
    "\n\n"
    "fexpr:'(msgctxt/foo/ or comment/foo/) and msgid/bar/'"
    "\n\n"
    "fexpr:'msgid/quuk/ and msgstr/Qaak/c'"
    ))
    p.add_param("nfexpr", str,
                metavar=_("@info sieve parameter value placeholder",
                          "EXPRESSION"),
                desc=_("@info sieve parameter discription",
    "Matches if the logical expression does not match."
    ))

    # Parameters which modify matching, and the actions taken on matches.
    # Unlike the matchers above, the boolean ones default to False.
    p.add_param("or", bool, defval=False, attrname="or_match",
                desc=_("@info sieve parameter discription",
    "Use OR-relation for matching text fields: if any of "
    "the patterns matches, the message is matched as whole."
    ))
    p.add_param("invert", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Invert the condition: report messages which do not match."
    ))
    p.add_param("case", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Case-sensitive text matching."
    ))
    p.add_param("accel", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "CHAR"),
                desc=_("@info sieve parameter discription",
    "Character which is used as UI accelerator marker in text fields, "
    "to ignore it on matching. "
    "If a catalog defines accelerator marker in the header, "
    "this value overrides it."
    ))
    p.add_param("mark", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Add '%(flag)s' flag to each matched message.",
    flag=_flag_mark
    ))
    p.add_param("filter", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "HOOK"),
                desc=_("@info sieve parameter discription",
    "F1A hook specification, to filter the msgstr fields through "
    "before matching them. "
    "Several hooks can be specified by repeating the parameter."
    ))
    p.add_param("replace", str,
                metavar=_("@info sieve parameter value placeholder",
                          "REPLSTR"),
                desc=_("@info sieve parameter discription",
    "Replace all substrings matched by msgstr pattern with REPLSTR. "
    "It can include back-references to matched groups (\\1, \\2, etc.)"
    ))
    p.add_param("nomsg", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Do not report message to standard output "
    "(when only the number of matches is wanted)."
    ))
    add_param_poeditors(p)
0268 
0269 
# Flag added to matched messages (and removed from non-matched ones)
# when the 'mark' parameter is issued.
_flag_mark = "match"
0271 
0272 
class Sieve (object):
    """
    Find messages which satisfy the conditions given by sieve parameters.

    Every matched message is counted. Depending on the parameters,
    it may also be reported, have the parts of translation matched by
    the C{msgstr} patterns replaced, get the match flag added,
    and be reported to Lokalize.
    """


    def __init__ (self, params):
        """
        Build the matching function and prepare replacement and filtering.

        @param params: sieve parameters; matcher parameters which
            were not issued are expected to be C{None}

        @raise SieveError: if building a matcher raises C{ExprError},
            or if replacement is requested without a match on C{msgstr}
        """

        # Number of messages matched so far.
        self.nmatch = 0

        self.p = params

        # Build matching function.
        # It takes as arguments: filtered message, message, catalog,
        # and highlight specification (which is filled on matches).

        def make_match_group (names, negatable=False, orlinked=False):
            # Collect matchers for all issued parameters among the names
            # (and their n-prefixed negations if negatable is True),
            # and link them into a single function, by OR if orlinked
            # is True and by AND otherwise.

            names_negs = [(x, False) for x in names]
            if negatable:
                names_negs.extend([(x, True) for x in names])

            matchers = []
            for name, neg in names_negs:
                nname = ("n" + name) if neg else name
                values = getattr(params, nname)
                if values is None: # parameter not given
                    continue
                if not isinstance(values, list):
                    values = [values]
                for value in values:
                    try:
                        if name == "fexpr":
                            m = make_msg_matcher(value, params)
                        else:
                            m = make_matcher(name, value, [], params, neg)
                    except ExprError as e:
                        # Keep the original error as the cause.
                        raise SieveError(str_to_unicode(str(e))) from e
                    matchers.append(m)

            if not matchers:
                # No parameter of this group was issued, so the group
                # must not constrain the match. Without this guard
                # an empty OR-linked group would evaluate to false,
                # and reject every message.
                return lambda *a: True

            # Both any() and all() stop at the first decisive matcher,
            # so matchers are called in the order of issuing
            # and no more of them than necessary.
            if orlinked:
                return lambda *a: any(m(*a) for m in matchers)
            return lambda *a: all(m(*a) for m in matchers)

        # - first matchers which are always AND
        expr_and = make_match_group([
            "transl", "obsol", "active", "plural", "maxchar", "lspan", "espan",
            "flag", "branch",
        ], negatable=True, orlinked=False)

        # - then matchers which can be AND or OR
        expr_andor = make_match_group([
            "msgctxt", "msgid", "msgstr", "comment",
            "fexpr",
        ], negatable=True, orlinked=self.p.or_match)

        # - all together
        self.matcher = lambda *a: expr_and(*a) and expr_andor(*a)

        # Prepare replacement.
        # The patterns given for matching msgstr are compiled here once more,
        # to be used for substitution on matched messages.
        self.replrxs = []
        if self.p.replace is not None:
            if not self.p.msgstr:
                raise SieveError(
                    _("@info",
                      "Cannot perform replacement if match "
                      "on '%(field)s' is not given.",
                      field="msgstr"))
            rxflags = re.U
            if not self.p.case:
                rxflags |= re.I
            self.replrxs = [re.compile(rxstr, rxflags)
                            for rxstr in self.p.msgstr]

        # Resolve filtering hooks.
        self.pfilters = [get_hook_ireq(hreq, abort=True)
                         for hreq in self.p.filter or []]

        # Unless replacement or marking requested, no need to monitor/sync.
        if self.p.replace is None and not self.p.mark:
            self.caller_sync = False
            self.caller_monitored = False


    def process_header (self, hdr, cat):
        """
        Set the accelerator markers on the catalog, if any were issued.

        @param hdr: the catalog header (not used here)
        @param cat: the catalog being processed
        """

        # Force explicitly given accelerators.
        if self.p.accel is not None:
            cat.set_accelerator(self.p.accel)


    def process (self, msg, cat):
        """
        Match the message, and act on it according to sieve parameters.

        Returns 0 if the message is matched, 1 otherwise.

        @param msg: the message to match (modified in place on
            replacement and marking)
        @param cat: the catalog to which the message belongs
        """

        # Prepare filtered message for matching.
        msgf = make_filtered_msg(msg, cat, filters=self.pfilters)

        # Match the message.
        hl_spec = []
        match = self.matcher(msgf, msg, cat, hl_spec)
        if self.p.invert:
            match = not match

        if match:
            self.nmatch += 1

            # Do the replacement in translation if requested.
            # NOTE: Use the real, not the filtered message.
            # Texts are assigned by index, so that the msgstr sequence
            # of the message itself gets modified.
            for regex in self.replrxs:
                for i in range(len(msg.msgstr)):
                    msg.msgstr[i] = regex.sub(self.p.replace, msg.msgstr[i])

            if not self.p.nomsg:
                delim = "-" * 20
                # Only the first reported message needs the leading delimiter.
                if self.nmatch == 1:
                    report(delim)
                report_msg_content(msg, cat, wrapf=cat.wrapf(), force=True,
                                   delim=delim, highlight=hl_spec)

            if self.p.mark:
                msg.flag.add(_flag_mark)

            if self.p.lokalize:
                report_msg_to_lokalize(msg, cat)

        elif self.p.mark and _flag_mark in msg.flag:
            # Remove the flag if present but the message does not match.
            msg.flag.remove(_flag_mark)

        return 0 if match else 1


    def finalize (self):
        """
        Report the number of matched messages, if there were any.
        """

        if self.nmatch:
            msg = n_("@info:progress",
                     "Found %(num)d message satisfying the conditions.",
                     "Found %(num)d messages satisfying the conditions.",
                     num=self.nmatch)
            report("===== " + msg)
0419