File indexing completed on 2024-12-08 08:11:28
# -*- coding: UTF-8 -*-

"""
Find messages in catalogs.

Documented in C{doc/user/sieving.docbook}.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import locale
import os
import re
import sys
from functools import reduce

from pology import _, n_
from pology.message import MessageUnsafe
from pology.remove import remove_accel_msg
from pology.fsops import str_to_unicode
from pology.getfunc import get_hook_ireq
from pology.match import make_msg_matcher, make_matcher, make_filtered_msg
from pology.match import ExprError
from pology.msgreport import report_msg_content
from pology.msgreport import report_msg_to_lokalize
from pology.report import report, error, warning, format_item_list
from pology.sieve import SieveError
from pology.sieve import add_param_poeditors


def setup_sieve(p):
    """
    Declare the description and all parameters of this sieve on C{p}.

    Every matcher parameter is registered together with its negated
    counterpart (same name prefixed with C{n}); L{Sieve.__init__} relies
    on that naming when it collects the matchers.

    @param p: object on which C{set_desc()} and C{add_param()} are called
    """

    # NOTE: The message texts below are kept as literal arguments of _()
    # calls on purpose (presumably needed for message extraction), and
    # their wording -- including the context strings -- is left as-is
    # so that existing translations keep matching.

    p.set_desc(_(
        "@info sieve discription",
        "Find messages in catalogs."
        "\n\n"
        "Each message is matched according to one or several criteria, "
        "and if it matches as whole, it is displayed to standard output, "
        "along with the catalog path and referent line and entry number."
        "\n\n"
        "When several matching parameters are given, by default a message "
        "is matched if all of them match (AND-relation). "
        "This can be changed to OR-relation for matching in text fields "
        "(%(fieldlist)s) using the '%(par)s' parameter. "
        "Any matching parameter can be repeated when it makes sense "
        "(e.g. two matches on msgid).",
        fieldlist=format_item_list(["msgctxt", "msgid", "msgstr", "comment"]),
        par="or"
    ))

    # NOTE: Do not add default values for matchers,
    # we need None to see if they were issued or not.
    p.add_param(
        "msgid", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the '%(field)s' field matches the regular expression.",
            field="msgid"
        ))
    p.add_param(
        "nmsgid", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the '%(field)s' field does not match the regular expression.",
            field="msgid"
        ))
    p.add_param(
        "msgstr", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the '%(field)s' field matches the regular expression.",
            field="msgstr"
        ))
    p.add_param(
        "nmsgstr", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the '%(field)s' field does not match the regular expression.",
            field="msgstr"
        ))
    p.add_param(
        "msgctxt", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the '%(field)s' field matches the regular expression.",
            field="msgctxt"
        ))
    p.add_param(
        "nmsgctxt", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the '%(field)s' field does not match the regular expression.",
            field="msgctxt"
        ))
    p.add_param(
        "comment", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if a comment line (extracted or translator) "
            "matches the regular expression."
        ))
    p.add_param(
        "ncomment", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if a comment line (extracted or translator) "
            "does not match the regular expression."
        ))
    p.add_param(
        "transl", bool,
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message is translated."
        ))
    p.add_param(
        "ntransl", bool,
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message is not translated."
        ))
    p.add_param(
        "obsol", bool,
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message is obsolete."
        ))
    p.add_param(
        "nobsol", bool,
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message is not obsolete."
        ))
    p.add_param(
        "active", bool,
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message is active (translated and not obsolete)."
        ))
    p.add_param(
        "nactive", bool,
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message is not active (not translated or obsolete)."
        ))
    p.add_param(
        "flag", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if one of the flags matches the regular expression."
        ))
    p.add_param(
        "nflag", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "REGEX"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if none of the flags matches the regular expression."
        ))
    p.add_param(
        "plural", bool,
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message is plural."
        ))
    p.add_param(
        "nplural", bool,
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message is not plural."
        ))
    p.add_param(
        "maxchar", int,
        metavar=_("@info sieve parameter value placeholder", "NUM"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if both the '%(field1)s' and '%(field2)s' field "
            "have at most this many characters "
            "(0 or less means any number of characters).",
            field1="msgid", field2="msgstr"
        ))
    p.add_param(
        "nmaxchar", int,
        metavar=_("@info sieve parameter value placeholder", "NUM"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if either the '%(field1)s' or '%(field2)s' field "
            "have more than this many characters "
            "(0 or less means any number of characters).",
            field1="msgid", field2="msgstr"
        ))
    p.add_param(
        "lspan", str,
        metavar=_("@info sieve parameter value placeholder",
                  "START:END"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message line number is in the given range "
            "(including starting line, excluding ending line)."
        ))
    p.add_param(
        "nlspan", str,
        metavar=_("@info sieve parameter value placeholder",
                  "START:END"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message line number is not in the given range "
            "(including starting line, excluding ending line)."
        ))
    p.add_param(
        "espan", str,
        metavar=_("@info sieve parameter value placeholder",
                  "START:END"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message entry number is in the given range "
            "(including starting entry, excluding ending entry)."
        ))
    p.add_param(
        "nespan", str,
        metavar=_("@info sieve parameter value placeholder",
                  "START:END"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the message entry number is not in the given range "
            "(including starting entry, excluding ending entry)."
        ))
    p.add_param(
        "branch", str, seplist=True,
        metavar=_("@info sieve parameter value placeholder", "BRANCH"),
        desc=_(
            "@info sieve parameter discription",
            "In summit catalogs, match only messages belonging to given branch. "
            "Several branches can be given as comma-separated list."
        ))
    p.add_param(
        "nbranch", str, seplist=True,
        metavar=_("@info sieve parameter value placeholder", "BRANCH"),
        desc=_(
            "@info sieve parameter discription",
            "Match only messages not belonging to given branch."
        ))
    p.add_param(
        "fexpr", str,
        metavar=_("@info sieve parameter value placeholder",
                  "EXPRESSION"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the logical expression matches. "
            "The expression is composed of direct matchers (not starting with n*), "
            "explicitly linked with AND, OR, and NOT operators, and parenthesis. "
            "Base matchers taking parameters are given as MATCHER/VALUE/, "
            "where slash can be replaced consistently with any other character. "
            "Global matching modifiers can be overriden using MATCHER/VALUE/MODS, or "
            "MATCHER/MODS for parameterless matchers "
            "(currently available: c/i for case-sensitive/insensitive). "
            "Examples:"
            "\n\n"
            "fexpr:'(msgctxt/foo/ or comment/foo/) and msgid/bar/'"
            "\n\n"
            "fexpr:'msgid/quuk/ and msgstr/Qaak/c'"
        ))
    p.add_param(
        "nfexpr", str,
        metavar=_("@info sieve parameter value placeholder",
                  "EXPRESSION"),
        desc=_(
            "@info sieve parameter discription",
            "Matches if the logical expression does not match."
        ))
    p.add_param(
        "or", bool, defval=False, attrname="or_match",
        desc=_(
            "@info sieve parameter discription",
            "Use OR-relation for matching text fields: if any of "
            "the patterns matches, the message is matched as whole."
        ))
    p.add_param(
        "invert", bool, defval=False,
        desc=_(
            "@info sieve parameter discription",
            "Invert the condition: report messages which do not match."
        ))
    p.add_param(
        "case", bool, defval=False,
        desc=_(
            "@info sieve parameter discription",
            "Case-sensitive text matching."
        ))
    p.add_param(
        "accel", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "CHAR"),
        desc=_(
            "@info sieve parameter discription",
            "Character which is used as UI accelerator marker in text fields, "
            "to ignore it on matching. "
            "If a catalog defines accelerator marker in the header, "
            "this value overrides it."
        ))
    p.add_param(
        "mark", bool, defval=False,
        desc=_(
            "@info sieve parameter discription",
            "Add '%(flag)s' flag to each matched message.",
            flag=_flag_mark
        ))
    p.add_param(
        "filter", str, multival=True,
        metavar=_("@info sieve parameter value placeholder", "HOOK"),
        desc=_(
            "@info sieve parameter discription",
            "F1A hook specification, to filter the msgstr fields through "
            "before matching them. "
            "Several hooks can be specified by repeating the parameter."
        ))
    p.add_param(
        "replace", str,
        metavar=_("@info sieve parameter value placeholder",
                  "REPLSTR"),
        desc=_(
            "@info sieve parameter discription",
            "Replace all substrings matched by msgstr pattern with REPLSTR. "
            "It can include back-references to matched groups (\\1, \\2, etc.)"
        ))
    p.add_param(
        "nomsg", bool, defval=False,
        desc=_(
            "@info sieve parameter discription",
            "Do not report message to standard output "
            "(when only the number of matches is wanted)."
        ))
    add_param_poeditors(p)


# Flag added to matched messages when the 'mark' parameter is issued
# (and removed from non-matching ones, see Sieve.process).
_flag_mark = "match"


def _negate(matcher):
    """
    Wrap a matcher so that it reports the logical inverse of its result.

    All arguments are passed through unchanged to the wrapped matcher.
    """

    return lambda *args: not matcher(*args)


class Sieve(object):
    """
    Sieve which counts and reports messages satisfying the given criteria.

    Matching criteria are read from the parameters declared by
    L{setup_sieve}. On request, matched messages also get their
    translation rewritten (C{replace}) and/or get flagged (C{mark}).
    """

    def __init__(self, params):
        """
        Build the message matcher and auxiliary state from parameters.

        @param params: parameter values; one attribute per parameter
            declared in L{setup_sieve}
        @raises SieveError: if a matcher cannot be constructed from
            its value, or if replacement is requested without
            a C{msgstr} pattern
        """

        self.nmatch = 0

        self.p = params

        # Build matching function.
        # It takes as arguments: filtered message, message, catalog,
        # and highlight specification (which is filled on matches).

        def make_match_group(names, negatable=False, orlinked=False):
            # Collect matchers for all issued parameters among names
            # (and their n-prefixed negations if negatable), and link
            # them into a single matcher by OR or by AND.

            matchers = []
            # Direct matchers first, then the negated ones.
            for neg in ((False, True) if negatable else (False,)):
                for name in names:
                    pname = "n" + name if neg else name
                    values = getattr(params, pname)
                    if values is None:  # parameter not given
                        continue
                    if not isinstance(values, list):
                        values = [values]
                    for value in values:
                        try:
                            if name == "fexpr":
                                m = make_msg_matcher(value, params)
                                # The expression matcher is built without
                                # any negation indicator, so negation for
                                # 'nfexpr' must be applied here; otherwise
                                # it would behave exactly like 'fexpr'.
                                if neg:
                                    m = _negate(m)
                            else:
                                m = make_matcher(name, value, [], params, neg)
                        except ExprError as e:
                            raise SieveError(str_to_unicode(str(e))) from e
                        matchers.append(m)

            # A group for which nothing was issued must not constrain
            # the match. This matters in the OR case: an empty
            # disjunction is false, which would reject every message
            # when 'or' is given without any text field matcher.
            if not matchers:
                return lambda *a: True

            # any()/all() stop at the first deciding matcher, so matchers
            # after it are not called (they may fill the highlight spec).
            if orlinked:
                return lambda *a: any(m(*a) for m in matchers)
            return lambda *a: all(m(*a) for m in matchers)

        # - first matchers which are always AND
        expr_and = make_match_group([
            "transl", "obsol", "active", "plural", "maxchar", "lspan", "espan",
            "flag", "branch",
        ], negatable=True, orlinked=False)

        # - then matchers which can be AND or OR
        expr_andor = make_match_group([
            "msgctxt", "msgid", "msgstr", "comment",
            "fexpr",
        ], negatable=True, orlinked=self.p.or_match)

        # - all together
        self.matcher = lambda *a: expr_and(*a) and expr_andor(*a)

        # Prepare replacement.
        # The msgstr patterns are compiled here once more, since
        # they are used directly for substitution in process().
        self.replrxs = []
        if self.p.replace is not None:
            if not self.p.msgstr:
                raise SieveError(
                    _("@info",
                      "Cannot perform replacement if match "
                      "on '%(field)s' is not given.",
                      field="msgstr"))
            rxflags = re.U
            if not self.p.case:
                rxflags |= re.I
            self.replrxs = [re.compile(rxstr, rxflags)
                            for rxstr in self.p.msgstr]

        # Resolve filtering hooks.
        self.pfilters = [get_hook_ireq(hreq, abort=True)
                         for hreq in self.p.filter or []]

        # Unless replacement or marking requested, no need to monitor/sync.
        if self.p.replace is None and not self.p.mark:
            self.caller_sync = False
            self.caller_monitored = False

    def process_header(self, hdr, cat):
        """
        Set accelerator markers on the catalog if explicitly given.

        @param hdr: catalog header (not used here)
        @param cat: catalog about to be processed
        """

        # Force explicitly given accelerators.
        if self.p.accel is not None:
            cat.set_accelerator(self.p.accel)

    def process(self, msg, cat):
        """
        Match the message, and report and modify it as requested.

        A matched message is counted, has its C{msgstr} fields
        rewritten if replacement was requested, is written out unless
        C{nomsg} is set, and gets flagged if C{mark} is set.
        A non-matching message loses the mark flag if C{mark} is set.

        Returns 0 if the message is matched, 1 otherwise.
        """

        # Prepare filtered message for matching.
        msgf = make_filtered_msg(msg, cat, filters=self.pfilters)

        # Match the message.
        hl_spec = []
        match = self.matcher(msgf, msg, cat, hl_spec)
        if self.p.invert:
            match = not match

        if match:
            self.nmatch += 1

            # Do the replacement in translation if requested.
            # NOTE: Use the real, not the filtered message.
            for regex in self.replrxs:
                for i, text in enumerate(msg.msgstr):
                    msg.msgstr[i] = regex.sub(self.p.replace, text)

            if not self.p.nomsg:
                delim = "-" * 20
                # Opening delimiter only before the first reported message.
                if self.nmatch == 1:
                    report(delim)
                report_msg_content(msg, cat, wrapf=cat.wrapf(), force=True,
                                   delim=delim, highlight=hl_spec)

            if self.p.mark:
                msg.flag.add(_flag_mark)

            # NOTE(review): 'lokalize' is not declared in setup_sieve
            # directly; presumably added by add_param_poeditors -- confirm.
            if self.p.lokalize:
                report_msg_to_lokalize(msg, cat)

        elif self.p.mark and _flag_mark in msg.flag:
            # Remove the flag if present but the message does not match.
            msg.flag.remove(_flag_mark)

        return 0 if match else 1

    def finalize(self):
        """
        Report the total number of matched messages, if any.
        """

        if self.nmatch:
            msg = n_("@info:progress",
                     "Found %(num)d message satisfying the conditions.",
                     "Found %(num)d messages satisfying the conditions.",
                     num=self.nmatch)
            report("===== " + msg)