Warning, file /sdk/pology/sieve/check_rules.py was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
# -*- coding: UTF-8 -*-

"""
Apply language- and project-dependent validation rules to messages.

Documented in C{doc/user/sieving.docbook}.

@author: Sébastien Renard <sebastien.renard@digitalfox.org>
@author: Chusslove Illich <caslav.ilic@gmx.net>
@license: GPLv3
"""

from codecs import open
import os
from os.path import abspath, basename, dirname, exists, expandvars, join
import re
import sys
from time import strftime, strptime, mktime

from pology import _, n_
from pology.colors import cjoin
from pology.comments import manc_parse_list, parse_summit_branches
from pology.fsops import collect_files_by_ext
from pology.message import MessageUnsafe
from pology.msgreport import multi_rule_error, rule_xml_error
from pology.msgreport import report_msg_to_lokalize
from pology.report import report, warning, format_item_list
from pology.rules import loadRules, printStat
from pology.sieve import add_param_lang, add_param_env, add_param_poeditors
from pology.timeout import TimedOutException
from pology.sieve import SieveError, SieveCatalogError, SieveMessageError
from functools import reduce


# Pattern used to marshall path of cached files
_MARSHALL = "+++"
# Cache directory (for xml processing only)
# FIXME: More portable location of cache.
_CACHEDIR = expandvars("$HOME/.pology-check_rules-cache/")

# Flag to add to failed messages, if requested.
_flag_mark = "failed-rule"

# Leading "YYYY-MM-DD HH:MM" part of a PO-Revision-Date header value;
# whatever follows (UTC offset of either sign, zone name) is ignored.
_PO_DATE_RX = re.compile(r"\s*(\d{4}-\d{2}-\d{2} \d{2}:\d{2})")


def setup_sieve (p):
    """
    Declare the description and the parameters of this sieve.

    @param p: parameter collector on which C{set_desc()} and
        C{add_param()} are called
    """

    p.set_desc(_("@info sieve discription",
    "Apply rules to messages and report those that do not pass."
    ))

    add_param_lang(p, appx=_("@info sieve parameter discription",
    "If the language is left undefined for a given catalog, "
    "it will be skipped and a warning may be output."
    ))
    add_param_env(p, appx=_("@info sieve parameter discription",
    "If the environment is left undefined for a given catalog, "
    "only environment-agnostic rules will be applied."
    ))
    p.add_param("stat", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Output statistics on application of rules."
    ))
    p.add_param("envonly", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Load only rules explicitly belonging to environment given by '%(par)s'.",
    par="env"
    ))
    p.add_param("accel", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "CHAR"),
                desc=_("@info sieve parameter discription",
    "Character which is used as UI accelerator marker in text fields. "
    "If a catalog defines accelerator marker in the header, "
    "this value overrides it."
    ))
    # NOTE(review): a space is missing between "overrides it." and
    # "Several markups"; text left as is because it presumably serves as
    # the translation lookup key -- fix together with the PO catalogs.
    p.add_param("markup", str, seplist=True,
                metavar=_("@info sieve parameter value placeholder", "KEYWORD"),
                desc=_("@info sieve parameter discription",
    "Markup that can be expected in text fields, as special keyword "
    "(see documentation to pology.catalog, Catalog.set_markup(), "
    "for markup keywords currently known to Pology). "
    "If a catalog defines markup type in the header, "
    "this value overrides it."
    "Several markups can be given as comma-separated list."
    ))
    p.add_param("rfile", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "PATH"),
                desc=_("@info sieve parameter discription",
    "Load rules from a file, rather than internal Pology rules. "
    "Several rule files can be given by repeating the parameter."
    ))
    # NOTE(review): a space is missing between "Pology rules." and
    # "Several rule directories"; left as is for the same reason as above.
    p.add_param("rdir", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "DIRPATH"),
                desc=_("@info sieve parameter discription",
    "Load rules from a directory, rather than internal Pology rules."
    "Several rule directories can be given by repeating the parameter."
    ))
    p.add_param("showfmsg", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Show filtered message too when reporting message failed by a rule."
    ))
    p.add_param("nomsg", bool, attrname="showmsg", defval=True,
                desc=_("@info sieve parameter discription",
    "Do not show message content at all when reporting failures."
    ))
    p.add_param("rule", str, seplist=True,
                metavar=_("@info sieve parameter value placeholder", "RULEID"),
                desc=_("@info sieve parameter discription",
    "Apply only the rule given by this identifier. "
    "Several identifiers can be given as comma-separated list."
    ))
    p.add_param("rulerx", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Apply only the rules with identifiers matching this regular expression. "
    "Several patterns can be given by repeating the parameter."
    ))
    p.add_param("norule", str, seplist=True,
                metavar=_("@info sieve parameter value placeholder", "RULEID"),
                desc=_("@info sieve parameter discription",
    "Do not apply rule given by this identifier. "
    "Several identifiers can be given as comma-separated list."
    ))
    p.add_param("norulerx", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "REGEX"),
                desc=_("@info sieve parameter discription",
    "Do not apply the rules with identifiers matching this regular expression. "
    "Several patterns can be given by repeating the parameter."
    ))
    p.add_param("branch", str, seplist=True,
                metavar=_("@info sieve parameter value placeholder", "BRANCH"),
                desc=_("@info sieve parameter discription",
    "In summit catalogs, consider only messages belonging to given branch. "
    "Several branches can be given as comma-separated list."
    ))
    p.add_param("xml", str,
                metavar=_("@info sieve parameter value placeholder", "PATH"),
                desc=_("@info sieve parameter discription",
    "Write rule failures into an XML file instead of stdout."
    ))
    p.add_param("mark", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Add '%(flag)s' flag to each message failed by a rule.",
    flag=_flag_mark
    ))
    p.add_param("byrule", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Output failed messages ordered by sorted rule identifiers."
    ))
    p.add_param("ruleinfo", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Show information on loading of rules during sieving."
    ))
    add_param_poeditors(p)


def _po_revision_time (cat):
    """
    Get the revision time of a catalog from its header.

    The first C{PO-Revision-Date} header field is used. Only its leading
    C{YYYY-MM-DD HH:MM} part is read and it is converted with C{mktime()},
    i.e. taken as local time; the UTC offset is ignored, whatever its sign.

    @param cat: catalog whose C{header.field} pairs are searched

    @returns: seconds since epoch, or C{None} if the field is missing
        or its value cannot be parsed
    """

    for name, value in cat.header.field:
        if name != "PO-Revision-Date":
            continue
        m = _PO_DATE_RX.match(value)
        if m is None:
            # Empty or template value; no usable date.
            return None
        try:
            return mktime(strptime(m.group(1), "%Y-%m-%d %H:%M"))
        except (ValueError, OverflowError):
            # Right shape but not a real date (e.g. month 13).
            return None
    return None


class Sieve (object):
    """Find messages matching given rules."""

    def __init__ (self, params):
        """
        Store sieve parameters and, if requested, open the XML report file.

        @param params: sieve parameters as declared in C{setup_sieve()}

        @raises SieveError: if the XML cache directory cannot be created
        """

        self.nmatch = 0 # Number of match for finalize
        self.rules = []   # List of rules objects loaded in memory
        self.xmlFile = None # File handle to write XML output
        self.cacheFile = None # File handle to write XML cache
        self.cachePath = None # Path to cache file
        self.filename = ""     # File name we are processing
        self.cached = False    # Flag to indicate if process result is already is cache

        self.globalLang = params.lang
        self.globalEnvs = params.env
        self.envOnly = params.envonly
        # Loaded (rules, filters) pairs, keyed by (language, environments).
        self._rulesCache = {}

        self.accels = params.accel
        self.markup = params.markup

        self.ruleChoice = params.rule
        self.ruleChoiceRx = params.rulerx
        self.ruleChoiceInv = params.norule
        self.ruleChoiceInvRx = params.norulerx

        self.stat = params.stat
        self.showfmsg = params.showfmsg
        self.showmsg = params.showmsg
        self.lokalize = params.lokalize
        self.mark = params.mark
        self.byrule = params.byrule
        self.ruleinfo = params.ruleinfo

        self.branches = set(params.branch) if params.branch else None

        # Collect non-internal rule files.
        self.customRuleFiles = None
        if params.rfile or params.rdir:
            self.customRuleFiles = []
            if params.rfile:
                self.customRuleFiles.extend(params.rfile)
            if params.rdir:
                for rdir in params.rdir:
                    rfiles = collect_files_by_ext(rdir, "rules")
                    self.customRuleFiles.extend(rfiles)

        # Also output in XML file ?
        if params.xml:
            xmlPath = params.xml
            if os.access(dirname(abspath(xmlPath)), os.W_OK):
                #TODO: create nice api to manage xml file and move it to rules.py
                self.xmlFile = open(xmlPath, "w", "utf-8")
                self.xmlFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
                self.xmlFile.write('<pos date="%s">\n' % strftime('%c'))
            else:
                warning(_("@info",
                          "Cannot open file '%(file)s'. XML output disabled.",
                          file=xmlPath))

        if not exists(_CACHEDIR) and self.xmlFile:
            # Create cache dir (only if we want XML output)
            try:
                os.mkdir(_CACHEDIR)
            except OSError as e:
                raise SieveError(_("@info",
                                   "Cannot create cache directory '%(dir)s':\n"
                                   "%(msg)s",
                                   dir=_CACHEDIR, msg=e))

        if self.byrule:
            # Failures collected per rule identifier, reported in finalize().
            self.postFailedMessages = {}

        self._first_error = True

        # Unless marking requested, no need to monitor/sync.
        if not self.mark:
            self.caller_sync = False
            self.caller_monitored = False


    def process_header (self, hdr, cat):
        """
        Set up accelerators, markup and rules for the given catalog.

        @param hdr: catalog header (not used here)
        @param cat: catalog about to be sieved

        @raises SieveCatalogError: if the language is neither given
            as sieve parameter nor reported by the catalog
        """

        # Force explicitly given accelerators.
        if self.accels is not None:
            cat.set_accelerator(self.accels)

        # Force explicitly given markup.
        if self.markup is not None:
            cat.set_markup(self.markup)

        # Choose (possibly loading) appropriate rules for this catalog.
        self.lang = self.globalLang or cat.language()
        if not self.lang:
            raise SieveCatalogError(
                _("@info",
                  "Cannot determine language for catalog '%(file)s'.",
                  file=cat.filename))
        self.envs = self.globalEnvs or cat.environment() or []
        rkey = (self.lang, tuple(self.envs))
        if rkey not in self._rulesCache:
            self._rulesCache[rkey] = self._loadRules(self.lang, self.envs)
        self.rules, self.ruleFilters = self._rulesCache[rkey]


    def process (self, msg, cat):
        """
        Check one message against the loaded rules and report failures.

        Untranslated messages, and messages outside of the requested
        branches, are skipped. With XML output, the first checked message
        of a catalog either copies a usable cache file into the report
        (the rest of that catalog is then skipped) or starts a new cache.

        @param msg: message to check
        @param cat: catalog the message belongs to

        @raises SieveMessageError: on a malformed C{switch-rule:} comment
        """

        # Apply rules only on translated messages.
        if not msg.translated:
            return

        # Apply rules only to messages from selected branches.
        if self.branches:
            msg_branches = parse_summit_branches(msg)
            if not set.intersection(self.branches, msg_branches):
                return

        filename = basename(cat.filename)

        if self.xmlFile and self.filename != filename:
            # First checked message of another catalog.
            self.cached = False # Reset flag
            self.cachePath = join(_CACHEDIR,
                                  abspath(cat.filename).replace("/", _MARSHALL))
            if self.cacheFile:
                self.cacheFile.close()
                # Drop the closed handle, so that nothing later
                # mistakes it for a cache being written.
                self.cacheFile = None
            if self.filename != "":
                # close previous
                self.xmlFile.write("</po>\n")
            self.filename = filename

            if self._cacheIsCurrent(cat):
                if self.ruleinfo:
                    report(_("@info:progress", "Using cache."))
                with open(self.cachePath, "r", "utf-8") as cacheInput:
                    self.xmlFile.writelines(cacheInput.readlines())
                self.cached = True
                # Cache is used, no need to process catalog.
                return

            # No cache available, create it for next time
            if self.ruleinfo:
                report(_("@info", "No cache available, processing file."))
            self.cacheFile = open(self.cachePath, "w", "utf-8")
            poTag = '<po name="%s">\n' % filename
            self.xmlFile.write(poTag) # Write to result
            self.cacheFile.write(poTag) # Write to cache
        elif self.cached:
            # Current file loaded from cache on previous message.
            return

        locally_ignored, locally_applied = self._localRuleSwitches(msg)

        # NOTE: It would be nice to warn if an explicitly applied rule
        # is not defined, but this is not generally possible because
        # different rule files may be loaded for different runs.

        # Prepare filtered messages for checking.
        envSet = set(self.envs)
        msgByFilter = {}
        for mfilter in self.ruleFilters:
            if mfilter is not None:
                # Filter a copy, the original message stays as it is.
                msgf = MessageUnsafe(msg)
                mfilter(msgf, cat, envSet)
            else:
                msgf = msg
            msgByFilter[mfilter] = msgf

        # Now the sieve itself. Check message with every rules
        failedRules = []
        for rule in self.rules:
            if rule.disabled:
                continue
            if rule.environ and rule.environ not in envSet:
                continue
            if rule.ident in locally_ignored:
                continue
            if rule.manual and rule.ident not in locally_applied:
                continue
            msgf = msgByFilter[rule.mfilter]
            try:
                spans = rule.process(msgf, cat, envs=envSet, nofilter=True)
            except TimedOutException:
                warning(_("@info:progress",
                          "Rule '%(rule)s' timed out, skipping it.",
                          rule=rule.rawPattern))
                continue
            if not spans:
                continue
            self.nmatch += 1
            if self.xmlFile:
                # FIXME: rule_xml_error is actually broken,
                # as it considers matching to always be on msgstr
                # Multiple span are now supported as well as msgstr index

                # Now, write to XML file if defined
                rspans = [x[:2] for x in spans[0][2]]
                pluid = spans[0][1]
                xmlError = rule_xml_error(msg, cat, rule, rspans, pluid)
                self.xmlFile.writelines(xmlError)
                if not self.cached and self.cacheFile is not None:
                    # Write result in cache
                    self.cacheFile.writelines(xmlError)
            if not self.showfmsg:
                msgf = None
            failedRules.append((rule, spans, msgf))

        if not failedRules:
            return

        if not self.byrule:
            multi_rule_error(msg, cat, failedRules, self.showmsg,
                             predelim=self._first_error)
            self._first_error = False
        else:
            # Reporting is postponed to finalize().
            for failedRule in failedRules:
                ident = failedRule[0].ident
                self.postFailedMessages.setdefault(ident, []).append(
                    (msg, cat, failedRule))

        if self.mark:
            msg.flag.add(_flag_mark)

        if self.lokalize:
            repls = [_("@label", "Failed rules:")]
            for frule, highlight, fmsg in failedRules:
                repls.append(_("@item",
                               "rule %(rule)s ==> %(msg)s",
                               rule=frule.displayName, msg=frule.hint))
                for part, item, hspans, fval in highlight:
                    repls.extend(["↳ %s" % x[2]
                                  for x in hspans if len(x) > 2])
            report_msg_to_lokalize(msg, cat, cjoin(repls, "\n"))


    def finalize (self):
        """
        Output postponed reports, close the XML report and print totals.
        """

        if self.byrule:
            for ruleIdent in sorted(self.postFailedMessages):
                for msg, cat, failedRule in self.postFailedMessages[ruleIdent]:
                    multi_rule_error(msg, cat, [failedRule], self.showmsg,
                                     predelim=self._first_error)
                    self._first_error = False

        if self.xmlFile:
            # Flush the cache of the last processed catalog. No closing
            # tag goes into it: on reuse the cache content is copied into
            # the report, which then writes the closing tag itself.
            if self.cacheFile:
                self.cacheFile.close()
                self.cacheFile = None
            # Close last po tag (if any was opened) and xml file
            if self.filename != "":
                self.xmlFile.write("</po>\n")
            self.xmlFile.write("</pos>\n")
            self.xmlFile.close()

        if self.nmatch > 0:
            msg = n_("@info:progress",
                     "Rules detected %(num)d problem.",
                     "Rules detected %(num)d problems.",
                     num=self.nmatch)
            report("===== " + msg)
        printStat(self.rules)


    def _cacheIsCurrent (self, cat):
        """
        Check if the cache file at C{self.cachePath} can stand in
        for checking the catalog.

        @param cat: catalog whose revision date is compared to the cache

        @returns: C{True} if the cache file exists and was modified after
            the revision date from the catalog header
        """

        if not exists(self.cachePath):
            return False
        poTime = _po_revision_time(cat)
        if poTime is None:
            return False
        # Whole seconds, as in the os.stat() result tuple.
        return int(os.stat(self.cachePath).st_mtime) > poTime


    def _localRuleSwitches (self, msg):
        """
        Collect identifiers of rules switched by manual comments.

        Comments C{skip-rule:} and C{apply-rule:} list identifiers
        separated by commas; C{switch-rule: a,b > c,d} lists ignored
        identifiers before the separator and applied ones after it.

        @param msg: message whose manual comments are read

        @returns: lists of ignored and of applied rule identifiers

        @raises SieveMessageError: if a C{switch-rule:} comment
            has no separator
        """

        # Collect explicitly ignored rules by ID for this message.
        ignored = manc_parse_list(msg, "skip-rule:", ",")

        # Collect explicitly applied rules by ID for this message.
        applied = manc_parse_list(msg, "apply-rule:", ",")

        # Collect ignored/applied rules by switching comment.
        swprefix = "switch-rule:"
        swsep = ">"
        for cmnt in msg.manual_comment:
            if not cmnt.strip().startswith(swprefix):
                continue
            p1 = cmnt.find(swprefix) + len(swprefix)
            p2 = cmnt.find(swsep, p1)
            if p2 < 0:
                raise SieveMessageError(
                    _("@info",
                      "Separator character '%(sep)s' missing in "
                      "'%(prefix)s' comment.",
                      sep=swsep, prefix=swprefix))
            els1 = [x.strip() for x in cmnt[p1:p2].split(",")]
            els2 = [x.strip() for x in cmnt[p2 + len(swsep):].split(",")]
            ignored.extend(x for x in els1 if x)
            applied.extend(x for x in els2 if x)

        return ignored, applied


    def _loadRules (self, lang, envs):
        """
        Load rules for the language and environments, then narrow them
        down by the rule selection and exclusion parameters.

        @param lang: language code
        @param envs: environments

        @returns: list of rules (in load order) and set of distinct
            message filters of the rules which are not disabled

        @raises SieveError: if a rule selected or excluded by identifier
            is not among the loaded rules
        """

        # Load rules.
        rules = loadRules(lang, envs,
                          self.envOnly, self.customRuleFiles, self.stat,
                          self.ruleinfo)

        # Perhaps retain only those rules explicitly requested
        # in the command line, by their identifiers.
        selectedRules = set()
        srules = set()
        if self.ruleChoice:
            requestedRules = set([x.strip() for x in self.ruleChoice])
            foundRules = set()
            for rule in rules:
                if rule.ident in requestedRules:
                    srules.add(rule)
                    foundRules.add(rule.ident)
                    # Explicit selection enables the rule.
                    rule.disabled = False
            if foundRules != requestedRules:
                missingRules = list(requestedRules - foundRules)
                fmtMissingRules = format_item_list(sorted(missingRules))
                raise SieveError(_("@info",
                                   "Some explicitly selected rules "
                                   "are missing: %(rulelist)s.",
                                   rulelist=fmtMissingRules))
            selectedRules.update(foundRules)
        if self.ruleChoiceRx:
            identRxs = [re.compile(x, re.U) for x in self.ruleChoiceRx]
            for rule in rules:
                if (rule.ident
                    and any(rx.search(rule.ident) for rx in identRxs)
                ):
                    srules.add(rule)
                    selectedRules.add(rule.ident)
        if self.ruleChoice or self.ruleChoiceRx:
            # Keep load order, so that checking and reporting order
            # does not vary between runs.
            rules = [rule for rule in rules if rule in srules]

        selectedRulesInv = set()
        srules = set(rules)
        if self.ruleChoiceInv:
            requestedRules = set([x.strip() for x in self.ruleChoiceInv])
            foundRules = set()
            for rule in rules:
                if rule.ident in requestedRules:
                    srules.discard(rule)
                    foundRules.add(rule.ident)
            if foundRules != requestedRules:
                missingRules = list(requestedRules - foundRules)
                fmtMissingRules = format_item_list(sorted(missingRules))
                raise SieveError(_("@info",
                                   "Some explicitly excluded rules "
                                   "are missing: %(rulelist)s.",
                                   rulelist=fmtMissingRules))
            selectedRulesInv.update(foundRules)
        if self.ruleChoiceInvRx:
            identRxs = [re.compile(x, re.U) for x in self.ruleChoiceInvRx]
            for rule in rules:
                if (rule.ident
                    and any(rx.search(rule.ident) for rx in identRxs)
                ):
                    srules.discard(rule)
                    selectedRulesInv.add(rule.ident)
        if self.ruleChoiceInv or self.ruleChoiceInvRx:
            rules = [rule for rule in rules if rule in srules]

        if self.ruleinfo:
            ntot = len(rules)
            ndis = len([x for x in rules if x.disabled])
            nact = ntot - ndis
            totfmt = n_("@item:intext inserted below as %(tot)s",
                        "Loaded %(num)d rule", "Loaded %(num)d rules",
                        num=ntot)
            if self.envOnly:
                envfmt = _("@item:intext inserted below as %(env)s",
                           "[only: %(envlist)s]",
                           envlist=format_item_list(envs))
            else:
                envfmt = _("@item:intext inserted below as %(env)s",
                           "[%(envlist)s]",
                           envlist=format_item_list(envs))
            actfmt = n_("@item:intext inserted below as %(act)s",
                        "%(num)d active", "%(num)d active",
                        num=nact)
            disfmt = n_("@item:intext inserted below as %(dis)s",
                        "%(num)d disabled", "%(num)d disabled",
                        num=ndis)
            subs = dict(tot=totfmt, env=envfmt, act=actfmt, dis=disfmt)
            if ndis and envs:
                report(_("@info:progress insertions from above",
                         "%(tot)s %(env)s (%(act)s, %(dis)s).", **subs))
            elif ndis:
                report(_("@info:progress insertions from above",
                         "%(tot)s (%(act)s, %(dis)s).", **subs))
            elif envs:
                report(_("@info:progress insertions from above",
                         "%(tot)s %(env)s.", **subs))
            else:
                report(_("@info:progress insertions from above",
                         "%(tot)s.", **subs))

            if selectedRules:
                selectedRules = selectedRules.difference(selectedRulesInv)
                n = len(selectedRules)
                if n <= 10:
                    rlst = list(selectedRules)
                    report(_("@info:progress",
                             "Selected rules: %(rulelist)s.",
                             rulelist=format_item_list(sorted(rlst))))
                else:
                    report(n_("@info:progress",
                              "Selected %(num)d rule.",
                              "Selected %(num)d rules.",
                              num=n))
            elif selectedRulesInv:
                n = len(selectedRulesInv)
                if n <= 10:
                    rlst = list(selectedRulesInv)
                    report(_("@info:progress",
                             "Excluded rules: %(rulelist)s.",
                             rulelist=format_item_list(sorted(rlst))))
                else:
                    report(n_("@info:progress",
                              "Excluded %(num)d rule.",
                              "Excluded %(num)d rules.",
                              num=n))

        # Collect all distinct filters from rules.
        ruleFilters = set()
        for rule in rules:
            if not rule.disabled:
                ruleFilters.add(rule.mfilter)
        if self.ruleinfo:
            nflt = len([x for x in ruleFilters if x is not None])
            if nflt:
                report(n_("@info:progress",
                          "Active rules define %(num)d distinct filter set.",
                          "Active rules define %(num)d distinct filter sets.",
                          num=nflt))

        return rules, ruleFilters