File indexing completed on 2024-11-03 05:12:56
0001 # -*- coding: UTF-8 -*- 0002 0003 """ 0004 Match messages by rules of arbitrary specificity. 0005 0006 A message-matching rule, represented by L{Rule} object, is a series of 0007 pattern matches to be applied to the message, leading to the decision 0008 of whether or not the rule as whole matches the message. 0009 Patterns can be of different kinds, act on different parts of the message, 0010 and be applied in a boolean-like combinations. 0011 0012 See C{doc/user/lingo.docbook#sec-lgrules} for detailed discussion of rules. 0013 0014 @author: Sébastien Renard <sebastien.renard@digitalfox.org> 0015 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net> 0016 @license: GPLv3 0017 """ 0018 0019 from codecs import open 0020 from locale import getlocale 0021 from os.path import dirname, basename, isdir, join, isabs 0022 from os import listdir 0023 import re 0024 import sys 0025 from time import time 0026 0027 from pology import PologyError, datadir, _, n_ 0028 from pology.message import MessageUnsafe 0029 from pology.config import strbool 0030 from pology.getfunc import get_hook_ireq, split_ireq 0031 from pology.report import report, warning, format_item_list 0032 from pology.tabulate import tabulate 0033 from pology.timeout import timed_out 0034 0035 TIMEOUT=8 # Time in sec after which a rule processing is timeout 0036 0037 0038 def printStat(rules): 0039 """Print rules match statistics 0040 @param rules: list of rule files 0041 """ 0042 statRules=[r for r in rules if r.count!=0 and r.stat is True] 0043 if statRules: 0044 statRules.sort(key=lambda x: x.time) 0045 data=[] 0046 rown=[r.displayName for r in statRules] 0047 data.append([r.count for r in statRules]) 0048 data.append([r.time/r.count*1000 for r in statRules]) 0049 totTimeMsg=sum(data[-1])/1000 0050 data.append([r.time for r in statRules]) 0051 totTime=sum(data[-1]) 0052 data.append([r.time/totTime*100 for r in statRules]) 0053 report(_("@label", "Rule application statistics:")) 0054 
coln=[_("@title:column", "calls"), 0055 _("@title:column avg = average", "avg-time [ms]"), 0056 _("@title:column tot = total", "tot-time [s]"), 0057 _("@title:column", "time-share")] 0058 dfmt=[ "%d", "%.3f", "%.1f", "%.2f%%"] 0059 report(tabulate(data, rown=rown, coln=coln, dfmt=dfmt, colorize=True)) 0060 report(_("@info statistics", 0061 "Total application time [s]: %(num).1f", 0062 num=totTime)) 0063 report(_("@info statistics", 0064 "Average application time per message [ms]: %(num).1f", 0065 num=totTimeMsg*1000)) 0066 0067 0068 def loadRules(lang, envs=[], envOnly=False, ruleFiles=None, stat=False, 0069 printInfo=False): 0070 """Load rules for a given language 0071 @param lang: lang as a string in two caracter (i.e. fr). If none or empty, try to autodetect language 0072 @param envs: also load rules applicable in these environments 0073 @param envOnly: load only rules applicable in given environments 0074 @param ruleFiles: a list of rule files to load instead of internal 0075 @param stat: stat is a boolean to indicate if rule should gather count and time execution 0076 @param printInfo: whether to output information about loading of rules 0077 @return: list of rules objects or None if rules cannot be found (with complaints on stdout) 0078 """ 0079 ruleDir="" # Rules directory 0080 rules=[] # List of rule objects 0081 langDir=join(datadir(), "lang") # Base of rule files per language 0082 0083 # Collect rule files. 0084 if ruleFiles is not None: 0085 if printInfo: 0086 report(_("@info:progress", "Using external rules.")) 0087 else: 0088 ruleDir=join(langDir, lang, "rules") 0089 if not isdir(ruleDir): 0090 raise PologyError( 0091 _("@info", 0092 "There are no internal rules for language '%(langcode)s'.", 0093 langcode=lang)) 0094 if printInfo: 0095 report(_("@info:progress", 0096 "Using internal rules for language '%(langcode)s'.", 0097 langcode=lang)) 0098 ruleFiles=[join(ruleDir, f) for f in listdir(ruleDir) if f.endswith(".rules")] 0099 0100 # Parse rules. 
0101 seenMsgFilters = {} 0102 for ruleFile in ruleFiles: 0103 rules.extend(loadRulesFromFile(ruleFile, stat, set(envs), seenMsgFilters)) 0104 0105 # Remove rules with specific but different to given environments, 0106 # or any rule not in given environments in environment-only mode. 0107 # FIXME: This should be moved to loadRulesFromFile. 0108 srules=[] 0109 for rule in rules: 0110 if envOnly and rule.environ not in envs: 0111 continue 0112 elif rule.environ and rule.environ not in envs: 0113 continue 0114 srules.append(rule) 0115 rules=srules 0116 0117 # When operating in specific environments, for rules with 0118 # equal identifiers eliminate all but the one in the last environment. 0119 if envs: 0120 envsByIdent={} 0121 for rule in rules: 0122 if rule.ident: 0123 if rule.ident not in envsByIdent: 0124 envsByIdent[rule.ident]=set() 0125 envsByIdent[rule.ident].add(rule.environ) 0126 srules=[] 0127 for rule in rules: 0128 eliminate=False 0129 if rule.ident and len(envsByIdent[rule.ident])>1: 0130 iEnv=((rule.environ is None and -1) or envs.index(rule.environ)) 0131 for env in envsByIdent[rule.ident]: 0132 iEnvOther=((env is None and -1) or envs.index(env)) 0133 if iEnv<iEnvOther: 0134 eliminate=True 0135 break 0136 if not eliminate: 0137 srules.append(rule) 0138 rules=srules 0139 0140 return rules 0141 0142 0143 _rule_start = "*" 0144 0145 class _IdentError (Exception): pass 0146 class _SyntaxError (Exception): pass 0147 0148 0149 def loadRulesFromFile(filePath, stat, envs=set(), seenMsgFilters={}): 0150 """Load rule file and return list of Rule objects 0151 @param filePath: full path to rule file 0152 @param stat: stat is a boolean to indicate if rule should gather count and time execution 0153 @param envs: environments in which the rules are to be applied 0154 @param seenMsgFilters: dictionary of previously encountered message 0155 filter functions, by their signatures; to avoid constructing 0156 same filters over different files 0157 @return: list of Rule 
object""" 0158 0159 rules=[] 0160 inRule=False #Flag that indicate we are currently parsing a rule bloc 0161 inGroup=False #Flag that indicate we are currently parsing a validGroup bloc 0162 0163 valid=[] 0164 pattern="" 0165 msgpart="" 0166 hint="" 0167 ident=None 0168 disabled=False 0169 manual=False 0170 casesens=True 0171 environ=None 0172 validGroup={} 0173 validGroupName="" 0174 identLines={} 0175 globalEnviron=None 0176 globalMsgFilters=[] 0177 globalRuleFilters=[] 0178 msgFilters=None 0179 ruleFilters=None 0180 seenRuleFilters={} 0181 triggerFunc=None 0182 lno=0 0183 0184 try: 0185 lines=open(filePath, "r", "UTF-8").readlines() 0186 lines.append("\n") # sentry line 0187 fileStack=[] 0188 while True: 0189 while lno >= len(lines): 0190 if not fileStack: 0191 lines = None 0192 break 0193 lines, filePath, lno = fileStack.pop() 0194 if lines is None: 0195 break 0196 lno += 1 0197 fields, lno = _parseRuleLine(lines, lno) 0198 0199 # End of rule bloc 0200 # FIXME: Remove 'not fields' when global directives too 0201 # start with something. This will eliminate rule separation 0202 # by empty lines, and skipping comment-only lines. 0203 if lines[lno - 1].strip().startswith("#"): 0204 continue 0205 if not fields or fields[0][0] in (_rule_start,): 0206 if inRule: 0207 inRule=False 0208 0209 if msgFilters is None: 0210 msgFilters = globalMsgFilters 0211 if ruleFilters is None: 0212 ruleFilters = globalRuleFilters 0213 # Use previously assembled filter with the same signature, 0214 # to be able to compare filter functions by "is". 
0215 msgFilterSig = _filterFinalSig(msgFilters) 0216 msgFilterFunc = seenMsgFilters.get(msgFilterSig) 0217 if msgFilterFunc is None: 0218 msgFilterFunc = _msgFilterComposeFinal(msgFilters) 0219 seenMsgFilters[msgFilterSig] = msgFilterFunc 0220 ruleFilterSig = _filterFinalSig(ruleFilters) 0221 ruleFilterFunc = seenRuleFilters.get(ruleFilterSig) 0222 if ruleFilterFunc is None: 0223 ruleFilterFunc = _ruleFilterComposeFinal(ruleFilters) 0224 seenRuleFilters[ruleFilterSig] = ruleFilterFunc 0225 0226 rules.append(Rule(pattern, msgpart, 0227 hint=hint, valid=valid, 0228 stat=stat, casesens=casesens, 0229 ident=ident, 0230 disabled=disabled, manual=manual, 0231 environ=(environ or globalEnviron), 0232 mfilter=msgFilterFunc, 0233 rfilter=ruleFilterFunc, 0234 trigger=triggerFunc)) 0235 pattern="" 0236 msgpart="" 0237 hint="" 0238 ident=None 0239 disabled=False 0240 manual=False 0241 casesens=True 0242 environ=None 0243 msgFilters=None 0244 ruleFilters=None 0245 triggerFunc=None 0246 elif inGroup: 0247 inGroup=False 0248 validGroup[validGroupName]=valid 0249 validGroupName="" 0250 valid=[] 0251 0252 if not fields: 0253 continue 0254 0255 # Begin of rule (pattern or special) 0256 if fields[0][0]==_rule_start: 0257 inRule=True 0258 keyword=fields[0][1] 0259 if keyword in _trigger_msgparts: 0260 msgpart=keyword 0261 pattern=fields[1][0] 0262 for mmod in fields[1][1]: 0263 if mmod not in _trigger_matchmods: 0264 raise _SyntaxError( 0265 _("@info", 0266 "Unknown match modifier '%(mod)s' " 0267 "in trigger pattern.", 0268 mod=mmod)) 0269 casesens=("i" not in fields[1][1]) 0270 elif keyword in _trigger_specials: 0271 casesens, rest = _triggerParseGeneral(fields[1:]) 0272 if keyword == "hook": 0273 triggerFunc = _triggerFromHook(rest) 0274 else: 0275 raise _SyntaxError( 0276 _("@info", 0277 "Unknown keyword '%(kw)s' in rule trigger.", 0278 kw=keyword)) 0279 0280 # valid line (for rule ou validGroup) 0281 elif fields[0][0]=="valid": 0282 if not inRule and not inGroup: 0283 raise 
_SyntaxError( 0284 _("@info", 0285 "Directive '%(dir)s' outside of rule or " 0286 "validity group.", 0287 dir="valid")) 0288 valid.append(fields[1:]) 0289 0290 # Rule hint 0291 elif fields[0][0]=="hint": 0292 if not inRule: 0293 raise _SyntaxError( 0294 _("@info", 0295 "Directive '%(dir)s' outside of rule.", 0296 dir="hint")) 0297 hint=fields[0][1] 0298 0299 # Rule identifier 0300 elif fields[0][0]=="id": 0301 if not inRule: 0302 raise _SyntaxError( 0303 _("@info", 0304 "Directive '%(dir)s' outside of rule.", 0305 dir="id")) 0306 ident=fields[0][1] 0307 if ident in identLines: 0308 (prevLine, prevEnviron)=identLines[ident] 0309 if prevEnviron==globalEnviron: 0310 raise _IdentError(ident, prevLine) 0311 identLines[ident]=(lno, globalEnviron) 0312 0313 # Whether rule is disabled 0314 elif fields[0][0]=="disabled": 0315 if not inRule: 0316 raise _SyntaxError( 0317 _("@info", 0318 "Directive '%(dir)s' outside of rule.", 0319 dir="disabled")) 0320 disabled=True 0321 0322 # Whether rule is manually applied 0323 elif fields[0][0]=="manual": 0324 if not inRule: 0325 raise _SyntaxError( 0326 _("@info", 0327 "Directive '%(dir)s' outside of rule.", 0328 dir="manual")) 0329 manual=True 0330 0331 # Validgroup 0332 elif fields[0][0]=="validGroup": 0333 if inGroup: 0334 raise _SyntaxError( 0335 _("@info", 0336 "Directive '%(dir)s' inside validity group.", 0337 dir="validGroup")) 0338 if inRule: 0339 # Use of validGroup directive inside a rule bloc 0340 validGroupName=fields[1][0] 0341 valid.extend(validGroup[validGroupName]) 0342 else: 0343 # Begin of validGroup 0344 inGroup=True 0345 validGroupName=fields[1][0] 0346 0347 # Switch rule environment 0348 elif fields[0][0]=="environment": 0349 if inGroup: 0350 raise _SyntaxError( 0351 _("@info", 0352 "Directive '%(dir)s' inside validity group.", 0353 dir="environment")) 0354 envName=fields[1][0] 0355 if inRule: 0356 # Environment specification for current rule. 
0357 environ=envName 0358 else: 0359 # Environment switch for rules that follow. 0360 globalEnviron=envName 0361 0362 # Add or remove filters 0363 elif ( fields[0][0].startswith("addFilter") 0364 or fields[0][0] in ["removeFilter", "clearFilters"]): 0365 # Select the proper filter lists on which to act. 0366 if inRule: 0367 if msgFilters is None: # local filters not created yet 0368 msgFilters = globalMsgFilters[:] # shallow copy 0369 if ruleFilters is None: 0370 ruleFilters = globalRuleFilters[:] 0371 currentMsgFilters = msgFilters 0372 currentRuleFilters = ruleFilters 0373 currentEnviron = environ or globalEnviron 0374 else: 0375 currentMsgFilters = globalMsgFilters 0376 currentRuleFilters = globalRuleFilters 0377 currentEnviron = globalEnviron 0378 0379 if fields[0][0].startswith("addFilter"): 0380 filterType = fields[0][0][len("addFilter"):] 0381 handles, parts, fenvs, rest = _filterParseGeneral(fields[1:]) 0382 if fenvs is None and currentEnviron: 0383 fenvs = [currentEnviron] 0384 if filterType == "Regex": 0385 func, sig = _filterCreateRegex(rest) 0386 elif filterType == "Hook": 0387 func, sig = _filterCreateHook(rest) 0388 else: 0389 raise _SyntaxError( 0390 _("@info", 0391 "Unknown filter directive '%(dir)s'.", 0392 dir=fields[0][0])) 0393 msgParts = set(parts).difference(_filterKnownRuleParts) 0394 if msgParts: 0395 totFunc, totSig = _msgFilterSetOnParts(msgParts, func, sig) 0396 currentMsgFilters.append([handles, fenvs, totFunc, totSig]) 0397 ruleParts = set(parts).difference(_filterKnownMsgParts) 0398 if ruleParts and (not envs or not fenvs or envs.intersection(fenvs)): 0399 totFunc, totSig = _ruleFilterSetOnParts(ruleParts, func, sig) 0400 currentRuleFilters.append([handles, fenvs, totFunc, totSig]) 0401 0402 elif fields[0][0] == ("removeFilter"): 0403 _filterRemove(fields[1:], 0404 (currentMsgFilters, currentRuleFilters), envs) 0405 0406 else: # remove all filters 0407 if len(fields) != 1: 0408 raise _SyntaxError( 0409 _("@info", 0410 "Expected no 
fields in " 0411 "all-filter removal directive.")) 0412 # Must not loose reference to the selected lists. 0413 while currentMsgFilters: 0414 currentMsgFilters.pop() 0415 while currentRuleFilters: 0416 currentRuleFilters.pop() 0417 0418 # Include another file 0419 elif fields[0][0] == "include": 0420 if inRule or inGroup: 0421 raise _SyntaxError( 0422 _("@info", 0423 "Directive '%(dir)s' inside a rule or group.", 0424 dir="include")) 0425 fileStack.append((lines, filePath, lno)) 0426 lines, filePath, lno = _includeFile(fields[1:], filePath) 0427 0428 else: 0429 raise _SyntaxError( 0430 _("@info", 0431 "Unknown directive '%(dir)s'.", 0432 dir=fields[0][0])) 0433 0434 except _IdentError as e: 0435 raise PologyError( 0436 _("@info", 0437 "Identifier '%(id)s' at %(file)s:%(line)d " 0438 "previously encountered at %(pos)s.", 0439 id=e.args[0], file=filePath, line=lno, pos=e.args[1])) 0440 except IOError as e: 0441 raise PologyError( 0442 _("@info", 0443 "Cannot read rule file '%(file)s'. The error was: %(msg)s", 0444 file=filePath, msg=e.args[0])) 0445 except _SyntaxError as e: 0446 raise PologyError( 0447 _("@info", 0448 "Syntax error at %(file)s:%(line)d:\n%(msg)s", 0449 file=filePath, line=lno, msg=e.args[0])) 0450 0451 return rules 0452 0453 0454 def _checkFields (directive, fields, knownFields, mandatoryFields=set(), 0455 unique=True): 0456 0457 fieldDict = dict(fields) 0458 if unique and len(fieldDict) != len(fields): 0459 raise _SyntaxError( 0460 _("@info", 0461 "Duplicate fields in '%(dir)s' directive.", 0462 dir=directive)) 0463 0464 if not isinstance(knownFields, set): 0465 knownFields = set(knownFields) 0466 unknownFields = set(fieldDict).difference(knownFields) 0467 if unknownFields: 0468 raise _SyntaxError( 0469 _("@info", 0470 "Unknown fields in '%(dir)s' directive: %(fieldlist)s.", 0471 dir=directive, fieldlist=format_item_list(unknownFields))) 0472 0473 for name in mandatoryFields: 0474 if name not in fieldDict: 0475 raise _SyntaxError( 0476 _("@info", 
0477 "Mandatory field '%(field)s' missing in '%(dir)s' directive.", 0478 field=name, dir=directive)) 0479 0480 0481 def _includeFile (fields, includingFilePath): 0482 0483 _checkFields("include", fields, ["file"], ["file"]) 0484 fieldDict = dict(fields) 0485 0486 relativeFilePath = fieldDict["file"] 0487 if isabs(relativeFilePath): 0488 filePath = relativeFilePath 0489 else: 0490 filePath = join(dirname(includingFilePath), relativeFilePath) 0491 0492 if filePath.endswith(".rules"): 0493 warning(_("@info", 0494 "Including one rule file into another, " 0495 "'%(file1)s' from '%(file2)s'.", 0496 file1=filePath, file2=includingFilePath)) 0497 0498 lines=open(filePath, "r", "UTF-8").readlines() 0499 lines.append("\n") # sentry line 0500 0501 return lines, filePath, 0 0502 0503 0504 def _filterRemove (fields, filterLists, envs): 0505 0506 _checkFields("removeFilter", fields, ["handle", "env"], ["handle"]) 0507 fieldDict = dict(fields) 0508 0509 handleStr = fieldDict["handle"] 0510 0511 fenvStr = fieldDict.get("env") 0512 if fenvStr is not None: 0513 fenvs = [x.strip() for x in fenvStr.split(",")] 0514 if not envs or not envs.intersection(fenvs): 0515 # We are operating in no environment, or no operating environment 0516 # is listed among the selected; skip removal. 
0517 return 0518 0519 handles = set([x.strip() for x in handleStr.split(",")]) 0520 seenHandles = set() 0521 for flist in filterLists: 0522 k = 0 0523 while k < len(flist): 0524 commonHandles = flist[k][0].intersection(handles) 0525 if commonHandles: 0526 flist.pop(k) 0527 seenHandles.update(commonHandles) 0528 else: 0529 k += 1 0530 unseenHandles = handles.difference(seenHandles) 0531 if unseenHandles: 0532 raise PologyError( 0533 _("@info", 0534 "No filters with these handles to remove: %(handlelist)s.", 0535 handlelist=format_item_list(unseenHandles))) 0536 0537 0538 _filterKnownMsgParts = set([ 0539 "msg", "msgid", "msgstr", "pmsgid", "pmsgstr", 0540 ]) 0541 _filterKnownRuleParts = set([ 0542 "pattern", 0543 ]) 0544 _filterKnownParts = set( list(_filterKnownMsgParts) 0545 + list(_filterKnownRuleParts)) 0546 0547 def _filterParseGeneral (fields): 0548 0549 handles = set() 0550 parts = [] 0551 envs = None 0552 0553 rest = [] 0554 for field in fields: 0555 name, value = field 0556 if name == "handle": 0557 handles = set([x.strip() for x in value.split(",")]) 0558 elif name == "on": 0559 parts = [x.strip() for x in value.split(",")] 0560 unknownParts = set(parts).difference(_filterKnownParts) 0561 if unknownParts: 0562 raise _SyntaxError( 0563 _("@info", 0564 "Unknown message parts for the filter to act on: " 0565 "%(partlist)s.", 0566 partlist=format_item_list(unknownParts))) 0567 elif name == "env": 0568 envs = [x.strip() for x in value.split(",")] 0569 else: 0570 rest.append(field) 0571 0572 if not parts: 0573 raise _SyntaxError( 0574 _("@info", 0575 "No message parts specified for the filter to act on.")) 0576 0577 return handles, parts, envs, rest 0578 0579 0580 def _msgFilterSetOnParts (parts, func, sig): 0581 0582 chain = [] 0583 parts = list(parts) 0584 parts.sort() 0585 for part in parts: 0586 if part == "msg": 0587 chain.append(_filterOnMsg(func)) 0588 elif part == "msgstr": 0589 chain.append(_filterOnMsgstr(func)) 0590 elif part == "msgid": 0591 
chain.append(_filterOnMsgid(func)) 0592 elif part == "pmsgstr": 0593 chain.append(_filterOnMsgstrPure(func)) 0594 elif part == "pmsgid": 0595 chain.append(_filterOnMsgidPure(func)) 0596 0597 def composition (msg, cat): 0598 0599 for func in chain: 0600 func(msg, cat) 0601 0602 totalSig = sig + "\x04" + ",".join(parts) 0603 0604 return composition, totalSig 0605 0606 0607 def _filterFinalSig (filterList): 0608 0609 sigs = [x[3] for x in filterList] 0610 finalSig = "\x05".join(sigs) 0611 0612 return finalSig 0613 0614 0615 def _msgFilterComposeFinal (filterList): 0616 0617 if not filterList: 0618 return None 0619 0620 fenvs_funcs = [(x[1], x[2]) for x in filterList] 0621 0622 def composition (msg, cat, envs): 0623 0624 for fenvs, func in fenvs_funcs: 0625 # Apply filter if environment-agnostic or in an operating environment. 0626 if fenvs is None or envs.intersection(fenvs): 0627 func(msg, cat) 0628 0629 return composition 0630 0631 0632 def _filterOnMsg (func): 0633 0634 def aggregate (msg, cat): 0635 0636 func(msg, cat) 0637 0638 return aggregate 0639 0640 0641 def _filterOnMsgstr (func): 0642 0643 def aggregate (msg, cat): 0644 0645 for i in range(len(msg.msgstr)): 0646 tmp = func(msg.msgstr[i], msg, cat) 0647 if tmp is not None: msg.msgstr[i] = tmp 0648 0649 return aggregate 0650 0651 0652 def _filterOnMsgid (func): 0653 0654 def aggregate (msg, cat): 0655 0656 tmp = func(msg.msgid, msg, cat) 0657 if tmp is not None: msg.msgid = tmp 0658 if msg.msgid_plural is not None: 0659 tmp = func(msg.msgid_plural, msg, cat) 0660 if tmp is not None: msg.msgid_plural = tmp 0661 0662 return aggregate 0663 0664 0665 def _filterOnMsgstrPure (func): 0666 0667 def aggregate (msg, cat): 0668 0669 for i in range(len(msg.msgstr)): 0670 tmp = func(msg.msgstr[i]) 0671 if tmp is not None: msg.msgstr[i] = tmp 0672 0673 return aggregate 0674 0675 0676 def _filterOnMsgidPure (func): 0677 0678 def aggregate (msg, cat): 0679 0680 tmp = func(msg.msgid) 0681 if tmp is not None: msg.msgid = tmp 
0682 if msg.msgid_plural is not None: 0683 tmp = func(msg.msgid_plural) 0684 if tmp is not None: msg.msgid_plural = tmp 0685 0686 return aggregate 0687 0688 0689 def _ruleFilterSetOnParts (parts, func, sig): 0690 0691 chain = [] 0692 parts = list(parts) 0693 parts.sort() 0694 for part in parts: 0695 if part == "pattern": 0696 chain.append((_filterOnPattern(func), part)) 0697 0698 def composition (value, part): 0699 0700 if part not in _filterKnownRuleParts: 0701 raise PologyError( 0702 _("@info", 0703 "Unknown rule part '%(part)s' for the filter to act on.", 0704 part=part)) 0705 0706 for func, fpart in chain: 0707 if fpart == part: 0708 value = func(value) 0709 0710 return value 0711 0712 totalSig = sig + "\x04" + ",".join(parts) 0713 0714 return composition, totalSig 0715 0716 0717 def _ruleFilterComposeFinal (filterList): 0718 0719 if not filterList: 0720 return None 0721 0722 funcs = [x[2] for x in filterList] 0723 0724 def composition (value, part): 0725 0726 for func in funcs: 0727 value = func(value, part) 0728 0729 return value 0730 0731 return composition 0732 0733 0734 def _filterOnPattern (func): 0735 0736 def aggregate (pattern): 0737 0738 tmp = func(pattern) 0739 if tmp is not None: pattern = tmp 0740 0741 return pattern 0742 0743 return aggregate 0744 0745 0746 _filterRegexKnownFields = set(["match", "repl", "casesens"]) 0747 0748 def _filterCreateRegex (fields): 0749 0750 _checkFields("addFilterRegex", fields, _filterRegexKnownFields, ["match"]) 0751 fieldDict = dict(fields) 0752 0753 caseSens = _fancyBool(fieldDict.get("casesens", "0")) 0754 flags = re.U | re.S 0755 if not caseSens: 0756 flags |= re.I 0757 0758 matchStr = fieldDict["match"] 0759 matchRx = re.compile(matchStr, flags) 0760 0761 replStr = fieldDict.get("repl", "") 0762 0763 def func (text): 0764 return matchRx.sub(replStr, text) 0765 0766 sig = "\x04".join([matchStr, replStr, str(caseSens)]) 0767 0768 return func, sig 0769 0770 0771 def _filterCreateHook (fields): 0772 0773 
_checkFields("addFilterHook", fields, ["name"], ["name"]) 0774 fieldDict = dict(fields) 0775 0776 hookSpec = fieldDict["name"] 0777 hook = get_hook_ireq(hookSpec, abort=False) 0778 0779 sigSegs = [] 0780 for el in split_ireq(hookSpec): 0781 if el is not None: 0782 sigSegs.append(el) 0783 else: 0784 sigSegs.append("\x00") 0785 sig = "\x04".join(sigSegs) 0786 0787 return hook, sig 0788 0789 0790 def _triggerParseGeneral (fields): 0791 0792 casesens = True 0793 0794 rest = [] 0795 for field in fields: 0796 name, value = field 0797 if name == "casesens": 0798 casesens = _fancyBool(value) 0799 else: 0800 rest.append(field) 0801 0802 return casesens, rest 0803 0804 0805 _triggerKnownMsgParts = set([ 0806 "msg", "msgid", "msgstr", "pmsgid", "pmsgstr", 0807 ]) 0808 0809 def _triggerFromHook (fields): 0810 0811 _checkFields("hook", fields, ["name", "on"], ["name", "on"]) 0812 fieldDict = dict(fields) 0813 0814 hook = get_hook_ireq(fieldDict["name"], abort=False) 0815 0816 msgpart = fieldDict["on"].strip() 0817 if msgpart not in _triggerKnownMsgParts: 0818 raise PologyError( 0819 _("@info", 0820 "Unknown message part '%(part)s' for trigger to act on.", 0821 part=msgpart)) 0822 0823 if msgpart == "msg": 0824 def trigger (msg, cat): 0825 return hook(msg, cat) 0826 elif msgpart == "msgid": 0827 def trigger (msg, cat): 0828 hl = [] 0829 hl.append(("msgid", 0, hook(msg.msgid, msg, cat))) 0830 if msg.msgid_plural is not None: 0831 hl.append(("msgid_plural", 0, hook(msg.msgid_plural, msg, cat))) 0832 return hl 0833 elif msgpart == "msgstr": 0834 def trigger (msg, cat): 0835 hl = [] 0836 for i in range(len(msg.msgstr)): 0837 hl.append(("msgstr", i, hook(msg.msgstr[i], msg, cat))) 0838 return hl 0839 elif msgpart == "pmsgid": 0840 def trigger (msg, cat): 0841 hl = [] 0842 hl.append(("msgid", 0, hook(msg.msgid))) 0843 if msg.msgid_plural is not None: 0844 hl.append(("msgid_plural", 0, hook(msg.msgid_plural))) 0845 return hl 0846 elif msgpart == "pmsgstr": 0847 def trigger (msg, cat): 
0848 hl = [] 0849 for i in range(len(msg.msgstr)): 0850 hl.append(("msgstr", i, hook(msg.msgstr[i]))) 0851 return hl 0852 0853 return trigger 0854 0855 0856 def _fancyBool (string): 0857 0858 value = strbool(string) 0859 if value is None: 0860 raise PologyError( 0861 _("@info", 0862 "Cannot convert '%(val)s' to a boolean value.", 0863 val=string)) 0864 return value 0865 0866 0867 _trigger_msgparts = set([ 0868 # For matching in all messages. 0869 "msgctxt", "msgid", "msgstr", 0870 0871 # For matching in plural messages part by part. 0872 "msgid_singular", "msgid_plural", 0873 "msgstr_0", "msgstr_1", "msgstr_2", "msgstr_3", "msgstr_4", "msgstr_5", 0874 "msgstr_6", "msgstr_7", "msgstr_8", "msgstr_9", # ought to be enough 0875 ]) 0876 _trigger_specials = set([ 0877 "hook", 0878 ]) 0879 0880 _trigger_matchmods = [ 0881 "i", 0882 ] 0883 0884 class Rule(object): 0885 """Represent a single rule""" 0886 0887 _knownKeywords = set(("env", "cat", "catrx", "span", "after", "before", "ctx", "msgid", "msgstr", "head", "srcref", "comment")) 0888 _regexKeywords = set(("catrx", "span", "after", "before", "ctx", "msgid", "msgstr", "srcref", "comment")) 0889 _twoRegexKeywords = set(("head",)) 0890 _listKeywords = set(("env", "cat")) 0891 0892 def __init__(self, pattern, msgpart, hint=None, valid=[], 0893 stat=False, casesens=True, ident=None, 0894 disabled=False, manual=False, 0895 environ=None, mfilter=None, rfilter=None, 0896 trigger=None): 0897 """Create a rule 0898 @param pattern: valid regexp pattern that trigger the rule 0899 @type pattern: unicode 0900 @param msgpart: part of the message to be matched by C{pattern} 0901 @type msgpart: string 0902 @param hint: hint given to user when rule match 0903 @type hint: unicode 0904 @param valid: list of cases that should make or not make rule matching 0905 @type valid: list of unicode key=value 0906 @param casesens: whether regex matching will be case-sensitive 0907 @type casesens: bool 0908 @param ident: rule identifier 0909 @type 
ident: unicode or C{None} 0910 @param disabled: whether rule is disabled 0911 @type disabled: bool 0912 @param manual: whether rule is manually applied 0913 @type manual: bool 0914 @param environ: environment in which the rule applies 0915 @type environ: string or C{None} 0916 @param mfilter: filter to apply to message before checking 0917 @type mfilter: (msg, cat, envs) -> <anything> 0918 @param rfilter: filter to apply to rule strings (e.g. on regex patterns) 0919 @type rfilter: (string) -> string 0920 @param trigger: function to act as trigger instead of C{pattern} applied to C{msgpart} 0921 @type trigger: (msg, cat, envs) -> L{highlight<msgreport.report_msg_content>} 0922 """ 0923 0924 # Define instance variable 0925 self.pattern=None # Compiled regexp into re.pattern object 0926 self.msgpart=msgpart # The part of the message to match 0927 self.valid=None # Parsed valid definition 0928 self.hint=hint # Hint message return to user 0929 self.ident=ident # Rule identifier 0930 self.disabled=disabled # Whether rule is disabled 0931 self.manual=manual # Whether rule is manually applied 0932 self.count=0 # Number of time rule have been triggered 0933 self.time=0 # Total time of rule process calls 0934 self.stat=stat # Wheter to gather stat or not. Default is false (10% perf hit due to time() call) 0935 self.casesens=casesens # Whether regex matches are case-sensitive 0936 self.environ=environ # Environment in which to apply the rule 0937 self.mfilter=mfilter # Function to filter the message before checking 0938 self.rfilter=rfilter # Function to filter the rule strings 0939 self.trigger=None # Function to use as trigger instead of pattern 0940 0941 if trigger is None and msgpart not in _trigger_msgparts: 0942 raise PologyError( 0943 _("@info", 0944 "Unknown message part '%(part)s' set for the rule's " 0945 "trigger pattern.", 0946 part=msgpart)) 0947 0948 # Flags for regex compilation. 
0949 self.reflags=re.U|re.S 0950 if not self.casesens: 0951 self.reflags|=re.I 0952 0953 # Setup trigger. 0954 if not trigger: 0955 self.setPattern(pattern) 0956 else: 0957 self.setTrigger(trigger) 0958 0959 #Parse valid key=value arguments 0960 self.setValid(valid) 0961 0962 def setPattern(self, pattern): 0963 """Compile pattern 0964 @param pattern: pattern as an unicode string""" 0965 try: 0966 if self.rfilter: 0967 pattern=self.rfilter(pattern, "pattern") 0968 self.pattern=re.compile(pattern, self.reflags) 0969 except Exception as e: 0970 warning(_("@info", 0971 "Invalid pattern '%(pattern)s', disabling rule:\n" 0972 "%(msg)s", 0973 pattern=pattern, msg=e)) 0974 self.disabled=True 0975 self.rawPattern=pattern 0976 self.trigger=None # invalidate any trigger function 0977 if self.ident: 0978 self.displayName=_("@item:intext", 0979 "[id=%(rule)s]", 0980 rule=self.ident) 0981 else: 0982 self.displayName=_("@item:intext", 0983 "[pattern=%(pattern)s]", 0984 pattern=self.rawPattern) 0985 0986 def setTrigger(self, trigger): 0987 """ 0988 Use trigger function instead of pattern. 
0989 0990 @param trigger: function to act as trigger 0991 @type trigger: (msg, cat, envs) -> {highlight<msgreport.report_msg_content>} 0992 """ 0993 self.trigger=trigger 0994 self.pattern=None # invalidate any pattern 0995 self.rawPattern="" 0996 if self.ident: 0997 self.displayName=_("@item:intext", 0998 "[id=%(rule)s]", 0999 rule=self.ident) 1000 else: 1001 self.displayName=_("@item:intext", 1002 "[function]") 1003 1004 1005 def setValid(self, valid): 1006 """Parse valid key=value arguments of valid list 1007 @param valid: valid line as an unicode string""" 1008 self.valid=[] 1009 for item in valid: 1010 try: 1011 entry=[] # Empty valid entry 1012 for (key, value) in item: 1013 key=key.strip() 1014 bkey = key 1015 if key.startswith("!"): 1016 bkey = key[1:] 1017 if bkey not in Rule._knownKeywords: 1018 warning(_("@info", 1019 "Invalid keyword '%(kw)s' in " 1020 "validity definition, skipped.", 1021 kw=key)) 1022 continue 1023 if self.rfilter: 1024 value=self.rfilter(value, "pattern") 1025 if bkey in Rule._regexKeywords: 1026 # Compile regexp 1027 value=re.compile(value, self.reflags) 1028 elif bkey in Rule._listKeywords: 1029 # List of comma-separated words 1030 value=[x.strip() for x in value.split(",")] 1031 elif bkey in Rule._twoRegexKeywords: 1032 # Split into the two regexes and compile them. 1033 frx, vrx=value[1:].split(value[:1]) 1034 value=(re.compile(frx, self.reflags), 1035 re.compile(vrx, self.reflags)) 1036 entry.append((key, value)) 1037 self.valid.append(entry) 1038 except Exception as e: 1039 warning(_("@info", 1040 "Invalid validity definition '%(dfn)s', skipped. " 1041 "The error was:\n%(msg)s", 1042 dfn=item, msg=e)) 1043 continue 1044 1045 #@timed_out(TIMEOUT) 1046 def process (self, msg, cat, envs=set(), nofilter=False): 1047 """ 1048 Apply rule to the message. 1049 1050 If the rule matches, I{highlight specification} of offending spans is 1051 returned (see L{report_msg_content<msgreport.report_msg_content>}); 1052 otherwise an empty list. 
1053 1054 Rule will normally apply its own filters to the message before 1055 matching (on a local copy, the original message will not be affected). 1056 If the message is already appropriately filtered, this self-filtering 1057 can be prevented by setting C{nofilter} to {True}. 1058 1059 @param msg: message to which the texts belong 1060 @type msg: instance of L{Message_base} 1061 @param cat: catalog to which the message belongs 1062 @type cat: L{Catalog} 1063 @param envs: environments in which the rule is applied 1064 @type envs: set 1065 @param nofilter: avoid filtering the message if C{True} 1066 @type nofilter: bool 1067 1068 @return: highlight specification (may be empty list) 1069 """ 1070 1071 if self.pattern is None and self.trigger is None: 1072 warning(_("@info", 1073 "Rule trigger not defined, rule skipped.")) 1074 return [] 1075 1076 # If this rule belongs to a specific environment, 1077 # and it is not among operating environments, 1078 # cancel the rule immediately. 1079 if self.environ and self.environ not in envs: 1080 return [] 1081 1082 # Cancel immediately if the rule is disabled. 1083 if self.disabled: 1084 return [] 1085 1086 if self.stat: 1087 begin=time() 1088 1089 # Apply own filters to the message if not filtered already. 1090 if not nofilter: 1091 msg = self._filter_message(msg, cat, envs) 1092 1093 if self.pattern: 1094 failed_spans = self._processWithPattern(msg, cat, envs) 1095 else: 1096 failed_spans = self._processWithTrigger(msg, cat, envs) 1097 1098 # Update stats for matched rules. 
def _create_text_spec (self, msgpart, msg):
    """
    Assemble the list of texts of the message selected by the part keyword.

    @param msgpart: message part keyword: C{"msgid"}, C{"msgstr"},
        C{"msgctxt"}, C{"msgid_singular"}, C{"msgid_plural"},
        or C{"msgstr_N"} with N the index into C{msg.msgstr}
    @type msgpart: string
    @param msg: message from which the texts are taken

    @return: list of (part, item, text) tuples; may be empty
        when the requested part is not set in the message

    @raise PologyError: if the part keyword is not one of the above
    """

    if msgpart == "msgid":
        # Both original texts, plural only if the message has it.
        spec = [("msgid", 0, msg.msgid)]
        if msg.msgid_plural is not None:
            spec.append(("msgid_plural", 0, msg.msgid_plural))
        return spec

    if msgpart == "msgstr":
        return [("msgstr", index, text)
                for index, text in enumerate(msg.msgstr)]

    if msgpart == "msgctxt":
        if msg.msgctxt is None:
            return []
        return [("msgctxt", 0, msg.msgctxt)]

    if msgpart == "msgid_singular":
        return [("msgid", 0, msg.msgid)]

    if msgpart == "msgid_plural":
        if msg.msgid_plural is None:
            return []
        return [("msgid_plural", 0, msg.msgid_plural)]

    if msgpart.startswith("msgstr_"):
        index = int(msgpart.split("_")[1])
        return [("msgstr", index, msg.msgstr[index])]

    raise PologyError(
        _("@info",
          "Unknown message part '%(part)s' referenced in the rule.",
          part=msgpart))


def _processWithPattern (self, msg, cat, envs):
    """
    Match the rule pattern against the texts selected by C{self.msgpart}.

    Every pattern match is a candidate problem; it is dropped as soon as
    one validity entry of the rule accepts it.

    @return: list of (part, item, spans, text) tuples, one per text
        with at least one match that no validity entry accepted
    """

    failed_spans = {}
    for part, item, text in self._create_text_spec(self.msgpart, msg):
        for found in self.pattern.finditer(text):
            # First validity entry that matches excepts this segment.
            excepted = any(
                self._is_valid(found.group(0), found.start(), found.end(),
                               text, entry, msg, cat, envs)
                for entry in self.valid)
            if excepted:
                continue
            # Record the span of problematic segment.
            record = failed_spans.setdefault((part, item),
                                             (part, item, [], text))
            record[2].append(found.span())

    return list(failed_spans.values())
def _processWithTrigger (self, msg, cat, envs):
    """
    Run the trigger function and keep the spans no validity entry accepts.

    The trigger is called with the message and the catalog, and returns
    a sequence of span specifications: (part, item, spans) or
    (part, item, spans, filtered_text). When the filtered text is not
    supplied, the corresponding text of the message is used instead.

    @return: list of (part, item, spans, text) tuples, one per
        (part, item) with at least one span that was not accepted
    """

    failed_spans = {}
    for spanspec in self.trigger(msg, cat):
        part, item, spans = spanspec[:3]
        ftext = spanspec[3] if len(spanspec) > 3 else None

        # Translation texts are addressed by index in the part keyword.
        part_item = part
        if part == "msgstr":
            part_item = part + "_" + str(item)
        text_spec = self._create_text_spec(part_item, msg)
        if ftext is None: # the trigger didn't do any own filtering
            ftext = text_spec[0][2] # message field which contains the span

        for span in spans:
            mstart, mend = span[:2] # may contain 3rd element, error text
            segment = ftext[mstart:mend]
            # First validity entry that matches excepts this segment.
            excepted = any(
                self._is_valid(segment, mstart, mend,
                               ftext, entry, msg, cat, envs)
                for entry in self.valid)
            if excepted:
                continue
            # Record the span of problematic segment.
            record = failed_spans.setdefault((part, item),
                                             (part, item, [], ftext))
            record[2].append(span)

    return list(failed_spans.values())


def _filter_message (self, msg, cat, envs):
    """
    Return the message with the rule's message filter applied.

    The filter works on a C{MessageUnsafe} copy, so the message passed in
    is not modified. If the rule has no message filter,
    the message itself is returned.
    """

    if self.mfilter is None:
        return msg

    fmsg = MessageUnsafe(msg)
    self.mfilter(fmsg, cat, envs)
    return fmsg
def _is_valid (self, match, mstart, mend, text, ventry, msg, cat, envs):
    """
    Check whether a validity entry accepts the matched segment.

    All keys within a validity entry must match for the entry to match
    as whole. A key prefixed with "!" matches when its test fails.
    Keys other than those handled below put no constraint on the result.

    @param match: the matched segment of the text
    @type match: string
    @param mstart: start position of the segment in the text
    @type mstart: int
    @param mend: end position of the segment in the text
    @type mend: int
    @param text: the text in which the segment was matched
    @type text: string
    @param ventry: validity entry, a sequence of (key, value) pairs;
        the value is a list of names for C{env} and C{cat},
        a pair of compiled regexes for C{head},
        and a compiled regex for all other keys
    @param msg: message to which the text belongs
    @param cat: catalog to which the message belongs
    @param envs: operating environments
    @type envs: set

    @return: C{True} if every key of the entry matches
    @rtype: bool
    """

    for key, value in ventry:
        invert = key.startswith("!")
        bkey = key[1:] if invert else key

        # NOTE: The result of each test goes into its own local variable.
        # The matched segment in 'match' must stay untouched, because
        # a "span" key may follow any other key within the same entry.
        if bkey == "env":
            found = bool(envs.intersection(value))

        elif bkey == "cat":
            found = cat.name in value

        elif bkey == "catrx":
            found = value.search(cat.name) is not None

        elif bkey == "head":
            # Some header field must match by both name and value.
            frx, vrx = value
            found = any(frx.search(fname) and vrx.search(fvalue)
                        for fname, fvalue in cat.header.field)

        elif bkey == "span":
            found = value.search(match) is not None

        elif bkey == "after":
            # Search up to the match to avoid need for lookaheads.
            found = any(prev.end() == mstart
                        for prev in value.finditer(text, 0, mstart))

        elif bkey == "before":
            # Search from the match to avoid need for lookbehinds.
            found = any(following.start() == mend
                        for following in value.finditer(text, mend))

        elif bkey == "ctx":
            found = (bool(msg.msgctxt)
                     and value.search(msg.msgctxt) is not None)

        elif bkey == "msgid":
            found = any(value.search(msgid)
                        for msgid in (msg.msgid, msg.msgid_plural)
                        if msgid is not None)

        elif bkey == "msgstr":
            found = any(value.search(msgstr) for msgstr in msg.msgstr)

        elif bkey == "srcref":
            found = any(value.search(path) for path, lno in msg.source)

        elif bkey == "comment":
            comments = list(msg.manual_comment)
            comments.extend(msg.auto_comment)
            found = any(value.search(cmnt) for cmnt in comments)

        else:
            # Unhandled key, does not influence the outcome.
            continue

        if invert:
            found = not found
        if not found:
            return False

    return True
def _parseRuleLine (lines, lno):
    """
    Split a rule line into fields as list of (name, value) pairs.

    The logical line starts at C{lines[lno - 1]}; physical lines ending
    with a backslash-newline are joined with the lines that follow them.
    Parsing stops at the end of the logical line or at a C{#} character
    found where a field would start.

    If a field name is followed by '=' or '=""', the field value will be
    an empty string. If there is no equal sign, the value will be C{None}.

    If the line is the trigger pattern, the name of the first field
    is going to be the rule start marker (C{_rule_start}, defined
    elsewhere in the module), and its value the keyword of the message part
    to be matched; the name of the second field is going to be
    the pattern itself, and its value the string of match modifiers.
    The shorthand forms C{{...}} and C{[...]} stand for triggers on
    C{msgid} and C{msgstr}, respectively.

    @param lines: all lines of the rule file
    @type lines: [string*]
    @param lno: 1-based number of the line at which to start parsing
    @type lno: int

    @return: parsed fields, and the number of the last line consumed
    @rtype: ([(string, string or None)*], int)

    @raise _SyntaxError: on malformed trigger, invalid field name,
        or non-terminated quoted string
    """

    # Compose line out of backslash continuations.
    line = lines[lno - 1]
    while line.endswith("\\\n"):
        line = line[:-2]
        if lno >= len(lines):
            # Continuation requested on the last line, nothing to join.
            break
        lno += 1
        line += lines[lno - 1]

    llen = len(line)
    fields = []
    p = 0 # current position in the line
    in_modifiers = False # set once the trigger pattern has been read

    while p < llen:
        # Skip whitespace before the next field.
        while line[p].isspace():
            p += 1
            if p >= llen:
                break
        # End of line or start of a comment ends the parsing.
        if p >= llen or line[p] == "#":
            break

        if len(fields) == 0 and line[p] in ("[", "{"):
            # Shorthand trigger pattern.
            bropn = line[p]
            brcls, fname = {"{": ("}", "msgid"),
                            "[": ("]", "msgstr")}[bropn]

            # Collect the pattern.
            # Look for the balanced closing bracket.
            p1 = p + 1
            balance = 1
            while balance > 0:
                p += 1
                if p >= llen:
                    break
                if line[p] == bropn:
                    balance += 1
                elif line[p] == brcls:
                    balance -= 1
            if balance > 0:
                raise _SyntaxError(
                    _("@info",
                      "Unbalanced '%(delim)s' in shorthand trigger pattern.",
                      delim=bropn))
            fields.append((_rule_start, fname))
            fields.append((line[p1:p], ""))

            p += 1 # skip closing bracket
            in_modifiers = True

        elif len(fields) == 0 and line[p] == _rule_start:
            # Verbose trigger.
            p += 1
            while p < llen and line[p].isspace():
                p += 1
            if p >= llen:
                raise _SyntaxError(
                    _("@info",
                      "Missing '%(kw)s' keyword in the rule trigger.",
                      kw="match"))

            # Collect the match keyword.
            # The keyword must be followed by at least one more character,
            # otherwise the trigger is reported as malformed.
            p1 = p
            while line[p].isalnum() or line[p] == "_":
                p += 1
                if p >= llen:
                    raise _SyntaxError(
                        _("@info",
                          "Malformed rule trigger."))
            tkeyw = line[p1:p]
            fields.append((_rule_start, tkeyw))

            if tkeyw in _trigger_msgparts:
                # Collect the pattern.
                while line[p].isspace():
                    p += 1
                    if p >= llen:
                        raise _SyntaxError(
                            _("@info",
                              "No pattern after the trigger keyword '%(kw)s'.",
                              kw=tkeyw))
                # The first non-space character is taken as the quote.
                # NOTE(review): 'quote' is assigned but never read;
                # _findEndQuote picks up the quote character by itself.
                quote = line[p]
                p1 = p + 1
                p = _findEndQuote(line, p)
                fields.append((line[p1:p], ""))
                p += 1 # skip quote
                in_modifiers = True
            else:
                # Special trigger, go on reading fields.
                pass

        elif in_modifiers:
            # Modifiers after the trigger pattern.
            # Each whitespace-delimited run is appended to the modifier
            # string stored as value of the pattern field.
            p1 = p
            while not line[p].isspace():
                p += 1
                if p >= llen:
                    break
            pattern, pmods = fields[-1]
            fields[-1] = (pattern, pmods + line[p1:p])

        else:
            # Subdirective field.

            # Collect field name.
            p1 = p
            while not line[p].isspace() and line[p] != "=":
                p += 1
                if p >= llen:
                    break
            fname = line[p1:p]
            if not re.match(r"^!?[a-z][\w-]*$", fname):
                raise _SyntaxError(
                    _("@info",
                      "Invalid field name '%(field)s'.",
                      field=fname))

            if p >= llen or line[p].isspace():
                # Bare field name, no equal sign.
                fields.append((fname, None))
            else:
                # Collect field value.
                p += 1 # skip equal-character
                if p >= llen or line[p].isspace():
                    # Equal sign with nothing after it.
                    fields.append((fname, ""))
                else:
                    # NOTE(review): 'quote' is assigned but never read here.
                    quote = line[p]
                    p1 = p + 1
                    p = _findEndQuote(line, p)
                    fvalue = line[p1:p]
                    fields.append((fname, fvalue))
                    p += 1 # skip quote

    return fields, lno
def _findEndQuote (line, pos=0):
    """
    Find end quote to the quote at given position in the line.

    Character at the C{pos} position is taken as the quote character.
    A backslash inside the string escapes the character which follows it,
    so that an escaped quote does not close the string.
    Only the position is determined here; the text between the quotes
    is not unescaped.

    @param line: the line to parse
    @type line: string
    @param pos: position of the opening quote
    @type pos: int

    @return: position of the closing quote
    @rtype: int

    @raise _SyntaxError: if the string is not closed before the end of
        the line, including when the line ends in a lone backslash
    """

    quote = line[pos]
    llen = len(line)

    epos = pos + 1
    while epos < llen:
        c = line[epos]
        if c == "\\":
            # Skip the escaped character too. If the backslash is the
            # last character in the line, this moves past the end and
            # the string gets reported as non-terminated below.
            epos += 1
        elif c == quote:
            break
        epos += 1

    if epos >= llen:
        raise _SyntaxError(
            _("@info",
              "Non-terminated quoted string '%(snippet)s'.",
              snippet=line[pos:]))

    return epos