pology/pology/rules.py

0001 # -*- coding: UTF-8 -*-
0002
0003 """
0004 Match messages by rules of arbitrary specificity.
0005
A message-matching rule, represented by an L{Rule} object, is a series of
pattern matches to be applied to the message, leading to the decision
of whether or not the rule as a whole matches the message.
Patterns can be of different kinds, act on different parts of the message,
and be applied in boolean-like combinations.
0011
0012 See C{doc/user/lingo.docbook#sec-lgrules} for detailed discussion of rules.
0013
0014 @author: Sébastien Renard <sebastien.renard@digitalfox.org>
0015 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0016 @license: GPLv3
0017 """
0018
0019 from codecs import open
0020 from locale import getlocale
0021 from os.path import dirname, basename, isdir, join, isabs
0022 from os import listdir
0023 import re
0024 import sys
0025 from time import time
0026
0027 from pology import PologyError, datadir, _, n_
0028 from pology.message import MessageUnsafe
0029 from pology.config import strbool
0030 from pology.getfunc import get_hook_ireq, split_ireq
0031 from pology.report import report, warning, format_item_list
0032 from pology.tabulate import tabulate
0033 from pology.timeout import timed_out
0034
TIMEOUT=8 # Time limit in seconds after which processing of a rule times out
0036
0037
def printStat(rules):
    """Print rules match statistics

    Only rules which were applied at least once (C{count != 0}) and for
    which statistics gathering is enabled (C{stat is True}) are reported.
    Nothing is printed when there are no such rules.

    @param rules: list of rule objects; each must provide the C{count},
        C{time}, C{stat} and C{displayName} attributes
    """
    statRules=[r for r in rules if r.count!=0 and r.stat is True]
    if not statRules:
        return

    statRules.sort(key=lambda x: x.time)
    rown=[r.displayName for r in statRules]

    counts=[r.count for r in statRules]
    # Average time per call, converted to milliseconds.
    avgTimes=[r.time/r.count*1000 for r in statRules]
    totTimeMsg=sum(avgTimes)/1000
    totTimes=[r.time for r in statRules]
    totTime=sum(totTimes)
    # The total measured time can be exactly zero (e.g. very fast rules
    # timed with a coarse clock); report zero shares instead of failing
    # with a division by zero.
    if totTime:
        shares=[t/totTime*100 for t in totTimes]
    else:
        shares=[0.0]*len(totTimes)
    data=[counts, avgTimes, totTimes, shares]

    report(_("@label", "Rule application statistics:"))
    coln=[_("@title:column", "calls"),
          _("@title:column avg = average", "avg-time [ms]"),
          _("@title:column tot = total", "tot-time [s]"),
          _("@title:column", "time-share")]
    dfmt=[   "%d",           "%.3f",          "%.1f",     "%.2f%%"]
    report(tabulate(data, rown=rown, coln=coln, dfmt=dfmt, colorize=True))
    report(_("@info statistics",
             "Total application time [s]: %(num).1f",
             num=totTime))
    report(_("@info statistics",
             "Average application time per message [ms]: %(num).1f",
             num=totTimeMsg*1000))
0066
0067
def loadRules(lang, envs=[], envOnly=False, ruleFiles=None, stat=False,
              printInfo=False):
    """Load rules for a given language

    Rules are read either from the given rule files, or, when none are
    given, from all C{*.rules} files in the internal rules directory of
    the language. Loaded rules are then narrowed down by environment.

    @param lang: language code as a string (e.g. C{fr}); used to locate
        internal rules when C{ruleFiles} is not given
    @param envs: list of environments in which the rules are to be applied;
        later environments take precedence over earlier ones
    @param envOnly: load only rules applicable in given environments
    @param ruleFiles: a list of rule files to load instead of internal
    @param stat: whether rules should gather count and time execution
    @param printInfo: whether to output information about loading of rules
    @return: list of rule objects
    @raise PologyError: if there are no internal rules for the language
    """
    langDir=join(datadir(), "lang") # Base of rule files per language

    # Collect rule files.
    if ruleFiles is None:
        ruleDir=join(langDir, lang, "rules")
        if not isdir(ruleDir):
            raise PologyError(
                _("@info",
                  "There are no internal rules for language '%(langcode)s'.",
                  langcode=lang))
        if printInfo:
            report(_("@info:progress",
                     "Using internal rules for language '%(langcode)s'.",
                     langcode=lang))
        ruleFiles=[join(ruleDir, f) for f in listdir(ruleDir) if f.endswith(".rules")]
    elif printInfo:
        report(_("@info:progress", "Using external rules."))

    # Parse rules; message filters are shared between files by signature.
    seenMsgFilters = {}
    loaded=[]
    for ruleFile in ruleFiles:
        loaded.extend(loadRulesFromFile(ruleFile, stat, set(envs), seenMsgFilters))

    # Drop rules bound to an environment other than the given ones,
    # and in environment-only mode also any rule not in given environments.
    # FIXME: This should be moved to loadRulesFromFile.
    selected=[]
    for rule in loaded:
        if rule.environ not in envs and (envOnly or rule.environ):
            continue
        selected.append(rule)

    if not envs:
        return selected

    # When operating in specific environments, for rules with
    # equal identifiers eliminate all but the one in the last environment.
    envsByIdent={}
    for rule in selected:
        if rule.ident:
            envsByIdent.setdefault(rule.ident, set()).add(rule.environ)

    def envRank (env):
        # Environment-agnostic rules rank below every listed environment.
        if env is None:
            return -1
        return envs.index(env)

    survivors=[]
    for rule in selected:
        if rule.ident and len(envsByIdent[rule.ident])>1:
            ownRank=envRank(rule.environ)
            if any(ownRank<envRank(env) for env in envsByIdent[rule.ident]):
                continue
        survivors.append(rule)

    return survivors
0141
0142
# Value of the first element of the first field of a parsed rule-file line
# which marks the start of a new rule (its trigger line).
_rule_start = "*"
0144
0145 class _IdentError (Exception): pass
0146 class _SyntaxError (Exception): pass
0147
0148
def loadRulesFromFile(filePath, stat, envs=set(), seenMsgFilters={}):
    """Load rule file and return list of Rule objects

    The file is parsed line by line. A line starting a rule trigger or
    an empty line closes the rule or validity group currently being
    assembled. Included files are parsed in place of the C{include}
    directive, after which parsing of the including file is resumed.

    NOTE: The default C{seenMsgFilters} dictionary is created only once
    and is therefore shared by all calls which do not supply their own.

    @param filePath: full path to rule file
    @param stat: stat is a boolean to indicate if rule should gather count and time execution
    @param envs: environments in which the rules are to be applied
    @param seenMsgFilters: dictionary of previously encountered message
        filter functions, by their signatures; to avoid constructing
        same filters over different files
    @return: list of Rule object
    @raise PologyError: if a file cannot be read, a directive is malformed,
        or a rule identifier is repeated within the same environment
    """

    rules=[]
    inRule=False #Flag that indicate we are currently parsing a rule bloc
    inGroup=False #Flag that indicate we are currently parsing a validGroup bloc

    valid=[]
    pattern=""
    msgpart=""
    hint=""
    ident=None
    disabled=False
    manual=False
    casesens=True
    environ=None
    validGroup={}
    validGroupName=""
    identLines={}
    globalEnviron=None
    globalMsgFilters=[]
    globalRuleFilters=[]
    msgFilters=None
    ruleFilters=None
    seenRuleFilters={}
    triggerFunc=None
    lno=0

    try:
        # Close the file as soon as it is read, rather than leaving it
        # to the garbage collector.
        with open(filePath, "r", "UTF-8") as ruleStream:
            lines=ruleStream.readlines()
        lines.append("\n") # sentry line
        # Stack of (lines, path, line number) of files suspended by includes.
        fileStack=[]
        while True:
            # At the end of current file, resume the including file if any.
            while lno >= len(lines):
                if not fileStack:
                    lines = None
                    break
                lines, filePath, lno = fileStack.pop()
            if lines is None:
                break
            lno += 1
            fields, lno = _parseRuleLine(lines, lno)

            # End of rule bloc
            # FIXME: Remove 'not fields' when global directives too
            # start with something. This will eliminate rule separation
            # by empty lines, and skipping comment-only lines.
            if lines[lno - 1].strip().startswith("#"):
                continue
            if not fields or fields[0][0] in (_rule_start,):
                if inRule:
                    inRule=False

                    if msgFilters is None:
                        msgFilters = globalMsgFilters
                    if ruleFilters is None:
                        ruleFilters = globalRuleFilters
                    # Use previously assembled filter with the same signature,
                    # to be able to compare filter functions by "is".
                    msgFilterSig = _filterFinalSig(msgFilters)
                    msgFilterFunc = seenMsgFilters.get(msgFilterSig)
                    if msgFilterFunc is None:
                        msgFilterFunc = _msgFilterComposeFinal(msgFilters)
                        seenMsgFilters[msgFilterSig] = msgFilterFunc
                    ruleFilterSig = _filterFinalSig(ruleFilters)
                    ruleFilterFunc = seenRuleFilters.get(ruleFilterSig)
                    if ruleFilterFunc is None:
                        ruleFilterFunc = _ruleFilterComposeFinal(ruleFilters)
                        seenRuleFilters[ruleFilterSig] = ruleFilterFunc

                    rules.append(Rule(pattern, msgpart,
                                      hint=hint, valid=valid,
                                      stat=stat, casesens=casesens,
                                      ident=ident,
                                      disabled=disabled, manual=manual,
                                      environ=(environ or globalEnviron),
                                      mfilter=msgFilterFunc,
                                      rfilter=ruleFilterFunc,
                                      trigger=triggerFunc))
                    # Reset per-rule state for the next rule.
                    pattern=""
                    msgpart=""
                    hint=""
                    ident=None
                    disabled=False
                    manual=False
                    casesens=True
                    environ=None
                    msgFilters=None
                    ruleFilters=None
                    triggerFunc=None
                elif inGroup:
                    inGroup=False
                    validGroup[validGroupName]=valid
                    validGroupName=""
                valid=[]

            if not fields:
                continue

            # Begin of rule (pattern or special)
            if fields[0][0]==_rule_start:
                inRule=True
                keyword=fields[0][1]
                if keyword in _trigger_msgparts:
                    msgpart=keyword
                    pattern=fields[1][0]
                    for mmod in fields[1][1]:
                        if mmod not in _trigger_matchmods:
                            raise _SyntaxError(
                                _("@info",
                                  "Unknown match modifier '%(mod)s' "
                                  "in trigger pattern.",
                                  mod=mmod))
                    casesens=("i" not in fields[1][1])
                elif keyword in _trigger_specials:
                    casesens, rest = _triggerParseGeneral(fields[1:])
                    if keyword == "hook":
                        triggerFunc = _triggerFromHook(rest)
                else:
                    raise _SyntaxError(
                        _("@info",
                          "Unknown keyword '%(kw)s' in rule trigger.",
                          kw=keyword))

            # valid line (for rule or validGroup)
            elif fields[0][0]=="valid":
                if not inRule and not inGroup:
                    raise _SyntaxError(
                        _("@info",
                          "Directive '%(dir)s' outside of rule or "
                          "validity group.",
                          dir="valid"))
                valid.append(fields[1:])

            # Rule hint
            elif fields[0][0]=="hint":
                if not inRule:
                    raise _SyntaxError(
                        _("@info",
                          "Directive '%(dir)s' outside of rule.",
                          dir="hint"))
                hint=fields[0][1]

            # Rule identifier
            elif fields[0][0]=="id":
                if not inRule:
                    raise _SyntaxError(
                        _("@info",
                          "Directive '%(dir)s' outside of rule.",
                          dir="id"))
                ident=fields[0][1]
                # Same identifier may repeat only across different
                # global environments.
                if ident in identLines:
                    (prevLine, prevEnviron)=identLines[ident]
                    if prevEnviron==globalEnviron:
                        raise _IdentError(ident, prevLine)
                identLines[ident]=(lno, globalEnviron)

            # Whether rule is disabled
            elif fields[0][0]=="disabled":
                if not inRule:
                    raise _SyntaxError(
                        _("@info",
                          "Directive '%(dir)s' outside of rule.",
                          dir="disabled"))
                disabled=True

            # Whether rule is manually applied
            elif fields[0][0]=="manual":
                if not inRule:
                    raise _SyntaxError(
                        _("@info",
                          "Directive '%(dir)s' outside of rule.",
                          dir="manual"))
                manual=True

            # Validgroup
            elif fields[0][0]=="validGroup":
                if inGroup:
                    raise _SyntaxError(
                        _("@info",
                          "Directive '%(dir)s' inside validity group.",
                          dir="validGroup"))
                if inRule:
                    # Use of validGroup directive inside a rule bloc
                    validGroupName=fields[1][0]
                    # Report use of an undefined group as a syntax error
                    # with file and line, instead of a bare KeyError.
                    if validGroupName not in validGroup:
                        raise _SyntaxError(
                            _("@info",
                              "Unknown validity group '%(name)s'.",
                              name=validGroupName))
                    valid.extend(validGroup[validGroupName])
                else:
                    # Begin of validGroup
                    inGroup=True
                    validGroupName=fields[1][0]

            # Switch rule environment
            elif fields[0][0]=="environment":
                if inGroup:
                    raise _SyntaxError(
                        _("@info",
                          "Directive '%(dir)s' inside validity group.",
                          dir="environment"))
                envName=fields[1][0]
                if inRule:
                    # Environment specification for current rule.
                    environ=envName
                else:
                    # Environment switch for rules that follow.
                    globalEnviron=envName

            # Add or remove filters
            elif (   fields[0][0].startswith("addFilter")
                  or fields[0][0] in ["removeFilter", "clearFilters"]):
                # Select the proper filter lists on which to act.
                if inRule:
                    if msgFilters is None: # local filters not created yet
                        msgFilters = globalMsgFilters[:] # shallow copy
                    if ruleFilters is None:
                        ruleFilters = globalRuleFilters[:]
                    currentMsgFilters = msgFilters
                    currentRuleFilters = ruleFilters
                    currentEnviron = environ or globalEnviron
                else:
                    currentMsgFilters = globalMsgFilters
                    currentRuleFilters = globalRuleFilters
                    currentEnviron = globalEnviron

                if fields[0][0].startswith("addFilter"):
                    filterType = fields[0][0][len("addFilter"):]
                    handles, parts, fenvs, rest = _filterParseGeneral(fields[1:])
                    # Filter without explicit environments is bound
                    # to the environment currently in effect, if any.
                    if fenvs is None and currentEnviron:
                        fenvs = [currentEnviron]
                    if filterType == "Regex":
                        func, sig = _filterCreateRegex(rest)
                    elif filterType == "Hook":
                        func, sig = _filterCreateHook(rest)
                    else:
                        raise _SyntaxError(
                            _("@info",
                              "Unknown filter directive '%(dir)s'.",
                              dir=fields[0][0]))
                    msgParts = set(parts).difference(_filterKnownRuleParts)
                    if msgParts:
                        totFunc, totSig = _msgFilterSetOnParts(msgParts, func, sig)
                        currentMsgFilters.append([handles, fenvs, totFunc, totSig])
                    ruleParts = set(parts).difference(_filterKnownMsgParts)
                    # Rule filters are applied at load time, hence added
                    # only if applicable in the operating environments.
                    if ruleParts and (not envs or not fenvs or envs.intersection(fenvs)):
                        totFunc, totSig = _ruleFilterSetOnParts(ruleParts, func, sig)
                        currentRuleFilters.append([handles, fenvs, totFunc, totSig])

                elif fields[0][0] == ("removeFilter"):
                    _filterRemove(fields[1:],
                                  (currentMsgFilters, currentRuleFilters), envs)

                else: # remove all filters
                    if len(fields) != 1:
                        raise _SyntaxError(
                            _("@info",
                              "Expected no fields in "
                              "all-filter removal directive."))
                    # Must not lose reference to the selected lists.
                    while currentMsgFilters:
                        currentMsgFilters.pop()
                    while currentRuleFilters:
                        currentRuleFilters.pop()

            # Include another file
            elif fields[0][0] == "include":
                if inRule or inGroup:
                    raise _SyntaxError(
                        _("@info",
                          "Directive '%(dir)s' inside a rule or group.",
                          dir="include"))
                # Suspend current file until the included one is exhausted.
                fileStack.append((lines, filePath, lno))
                lines, filePath, lno = _includeFile(fields[1:], filePath)

            else:
                raise _SyntaxError(
                    _("@info",
                      "Unknown directive '%(dir)s'.",
                      dir=fields[0][0]))

    except _IdentError as e:
        raise PologyError(
            _("@info",
              "Identifier '%(id)s' at %(file)s:%(line)d "
              "previously encountered at %(pos)s.",
              id=e.args[0], file=filePath, line=lno, pos=e.args[1]))
    except IOError as e:
        raise PologyError(
            _("@info",
              "Cannot read rule file '%(file)s'. The error was: %(msg)s",
              file=filePath, msg=e.args[0]))
    except _SyntaxError as e:
        raise PologyError(
            _("@info",
              "Syntax error at %(file)s:%(line)d:\n%(msg)s",
              file=filePath, line=lno, msg=e.args[0]))

    return rules
0452
0453
0454 def _checkFields (directive, fields, knownFields, mandatoryFields=set(),
0455                   unique=True):
0456
0457     fieldDict = dict(fields)
0458     if unique and len(fieldDict) != len(fields):
0459         raise _SyntaxError(
0460             _("@info",
0461               "Duplicate fields in '%(dir)s' directive.",
0462               dir=directive))
0463
0464     if not isinstance(knownFields, set):
0465         knownFields = set(knownFields)
0466     unknownFields = set(fieldDict).difference(knownFields)
0467     if unknownFields:
0468         raise _SyntaxError(
0469             _("@info",
0470               "Unknown fields in '%(dir)s' directive: %(fieldlist)s.",
0471               dir=directive, fieldlist=format_item_list(unknownFields)))
0472
0473     for name in mandatoryFields:
0474         if name not in fieldDict:
0475             raise _SyntaxError(
0476                 _("@info",
0477                   "Mandatory field '%(field)s' missing in '%(dir)s' directive.",
0478                   field=name, dir=directive))
0479
0480
def _includeFile (fields, includingFilePath):
    """Read lines of a file named by an include directive.

    A relative path is resolved against the directory of the including
    file. A warning is issued if the included file itself has
    the C{.rules} extension.

    @param fields: fields of the include directive, as (name, value) pairs;
        exactly the C{file} field is expected
    @param includingFilePath: path of the file containing the directive
    @return: lines of the included file with a sentry empty line appended,
        path of the included file, and the starting line number (0)
    """

    _checkFields("include", fields, ["file"], ["file"])
    fieldDict = dict(fields)

    relativeFilePath = fieldDict["file"]
    if isabs(relativeFilePath):
        filePath = relativeFilePath
    else:
        filePath = join(dirname(includingFilePath), relativeFilePath)

    if filePath.endswith(".rules"):
        warning(_("@info",
                  "Including one rule file into another, "
                  "'%(file1)s' from '%(file2)s'.",
                  file1=filePath, file2=includingFilePath))

    # Close the file as soon as it is read, rather than leaving it
    # to the garbage collector.
    with open(filePath, "r", "UTF-8") as includedFile:
        lines=includedFile.readlines()
    lines.append("\n") # sentry line

    return lines, filePath, 0
0502
0503
def _filterRemove (fields, filterLists, envs):
    """Remove filters with given handles from filter lists, in place.

    If the directive names environments, the removal is performed only
    when at least one of them is among the operating environments.

    @param fields: fields of the removeFilter directive, as (name, value)
        pairs; C{handle} is mandatory, C{env} is optional
    @param filterLists: sequence of filter lists to remove from; the first
        element of each filter entry is the set of its handles
    @param envs: set of operating environments
    @raise PologyError: when some of the handles match no filter
    """

    _checkFields("removeFilter", fields, ["handle", "env"], ["handle"])
    spec = dict(fields)

    envSpec = spec.get("env")
    if envSpec is not None:
        removalEnvs = [x.strip() for x in envSpec.split(",")]
        if not (envs and envs.intersection(removalEnvs)):
            # We are operating in no environment, or no operating environment
            # is listed among the selected; skip removal.
            return

    wanted = set([x.strip() for x in spec["handle"].split(",")])
    found = set()
    for flist in filterLists:
        kept = []
        for entry in flist:
            matched = entry[0].intersection(wanted)
            if matched:
                found.update(matched)
            else:
                kept.append(entry)
        # Callers hold references to these lists, so update them in place.
        flist[:] = kept

    missing = wanted.difference(found)
    if missing:
        raise PologyError(
            _("@info",
              "No filters with these handles to remove: %(handlelist)s.",
              handlelist=format_item_list(missing)))
0534               "No filters with these handles to remove: %(handlelist)s.",
0535               handlelist=format_item_list(unseenHandles)))
0536
0537
0538 _filterKnownMsgParts = set([
0539     "msg", "msgid", "msgstr", "pmsgid", "pmsgstr",
0540 ])
0541 _filterKnownRuleParts = set([
0542     "pattern",
0543 ])
0544 _filterKnownParts = set(  list(_filterKnownMsgParts)
0545                         + list(_filterKnownRuleParts))
0546
0547 def _filterParseGeneral (fields):
0548
0549     handles = set()
0550     parts = []
0551     envs = None
0552
0553     rest = []
0554     for field in fields:
0555         name, value = field
0556         if name == "handle":
0557             handles = set([x.strip() for x in value.split(",")])
0558         elif name == "on":
0559             parts = [x.strip() for x in value.split(",")]
0560             unknownParts = set(parts).difference(_filterKnownParts)
0561             if unknownParts:
0562                 raise _SyntaxError(
0563                     _("@info",
0564                       "Unknown message parts for the filter to act on: "
0565                       "%(partlist)s.",
0566                       partlist=format_item_list(unknownParts)))
0567         elif name == "env":
0568             envs = [x.strip() for x in value.split(",")]
0569         else:
0570             rest.append(field)
0571
0572     if not parts:
0573         raise _SyntaxError(
0574             _("@info",
0575               "No message parts specified for the filter to act on."))
0576
0577     return handles, parts, envs, rest
0578
0579
0580 def _msgFilterSetOnParts (parts, func, sig):
0581
0582     chain = []
0583     parts = list(parts)
0584     parts.sort()
0585     for part in parts:
0586         if part == "msg":
0587             chain.append(_filterOnMsg(func))
0588         elif part == "msgstr":
0589             chain.append(_filterOnMsgstr(func))
0590         elif part == "msgid":
0591             chain.append(_filterOnMsgid(func))
0592         elif part == "pmsgstr":
0593             chain.append(_filterOnMsgstrPure(func))
0594         elif part == "pmsgid":
0595             chain.append(_filterOnMsgidPure(func))
0596
0597     def composition (msg, cat):
0598
0599         for func in chain:
0600             func(msg, cat)
0601
0602     totalSig = sig + "\x04" + ",".join(parts)
0603
0604     return composition, totalSig
0605
0606
0607 def _filterFinalSig (filterList):
0608
0609     sigs = [x[3] for x in filterList]
0610     finalSig = "\x05".join(sigs)
0611
0612     return finalSig
0613
0614
0615 def _msgFilterComposeFinal (filterList):
0616
0617     if not filterList:
0618         return None
0619
0620     fenvs_funcs = [(x[1], x[2]) for x in filterList]
0621
0622     def composition (msg, cat, envs):
0623
0624         for fenvs, func in fenvs_funcs:
0625             # Apply filter if environment-agnostic or in an operating environment.
0626             if fenvs is None or envs.intersection(fenvs):
0627                 func(msg, cat)
0628
0629     return composition
0630
0631
0632 def _filterOnMsg (func):
0633
0634     def aggregate (msg, cat):
0635
0636         func(msg, cat)
0637
0638     return aggregate
0639
0640
0641 def _filterOnMsgstr (func):
0642
0643     def aggregate (msg, cat):
0644
0645         for i in range(len(msg.msgstr)):
0646             tmp = func(msg.msgstr[i], msg, cat)
0647             if tmp is not None: msg.msgstr[i] = tmp
0648
0649     return aggregate
0650
0651
0652 def _filterOnMsgid (func):
0653
0654     def aggregate (msg, cat):
0655
0656         tmp = func(msg.msgid, msg, cat)
0657         if tmp is not None: msg.msgid = tmp
0658         if msg.msgid_plural is not None:
0659             tmp = func(msg.msgid_plural, msg, cat)
0660             if tmp is not None: msg.msgid_plural = tmp
0661
0662     return aggregate
0663
0664
0665 def _filterOnMsgstrPure (func):
0666
0667     def aggregate (msg, cat):
0668
0669         for i in range(len(msg.msgstr)):
0670             tmp = func(msg.msgstr[i])
0671             if tmp is not None: msg.msgstr[i] = tmp
0672
0673     return aggregate
0674
0675
0676 def _filterOnMsgidPure (func):
0677
0678     def aggregate (msg, cat):
0679
0680         tmp = func(msg.msgid)
0681         if tmp is not None: msg.msgid = tmp
0682         if msg.msgid_plural is not None:
0683             tmp = func(msg.msgid_plural)
0684             if tmp is not None: msg.msgid_plural = tmp
0685
0686     return aggregate
0687
0688
0689 def _ruleFilterSetOnParts (parts, func, sig):
0690
0691     chain = []
0692     parts = list(parts)
0693     parts.sort()
0694     for part in parts:
0695         if part == "pattern":
0696             chain.append((_filterOnPattern(func), part))
0697
0698     def composition (value, part):
0699
0700         if part not in _filterKnownRuleParts:
0701             raise PologyError(
0702                 _("@info",
0703                   "Unknown rule part '%(part)s' for the filter to act on.",
0704                   part=part))
0705
0706         for func, fpart in chain:
0707             if fpart == part:
0708                 value = func(value)
0709
0710         return value
0711
0712     totalSig = sig + "\x04" + ",".join(parts)
0713
0714     return composition, totalSig
0715
0716
0717 def _ruleFilterComposeFinal (filterList):
0718
0719     if not filterList:
0720         return None
0721
0722     funcs = [x[2] for x in filterList]
0723
0724     def composition (value, part):
0725
0726         for func in funcs:
0727             value = func(value, part)
0728
0729         return value
0730
0731     return composition
0732
0733
0734 def _filterOnPattern (func):
0735
0736     def aggregate (pattern):
0737
0738         tmp = func(pattern)
0739         if tmp is not None: pattern = tmp
0740
0741         return pattern
0742
0743     return aggregate
0744
0745
# Names of fields accepted when creating a regular expression filter.
_filterRegexKnownFields = set(["match", "repl", "casesens"])
0747
def _filterCreateRegex (fields):
    """Create a text filter which substitutes matches of a regular expression.

    @param fields: fields of the directive, as (name, value) pairs;
        C{match} (the regular expression) is mandatory, C{repl}
        (the replacement, empty by default) and C{casesens} (whether
        matching is case-sensitive, false by default) are optional
    @return: function taking text and returning the substituted text,
        and the signature of the filter
    """

    _checkFields("addFilterRegex", fields, _filterRegexKnownFields, ["match"])
    spec = dict(fields)

    caseSens = _fancyBool(spec.get("casesens", "0"))
    if caseSens:
        flags = re.U | re.S
    else:
        flags = re.U | re.S | re.I

    matchStr = spec["match"]
    replStr = spec.get("repl", "")
    regex = re.compile(matchStr, flags)

    def func (text):
        return regex.sub(replStr, text)

    return func, "\x04".join([matchStr, replStr, str(caseSens)])
0767
0768     return func, sig
0769
0770
0771 def _filterCreateHook (fields):
0772
0773     _checkFields("addFilterHook", fields, ["name"], ["name"])
0774     fieldDict = dict(fields)
0775
0776     hookSpec = fieldDict["name"]
0777     hook = get_hook_ireq(hookSpec, abort=False)
0778
0779     sigSegs = []
0780     for el in split_ireq(hookSpec):
0781         if el is not None:
0782             sigSegs.append(el)
0783         else:
0784             sigSegs.append("\x00")
0785     sig = "\x04".join(sigSegs)
0786
0787     return hook, sig
0788
0789
0790 def _triggerParseGeneral (fields):
0791
0792     casesens = True
0793
0794     rest = []
0795     for field in fields:
0796         name, value = field
0797         if name == "casesens":
0798             casesens = _fancyBool(value)
0799         else:
0800             rest.append(field)
0801
0802     return casesens, rest
0803
0804
0805 _triggerKnownMsgParts = set([
0806     "msg", "msgid", "msgstr", "pmsgid", "pmsgstr",
0807 ])
0808
0809 def _triggerFromHook (fields):
0810
0811     _checkFields("hook", fields, ["name", "on"], ["name", "on"])
0812     fieldDict = dict(fields)
0813
0814     hook = get_hook_ireq(fieldDict["name"], abort=False)
0815
0816     msgpart = fieldDict["on"].strip()
0817     if msgpart not in _triggerKnownMsgParts:
0818         raise PologyError(
0819             _("@info",
0820               "Unknown message part '%(part)s' for trigger to act on.",
0821               part=msgpart))
0822
0823     if msgpart == "msg":
0824         def trigger (msg, cat):
0825             return hook(msg, cat)
0826     elif msgpart == "msgid":
0827         def trigger (msg, cat):
0828             hl = []
0829             hl.append(("msgid", 0, hook(msg.msgid, msg, cat)))
0830             if msg.msgid_plural is not None:
0831                 hl.append(("msgid_plural", 0, hook(msg.msgid_plural, msg, cat)))
0832             return hl
0833     elif msgpart == "msgstr":
0834         def trigger (msg, cat):
0835             hl = []
0836             for i in range(len(msg.msgstr)):
0837                 hl.append(("msgstr", i, hook(msg.msgstr[i], msg, cat)))
0838             return hl
0839     elif msgpart == "pmsgid":
0840         def trigger (msg, cat):
0841             hl = []
0842             hl.append(("msgid", 0, hook(msg.msgid)))
0843             if msg.msgid_plural is not None:
0844                 hl.append(("msgid_plural", 0, hook(msg.msgid_plural)))
0845             return hl
0846     elif msgpart == "pmsgstr":
0847         def trigger (msg, cat):
0848             hl = []
0849             for i in range(len(msg.msgstr)):
0850                 hl.append(("msgstr", i, hook(msg.msgstr[i])))
0851             return hl
0852
0853     return trigger
0854
0855
def _fancyBool (string):
    """
    Convert a string to a boolean, raising an error if not possible.

    @param string: textual representation of a boolean
    @type string: string

    @return: the value reported by C{strbool}
    @raise PologyError: if C{strbool} returns C{None} for the string
    """

    value = strbool(string)
    if value is not None:
        return value

    raise PologyError(
        _("@info",
          "Cannot convert '%(val)s' to a boolean value.",
          val=string))
0865
0866
# Message part keywords accepted for pattern triggers.
_trigger_msgparts = {
    # For matching in all messages.
    "msgctxt", "msgid", "msgstr",

    # For matching in plural messages part by part.
    "msgid_singular", "msgid_plural",
}
# Per-form keywords msgstr_0 ... msgstr_9 (ought to be enough).
_trigger_msgparts.update("msgstr_%d" % form for form in range(10))

# Trigger keywords which are not message parts.
_trigger_specials = {
    "hook",
}

# Match modifiers.
_trigger_matchmods = [
    "i",
]
0883
class Rule(object):
    """
    Represent a single rule.

    A rule is triggered either by a regular expression applied to one part
    of the message, or by a trigger function. Every span reported by
    the trigger is then checked against the validity entries of the rule,
    and dropped as soon as one entry matches it.
    """

    # All keywords which may appear in a validity entry.
    _knownKeywords = set(("env", "cat", "catrx", "span", "after", "before", "ctx", "msgid", "msgstr", "head", "srcref", "comment"))
    # Keywords whose value is compiled as a single regular expression.
    _regexKeywords = set(("catrx", "span", "after", "before", "ctx", "msgid", "msgstr", "srcref", "comment"))
    # Keywords whose value holds two regular expressions behind a separator.
    _twoRegexKeywords = set(("head",))
    # Keywords whose value is a comma-separated list of words.
    _listKeywords = set(("env", "cat"))

    def __init__(self, pattern, msgpart, hint=None, valid=None,
                       stat=False, casesens=True, ident=None,
                       disabled=False, manual=False,
                       environ=None, mfilter=None, rfilter=None,
                       trigger=None):
        """Create a rule
        @param pattern: valid regexp pattern that trigger the rule
        @type pattern: unicode
        @param msgpart: part of the message to be matched by C{pattern}
        @type msgpart: string
        @param hint: hint given to user when rule match
        @type hint: unicode
        @param valid: validity entries, any of which cancels a match;
            C{None} is the same as no entries
        @type valid: list of lists of (key, value) pairs, or C{None}
        @param stat: whether to gather timing statistics
        @type stat: bool
        @param casesens: whether regex matching will be case-sensitive
        @type casesens: bool
        @param ident: rule identifier
        @type ident: unicode or C{None}
        @param disabled: whether rule is disabled
        @type disabled: bool
        @param manual: whether rule is manually applied
        @type manual: bool
        @param environ: environment in which the rule applies
        @type environ: string or C{None}
        @param mfilter: filter to apply to message before checking
        @type mfilter: (msg, cat, envs) -> <anything>
        @param rfilter: filter to apply to rule strings (e.g. on regex
            patterns); it is called with the string and the literal
            C{"pattern"}
        @type rfilter: (string, string) -> string
        @param trigger: function to act as trigger instead of C{pattern} applied to C{msgpart}
        @type trigger: (msg, cat) -> L{highlight<msgreport.report_msg_content>}

        @raise PologyError: if no trigger function is given and
            C{msgpart} is not a known message part
        """

        # Define instance variable
        self.pattern=None # Compiled regexp into re.pattern object
        self.msgpart=msgpart # The part of the message to match
        self.valid=None   # Parsed valid definition
        self.hint=hint    # Hint message return to user
        self.ident=ident    # Rule identifier
        self.disabled=disabled # Whether rule is disabled
        self.manual=manual # Whether rule is manually applied
        self.count=0      # Number of times the trigger has been run
        self.time=0       # Total time of rule process calls
        self.stat=stat    # Whether to gather stat or not. Default is false (10% perf hit due to time() call)
        self.casesens=casesens # Whether regex matches are case-sensitive
        self.environ=environ # Environment in which to apply the rule
        self.mfilter=mfilter # Function to filter the message before checking
        self.rfilter=rfilter # Function to filter the rule strings
        self.trigger=None # Function to use as trigger instead of pattern

        if trigger is None and msgpart not in _trigger_msgparts:
            raise PologyError(
                _("@info",
                  "Unknown message part '%(part)s' set for the rule's "
                  "trigger pattern.",
                  part=msgpart))

        # Flags for regex compilation.
        self.reflags=re.U|re.S
        if not self.casesens:
            self.reflags|=re.I

        # Setup trigger.
        if not trigger:
            self.setPattern(pattern)
        else:
            self.setTrigger(trigger)

        # Parse validity entries (a fresh list when none were given,
        # to avoid sharing a mutable default between calls).
        if valid is None:
            valid=[]
        self.setValid(valid)

    def setPattern(self, pattern):
        """Compile pattern and use it as the trigger.

        The rule filter, if any, is applied to the pattern first.
        If filtering or compilation fails, a warning is issued and
        the rule is disabled instead of raising.

        @param pattern: pattern as an unicode string"""
        try:
            if self.rfilter:
                pattern=self.rfilter(pattern, "pattern")
            self.pattern=re.compile(pattern, self.reflags)
        except Exception as e:
            # Deliberately broad: any failure only disables this rule.
            warning(_("@info",
                      "Invalid pattern '%(pattern)s', disabling rule:\n"
                      "%(msg)s",
                      pattern=pattern, msg=e))
            self.disabled=True
        self.rawPattern=pattern
        self.trigger=None # invalidate any trigger function
        if self.ident:
            self.displayName=_("@item:intext",
                               "[id=%(rule)s]",
                               rule=self.ident)
        else:
            self.displayName=_("@item:intext",
                               "[pattern=%(pattern)s]",
                               pattern=self.rawPattern)

    def setTrigger(self, trigger):
        """
        Use trigger function instead of pattern.

        @param trigger: function to act as trigger
        @type trigger: (msg, cat) -> {highlight<msgreport.report_msg_content>}
        """
        self.trigger=trigger
        self.pattern=None # invalidate any pattern
        self.rawPattern=""
        if self.ident:
            self.displayName=_("@item:intext",
                               "[id=%(rule)s]",
                               rule=self.ident)
        else:
            self.displayName=_("@item:intext",
                               "[function]")


    def setValid(self, valid):
        """Parse validity entries.

        Each entry is a sequence of (key, value) pairs. A key may be
        prefixed with C{!} to invert its test. Unknown keys are skipped
        with a warning; an entry which fails to parse is dropped
        as a whole, also with a warning.

        @param valid: validity entries
        @type valid: list of lists of (key, value) pairs"""
        self.valid=[]
        for item in valid:
            try:
                entry=[] # Empty valid entry
                for (key, value) in item:
                    key=key.strip()
                    bkey = key
                    if key.startswith("!"):
                        bkey = key[1:]
                    if bkey not in Rule._knownKeywords:
                        warning(_("@info",
                                  "Invalid keyword '%(kw)s' in "
                                  "validity definition, skipped.",
                                  kw=key))
                        continue
                    if self.rfilter:
                        value=self.rfilter(value, "pattern")
                    if bkey in Rule._regexKeywords:
                        # Compile regexp
                        value=re.compile(value, self.reflags)
                    elif bkey in Rule._listKeywords:
                        # List of comma-separated words
                        value=[x.strip() for x in value.split(",")]
                    elif bkey in Rule._twoRegexKeywords:
                        # The first character is the separator between
                        # the two regexes; split on it and compile both.
                        frx, vrx=value[1:].split(value[:1])
                        value=(re.compile(frx, self.reflags),
                               re.compile(vrx, self.reflags))
                    entry.append((key, value))
                self.valid.append(entry)
            except Exception as e:
                # Deliberately broad: a bad entry must not abort the rule.
                warning(_("@info",
                          "Invalid validity definition '%(dfn)s', skipped. "
                          "The error was:\n%(msg)s",
                          dfn=item, msg=e))
                continue

    #@timed_out(TIMEOUT)
    def process (self, msg, cat, envs=None, nofilter=False):
        """
        Apply rule to the message.

        If the rule matches, I{highlight specification} of offending spans is
        returned (see L{report_msg_content<msgreport.report_msg_content>});
        otherwise an empty list.

        Rule will normally apply its own filters to the message before
        matching (on a local copy, the original message will not be affected).
        If the message is already appropriately filtered, this self-filtering
        can be prevented by setting C{nofilter} to {True}.

        @param msg: message to which the texts belong
        @type msg: instance of L{Message_base}
        @param cat: catalog to which the message belongs
        @type cat: L{Catalog}
        @param envs: environments in which the rule is applied;
            C{None} is the same as an empty set
        @type envs: set or C{None}
        @param nofilter: avoid filtering the message if C{True}
        @type nofilter: bool

        @return: highlight specification (may be empty list)
        """

        if envs is None:
            envs = set()

        if self.pattern is None and self.trigger is None:
            warning(_("@info",
                      "Rule trigger not defined, rule skipped."))
            return []

        # If this rule belongs to a specific environment,
        # and it is not among operating environments,
        # cancel the rule immediately.
        if self.environ and self.environ not in envs:
            return []

        # Cancel immediately if the rule is disabled.
        if self.disabled:
            return []

        if self.stat:
            begin=time()

        # Apply own filters to the message if not filtered already.
        if not nofilter:
            msg = self._filter_message(msg, cat, envs)

        if self.pattern:
            failed_spans = self._processWithPattern(msg, cat, envs)
        else:
            failed_spans = self._processWithTrigger(msg, cat, envs)

        # Update stats; the counter is advanced whenever the trigger
        # has been run, whether or not anything was found.
        self.count += 1
        if self.stat:
            self.time += time() - begin

        return failed_spans


    def _create_text_spec (self, msgpart, msg):
        """
        Collect texts of the message selected by the message part keyword.

        A part which the message does not have (no context, no plural,
        no such C{msgstr} form) contributes nothing to the result.

        @param msgpart: message part keyword
        @type msgpart: string
        @param msg: message to take the texts from

        @return: list of C{(part, item, text)} tuples
        @raise PologyError: if the keyword is not known
        """

        if msgpart == "msgid":
            text_spec = [("msgid", 0, msg.msgid)]
            if msg.msgid_plural is not None:
                text_spec += [("msgid_plural", 0, msg.msgid_plural)]
        elif msgpart == "msgstr":
            text_spec = [("msgstr", i, text)
                         for i, text in enumerate(msg.msgstr)]
        elif msgpart == "msgctxt":
            text_spec = []
            if msg.msgctxt is not None:
                text_spec = [("msgctxt", 0, msg.msgctxt)]
        elif msgpart == "msgid_singular":
            text_spec = [("msgid", 0, msg.msgid)]
        elif msgpart == "msgid_plural":
            text_spec = []
            if msg.msgid_plural is not None:
                text_spec += [("msgid_plural", 0, msg.msgid_plural)]
        elif msgpart.startswith("msgstr_"):
            item = int(msgpart.split("_")[1])
            text_spec = []
            # The message may have fewer forms than the one asked for
            # (e.g. msgstr_1 on a non-plural message); nothing to match then.
            if item < len(msg.msgstr):
                text_spec = [("msgstr", item, msg.msgstr[item])]
        else:
            raise PologyError(
                _("@info",
                  "Unknown message part '%(part)s' referenced in the rule.",
                  part=msgpart))

        return text_spec


    def _processWithPattern (self, msg, cat, envs):
        """
        Match the trigger pattern against the rule's message part.

        @return: list of C{(part, item, spans, text)} tuples, one per
            text field which has at least one non-cancelled match
        """

        text_spec = self._create_text_spec(self.msgpart, msg)

        failed_spans = {}
        for part, item, text in text_spec:

            # Test all matched segments.
            for pmatch in self.pattern.finditer(text):
                # First validity entry that matches excepts the current segment.
                cancel = False
                for entry in self.valid:
                    if self._is_valid(pmatch.group(0),
                                      pmatch.start(), pmatch.end(),
                                      text, entry, msg, cat, envs):
                        cancel = True
                        break
                if not cancel:
                    # Record the span of problematic segment.
                    skey = (part, item)
                    if skey not in failed_spans:
                        failed_spans[skey] = (part, item, [], text)
                    failed_spans[skey][2].append(pmatch.span())

        return list(failed_spans.values())


    def _processWithTrigger (self, msg, cat, envs):
        """
        Run the trigger function and clear its spans by validity entries.

        @return: list of C{(part, item, spans, text)} tuples, one per
            text field which has at least one non-cancelled span
        """

        # Apply trigger.
        possibly_failed_spans = self.trigger(msg, cat)

        # Try to clear spans with validity tests.
        failed_spans = {}
        for spanspec in possibly_failed_spans:
            part, item, spans = spanspec[:3]
            ftext = None
            if len(spanspec) > 3:
                ftext = spanspec[3]
            part_item = part
            if part == "msgstr":
                part_item = part + "_" + str(item)
            text_spec = self._create_text_spec(part_item, msg)
            if ftext is None: # the trigger didn't do any own filtering
                ftext = text_spec[0][2] # message field which contains the span
            for span in spans:
                mstart, mend = span[:2] # may contain 3rd element, error text
                pmatch = ftext[mstart:mend]
                cancel = False
                for entry in self.valid:
                    if self._is_valid(pmatch, mstart, mend,
                                      ftext, entry, msg, cat, envs):
                        cancel = True
                        break
                if not cancel:
                    # Record the span of problematic segment.
                    skey = (part, item)
                    if skey not in failed_spans:
                        failed_spans[skey] = (part, item, [], ftext)
                    failed_spans[skey][2].append(span)

        return list(failed_spans.values())


    def _filter_message (self, msg, cat, envs):
        """
        Apply the message filter of the rule, if any.

        The filter is applied to a C{MessageUnsafe} copy of the message.

        @return: the filtered copy, or the original message if the rule
            has no message filter
        """

        fmsg = msg
        if self.mfilter is not None:
            fmsg = MessageUnsafe(msg)
            self.mfilter(fmsg, cat, envs)

        return fmsg


    def _is_valid (self, match, mstart, mend, text, ventry, msg, cat, envs):
        """
        Check whether a validity entry matches the given segment.

        All keys within the entry must match for the entry to match
        as a whole. A key prefixed with C{!} matches when its test fails.
        Keys without a known test are ignored.

        @param match: the matched segment of the text
        @type match: string
        @param mstart: start position of the segment in the text
        @type mstart: int
        @param mend: end position of the segment in the text
        @type mend: int
        @param text: the text in which the segment was found
        @type text: string
        @param ventry: parsed validity entry (see L{setValid})
        @type ventry: list of (key, value) pairs
        @param msg: message to which the text belongs
        @param cat: catalog to which the message belongs
        @param envs: environments in which the rule is applied
        @type envs: set

        @return: whether the entry matches
        @rtype: bool
        """

        for key, value in ventry:
            invert = key.startswith("!")
            bkey = key[1:] if invert else key

            # NOTE: The result of each test is kept in its own variable;
            # the 'match' parameter must stay intact for "span" tests
            # that may follow other keys in the same entry.
            if bkey == "env":
                found = bool(envs.intersection(value))

            elif bkey == "cat":
                found = cat.name in value

            elif bkey == "catrx":
                found = bool(value.search(cat.name))

            elif bkey == "head":
                frx, vrx = value
                found = False
                for hname, hvalue in cat.header.field:
                    if frx.search(hname) and vrx.search(hvalue):
                        found = True
                        break

            elif bkey == "span":
                found = value.search(match) is not None

            elif bkey == "after":
                # Search up to the match to avoid need for lookaheads.
                found = False
                for afterMatch in value.finditer(text, 0, mstart):
                    if afterMatch.end() == mstart:
                        found = True
                        break

            elif bkey == "before":
                # Search from the match to avoid need for lookbehinds.
                found = False
                for beforeMatch in value.finditer(text, mend):
                    if beforeMatch.start() == mend:
                        found = True
                        break

            elif bkey == "ctx":
                found = False
                if msg.msgctxt:
                    found = bool(value.search(msg.msgctxt))

            elif bkey == "msgid":
                found = False
                for msgid in (msg.msgid, msg.msgid_plural):
                    if msgid is not None and value.search(msgid):
                        found = True
                        break

            elif bkey == "msgstr":
                found = False
                for msgstr in msg.msgstr:
                    if value.search(msgstr):
                        found = True
                        break

            elif bkey == "srcref":
                found = False
                for srcpath, lno in msg.source:
                    if value.search(srcpath):
                        found = True
                        break

            elif bkey == "comment":
                found = False
                all_cmnt = []
                all_cmnt.extend(msg.manual_comment)
                all_cmnt.extend(msg.auto_comment)
                for cmnt in all_cmnt:
                    if value.search(cmnt):
                        found = True
                        break

            else:
                # No test defined for this key, it does not decide anything.
                continue

            if invert:
                found = not found
            if not found:
                return False

        return True
1354
1355
def _parseRuleLine (lines, lno):
    """
    Split a rule line into fields as list of (name, value) pairs.

    Lines ending in backslash are joined with the following line before
    parsing. Parsing stops at the end of the line or at a C{#} character
    found where a new field would start.

    If a field name is followed by '=' or '=""', the field value will be
    an empty string. If there is no equal sign, the value will be C{None}.

    If the line is the trigger pattern, the name of the first field
    is going to be the "*", and its value the keyword of the message part
    to be matched; the name of the second field is going to be
    the pattern itself, and its value the string of match modifiers.

    @param lines: all lines of the rule file
    @type lines: list of strings
    @param lno: number of the line to parse (1-based)
    @type lno: int

    @return: parsed fields, and the number of the last line consumed
    @rtype: (list of (name, value) pairs, int)
    """

    # Compose the logical line out of backslash continuations.
    line = lines[lno - 1]
    while line.endswith("\\\n"):
        line = line[:-2]
        if lno >= len(lines):
            break
        lno += 1
        line += lines[lno - 1]

    llen = len(line)
    fields = []
    pos = 0
    in_modifiers = False

    while pos < llen:
        # Move to the next token; done on end of line or comment.
        while pos < llen and line[pos].isspace():
            pos += 1
        if pos >= llen or line[pos] == "#":
            break

        at_start = len(fields) == 0
        char = line[pos]

        if at_start and char in ("[", "{"):
            # Shorthand trigger pattern.
            opener = char
            closer, fname = {"{": ("}", "msgid"),
                             "[": ("]", "msgstr")}[opener]

            # The pattern extends up to the balanced closing bracket.
            start = pos + 1
            depth = 1
            while depth > 0:
                pos += 1
                if pos >= llen:
                    break
                if line[pos] == opener:
                    depth += 1
                elif line[pos] == closer:
                    depth -= 1
            if depth > 0:
                raise _SyntaxError(
                    _("@info",
                      "Unbalanced '%(delim)s' in shorthand trigger pattern.",
                      delim=opener))
            fields.append((_rule_start, fname))
            fields.append((line[start:pos], ""))

            pos += 1 # skip closing bracket
            in_modifiers = True
            continue

        if at_start and char == _rule_start:
            # Verbose trigger.
            pos += 1
            while pos < llen and line[pos].isspace():
                pos += 1
            if pos >= llen:
                raise _SyntaxError(
                    _("@info",
                      "Missing '%(kw)s' keyword in the rule trigger.",
                      kw="match"))

            # Collect the match keyword; it may not run to the line end.
            start = pos
            while line[pos].isalnum() or line[pos] == "_":
                pos += 1
                if pos >= llen:
                    raise _SyntaxError(
                        _("@info",
                          "Malformed rule trigger."))
            tkeyw = line[start:pos]
            fields.append((_rule_start, tkeyw))

            if tkeyw not in _trigger_msgparts:
                # Special trigger, go on reading fields.
                continue

            # Collect the quoted pattern.
            while line[pos].isspace():
                pos += 1
                if pos >= llen:
                    raise _SyntaxError(
                        _("@info",
                          "No pattern after the trigger keyword '%(kw)s'.",
                          kw=tkeyw))
            start = pos + 1
            pos = _findEndQuote(line, pos)
            fields.append((line[start:pos], ""))
            pos += 1 # skip quote
            in_modifiers = True
            continue

        if in_modifiers:
            # Modifiers after the trigger pattern,
            # accumulated into the value of the pattern field.
            start = pos
            while pos < llen and not line[pos].isspace():
                pos += 1
            pattern, pmods = fields[-1]
            fields[-1] = (pattern, pmods + line[start:pos])
            continue

        # Subdirective field.

        # Collect field name.
        start = pos
        while pos < llen and not line[pos].isspace() and line[pos] != "=":
            pos += 1
        fname = line[start:pos]
        if not re.match(r"^!?[a-z][\w-]*$", fname):
            raise _SyntaxError(
                _("@info",
                  "Invalid field name '%(field)s'.",
                  field=fname))

        if pos >= llen or line[pos].isspace():
            # Bare field name, no value at all.
            fields.append((fname, None))
            continue

        # Collect field value.
        pos += 1 # skip equal-character
        if pos >= llen or line[pos].isspace():
            fields.append((fname, ""))
            continue

        start = pos + 1
        pos = _findEndQuote(line, pos)
        fields.append((fname, line[start:pos]))
        pos += 1 # skip quote

    return fields, lno
1503
1504
def _findEndQuote (line, pos=0):
    """
    Find end quote to the quote at given position in the line.

    Character at the C{pos} position is taken as the quote character.
    A backslash inside the string escapes the character that follows it,
    so an escaped quote does not terminate the string.
    Only the position is returned; the text between the quotes is
    not unescaped here.

    @param line: the line to parse
    @type line: string
    @param pos: position of the opening quote
    @type pos: int

    @return: position of the closing quote
    @rtype: int

    @raise _SyntaxError: if the string is not terminated on this line
    """

    quote = line[pos]
    llen = len(line)

    epos = pos + 1
    while epos < llen:
        c = line[epos]
        if c == "\\":
            # Skip the backslash together with the character it escapes.
            # If the backslash is the last character of the line,
            # this moves past the end, which is reported below.
            epos += 2
        elif c == quote:
            break
        else:
            epos += 1

    if epos >= llen:
        raise _SyntaxError(
            _("@info",
              "Non-terminated quoted string '%(snippet)s'.",
              snippet=line[pos:]))

    return epos
1549