File indexing completed on 2024-04-21 16:29:16

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Matchers and matcher helpers for various objects.
0005 
0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0007 @license: GPLv3
0008 """
0009 
0010 import locale
0011 import re
0012 
0013 from pology import _, n_
0014 from pology.comments import parse_summit_branches
0015 from pology.fsops import str_to_unicode
0016 from pology.message import MessageUnsafe
0017 from pology.remove import remove_accel_msg
0018 from pology.report import error
0019 
0020 
0021 _all_ops = set()
0022 _unary_ops = set(["not"])
0023 _all_ops.update(_unary_ops)
0024 _binary_ops = set(["and", "or"])
0025 _all_ops.update(_binary_ops)
0026 
class ExprError (Exception):
    """
    Exception for errors in matching expressions.
    """

    def __init__ (self, expr=None, msg=None, start=None, end=None):
        """
        Constructor.

        All the parameters are made available as instance variables.

        @param expr: the complete expression that caused the problem
        @type expr: string or None
        @param msg: the description of the problem
        @type msg: string or None
        @param start: start position of the problem into the expression string
        @type start: int or None
        @param end: end position of the problem
        @type end: int or None
        """

        self.expr = expr
        self.msg = msg
        self.start = start
        self.end = end


    def __unicode__ (self):
        """
        Compose the human-readable description of the error.

        When both the expression and the start position are known,
        the description quotes the offending part of the expression
        (up to 10 characters from the start position if no end position
        was given), with ellipsis on each side where text was cut off.

        @return: description of the error
        @rtype: string
        """

        if self.expr is not None and self.start is not None:
            start = self.start
            if self.end is not None:
                end = self.end
            else:
                # No explicit end: show a short excerpt after the start.
                end = self.start + 10
            subexpr = self.expr[start:end]
            if start > 0:
                subexpr = "..." + subexpr
            if end < len(self.expr):
                subexpr = subexpr + "..."
        else:
            subexpr = None

        if self.msg is not None and subexpr is not None:
            repstr = _("@info",
                       "Invalid expression at %(col)d [%(snippet)s]: "
                       "%(reason)s.",
                       col=self.start, snippet=subexpr, reason=self.msg)
        elif self.msg is not None:
            repstr = _("@info",
                       "Invalid expression: %(reason)s.",
                       reason=self.msg)
        elif subexpr is not None:
            repstr = _("@info",
                       "Invalid expression at %(col)d [%(snippet)s].",
                       col=self.start, snippet=subexpr)
        else:
            repstr = _("@info", "Invalid expression.")

        return str(repstr)


    def __str__ (self):
        """
        Return the same description as L{__unicode__}.

        @return: description of the error
        @rtype: string
        """

        # __str__ must return text; returning the description encoded
        # to bytes makes str(exception) raise TypeError.
        return self.__unicode__()
0092 
0093 
def make_filtered_msg (msg, cat, accels=None, filters=[]):
    """
    Build a filtered copy of a message, for matching against.

    The copy is created as C{MessageUnsafe(msg)}, so that the contents
    of the real message are not modified. Accelerators are then removed
    from the copy by C{remove_accel_msg}, and finally each filter is
    applied, in order, to every C{msgstr} element of the copy.

    If C{accels} is given, it is set as the accelerator of the catalog
    for the duration of accelerator removal only; the previous value is
    put back afterwards, also when the removal raises an exception.

    @param msg: message to make the filtered copy of
    @param cat: catalog to which the message belongs
    @param accels: accelerator markers to use instead of those
        currently set on the catalog, or C{None} to use the catalog's
    @param filters: filters to apply to C{msgstr} texts
        (this sequence is only iterated, never modified)
    @type filters: sequence of (text)->text

    @return: filtered copy of the message
    """

    # Must not modify contents of real message.
    msgf = MessageUnsafe(msg)

    # - remove accelerators
    if accels is None:
        remove_accel_msg(msgf, cat)
    else:
        old_accels = cat.accelerator()
        cat.set_accelerator(accels)
        try:
            remove_accel_msg(msgf, cat)
        finally:
            # Never leave the temporary accelerators on the catalog.
            cat.set_accelerator(old_accels)

    # - apply msgstr filters
    for filtr in filters:
        for i in range(len(msgf.msgstr)):
            msgf.msgstr[i] = filtr(msgf.msgstr[i])

    return msgf
0115 
0116 
def make_msg_matcher (exprstr, mopts=None, abort=False):
    """
    Build a matcher function for messages out of an expression string.

    The expression syntax is described in the documentation of
    the C{fexpr} parameter of the C{find-messages} sieve.

    Global matching options may be supplied through C{mopts},
    either as a dictionary or as an object with data attributes.
    Options which are not supplied get their default value.
    Known options (in parenthesis: type and default value):

      - C{case} (C{bool}, C{False}): C{True} for case-sensitive matching

    The resulting matcher is called with up to four arguments, in order:

      - C{msgf}: filtered message (the one actually matched against)
      - C{msg}: raw message (to properly report matched spans)
      - C{cat}: catalog in which the message resides
      - C{hl}: L{highlight specification<msgreport.report_msg_content>}
        (gets filled with matched spans, may be omitted from the call)

    It returns C{True} if the message is matched, C{False} otherwise.

    If the expression turns out to be invalid while building the matcher,
    either L{ExprError} is raised or the error is reported through
    C{error()}, as selected by C{abort}.

    @param exprstr: expression string
    @type exprstr: string
    @param mopts: global matching options
    @type mopts: dict or attribute object
    @param abort: on errors in expression, abort execution if C{True},
        raise L{ExprError} if C{False}
    @type abort: bool

    @return: matcher function
    @rtype: (msgf, msg, cat, hl=[])->bool
    """

    option_defaults = dict(case=False)
    mopts = _prep_attrobj(mopts, option_defaults)

    try:
        expr, p = _build_expr_r(exprstr, 0, len(exprstr), mopts)
        # The parser stops early only on a closing parenthesis
        # which has no matching opening one at the top level.
        if p < len(exprstr):
            raise ExprError(exprstr, _("@item:intext",
                                       "premature end of expression"))
    except ExprError as e:
        if not abort:
            raise
        error(str_to_unicode(str(e)))

    return expr
0174 
0175 
def make_msg_fmatcher (exprstr, mopts=None,
                       accels=None, filters=[], abort=False):
    """
    Build expression matcher for messages, with filtering.

    Like L{make_msg_matcher}, except that matchers built by this function
    do their own filtering, and so omit the first argument:
    on every call the filtered message is produced from the raw one
    by L{make_filtered_msg}, using C{accels} and C{filters}.

    For semantics of C{accels} and C{filters}, see this module documentation
    on C{accel} and C{filter} sieve parameters.

    @param exprstr: expression string
    @type exprstr: string
    @param mopts: global matching options
    @type mopts: attribute object
    @param accels: possible accelerator markers
    @type accels: sequence of strings or C{None}
    @param filters: filters to apply to text fields [F1A hooks]
    @type filters: (text)->text
    @param abort: on errors, abort execution if C{True},
        raise exception if C{False}
    @type abort: bool

    @return: matcher function
    @rtype: (msg, cat, hl=None)->bool
    """

    raw_matcher = make_msg_matcher(exprstr, mopts=mopts, abort=abort)

    def matcher (msg, cat, hl=None):
        # The highlight list gets extended by the underlying matchers,
        # so a shared default list would keep growing across calls;
        # use a fresh throwaway list when the caller supplies none.
        if hl is None:
            hl = []
        msgf = make_filtered_msg(msg, cat, accels, filters)
        return raw_matcher(msgf, msg, cat, hl)

    return matcher
0210 
0211 
0212 def _prep_attrobj (aobj, dctdef=None):
0213 
0214     if aobj is None or isinstance(aobj, dict):
0215         dct = aobj or {}
0216         class _Data: pass
0217         aobj = _Data()
0218         for key, value in list(dct.items()):
0219             setattr(aobj, key, value)
0220 
0221     for key, val in list((dctdef or {}).items()):
0222         if not hasattr(aobj, key):
0223             setattr(aobj, key, val)
0224 
0225     return aobj
0226 
0227 
def _build_expr_r (exprstr, start, end, params):
    """
    Parse one nesting level of a matching expression into a callable.

    Parsing runs from C{start} until C{end}, or until a closing
    parenthesis is found at this nesting level, whichever comes first.
    Each opening parenthesis is handled by a recursive call.
    Operators are applied as soon as their operands are available:
    a unary operator binds to the operand right after it, and binary
    operators are combined strictly left to right (there is no
    precedence between C{and} and C{or}).

    @param exprstr: the complete expression string
    @type exprstr: string
    @param start: position at which to start parsing
    @type start: int
    @param end: position at which to stop parsing at the latest
    @type end: int
    @param params: matching options, passed on to matcher construction

    @return: the built expression (a callable taking the same arguments
        as the matchers it is composed of) and the position at which
        parsing stopped
    @rtype: (callable, int)

    @raise ExprError: if the expression is malformed
    """

    p = start
    # Stack of operator keywords (strings) and built operands (callables).
    tstack = []
    # What kind of token is acceptable at the current position.
    can_unary = True
    can_binary = False
    can_operand = True
    while p < end:
        # Skip whitespace before the next token.
        while p < end and exprstr[p].isspace():
            p += 1
        # A closing parenthesis ends this level; the caller consumes it.
        if p == end or exprstr[p] == ")":
            break

        # Parse current subexpression, matcher, or operator.
        if exprstr[p] == "(":
            if not can_operand:
                raise ExprError(exprstr, _("@item:intext",
                                           "expected operator"), p)
            expr, p = _build_expr_r(exprstr, p + 1, end, params)
            if p == end or exprstr[p] != ")":
                raise ExprError(exprstr, _("@item:intext",
                                           "no closing parenthesis"), p)
            tstack.append(expr)
            can_operand = False
            can_unary = False
            can_binary = True
            p += 1
        elif exprstr[p].isalpha():
            pp = p
            while p < end and exprstr[p].isalnum():
                p += 1
            tok = exprstr[pp:p].lower()
            if tok in _all_ops:
                if tok in _unary_ops and not can_unary:
                    raise ExprError(exprstr, _("@item:intext",
                                              "unexpected unary operator"), pp)
                if tok in _binary_ops and not can_binary:
                    raise ExprError(exprstr,
                                    _("@item:intext",
                                      "unexpected binary operator"), pp)
                can_operand = True
                can_unary = True
                can_binary = False
                tstack.append(tok)
            else:
                if not can_operand:
                    raise ExprError(exprstr, _("@item:intext",
                                               "expected an operator"), pp)
                expr, p = _build_expr_matcher(tok, exprstr, p, end, params)
                tstack.append(expr)
                can_operand = False
                can_unary = False
                can_binary = True
        else:
            # NOTE(review): the position reported here is p + 1, while
            # the other errors report the offending position itself;
            # confirm whether that is intended.
            raise ExprError(exprstr,
                            _("@item:intext",
                              "expected token starting with a letter"), p + 1)

        # Update expression as possible.
        updated = True
        while updated:
            updated = False
            if (    len(tstack) >= 2
                and tstack[-2] in _unary_ops
                and tstack[-1] not in _all_ops
            ):
                def closure (): # for closure over cexpr*
                    cexpr1 = tstack.pop()
                    op = tstack.pop()
                    if op == "not":
                        cexpr = lambda *a: not cexpr1(*a)
                    else: # cannot happen
                        raise ExprError(exprstr,
                                        _("@item:intext",
                                          "unknown unary operator '%(op)s'",
                                          op=op))
                    return cexpr
                tstack.append(closure())
                updated = True
            if (    len(tstack) >= 3
                and tstack[-3] not in _all_ops
                and tstack[-2] in _binary_ops
                and tstack[-1] not in _all_ops
            ):
                def closure (): # for closure over cexpr*
                    cexpr2 = tstack.pop()
                    op = tstack.pop()
                    cexpr1 = tstack.pop()
                    if op == "and":
                        cexpr = lambda *a: cexpr1(*a) and cexpr2(*a)
                    elif op == "or":
                        cexpr = lambda *a: cexpr1(*a) or cexpr2(*a)
                    else: # cannot happen
                        raise ExprError(exprstr,
                                        _("@item:intext",
                                          "unknown binary operator '%(op)s'",
                                          op=op))
                    return cexpr
                tstack.append(closure())
                updated = True

    if len(tstack) >= 2:
        raise ExprError(exprstr, _("@item:intext",
                                   "premature end of expression"), end)
    if len(tstack) == 0:
        raise ExprError(exprstr, _("@item:intext",
                                   "expected subexpression"), start)
    # A lone unary operator (as in "not" or "(not)") is left on the stack
    # as a bare keyword; it must not be returned as if it were
    # a built expression, since it is not callable.
    if tstack[0] in _all_ops:
        raise ExprError(exprstr, _("@item:intext",
                                   "premature end of expression"), p)

    return tstack[0], p
0337 
0338 
0339 # Matchers taking a value.
0340 _op_matchers = set(["msgctxt", "msgid", "msgstr", "comment", "flag", "branch"])
0341 # Matchers not taking a value.
0342 _nop_matchers = set(["transl", "obsol", "active", "plural"])
0343 
0344 # Matchers which produce a regular expression out of their value.
0345 _rx_matchers = set(["msgctxt", "msgid", "msgstr", "comment", "flag"])
0346 
0347 # All matchers together.
0348 _all_matchers = set()
0349 _all_matchers.update(_op_matchers)
0350 _all_matchers.update(_nop_matchers)
0351 
def _build_expr_matcher (mname, exprstr, start, end, params):
    """
    Parse the value and modifiers of a matcher and build the matcher.

    C{start} is the position right after the matcher name.
    For matchers taking a value, the character found there is used
    as the delimiter, and the value is everything up to the next
    occurrence of that same character. The run of alphanumeric
    characters following the closing delimiter (or, for matchers
    without a value, following a single delimiter character after
    the name) is split into one-character modifiers.

    @param mname: matcher name, as found in the expression
    @type mname: string
    @param exprstr: the complete expression string
    @type exprstr: string
    @param start: position right after the matcher name
    @type start: int
    @param end: position at which to stop parsing at the latest
    @type end: int
    @param params: matching options, passed on to L{make_matcher}

    @return: the matcher function and the position at which parsing stopped
    @rtype: (callable, int)

    @raise ExprError: on unknown matcher name or missing delimiters
    """

    if mname not in _all_matchers:
        name_pos = start - len(mname)
        raise ExprError(exprstr, _("@item:intext",
                                   "unknown matcher '%(match)s'",
                                   match=mname),
                        name_pos)

    pos = start

    # Value of the matcher, enclosed in a freely chosen delimiter.
    mvalue = None
    if mname in _op_matchers:
        ch = exprstr[pos:pos + 1]
        no_delimiter = (   pos == end or ch.isspace() or ch.isalnum()
                        or ch in ("(", ")"))
        if no_delimiter:
            raise ExprError(exprstr, _("@item:intext",
                                       "expected parameter delimiter"), pos)
        delim = exprstr[pos]
        value_start = pos + 1
        pos = exprstr.find(delim, value_start, end)
        if pos < 0:
            raise ExprError(exprstr, _("@item:intext",
                                       "expected closing delimiter"), end - 1)
        mvalue = exprstr[value_start:pos]

    # Modifiers, one character each, right after a delimiter character.
    modifiers = []
    ch = exprstr[pos:pos + 1]
    if pos < end and not (   ch.isspace() or ch.isalnum()
                          or ch in ("(", ")")):
        pos += 1
        mods_start = pos
        while pos < end and exprstr[pos].isalnum():
            pos += 1
        modifiers = list(exprstr[mods_start:pos])

    return make_matcher(mname, mvalue, modifiers, params), pos
0387 
0388 
0389 _matcher_mods = {
0390     "msgctxt": ["c", "i"],
0391     "msgid": ["c", "i"],
0392     "msgstr": ["c", "i"],
0393     "comment": ["c", "i"],
0394 }
0395 
def make_matcher (name, value, mods, params, neg=False):
    """
    Build the matcher function for a single named matcher.

    The meaning of C{value} depends on the matcher:

      - C{msgctxt}, C{msgid}, C{msgstr}, C{comment}, C{flag}:
        regular expression searched for in the respective texts
        of the filtered message (C{comment} covers manual comments,
        automatic comments and source references)
      - C{branch}: value looked up among the results of
        C{parse_summit_branches} for the raw message
      - C{transl}, C{obsol}, C{active}, C{plural}: if C{None} or true,
        the property must hold; if otherwise false, it must not hold
      - C{maxchar}: upper limit on the average length (by integer
        division) of both original and translated texts
      - C{lspan}, C{espan}: string of the form C{"start:end"}, matching
        messages whose reference line (entry) number is at least
        C{start} and less than C{end}; empty C{start} means 0,
        empty C{end} means including the last message in the catalog

    Regular expressions are compiled as case-insensitive if the C{i}
    modifier is given, or if C{params.case} is false and the C{c}
    modifier is not given.

    @param name: matcher name
    @type name: string
    @param value: matcher value, see above
    @param mods: modifiers to the matcher
    @type mods: sequence of one-character strings
    @param params: global matching options (only C{case} is read)
    @type params: attribute object
    @param neg: whether to build the matcher with negated result
    @type neg: bool

    @return: matcher function
    @rtype: (msgf, msg, cat, hl=None)->bool

    @raise ExprError: on unknown matcher, unknown modifiers,
        or invalid value
    """

    known_mods = _matcher_mods.get(name, [])
    bad_mods = set(mods).difference(known_mods)
    if bad_mods:
        # This name is not imported at the top of the module, so using it
        # unqualified would raise NameError instead of the intended error.
        # NOTE(review): expected to be provided by pology.report; confirm.
        from pology.report import format_item_list
        raise ExprError(None,
                        _("@item:intext",
                          "unknown modifiers %(modlist)s "
                          "to matcher '%(match)s'",
                          modlist=format_item_list(sorted(bad_mods)),
                          match=name))

    if name in _rx_matchers:
        rxflags = re.U
        if "i" in mods or (not params.case and "c" not in mods):
            rxflags |= re.I
        try:
            regex = re.compile(value, rxflags)
        except Exception:
            # Any failure to compile (bad syntax, wrong value type...)
            # is reported as an expression error, but interrupts
            # and exit requests are deliberately let through.
            raise ExprError(None, _("@item:intext",
                                    "invalid regular expression '%(regex)s'",
                                    regex=value))

    # Matchers which extend the highlight list must never do so on
    # a default shared between calls, hence hl=None throughout.

    if name == "msgctxt":
        def matcher (msgf, msg, cat, hl=None):
            texts = []
            if msgf.msgctxt is not None:
                texts += [(msgf.msgctxt, "msgctxt", 0)]
            return _rx_in_any_text(regex, texts, [] if hl is None else hl)

    elif name == "msgid":
        def matcher (msgf, msg, cat, hl=None):
            texts = [(msgf.msgid, "msgid", 0)]
            if msgf.msgid_plural is not None:
                texts += [(msgf.msgid_plural, "msgid_plural", 0)]
            return _rx_in_any_text(regex, texts, [] if hl is None else hl)

    elif name == "msgstr":
        def matcher (msgf, msg, cat, hl=None):
            texts = [(msgf.msgstr[i], "msgstr", i)
                     for i in range(len(msgf.msgstr))]
            return _rx_in_any_text(regex, texts, [] if hl is None else hl)

    elif name == "comment":
        def matcher (msgf, msg, cat, hl=None):
            texts = []
            texts.extend([(msgf.manual_comment[i], "manual_comment", i)
                          for i in range(len(msgf.manual_comment))])
            texts.extend([(msgf.auto_comment[i], "auto_comment", i)
                          for i in range(len(msgf.auto_comment))])
            # Of each source reference only the first element is searched.
            texts.extend([(msgf.source[i][0], "source", i)
                          for i in range(len(msgf.source))])
            return _rx_in_any_text(regex, texts, [] if hl is None else hl)

    elif name == "transl":
        def matcher (msgf, msg, cat, hl=None):
            if value is None or value:
                return msg.translated
            else:
                return not msg.translated

    elif name == "obsol":
        def matcher (msgf, msg, cat, hl=None):
            if value is None or value:
                return msg.obsolete
            else:
                return not msg.obsolete

    elif name == "active":
        def matcher (msgf, msg, cat, hl=None):
            if value is None or value:
                return msg.translated and not msg.obsolete
            else:
                return not msg.translated or msg.obsolete

    elif name == "plural":
        def matcher (msgf, msg, cat, hl=None):
            if value is None or value:
                return msg.msgid_plural is not None
            else:
                return msg.msgid_plural is None

    elif name == "maxchar":
        def matcher (msgf, msg, cat, hl=None):
            otexts = [msgf.msgid]
            if msgf.msgid_plural is not None:
                otexts.append(msgf.msgid_plural)
            ttexts = msgf.msgstr
            onchar = sum([len(x) for x in otexts]) // len(otexts)
            tnchar = sum([len(x) for x in ttexts]) // len(ttexts)
            return onchar <= value and tnchar <= value

    elif name == "lspan":
        try:
            start, end = _parse_span_value(value)
        except (AttributeError, TypeError, ValueError):
            raise ExprError(value, _("@item:intext", "invalid line span"), 0)
        def matcher (msgf, msg, cat, hl=None):
            cend = end
            if cend is None:
                cend = cat[-1].refline + 1
            return msg.refline >= start and msg.refline < cend

    elif name == "espan":
        try:
            start, end = _parse_span_value(value)
        except (AttributeError, TypeError, ValueError):
            raise ExprError(value, _("@item:intext", "invalid entry span"), 0)
        def matcher (msgf, msg, cat, hl=None):
            cend = end
            if cend is None:
                cend = cat[-1].refentry + 1
            return msg.refentry >= start and msg.refentry < cend

    elif name == "branch":
        def matcher (msgf, msg, cat, hl=None):
            return value in parse_summit_branches(msg)

    elif name == "flag":
        def matcher (msgf, msg, cat, hl=None):
            #FIXME: How to highlight flags? (then use _rx_in_any_text)
            for flag in msgf.flag:
                if regex.search(flag):
                    return True
            return False

    else:
        raise ExprError(name, _("@item:intext", "unknown matcher"), 0)

    if neg:
        return lambda *a: not matcher(*a)
    else:
        return matcher


def _parse_span_value (value):
    """
    Split a C{"start:end"} span string into its two limits.

    @param value: span string; either side of the colon may be empty
    @type value: string

    @return: start (0 if empty) and end (C{None} if empty)
    @rtype: (int, int or None)

    @raise ValueError: if there is no colon or a limit is not an integer
    """

    start, end = value.split(":", 1)
    start = int(start) if start else 0
    end = int(end) if end else None
    return start, end
0537 
0538 
0539 def _rx_in_any_text (regex, texts, hl):
0540 
0541     match = False
0542     hl_dct = {}
0543     for text, hl_name, hl_item in texts:
0544         # Go through all matches, to highlight them all.
0545         for m in regex.finditer(text):
0546             hl_key = (hl_name, hl_item)
0547             if hl_key not in hl_dct:
0548                 hl_dct[hl_key] = ([], text)
0549             hl_dct[hl_key][0].append(m.span())
0550             match = True
0551 
0552     hl.extend([x + y for x, y in list(hl_dct.items())])
0553 
0554     return match
0555