File indexing completed on 2024-04-21 05:44:48
# -*- coding: UTF-8 -*-

"""
Matchers and matcher helpers for various objects.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import locale
import re

from pology import _, n_
from pology.comments import parse_summit_branches
from pology.fsops import str_to_unicode
from pology.message import MessageUnsafe
from pology.remove import remove_accel_msg
from pology.report import error


# Operator keywords recognized in matching expressions.
_all_ops = set()
_unary_ops = set(["not"])
_all_ops.update(_unary_ops)
_binary_ops = set(["and", "or"])
_all_ops.update(_binary_ops)


class ExprError (Exception):
    """
    Exception for errors in matching expressions.
    """

    def __init__ (self, expr=None, msg=None, start=None, end=None):
        """
        Constructor.

        All the parameters are made available as instance variables.

        @param expr: the complete expression that caused the problem
        @type expr: string or None
        @param msg: the description of the problem
        @type msg: string or None
        @param start: start position of the problem into the expression string
        @type start: int or None
        @param end: end position of the problem
        @type end: int or None
        """

        self.expr = expr
        self.msg = msg
        self.start = start
        self.end = end


    def __unicode__ (self):
        """
        Compose the human-readable error report.

        When both the expression and the start position are known,
        the report quotes the offending part of the expression
        (10 characters from the start position if no end position
        was given), with ellipsis on each side that was cut off.

        @return: error report
        @rtype: string
        """

        subexpr = None
        if self.expr is not None and self.start is not None:
            start = self.start
            if self.end is not None:
                end = self.end
            else:
                end = self.start + 10
            subexpr = self.expr[start:end]
            if start > 0:
                subexpr = "..." + subexpr
            if end < len(self.expr):
                subexpr = subexpr + "..."

        if self.msg is not None and subexpr is not None:
            repstr = _("@info",
                       "Invalid expression at %(col)d [%(snippet)s]: "
                       "%(reason)s.",
                       col=self.start, snippet=subexpr, reason=self.msg)
        elif self.msg is not None:
            repstr = _("@info",
                       "Invalid expression: %(reason)s.",
                       reason=self.msg)
        elif subexpr is not None:
            repstr = _("@info",
                       "Invalid expression at %(col)d [%(snippet)s].",
                       col=self.start, snippet=subexpr)
        else:
            repstr = _("@info", "Invalid expression.")

        return str(repstr)


    def __str__ (self):
        """
        Return the error report in the form expected from C{str()}.

        @return: error report
        @rtype: string
        """

        text = self.__unicode__()
        # __str__ must return the native string type: encoding is only
        # correct where str is the byte string type (Python 2); on Python 3
        # returning bytes would make str(e) raise TypeError.
        if str is bytes:
            return text.encode(locale.getpreferredencoding())
        return text


def make_filtered_msg (msg, cat, accels=None, filters=None):
    """
    Make a filtered copy of the message, for matching against.

    The given message is not modified; filtering is performed on
    a L{MessageUnsafe} object constructed from it.
    Accelerators are removed from the copy by C{remove_accel_msg};
    if C{accels} is given, it is set as the accelerator of the catalog
    for the duration of the removal and the previous accelerator
    is restored afterwards.
    Then each filter is applied, in the given order,
    to every C{msgstr} field of the copy.

    @param msg: message to make the filtered copy of
    @param cat: catalog in which the message resides
    @param accels: possible accelerator markers,
        or C{None} to use the accelerator currently set on the catalog
    @type accels: sequence of strings or C{None}
    @param filters: filters to apply to C{msgstr} fields
    @type filters: sequence of (text)->text, or C{None} for no filters

    @return: filtered copy of the message
    @rtype: L{MessageUnsafe}
    """

    # Must not modify contents of real message.
    msgf = MessageUnsafe(msg)

    # - remove accelerators
    if accels is None:
        remove_accel_msg(msgf, cat)
    else:
        old_accels = cat.accelerator()
        cat.set_accelerator(accels)
        try:
            remove_accel_msg(msgf, cat)
        finally:
            # Restore catalog state even if accelerator removal fails.
            cat.set_accelerator(old_accels)

    # - apply msgstr filters
    for filtr in filters or ():
        for i in range(len(msgf.msgstr)):
            msgf.msgstr[i] = filtr(msgf.msgstr[i])

    return msgf


def make_msg_matcher (exprstr, mopts=None, abort=False):
    """
    Build expression matcher for messages.

    For expression syntax, check C{find-messages} sieve documentation
    for C{fexpr} parameter.
    TODO: Put this instruction here.

    The C{mopts} parameter, if given, defines global matching options.
    It can be either a dictionary or an object with data attributes,
    and can contain the following keys/attributes (in parentheses:
    type and default value in case the key is not present):

      - C{case} (C{bool}, C{False}): C{True} for case-sensitive matching

    The built matcher function takes up to four parameters, in order:

      - C{msgf}: filtered message (to really match against)
      - C{msg}: raw message (to properly report matched spans)
      - C{cat}: catalog in which the message resides
      - C{hl}: L{highlight specification<msgreport.report_msg_content>}
        (to be filled with matched spans, can be omitted from the call)

    Matcher function returns C{True} if the message is matched,
    C{False} otherwise.

    In case an error in expression is encountered while building the matcher,
    either L{ExprError} exception may be thrown or execution aborted,
    depending on the parameter C{abort}.

    @param exprstr: expression string
    @type exprstr: string
    @param mopts: global matching options
    @type mopts: dict or attribute object
    @param abort: on errors in expression, abort execution if C{True},
        raise L{ExprError} if C{False}
    @type abort: bool

    @return: matcher function
    @rtype: (msgf, msg, cat, hl=[])->bool
    """

    mopts = _prep_attrobj(mopts, dict(
        case=False,
    ))

    try:
        expr, p = _build_expr_r(exprstr, 0, len(exprstr), mopts)
        # The top-level parse stops early only on an unmatched ")".
        if p < len(exprstr):
            raise ExprError(exprstr, _("@item:intext",
                                       "premature end of expression"))
    except ExprError as e:
        if not abort:
            raise
        # NOTE(review): error() is presumed to terminate execution;
        # if it returns, expr below is unbound -- confirm.
        error(str_to_unicode(str(e)))
    return expr


def make_msg_fmatcher (exprstr, mopts=None,
                       accels=None, filters=None, abort=False):
    """
    Build expression matcher for messages, with filtering.

    Like L{make_msg_matcher}, except that matchers built by this function
    do their own filtering, and so omit the first argument.

    For semantics of C{accels} and C{filters}, see this module documentation
    on C{accel} and C{filter} sieve parameters.

    @param exprstr: expression string
    @type exprstr: string
    @param mopts: global matching options
    @type mopts: attribute object
    @param accels: possible accelerator markers
    @type accels: sequence of strings or C{None}
    @param filters: filters to apply to text fields [F1A hooks]
    @type filters: (text)->text
    @param abort: on errors, abort execution if C{True},
        raise exception if C{False}
    @type abort: bool

    @return: matcher function
    @rtype: (msg, cat, hl=[])->bool
    """

    raw_matcher = make_msg_matcher(exprstr, mopts=mopts, abort=abort)

    def matcher (msg, cat, hl=None):
        # A fresh list per call: the highlight list gets extended by
        # the underlying matchers, so a shared default would grow forever.
        if hl is None:
            hl = []
        msgf = make_filtered_msg(msg, cat, accels, filters)
        return raw_matcher(msgf, msg, cat, hl)

    return matcher


def _prep_attrobj (aobj, dctdef=None):
    """
    Normalize options into an object with data attributes.

    A dictionary (or C{None}) is converted into a plain object having
    the dictionary keys as attributes; any other object is used as it is,
    and modified in place. Attributes named in C{dctdef} which the object
    lacks are set to the values given there.

    @param aobj: options
    @type aobj: dict, attribute object or None
    @param dctdef: default values for missing attributes
    @type dctdef: dict or None

    @return: object with data attributes
    """

    if aobj is None or isinstance(aobj, dict):
        dct = aobj or {}
        class _Data: pass
        aobj = _Data()
        for key, value in dct.items():
            setattr(aobj, key, value)

    for key, val in (dctdef or {}).items():
        if not hasattr(aobj, key):
            setattr(aobj, key, val)

    return aobj


def _negated (cexpr):
    # Matcher which is true exactly when the given matcher is false.
    return lambda *a: not cexpr(*a)


def _conjoined (cexpr1, cexpr2):
    # Matcher for "cexpr1 and cexpr2", short-circuiting like the operator.
    return lambda *a: cexpr1(*a) and cexpr2(*a)


def _disjoined (cexpr1, cexpr2):
    # Matcher for "cexpr1 or cexpr2", short-circuiting like the operator.
    return lambda *a: cexpr1(*a) or cexpr2(*a)


def _build_expr_r (exprstr, start, end, params):
    """
    Build a matcher out of the (sub)expression starting at given position.

    Parsing proceeds until the end position or until an unmatched
    closing parenthesis. Tokens are pushed onto a stack, which is reduced
    as soon as an operator has all its operands available; hence binary
    operators group left to right, with no precedence between them.

    @param exprstr: the complete expression string
    @param start: position at which to start parsing
    @param end: position at which to stop parsing
    @param params: global matching options, passed on to matchers

    @return: matcher function and position where the parsing stopped
    @rtype: ((msgf, msg, cat, hl)->bool, int)
    """

    p = start
    tstack = []
    can_unary = True
    can_binary = False
    can_operand = True
    while p < end:
        # Skip whitespace up to the next token.
        while p < end and exprstr[p].isspace():
            p += 1
        if p == end or exprstr[p] == ")":
            break

        # Parse current subexpression, matcher, or operator.
        if exprstr[p] == "(":
            if not can_operand:
                raise ExprError(exprstr, _("@item:intext",
                                           "expected operator"), p)
            expr, p = _build_expr_r(exprstr, p + 1, end, params)
            if p == end or exprstr[p] != ")":
                raise ExprError(exprstr, _("@item:intext",
                                           "no closing parenthesis"), p)
            tstack.append(expr)
            can_operand = False
            can_unary = False
            can_binary = True
            p += 1
        elif exprstr[p].isalpha():
            pp = p
            while p < end and exprstr[p].isalnum():
                p += 1
            tok = exprstr[pp:p].lower()
            if tok in _all_ops:
                if tok in _unary_ops and not can_unary:
                    raise ExprError(exprstr, _("@item:intext",
                                               "unexpected unary operator"),
                                    pp)
                if tok in _binary_ops and not can_binary:
                    raise ExprError(exprstr,
                                    _("@item:intext",
                                      "unexpected binary operator"), pp)
                can_operand = True
                can_unary = True
                can_binary = False
                tstack.append(tok)
            else:
                if not can_operand:
                    raise ExprError(exprstr, _("@item:intext",
                                               "expected an operator"), pp)
                expr, p = _build_expr_matcher(tok, exprstr, p, end, params)
                tstack.append(expr)
                can_operand = False
                can_unary = False
                can_binary = True
        else:
            # NOTE(review): this is the only error here reported at p + 1
            # rather than p; kept as it was -- confirm it is intended.
            raise ExprError(exprstr,
                            _("@item:intext",
                              "expected token starting with a letter"), p + 1)

        # Update expression as possible.
        updated = True
        while updated:
            updated = False
            if (    len(tstack) >= 2
                and tstack[-2] in _unary_ops
                and tstack[-1] not in _all_ops
            ):
                cexpr1 = tstack.pop()
                op = tstack.pop()
                if op == "not":
                    cexpr = _negated(cexpr1)
                else: # cannot happen
                    raise ExprError(exprstr,
                                    _("@item:intext",
                                      "unknown unary operator '%(op)s'",
                                      op=op))
                tstack.append(cexpr)
                updated = True
            if (    len(tstack) >= 3
                and tstack[-3] not in _all_ops
                and tstack[-2] in _binary_ops
                and tstack[-1] not in _all_ops
            ):
                cexpr2 = tstack.pop()
                op = tstack.pop()
                cexpr1 = tstack.pop()
                if op == "and":
                    cexpr = _conjoined(cexpr1, cexpr2)
                elif op == "or":
                    cexpr = _disjoined(cexpr1, cexpr2)
                else: # cannot happen
                    raise ExprError(exprstr,
                                    _("@item:intext",
                                      "unknown binary operator '%(op)s'",
                                      op=op))
                tstack.append(cexpr)
                updated = True

    # Anything but a single fully reduced element means that
    # an operator was left without its operand.
    if len(tstack) >= 2:
        raise ExprError(exprstr, _("@item:intext",
                                   "premature end of expression"), end)
    if len(tstack) == 0:
        raise ExprError(exprstr, _("@item:intext",
                                   "expected subexpression"), start)

    return tstack[0], p


# Matchers taking a value.
_op_matchers = set(["msgctxt", "msgid", "msgstr", "comment", "flag", "branch"])
# Matchers not taking a value.
_nop_matchers = set(["transl", "obsol", "active", "plural"])

# Matchers which produce a regular expression out of their value.
_rx_matchers = set(["msgctxt", "msgid", "msgstr", "comment", "flag"])

# All matchers together.
_all_matchers = set()
_all_matchers.update(_op_matchers)
_all_matchers.update(_nop_matchers)

def _build_expr_matcher (mname, exprstr, start, end, params):
    """
    Parse the value and modifiers of a matcher and build the matcher.

    For matchers taking a value, the value is expected right after
    the matcher name, enclosed in a pair of same delimiter characters
    (any character other than whitespace, alphanumeric or parenthesis).
    Modifiers are the alphanumeric characters directly following
    the closing delimiter, one modifier per character.

    @param mname: matcher name (as already read from the expression)
    @param exprstr: the complete expression string
    @param start: position right after the matcher name
    @param end: position at which the expression ends
    @param params: global matching options, passed on to L{make_matcher}

    @return: matcher function and position where the parsing stopped
    @rtype: ((msgf, msg, cat, hl)->bool, int)
    """

    if mname not in _all_matchers:
        raise ExprError(exprstr, _("@item:intext",
                                   "unknown matcher '%(match)s'",
                                   match=mname),
                        start - len(mname))

    # Get matcher value, if any.
    mval = None
    p = start
    if mname in _op_matchers:
        c = exprstr[p:p + 1]
        if p == end or c.isspace() or c.isalnum() or c in ("(", ")"):
            raise ExprError(exprstr, _("@item:intext",
                                       "expected parameter delimiter"), p)
        delim = exprstr[p]
        pp = p + 1
        p = exprstr.find(delim, p + 1, end)
        if p < 0:
            raise ExprError(exprstr, _("@item:intext",
                                       "expected closing delimiter"), end - 1)
        mval = exprstr[pp:p]

    # Get match modifiers, if any.
    # At this point p is at the closing delimiter for matchers with value,
    # or right after the name for matchers without one.
    mmods = []
    c = exprstr[p:p + 1]
    if p < end and not c.isspace() and not c.isalnum() and c not in ("(", ")"):
        p += 1
        pp = p
        while p < end and exprstr[p].isalnum():
            p += 1
        mmods = list(exprstr[pp:p])

    return make_matcher(mname, mval, mmods, params), p


# Modifiers accepted by matchers (matchers not listed accept none).
_matcher_mods = {
    "msgctxt": ["c", "i"],
    "msgid": ["c", "i"],
    "msgstr": ["c", "i"],
    "comment": ["c", "i"],
}


def _parse_span (value, errmsg):
    # Parse "start:end" into integers; empty start means 0, empty end None.
    # Anything not of this form is reported as ExprError with given message.
    try:
        start, end = value.split(":", 1)
        start = int(start) if start else 0
        end = int(end) if end else None
    except (AttributeError, TypeError, ValueError):
        raise ExprError(value, errmsg, 0)
    return start, end


def make_matcher (name, value, mods, params, neg=False):
    """
    Build a single matcher of given name.

    Matchers C{msgctxt}, C{msgid}, C{msgstr}, C{comment} and C{flag}
    compile the value as a regular expression and search for it in
    the respective parts of the filtered message. The search is
    case-insensitive if modifier C{i} is given, or if C{params.case}
    is false and modifier C{c} is not given.
    Matchers C{transl}, C{obsol}, C{active} and C{plural} test the state
    of the raw message, inverted if the value is not C{None} and is false.
    Matcher C{maxchar} compares the average lengths of original and
    translated texts to the value, C{lspan} and C{espan} test whether
    the reference line or entry number of the message is within
    the C{"start:end"} span given by the value (end excluded),
    and C{branch} tests whether the value is among the summit branches
    of the message.

    The built matcher takes parameters C{(msgf, msg, cat, hl)}:
    filtered message, raw message, catalog and, optionally, a list
    which regular expression matchers extend with matched spans.

    @param name: matcher name
    @type name: string
    @param value: matcher value
    @param mods: match modifiers
    @type mods: sequence of strings
    @param params: global matching options
    @type params: object with C{case} attribute
    @param neg: whether to build the negated matcher
    @type neg: bool

    @return: matcher function
    @rtype: (msgf, msg, cat, hl=None)->bool

    @raise ExprError: on unknown modifiers, invalid regular expression,
        invalid span or unknown matcher name
    """

    known_mods = _matcher_mods.get(name, [])
    bad_mods = set(mods).difference(known_mods)
    if bad_mods:
        # Not provided by the module-level imports, hence imported here.
        from pology.report import format_item_list
        raise ExprError(None,
                        _("@item:intext",
                          "unknown modifiers %(modlist)s "
                          "to matcher '%(match)s'",
                          modlist=format_item_list(sorted(bad_mods)),
                          match=name))

    if name in _rx_matchers:
        rxflags = re.U
        if "i" in mods or (not params.case and "c" not in mods):
            rxflags |= re.I
        try:
            regex = re.compile(value, rxflags)
        except Exception:
            # Any failure to compile counts as a bad expression,
            # but interrupts and exit requests must pass through.
            raise ExprError(None, _("@item:intext",
                                    "invalid regular expression '%(regex)s'",
                                    regex=value))

    # The highlight list defaults to None rather than to a list literal,
    # as a default list would be shared between all calls of the matcher.

    if name == "msgctxt":
        def matcher (msgf, msg, cat, hl=None):
            texts = []
            if msgf.msgctxt is not None:
                texts += [(msgf.msgctxt, "msgctxt", 0)]
            return _rx_in_any_text(regex, texts, hl)

    elif name == "msgid":
        def matcher (msgf, msg, cat, hl=None):
            texts = [(msgf.msgid, "msgid", 0)]
            if msgf.msgid_plural is not None:
                texts += [(msgf.msgid_plural, "msgid_plural", 0)]
            return _rx_in_any_text(regex, texts, hl)

    elif name == "msgstr":
        def matcher (msgf, msg, cat, hl=None):
            texts = [(msgf.msgstr[i], "msgstr", i)
                     for i in range(len(msgf.msgstr))]
            return _rx_in_any_text(regex, texts, hl)

    elif name == "comment":
        def matcher (msgf, msg, cat, hl=None):
            texts = []
            texts.extend([(msgf.manual_comment[i], "manual_comment", i)
                          for i in range(len(msgf.manual_comment))])
            texts.extend([(msgf.auto_comment[i], "auto_comment", i)
                          for i in range(len(msgf.auto_comment))])
            texts.extend([(msgf.source[i][0], "source", i)
                          for i in range(len(msgf.source))])
            return _rx_in_any_text(regex, texts, hl)

    elif name == "transl":
        def matcher (msgf, msg, cat, hl=None):
            if value is None or value:
                return msg.translated
            else:
                return not msg.translated

    elif name == "obsol":
        def matcher (msgf, msg, cat, hl=None):
            if value is None or value:
                return msg.obsolete
            else:
                return not msg.obsolete

    elif name == "active":
        def matcher (msgf, msg, cat, hl=None):
            if value is None or value:
                return msg.translated and not msg.obsolete
            else:
                return not msg.translated or msg.obsolete

    elif name == "plural":
        def matcher (msgf, msg, cat, hl=None):
            if value is None or value:
                return msg.msgid_plural is not None
            else:
                return msg.msgid_plural is None

    elif name == "maxchar":
        def matcher (msgf, msg, cat, hl=None):
            otexts = [msgf.msgid]
            if msgf.msgid_plural is not None:
                otexts.append(msgf.msgid_plural)
            ttexts = msgf.msgstr
            # Average (integer) number of characters per text.
            onchar = sum([len(x) for x in otexts]) // len(otexts)
            tnchar = sum([len(x) for x in ttexts]) // len(ttexts)
            return onchar <= value and tnchar <= value

    elif name == "lspan":
        start, end = _parse_span(value,
                                 _("@item:intext", "invalid line span"))
        def matcher (msgf, msg, cat, hl=None):
            cend = end
            if cend is None:
                # Open-ended span reaches through the last message.
                cend = cat[-1].refline + 1
            return msg.refline >= start and msg.refline < cend

    elif name == "espan":
        start, end = _parse_span(value,
                                 _("@item:intext", "invalid entry span"))
        def matcher (msgf, msg, cat, hl=None):
            cend = end
            if cend is None:
                # Open-ended span reaches through the last message.
                cend = cat[-1].refentry + 1
            return msg.refentry >= start and msg.refentry < cend

    elif name == "branch":
        def matcher (msgf, msg, cat, hl=None):
            return value in parse_summit_branches(msg)

    elif name == "flag":
        def matcher (msgf, msg, cat, hl=None):
            #FIXME: How to highlight flags? (then use _rx_in_any_text)
            for flag in msgf.flag:
                if regex.search(flag):
                    return True
            return False

    else:
        raise ExprError(name, _("@item:intext", "unknown matcher"), 0)

    if neg:
        return lambda *a: not matcher(*a)
    else:
        return matcher


def _rx_in_any_text (regex, texts, hl):
    """
    Check whether the regular expression matches in any of the texts.

    For every text with at least one match, an element
    C{(name, item, spans, text)} is added to the highlight list,
    where C{spans} holds the spans of all matches in that text.

    @param regex: compiled regular expression
    @param texts: texts to search through, as tuples of text,
        highlight name and highlight item
    @type texts: sequence of (string, string, int)
    @param hl: list to extend with highlight elements,
        or C{None} to not record them
    @type hl: list or None

    @return: C{True} if there was a match in at least one text
    @rtype: bool
    """

    match = False
    hl_dct = {}
    for text, hl_name, hl_item in texts:
        # Go through all matches, to highlight them all.
        for m in regex.finditer(text):
            hl_key = (hl_name, hl_item)
            if hl_key not in hl_dct:
                hl_dct[hl_key] = ([], text)
            hl_dct[hl_key][0].append(m.span())
            match = True

    if hl is not None:
        hl.extend([key + val for key, val in hl_dct.items()])

    return match