# File indexing completed on 2024-10-27 11:34:22
# -*- coding: UTF-8 -*-

"""
Assemble a property map from entries in manual comments.

Documented in C{doc/user/sieving.docbook}.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import os
import re

from pology import _, n_
from pology.colors import cjoin
from pology.fsops import str_to_unicode
from pology.msgreport import warning_on_msg
from pology.report import report, format_item_list
from pology.sieve import SieveError
from pology.synder import Synder


def setup_sieve (p):
    """
    Declare the description and the parameters of this sieve.

    @param p: object on which the sieve description and its parameters
        are registered, through C{set_desc} and C{add_param}
    """

    p.set_desc(_("@info sieve discription",
                 "Assemble a property map from entries in manual comments."
                 ))

    p.add_param("outfile", str,
                metavar=_("@info sieve parameter value placeholder", "FILE"),
                desc=_("@info sieve parameter discription",
                       "File to output the property map into. "
                       "If not given, nothing is output "
                       "(useful for validation runs)."
                       ))
    p.add_param("propcons", str,
                metavar=_("@info sieve parameter value placeholder", "FILE"),
                desc=_("@info sieve parameter discription",
                       "File defining the constraints "
                       "on property keys and values."
                       ))
    p.add_param("extrakeys", bool, defval=False,
                desc=_("@info sieve parameter discription",
                       "Allow defining additional entry keys."
                       ))
    p.add_param("derivs", str,
                metavar=_("@info sieve parameter value placeholder", "FILE"),
                desc=_("@info sieve parameter discription",
                       "File defining the derivators used in derived entries."
                       ))
    p.add_param("pmhead", str, defval="pmap:",
                metavar=_("@info sieve parameter value placeholder",
                          "STRING"),
                desc=_("@info sieve parameter discription",
                       "Prefix which starts property map entries in comments."
                       ))
    p.add_param("sdhead", str, defval="synder:",
                metavar=_("@info sieve parameter value placeholder",
                          "STRING"),
                desc=_("@info sieve parameter discription",
                       "Prefix which starts syntagma derivator entries "
                       "in comments."
                       ))


class Sieve (object):
    """
    Collect property map entries from manual comments of messages,
    validate them, and optionally write them out as a property map file.
    """

    def __init__ (self, params):
        """
        @param params: sieve parameters, as declared in C{setup_sieve}

        @raises SieveError: if the property constraint file is missing
            or malformed, or if either of the entry prefixes is empty
        """

        # NOTE(review): presumably these tell the sieve caller that
        # messages are neither modified nor need monitoring;
        # confirm against the sieve API documentation.
        self.caller_sync = False
        self.caller_monitored = False

        self.propcons = None
        if params.propcons:
            self.propcons = self._read_propcons(params.propcons)

        self.p = params

        if not params.pmhead:
            raise SieveError(_("@info",
                               "Prefix which starts property map entries "
                               "in comments cannot be empty."))
        if not params.sdhead:
            raise SieveError(_("@info",
                               "Prefix which starts syntagma derivator "
                               "entries in comments cannot be empty."))

        # Collected entries.
        # Each element is a tuple of the form:
        #   (ekeys, props, psep, kvsep, msg, cat)
        self.entries = []

        # Syntagma derivator, for synder entries.
        self.synder = Synder()
        # Ordinal used to build a unique key for each imported derivation.
        self.sdord = 0


    def process (self, msg, cat):
        """
        Collect the property map entry defined by manual comments of a message.

        Messages which are untranslated, obsolete or plural are skipped.
        Any problem with the entry is reported as a warning on the message,
        and the message then contributes no entry.

        @param msg: message to look for property map entries in
        @param cat: catalog which the message belongs to
        """

        if not msg.translated or msg.obsolete:
            return
        if msg.msgid_plural is not None:
            return

        # Parse property map entries from the message.
        psep, kvsep = None, None
        ekeys = set()
        props = {}
        for ind, rawmanc in enumerate(msg.manual_comment, 1):
            manc = rawmanc.strip()
            if manc.startswith(self.p.pmhead):
                # Parse and check consistency of separators.
                espec = manc[len(self.p.pmhead):].lstrip()
                if len(espec) < 2:
                    # Both separators must be present
                    # before they can be unpacked.
                    warning_on_msg(_("@info",
                                     "Missing separators for "
                                     "property map entry in comment "
                                     "no. %(ord)d.", ord=ind),
                                   msg, cat)
                    return
                lkvsep, lpsep = espec[:2]
                if lkvsep.isalnum() or lpsep.isalnum():
                    warning_on_msg(_("@info",
                                     "An alphanumeric separator is used for "
                                     "property map entry in comment "
                                     "no. %(ord)d.", ord=ind),
                                   msg, cat)
                    return
                if not psep:
                    psep, kvsep = lpsep, lkvsep
                elif (psep, kvsep) != (lpsep, lkvsep):
                    warning_on_msg(_("@info",
                                     "Inconsistent separators for "
                                     "continued property map entry in comment "
                                     "no. %(ord)d.", ord=ind),
                                   msg, cat)
                    return
                # Remove leading and trailing separators.
                respec = espec[2:]
                if respec.endswith(psep + psep):
                    respec = respec[:-2]
                elif respec.endswith(psep):
                    respec = respec[:-1]
                else:
                    warning_on_msg(_("@info",
                                     "Missing terminating separator for "
                                     "property map entry in comment "
                                     "no. %(ord)d.", ord=ind),
                                   msg, cat)
                    return
                # Parse entry keys and key-value pairs.
                for elspec in respec.split(psep):
                    if kvsep in elspec:
                        pkey, pval = elspec.split(kvsep, 1)
                        props[pkey] = pval
                    else:
                        ekey = elspec
                        if not self.p.extrakeys:
                            warning_on_msg(_("@info",
                                             "Additional entry key '%(key)s' "
                                             "is defined but not allowed for "
                                             "property map entry in comment "
                                             "no. %(ord)d.",
                                             key=ekey, ord=ind),
                                           msg, cat)
                            return
                        ekeys.add(ekey)

            elif manc.startswith(self.p.sdhead):
                sddef = manc[len(self.p.sdhead):].lstrip()
                sdkey = str(self.sdord)
                # Advance the ordinal, so that every derivation is imported
                # under its own key instead of reusing the same one.
                self.sdord += 1
                sdexpr = sdkey + ":" + sddef
                if self.p.derivs:
                    sdexpr = ">" + self.p.derivs + "\n" + sdexpr
                # Exception types raised by the derivator are not known here,
                # so anything it raises is reported as an invalid derivation.
                try:
                    self.synder.import_string(sdexpr)
                    cprops = self.synder.props(sdkey)
                except Exception as e:
                    errmsg = str_to_unicode(str(e))
                    warning_on_msg(_("@info",
                                     "Invalid derivation '%(deriv)s':\n"
                                     "%(msg)s", deriv=sddef, msg=errmsg),
                                   msg, cat)
                    return

                # All derived keys and values glued together,
                # to test which separators do not collide with them.
                jumble = "".join(["".join(x) for x in cprops.items()])
                if not psep:
                    psep = self._pick_sep(jumble, "/|¦")
                    kvsep = self._pick_sep(jumble, "=:→")
                    if not psep or not kvsep:
                        warning_on_msg(_("@info",
                                         "No known separator are applicable "
                                         "to keys and values derived from "
                                         "'%(deriv)s'.", deriv=sddef),
                                       msg, cat)
                        return
                else:
                    if psep in jumble or kvsep in jumble:
                        warning_on_msg(_("@info",
                                         "Previously selected separators "
                                         "are not applicable to "
                                         "keys and values derived from "
                                         "'%(deriv)s'.", deriv=sddef),
                                       msg, cat)
                        return

                props.update(cprops)

        if not props:
            if ekeys:
                warning_on_msg(_("@info",
                                 "Some additional entry keys "
                                 "are defined for property map entry, "
                                 "but there are no properties."),
                               msg, cat)
            return
        props = sorted(props.items()) # no need for dictionary any more

        # Add default keys.
        ekeys.add(msg.msgid)
        ekeys.add(msg.msgstr[0])

        # Validate entry if requested.
        if self.propcons:
            errs = self._validate_props(props, msg, cat, self.propcons)
            if errs:
                problems = cjoin([" " + x for x in errs], "\n")
                warning_on_msg(_("@info",
                                 "Property map entry fails validation:\n"
                                 "%(msgs)s", msgs=problems),
                               msg, cat)
                return

        # Entry parsed.
        ekeys = sorted(ekeys)
        self.entries.append((ekeys, props, psep, kvsep, msg, cat))


    def finalize (self):
        """
        Resolve conflicts between collected entries and write the property map.

        Entries with same translation and same properties are collapsed
        into one; entries with same translation but different properties
        are all dropped, with warnings. If the output file was not given,
        nothing is written or reported after this validation.
        """

        # Check cross-entry validity, select valid.
        msgs_by_seen_msgstr = {}
        unique_entries = []
        for entry in self.entries:
            props, msg, cat = entry[1], entry[4], entry[5]
            seen = msgs_by_seen_msgstr.setdefault(msg.msgstr[0], [])
            if not props or any(props == seen_el[2] for seen_el in seen):
                # Same translation with same properties was already seen.
                continue
            unique_entries.append(entry)
            seen.append((msg, cat, props))
        good_entries = []
        for ekeys, props, psep, kvsep, msg, cat in unique_entries:
            eq_msgstr_set = msgs_by_seen_msgstr.get(msg.msgstr[0])
            if eq_msgstr_set is None:
                # Conflict on this translation has already been reported.
                continue
            if len(eq_msgstr_set) > 1:
                cmsgcats = msgs_by_seen_msgstr.pop(msg.msgstr[0])
                msg0, cat0 = cmsgcats[0][:2]
                warning_on_msg(_("@info split to link below",
                                 "Property map entries removed due "
                                 "to translation conflict with..."),
                               msg0, cat0)
                for cmsgcat in cmsgcats[1:]:
                    warning_on_msg(_("@info continuation from above",
                                     "...this message."),
                                   cmsgcat[0], cmsgcat[1])
            else:
                good_entries.append((ekeys, props, psep, kvsep))

        # If output file has not been given, only validation was expected.
        if not self.p.outfile:
            return

        # Serialize entries.
        good_entries.sort(key=lambda x: x[0])
        lines = []
        for ekeys, props, psep, kvsep in good_entries:
            # Do Unicode, locale-unaware sorting,
            # for equal results over different systems;
            # they are not to be read by humans anyway.
            propstr = psep.join([kvsep.join(x) for x in sorted(props)])
            ekeystr = psep.join(sorted(ekeys))
            estr = kvsep + psep + ekeystr + psep + propstr + psep + psep
            lines.append(estr)

        # Write out the property map.
        # The text is encoded explicitly, hence the binary mode.
        lines.append("")
        fstr = "\n".join(lines).encode("UTF-8")
        with open(self.p.outfile, "wb") as fh:
            fh.write(fstr)

        msg = n_("@info:progress",
                 "Collected %(num)d entry for the property map.",
                 "Collected %(num)d entries for the property map.",
                 num=len(good_entries))
        report("===== " + msg)


    def _pick_sep (self, teststr, seps):
        """
        Pick the first separator which does not appear in the test string.

        @param teststr: string in which a separator must not appear
        @param seps: candidate separators, in order of preference

        @returns: the selected separator, or C{None} if all candidates
            appear in the test string (or there are no candidates)
        """

        for sep in seps:
            if sep not in teststr:
                return sep
        return None


    def _read_propcons (self, fpath):
        """
        Read constraints on property keys and values from a file.

        The file is read as UTF-8 text. On each line, everything from
        the first C{#} on is removed and empty lines are skipped.
        The first character of a remaining line is taken as the separator,
        and it must delimit exactly the key regex, the value regex,
        and the flags, e.g. C{:key-regex:value-regex:flags}.
        Known flags are C{i}, C{I}, C{t} and C{r}; C{I} makes the key regex
        and C{i} the value regex case-insensitive.

        @param fpath: path to the constraint file

        @returns: list of constraints, each a tuple of compiled key regex,
            key regex string, compiled value regex, value regex string,
            and string of flags

        @raises SieveError: if the file does not exist
            or a constraint is malformed
        """

        if not os.path.isfile(fpath):
            raise SieveError(_("@info",
                               "Property constraint file '%(file)s' "
                               "does not exist.",
                               file=fpath))
        with open(fpath, "rb") as fh:
            lines = fh.read().decode("UTF-8").split("\n")
        if not lines[-1]:
            lines.pop()

        cmrx = re.compile(r"#.*")
        # Constraints collected as list of tuples:
        # (compiled key regex, string key regex,
        #  compiled value regex, string value regex,
        #  string of flags)
        propcons = []
        lno = 0
        def mkerr (problem):
            # Reads the current line number from the enclosing loop.
            return _("@info",
                     "Invalid property map constraint "
                     "at %(file)s:%(line)d: %(snippet)s.",
                     file=fpath, line=lno, snippet=problem)
        known_flags = set(("i", "I", "t", "r"))
        for lno, line in enumerate(lines, 1):
            line = cmrx.sub("", line).strip()
            if not line:
                continue

            sep = line[0]
            if sep.isalnum():
                raise SieveError(mkerr(_("@item:intext",
                                         "alphanumeric separators "
                                         "not allowed")))
            # The line starts with the separator,
            # so the first of the split elements is always empty.
            lst = line.split(sep)
            if len(lst) < 4:
                raise SieveError(mkerr(_("@item:intext",
                                         "too few separators")))
            elif len(lst) > 4:
                raise SieveError(mkerr(_("@item:intext",
                                         "too many separators")))

            keyrxstr, valrxstr, flags = lst[1:]

            unknown_flags = set(flags).difference(known_flags)
            if unknown_flags:
                fmtflags = format_item_list(sorted(unknown_flags),
                                            quoted=True)
                raise SieveError(mkerr(_("@item:intext",
                                         "unknown flags %(flaglist)s",
                                         flaglist=fmtflags)))

            rxs = []
            for rxstr, iflag in ((keyrxstr, "I"), (valrxstr, "i")):
                rxfls = re.U
                if iflag in flags:
                    rxfls |= re.I
                # Regexes must match whole keys and values.
                wrxstr = r"^(?:%s)$" % rxstr
                try:
                    rx = re.compile(wrxstr, rxfls)
                except re.error:
                    raise SieveError(mkerr(_("@item:intext",
                                             "invalid regular expression "
                                             "'%(regex)s'",
                                             regex=rxstr)))
                rxs.append(rx)
            keyrx, valrx = rxs

            propcons.append((keyrx, keyrxstr, valrx, valrxstr, flags))

        return propcons


    def _validate_props (self, props, msg, cat, propcons):
        """
        Check properties of an entry against the constraints.

        Every key must match the key regex of at least one constraint,
        and the value must match the value regex of each constraint
        which the key matched. For constraints with flag C{t} the value
        must also be equal to the translation of the message
        (ignoring case if flag C{i} is set). Each constraint with flag C{r}
        must be matched by at least one key.

        @param props: properties as sequence of (key, value) pairs
        @param msg: message from which the properties were collected
        @param cat: catalog which the message belongs to (not used here)
        @param propcons: constraints, as returned by C{_read_propcons}

        @returns: list of error messages, empty if the properties are valid
        """

        matched_cons = set()
        errs = []
        for key, val in props:
            key_matched = False
            for ic, propcon in enumerate(propcons):
                keyrx, keyrxstr, valrx, valrxstr, flags = propcon
                if not keyrx.search(key):
                    continue
                key_matched = True
                matched_cons.add(ic)
                if not valrx.search(val):
                    errs.append(_("@info",
                                  "Value '%(val)s' to key '%(key)s' "
                                  "does not match '%(pattern)s'.",
                                  val=val, key=key, pattern=valrxstr))
                if "t" in flags:
                    if "i" in flags:
                        eq = (val.lower() == msg.msgstr[0].lower())
                    else:
                        eq = (val == msg.msgstr[0])
                    if not eq:
                        errs.append(_("@info",
                                      "Value '%(val)s' to key '%(key)s' "
                                      "does not match translation "
                                      "of the message.",
                                      val=val, key=key))
            if not key_matched:
                errs.append(_("@info",
                              "Key '%(key)s' does not match any constraint.",
                              key=key))

        for ic, propcon in enumerate(propcons):
            # Flags of this very constraint decide whether it is required.
            pattern, flags = propcon[1], propcon[-1]
            if "r" in flags and ic not in matched_cons:
                errs.append(_("@info",
                              "No key matched required constraint "
                              "'%(pattern)s'.",
                              pattern=pattern))

        return errs