File indexing completed on 2024-10-27 11:34:22

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Assemble a property map from entries in manual comments.
0005 
0006 Documented in C{doc/user/sieving.docbook}.
0007 
0008 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0009 @license: GPLv3
0010 """
0011 
0012 import os
0013 import re
0014 
0015 from pology import _, n_
0016 from pology.colors import cjoin
0017 from pology.fsops import str_to_unicode
0018 from pology.msgreport import warning_on_msg
0019 from pology.report import report, format_item_list
0020 from pology.sieve import SieveError
0021 from pology.synder import Synder
0022 
0023 
def setup_sieve (p):
    """
    Declare the description and the parameters of this sieve.

    @param p: object on which the sieve description and parameters
        are registered, through its C{set_desc} and C{add_param} methods
    """

    p.set_desc(
        _("@info sieve discription",
          "Assemble a property map from entries in manual comments."))

    # Output and validation inputs.
    p.add_param(
        "outfile", str,
        metavar=_("@info sieve parameter value placeholder", "FILE"),
        desc=_("@info sieve parameter discription",
               "File to output the property map into. "
               "If not given, nothing is output (useful for validation runs)."))
    p.add_param(
        "propcons", str,
        metavar=_("@info sieve parameter value placeholder", "FILE"),
        desc=_("@info sieve parameter discription",
               "File defining the constraints on property keys and values."))
    p.add_param(
        "extrakeys", bool, defval=False,
        desc=_("@info sieve parameter discription",
               "Allow defining additional entry keys."))
    p.add_param(
        "derivs", str,
        metavar=_("@info sieve parameter value placeholder", "FILE"),
        desc=_("@info sieve parameter discription",
               "File defining the derivators used in derived entries."))

    # Prefixes by which entries are recognized in manual comments.
    p.add_param(
        "pmhead", str, defval="pmap:",
        metavar=_("@info sieve parameter value placeholder", "STRING"),
        desc=_("@info sieve parameter discription",
               "Prefix which starts property map entries in comments."))
    p.add_param(
        "sdhead", str, defval="synder:",
        metavar=_("@info sieve parameter value placeholder", "STRING"),
        desc=_("@info sieve parameter discription",
               "Prefix which starts syntagma derivator entries in comments."))
0060 
0061 
0062 class Sieve (object):
0063 
0064     def __init__ (self, params):
0065 
0066         self.caller_sync = False
0067         self.caller_monitored = False
0068 
0069         self.propcons = None
0070         if params.propcons:
0071             self.propcons = self._read_propcons(params.propcons)
0072 
0073         self.p = params
0074 
0075         if not params.pmhead:
0076             raise SieveError(_("@info",
0077                                "Prefix which starts property map entries "
0078                                "in comments cannot be empty."))
0079         if not params.sdhead:
0080             raise SieveError(_("@info",
0081                                "Prefix which starts syntagma derivator entries "
0082                                "in comments cannot be empty."))
0083 
0084         # Collected entries.
0085         # Each element is a tuple of the form:
0086         # (ekeys, props, psep, kvsep, msg, cat)
0087         self.entries = []
0088 
0089         # Syntagma derivator, for synder entries.
0090         self.synder = Synder()
0091         self.sdord = 0
0092 
0093 
0094     def process (self, msg, cat):
0095 
0096         if not msg.translated or msg.obsolete:
0097             return
0098         if msg.msgid_plural is not None:
0099             return
0100 
0101         # Parse property map entries from the message.
0102         psep, kvsep = None, None
0103         ekeys = set()
0104         props = {}
0105         for i in range(len(msg.manual_comment)):
0106             ind = i + 1
0107             manc = (msg.manual_comment[i]).strip()
0108             if manc.startswith(self.p.pmhead):
0109                 # Parse and check consistency of separators.
0110                 espec = manc[len(self.p.pmhead):].lstrip()
0111                 lkvsep, lpsep = espec[:2]
0112                 if lkvsep.isalnum() or lpsep.isalnum():
0113                     warning_on_msg(_("@info",
0114                                      "An alphanumeric separator is used for "
0115                                      "property map entry in comment "
0116                                      "no. %(ord)d.", ord=ind),
0117                                      msg, cat)
0118                     return
0119                 if not psep:
0120                     psep, kvsep = lpsep, lkvsep
0121                 elif (psep, kvsep) != (lpsep, lkvsep):
0122                     warning_on_msg(_("@info",
0123                                      "Inconsistent separators for "
0124                                      "continued property map entry in comment "
0125                                      "no. %(ord)d.", ord=ind),
0126                                      msg, cat)
0127                     return
0128                 # Remove leading and trailing separators.
0129                 respec = espec[2:]
0130                 if respec.endswith(psep + psep):
0131                     respec = respec[:-2]
0132                 elif respec.endswith(psep):
0133                     respec = respec[:-1]
0134                 else:
0135                     warning_on_msg(_("@info",
0136                                      "Missing terminating separator for "
0137                                      "property map entry in comment "
0138                                      "no. %(ord)d.", ord=ind),
0139                                      msg, cat)
0140                     return
0141                 # Parse entry keys and key-value pairs.
0142                 for elspec in respec.split(psep):
0143                     if kvsep in elspec:
0144                         pkey, pval = elspec.split(kvsep, 1)
0145                         props[pkey] = pval
0146                     else:
0147                         ekey = elspec
0148                         if not self.p.extrakeys:
0149                             warning_on_msg(_("@info",
0150                                              "Additional entry key '%(key)s' "
0151                                              "is defined but not allowed for "
0152                                              "property map entry in comment "
0153                                              "no. %(ord)d.", key=ekey, ord=ind),
0154                                              msg, cat)
0155                             return
0156                         ekeys.add(ekey)
0157 
0158             elif manc.startswith(self.p.sdhead):
0159                 sddef = manc[len(self.p.sdhead):].lstrip()
0160                 sdkey = str(self.sdord)
0161                 sdexpr = sdkey + ":" + sddef
0162                 if self.p.derivs:
0163                     sdexpr = ">" + self.p.derivs + "\n" + sdexpr
0164                 try:
0165                     self.synder.import_string(sdexpr)
0166                     cprops = self.synder.props(sdkey)
0167                 except Exception as e:
0168                     errmsg = str_to_unicode(str(e))
0169                     warning_on_msg(_("@info",
0170                                      "Invalid derivation '%(deriv)s':\n"
0171                                      "%(msg)s", deriv=sddef, msg=errmsg),
0172                                      msg, cat)
0173                     return
0174 
0175                 jumble = "".join(["".join(x) for x in list(cprops.items())])
0176                 if not psep:
0177                     psep = self._pick_sep(jumble, "/|¦")
0178                     kvsep = self._pick_sep(jumble, "=:→")
0179                     if not psep or not kvsep:
0180                         warning_on_msg(_("@info",
0181                                          "No known separator are applicable "
0182                                          "to keys and values derived from "
0183                                          "'%(deriv)s'.", deriv=sddef),
0184                                          msg, cat)
0185                         return
0186                 else:
0187                     if psep in jumble or kvsep in jumble:
0188                         warning_on_msg(_("@info",
0189                                          "Previously selected separators "
0190                                          "are not applicable to "
0191                                          "keys and values derived from "
0192                                          "'%(deriv)s'.", deriv=sddef),
0193                                          msg, cat)
0194                         return
0195 
0196                 props.update(cprops)
0197 
0198         if not props:
0199             if ekeys:
0200                 warning_on_msg(_("@info",
0201                                  "Some additional entry keys "
0202                                  "are defined for property map entry, "
0203                                  "but there are no properties."),
0204                                msg, cat)
0205             return
0206         props = sorted(props.items()) # no need for dictionary any more
0207 
0208         # Add default keys.
0209         ekeys.add(msg.msgid)
0210         ekeys.add(msg.msgstr[0])
0211 
0212         # Validate entry if requested.
0213         if self.propcons:
0214             errs = self._validate_props(props, msg, cat, self.propcons)
0215             if errs:
0216                 problems = cjoin(["  " + x for x in errs], "\n")
0217                 warning_on_msg(_("@info",
0218                                  "Property map entry fails validation:\n"
0219                                  "%(msgs)s", msgs=problems),
0220                                  msg, cat)
0221                 return
0222 
0223         # Entry parsed.
0224         ekeys = sorted(ekeys)
0225         props = sorted(props)
0226         self.entries.append((ekeys, props, psep, kvsep, msg, cat))
0227 
0228 
0229     def finalize (self):
0230 
0231         # Check cross-entry validity, select valid.
0232         msgs_by_seen_msgstr = {}
0233         unique_entries = []
0234         for entry in self.entries:
0235             d1, props, d3, d4, msg, cat = entry
0236             msgstr = msg.msgstr[0]
0237             if msgstr not in msgs_by_seen_msgstr:
0238                 msgs_by_seen_msgstr[msgstr] = []
0239             else:
0240                 for d1, d2, oprops in msgs_by_seen_msgstr[msgstr]:
0241                     if props == oprops:
0242                         props = None
0243                         break
0244             if props:
0245                 unique_entries.append(entry)
0246                 msgs_by_seen_msgstr[msgstr].append((msg, cat, props))
0247         good_entries = []
0248         for ekeys, props, psep, kvsep, msg, cat in unique_entries:
0249             eq_msgstr_set = msgs_by_seen_msgstr.get(msg.msgstr[0])
0250             if eq_msgstr_set is not None:
0251                 if len(eq_msgstr_set) > 1:
0252                     cmsgcats = msgs_by_seen_msgstr.pop(msg.msgstr[0])
0253                     msg0, cat0, d3 = cmsgcats[0]
0254                     warning_on_msg(_("@info split to link below",
0255                                      "Property map entries removed due "
0256                                      "to translation conflict with..."),
0257                                      msg0, cat0)
0258                     for msg, cat, d3 in cmsgcats[1:]:
0259                         warning_on_msg(_("@info continuation from above",
0260                                          "...this message."),
0261                                        msg, cat)
0262                 else:
0263                     good_entries.append((ekeys, props, psep, kvsep))
0264 
0265         # If output file has not been given, only validation was expected.
0266         if not self.p.outfile:
0267             return
0268 
0269         # Serialize entries.
0270         good_entries.sort(key=lambda x: x[0])
0271         lines = []
0272         for ekeys, props, psep, kvsep in good_entries:
0273             # Do Unicode, locale-unaware sorting,
0274             # for equal results over different systems;
0275             # they are not to be read by humans anyway.
0276             propstr = psep.join([kvsep.join(x) for x in sorted(props)])
0277             ekeystr = psep.join(sorted(ekeys))
0278             estr = kvsep + psep + ekeystr + psep + propstr + psep + psep
0279             lines.append(estr)
0280 
0281         # Write out the property map.
0282         lines.append("")
0283         fstr = "\n".join(lines)
0284         fstr = fstr.encode("UTF-8")
0285         fh = open(self.p.outfile, "w")
0286         fh.write(fstr)
0287         fh.close()
0288 
0289         msg = n_("@info:progress",
0290                  "Collected %(num)d entry for the property map.",
0291                  "Collected %(num)d entries for the property map.",
0292                  num=len(good_entries))
0293         report("===== " + msg)
0294 
0295 
0296     def _pick_sep (self, teststr, seps):
0297 
0298         good = False
0299         for sep in seps:
0300             if sep not in teststr:
0301                 good = True
0302                 break
0303         return sep if good else None
0304 
0305 
0306     def _read_propcons (self, fpath):
0307 
0308         if not os.path.isfile(fpath):
0309             raise SieveError(_("@info",
0310                                "Property constraint file '%(file)s' "
0311                                "does not exist.",
0312                                file=fpath))
0313         lines = open(fpath).read().decode("UTF-8").split("\n")
0314         if not lines[-1]:
0315             lines.pop()
0316 
0317         cmrx = re.compile(r"#.*")
0318         # Constraints collected as list of tuples:
0319         # (compiled key regex, string key regex,
0320         #  compiled value regex, string value regex,
0321         #  string of flags)
0322         propcons = []
0323         lno = 0
0324         def mkerr (problem):
0325             return _("@info",
0326                      "Invalid property map constraint "
0327                      "at %(file)s:%(line)d: %(snippet)s.",
0328                      file=fpath, line=lno, snippet=problem)
0329         known_flags = set(("i", "I", "t", "r"))
0330         for line in lines:
0331             lno += 1
0332             line = cmrx.sub("", line).strip()
0333             if not line:
0334                 continue
0335 
0336             sep = line[0]
0337             if sep.isalnum():
0338                 raise SieveError(mkerr(_("@item:intext",
0339                                          "alphanumeric separators "
0340                                          "not allowed")))
0341             lst = line.split(sep)
0342             if len(lst) < 4:
0343                 raise SieveError(mkerr(_("@item:intext",
0344                                          "too few separators")))
0345             elif len(lst) > 4:
0346                 raise SieveError(mkerr(_("@item:intext",
0347                                          "too many separators")))
0348 
0349             d1, keyrxstr, valrxstr, flags = lst
0350 
0351             unknown_flags = set(flags).difference(known_flags)
0352             if unknown_flags:
0353                 fmtflags = format_item_list(sorted(unknown_flags), quoted=True)
0354                 raise SieveError(mkerr(_("@item:intext",
0355                                          "unknown flags %(flaglist)s",
0356                                          flaglist=fmtflags)))
0357 
0358             rxs = []
0359             for rxstr, iflag in ((keyrxstr, "I"), (valrxstr, "i")):
0360                 rxfls = re.U
0361                 if iflag in flags:
0362                     rxfls |= re.I
0363                 wrxstr = r"^(?:%s)$" % rxstr
0364                 try:
0365                     rx = re.compile(wrxstr, rxfls)
0366                 except:
0367                     raise SieveError(mkerr(_("@item:intext",
0368                                              "invalid regular expression "
0369                                              "'%(regex)s'",
0370                                              regex=rxstr)))
0371                 rxs.append(rx)
0372             keyrx, valrx = rxs
0373 
0374             propcons.append((keyrx, keyrxstr, valrx, valrxstr, flags))
0375 
0376         return propcons
0377 
0378 
0379     def _validate_props (self, props, msg, cat, propcons):
0380 
0381         matched_cons = set()
0382         errs = []
0383         adderr = lambda err: errs.append(err)
0384         for prop, ip in zip(props, list(range(len(props)))):
0385             key, val = prop
0386             key_matched = False
0387             for propcon, ic in zip(propcons, list(range(len(propcons)))):
0388                 keyrx, keyrxstr, valrx, valrxstr, flags = propcon
0389                 if keyrx.search(key):
0390                     key_matched = True
0391                     matched_cons.add(ic)
0392                     if not valrx.search(val):
0393                         pattern = valrx
0394                         adderr(_("@info",
0395                                  "Value '%(val)s' to key '%(key)s' "
0396                                  "does not match '%(pattern)s'.",
0397                                  val=val, key=key, pattern=pattern))
0398                     if "t" in flags:
0399                         if "i" in flags:
0400                             eq = (val.lower() == msg.msgstr[0].lower())
0401                         else:
0402                             eq = (val == msg.msgstr[0])
0403                         if not eq:
0404                             adderr(_("@info",
0405                                      "Value '%(val)s' to key '%(key)s' "
0406                                      "does not match translation "
0407                                      "of the message.",
0408                                      val=val, key=key))
0409             if not key_matched:
0410                 adderr(_("@info",
0411                          "Key '%(key)s' does not match any constraint.",
0412                          key=key))
0413 
0414         for propcon, ic in zip(propcons, list(range(len(propcons)))):
0415             pattern, rlags = propcon[1], propcon[-1]
0416             if "r" in flags and ic not in matched_cons:
0417                 adderr(_("@info",
0418                          "No key matched required constraint '%(pattern)s'.",
0419                          pattern=pattern))
0420 
0421         return errs
0422