File indexing completed on 2024-04-14 05:37:50

0001 # -*- coding: UTF-8 -*
0002 
0003 """
0004 Process ascription configurations, catalogs, and histories.
0005 
0006 @note: For the moment, this module is only for internal use within Pology.
0007 Interfaces may change arbitrarily between any two Pology releases.
0008 
0009 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0010 @license: GPLv3
0011 """
0012 
0013 import codecs
0014 from configparser import SafeConfigParser
0015 import datetime
0016 import os
0017 import re
0018 import time
0019 from types import ModuleType
0020 
0021 from pology import PologyError, _, n_, t_
0022 from pology.header import format_datetime, parse_datetime
0023 from pology.message import Message, MessageUnsafe
0024 from pology.comments import parse_summit_branches
0025 from pology.diff import msg_ediff
0026 from pology.fsops import join_ncwd, str_to_unicode, unicode_to_str
0027 from pology.match import make_msg_fmatcher
0028 from pology.monitored import Monlist
0029 from pology.msgreport import warning_on_msg
0030 from pology.report import warning
0031 from pology.vcs import make_vcs
0032 
0033 
0034 # -----------------------------------------------------------------------------
0035 # Ascription data representations.
0036 
class AscConfig (object):
    """
    Representation of an ascription configuration file.

    The settings are reached through instance attributes.
    Some attributes are raw data read from configuration fields,
    while others may be derived based on configuration fields.

    Parameters which have "for header updates" in their description
    are used for creating and updating ascription catalog headers,
    as well as original catalog headers when header update on commit
    is requested. They may contain a number of interpolations, see
    L{Catalog.update_header<pology.catalog.Catalog.update_header>}.

    @ivar path: the path to the ascription configuration file
    @type path: string
    @ivar catroot: the path to root directory of original catalogs
    @type catroot: string
    @ivar ascroot: the path to root directory of ascription catalogs
    @type ascroot: string
    @ivar title: the header title comment  (for header updates;
        only for original catalogs)
    @type title: string or None
    @ivar langteam: the language team name (for header updates)
    @type langteam: string or None
    @ivar teamemail: the language team email address (for header updates)
    @type teamemail: string or None
    @ivar langcode: the language code (for header updates)
    @type langcode: string or None
    @ivar plforms: the PO plural forms specification (for header updates)
    @type plforms: string or None
    @ivar vcs: the version control system for catalogs
    @type vcs: L{VcsBase<pology.vcs.VcsBase>}
    @ivar commitmsg: the automatic commit message
    @type commitmsg: string or None
    @ivar revtags: known review tags (empty string always included)
    @type revtags: set(string*)
    @ivar users: data for ascription users by username
    @type users: {string: L{AscUser}*}
    """

    def __init__ (self, cfgpath):
        """
        Constructor.

        Reads the ascription configuration file to set raw and derived
        ascription settings. Global settings come from the C{[global]}
        section, and every C{[user-NAME]} section defines one user.

        @param cfgpath: the path to ascription configuration file
        @type cfgpath: string

        @raises PologyError: if catalog root and ascription root resolve
            to the same path, if a user is repeated, or if a user section
            has no C{name} field
        """

        # Imported locally so that this class does not depend on
        # SafeConfigParser, which (like readfp()) is gone in Python 3.12.
        from configparser import ConfigParser

        config = ConfigParser()
        # The context manager closes the file even if parsing fails.
        with open(cfgpath, "r", encoding="UTF-8") as ifl:
            config.read_file(ifl)

        self.path = cfgpath

        gsect = dict(config.items("global"))
        # Root directories are given relative to the configuration file.
        cpathdir = os.path.dirname(cfgpath)
        self.catroot = join_ncwd(cpathdir, gsect.get("catalog-root", ""))
        self.ascroot = join_ncwd(cpathdir, gsect.get("ascript-root", ""))
        if self.catroot == self.ascroot:
            raise PologyError(
                _("@info",
                  "Catalog root and ascription root for '%(file)s' "
                  "resolve to same path '%(dir)s'.",
                  file=cfgpath, dir=self.catroot))

        self.title = gsect.get("title", None)
        self.langteam = gsect.get("language-team", None)
        self.teamemail = gsect.get("team-email", None)
        self.langcode = gsect.get("language", None)
        self.plforms = gsect.get("plural-header", None)

        self.vcs = make_vcs(gsect.get("version-control", "noop"))

        self.commitmsg = gsect.get("commit-message", None)

        # Review tags are whitespace-separated; the empty tag is always known.
        self.revtags = set(gsect.get("review-tags", "").split())
        self.revtags.add("")

        self.users = {}
        userst = "user-"
        for section in config.sections():
            if not section.startswith(userst):
                continue
            user = section[len(userst):]
            usect = dict(config.items(section))
            if user in self.users:
                raise PologyError(
                    _("@info",
                      "Repeated user '%(user)s' in '%(file)s'.",
                      user=user, file=cfgpath))
            if "name" not in usect:
                raise PologyError(
                    _("@info",
                      "The name is missing for "
                      "user '%(user)s' in '%(file)s'.",
                      user=user, file=cfgpath))
            self.users[user] = AscUser(name=usect.get("name"),
                                       oname=usect.get("original-name"),
                                       email=usect.get("email"))
0146 
0147 
class AscUser (object):
    """
    Data about a single ascription user.

    @ivar name: the user's name in a form readable in English
    @type name: string or None
    @ivar oname: the user's name written in the user's native language
    @type oname: string or None
    @ivar email: the user's email address
    @type email: string or None
    """

    def __init__ (self, name=None, oname=None, email=None):
        """
        Constructor.

        Each parameter is stored unchanged in the attribute
        of the same name; see attribute documentation.
        """

        self.name, self.oname, self.email = name, oname, email
0170 
0171 
class AscPoint (object):
    """
    A single point in the ascription history of a message.

    @ivar msg: a stripped version of the original PO message as it appeared
        when the ascription was made, containing only
        L{extraction-invariant parts<message.Message_base.inv>}
    @type msg: L{MessageUnsafe<message.MessageUnsafe>}
    @ivar rmsg: the message in the ascription catalog from which
        C{msg} was parsed
    @type rmsg: L{MessageUnsafe<message.MessageUnsafe>}
    @ivar user: the user to whom the ascription was made
    @type user: string
    @ivar type: the ascription type (one of C{ATYPE_} constants)
    @type type: string
    @ivar tag: the review tag (from the set defined in ascription config)
    @type tag: string
    @ivar date: the date when the ascription was made
    @type date: datetime.datetime
    @ivar slen: the length of the separator in ascription message (C{rmsg})
    @type slen: int
    @ivar fuzz: whether the original message was fuzzy
    @type fuzz: bool
    @ivar obs: whether the original message was obsolete
    @type obs: bool
    @ivar pos: the position of this ascription point within
        the ascription history (increasing from 1, 1 is the latest by date)
    @type pos: int
    """

    # Ascription types.
    # NOTE: These strings are written into and read from ascription files.
    ATYPE_MOD = "modified"
    ATYPE_REV = "reviewed"

    # Instance attributes set by the constructor. A bare name defaults
    # to None; a (name, default) pair carries an explicit default.
    _known_attrs = (
        "rmsg", "msg",
        "user", "type", ("tag", ""), "date",
        "slen", "fuzz", "obs",
        "pos"
    )

    def __init__ (self, apoint=None):
        """
        Make an empty ascription point, or a shallow copy of a given one.

        When C{apoint} is given, every known attribute is taken over
        from it by reference; otherwise attributes get default values.

        @param apoint: an ascription point to copy
        @type apoint: L{AscPoint}
        """

        own = self.__dict__
        for spec in AscPoint._known_attrs:
            if isinstance(spec, tuple):
                name, default = spec
            else:
                name, default = spec, None
            if apoint is None:
                own[name] = default
            else:
                own[name] = apoint.__dict__[name]
0232 
0233 
0234 # -----------------------------------------------------------------------------
0235 # Collecting ascription configurations and catalog paths.
0236 
0237 
def collect_ascription_associations (catpaths):
    """
    Associate catalog paths with ascription configurations.

    For every catalog path, the nearest ascription configuration file
    up the directory tree is located and parsed (each configuration
    only once), and the path of the matching ascription catalog
    is assembled. The result is a list of two-tuples, ordered by
    configuration path: the first element is the parsed configuration,
    the second is the list of (original catalog path, ascription
    catalog path) two-tuples for catalogs under that configuration.
    The ascription catalog need not exist yet.
    For example, if the input is::

        ["foo/alpha.po", "foo/bravo.po", "bar/november.po"]

    and the files are covered by ascription configurations at
    C{foo/ascription-config} and C{bar/ascription-config},
    the return value is::

        [(AscConfig("foo/ascription-config"),
         [("foo/alpha.po", "foo-ascript/alpha.po"),
          ("foo/bravo.po", "foo-ascript/bravo.po")]),
         (AscConfig("bar/ascription-config"),
          [("bar/november.po", "bar-ascript/november.po")])]

    (assuming that both ascription configurations set C{*-ascript/}
    directories as corresponding ascription catalog roots).

    Catalogs which have a configuration but are not under
    its catalog root are silently left out.

    @param catpaths: a list of catalog paths
    @type catpaths: [string*]
    @returns: the ascription association list
    @rtype: [(AscConfig, [(string, string)*])*]

    @raises PologyError: if no configuration file is found for a catalog
    """

    cfgnames = ("ascription-config", "ascribe")
    confs = {}
    pairs_by_cfgpath = {}

    for catpath in catpaths:
        # Climb from the catalog's directory towards the filesystem root
        # until a directory holding a configuration file is found.
        cfgpath = None
        cdir = os.path.dirname(os.path.abspath(catpath))
        while cfgpath is None:
            for cfgname in cfgnames:
                candidate = os.path.join(cdir, cfgname)
                if os.path.isfile(candidate):
                    cfgpath = candidate
                    break
            else:
                updir = os.path.dirname(cdir)
                if updir == cdir:
                    # Reached the top without finding anything.
                    break
                cdir = updir
        if not cfgpath:
            raise PologyError(
                _("@info",
                  "Cannot find ascription configuration for '%(file)s'.",
                  file=catpath))

        cfgpath = join_ncwd(cfgpath) # for nicer message output
        aconf = confs.get(cfgpath)
        if not aconf:
            # First catalog under this configuration, parse it now.
            aconf = AscConfig(cfgpath)
            confs[cfgpath] = aconf
            pairs_by_cfgpath[cfgpath] = []
        pairs = pairs_by_cfgpath.get(cfgpath)

        # Only catalogs under the catalog root are under ascription.
        relcatpath = _relpath(catpath, aconf.catroot)
        if relcatpath is not None:
            acatpath = join_ncwd(aconf.ascroot, relcatpath)
            pairs.append((join_ncwd(catpath), acatpath))

    # Pair up configurations with their catalog path lists.
    return [(confs[cfgpath], pairs_by_cfgpath[cfgpath])
            for cfgpath in sorted(confs)]
0321 
0322 
def _relpath (path, dirpath):
    """
    Get the path relative to a directory which contains it.

    Both paths are made absolute before comparison, and the match is made
    on whole path components (C{/a/bc} is not inside C{/a/b}).

    @param path: the path to make relative
    @type path: string
    @param dirpath: the directory path to which to relate
    @type dirpath: string

    @returns: the relative path, or C{None} if C{path} is not
        strictly inside C{dirpath} (including when they are equal)
    @rtype: string or None
    """

    absdirpath = os.path.abspath(dirpath)
    abspath = os.path.abspath(path)
    # A root directory already ends with the separator; appending another
    # one would make the prefix impossible to match.
    if absdirpath.endswith(os.path.sep):
        prefix = absdirpath
    else:
        prefix = absdirpath + os.path.sep
    if not abspath.startswith(prefix):
        return None
    # An empty remainder means the path is the directory itself.
    return abspath[len(prefix):] or None
0334 
0335 
0336 # -----------------------------------------------------------------------------
0337 # Reading ascriptions.
0338 
# FIXME: Factor out into message module.
# Fields copied over as identity of a message.
_id_fields = ("msgctxt", "msgid")
# Remaining message text fields.
_nonid_fields = ("msgid_plural", "msgstr")
# Previous-value fields and the current fields they shadow.
_fields_previous = (
    "msgctxt_previous", "msgid_previous", "msgid_plural_previous",
)
_fields_current = ("msgctxt", "msgid", "msgid_plural")
# Comment fields.
_fields_comment = ("manual_comment", "auto_comment")
# Fields whose value is a sequence rather than a single string.
_multiple_fields = ("msgstr",) + _fields_comment
_nonid_fields_eq_nonfuzzy = _nonid_fields + ("manual_comment",)
_nonid_fields_eq_fuzzy = _nonid_fields_eq_nonfuzzy + _fields_previous
_translator_parts = ("manual_comment", "fuzzy", "msgstr")

# FIXME: ...but this stays here.
# Fields whose values are recorded in ascription messages.
_nonid_fields_tracked = (
    _nonid_fields + _fields_previous + ("manual_comment",)
)
0377 
0378 
def collect_ascription_history (msg, acat, aconf,
                                nomrg=False, hfilter=None, shallow=False,
                                addrem=None):
    """
    Collect the ascription history of a message.

    The history of C{msg} is read from the ascription catalog C{acat},
    which falls under the ascription configuration C{aconf}.
    It is returned as a list of L{AscPoint} objects,
    ordered from the newest to the oldest by date of ascription.
    If C{msg} itself is not equal to the newest ascribed message
    (or there are no ascriptions at all), it is put in front
    of the history as a modification by an unknown user (C{None}).

    Some ascription points may be due to merging with template,
    when the ascriptions on a catalog were made just after merging.
    Such points are often of no interest when examining the history,
    and they are dropped if C{nomrg} is C{True}.

    The translations of historical messages can be passed through
    a filter, given by C{hfilter}. When two consecutive historical
    modifications become equal under the filter, only the earlier
    one is kept in the history.

    History normally extends into the past through merging with templates
    (think of a paragraph-length message in which only one word was changed),
    so from some point on it may contain messages with keys different
    from that of the current message. If C{shallow} is C{True},
    the history is collected only up to the earliest message
    with the same key.

    An I{incremental} history can be requested through C{addrem},
    in which every historical message is a partial difference
    (added, removed or equal segments) from the previous historical
    message. The possible values are those described
    in L{msg_diff<diff.msg_diff>} function.

    @param msg: the message from the original catalog
    @type msg: L{Message_base<message.Message_base>}
    @param acat: the ascription catalog corresponding to the original catalog
    @type acat: L{Catalog<catalog.Catalog>}
    @param aconf: the ascription configuration which covers the catalogs
    @type aconf: L{AscConfig}
    @param nomrg: whether to eliminate from history pure merge ascriptions
    @type nomrg: bool
    @param hfilter: the filter to apply to C{msgstr} fields of
        historical messages
    @type hfilter: (string)->string
    @param shallow: whether to collect history only up to
        last historical message with same key
    @type shallow: bool
    @param addrem: make each historical message an incremental difference
        from the first earlier historical message; see same-name parameter
        of L{msg_diff<diff.msg_diff>} for possible values
    @type addrem: string

    @returns: the ascription history
    @rtype: [AscPoint*]
    """

    ahist = _collect_ascription_history_w(msg, acat, aconf, None, set(),
                                          shallow)

    # A message that is not ascribed in its current form goes
    # to the front, as a modification by unknown user.
    if not ahist or not ascription_equal(msg, ahist[0].msg):
        unasc = AscPoint()
        unasc.type = AscPoint.ATYPE_MOD
        unasc.user = None
        unasc.msg = msg
        ahist.insert(0, unasc)

    # Number the points before any removals,
    # so that gaps introduced by removals stay visible.
    for pos, a in enumerate(ahist, 1):
        a.pos = pos

    # Eliminate clean merges from history.
    # The oldest point has nothing older to compare to and is always kept.
    if nomrg:
        kept = [a for a, ao in zip(ahist, ahist[1:])
                if (   a.type != AscPoint.ATYPE_MOD
                    or not merge_modified(ao.msg, a.msg))]
        kept.append(ahist[-1])
        ahist = kept

    # Eliminate contiguous chain of modifications equal under the filter,
    # except for the earliest in the chain.
    # (After elimination of clean merges.)
    if hfilter:
        def filtered (fmsg):
            fmsg = MessageUnsafe(fmsg)
            fmsg.msgstr = list(map(hfilter, fmsg.msgstr))
            return fmsg
        kept = []
        prevmod = None
        # Walk from the oldest point towards the newest.
        for a in reversed(ahist):
            if (   a.type != AscPoint.ATYPE_MOD or not prevmod
                or filtered(a.msg).inv != prevmod.msg.inv
            ):
                kept.append(a)
                if a.type == AscPoint.ATYPE_MOD:
                    prevmod = AscPoint(a)
                    prevmod.msg = filtered(a.msg)
        kept.reverse()
        ahist = kept

    # Reduce history to particular segments of diffs between modifications.
    # (After filtering).
    if addrem:
        newer = None
        for a in ahist:
            if a.type != AscPoint.ATYPE_MOD:
                continue
            if newer is not None:
                # The diff is embedded into the newer modification.
                msg_ediff(a.msg, newer.msg, emsg=newer.msg,
                          addrem=addrem)
            newer = a

    return ahist
0502 
0503 
def _collect_ascription_history_w (msg, acat, aconf, before, seenmsg,
                                   shallow=False):
    """
    Worker for collecting ascription history, recursing into the past.

    Collects ascription points for C{msg} from C{acat} which are not newer
    than the point C{before} (if given). Unless C{shallow} is set,
    and the earliest message reached is fuzzy with a previous C{msgid},
    the collection continues from the message built out of previous fields.
    Keys of visited messages are added to C{seenmsg}; a message whose key
    is already there yields no history.

    @returns: ascription points, in the order delivered by
        L{collect_ascription_history_segment} for each visited message
    @rtype: [AscPoint*]
    """

    # Avoid circular paths.
    if msg.key in seenmsg:
        return []
    seenmsg.add(msg.key)

    # Collect history from current ascription message.
    ahist = []
    if msg in acat:
        segment = collect_ascription_history_segment(acat[msg], acat, aconf)
        ahist = [a for a in segment
                 if not before or a.date <= before.date]

    if shallow:
        return ahist

    # Continue into the past by pivoting around earliest message if fuzzy.
    pivot = ahist[-1].msg if ahist else msg
    if pivot.fuzzy and pivot.msgid_previous:
        pmsg = MessageUnsafe()
        for field in _id_fields:
            setattr(pmsg, field, pivot.get(field + "_previous"))
        # All ascriptions beyond the pivot must be older than the oldest so far.
        after = ahist[-1] if ahist else before
        ahist.extend(_collect_ascription_history_w(pmsg, acat, aconf, after,
                                                   seenmsg))

    return ahist
0537 
0538 
def collect_ascription_history_segment (amsg, acat, aconf):
    """
    Collect the segment of ascription history held by one ascription message.

    C{amsg} is an ascription message from the ascription catalog C{acat},
    which falls under the ascription configuration C{aconf}.
    It holds a part of the ascription history of some message,
    and only that part is returned by this function,
    as a list of L{AscPoint} objects ordered from the newest
    to the oldest by date of ascription.

    @param amsg: the ascription message from the ascription catalog
    @type amsg: L{Message_base<message.Message_base>}
    @param acat: the ascription catalog
    @type acat: L{Catalog<catalog.Catalog>}
    @param aconf: the ascription configuration which covers the catalogs
    @type aconf: L{AscConfig}

    @returns: the ascription history segment
    @rtype: [AscPoint*]
    """

    points = []
    # Parsing state per tracked field, carried from one ascription
    # to the next: current positions and previously seen values.
    spos = {field: [0] for field in _nonid_fields_tracked}
    pvals = {field: [[]] for field in _nonid_fields_tracked}

    for aflds in _parse_ascription_fields(amsg, acat, aconf):
        a = AscPoint()
        a.user, a.type, a.tag, a.date, a.slen, a.fuzz, a.obs = aflds
        if not a.slen:
            # No separator, message same as for the previous ascription.
            pmsg = MessageUnsafe(points[-1].msg) # must exist
        else:
            # Separator existing, reconstruct the fields.
            shead = _field_separator_head(a.slen)
            pmsg = MessageUnsafe()
            for field in _id_fields:
                setattr(pmsg, field, amsg.get(field))
            for field in _nonid_fields_tracked:
                pmsg_seq = []
                for i, aval in enumerate(_get_as_sequence(amsg, field)):
                    pval = _amsg_step_value(aval, shead, "\n",
                                            spos[field], pvals[field], i)
                    # ...do not break if None, has to roll all spos items
                    if pval is None:
                        continue
                    while i >= len(pmsg_seq):
                        pmsg_seq.append("")
                    pmsg_seq[i] = pval
                _set_from_sequence(pmsg_seq, pmsg, field)
        if a.fuzz:
            pmsg.flag.add("fuzzy")
        elif "fuzzy" in pmsg.flag:
            pmsg.flag.remove("fuzzy")
        pmsg.obsolete = a.obs
        a.rmsg, a.msg = amsg, pmsg
        points.append(a)

    # Newest first; among equal time stamps, the one parsed later comes first.
    order = sorted(range(len(points)),
                   key=lambda i: (points[i].date, i), reverse=True)
    return [points[i] for i in order]
0603 
0604 
def _parse_ascription_fields (amsg, acat, aconf):
    """
    Get ascriptions from given ascription message as list of tuples
    C{(user, type, tag, date, seplen, isfuzzy, isobsolete)},
    with date being a real C{datetime} object.

    Each automatic comment of C{amsg} is expected in the form
    C{type[/tag]: user | date [| flags-and-separator-length]}.
    Malformed comments, and comments by users not known to C{aconf},
    are reported by a warning and skipped.

    Fuzzy and obsolete states are sticky: they are reset on every
    modification ascription, and carried over to the review
    ascriptions which follow it.

    @param amsg: the ascription message
    @param acat: the ascription catalog (used for warnings only)
    @param aconf: the ascription configuration (for the known users)

    @returns: parsed ascriptions, in order of comments
    @rtype: [(string, string, string, datetime, int, bool, bool)*]
    """

    ascripts = []
    # Initial states, in case the first ascription is not a modification
    # (the states would otherwise be read before ever being assigned).
    isfuzz = False
    isobs = False
    for cmnt in amsg.auto_comment:
        p = cmnt.find(":")
        if p < 0:
            warning_on_msg(_("@info",
                             "No type "
                             "in ascription comment '%(cmnt)s'.",
                             cmnt=cmnt), amsg, acat)
            continue
        # Type may be followed by a review tag.
        atype = cmnt[:p].strip()
        atag = ""
        lst = atype.split(_atag_sep, 1)
        if len(lst) == 2:
            atype = lst[0].strip()
            atag = lst[1].strip()
        lst = cmnt[p+1:].split("|")
        if len(lst) < 2 or len(lst) > 3:
            warning_on_msg(_("@info",
                             "Wrong number of descriptors "
                             "in ascription comment '%(cmnt)s'.",
                             cmnt=cmnt), amsg, acat)
            continue

        auser = lst.pop(0).strip()
        if not auser:
            warning_on_msg(_("@info",
                             "Malformed user string "
                             "in ascription comment '%(cmnt)s'.",
                             cmnt=cmnt), amsg, acat)
            continue
        if auser not in aconf.users:
            warning_on_msg(_("@info",
                             "Unknown user "
                             "in ascription comment '%(cmnt)s'.",
                             cmnt=cmnt), amsg, acat)
            continue

        datestr = lst.pop(0).strip()
        try:
            date = parse_datetime(datestr)
        except Exception:
            # Any parsing failure means a bad date; but do not swallow
            # interpreter exits and keyboard interrupts.
            warning_on_msg(_("@info",
                             "Malformed date string "
                             "in ascription comment '%(cmnt)s'.",
                             cmnt=cmnt), amsg, acat)
            continue

        # States are reset only on modification ascriptions,
        # in order to keep them for the following review ascriptions.
        if atype == AscPoint.ATYPE_MOD:
            isfuzz = False
            isobs = False
        seplen = 0
        if lst:
            # Optional state markers, then optional separator length.
            tmp = lst.pop(0).strip()
            if _mark_fuzz in tmp:
                isfuzz = True
                tmp = tmp.replace(_mark_fuzz, "", 1)
            if _mark_obs in tmp:
                isobs = True
                tmp = tmp.replace(_mark_obs, "", 1)
            if tmp:
                try:
                    seplen = int(tmp)
                except ValueError:
                    warning_on_msg(_("@info",
                                     "Malformed separator length "
                                     "in ascription comment '%(cmnt)s'.",
                                     cmnt=cmnt), amsg, acat)
                    continue

        ascripts.append((auser, atype, atag, date, seplen, isfuzz, isobs))

    return ascripts
0686 
0687 
def _amsg_step_value (aval, shead, stail, spos, pvals, i):
    """
    Step over the next separator in an ascription field value.

    Searching from the current position C{spos[i]} in C{aval},
    the next separator is taken to start at the first C{shead}
    and to end at the first following C{stail}
    (or at the end of C{aval} if there is none).
    The text between the separator head and tail holds modifiers:
    an equality modifier followed by digits selects the value
    of that index among values previously collected in C{pvals[i]},
    a none modifier makes the value C{None}, and otherwise
    the value is the text from the current position to the separator.

    C{spos} and C{pvals} are extended as needed to have the index C{i},
    C{spos[i]} is moved behind the separator,
    and the value is appended to C{pvals[i]}.

    @returns: the value at this step
    @rtype: string or None
    """

    # Make sure the state for this index exists.
    while len(spos) <= i:
        spos.append(0)
    while len(pvals) <= i:
        pvals.append([])

    start = spos[i]
    sep_start = aval.find(shead, start)
    sep_end = aval.find(stail, sep_start + 1)
    if sep_end < 0:
        sep_end = len(aval)
    spos[i] = sep_end + len(stail)

    mods = aval[sep_start + len(shead):sep_end]
    if _trsep_mod_eq in mods:
        # Pick up the index of the earlier value, given right after modifier.
        num_start = mods.find(_trsep_mod_eq) + len(_trsep_mod_eq)
        num_end = num_start
        while num_end < len(mods) and mods[num_end].isdigit():
            num_end += 1
        pval = pvals[i][int(mods[num_start:num_end])]
    elif _trsep_mod_none in mods:
        pval = None
    else:
        pval = aval[start:sep_start]

    pvals[i].append(pval)
    return pval
0715 
0716 
# Building elements of separators in ascription field values:
# first character and repeated character of the separator head,
# and modifiers for no value and for value equal to an earlier one.
_trsep_head = "|"
_trsep_head_ext = "~"
_trsep_mod_none = "x"
_trsep_mod_eq = "e"

def _field_separator_head (length):
    """
    Assemble the separator head for the given separator length:
    the head character followed by C{length} extension characters.
    """

    extension = _trsep_head_ext * length
    return "%s%s" % (_trsep_head, extension)
0725 
0726 
def _needed_separator_length (msg):
    """
    Find the smallest separator length (starting from 1) for which
    the separator head does not appear in any tracked field of C{msg}.
    """

    def field_has (field, sephead):
        # Fields without value cannot clash; single strings count as one value.
        values = msg.get(field)
        if values is None:
            return False
        if isinstance(values, str):
            values = [values]
        return any(sephead in value for value in values)

    seplen = 0
    while True:
        seplen += 1
        sephead = _field_separator_head(seplen)
        if not any(field_has(field, sephead)
                   for field in _nonid_fields_tracked):
            return seplen
0749 
0750 
def _get_as_sequence (msg, field, asc=True):
    """
    Get the value of a message field in form of a sequence.

    Missing values give an empty list, single-value fields
    a one-element list, and non-empty comment fields
    a one-element list with comment lines joined by newlines.
    Other values are returned as they are.
    If C{asc} is C{False} and the message is not fuzzy,
    previous fields are reported as empty.
    """

    if not (asc or msg.fuzzy) and field in _fields_previous:
        # Ignore previous fields on non-ascription messages without fuzzy flag.
        return []

    value = msg.get(field)
    if value is None:
        return []
    if field not in _multiple_fields:
        return [value]
    if field in _fields_comment and value:
        # Report comments as a single newline-delimited entry.
        return ["\n".join(value)]
    return value
0768 
0769 
def _set_from_sequence (msg_seq, msg, field):
    """
    Set a message field from a value in form of a sequence.

    Single-value fields get the first element (C{None} if there is none),
    comment fields get the first element split on newlines
    (empty list if there is none), and other fields the sequence itself.
    On L{Message} objects, sequence values are set as L{Monlist}.
    """

    if field not in _multiple_fields:
        # Single entry; set to given, or to None if no elements.
        setattr(msg, field, msg_seq[0] if msg_seq else None)
        return

    if field in _fields_comment:
        # Comments treated as single newline-delimited entries; split.
        msg_val = msg_seq[0].split("\n") if msg_seq else []
    else:
        # Straight sequence.
        msg_val = msg_seq

    if isinstance(msg, Message):
        msg_val = Monlist(msg_val)

    setattr(msg, field, msg_val)
0793 
0794 
0795 # -----------------------------------------------------------------------------
0796 # Writing ascriptions.
0797 
0798 
def ascribe_modification (msg, user, dt, acat, aconf):
    """
    Record a modification ascription for a message.

    The ascription is of the type L{AscPoint.ATYPE_MOD}
    and carries no review tags.

    @param msg: modified message which is being ascribed
    @type msg: L{Message_base<message.Message_base>}
    @param user: user to whom the ascription is made
    @type user: string
    @param dt: the time stamp when the ascription is made
    @type dt: datetime.datetime
    @param acat: the ascription catalog
    @type acat: L{Catalog<catalog.Catalog>}
    @param aconf: the ascription configuration
    @type aconf: L{AscConfig}
    """

    atype = AscPoint.ATYPE_MOD
    _ascribe_any(msg, user, acat, atype, [], aconf, dt=dt)
0816 
0817 
def ascribe_review (msg, user, dt, tags, acat, aconf):
    """
    Record a review ascription for the message.

    The ascription is written with the review ascription type,
    once for each of the given review tags.

    @param msg: reviewed message which is being ascribed
    @type msg: L{Message_base<message.Message_base>}
    @param user: user to whom the ascription is made
    @type user: string
    @param dt: the time stamp when the ascription is made
    @type dt: datetime.datetime
    @param tags: review tags
    @type tags: [string*]
    @param acat: the ascription catalog
    @type acat: L{Catalog<catalog.Catalog>}
    @param aconf: the ascription configuration
    @type aconf: L{AscConfig}
    """

    _ascribe_any(msg, user, acat, AscPoint.ATYPE_REV, tags, aconf, dt=dt)
0837 
0838 
# Separator between the ascription type and the tag
# in the name of an ascription comment field.
_atag_sep = "/"
# Marker added to the ascription comment when the ascribed message is fuzzy.
_mark_fuzz = "f"
# Marker added to the ascription comment when the ascribed message is obsolete.
_mark_obs = "o"
0842 
def _ascribe_any (msg, user, acat, atype, atags, aconf, dt=None):
    """
    Write an ascription of the given type for the message
    into the ascription catalog.

    An ascription comment is appended to the ascription message for every
    tag (or once, with no tag, if there are no tags). Only the first comment
    carries the extra marker segment (separator length, obsolete and
    fuzzy markers), which is added when the message differs from
    the last recorded one. Non-ID fields are recorded only when they differ,
    and the fuzzy/obsolete state of the ascription message is always
    synchronized with the message.

    @param msg: message which is being ascribed
    @param user: user to whom the ascription is made
    @param acat: the ascription catalog
    @param atype: ascription type
    @param atags: ascription tags
    @param aconf: the ascription configuration
    @param dt: the time stamp of the ascription
    """

    # Fetch the ascription message, creating it at the end of
    # the catalog from ID fields of the original if not yet present.
    if msg in acat:
        amsg = acat[msg]
    else:
        amsg = Message()
        for idfield in _id_fields:
            setattr(amsg, idfield, getattr(msg, idfield))
        acat.add_last(amsg)

    # History so far, ordered from first to last.
    rahist = collect_ascription_history_segment(amsg, acat, aconf)
    rahist.reverse()

    # Compare with the last historical message, if any.
    if rahist:
        last = rahist[-1]
        state_changed = last.msg.state() != msg.state()
        nonid_changed = _has_nonid_diff(last.msg, msg)
    else:
        nonid_changed = True
        state_changed = True

    # Compose the ascription comment, with and without marker segment.
    stamp = user + " | " + format_datetime(dt, wsec=True)
    stamp_marked = stamp
    if nonid_changed or state_changed:
        marks = []
        if nonid_changed:
            seplen = _needed_separator_length(msg)
            marks.append(str(seplen))
        if msg.obsolete:
            marks.append(_mark_obs)
        if msg.fuzzy:
            marks.append(_mark_fuzz)
        marker = "".join(marks)
        if marker:
            stamp_marked += " | " + marker

    # Only the first appended comment gets the marker segment.
    for pos, atag in enumerate(atags or [""]):
        if atag == "":
            field = atype
        else:
            field = atype + _atag_sep + atag
        _asc_append_field(amsg, field, stamp_marked if pos == 0 else stamp)

    # Record non-ID fields.
    if nonid_changed:
        _add_nonid(amsg, msg, seplen, rahist)

    # Synchronize state.
    if msg.fuzzy:
        amsg.flag.add("fuzzy")
    else:
        amsg.flag.remove("fuzzy")
    amsg.obsolete = bool(msg.obsolete)
0908 
0909 
def _has_nonid_diff (pmsg, msg):
    """
    Check whether any tracked non-ID field differs between
    the earlier message C{pmsg} and the current message C{msg}.

    Previous-value fields of the current message are taken as C{None}
    when the current message is not fuzzy.

    @returns: C{True} if there is a difference, C{False} otherwise
    @rtype: bool
    """

    def effective_value (field):
        value = msg.get(field)
        if not msg.fuzzy and field in _fields_previous:
            # Previous values carry no meaning without the fuzzy flag.
            return None
        return value

    return any(effective_value(field) != pmsg.get(field)
               for field in _nonid_fields_tracked)
0922 
0923 
def _add_nonid (amsg, msg, slen, rahist):
    """
    Append the tracked non-ID fields of C{msg} as a new entry
    to the corresponding fields of the ascription message C{amsg}.

    Every item of every tracked field in C{amsg} accumulates
    newline-delimited entries, one per recorded modification. The new entry
    is either the item text followed by the separator head, a reference
    to an earlier modification with equal text, or a "none" marker
    when the item does not exist in C{msg}.

    @param amsg: ascription message to extend (modified in place)
    @param msg: message whose fields are being recorded
    @param slen: separator length for this modification
    @param rahist: ascription history points collected so far
        (NOTE(review): the visible caller passes them ordered
        from first to last)
    """

    shead = _field_separator_head(slen)
    # One "none" entry for each earlier modification (history points with
    # a separator length), used to pad items which did not exist before.
    nones = [_field_separator_head(x.slen) + _trsep_mod_none
             for x in rahist if x.slen]
    padnone = "\n".join(nones)

    for field in _nonid_fields_tracked:

        msg_seq = _get_as_sequence(msg, field, asc=False)
        amsg_seq = _get_as_sequence(amsg, field)

        # Expand items to length in new message.
        for i in range(len(amsg_seq), len(msg_seq)):
            amsg_seq.append(padnone)

        # Add to items.
        for i in range(len(amsg_seq)):
            if i < len(msg_seq):
                # Look for the first earlier modification in which
                # this item had the same text; i_eq becomes its ordinal
                # among modifications (counted by nmod).
                nmod = 0
                i_eq = None
                for a in rahist:
                    if not a.slen: # no modification in this ascription
                        continue
                    if i_eq is None:
                        msg_seq_p = _get_as_sequence(a.msg, field)
                        if i < len(msg_seq_p) and msg_seq[i] == msg_seq_p[i]:
                            i_eq = nmod
                            # ...no break, need number of modifications.
                    nmod += 1
                if i_eq is None:
                    # New text: record it in full.
                    add = msg_seq[i] + shead
                else:
                    # Same text as an earlier modification: record reference.
                    add = shead + _trsep_mod_eq + str(i_eq)
            else:
                # Item no longer present in the message.
                add = shead + _trsep_mod_none
            # Entries within an item are separated by newlines.
            if amsg_seq[i]:
                amsg_seq[i] += "\n"
            amsg_seq[i] += add

        _set_from_sequence(amsg_seq, amsg, field)
0965 
0966 
# Separator between the field name and the value in ascription comments.
fld_sep = ":"

def _asc_append_field (msg, field, value):
    """
    Append an ascription entry of the form "FIELD: VALUE"
    to automatic comments of the message.

    @param msg: message to which the comment is added
    @param field: name of the ascription field
    @type field: string
    @param value: value of the field; converted to string
    """

    entry = field + fld_sep + " " + str(value)
    msg.auto_comment.append(entry)
0973 
0974 
0975 # -----------------------------------------------------------------------------
0976 # Utilities for comparing and selecting ascriptions.
0977 
0978 
def ascription_equal (msg1, msg2):
    """
    Check whether two messages are the same as far as ascription goes.

    Messages are equal if they are in the same state and all non-ID fields
    relevant for that kind of message (fuzzy or non-fuzzy) are equal.

    @param msg1: first message
    @type msg1: L{Message_base<message.Message_base>}
    @param msg2: second message
    @type msg2: L{Message_base<message.Message_base>}

    @returns: C{True} if messages are equal, C{False} otherwise
    @rtype: bool
    """

    if msg1.state() != msg2.state():
        return False

    # The set of compared fields depends on fuzziness.
    check_fields = (_nonid_fields_eq_fuzzy if msg1.fuzzy
                    else _nonid_fields_eq_nonfuzzy)

    return not any(msg1.get(field) != msg2.get(field)
                   for field in check_fields)
1002 
1003 
def merge_modified (msg1, msg2):
    """
    Check whether the second message could have been produced
    from the first one by merging with templates.

    @param msg1: first message
    @type msg1: L{Message_base<message.Message_base>}
    @param msg2: second message
    @type msg2: L{Message_base<message.Message_base>}

    @returns: C{True} if C{msg2} is derived by merging from C{msg1},
        C{False} otherwise
    @rtype: bool
    """

    # Going from obsolete to current, merging can do anything.
    if msg1.obsolete and not msg2.obsolete:
        return True

    # Merging never touches manual comments.
    if msg1.manual_comment != msg2.manual_comment:
        return False

    # Which original fields must agree depends on fuzziness:
    # same fuzziness compares like with like, different fuzziness
    # compares previous fields of the fuzzy message
    # with current fields of the non-fuzzy one.
    if msg1.fuzzy == msg2.fuzzy:
        names = (msg1.fuzzy and _fields_previous) or _fields_current
        field_pairs = [(name, name) for name in names]
    else:
        field_pairs = (
            (msg1.fuzzy and list(zip(_fields_previous, _fields_current)))
            or list(zip(_fields_current, _fields_previous)))
    for name1, name2 in field_pairs:
        if msg1.get(name1) != msg2.get(name2):
            return False

    # Translation stays the same on merge, other than being
    # multiplied or reduced when plurality changes.
    same_plurality = not ((msg1.msgid_plural is None)
                          != (msg2.msgid_plural is None))
    if same_plurality:
        if msg1.msgstr != msg2.msgstr:
            return False
        return True

    if not msg1.fuzzy and not msg2.fuzzy:
        # Two non-fuzzy messages cannot differ in plurality.
        return False
    if msg1.msgid_plural is not None:
        # Plural reduced to non-plural: first form must be kept.
        if msg1.msgstr[0] != msg2.msgstr[0]:
            return False
    else:
        # Non-plural multiplied to plural: all forms equal the original.
        if any(form != msg1.msgstr[0] for form in msg2.msgstr):
            return False

    return True
1061 
1062 
def first_non_fuzzy (ahist, start=0):
    """
    Locate the first ascription point at or after the given position
    which has a message that is not fuzzy.

    Points without a message are skipped.

    @param ahist: the ascription history
    @type ahist: [AscPoint*]
    @param start: position in history to start searching from
    @type start: int

    @returns: index of first non-fuzzy message, or None if there is none such
    @rtype: int or None
    """

    for pos in range(start, len(ahist)):
        candidate = ahist[pos].msg
        if not candidate or candidate.fuzzy:
            continue
        return pos

    return None
1082 
1083 
def has_tracked_parts (msg):
    """
    Tell whether the message contains anything tracked by ascription.

    A pristine untranslated message, for example, has no tracked parts.
    The plural original text by itself does not count as a tracked part,
    and the translation counts only if at least one of its forms
    is not empty.

    @returns: C{True} if there are any tracked parts, C{False} otherwise
    @rtype: bool
    """

    for name in _nonid_fields_tracked:
        value = msg.get(name)
        if name in _multiple_fields:
            if name == "msgstr":
                # At least one non-empty translation form.
                tracked = any(value)
            else:
                tracked = bool(value)
        else:
            tracked = value is not None and name != "msgid_plural"
        if tracked:
            return True

    return False
1109 
1110 
1111 # -----------------------------------------------------------------------------
1112 # Argument parsing for selectors.
1113 
def parse_users (userspec, aconf):
    """
    Parse a specification of ascription users.

    The specification is a comma-separated list of user names.
    A leading tilde (~) inverts the selection: all users defined
    in the ascription configuration are selected, except those listed.

    An exception is raised if the specification mentions a user
    not defined in the ascription configuration.

    @param userspec: the user specification
    @type userspec: string
    @param aconf: the ascription configuration
    @type aconf: L{AscConfig}

    @returns: selected user names
    @rtype: set(string*)
    """

    errfmt = t_("@info",
                "User '%(name)s' not defined in '%(file)s'.")
    return _parse_fixed_set(userspec, aconf, aconf.users, errfmt)
1138 
1139 
def parse_review_tags (tagspec, aconf):
    """
    Parse a specification of review tags.

    The specification is a comma-separated list of tags.
    A leading tilde (~) inverts the selection: all review tags defined
    in the ascription configuration are selected, except those listed.
    If no tags end up selected, the result contains only the empty tag.

    An exception is raised if the specification mentions a tag
    not defined in the ascription configuration.

    @param tagspec: the review tag specification
    @type tagspec: string
    @param aconf: the ascription configuration
    @type aconf: L{AscConfig}

    @returns: selected review tags
    @rtype: set(string*)
    """

    errfmt = t_("@info",
                "Review tag '%(name)s' "
                "not defined in '%(file)s'.")
    tags = _parse_fixed_set(tagspec, aconf, aconf.revtags, errfmt)

    return tags or {""}
1169 
1170 
def _parse_fixed_set (elstr, aconf, knownels, errfmt):
    """
    Parse a comma-separated list of elements taken from a known collection.

    Spaces in the specification are ignored. If the specification starts
    with tilde (~), the result is all known elements except those listed.

    @param elstr: the specification string
    @param aconf: the ascription configuration (its path is reported
        in the error message)
    @param knownels: collection of all valid elements
    @param errfmt: deferred-translation error message with
        C{name} and C{file} arguments

    @returns: selected elements (empty set for empty specification)
    @rtype: set(string*)
    """

    if not elstr:
        return set()

    spec = elstr.replace(" ", "")
    inverted = spec.startswith("~")
    if inverted:
        spec = spec[1:]

    listed = set(spec.split(","))
    for el in listed:
        if el not in knownels:
            raise PologyError(
                errfmt.with_args(name=el, file=aconf.path).to_string())

    return set(knownels).difference(listed) if inverted else listed
1191 
1192 # -----------------------------------------------------------------------------
1193 # Caching for selectors.
1194 
# Shared cache of objects built for selectors,
# keyed by tuples starting with the kind of the cached object.
_cache = {}

def cached_matcher (expr):
    """
    Get the message matcher for the given expression,
    for use in ascription selectors.

    The matcher function is created the first time a particular expression
    is requested and stored in the cache; every later request
    for the same expression returns the stored matcher.

    @param expr: the matching expression; see
        L{make_msg_fmatcher<match.make_msg_fmatcher>} for details
    @type expr: string

    @returns: the matcher function
    @rtype: (L{Message_base<message.Message_base>},
        L{Catalog<catalog.Catalog>})->bool
    """

    key = ("matcher", expr)
    if key in _cache:
        return _cache[key]

    matcher = make_msg_fmatcher(expr, abort=True)
    _cache[key] = matcher
    return matcher
1221 
1222 
def cached_users (userspec, aconf, utype=None):
    """
    Get the set of users for the given user specification,
    for use in ascription selectors.

    The specification is parsed the first time a particular combination
    of user specification C{userspec}, ascription configuration C{aconf},
    and "user type" C{utype} is requested, and the resulting set is
    stored in the cache; every later request for the same combination
    returns the stored set.
    C{utype} is just an arbitrary string, which makes it possible
    to cache users separately by category.

    @param userspec: the user specification; see L{parse_users} for details
    @type userspec: string
    @param aconf: the ascription configuration
    @type aconf: L{AscConfig}
    @param utype: user type
    @type utype: string

    @returns: the set of users
    @rtype: set(string*)
    """

    key = ("users", userspec, aconf, utype)
    if key in _cache:
        return _cache[key]

    users = parse_users(userspec, aconf)
    _cache[key] = users
    return users
1252 
1253 
def cached_review_tags (tagspec, aconf):
    """
    Get the set of review tags for the given tag specification,
    for use in ascription selectors.

    The specification is parsed the first time a particular combination
    of tag specification C{tagspec} and ascription configuration C{aconf}
    is requested, and the resulting set is stored in the cache;
    every later request for the same combination returns the stored set.

    @param tagspec: the tag specification; see L{parse_review_tags} for details
    @type tagspec: string
    @param aconf: the ascription configuration
    @type aconf: L{AscConfig}

    @returns: the set of tags
    @rtype: set(string*)
    """

    key = ("tags", tagspec, aconf)
    if key in _cache:
        return _cache[key]

    tags = parse_review_tags(tagspec, aconf)
    _cache[key] = tags
    return tags
1279 
1280 
1281 # -----------------------------------------------------------------------------
1282 # Making selectors.
1283 
1284 # Build compound selector out of list of specifications.
1285 # Selector specification is a string in format NAME:ARG1:ARG2:...
1286 # (instead of colon, separator can be any non-alphanumeric excluding
1287 # underscore and hyphen)
def make_ascription_selector (selspecs, hist=False):
    """
    Build compound ascription selector out of string specifications
    of basic selectors.

    Selector specification string has the format NAME:ARG1:ARG2:...
    Instead of colon, separator can be any non-alphanumeric character
    used consistently, except for underscore and hyphen.
    If the name starts with letter C{n}, that letter is removed
    and the basic selector with the remaining name is negated.
    The compound selector is obtained by constructing each
    basic selector according to the specification in turn,
    and linking them with AND-boolean semantics.

    Parameter C{hist} determines whether the compound selector should
    be a history selector (C{True}) or a shallow selector (C{False}).
    If a history selector is required but cannot be made from
    the given composition of basic selectors, an exception is raised.

    With no specifications given, the compound selector selects nothing
    (it returns C{False} as shallow and C{None} as history selector).

    @param selspecs: specifications of basic selectors
    @type selspecs: [string*]
    @param hist: C{True} if the compound selector should be history selector,
        C{False} if it should be shallow selector
    @type hist: bool

    @returns: the compound selector
    @rtype: (L{Message_base<message.Message_base>}, L{Catalog<catalog.Catalog>},
        [AscPoint*], L{AscConfig})->bool (shallow),
        (...)->int/None (history)

    @raise PologyError: if a selector is unknown, cannot be used
        as requested, or cannot be created from its arguments
    """

    # Component selectors.
    selectors = []
    for selspec in selspecs:
        # The first character which cannot be part of a selector name
        # is the argument separator for this specification.
        argsep = next((c for c in selspec
                       if not (c.isalpha() or c.isdigit() or c in ("_", "-"))),
                      ":")
        lst = selspec.split(argsep)
        sname, sargs = lst[0], lst[1:]

        # Leading "n" requests negation of the selector.
        negated = False
        if sname.startswith("n"):
            sname = sname[1:]
            negated = True

        sfactory, can_hist = _selector_factories.get(sname, (None, False))
        if not sfactory:
            raise PologyError(
                _("@info",
                  "Unknown selector '%(sel)s'.",
                  sel=sname))
        if hist:
            if not can_hist:
                raise PologyError(
                    _("@info",
                      "Selector '%(sel)s' cannot be used "
                      "as history selector.",
                      sel=sname))
            if negated:
                raise PologyError(
                    _("@info",
                      "Negated selectors (here '%(sel)s') cannot be used "
                      "as history selectors.",
                      sel=sname))
        try:
            selector = sfactory(sargs)
        except PologyError as e:
            # Report which specification failed, keeping the original cause.
            raise PologyError(
                _("@info",
                  "Selector '%(sel)s' not created due to "
                  "the following error:\n"
                  "%(msg)s",
                  sel=selspec, msg=str_to_unicode(str(e)))) from e
        if negated:
            selector = _negate_selector(selector)
        selectors.append((selector, selspec))

    # Compound selector.
    # The result for an empty composition is the "not selected" value
    # of the requested selector kind.
    res0 = None if hist else False

    def cselector (msg, cat, ahist, aconf):
        res = res0
        for selector, selspec in selectors:
            try:
                res = selector(msg, cat, ahist, aconf)
            except PologyError as e:
                raise PologyError(
                    _("@info",
                      "Selector '%(sel)s' failed on message "
                      "%(file)s:%(line)d:(#%(entry)d) "
                      "with the following error:\n"
                      "%(msg)s",
                      sel=selspec, file=cat.filename, line=msg.refline,
                      entry=msg.refentry, msg=str_to_unicode(str(e)))) from e
            # AND semantics: the first falsy result decides.
            if not res:
                return res
        return res

    return cselector
1387 
1388 
def _negate_selector (selector):
    """
    Wrap a selector so that the truth value of its result is inverted.

    @param selector: selector to negate
    @returns: selector returning C{True} where the original result
        is falsy, and C{False} otherwise
    """

    def inverted (*call_args):
        return not selector(*call_args)

    return inverted
1395 
1396 
# External modules loaded so far, by generated module name.
_external_mods = {}

def import_ascription_extensions (modpath):
    """
    Import extensions to ascription functionality from a Python module.

    Additional selector factories can be introduced by defining
    the C{asc_selector_factories} dictionary,
    in which the key is the selector name,
    and the value a tuple of the selector factory function
    and the indicator of whether the selector can be used as
    a history selector or not.
    For example::

        asc_selector_factories = {
            # key: (function, can_be_used_as_history_selector),
            "specsel1": (selector_specsel1, True),
            "specsel2": (selector_specsel2, False),
            ...
        }

    Any other name in the module which starts with C{asc_}
    is reported with a warning as unknown external resource.

    @param modpath: path to Python file
    @type modpath: string

    @raise PologyError: if the file cannot be read
    """

    # Read the source of the external module.
    # The file is closed on leaving the with-block.
    try:
        with open(unicode_to_str(modpath)) as modfile:
            module_code = modfile.read()
    except IOError as e:
        raise PologyError(
            _("@info",
              "Cannot load external module '%(file)s'.",
              file=modpath)) from e

    # Execute the source in the namespace of a new module.
    # NOTE: This runs arbitrary code from the given file,
    # so the path must come from a trusted source.
    modname = "mod" + str(len(_external_mods))
    xmod = ModuleType(modname)
    exec(module_code, xmod.__dict__)
    _external_mods[modname] = xmod # to avoid garbage collection

    # Collect everything collectable from the module.

    xms = []

    xms.append("asc_selector_factories")
    selector_factories = getattr(xmod, xms[-1], None)
    if selector_factories is not None:
        _selector_factories.update(selector_factories)

    # Warn of unknown externals.
    known_xms = set(xms)
    for xm in [x for x in dir(xmod) if x.startswith("asc_")]:
        if xm not in known_xms:
            warning(_("@info",
                      "Unknown external resource '%(res)s' "
                      "in module '%(file)s'.",
                      res=xm, file=modpath))
1456 
1457 
# Registry of basic selector factories.
# Each value is a tuple of the factory function, which is called with
# the list of selector arguments and returns the selector, and the indicator
# of whether the selector can be used as a history selector.
# Internal factories register themselves below; external modules can add
# more through import_ascription_extensions().
_selector_factories = {
    # key: (function, can_be_used_as_history_selector),
}
1462 
1463 # -----------------------------------------------------------------------------
1464 # Internal selector factories.
1465 # Use make_ascription_selector() to create selectors.
1466 
1467 # NOTE:
1468 # Plain selectors should return True or False.
1469 # History selectors should return 1-based index into ascription history
1470 # when the appropriate historical message is found, and 0 otherwise.
1471 # In this way, when it is only necessary to test if a message is selected,
1472 # returns from both types of selectors can be tested for simple falsity/truth,
1473 # and non-zero integer return always indicates history selection.
1474 
def _selector_any (args):
    """
    Make the selector which selects every message.

    @param args: selector arguments; none are accepted
    @returns: shallow selector
    """

    if len(args) > 0:
        raise PologyError(_("@info", "Wrong number of arguments."))

    def select_all (msg, cat, ahist, aconf):
        return True

    return select_all
1485 
1486 _selector_factories["any"] = (_selector_any, False)
1487 
1488 
def _selector_active (args):
    """
    Make the selector of messages which are translated and not obsolete.

    @param args: selector arguments; none are accepted
    @returns: shallow selector
    """

    if len(args) > 0:
        raise PologyError(_("@info", "Wrong number of arguments."))

    def select_active (msg, cat, ahist, aconf):
        return msg.translated and not msg.obsolete

    return select_active
1499 
1500 _selector_factories["active"] = (_selector_active, False)
1501 
1502 
def _selector_current (args):
    """
    Make the selector of messages which are not obsolete.

    @param args: selector arguments; none are accepted
    @returns: shallow selector
    """

    if len(args) > 0:
        raise PologyError(_("@info", "Wrong number of arguments."))

    def select_current (msg, cat, ahist, aconf):
        return not msg.obsolete

    return select_current
1513 
1514 _selector_factories["current"] = (_selector_current, False)
1515 
1516 
def _selector_branch (args):
    """
    Make the selector of messages which belong to at least one
    of the given summit branches.

    @param args: selector arguments; exactly one, the comma-separated
        list of branch IDs
    @returns: shallow selector
    """

    if len(args) != 1:
        raise PologyError(_("@info", "Wrong number of arguments."))
    (branch,) = args
    if not branch:
        raise PologyError(_("@info", "Branch ID must not be empty."))
    wanted = set(branch.split(","))

    def select_by_branch (msg, cat, ahist, aconf):
        # Selected if the message shares any branch with the wanted ones.
        return not wanted.isdisjoint(parse_summit_branches(msg))

    return select_by_branch
1531 
1532 _selector_factories["branch"] = (_selector_branch, False)
1533 
1534 
def _selector_unasc (args):
    """
    Make the selector of messages which are not ascribed:
    the first point of the ascription history has no user,
    and the message has some tracked parts.

    @param args: selector arguments; none are accepted
    @returns: shallow selector
    """

    if len(args) > 0:
        raise PologyError(_("@info", "Wrong number of arguments."))

    def select_unascribed (msg, cat, ahist, aconf):
        if ahist[0].user is not None:
            return False
        # Pristine messages are not considered unascribed.
        return has_tracked_parts(msg)

    return select_unascribed
1546 
1547 _selector_factories["unasc"] = (_selector_unasc, False)
1548 
1549 
def _selector_fexpr (args):
    """
    Make the selector of messages which are matched
    by the given matching expression.

    @param args: selector arguments; exactly one, the matching expression
    @returns: shallow selector
    """

    if len(args) != 1:
        raise PologyError(_("@info", "Wrong number of arguments."))
    (expr,) = args
    if not expr:
        raise PologyError(_("@info", "Match expression must not be empty."))

    def select_by_expr (msg, cat, ahist, aconf):
        # The matcher is built on first use and cached afterwards.
        return bool(cached_matcher(expr)(msg, cat))

    return select_by_expr
1564 
1565 _selector_factories["fexpr"] = (_selector_fexpr, False)
1566 
1567 
def _selector_e (args):
    """
    Make the selector of the message with the given entry number.

    @param args: selector arguments; exactly one, the entry number
    @returns: shallow selector
    """

    if len(args) != 1:
        raise PologyError(_("@info", "Wrong number of arguments."))
    (entry,) = args
    if not (entry and entry.isdigit()):
        raise PologyError(
            _("@info",
              "Message entry number must be a positive integer."))
    wanted_entry = int(entry)

    def select_by_entry (msg, cat, ahist, aconf):
        return msg.refentry == wanted_entry

    return select_by_entry
1584 
1585 _selector_factories["e"] = (_selector_e, False)
1586 
1587 
def _selector_l (args):
    """
    Make the selector of the message at the given line number.

    The message is selected if its reference line is at most
    one line away from the given line.

    @param args: selector arguments; exactly one, the line number
    @returns: shallow selector
    """

    if len(args) != 1:
        raise PologyError(_("@info", "Wrong number of arguments."))
    (line,) = args
    if not (line and line.isdigit()):
        raise PologyError(
            _("@info",
              "Message line number must be a positive integer."))
    wanted_line = int(line)

    def select_by_line (msg, cat, ahist, aconf):
        return -1 <= msg.refline - wanted_line <= 1

    return select_by_line
1604 
1605 _selector_factories["l"] = (_selector_l, False)
1606 
1607 
1608 # Select messages between and including first and last reference by entry.
1609 # If first entry is not given, all messages to the last entry are selected.
1610 # If last entry is not given, all messages from the first entry are selected.
def _selector_espan (args):
    """
    Make the selector of messages within a span of entry numbers,
    both ends included.

    @param args: selector arguments; the first entry number and,
        optionally, the last entry number. Either may be empty
        to leave that end of the span open, but not both.
    @returns: shallow selector
    """

    if len(args) < 1 or len(args) > 2:
        raise PologyError(_("@info", "Wrong number of arguments."))
    first = args[0]
    last = args[1] if len(args) == 2 else ""
    if not (first or last):
        raise PologyError(
            _("@info",
              "At least one of the first and last message entry numbers "
              "must be given."))
    if first and not first.isdigit():
        raise PologyError(
            _("@info",
              "First message entry number must be a positive integer."))
    if last and not last.isdigit():
        raise PologyError(
            _("@info",
              "Last message entry number must be a positive integer."))
    lower = int(first) if first else None
    upper = int(last) if last else None

    def select_in_span (msg, cat, ahist, aconf):
        below = lower is not None and msg.refentry < lower
        if below:
            return False
        above = upper is not None and msg.refentry > upper
        return not above

    return select_in_span
1642 
1643 _selector_factories["espan"] = (_selector_espan, False)
1644 
1645 
1646 # Select messages between and including first and last reference by line.
1647 # If first line is not given, all messages to the last line are selected.
1648 # If last line is not given, all messages from the first line are selected.
def _selector_lspan (args):
    """
    Make the selector of messages within a span of line numbers,
    both ends included.

    @param args: selector arguments; the first line number and,
        optionally, the last line number. Either may be empty
        to leave that end of the span open, but not both.
    @returns: shallow selector
    """

    if len(args) < 1 or len(args) > 2:
        raise PologyError(_("@info", "Wrong number of arguments."))
    first = args[0]
    last = args[1] if len(args) == 2 else ""
    if not (first or last):
        raise PologyError(
            _("@info",
              "At least one of the first and last message line numbers "
              "must be given."))
    if first and not first.isdigit():
        raise PologyError(
            _("@info",
              "First message line number must be a positive integer."))
    if last and not last.isdigit():
        raise PologyError(
            _("@info",
              "Last message line number must be a positive integer."))
    lower = int(first) if first else None
    upper = int(last) if last else None

    def select_in_span (msg, cat, ahist, aconf):
        below = lower is not None and msg.refline < lower
        if below:
            return False
        above = upper is not None and msg.refline > upper
        return not above

    return select_in_span
1680 
1681 _selector_factories["lspan"] = (_selector_lspan, False)
1682 
1683 
def _selector_hexpr (args):
    """
    Make the history selector which finds the first point in ascription
    history, optionally limited to given users, whose message is matched
    by the given matching expression.

    If the third argument is given, the expression is not matched against
    historical messages themselves. Instead, each non-fuzzy historical
    message is passed through C{msg_ediff} against the next non-fuzzy
    message in the history (or against a blanked copy of itself,
    if there is no such message), with the argument passed on as C{addrem}.

    @param args: selector arguments: the matching expression,
        optional user specification (see L{parse_users}),
        and optional C{addrem} value for C{msg_ediff}
    @returns: history selector, returning 1-based index into
        ascription history or 0 if nothing is matched
    """

    if not 1 <= len(args) <= 3:
        raise PologyError(_("@info", "Wrong number of arguments."))
    expr = args[0]
    user_spec = args[1] if len(args) > 1 else ""
    addrem = args[2] if len(args) > 2 else ""
    if not expr:
        raise PologyError(
            _("@info",
              "Match expression cannot be empty."))

    def selector (msg, cat, ahist, aconf):

        # Nothing to select in history of unascribed message.
        if ahist[0].user is None:
            return 0

        matcher = cached_matcher(expr)
        users = cached_users(user_spec, aconf)

        # When matching differences, only non-fuzzy messages are considered.
        if not addrem:
            i = 0
        else:
            i = first_non_fuzzy(ahist, 0)
            if i is None:
                return 0

        while i < len(ahist):
            a = ahist[i]
            if users and a.user not in users:
                i += 1
                continue

            if not addrem:
                amsg = a.msg
                i_next = i + 1
            else:
                i_next = first_non_fuzzy(ahist, i + 1)
                if i_next is not None:
                    amsg2 = ahist[i_next].msg
                else:
                    # No other non-fuzzy message to compare with;
                    # compare with a copy in which tracked fields are blank.
                    amsg2 = MessageUnsafe(a.msg)
                    for field in _nonid_fields_tracked:
                        amsg2_value = amsg2.get(field)
                        if amsg2_value is None:
                            pass
                        elif isinstance(amsg2_value, str):
                            setattr(amsg2, field, None)
                        else:
                            # The blanked list must be stored into
                            # the message, not only bound to the local name.
                            setattr(amsg2, field, [""] * len(amsg2_value))
                    i_next = len(ahist)
                # Match on the copy which receives the embedded difference.
                amsg = MessageUnsafe(a.msg)
                msg_ediff(amsg2, amsg, emsg=amsg, addrem=addrem)

            if matcher(amsg, cat):
                return i + 1

            i = i_next

        return 0

    return selector
1746 
1747 _selector_factories["hexpr"] = (_selector_hexpr, True)
1748 
1749 
1750 # Select last ascription (any, or by users).
1751 def _selector_asc (args):
1752 
1753     if not 0 <= len(args) <= 1:
1754         raise PologyError(_("@info", "Wrong number of arguments."))
1755     user_spec = args[0] if len(args) > 0 else ""
1756 
1757     def selector (msg, cat, ahist, aconf):
1758 
1759         if ahist[0].user is None:
1760             return 0
1761 
1762         users = cached_users(user_spec, aconf)
1763 
1764         hi_sel = 0
1765         for i in range(len(ahist)):
1766             a = ahist[i]
1767             if not users or a.user in users:
1768                 hi_sel = i + 1
1769                 break
1770 
1771         return hi_sel
1772 
1773     return selector
1774 
1775 _selector_factories["asc"] = (_selector_asc, True)
1776 
1777 
1778 # Select last modification (any or by users).
1779 def _selector_mod (args):
1780 
1781     if not 0 <= len(args) <= 1:
1782         raise PologyError(_("@info", "Wrong number of arguments."))
1783     user_spec = args[0] if len(args) > 0 else ""
1784 
1785     def selector (msg, cat, ahist, aconf):
1786 
1787         if ahist[0].user is None:
1788             return 0
1789 
1790         users = cached_users(user_spec, aconf)
1791 
1792         hi_sel = 0
1793         for i in range(len(ahist)):
1794             a = ahist[i]
1795             if not a.user:
1796                 continue
1797             if a.type == AscPoint.ATYPE_MOD and (not users or a.user in users):
1798                 hi_sel = i + 1
1799                 break
1800 
1801         return hi_sel
1802 
1803     return selector
1804 
1805 _selector_factories["mod"] = (_selector_mod, True)
1806 
1807 
1808 # Select first modification (any or by m-users, and not by r-users)
1809 # after last review (any or by r-users, and not by m-users).
def _selector_modar (args):
    """
    Build the C{modar} selector by delegating to C{_w_selector_modax}.

    Reviews, but not modifications, are set as cut-off entries,
    and up to three selector arguments are accepted.

    @param args: selector arguments, passed on unchanged
    @returns: selector function as built by C{_w_selector_modax}
    """

    return _w_selector_modax(amod=False, arev=True, args=args, maxnarg=3)
1813 
1814 _selector_factories["modar"] = (_selector_modar, True)
1815 
1816 
1817 # Select first modification (any or by m-users, and not by mm-users)
1818 # after last modification (any or by mm-users, and not by m-users).
def _selector_modam (args):
    """
    Build the C{modam} selector by delegating to C{_w_selector_modax}.

    Modifications, but not reviews, are set as cut-off entries,
    and up to two selector arguments are accepted.

    @param args: selector arguments, passed on unchanged
    @returns: selector function as built by C{_w_selector_modax}
    """

    return _w_selector_modax(amod=True, arev=False, args=args, maxnarg=2)
1822 
1823 _selector_factories["modam"] = (_selector_modam, True)
1824 
1825 
1826 # Select first modification (any or by m-users, and not by rm-users)
# after last review or modification (any or by rm-users, and not by m-users).
def _selector_modarm (args):
    """
    Build the C{modarm} selector by delegating to C{_w_selector_modax}.

    Both modifications and reviews are set as cut-off entries,
    and up to three selector arguments are accepted.

    @param args: selector arguments, passed on unchanged
    @returns: selector function as built by C{_w_selector_modax}
    """

    return _w_selector_modax(amod=True, arev=True, args=args, maxnarg=3)
1831 
1832 _selector_factories["modarm"] = (_selector_modarm, True)
1833 
1834 
1835 # Select first modification of translation
1836 # (any or by m-users, and not by r-users)
1837 # after last review (any or by r-users, and not by m-users).
def _selector_tmodar (args):
    """
    Build the C{tmodar} selector by delegating to C{_w_selector_modax}.

    Reviews, but not modifications, are set as cut-off entries,
    up to three selector arguments are accepted, and translation-only
    mode is switched on.

    @param args: selector arguments, passed on unchanged
    @returns: selector function as built by C{_w_selector_modax}
    """

    return _w_selector_modax(amod=False, arev=True, args=args, maxnarg=3,
                             tronly=True)
1841 
1842 _selector_factories["tmodar"] = (_selector_tmodar, True)
1843 
1844 
1845 # Worker for builders of *moda* selectors.
1846 def _w_selector_modax (amod, arev, args, maxnarg, tronly=False):
1847 
1848     if not 0 <= len(args) <= maxnarg:
1849         raise PologyError(_("@info", "Wrong number of arguments."))
1850     muser_spec = args[0] if len(args) > 0 else ""
1851     rmuser_spec = args[1] if len(args) > 1 else ""
1852     atag_spec = args[2] if len(args) > 2 else ""
1853 
1854     def selector (msg, cat, ahist, aconf):
1855 
1856         if ahist[0].user is None:
1857             return 0
1858 
1859         musers = cached_users(muser_spec, aconf, utype="m")
1860         rmusers = cached_users(rmuser_spec, aconf, utype="rm")
1861         atags = cached_review_tags(atag_spec, aconf)
1862 
1863         hi_sel = 0
1864         for i in range(len(ahist)):
1865             a = ahist[i]
1866 
1867             # Check if this message cancels further modifications.
1868             if (    (   (amod and a.type == AscPoint.ATYPE_MOD)
1869                      or (arev and a.type == AscPoint.ATYPE_REV and a.tag in atags))
1870                 and (not rmusers or a.user in rmusers)
1871                 and (not musers or a.user not in musers)
1872             ):
1873                 break
1874 
1875             # Check if this message is admissible modification.
1876             if (    a.type == AscPoint.ATYPE_MOD
1877                 and (not musers or a.user in musers)
1878                 and (not rmusers or a.user not in rmusers)
1879             ):
1880                 # Cannot be a candidate if in translation-only mode and
1881                 # there is no difference in translation to earlier message.
1882                 ae = ahist[i + 1] if i + 1 < len(ahist) else None
1883                 if not (tronly and ae and ae.msg.msgstr == a.msg.msgstr):
1884                     hi_sel = i + 1
1885 
1886         return hi_sel
1887 
1888     return selector
1889 
1890 
1891 # Select last review (any or by users).
1892 def _selector_rev (args):
1893 
1894     if not 0 <= len(args) <= 2:
1895         raise PologyError(_("@info", "Wrong number of arguments."))
1896     user_spec = args[0] if len(args) > 0 else ""
1897     atag_spec = args[1] if len(args) > 1 else ""
1898 
1899     def selector (msg, cat, ahist, aconf):
1900 
1901         if ahist[0].user is None:
1902             return 0
1903 
1904         users = cached_users(user_spec, aconf)
1905         atags = cached_review_tags(atag_spec, aconf)
1906 
1907         hi_sel = 0
1908         for i in range(len(ahist)):
1909             a = ahist[i]
1910             if (    a.type == AscPoint.ATYPE_REV and a.tag in atags
1911                 and (not users or a.user in users)
1912             ):
1913                 hi_sel = i + 1
1914                 break
1915 
1916         return hi_sel
1917 
1918     return selector
1919 
1920 _selector_factories["rev"] = (_selector_rev, True)
1921 
1922 
1923 # Select first review (any or by r-users, and not by m-users)
1924 # before last modification (any or by m-users, and not by r-users).
1925 def _selector_revbm (args):
1926 
1927     if not 0 <= len(args) <= 3:
1928         raise PologyError(_("@info", "Wrong number of arguments."))
1929     ruser_spec = args[0] if len(args) > 0 else ""
1930     muser_spec = args[1] if len(args) > 1 else ""
1931     atag_spec = args[2] if len(args) > 2 else ""
1932 
1933     def selector (msg, cat, ahist, aconf):
1934 
1935         if ahist[0].user is None:
1936             return 0
1937 
1938         rusers = cached_users(ruser_spec, aconf, utype="r")
1939         musers = cached_users(muser_spec, aconf, utype="m")
1940         atags = cached_review_tags(atag_spec, aconf)
1941 
1942         hi_sel = 0
1943         can_select = False
1944         for i in range(len(ahist)):
1945             a = ahist[i]
1946             if (     a.type == AscPoint.ATYPE_MOD
1947                 and (not musers or a.user in musers)
1948                 and (not rusers or a.user not in rusers)
1949             ):
1950                 # Modification found, enable selection of review.
1951                 can_select = True
1952             if (    a.type == AscPoint.ATYPE_REV and a.tag in atags
1953                 and (not rusers or a.user in rusers)
1954                 and (not musers or a.user not in musers)
1955             ):
1956                 # Review found, select it if enabled, and stop anyway.
1957                 if can_select:
1958                     hi_sel = i + 1
1959                 break
1960 
1961         return hi_sel
1962 
1963     return selector
1964 
1965 _selector_factories["revbm"] = (_selector_revbm, True)
1966 
1967 
1968 # Select first modification (any or by users) at or after given time.
def _selector_modafter (args):
    """
    Build the selector of a modification made at or after a given time.

    History is scanned backwards, from its last entry towards its first,
    and the first admissible modification met that way is selected.

    @param args: up to two selector arguments, in order:
        time specification (must not be empty, parsed by
        C{parse_datetime}) and user specification (resolved through
        C{cached_users}); omitted ones default to empty string
    @returns: selector function taking C{(msg, cat, ahist, aconf)} and
        returning the one-based index of the selected history entry,
        or 0 if none was selected
    @raises PologyError: on wrong number of arguments or
        empty time specification
    """

    nargs = len(args)
    if nargs > 2:
        raise PologyError(_("@info", "Wrong number of arguments."))
    time_spec = args[0] if nargs >= 1 else ""
    user_spec = args[1] if nargs >= 2 else ""
    if not time_spec:
        raise PologyError(
            _("@info",
              "Time specification cannot be empty."))

    # Parsed once at build time, then shared by all selector calls.
    date = parse_datetime(time_spec)

    def selector (msg, cat, ahist, aconf):

        # First history entry without a user: nothing to select from.
        if ahist[0].user is None:
            return 0

        users = cached_users(user_spec, aconf)

        # Walk one-based positions from the end of history to its start.
        for pos in range(len(ahist), 0, -1):
            apoint = ahist[pos - 1]
            if (    apoint.type == AscPoint.ATYPE_MOD
                and (not users or apoint.user in users)
                and apoint.date >= date
            ):
                return pos

        return 0

    return selector
2002 
2003 _selector_factories["modafter"] = (_selector_modafter, True)
2004