File indexing completed on 2024-11-03 11:24:03

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Derive forms and properties of syntagmas by macro expansion.
0005 
0006 This module provides facilities for macro derivations on syntagmas.
0007 It consists of two elements: the text format for defining macro derivations,
0008 and the derivator class which reads and processes these definitions.
0009 The derivator class is documented within this module,
0010 while the syntax and semantics of syntagma derivations are documented
0011 in the user manual, at C{doc/user/lingo.docbook#sec-lgsynder}.
0012 
0013 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0014 @license: GPLv3
0015 """
0016 
0017 import copy
0018 import pickle as pickle
0019 import hashlib
0020 import locale
0021 import os
0022 import re
0023 
0024 from pology import PologyError, _, n_
0025 from pology.fsops import str_to_unicode
0026 from pology.normalize import simplify
0027 from pology.report import warning, format_item_list
0028 from pology.resolve import first_to_upper, first_to_lower
0029 
0030 
0031 # ----------------------------------------
0032 # Error handling.
0033 
class SynderError (PologyError):
    """
    Exception reporting a problem in derivation sources.

    The string form of the exception is a message which includes
    the problem code and, when known, the source name and position.
    """

    def __init__ (self, message, code, source=None, pos=None):
        """
        Constructor.

        All the parameters are made available as instance variables
        (C{pos} is split into C{line} and C{col}).

        @param message: description of what went wrong
        @type message: string
        @param code: numerical ID of the problem
        @type code: int
        @param source: name of the source in which the problem occurred
        @type source: string
        @param pos: line or line and column in the source
            in which the problem occurred
        @type pos: int or (int, int)
        """

        self.message = message
        self.code = code
        self.source = source
        if isinstance(pos, tuple):
            self.line, self.col = pos
        else:
            self.line = pos
            self.col = None

        # All attributes used by the formatter are set at this point,
        # so the full message can be handed to the base class.
        PologyError.__init__(self, str(self))


    def __unicode__ (self):
        """
        Format the message, with as much location detail as is known.
        """

        if self.source is None:
            s = _("@info context of error",
                  "[synder-%(code)d]: %(msg)s",
                  code=self.code, msg=self.message)
        elif self.line is None:
            s = _("@info context of error",
                  "[synder-%(code)d] in %(source)s: %(msg)s",
                  code=self.code, msg=self.message, source=self.source)
        elif self.col is None:
            s = _("@info context of error",
                  "[synder-%(code)d] at %(source)s:%(line)d: %(msg)s",
                  code=self.code, msg=self.message, source=self.source,
                  line=self.line)
        else:
            s = _("@info context of error",
                  "[synder-%(code)d] at %(source)s:%(line)d:%(col)d: %(msg)s",
                  code=self.code, msg=self.message, source=self.source,
                  line=self.line, col=self.col)

        return str(s)


    def __str__ (self):
        # Python 3 never calls __unicode__ implicitly; without this method
        # str(self) would not produce the message formatted above.
        return self.__unicode__()
0087 
0088 
0089 # ----------------------------------------
0090 # Caching.
0091 
# Parsed sources seen so far, keyed by source name
# (absolute path for sources loaded from files).
_parsed_sources = dict()


def empty_source_cache ():
    """
    Forget every source parsed so far.

    Parsed forms of loaded derivation files are kept in a module-level
    cache, so that loading the same path again (for example because
    several files include it) costs no additional time or memory.
    Call this function when subsequent load instructions should read
    the files anew instead of reusing the cached results.
    """

    # Empty the dictionary in place rather than rebinding the name.
    _parsed_sources.clear()
0109 
0110 
0111 # ----------------------------------------
0112 # Parsing.
0113 
# Special characters of the derivation text format.
# Several roles share a character; which role applies depends on
# the parsing context in which the character is encountered.

# General.
_ch_escape = "\\"
_ch_comment = "#"
_ch_grp_opn = "{"
_ch_grp_cls = "}"
_ch_inc = ">"

# Derivation and environment heads.
_ch_props = ":"
_ch_env = "@"
_ch_ksyn_hd = "|"

# Properties and property keys.
_ch_prop_sep = ","
_ch_pkey_sep = "&"
_ch_pval = "="
_ch_cutprop = "!"
_ch_termprop = "."
_ch_remprop = "^"

# Expanders.
_ch_exp = "|"
_ch_exp_mask = "~"
_ch_exp_mask_pl = "."
_ch_exp_kext = "%"
_ch_exp_kext_pl = "*"
_ch_exp_upc = "^"
_ch_exp_lwc = "`"

# Tags.
_ch_tag = "~"
_ch_tag_sep = "&"

# Whitespace characters recognized by the parser, and the newline.
_strict_ws = " \t\n"
_ch_nl = "\n"
0140 
0141 
def _parse_string_w (instr, srcname):
    """
    Parse derivations text into a source object, without any caching.

    Parsing is driven by context handlers. Each handler reports the next
    context and object to work on, and whether the current pair should
    be remembered so that parsing returns to it later. When a handler
    reports no next context, the most recently remembered pair is
    resumed, or parsing ends if there is none.

    @param instr: the text to parse
    @param srcname: name to assign to the resulting source

    @returns: the object current at the moment parsing ended
    """

    pending = []
    curctx, curobj = _ctx_void, _SDSource(srcname)
    pos, bpos = 0, (1, 1)

    while True:
        nctx, nobj, descend, pos, bpos = \
            _ctx_handlers[curctx](curobj, instr, pos, bpos)

        if nctx is None:
            # Current context finished: resume the enclosing one, if any.
            if not pending:
                return curobj
            curctx, curobj = pending.pop()
            continue

        if descend:
            pending.append((curctx, curobj))
        curctx, curobj = nctx, nobj
0161 
0162 
# Counter for naming anonymous input streams
# (one-element list so that it can be updated in place).
_anonsrc_count = [0]

def _parse_string (instr, srcname=None):
    """
    Parse derivations text, using and updating the source cache.

    @param instr: the text to parse
    @param srcname: name of the source; if C{None}, an automatic
        numbered name is assigned

    @returns: parsed source, with included sources loaded
    """

    # Reuse the cached source of this name if there is one.
    try:
        return _parsed_sources[srcname]
    except KeyError:
        pass

    if srcname is None:
        number = _anonsrc_count[0]
        srcname = _("@item automatic name for anonymous input stream",
                    "&lt;stream-%(num)s&gt;",
                    num=number).resolve("none")
        _anonsrc_count[0] += 1

    parsed = _parse_string_w(instr, srcname)

    # Enter the source into the cache before processing its includes.
    _parsed_sources[srcname] = parsed

    # Replace collected include paths with loaded sources.
    parsed.incsources = _include_sources(parsed, parsed.incsources)

    return parsed
0186 
0187 
def _parse_file (path):
    """
    Load derivations from a file, using all available caches.

    The in-memory cache is consulted first (by absolute path),
    then the compiled file on disk; only if both miss is the file
    parsed (which also writes out its compiled form).

    @param path: path to the file with derivations

    @returns: parsed source, with included sources loaded
    """

    apath = os.path.abspath(path)

    # In-memory cache.
    try:
        return _parsed_sources[apath]
    except KeyError:
        pass

    # On-disk compiled form, or else compilation from scratch.
    source = _read_parsed_file(apath)
    if source is None:
        source = _compile_file_w(path)
    elif source:
        # The name is not stored in the compiled file, restore it.
        source.name = path

    # Enter the source into the cache before processing its includes.
    _parsed_sources[apath] = source

    # Replace collected include paths with loaded sources.
    source.incsources = _include_sources(source, source.incsources)

    return source
0212 
0213 
def _compile_file_w (path, cpath=None):
    """
    Parse the derivations file and write out its compiled form.

    The file is decoded as UTF-8, unless its first line is a comment
    of the form C{# ~~~ ENCODING ~~~}, which selects another encoding.

    @param path: path to the file with derivations
    @param cpath: path to the compiled file; if C{None},
        C{path} plus the standard suffix is used

    @returns: parsed source (includes not yet loaded)
    """

    if cpath is None:
        cpath = path + _compfile_suff

    # Read the file as bytes, since the encoding is declared inside it.
    with open(path, "rb") as ifs:
        lines = ifs.readlines()

    m = re.search(br"^#\s+~~~\s+(\S+)\s+~~~\s*$", lines[0]) if lines else None
    # The pattern is matched on bytes, so the captured encoding name is
    # bytes as well; bytes.decode() accepts the name only as a string.
    enc = m.group(1).decode("ascii") if m else "UTF-8"

    instr = "".join([x.decode(enc) for x in lines])
    source = _parse_string_w(instr, path)

    # Write out parsed file.
    # The name depends on how the file was reached on import,
    # so it is blanked while the source is being written.
    iname = source.name
    source.name = None
    _write_parsed_file(source, path, cpath)
    source.name = iname

    return source
0239 
0240 
def compile_file (path, cpath=None, doraise=False):
    """
    Import file with derivations and write out its compiled form.

    When C{cpath} is omitted, the path of the compiled file is
    C{path} with the standard extension suffix appended.

    What happens when compilation fails is selected by C{doraise}:
    if it is C{False}, a warning is issued and C{False} returned;
    if it is C{True}, the exception which interrupted compilation
    is propagated to the caller.

    @param path: the path to file to compile
    @type path: string
    @param cpath: the path to compiled file
    @type cpath: string
    @param doraise: whether to propagate the error instead of warning
    @type doraise: bool

    @returns: C{True} if the file was successfully compiled
    @rtype: bool
    """

    try:
        _compile_file_w(path, cpath)
    except Exception as e:
        if doraise:
            raise
        warning(_("@info",
                  "Derivation file '%(file)s' cannot be compiled "
                  "due to the following error:\n"
                  "%(msg)s",
                  file=path, msg=str_to_unicode(str(e))))
        return False

    return True
0275 
0276 
def _include_sources (source, incpaths):
    """
    Load the sources named by inclusion directives of a source.

    @param source: the source which contains the directives
    @param incpaths: paths collected from the directives, in order

    @returns: list of parsed sources, in the order of C{incpaths}

    @raises SynderError: if an included path is not an existing file
    """

    basedir = os.path.dirname(os.path.abspath(source.name))

    loaded = []
    for incpath in incpaths:
        # Paths not starting with the path separator are taken
        # relative to the directory of the including source.
        if incpath.startswith(os.path.sep):
            fullpath = incpath
        else:
            fullpath = os.path.join(basedir, incpath)

        if not os.path.isfile(fullpath):
            # FIXME: Position of include directive in the file lost,
            # propagate it to this place to report error properly.
            raise SynderError(
                _("@info",
                  "Included file '%(name)s' not found at '%(path)s'.",
                  name=incpath, path=fullpath), 1101, source.name)

        loaded.append(_parse_file(fullpath))

    return loaded
0298 
0299 
# Layout of a compiled file: data version, hex digest of the derivations
# file it was compiled from, newline, pickled source.
_compfile_suff = "c"
_compfile_dver = b"0003"
_compfile_hlen = hashlib.md5().digest_size * 2

def _write_parsed_file (source, path, cpath=None):
    """
    Write the compiled form of a parsed source to disk.

    @param source: the parsed source
    @param path: path to the derivations file the source was parsed from
    @param cpath: path to the compiled file; if C{None},
        C{path} plus the standard suffix is used

    @returns: C{True} if the compiled file was written, C{False} if
        either of the files could not be opened
    """

    if cpath is None:
        cpath = path + _compfile_suff
    try:
        # Read the derivations file before opening the compiled one,
        # so that an unreadable source does not leave behind
        # a truncated compiled file.
        with open(path, "rb") as fh:
            fhash = hashlib.md5(fh.read()).hexdigest().encode("ascii")
        fhc = open(cpath, "wb")
    except (IOError, OSError):
        return False

    with fhc:
        # Write out data version and file hash.
        fhc.write(_compfile_dver)
        fhc.write(fhash + b"\n")
        pickle.dump(source, fhc, 2) # 0 for ASCII instead of binary

    return True


def _read_parsed_file (path):
    """
    Read the compiled form of a derivations file from disk.

    The compiled file is expected at C{path} plus the standard suffix.
    It is used only if its data version is the current one and the hash
    stored in it matches the present content of the derivations file.

    @param path: path to the derivations file

    @returns: the parsed source, or C{None} if there is no usable
        compiled file
    """

    cpath = path + _compfile_suff
    try:
        fhc = open(cpath, "rb")
    except (IOError, OSError):
        return None

    with fhc:
        try:
            with open(path, "rb") as fh:
                # The hash is stored as bytes, so it must be compared
                # as bytes; comparing the string digest never matches.
                fhash = hashlib.md5(fh.read()).hexdigest().encode("ascii")
        except (IOError, OSError):
            return None

        # Check if data version and file hashes match.
        if fhc.read(len(_compfile_dver)) != _compfile_dver:
            return None
        fhashc = fhc.read(_compfile_hlen + 1)[:-1]
        if fhash != fhashc:
            return None

        # Load the compiled source.
        # NOTE(review): unpickling can execute arbitrary code; this is
        # acceptable only while compiled files come from a trusted place.
        return pickle.load(fhc)
0346 
0347 
0348 # ----------------------------------------
0349 # Parsing context handlers.
0350 
def _ctx_handler_void (source, instr, pos, bpos):
    """
    Handle text between derivations.

    Skips to the first non-whitespace character and decides by it
    whether an inclusion directive, an environment head, or a new
    derivation head follows.

    @returns: next context, next object, whether to descend,
        new position and new block position; context and object are
        C{None} if the end of input was reached

    @raises SynderError: on environment without preceding derivation,
        or on inconsistent indentation of heads
    """

    # Escapes are not interpreted here; any non-whitespace character
    # stops the skip without being consumed.
    testsep = lambda c: None if c in _strict_ws else ""
    _, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep, wesc=False)

    if sep is None:
        return None, None, False, pos, bpos

    # Text from the start of the current line up to the found character.
    indent = instr[pos - bpos[1] + 1:pos]
    first = instr[pos]

    if first == _ch_inc:
        return _ctx_inc, source, True, pos, bpos

    if first == _ch_env:
        if not source.derivs:
            raise SynderError(
                _("@info",
                  "No derivation yet for which to start an environment."),
                1002, source.name, bpos)
        # The first environment head fixes the expected indentation.
        if source.indenv is None:
            source.indenv = indent
        if indent != source.indenv:
            raise SynderError(
                _("@info",
                  "Inconsistent indenting of environment head."),
                1003, source.name, bpos)
        deriv = source.derivs[-1]
        env = _SDEnv(deriv, bpos)
        deriv.envs.append(env)
        return _ctx_env, env, True, pos, bpos

    # Anything else starts a new derivation;
    # the first derivation head fixes the expected indentation.
    if source.indderiv is None:
        source.indderiv = indent
    if indent != source.indderiv:
        raise SynderError(
            _("@info",
              "Inconsistent indenting of derivation head."),
            1001, source.name, bpos)
    deriv = _SDDeriv(source, bpos)
    source.derivs.append(deriv)
    ksyn = _SDSyn(deriv, bpos)
    deriv.syns.append(ksyn)
    return _ctx_ksyn, ksyn, True, pos, bpos
0393 
0394 
# Characters which end a piece of text in key syntagma.
_seps_ksyn = {_ch_prop_sep, _ch_props, _ch_tag, _ch_nl}

def _ctx_handler_ksyn (ksyn, instr, pos, bpos):
    """
    Handle a piece of key syntagma in derivation head.

    Collects text up to the next separator into the syntagma,
    marking the syntagma hidden if its text starts with the unescaped
    hiding character, and moves on according to the separator.

    @returns: next context, next object, whether to descend,
        new position and new block position

    @raises SynderError: if the head ends before properties start
    """

    obpos = bpos
    testsep = lambda c: c if c in _seps_ksyn else None
    substr, sep, pos, bpos, isesc = _move_to_sep(instr, pos, bpos, testsep,
                                                 repesc=True)

    # Look for the hiding marker only on the first piece of the syntagma.
    stripped = substr.lstrip(_strict_ws)
    nlead = len(substr) - len(stripped)
    if (    not ksyn.segs and stripped.startswith(_ch_ksyn_hd)
        and not isesc[nlead]
    ):
        ksyn.hidden = True
        substr = substr.lstrip()[len(_ch_ksyn_hd):]

    # The first segment is added even when empty.
    if substr or not ksyn.segs:
        ksyn.segs.append(_SDText(ksyn, obpos, substr))

    if sep == _ch_tag:
        tag = _SDTag(ksyn, bpos)
        ksyn.segs.append(tag)
        return _ctx_tag, tag, True, pos, bpos

    if sep == _ch_prop_sep:
        # Another key syntagma of the same derivation follows.
        deriv = ksyn.parent
        nextsyn = _SDSyn(deriv, bpos)
        deriv.syns.append(nextsyn)
        return _ctx_ksyn, nextsyn, False, pos, bpos

    if sep == _ch_props:
        # Properties given directly after the head
        # go into an environment with the default name.
        deriv = ksyn.parent
        env = _SDEnv(deriv, bpos)
        deriv.envs.append(env)
        prop = _SDProp(env, bpos)
        env.props.append(prop)
        return _ctx_pkey, prop, False, pos, bpos

    raise SynderError(
        _("@info",
          "Unexpected end of derivation head started at %(line)d:%(col)d.",
          line=obpos[0], col=obpos[1]),
        1010, ksyn.parent.parent.name, bpos)
0436 
0437 
def _ctx_handler_env (env, instr, pos, bpos):
    """
    Handle environment head.

    Reads the environment name, which extends to the character
    starting the properties, and starts the first property.

    @returns: next context, next object, whether to descend,
        new position and new block position

    @raises SynderError: if the head is not finished, the name is empty,
        or the name repeats that of an earlier environment
        in the same derivation
    """

    obpos = bpos
    testsep = lambda c: c if c == _ch_props else None
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep)

    if sep != _ch_props:
        raise SynderError(
            _("@info",
              "Unexpected end of environment head started at %(line)d:%(col)d.",
              line=obpos[0], col=obpos[1]),
            1020, env.parent.parent.name, bpos)

    # Drop the character which introduced the environment.
    env.name = substr[len(_ch_env):]
    srcname = env.parent.parent.name
    if not env.name:
        raise SynderError(
            _("@info",
              "Empty environment name."),
            1021, srcname, obpos)

    # This environment is the last one in the list of its derivation.
    if any(env.name == other.name for other in env.parent.envs[:-1]):
        raise SynderError(
            _("@info",
              "Repeated environment name '%(env)s'.",
              env=env.name),
            1022, srcname, obpos)

    prop = _SDProp(env, bpos)
    env.props.append(prop)
    return _ctx_pkey, prop, False, pos, bpos
0467 
0468 
# Characters which end the search for property keys.
_seps_pkey = {_ch_pval, _ch_prop_sep, _ch_exp, _ch_tag, _ch_nl}

def _ctx_handler_pkey (prop, instr, pos, bpos):
    """
    Handle keys of a property.

    If the key-value separator is found before any other separator,
    the text before it is split into keys, with trailing behavior
    markers stripped off each key and recorded on it.
    Otherwise the property has no keys, and the same text is handed
    to the value context unconsumed.

    @returns: next context (always the value context), the property,
        whether to descend, new position and new block position
    """

    startpos, startbpos = pos, bpos
    testsep = lambda c: c if c in _seps_pkey else None
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep)

    if sep != _ch_pval:
        # Backtrack and go into value context.
        return _ctx_pval, prop, False, startpos, startbpos

    markers = (_ch_cutprop, _ch_termprop, _ch_remprop)
    for rawkey in substr.strip().split(_ch_pkey_sep):
        found = dict.fromkeys(markers, False)
        # Markers may come in any order and number at the end of the key.
        while True:
            for marker in markers:
                if rawkey.endswith(marker):
                    found[marker] = True
                    rawkey = rawkey[:-len(marker)]
                    break
            else:
                break
        prop.keys.append(_SDKey(prop, startbpos, rawkey,
                                found[_ch_cutprop], found[_ch_termprop],
                                found[_ch_remprop]))

    return _ctx_pval, prop, False, pos, bpos
0497 
0498 
# Characters which end a piece of text in property value.
_seps_pval = {_ch_prop_sep, _ch_exp, _ch_tag, _ch_nl}

def _ctx_handler_pval (prop, instr, pos, bpos):
    """
    Handle a piece of property value.

    Collects non-empty text up to the next separator as a text segment
    of the property, and moves on according to the separator:
    to the next property, into an expander, or into a tag.
    On any other outcome the context is finished.

    @returns: next context, next object, whether to descend,
        new position and new block position
    """

    obpos = bpos
    testsep = lambda c: c if c in _seps_pval else None
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep)

    if substr:
        prop.segs.append(_SDText(prop, obpos, substr))

    if sep == _ch_tag:
        tag = _SDTag(prop, bpos)
        prop.segs.append(tag)
        return _ctx_tag, tag, True, pos, bpos

    if sep == _ch_exp:
        exp = _SDExp(prop, bpos)
        prop.segs.append(exp)
        return _ctx_exp, exp, True, pos, bpos

    if sep == _ch_prop_sep:
        env = prop.parent
        nextprop = _SDProp(env, bpos)
        env.props.append(nextprop)
        return _ctx_pkey, nextprop, False, pos, bpos

    return None, None, False, pos, bpos
0525 
0526 
# Characters which end an expander not enclosed in group characters.
_seps_exp = set([_ch_prop_sep, _ch_exp] + list(_strict_ws))

def _ctx_handler_exp (exp, instr, pos, bpos):
    """
    Handle expander in property value.

    The expander text is either enclosed in group characters,
    or extends to the first of the ending characters (not consumed).
    It is split, from the back, into key extender, selection mask
    and reference; a leading unescaped capitalization marker
    on the reference sets the capitalization request.

    @returns: C{None} context and object (the expander is always
        finished by one call), no descent, new position and
        new block position

    @raises SynderError: if an enclosed expander is not closed
        by the end of line or input
    """

    if instr[pos:pos + len(_ch_grp_opn)] == _ch_grp_opn:
        enclosed = True
        testsep = lambda c: c if c in (_ch_grp_cls, _ch_nl) else None
    else:
        enclosed = False
        # Ending character is left for the enclosing context.
        testsep = lambda c: "" if c in _seps_exp else None

    obpos = bpos
    substr, sep, pos, bpos, isesc = _move_to_sep(instr, pos, bpos, testsep,
                                                 repesc=True)
    if (enclosed and sep is None) or sep == _ch_nl:
        raise SynderError(
            _("@info",
              "Unexpected end of expander started at %(line)d:%(col)d.",
              line=obpos[0], col=obpos[1]),
            1050, exp.parent.parent.parent.parent.name, bpos)

    if enclosed:
        # Drop the opening character, together with its escape indicator,
        # so that indicators stay aligned with characters of the text.
        substr = substr[len(_ch_grp_opn):]
        isesc = isesc[len(_ch_grp_opn):]

    p = substr.find(_ch_exp_kext)
    if p >= 0:
        exp.kext = substr[p + len(_ch_exp_kext):]
        substr = substr[:p]

    p = substr.find(_ch_exp_mask)
    if p >= 0:
        exp.mask = substr[p + len(_ch_exp_mask):]
        substr = substr[:p]

    # Text was only shortened from the back above,
    # so the first indicator still belongs to the first character.
    if substr.startswith(_ch_exp_upc) and not isesc[0]:
        exp.caps = True
        substr = substr[len(_ch_exp_upc):]
    elif substr.startswith(_ch_exp_lwc) and not isesc[0]:
        exp.caps = False
        substr = substr[len(_ch_exp_lwc):]

    exp.ref = substr

    return None, None, False, pos, bpos
0571 
0572 
_seps_tag = set([_ch_prop_sep, _ch_exp, _ch_tag] + list(_strict_ws))

def _ctx_handler_tag (tag, instr, pos, bpos):
    """
    Handle tag in key syntagma or property value.

    The tag text is either enclosed in group characters,
    or extends to the first of the ending characters (not consumed).
    It is split into tag names on the tag name separator.

    @returns: C{None} context and object (the tag is always finished
        by one call), no descent, new position and new block position

    @raises SynderError: if an enclosed tag is not closed
        by the end of line or input
    """

    if instr[pos:pos + len(_ch_grp_opn)] == _ch_grp_opn:
        enclosed = True
        testsep = lambda c: c if c in (_ch_grp_cls, _ch_nl) else None
    else:
        enclosed = False
        # NOTE(review): this tests against _seps_exp, while _seps_tag
        # defined above is not used anywhere in view; confirm which set
        # is intended before changing, as it affects where tags end.
        testsep = lambda c: "" if c in _seps_exp else None

    obpos = bpos
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep)
    if (enclosed and sep is None) or sep == _ch_nl:
        # A tag can sit in a key syntagma or in a property value,
        # which are at different depths below the source,
        # so climb the parents until the object without one.
        top = tag
        while hasattr(top, "parent"):
            top = top.parent
        raise SynderError(
            _("@info",
              "Unexpected end of tag started at %(line)d:%(col)d.",
              line=obpos[0], col=obpos[1]),
            1050, top.name, bpos)

    if enclosed:
        substr = substr[len(_ch_grp_opn):]

    tag.names = substr.split(_ch_tag_sep)

    return None, None, False, pos, bpos
0599 
0600 
def _ctx_handler_inc (source, instr, pos, bpos):
    """
    Handle inclusion directive.

    The rest of the line after the directive character is the path
    to include. The path is only recorded on the source here;
    it is resolved into a parsed source later.

    @returns: C{None} context and object (the directive is always
        finished by one call), no descent, new position and
        new block position

    @raises SynderError: if the path is empty
    """

    # Step over the directive character:
    # any character at all is accepted as separator here.
    _, _, pos, bpos = _move_to_sep(instr, pos, bpos, lambda c: c)

    # Take everything up to the end of line as the path.
    pathbpos = bpos
    atnewline = lambda c: c if c == _ch_nl else None
    rawpath, _, pos, bpos = _move_to_sep(instr, pos, bpos, atnewline)

    incpath = rawpath.strip()
    if not incpath:
        raise SynderError(
            _("@info",
              "Empty target path in inclusion directive."),
            1100, source.name, pathbpos)

    # Add to included sources of this source.
    # Temporarily store paths, to be resolved into full sources later.
    source.incsources.append(incpath)

    return None, None, False, pos, bpos
0623 
0624 
0625 # ----------------------------------------
0626 # Parsing context IDs and handlers collected.
0627 # IDs and handlers must be in the same order,
0628 # as IDs are used to index handlers.
0629 
# Context IDs are consecutive integers starting from zero,
# so that they can serve as indices into the sequence of handlers.
(_ctx_void, _ctx_ksyn, _ctx_env, _ctx_pkey,
 _ctx_pval, _ctx_exp, _ctx_tag, _ctx_inc) = range(8)
0640 
# Handlers by context ID: the parsing loop fetches the handler
# for the current context as _ctx_handlers[context ID],
# so the order here must follow the order of IDs.
_ctx_handlers = (
    _ctx_handler_void,
    _ctx_handler_ksyn,
    _ctx_handler_env,
    _ctx_handler_pkey,
    _ctx_handler_pval,
    _ctx_handler_exp,
    _ctx_handler_tag,
    _ctx_handler_inc,
)
0651 
0652 # ----------------------------------------
0653 # Parsing utilities.
0654 
def _move_to_sep (instr, pos, bpos, testsep, wesc=True, repesc=False):
    """
    Find the first separator admitted by the test function.

    Escaped characters, continued lines and comments are skipped over:
    an escaped character is taken literally without being tested,
    an escaped newline is dropped, and a comment is dropped
    up to (not including) the newline which ends it.

    @param instr: the text being parsed
    @param pos: position in the text from which to start
    @param bpos: block position (line, column) corresponding to C{pos}
    @param testsep: separator test function; it takes the current
        character, and returns C{None} if the character is not
        admitted as separator
    @param wesc: whether to interpret the escape character
    @param repesc: whether to also return escape indicators

    @returns: substring up to the separator (without escapes, comments
        and line continuations), the separator, new position and
        new block position; if C{repesc} is set, additionally a list
        with one indicator per character of the substring,
        C{True} where the character was escaped.
        The separator is the value returned by the test function,
        or C{None} if the end of input was reached without one;
        the position is advanced by the length of that value.
    """

    start = pos
    end = len(instr)
    chars = []
    escflags = []
    sep = None

    while sep is None and pos < end:
        c = instr[pos]

        if c == _ch_comment:
            nlpos = instr.find(_ch_nl, pos)
            pos = end if nlpos < 0 else nlpos
            continue

        if wesc and c == _ch_escape:
            pos += 1
            if pos < end:
                # Escaped newline is line continuation, nothing is kept.
                if instr[pos] != _ch_nl:
                    chars.append(instr[pos])
                    escflags.append(True)
                pos += 1
            continue

        sep = testsep(c)
        if sep is None:
            chars.append(c)
            escflags.append(False)
            pos += 1
        else:
            pos += len(sep)

    # Update block position (line, column) by the raw text passed over.
    passed = instr[start:pos]
    nlines = passed.count(_ch_nl)
    if nlines:
        bpos = (bpos[0] + nlines, len(passed) - passed.rfind(_ch_nl))
    else:
        bpos = (bpos[0], bpos[1] + len(passed))

    if repesc:
        return ("".join(chars), sep, pos, bpos, escflags)
    return ("".join(chars), sep, pos, bpos)
0711 
0712 
0713 # ----------------------------------------
0714 # Data structures.
0715 
0716 # Synder source.
class _SDSource:
    """
    Parsed source of derivations.
    """

    def __init__ (self, name):

        # Name of the source (filename, etc).
        self.name = name

        # Derivations (SDDeriv).
        self.derivs = []
        # Included sources (must be ordered).
        self.incsources = []
        # Indentation for derivation and environments heads
        # (set on first parsed).
        self.indderiv = None
        self.indenv = None

        ## Global directives.
        #...


    def __unicode__ (self):
        # Debugging dump: header with the name, then all derivations.
        return (  "============> %s\n" % self.name
                + "\n".join(map(str, self.derivs)))

    def __str__ (self):
        # Must return a string: returning encoded bytes from __str__
        # raises TypeError in Python 3.
        return self.__unicode__()
0742 
0743 
0744 # Derivation.
class _SDDeriv:
    """
    Single derivation in a source.
    """

    def __init__ (self, parent, pos):

        # Parent source and position in it.
        self.parent = parent
        self.pos = pos

        # Key syntagmas (SDSyn).
        self.syns = []
        # Environments (SDEnv).
        self.envs = []

    def __unicode__ (self):
        # Debugging dump: position, key syntagmas, environments.
        return (  "  -----> %d:%d\n" % self.pos
                + "  " + "\n  ".join(map(str, self.syns)) + "\n"
                + "\n".join(map(str, self.envs)))

    def __str__ (self):
        # Must return a string: returning encoded bytes from __str__
        # raises TypeError in Python 3.
        return self.__unicode__()
0764 
0765 
0766 # Environment.
class _SDEnv:
    """
    Environment of a derivation, holding properties.
    """

    def __init__ (self, parent, pos, name=""):

        # Parent derivation and position in source.
        self.parent = parent
        self.pos = pos
        # Environment name.
        self.name = name

        # Properties (SDProp).
        self.props = []

    def __unicode__ (self):
        # Debugging dump: name and position, then all properties.
        return (  "    @%s:%d:%d\n" % ((self.name,) + self.pos)
                + "\n".join(map(str, self.props)))

    def __str__ (self):
        # Must return a string: returning encoded bytes from __str__
        # raises TypeError in Python 3.
        return self.__unicode__()
0785 
0786 
0787 # Syntagma.
class _SDSyn:
    """
    Key syntagma of a derivation.
    """

    def __init__ (self, parent, pos, hidden=False):

        # Parent derivation and position in source.
        self.parent = parent
        self.pos = pos
        # Visibility of the syntagma.
        self.hidden = hidden

        # Syntagma segments (SDText, SDTag).
        self.segs = []

    def __unicode__ (self):
        # Debugging dump: position and visibility, then all segments.
        return (  "{p:%d:%d|%s}=" % (self.pos + (self.hidden,))
                + "".join(map(str, self.segs)))

    def __str__ (self):
        # Must return a string: returning encoded bytes from __str__
        # raises TypeError in Python 3.
        return self.__unicode__()
0806 
0807 
0808 # Property.
class _SDProp:
    """
    Property in an environment: keys and value segments.
    """

    def __init__ (self, parent, pos):

        # Parent environment and position in source.
        self.parent = parent
        self.pos = pos

        # Keys (SDKey).
        self.keys = []
        # Value segments (SDText, SDExp, SDTag).
        self.segs = []

    def __unicode__ (self):
        # Debugging dump: position, keys, value segments.
        return (  "      %d:%d " % self.pos
                + "k=" + "".join(map(str, self.keys)) + " "
                + "v=" + "".join(map(str, self.segs)))

    def __str__ (self):
        # Must return a string: returning encoded bytes from __str__
        # raises TypeError in Python 3.
        return self.__unicode__()
0828 
0829 
0830 # Property key.
class _SDKey:
    """
    Key of a property, with its behaviors.
    """

    def __init__ (self, parent, pos, name="",
                  cut=False, terminal=False, canceling=False):

        # Parent property and position in source.
        self.parent = parent
        self.pos = pos
        # Key name and behaviors.
        self.name = name
        self.cut = cut
        self.terminal = terminal
        self.canceling = canceling

    def __unicode__ (self):
        # Debugging dump: position, name, then the three behaviors.
        # There must be one specifier per value, or formatting fails.
        return "{k:%d:%d:%s|%s&%s&%s}" % (self.pos + (self.name,
                                                      self.cut, self.terminal,
                                                      self.canceling))

    def __str__ (self):
        # Must return a string: returning encoded bytes from __str__
        # raises TypeError in Python 3.
        return self.__unicode__()
0851 
0852 
0853 # Expander.
class _SDExp:
    """
    Expander segment in property value.
    """

    def __init__ (self, parent, pos, ref=None, mask=None, caps=None, kext=None):

        # Parent property and position in source.
        self.parent = parent
        self.pos = pos
        # Reference, selection mask, capitalization, key extender.
        self.ref = ref
        self.mask = mask
        self.caps = caps
        self.kext = kext

    def __unicode__ (self):
        # Debugging dump: position, then all expander elements.
        return "{e:%d:%d:%s|%s|%s|%s}" % (self.pos + (self.ref, self.mask,
                                                       self.caps, self.kext))

    def __str__ (self):
        # Must return a string: returning encoded bytes from __str__
        # raises TypeError in Python 3.
        return self.__unicode__()
0872 
0873 
0874 # Tag.
class _SDTag:
    """
    Tag segment in key syntagma or property value.
    """

    def __init__ (self, parent, pos):

        # Parent syntagma or property and position in source.
        self.parent = parent
        self.pos = pos
        # Names associated to this tag.
        self.names = []

    def __unicode__ (self):
        # Debugging dump: position and tag names.
        return "{g:%d:%d:%s}" % (self.pos + ("+".join(self.names),))

    def __str__ (self):
        # Must return a string: returning encoded bytes from __str__
        # raises TypeError in Python 3.
        return self.__unicode__()
0889 
0890 
0891 # Text segment.
class _SDText:
    """
    Plain text segment in key syntagma or property value.
    """

    def __init__ (self, parent, pos, text=""):

        # Parent syntagma or property and position in source.
        self.parent = parent
        self.pos = pos
        # Text.
        self.text = text

    def __unicode__ (self):
        # Debugging dump: position and text.
        return "{t:%d:%d:%s}" % (self.pos + (self.text,))

    def __str__ (self):
        # Must return a string: returning encoded bytes from __str__
        # raises TypeError in Python 3.
        return self.__unicode__()
0906 
0907 
0908 # ----------------------------------------
0909 # High level access.
0910 
0911 class Synder (object):
0912     """
0913     Derivator objects import sources of derivations
0914     and get queried for properties of syntagmas.
0915 
0916     Lookup can be done by derivation key and property key,
0917     but also by single compound key (serialization of the previous two),
0918     to have interface and behavior similar to built-in dictionaries.
0919 
0920     Basic usage is rather simple. If there are derivation files
0921     C{planets.sd} and {moons.sd}, they can be used like this::
0922 
0923         >>> sd = Synder()
0924         >>> sd.import_file("planets.sd")
0925         >>> sd.import_file("moons.sd")
0926         >>>
0927         >>> # Lookup of properties by derivation and property key.
0928         >>> sd.get2("Venus", "nom")
0929         Venera
0930         >>> sd.get2("Callisto", "nom")
0931         Kalisto
0932         >>> sd.get2("Foobar", "nom")
0933         None
0934         >>> # Lookup of properties by compound key.
0935         >>> sd["Venus-nom"]
0936         Venera
0937         >>>
0938         >>> # Iteration through properties by derivation keys.
        >>> for dkey in sd.dkeys(): print(sd.get2(dkey, "nom"))
0940         ...
0941         Venera
0942         Kalisto
0943         Merkur
0944         Jupiter
0945 
0946         >>> # Iteration through properties by compound keys.
        >>> for ckey in sd: print(sd[ckey])
0948         ...
0949         Venera
0950         Veneri
0951         Venerom
0952 
0953         Merkuru
0954         Merkur
0955         Merkura
0956 
0957         >>> # Querying for key syntagmas.
0958         >>> sd.syns("Venus")
0959         ['Venus']
0960         >>> sd.syns("Iapetus")
0961         ['Iapetus', 'Japetus']
0962         >>> sd.syns("Japetus")
0963         ['Iapetus', 'Japetus']
0964         >>>
0965         >>> # Querying for property keys.
0966         >>> sd.pkeys("Venus")
0967         ['gen', 'acc', 'nom', 'dat', 'gender']
0968 
0969     Syntax errors in derivations sources will raise L{SynderError}
0970     exceptions on import.
0971     Unresolvable conflicts in derivation keys will be reported
0972     as warning on import, and conflicted derivations will not be imported.
0973     Errors in expansions are not reported on import, but when
0974     the problematic derivation is queried; warnings are output,
0975     and C{None} (or default value) is returned for all properties.
0976     """
0977 
0978     def __init__ (self,
0979                   env="",
0980                   ckeysep="-",
0981                   strictkey=False,
0982                   dkeytf=None, dkeyitf=None,
0983                   pkeytf=None, pkeyitf=None,
0984                   pvaltf=None,
0985                   ksyntf=None,
0986                   envtf=None):
0987         """
0988         Constructor of syntagma derivators.
0989 
0990         The default resolution of derivation key conflicts,
0991         as described in module documentation, can be changed
0992         to strict resolution through C{strictkey} parameter.
0993         If C{strictkey} is C{True}, all key syntagmas must be unique.
0994 
0995         Parameter C{env} is used to specify the environment from which
0996         the derivations are taken. In case no non-default environments
0997         have been used in derivations, C{env} is simply empty string.
0998         Otherwise, it can be:
0999           - a string specifying a non-default environment
1000           - a tuple specifying an environment fallback chain
1001           - a tuple of tuples, specifying more than one environment chain
1002         (Lists can also be used instead of tuples.)
1003 
1004         If several environment fallback chains are given, when a property
1005         is requrested they are tried in the order of specification,
1006         and the first yielded property is returned.
1007         It is also possible to combine properties from different
1008         environment chains in a custom way, by supplying a property
1009         value transformation function (C{pvaltf} parameter).
1010 
1011         Compound keys, for single-key lookups, are built by joining
1012         the derivation and property keys with a separator.
1013         This separator can be chosen through C{ckeysep} parameter.
1014         The separator string can be contained inside a derivation key,
1015         but it must not be found inside any property key
1016         (the compound key is split from the back).
1017 
1018         A myriad of I{transformation functions} can be applied by
1019         derivator object to imported derivations, through C{*tf} parameters.
1020         They are as follows (stating only default inputs, see below
1021         for more possibilities):
1022           - C{dkeytf}: applied to derivation key supplied on lookups
1023                 (e.g. in L{get} or L{get2} methods). Takes the derivation
1024                 key as parameter, returns either the derivation key
1025                 or a tuple of the derivation key and another object.
1026           - C{dkeyitf}: applied to all derivation keys on import.
1027                 Same default input-output as C{dkey}.
1028           - C{pkeytf}: like C{dkeytf}, only working analogously on
1029                 property key instead of derivation key.
1030           - C{pkeyitf}: like C{dkeyitf}, only working analogously on
1031                 property key instead of derivation key.
1032           - C{pvaltf}: applied to tagged segments of property values.
1033                 The input to this function is a list of lists
1034                 by each environment fallback chain;
1035                 list for one environemnt chain consists of 2-tuples,
1036                 each tuple having a list of tags as the first element,
1037                 and a text segment as the second element.
1038                 For example, if there is only one environment chain
1039                 (e.g. C{evn=""} or C{env=("someenv", "")},
1040                 and the property value is derived to be C{foo ~tag bar}
1041                 in this environment, then the argument to the function
1042                 will be C{[[([''], "foo "), (['tag'], " bar")]]}.
1043                 If an environemnt chain yielded no property value,
1044                 its element will be C{None} instead of list of 2-tuples.
1045                 The return value is the final property value string.
1046                 Note that simplification will not be applied to this
1047                 value afterwards, so if desired,
1048                 L{simplify()<pology.normalize.simplify>}
1049                 should be manually called inside the function.
1050           - C{ksyntf}: quite similar to C{pvaltf}, only applied to
1051                 tagged segments of key syntagmas.
1052                 The difference is that there are no multiple environments
1053                 for key syntagmas, so the input value is just one list
1054                 of tagged text segments (what would be the first element
1055                 of input list to C{pvaltf}).
1056           - C{envtf}: applied to environment fallback chain on lookups.
1057                 Takes original environment chain as argument,
1058                 returns new environment chain
1059                 (in one of the forms acceptable as C{env} parameter).
1060 
1061         Transformation functions can take more input arguments than
1062         the default described above, on demand.
1063         If transformation function  is supplied directly,
1064         e.g. C{pvaltf=somefunc}, it is sent default inputs.
1065         Extra inputs are requested by supplying instead a tuple, where
1066         the first element is the transformation function, and the following
1067         elements are predefined keywords of available extra inputs,
1068         e.g. C{pvalf=(somefunc, "dkey", "pkrest")}.
1069         Available extra inputs by transformation function are:
1070           - C{dkeytf}: C{"self"} the derivation object.
1071           - C{pkeytf}: C{"self"}, C{"dkey"} the derivation key
1072                 (original or that returned by C{dkeytf}),
1073                 C{"dkrest"} the second object returned by C{dkeytf}.
1074           - C{pvaltf}: C{"self"}, C{"dkey"}, C{"pkey"} the property
1075                 key (original or that returned by C{pkeytf}),
1076                 C{"env"} the tuple of environment chains, C{"dkrest"},
1077                 C{"pkrest"} the second object returned by C{pkeytf}.
1078           - C{ksyntf}: C{"self"}, C{"dkey"}, C{"dkrest"}.
1079           - C{envtf}: C{"self"}, C{"dkey"}, C{"dkrest"}.
1080 
1081         @param env: environment for derivations
1082         @type env: string, (string*), ((string*)*)
1083         @param ckeysep: derivation-property key separator in compound keys
1084         @type ckeysep: string
1085         @param strictkey: whether all key syntagmas must be unique to
1086             avoid conflicts
1087         @param dkeytf: transformation function for lookup derivation keys
1088         @param dkeyitf: transformation function for imported derivation keys
1089         @param pkeytf: transformation function for lookup property keys
1090         @param pkeyitf: transformation function for imported property keys
1091         @param pvaltf: transformation fucntion for property values
1092         @param ksyntf: transformation fucntion for key syntagamas
1093         """
1094 
1095         self._env = self._normenv(env)
1096 
1097         self._ckeysep = ckeysep
1098 
1099         self._dkeytf = self._resolve_tf(dkeytf, ["self"])
1100         self._dkeyitf = self._resolve_tf(dkeyitf, [])
1101         self._pkeytf = self._resolve_tf(pkeytf, ["dkey", "dkrest", "self"])
1102         self._pkeyitf = self._resolve_tf(pkeyitf, [])
1103         self._pvaltf = self._resolve_tf(pvaltf, ["pkey", "dkey", "env",
1104                                                  "dkrest", "pkrest", "self"])
1105         self._ksyntf = self._resolve_tf(ksyntf, ["dkey", "dkrest", "self"])
1106         self._envtf = self._resolve_tf(envtf, ["dkey", "dkrest", "self"])
1107 
1108         self._strictkey = strictkey
1109 
1110         self._imported_srcnames = set()
1111         self._visible_srcnames = set()
1112         self._derivs_by_srcname = {}
1113         self._deriv_by_srcname_idkey = {}
1114         self._visible_deriv_by_dkey = {}
1115         self._props_by_deriv_env1 = {}
1116         self._raw_props_by_deriv_env1 = {}
1117         self._single_dkeys = set()
1118 
1119 
1120     def _normenv (self, env):
1121 
1122         if isinstance(env, (tuple, list)):
1123             if not env or isinstance(env[0], str):
1124                 env = (env,)
1125         else:
1126             env = ((env,),)
1127 
1128         return env
1129 
1130 
1131     def _resolve_tf (self, tfspec, kneargs):
1132 
1133         eaords = [0]
1134         if isinstance(tfspec, (tuple, list)):
1135             tf0, eargs = tfspec[0], list(tfspec[1:])
1136             unkeargs = set(eargs).difference(kneargs)
1137             if unkeargs:
1138                 raise SynderError(
1139                     _("@info",
1140                       "Unknown extra arguments for transformation function "
1141                       "requested in derivator constructor: %(arglist)s",
1142                       arglist=format_item_list(sorted(unkeargs))))
1143             eaords.extend([kneargs.index(x) + 1 for x in eargs])
1144         else:
1145             tf0 = tfspec
1146 
1147         if tf0 is None:
1148             return None
1149 
1150         def tf (*args):
1151             args0 = [args[x] for x in eaords]
1152             return tf0(*args0)
1153 
1154         return tf
1155 
1156 
1157     def import_string (self, string, ignhid=False):
1158         """
1159         Import string with derivations.
1160 
1161         @param string: the string to parse
1162         @type string: string
1163         @param ignhid: also make hidden derivations visible if C{True}
1164         @type ignhid: bool
1165 
1166         @returns: number of newly imported visible derivations
1167         @rtype: int
1168         """
1169 
1170         source = _parse_string(string)
1171         return self._process_import_visible(source, ignhid)
1172 
1173 
1174     def import_file (self, filename, ignhid=False):
1175         """
1176         Import file with derivations.
1177 
1178         @param filename: the path to file to parse
1179         @type filename: string
1180         @param ignhid: also make hidden derivations visible if C{True}
1181         @type ignhid: bool
1182 
1183         @returns: number of newly imported visible derivations
1184         @rtype: int
1185         """
1186 
1187         source = _parse_file(filename)
1188         return self._process_import_visible(source, ignhid)
1189 
1190 
1191     def _process_import_visible (self, source, ignhid):
1192 
1193         nnew = self._process_import(source)
1194         nvis = self._make_visible(source, ignhid)
1195         return (nvis, nnew)
1196 
1197 
1198     def _process_import (self, source):
1199 
1200         if source.name in self._imported_srcnames:
1201             return 0
1202 
1203         self._imported_srcnames.add(source.name)
1204 
1205         iderivs = []
1206         self._derivs_by_srcname[source.name] = iderivs
1207         idmap = {}
1208         self._deriv_by_srcname_idkey[source.name] = idmap
1209 
1210         # Construct wrapping derivations and file them by derivation keys.
1211         nadded = 0
1212         for rawderiv in source.derivs:
1213 
1214             # Create wrapper derivation for the raw derivation.
1215             deriv = self._Deriv(rawderiv, self._dkeyitf)
1216 
1217             # Eliminate internal key conflicts of this derivation.
1218             self._eliminate_conflicts(deriv, idmap, None, lambda x: x.idkeys)
1219 
1220             # Register internal derivation in this source.
1221             if deriv.idkeys:
1222                 iderivs.append(deriv)
1223                 for idkey in deriv.idkeys:
1224                     idmap[idkey] = deriv
1225                 nadded += 1
1226 
1227         # Import included sources.
1228         for incsource in source.incsources:
1229             nadded += self._process_import(incsource)
1230 
1231         return nadded
1232 
1233 
1234     def _make_visible (self, source, ignhid):
1235 
1236         if source.name in self._visible_srcnames:
1237             return 0
1238 
1239         self._visible_srcnames.add(source.name)
1240 
1241         nvis = 0
1242 
1243         for deriv in self._derivs_by_srcname[source.name]:
1244             if not ignhid and all([x.hidden for x in deriv.base.syns]):
1245                 continue
1246 
1247             # Eliminate external key conflicts of this derivation.
1248             self._eliminate_conflicts(deriv, self._visible_deriv_by_dkey,
1249                                       self._single_dkeys, lambda x: x.dkeys)
1250 
1251             # Register visible derivation in this source.
1252             if deriv.dkeys:
1253                 self._single_dkeys.add(tuple(deriv.dkeys)[0])
1254                 for dkey in deriv.dkeys:
1255                     self._visible_deriv_by_dkey[dkey] = deriv
1256                 nvis += 1
1257 
1258         return nvis
1259 
1260 
1261     class _Deriv:
1262 
1263         def __init__ (self, deriv, dkeyitf):
1264 
1265             self.base = deriv
1266 
1267             # Compute internal and external derivation keys from key syntagmas.
1268             self.idkeys = set()
1269             self.dkeys = set()
1270             for syn in deriv.syns:
1271                 synt = "".join([x.text for x in syn.segs
1272                                        if isinstance(x, _SDText)])
1273                 idkey = simplify(synt)
1274                 self.idkeys.add(idkey)
1275                 dkeys = dkeyitf(idkey) if dkeyitf else idkey
1276                 if dkeys is not None:
1277                     if not isinstance(dkeys, (tuple, list)):
1278                         dkeys = [dkeys]
1279                     self.dkeys.update(dkeys)
1280 
1281 
1282     def _eliminate_conflicts (self, deriv, kmap, kskeys, keyf):
1283 
1284         to_remove_keys = set()
1285         to_remove_keys_other = {}
1286         for key in keyf(deriv):
1287             oderiv = kmap.get(key)
1288             if oderiv is not None:
1289                 to_remove_keys.add(key)
1290                 if oderiv not in to_remove_keys_other:
1291                     to_remove_keys_other[oderiv] = set()
1292                 to_remove_keys_other[oderiv].add(key)
1293 
1294         noconfres_oderivs = []
1295         if self._strictkey or to_remove_keys == keyf(deriv):
1296             noconfres_oderivs.extend(list(to_remove_keys_other.keys()))
1297         else:
1298             for oderiv, keys in list(to_remove_keys_other.items()):
1299                 if keyf(oderiv) == keys:
1300                     noconfres_oderivs.append(oderiv)
1301 
1302         if noconfres_oderivs:
1303             # Clear both internal and external keys.
1304             deriv.dkeys.clear()
1305             deriv.idkeys.clear()
1306             eposf = lambda x: (x.base.parent.name, x.base.syns[0].pos[0])
1307             noconfres_oderivs.sort(key=eposf)
1308             pos1 = "%s:%d" % eposf(deriv)
1309             pos2s = ["%s:%d" % eposf(x) for x in noconfres_oderivs]
1310             pos2s = "\n".join(pos2s)
1311             warning(_("@info",
1312                       "Derivation at %(pos1)s eliminated due to "
1313                       "key conflict with the following derivations:\n"
1314                       "%(pos2list)s",
1315                       pos1=pos1, pos2list=pos2s))
1316         else:
1317             for key in to_remove_keys:
1318                 keyf(deriv).remove(key)
1319             for oderiv, keys in list(to_remove_keys_other.items()):
1320                 for key in keys:
1321                     keyf(oderiv).remove(key)
1322                     kmap.pop(key)
1323                     if kskeys is not None and key in kskeys:
1324                         kskeys.remove(key)
1325                         kskeys.add(tuple(keyf(oderiv))[0])
1326 
1327 
1328     def _resolve_dkey (self, dkey):
1329 
1330         dkrest = ()
1331         if self._dkeytf:
1332             dkey = self._dkeytf(dkey, self)
1333             if isinstance(dkey, tuple):
1334                 dkey, dkrest = dkey[0], dkey[1:]
1335 
1336         deriv = None
1337         if dkey is not None:
1338             deriv = self._visible_deriv_by_dkey.get(dkey)
1339             if deriv is None:
1340                 dkey = None
1341 
1342         return dkey, dkrest, deriv
1343 
1344 
1345     def _resolve_pkey (self, pkey, dkey, dkrest):
1346 
1347         pkrest = ()
1348         if self._pkeytf:
1349             pkey = self._pkeytf(pkey, dkey, dkrest, self)
1350             if isinstance(pkey, tuple):
1351                 pkey, pkrest = pkey[0], pkey[1:]
1352 
1353         return pkey, pkrest
1354 
1355 
1356     def _resolve_env (self, env, dkey, dkrest):
1357 
1358         if self._envtf:
1359             env = self._envtf(env, dkey, dkrest, self)
1360             if env is not None:
1361                 env = self._normenv(env)
1362 
1363         return env
1364 
1365 
1366     def get2 (self, dkey, pkey, defval=None):
1367         """
1368         Get property value by derivation key and property key.
1369 
1370         @param dkey: derivation key
1371         @type dkey: string
1372         @param pkey: property key
1373         @type pkey: string
1374         @param defval: the value to return if the property does not exist
1375         @type defval: string
1376 
1377         @returns: the property value
1378         @rtype: string
1379         """
1380 
1381         dkey, dkrest, deriv = self._resolve_dkey(dkey)
1382         if dkey is None:
1383             return defval
1384 
1385         pkey, pkrest = self._resolve_pkey(pkey, dkey, dkrest)
1386         if pkey is None:
1387             return defval
1388 
1389         env = self._resolve_env(self._env, dkey, dkrest)
1390         if env is None:
1391             return defval
1392 
1393         mtsegs = []
1394         for env1 in env:
1395             tsegs = self._getprops(deriv, env1).get(pkey)
1396             mtsegs.append(tsegs)
1397 
1398         if self._pvaltf:
1399             pval = self._pvaltf(mtsegs, pkey, dkey, env,
1400                                 dkrest, pkrest, self)
1401         else:
1402             pval = None
1403             for tsegs in mtsegs:
1404                 if tsegs is not None:
1405                     pval = simplify("".join([x[0] for x in tsegs]))
1406                     break
1407 
1408         return pval if pval is not None else defval
1409 
1410 
1411     def _getprops (self, deriv, env1):
1412 
1413         # Try to fetch derivation from cache.
1414         props = self._props_by_deriv_env1.get((deriv, env1))
1415         if props is not None:
1416             return props
1417 
1418         # Construct raw derivation and extract key-value pairs.
1419         rprops = self._derive(deriv, env1)
1420         props = dict([(x, self._simple_segs(y[0])) for x, y in list(rprops.items())
1421                                                    if not y[1].canceling])
1422 
1423         # Internally transform keys if requested.
1424         if self._pkeyitf:
1425             nprops = []
1426             for pkey, segs in list(props.items()):
1427                 pkey = self._pkeyitf(pkey)
1428                 if pkey is not None:
1429                     nprops.append((pkey, segs))
1430             props = dict(nprops)
1431 
1432         self._props_by_deriv_env1[(deriv, env1)] = props
1433         return props
1434 
1435 
    def _derive (self, deriv, env1):
        """
        Derive raw properties of a derivation for one environment chain.

        Environments of the chain are processed from the last to
        the first, and properties produced by an environment replace
        same-keyed properties produced before it; hence environments
        earlier in the chain take precedence.
        Results are cached per C{(deriv, env1)} pair
        (so C{env1} has to be hashable).

        Expansions are resolved through L{_expand}; errors which it
        raises are not handled here. Note that the cache is filled only
        after derivation completes, so circular expansions are
        not detected (see the TODO in L{_expand}).

        @param deriv: wrapper derivation to derive
        @param env1: environment fallback chain (sequence of names)

        @returns: for each property key, the pair of list of segments
            of the property value and the key object from the source
        @rtype: {string: ([segment*], key)}
        """

        # Try to fetch raw derivation from cache.
        dprops = self._raw_props_by_deriv_env1.get((deriv, env1))
        if dprops is not None:
            return dprops

        # Derivator core.
        dprops = {}
        env = None
        envs_by_name = dict([(x.name, x) for x in deriv.base.envs])
        # Go from the lowest-priority environment of the chain upwards.
        for env0 in reversed(env1):
            env = envs_by_name.get(env0)
            if env is None:
                # Derivation defines nothing in this environment.
                continue
            for prop in env.props:
                # Segments collected while there are no current properties.
                fsegs = []
                # Current properties of this source property:
                # segments collected so far and the key object, by key.
                cprops = dict([(simplify(x.name), ([], x)) for x in prop.keys])
                # Keys stated explicitly in the source property.
                ownpkeys = set(cprops.keys())
                for seg in prop.segs:
                    if isinstance(seg, _SDExp):
                        eprops = self._expand(seg, deriv, env1)
                        # Expansion is "keyed" unless it produced exactly
                        # one property and that one has empty key.
                        if len(eprops) != 1 or list(eprops.keys())[0]:
                            if cprops:
                                # Join expanded properties to current ones
                                # with same keys; a current property with
                                # no counterpart in expansion is dropped.
                                # Cut properties on either side do not join.
                                for cpkey, csegskey in list(cprops.items()):
                                    if not csegskey[1].cut:
                                        esegskey = eprops.get(cpkey)
                                        if esegskey is not None:
                                            if not esegskey[1].cut:
                                                csegskey[0].extend(esegskey[0])
                                        else:
                                            cprops.pop(cpkey)
                                            if not cprops:
                                                break
                                # Cut properties from the expansion are
                                # taken over as they are.
                                # NOTE(review): the pair is stored without
                                # copying, so its segment list is the one
                                # returned by _expand; confirm no later
                                # in-place extension can reach it.
                                for epkey, esegskey in list(eprops.items()):
                                    if esegskey[1].cut:
                                        cprops[epkey] = esegskey
                                # Nothing left to build: abandon
                                # remaining segments of this property.
                                if not cprops:
                                    break
                            else:
                                # No current properties: each expanded
                                # property becomes a current one, on a copy
                                # of its segments, prefixed by segments
                                # collected so far unless it is cut.
                                for pkey, (esegs, key) in list(eprops.items()):
                                    csegs = esegs[:]
                                    if not key.cut:
                                        csegs[:0] = fsegs
                                    cprops[pkey] = (csegs, key)
                        else:
                            # Keyless expansion: its segments are plain
                            # content, appended like ordinary segments.
                            esegs = list(eprops.values())[0][0]
                            if cprops:
                                for pkey, (csegs, key) in list(cprops.items()):
                                    if not key.cut or pkey in ownpkeys:
                                        csegs.extend(esegs)
                            else:
                                fsegs.extend(esegs)
                    elif cprops:
                        # Ordinary segment: append to every current
                        # property, other than cut properties which
                        # were not stated in this source property.
                        for pkey, (csegs, key) in list(cprops.items()):
                            if not key.cut or pkey in ownpkeys:
                                csegs.append(seg)
                    else:
                        fsegs.append(seg)
                # A canceling property removes the non-canceling property
                # of the same key derived so far, and is itself discarded.
                for pkey, (segs, key) in list(cprops.items()):
                    if key.canceling and pkey in dprops:
                        osegskey = dprops.get(pkey)
                        if osegskey is not None and not osegskey[1].canceling:
                            dprops.pop(pkey)
                            cprops.pop(pkey)
                # Properties built here replace earlier ones of same keys.
                dprops.update(cprops)

        # Eliminate leading and trailing empty text segments.
        # (map is used for the side effect only; text segments
        # are stripped in place.)
        list(map(self._trim_segs, [x[0] for x in list(dprops.values())]))

        self._raw_props_by_deriv_env1[(deriv, env1)] = dprops
        return dprops
1508 
1509 
1510     def _expand (self, exp, pderiv, env1):
1511         # TODO: Discover circular expansion paths.
1512 
1513         # Fetch the derivation pointed to by the expansion.
1514         idkey = simplify(exp.ref)
1515         source = pderiv.base.parent
1516         deriv = self._deriv_by_srcname_idkey[source.name].get(idkey)
1517         if deriv is None:
1518             for isource in reversed(source.incsources):
1519                 deriv = self._deriv_by_srcname_idkey[isource.name].get(idkey)
1520                 if deriv is not None:
1521                     break
1522         if deriv is None:
1523             raise SynderError(
1524                 _("@info",
1525                   "Expansion '%(ref)s' does not reference a known derivation.",
1526                   ref=exp.ref, file=source.name, line=exp.pos[0]),
1527                 5010, source.name, exp.pos)
1528 
1529         # Derive the referenced derivation.
1530         props = self._derive(deriv, env1)
1531 
1532         # Drop terminal properties.
1533         nprops = []
1534         for pkey, (segs, key) in list(props.items()):
1535             if not key.terminal:
1536                 nprops.append((pkey, (segs, key)))
1537         props = dict(nprops)
1538 
1539         # Apply expansion mask.
1540         if exp.mask is not None:
1541             # Eliminate all obtained keys not matching the mask.
1542             # Reduce by mask those that match.
1543             nprops = []
1544             for pkey, segskey in list(props.items()):
1545                 if len(pkey) != len(exp.mask):
1546                     continue
1547                 mpkey = ""
1548                 for c, cm in zip(pkey, exp.mask):
1549                     if cm != _ch_exp_mask_pl:
1550                         if cm != c:
1551                             mpkey = None
1552                             break
1553                     else:
1554                         mpkey += c
1555                 if mpkey is not None:
1556                     nprops.append((mpkey, segskey))
1557             props = dict(nprops)
1558 
1559         # Apply key extension.
1560         if exp.kext is not None:
1561             nprops = []
1562             for pkey, (segs, key) in list(props.items()):
1563                 npkey = exp.kext.replace(_ch_exp_kext_pl, pkey)
1564                 nprops.append((npkey, (segs, key)))
1565             props = dict(nprops)
1566 
1567         # Apply capitalization.
1568         if exp.caps is not None:
1569             chcaps = first_to_upper if exp.caps else first_to_lower
1570             nprops = []
1571             for pkey, (segs, key) in list(props.items()):
1572                 chcapsed = False
1573                 nsegs = []
1574                 for seg in segs:
1575                     if (    not chcapsed
1576                         and isinstance(seg, _SDText) and seg.text.strip()
1577                     ):
1578                         nseg = copy.copy(seg)
1579                         nseg.text = chcaps(seg.text)
1580                         chcapsed = True
1581                         nsegs.append(nseg)
1582                     else:
1583                         nsegs.append(seg)
1584                 nprops.append((pkey, (nsegs, key)))
1585             props = dict(nprops)
1586 
1587         if not props:
1588             raise SynderError(
1589                 _("@info",
1590                   "Expansion '%(ref)s' expands into nothing.",
1591                   ref=exp.ref, file=source.name, line=exp.pos[0]),
1592                 5020, source.name, exp.pos)
1593 
1594         return props
1595 
1596 
1597     def _trim_segs (self, segs):
1598 
1599         for i0, di, stripf in (
1600             (0, 1, str.lstrip),
1601             (len(segs) - 1, -1, str.rstrip),
1602         ):
1603             i = i0
1604             while i >= 0 and i < len(segs):
1605                 if isinstance(segs[i], _SDText):
1606                     segs[i].text = stripf(segs[i].text)
1607                     if segs[i].text:
1608                         break
1609                 i += di
1610 
1611 
1612     def _simple_segs (self, segs):
1613 
1614         # Add sentries.
1615         if not segs:
1616             segs = [_SDText(None, None, "")]
1617         if not isinstance(segs[0], _SDTag):
1618             segs = [_SDTag(None, None)] + segs
1619         if not isinstance(segs[-1], _SDText):
1620             segs = segs + [_SDText(None, None, "")]
1621 
1622         # Construct simplified segments: [(text, [tagname...])...]
1623         tsegs = []
1624         i = 0
1625         while i < len(segs):
1626             # Tag names for the next piece of text.
1627             tags = segs[i].names
1628             # Join contiguous text segments into single plain text.
1629             i += 1
1630             i0 = i
1631             while i < len(segs) and isinstance(segs[i], _SDText):
1632                 i += 1
1633             text = "".join([x.text for x in segs[i0:i]])
1634             # Collect simplified segment.
1635             tsegs.append((text, tags))
1636 
1637         return tsegs
1638 
1639 
1640     def get (self, ckey, defval=None):
1641         """
1642         Get property value by compound key.
1643 
1644         @param ckey: compound key
1645         @type ckey: string
1646         @param defval: the value to return if the property does not exist
1647         @type defval: string
1648 
1649         @returns: the property value
1650         @rtype: string
1651         """
1652 
1653         # Split the compound key into derivation and property keys.
1654         lst = ckey.rsplit(self._ckeysep, 1)
1655         if len(lst) < 2:
1656             return defval
1657         dkey, pkey = lst
1658 
1659         return self.get2(dkey, pkey, defval)
1660 
1661 
1662     def dkeys (self, single=False):
1663         """
1664         Get list of all derivation keys.
1665 
1666         For derivations accessible through more than one derivation
1667         key, by default all of them are included in the result.
1668         If instead only a single random of those keys is wanted
1669         (i.e. strictly one key per derivation), C{single} can
1670         be set to C{True}.
1671 
1672         @param single: whether to return a single key for each derivation
1673         @type single: param
1674 
1675         @returns: list of derivation keys
1676         @rtype: [string*]
1677         """
1678 
1679         if not single:
1680             return list(self._visible_deriv_by_dkey.keys())
1681         else:
1682             return self._single_dkeys
1683 
1684 
1685     def syns (self, dkey):
1686         """
1687         Get list of key syntagmas by derivation key.
1688 
1689         Key syntagmas are always returned in the order in which
1690         they appear in the derivation.
1691         If no derivation is found for the given key,
1692         an empty list is returned.
1693 
1694         @param dkey: derivation key
1695         @type dkey: string
1696 
1697         @returns: key syntagmas
1698         @rtype: [string*]
1699         """
1700 
1701         dkey, dkrest, deriv = self._resolve_dkey(dkey)
1702         if dkey is None:
1703             return []
1704 
1705         rsyns = []
1706         for syn in deriv.base.syns:
1707             if not syn.hidden:
1708                 tsegs = self._simple_segs(syn.segs)
1709                 if self._ksyntf:
1710                     rsyn = self._ksyntf(tsegs, dkey, dkrest, self)
1711                 else:
1712                     rsyn = simplify("".join([x[0] for x in tsegs]))
1713                 if rsyn is not None:
1714                     rsyns.append(rsyn)
1715 
1716         return rsyns
1717 
1718 
1719     def altdkeys (self, dkey):
1720         """
1721         Get list of all derivation keys pointing to same entry as given key.
1722 
1723         @param dkey: derivation key
1724         @type dkey: string
1725 
1726         @returns: alternative derivation keys
1727         @rtype: [string*]
1728         """
1729 
1730         dkey, dkrest, deriv = self._resolve_dkey(dkey)
1731         if dkey is None:
1732             return []
1733 
1734         return deriv.dkeys
1735 
1736 
1737     def pkeys (self, dkey):
1738         """
1739         Get set of property keys available for given derivation key.
1740 
1741         If no derivation is found for the given key,
1742         an empty set is returned.
1743 
1744         @param dkey: derivation key
1745         @type dkey: string
1746 
1747         @returns: property keys
1748         @rtype: set(string*)
1749         """
1750 
1751         dkey, dkrest, deriv = self._resolve_dkey(dkey)
1752         if dkey is None:
1753             return set()
1754 
1755         env = self._resolve_env(self._env, dkey, dkrest)
1756         if env is None:
1757             return set()
1758 
1759         pkeys = set()
1760         for env1 in env:
1761             props = self._getprops(deriv, env1)
1762             pkeys.update(list(props.keys()))
1763 
1764         return pkeys
1765 
1766 
1767     def props (self, dkey):
1768         """
1769         Get dictionary of property values by property keys for
1770         given derivation key.
1771 
1772         If no derivation is found for the given key,
1773         an empty dictionary is returned.
1774 
1775         @param dkey: derivation key
1776         @type dkey: string
1777 
1778         @returns: property dictionary
1779         @rtype: {(string, string)*}
1780         """
1781 
1782         # TODO: Implement more efficiently.
1783         props = dict([(x, self.get2(dkey, x)) for x in self.pkeys(dkey)])
1784 
1785         return props
1786 
1787 
1788     def envs (self, dkey):
1789         """
1790         Get list of all explicitly defined environments in given derivation.
1791 
1792         "Explicitly" means environments mentioned in the derivation itself,
1793         and not those inherited through expansions.
1794 
1795         @param dkey: derivation key
1796         @type dkey: string
1797 
1798         @returns: explicit environment names
1799         @rtype: [string*]
1800         """
1801 
1802         dkey, dkrest, deriv = self._resolve_dkey(dkey)
1803         if dkey is None:
1804             return []
1805 
1806         return [x.name for x in deriv.base.envs]
1807 
1808 
1809     def source_name (self, dkey):
1810         """
1811         Get the name of the source in which the derivation is found.
1812 
1813         If no derivation is found for the given key, C{None} is returned.
1814 
1815         @param dkey: derivation key
1816         @type dkey: string
1817 
1818         @returns: name of the source
1819         @rtype: string
1820         """
1821 
1822         dkey, dkrest, deriv = self._resolve_dkey(dkey)
1823         if dkey is None:
1824             return None
1825 
1826         srcname = deriv.base.parent.name.split(os.path.sep)[-1]
1827         srcname = srcname[:srcname.rfind(".")]
1828 
1829         return srcname
1830 
1831 
1832     def source_pos (self, dkey):
1833         """
1834         Get the position in the source where the derivation is found.
1835 
1836         Position is a 3-tuple of file path, line and column numbers.
1837         If no derivation is found for the given key, C{None} is returned.
1838 
1839         @param dkey: derivation key
1840         @type dkey: string
1841 
1842         @returns: source position
1843         @rtype: (string, int, int)
1844         """
1845 
1846         dkey, dkrest, deriv = self._resolve_dkey(dkey)
1847         if dkey is None:
1848             return None
1849 
1850         path = deriv.base.parent.name
1851         lno, cno = deriv.base.pos
1852 
1853         return path, lno, cno
1854 
1855 
1856     def keys (self):
1857         """
1858         Get the list of all compound keys.
1859 
1860         @returns: compound keys
1861         @rtype: [string*]
1862         """
1863 
1864         return list(self.keys())
1865 
1866 
1867     def values (self):
1868         """
1869         Get the list of all property values.
1870 
1871         @returns: property values
1872         @rtype: [string*]
1873         """
1874 
1875         return list(self.values())
1876 
1877 
1878     def items (self):
1879         """
1880         Get the list of all pairs of compound keys and property values.
1881 
1882         @returns: compound keys and property values
1883         @rtype: [(string, string)*]
1884         """
1885 
1886         return list(self.items())
1887 
1888 
1889     def __contains__ (self, ckey):
1890         """
1891         Check if the compound key is present in the derivator.
1892 
1893         @returns: C{True} if present, C{False} otherwie
1894         @rtype: bool
1895         """
1896 
1897         return self.get(ckey) is not None
1898 
1899 
1900     def __getitem__ (self, ckey):
1901         """
1902         Get property value by compound key, in dictionary notation.
1903 
1904         Like L{get}, but raises C{KeyError} if key is not found.
1905 
1906         @returns: property value
1907         @rtype: string
1908         """
1909 
1910         res = self.get(ckey)
1911         if res is None:
1912             raise KeyError(ckey)
1913 
1914         return res
1915 
1916 
1917     def __iter__ (self):
1918         """
1919         Iterate through all compound keys, in random order.
1920 
1921         @returns: iterator through compound keys
1922         @rtype: iterator(string)
1923         """
1924 
1925         return iter(self.keys())
1926 
1927 
1928     def iterkeys (self):
1929         """
1930         Iterate through all compound keys, in random order.
1931 
1932         @returns: iterator through compound keys
1933         @rtype: iterator(string)
1934         """
1935 
1936         return self._Iterator(self._make_iter(lambda x: x))
1937 
1938 
1939     def itervalues (self):
1940         """
1941         Iterate through all property values, in random order.
1942 
1943         @returns: iterator through property values
1944         @rtype: iteratorstring)
1945         """
1946 
1947         return self._Iterator(self._make_iter(lambda x: self.get(x)))
1948 
1949 
1950     def iteritems (self):
1951         """
1952         Iterate through all pairs of compound key and property value,
1953         in random order.
1954 
1955         @returns: iterator through compound key property value pairs
1956         @rtype: iterator((string, string))
1957         """
1958 
1959         return self._Iterator(self._make_iter(lambda x: (x, self.get(x))))
1960 
1961 
1962     class _Iterator (object):
1963 
1964         def __init__ (self, it):
1965             self._it = it
1966 
1967         def __iter__ (self):
1968             return self
1969 
1970         def __next__ (self):
1971             return self._it() # expected to raise StopIteration on its own
1972 
1973 
1974     def _make_iter (self, keyf):
1975 
1976         it = iter(self._visible_deriv_by_dkey)
1977         gdat = [None, []] # dkey, pkeys
1978         def next ():
1979             while not gdat[1]:
1980                 gdat[0] = next(it) # will raise StopIteration
1981                 gdat[1] = self.pkeys(gdat[0])
1982             dkey = gdat[0]
1983             pkey = gdat[1].pop()
1984             return keyf(dkey + self._ckeysep + pkey)
1985 
1986         return next
1987 
1988 
1989     def empty_pcache (self):
1990 
1991         self._props_by_deriv_env1 = {}
1992         self._raw_props_by_deriv_env1 = {}
1993