File indexing completed on 2024-11-03 11:24:03
# -*- coding: UTF-8 -*-

"""
Derive forms and properties of syntagmas by macro expansion.

This module provides facilities for macro derivations on syntagmas.
It consists of two elements: the text format for defining macro derivations,
and the derivator class which reads and processes these definitions.
The derivator class is documented within this module,
while the syntax and semantics of syntagma derivations are documented
in the user manual, at C{doc/user/lingo.docbook#sec-lgsynder}.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import copy
import hashlib
import locale
import os
import pickle
import re

from pology import PologyError, _, n_
from pology.fsops import str_to_unicode
from pology.normalize import simplify
from pology.report import warning, format_item_list
from pology.resolve import first_to_upper, first_to_lower


# ----------------------------------------
# Error handling.

class SynderError (PologyError):

    def __init__ (self, message, code, source=None, pos=None):
        """
        Constructor.

        All the parameters are made available as instance variables.

        @param message: description of what went wrong
        @type message: string
        @param code: numerical ID of the problem
        @type code: int
        @param source: name of the source in which the problem occurred
        @type source: string
        @param pos: line or line and column in the source
            in which the problem occurred
        @type pos: int or (int, int)
        """

        self.message = message
        self.code = code
        self.source = source
        if isinstance(pos, tuple):
            self.line, self.col = pos
        else:
            self.line = pos
            self.col = None

        PologyError.__init__(self, str(self))


    def __unicode__ (self):
        # Format the error with as much location information as is available:
        # code only, code+source, code+source+line, or code+source+line+column.
        # NOTE(review): assumes PologyError wires str() through __unicode__;
        # confirm against the pology base class.

        if self.source is None:
            s = _("@info context of error",
                  "[synder-%(code)d]: %(msg)s",
                  code=self.code, msg=self.message)
        elif self.line is None:
            s = _("@info context of error",
                  "[synder-%(code)d] in %(source)s: %(msg)s",
                  code=self.code, msg=self.message, source=self.source)
        elif self.col is None:
            s = _("@info context of error",
                  "[synder-%(code)d] at %(source)s:%(line)d: %(msg)s",
                  code=self.code, msg=self.message, source=self.source,
                  line=self.line)
        else:
            s = _("@info context of error",
                  "[synder-%(code)d] at %(source)s:%(line)d:%(col)d: %(msg)s",
                  code=self.code, msg=self.message, source=self.source,
                  line=self.line, col=self.col)

        return str(s)


# ----------------------------------------
# Caching.

# Cache for file sources, by absolute path.
_parsed_sources = {}


def empty_source_cache ():
    """
    Clear all cached sources.

    When file with derivations is loaded, its parsed form is cached,
    such that future load instructions on that same path
    (e.g. when the path is included from another file)
    do not waste any extra time and memory.
    This function erases all sources from the cache,
    when loading files anew on future load instructions is desired.
    """

    _parsed_sources.clear()


# ----------------------------------------
# Parsing.

# Special characters of the derivation syntax.
_ch_escape = "\\"
_ch_comment = "#"
_ch_props = ":"
_ch_env = "@"
_ch_ksyn_hd = "|"
_ch_prop_sep = ","
_ch_pkey_sep = "&"
_ch_pval = "="
_ch_exp = "|"
_ch_cutprop = "!"
_ch_termprop = "."
_ch_remprop = "^"
_ch_exp_mask = "~"
_ch_exp_mask_pl = "."
_ch_exp_kext = "%"
_ch_exp_kext_pl = "*"
_ch_exp_upc = "^"
_ch_exp_lwc = "`"
_ch_tag = "~"
_ch_tag_sep = "&"
_ch_grp_opn = "{"
_ch_grp_cls = "}"
_ch_inc = ">"

# Whitespace which delimits tokens in strict positions.
_strict_ws = " \t\n"
_ch_nl = "\n"


# Drive the context-handler state machine over the whole input string,
# building and returning the parsed source object.
def _parse_string_w (instr, srcname):

    ctx = _ctx_void
    dobj = _SDSource(srcname)
    ctx_stack = []

    pos = 0
    bpos = (1, 1)
    while True:
        handler = _ctx_handlers[ctx]
        nctx, ndobj, descend, pos, bpos = handler(dobj, instr, pos, bpos)
        if nctx is not None:
            if descend:
                ctx_stack.append((ctx, dobj))
            ctx, dobj = nctx, ndobj
        elif ctx_stack:
            ctx, dobj = ctx_stack.pop()
        else:
            return dobj


# Counter for naming anonymous input streams (boxed for in-place update).
_anonsrc_count = [0]

def _parse_string (instr, srcname=None):

    # Try to return parsed source from cache.
    if srcname in _parsed_sources:
        return _parsed_sources[srcname]

    if srcname is None:
        srcname = _("@item automatic name for anonymous input stream",
                    "<stream-%(num)s>",
                    num=_anonsrc_count[0]).resolve("none")
        _anonsrc_count[0] += 1

    source = _parse_string_w(instr, srcname)

    # Cache the source by name (before processing includes).
    _parsed_sources[srcname] = source

    # Load included sources.
    source.incsources = _include_sources(source, source.incsources)

    return source


def _parse_file (path):

    # Try to return parsed source from cache.
    apath = os.path.abspath(path)
    if apath in _parsed_sources:
        return _parsed_sources[apath]

    # Try to load parsed source from disk.
    source = _read_parsed_file(apath)
    if source:
        # Set attributes discarded on compiling.
        source.name = path

    # If still no hit, compile the file.
    if source is None:
        source = _compile_file_w(path)

    # Cache the source by absolute path (before processing includes).
    _parsed_sources[apath] = source

    # Load included sources.
    source.incsources = _include_sources(source, source.incsources)

    return source


def _compile_file_w (path, cpath=None):

    if cpath is None:
        cpath = path + _compfile_suff

    # Parse the file.
    with open(path, "rb") as ifs:
        lines = ifs.readlines()

    # Optional encoding declaration in the first line, "# ~~~ ENC ~~~".
    m = re.search(br"^#\s+~~~\s+(\S+)\s+~~~\s*$", lines[0]) if lines else None
    # FIX: m.group(1) is bytes (bytes regex); str.decode() needs a str
    # encoding name, so it must itself be decoded first.
    enc = m.group(1).decode("ascii") if m else "UTF-8"
    lines = [x.decode(enc) for x in lines]

    instr = "".join(lines)
    source = _parse_string_w(instr, path)

    # Write out parsed file.
    # Temporarily discard attributes relative to importing.
    iname = source.name
    source.name = None
    _write_parsed_file(source, path, cpath)
    source.name = iname

    return source


def compile_file (path, cpath=None, doraise=False):
    """
    Import file with derivations.

    If the compile file path C{cpath} is not given,
    it is constructed as C{path} plus standard extension suffix.

    If the file cannot be compiled, the behavior depends on C{doraise}.
    If C{doraise} is C{False}, a warning is reported to standard error;
    if C{doraise} is C{True}, an L{SynderError} exception is raised.

    @param path: the path to file to compile
    @type path: string
    @param cpath: the path to compiled file
    @type cpath: string
    @param doraise: whether to raise an exception on failure
    @type doraise: bool

    @returns: C{True} if the file was successfully compiled
    @rtype: bool
    """

    try:
        _compile_file_w(path, cpath)
    except Exception as e:
        if doraise:
            raise
        else:
            warning(_("@info",
                      "Derivation file '%(file)s' cannot be compiled "
                      "due to the following error:\n"
                      "%(msg)s",
                      file=path, msg=str_to_unicode(str(e))))
            return False
    else:
        return True


def _include_sources (source, incpaths):

    incsources = []
    incroot = os.path.dirname(os.path.abspath(source.name))
    for incpath in incpaths:
        # If included path relative, make it relative to current source.
        # (os.path.isabs instead of startswith(os.path.sep), for portability.)
        if os.path.isabs(incpath):
            path = incpath
        else:
            path = os.path.join(incroot, incpath)
        if not os.path.isfile(path):
            # FIXME: Position of include directive in the file lost,
            # propagate it to this place to report error properly.
            raise SynderError(
                _("@info",
                  "Included file '%(name)s' not found at '%(path)s'.",
                  name=incpath, path=path), 1101, source.name)
        incsource = _parse_file(path)
        incsources.append(incsource)

    return incsources


# Compiled-file suffix, data-format version, and hex digest length.
_compfile_suff = "c"
_compfile_dver = b"0003"
_compfile_hlen = hashlib.md5().digest_size * 2

def _write_parsed_file (source, path, cpath=None):

    if cpath is None:
        cpath = path + _compfile_suff

    # Hash the original file, to detect staleness on future reads.
    try:
        fh = open(path, "rb")
    except OSError:
        return False
    with fh:
        fhash = hashlib.md5(fh.read()).hexdigest()

    try:
        fhc = open(cpath, "wb")
    except OSError:
        return False
    with fhc:
        # Write out data version and file hash, then the pickled source.
        fhc.write(_compfile_dver)
        fhc.write(fhash.encode() + b"\n")
        pickle.dump(source, fhc, 2)

    return True


def _read_parsed_file (path):

    cpath = path + _compfile_suff
    try:
        fhc = open(cpath, "rb")
    except OSError:
        return None
    with fhc:
        try:
            fh = open(path, "rb")
        except OSError:
            return None
        with fh:
            fhash = hashlib.md5(fh.read()).hexdigest()

        # Check if data version and file hashes match.
        fdverc = fhc.read(len(_compfile_dver))
        if fdverc != _compfile_dver:
            return None
        fhashc = fhc.read(_compfile_hlen + 1)[:-1]
        # FIX: the stored hash is bytes while the computed one is str;
        # without encoding, the comparison always failed in Python 3
        # and the compiled cache was never used.
        if fhash.encode() != fhashc:
            return None

        # Load the compiled source.
        # NOTE: pickle is only safe on trusted, locally compiled files.
        source = pickle.load(fhc)

    return source


# ----------------------------------------
# Parsing context handlers.
#
# Each handler returns (new context, new object, descend?, pos, bpos);
# a None context means "pop back to the parent context".

def _ctx_handler_void (source, instr, pos, bpos):

    obpos = bpos
    testsep = lambda c: (c not in _strict_ws and [""] or [None])[0]
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep, wesc=False)

    if sep is not None:
        indent = instr[pos - bpos[1] + 1:pos]
        if instr[pos] == _ch_inc:
            return _ctx_inc, source, True, pos, bpos
        elif instr[pos] == _ch_env:
            if not source.derivs:
                raise SynderError(
                    _("@info",
                      "No derivation yet for which to start an environment."),
                    1002, source.name, bpos)
            if source.indenv is None:
                source.indenv = indent
            if indent != source.indenv:
                raise SynderError(
                    _("@info",
                      "Inconsistent indenting of environment head."),
                    1003, source.name, bpos)
            deriv = source.derivs[-1]
            env = _SDEnv(deriv, bpos)
            deriv.envs.append(env)
            return _ctx_env, env, True, pos, bpos
        else:
            if source.indderiv is None:
                source.indderiv = indent
            if indent != source.indderiv:
                raise SynderError(
                    _("@info",
                      "Inconsistent indenting of derivation head."),
                    1001, source.name, bpos)
            deriv = _SDDeriv(source, bpos)
            source.derivs.append(deriv)
            ksyn = _SDSyn(deriv, bpos)
            deriv.syns.append(ksyn)
            return _ctx_ksyn, ksyn, True, pos, bpos
    else:
        return None, None, False, pos, bpos


_seps_ksyn = set((_ch_prop_sep, _ch_props, _ch_tag, _ch_nl))

def _ctx_handler_ksyn (ksyn, instr, pos, bpos):

    opos, obpos = pos, bpos
    testsep = lambda c: c in _seps_ksyn and c or None
    substr, sep, pos, bpos, isesc = _move_to_sep(instr, pos, bpos, testsep,
                                                 repesc=True)

    # An unescaped head character right after leading whitespace
    # marks the key syntagma as hidden.
    substrls = substr.lstrip(_strict_ws)
    if (    not ksyn.segs and substrls.startswith(_ch_ksyn_hd)
        and not isesc[len(substr) - len(substrls)]
    ):
        ksyn.hidden = True
        substr = substr.lstrip()[len(_ch_ksyn_hd):]

    if substr or not ksyn.segs:
        ksyn.segs.append(_SDText(ksyn, obpos, substr))

    if sep == _ch_props:
        deriv = ksyn.parent
        env = _SDEnv(deriv, bpos)
        deriv.envs.append(env)
        prop = _SDProp(env, bpos)
        env.props.append(prop)
        return _ctx_pkey, prop, False, pos, bpos
    elif sep == _ch_prop_sep:
        deriv = ksyn.parent
        ksyn = _SDSyn(deriv, bpos)
        deriv.syns.append(ksyn)
        return _ctx_ksyn, ksyn, False, pos, bpos
    elif sep == _ch_tag:
        tag = _SDTag(ksyn, bpos)
        ksyn.segs.append(tag)
        return _ctx_tag, tag, True, pos, bpos
    else:
        raise SynderError(
            _("@info",
              "Unexpected end of derivation head started at %(line)d:%(col)d.",
              line=obpos[0], col=obpos[1]),
            1010, ksyn.parent.parent.name, bpos)


def _ctx_handler_env (env, instr, pos, bpos):

    obpos = bpos
    testsep = lambda c: c == _ch_props and c or None
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep)

    if sep == _ch_props:
        env.name = substr[len(_ch_env):]
        if not env.name:
            raise SynderError(
                _("@info",
                  "Empty environment name."),
                1021, env.parent.parent.name, obpos)
        for oenv in env.parent.envs[:-1]:
            if env.name == oenv.name:
                raise SynderError(
                    _("@info",
                      "Repeated environment name '%(env)s'.",
                      env=oenv.name),
                    1022, env.parent.parent.name, obpos)
        prop = _SDProp(env, bpos)
        env.props.append(prop)
        return _ctx_pkey, prop, False, pos, bpos
    else:
        raise SynderError(
            _("@info",
              "Unexpected end of environment head started at %(line)d:%(col)d.",
              line=obpos[0], col=obpos[1]),
            1020, env.parent.parent.name, bpos)


_seps_pkey = set((_ch_pval, _ch_prop_sep, _ch_exp, _ch_tag, _ch_nl))

def _ctx_handler_pkey (prop, instr, pos, bpos):

    opos, obpos = pos, bpos
    testsep = lambda c: c in _seps_pkey and c or None
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep)

    if sep == _ch_pval:
        substr = substr.strip()
        for rawkey in substr.split(_ch_pkey_sep):
            cut, terminal, canceling = [False] * 3
            # Strip trailing behavior markers, in any combination.
            while rawkey.endswith((_ch_cutprop, _ch_termprop, _ch_remprop)):
                if rawkey.endswith(_ch_cutprop):
                    cut = True
                    rawkey = rawkey[:-len(_ch_cutprop)]
                elif rawkey.endswith(_ch_termprop):
                    terminal = True
                    rawkey = rawkey[:-len(_ch_termprop)]
                elif rawkey.endswith(_ch_remprop):
                    canceling = True
                    rawkey = rawkey[:-len(_ch_remprop)]
            key = _SDKey(prop, obpos, rawkey, cut, terminal, canceling)
            prop.keys.append(key)
        return _ctx_pval, prop, False, pos, bpos
    else:
        # Backtrack and go into value context.
        return _ctx_pval, prop, False, opos, obpos


_seps_pval = set((_ch_prop_sep, _ch_exp, _ch_tag, _ch_nl))

def _ctx_handler_pval (prop, instr, pos, bpos):

    opos, obpos = pos, bpos
    testsep = lambda c: c in _seps_pval and c or None
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep)

    if substr:
        prop.segs.append(_SDText(prop, obpos, substr))

    if sep == _ch_prop_sep:
        env = prop.parent
        prop = _SDProp(env, bpos)
        env.props.append(prop)
        return _ctx_pkey, prop, False, pos, bpos
    elif sep == _ch_exp:
        exp = _SDExp(prop, bpos)
        prop.segs.append(exp)
        return _ctx_exp, exp, True, pos, bpos
    elif sep == _ch_tag:
        tag = _SDTag(prop, bpos)
        prop.segs.append(tag)
        return _ctx_tag, tag, True, pos, bpos
    else:
        return None, None, False, pos, bpos


_seps_exp = set([_ch_prop_sep, _ch_exp] + list(_strict_ws))

def _ctx_handler_exp (exp, instr, pos, bpos):

    # Braced expanders run to the closing brace; bare ones to a separator.
    if instr[pos:pos + len(_ch_grp_opn)] == _ch_grp_opn:
        enclosed = True
        testsep = lambda c: c in (_ch_grp_cls, _ch_nl) and c or None
    else:
        enclosed = False
        testsep = lambda c: (c in _seps_exp and [""] or [None])[0]

    obpos = bpos
    substr, sep, pos, bpos, isesc = _move_to_sep(instr, pos, bpos, testsep,
                                                 repesc=True)
    if enclosed and sep is None or sep == _ch_nl:
        raise SynderError(
            _("@info",
              "Unexpected end of expander started at %(line)d:%(col)d.",
              line=obpos[0], col=obpos[1]),
            1050, exp.parent.parent.parent.parent.name, bpos)

    if enclosed:
        substr = substr[len(_ch_grp_opn):]

    # Split off key extender and mask, back to front.
    p = substr.find(_ch_exp_kext)
    if p >= 0:
        exp.kext = substr[p + len(_ch_exp_kext):]
        substr = substr[:p]

    p = substr.find(_ch_exp_mask)
    if p >= 0:
        exp.mask = substr[p + len(_ch_exp_mask):]
        substr = substr[:p]

    # Unescaped leading capitalization directive.
    if substr.startswith(_ch_exp_upc) and not isesc[0]:
        exp.caps = True
        substr = substr[len(_ch_exp_upc):]
    elif substr.startswith(_ch_exp_lwc) and not isesc[0]:
        exp.caps = False
        substr = substr[len(_ch_exp_lwc):]

    exp.ref = substr

    return None, None, False, pos, bpos


_seps_tag = set([_ch_prop_sep, _ch_exp, _ch_tag] + list(_strict_ws))

def _ctx_handler_tag (tag, instr, pos, bpos):

    if instr[pos:pos + len(_ch_grp_opn)] == _ch_grp_opn:
        enclosed = True
        testsep = lambda c: c in (_ch_grp_cls, _ch_nl) and c or None
    else:
        enclosed = False
        # NOTE(review): _seps_tag is defined above but _seps_exp is used here,
        # as in the original; confirm whether _seps_tag was intended.
        testsep = lambda c: (c in _seps_exp and [""] or [None])[0]

    obpos = bpos
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep)
    if enclosed and sep is None or sep == _ch_nl:
        # FIX: the original referenced the undefined name 'exp' here
        # (NameError on this error path). Walk up the parents to the
        # source object, since a tag may be nested at different depths
        # (inside a key syntagma or a property value).
        src = tag.parent
        while hasattr(src, "parent"):
            src = src.parent
        raise SynderError(
            _("@info",
              "Unexpected end of tag started at %(line)d:%(col)d.",
              line=obpos[0], col=obpos[1]),
            1050, src.name, bpos)

    if enclosed:
        substr = substr[len(_ch_grp_opn):]

    tag.names = substr.split(_ch_tag_sep)

    return None, None, False, pos, bpos


def _ctx_handler_inc (source, instr, pos, bpos):

    # Skip include directive.
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, lambda c: c)

    # Parse include path.
    obpos = bpos
    testsep = lambda c: c == _ch_nl and c or None
    substr, sep, pos, bpos = _move_to_sep(instr, pos, bpos, testsep)

    incpath = substr.strip()
    if not incpath:
        raise SynderError(
            _("@info",
              "Empty target path in inclusion directive."),
            1100, source.name, obpos)

    # Add to included sources of this source.
    # Temporarily store paths, to be resolved into full sources later.
    source.incsources.append(incpath)

    return None, None, False, pos, bpos


# ----------------------------------------
# Parsing context IDs and handlers collected.
# IDs and handlers must be in the same order,
# as IDs are used to index handlers.

# Context IDs, used to index into _ctx_handlers.
(
    _ctx_void,
    _ctx_ksyn,
    _ctx_env,
    _ctx_pkey,
    _ctx_pval,
    _ctx_exp,
    _ctx_tag,
    _ctx_inc,
) = list(range(8))

# Handler table, indexed by the context IDs above (order is critical).
_ctx_handlers = (
    _ctx_handler_void,
    _ctx_handler_ksyn,
    _ctx_handler_env,
    _ctx_handler_pkey,
    _ctx_handler_pval,
    _ctx_handler_exp,
    _ctx_handler_tag,
    _ctx_handler_inc,
)

# ----------------------------------------
# Parsing utilities.

# Find the first separator admitted by the test function,
# skipping over escaped characters, continued lines and comments.
# Return substring to that point (without escapes, comments, line cont.),
# separator, and new position and block position (line, column).
# On request, also return list of escape indicators for each character
# in the substring (True where character was escaped).
# Separator test function takes single argument, the current character,
# and returns None if it is not admitted as separator.
# If end of input is reached without test function admitting a separator,
# separator is reported as None; otherwise, separator is reported as
# the return value from the test function.
0666 def _move_to_sep (instr, pos, bpos, testsep, wesc=True, repesc=False): 0667 0668 opos = pos 0669 substr = [] 0670 isesc = [] 0671 sep = None 0672 while sep is None and pos < len(instr): 0673 c = instr[pos] 0674 if c == _ch_comment: 0675 p = instr.find(_ch_nl, pos) 0676 if p < 0: 0677 pos += len(instr) - pos 0678 else: 0679 pos = p 0680 elif wesc and c == _ch_escape: 0681 pos += 1 0682 if pos < len(instr): 0683 if instr[pos] == _ch_nl: # line continuation 0684 pass 0685 # elif instr[pos] == _ch_ucode: # unicode hex 0686 else: 0687 substr.append(instr[pos]) 0688 isesc.append(True) 0689 pos += 1 0690 else: 0691 sep = testsep(c) 0692 if sep is not None: 0693 pos += len(sep) 0694 else: 0695 substr.append(c) 0696 isesc.append(False) 0697 pos += 1 0698 0699 # Update block position (line, column). 0700 rawsubstr = instr[opos:pos] 0701 p = rawsubstr.rfind(_ch_nl) 0702 if p >= 0: 0703 bpos = (bpos[0] + rawsubstr.count(_ch_nl), len(rawsubstr) - p) 0704 else: 0705 bpos = (bpos[0], bpos[1] + len(rawsubstr)) 0706 0707 ret = ("".join(substr), sep, pos, bpos) 0708 if repesc: 0709 ret = ret + (isesc,) 0710 return ret 0711 0712 0713 # ---------------------------------------- 0714 # Data structures. 0715 0716 # Synder source. 0717 class _SDSource: 0718 0719 def __init__ (self, name): 0720 0721 # Name of the source (filename, etc). 0722 self.name = name 0723 0724 # Derivations (SDDeriv). 0725 self.derivs = [] 0726 # Included sources (must be ordered). 0727 self.incsources = [] 0728 # Indentation for derivation and environments heads 0729 # (set on first parsed). 0730 self.indderiv = None 0731 self.indenv = None 0732 0733 ## Global directives. 0734 #... 0735 0736 0737 def __unicode__ (self): 0738 return ( "============> %s\n" % self.name 0739 + "\n".join(map(str, self.derivs))) 0740 def __str__ (self): 0741 return self.__unicode__().encode(locale.getpreferredencoding()) 0742 0743 0744 # Derivation. 
0745 class _SDDeriv: 0746 0747 def __init__ (self, parent, pos): 0748 0749 # Parent source and position in it. 0750 self.parent = parent 0751 self.pos = pos 0752 0753 # Key syntagmas (SDProp). 0754 self.syns = [] 0755 # Environments (SDEnv). 0756 self.envs = [] 0757 0758 def __unicode__ (self): 0759 return ( " -----> %d:%d\n" % self.pos 0760 + " " + "\n ".join(map(str, self.syns)) + "\n" 0761 + "\n".join(map(str, self.envs))) 0762 def __str__ (self): 0763 return self.__unicode__().encode(locale.getpreferredencoding()) 0764 0765 0766 # Environment. 0767 class _SDEnv: 0768 0769 def __init__ (self, parent, pos, name=""): 0770 0771 # Parent derivation and position in source. 0772 self.parent = parent 0773 self.pos = pos 0774 # Environment name. 0775 self.name = name 0776 0777 # Properties (SDProp). 0778 self.props = [] 0779 0780 def __unicode__ (self): 0781 return ( " @%s:%d:%d\n" % ((self.name,) + self.pos) 0782 + "\n".join(map(str, self.props))) 0783 def __str__ (self): 0784 return self.__unicode__().encode(locale.getpreferredencoding()) 0785 0786 0787 # Syntagma. 0788 class _SDSyn: 0789 0790 def __init__ (self, parent, pos, hidden=False): 0791 0792 # Parent derivation and position in source. 0793 self.parent = parent 0794 self.pos = pos 0795 # Visibility of the syntagma. 0796 self.hidden = hidden 0797 0798 # Syntagma segments (SDText, SDTag). 0799 self.segs = [] 0800 0801 def __unicode__ (self): 0802 return ( "{p:%d:%d|%s}=" % (self.pos + (self.hidden,)) 0803 + "".join(map(str, self.segs))) 0804 def __str__ (self): 0805 return self.__unicode__().encode(locale.getpreferredencoding()) 0806 0807 0808 # Property. 0809 class _SDProp: 0810 0811 def __init__ (self, parent, pos): 0812 0813 # Parent environment and position in source. 0814 self.parent = parent 0815 self.pos = pos 0816 0817 # Keys (SDKey). 0818 self.keys = [] 0819 # Value segments (SDText, SDExp, SDTag). 
0820 self.segs = [] 0821 0822 def __unicode__ (self): 0823 return ( " %d:%d " % self.pos 0824 + "k=" + "".join(map(str, self.keys)) + " " 0825 + "v=" + "".join(map(str, self.segs))) 0826 def __str__ (self): 0827 return self.__unicode__().encode(locale.getpreferredencoding()) 0828 0829 0830 # Property key. 0831 class _SDKey: 0832 0833 def __init__ (self, parent, pos, name="", 0834 cut=False, terminal=False, canceling=False): 0835 0836 # Parent property and position in source. 0837 self.parent = parent 0838 self.pos = pos 0839 # Key behaviors. 0840 self.name = name 0841 self.cut = cut 0842 self.terminal = terminal 0843 self.canceling = canceling 0844 0845 def __unicode__ (self): 0846 return "{k:%d:%d:%s|%s&%s}" % (self.pos + (self.name, 0847 self.cut, self.terminal, 0848 self.canceling)) 0849 def __str__ (self): 0850 return self.__unicode__().encode(locale.getpreferredencoding()) 0851 0852 0853 # Expander. 0854 class _SDExp: 0855 0856 def __init__ (self, parent, pos, ref=None, mask=None, caps=None, kext=None): 0857 0858 # Parent property and position in source. 0859 self.parent = parent 0860 self.pos = pos 0861 # Reference, selection mask, capitalization, key extender. 0862 self.ref = ref 0863 self.mask = mask 0864 self.caps = caps 0865 self.kext = kext 0866 0867 def __unicode__ (self): 0868 return "{e:%d:%d:%s|%s|%s|%s}" % (self.pos + (self.ref, self.mask, 0869 self.caps, self.kext)) 0870 def __str__ (self): 0871 return self.__unicode__().encode(locale.getpreferredencoding()) 0872 0873 0874 # Tag. 0875 class _SDTag: 0876 0877 def __init__ (self, parent, pos): 0878 0879 # Parent property and position in source. 0880 self.parent = parent 0881 self.pos = pos 0882 # Names associated to this tag. 0883 self.names = [] 0884 0885 def __unicode__ (self): 0886 return "{g:%d:%d:%s}" % (self.pos + ("+".join(self.names),)) 0887 def __str__ (self): 0888 return self.__unicode__().encode(locale.getpreferredencoding()) 0889 0890 0891 # Text segment. 
0892 class _SDText: 0893 0894 def __init__ (self, parent, pos, text=""): 0895 0896 # Parent property and position in source. 0897 self.parent = parent 0898 self.pos = pos 0899 # Text. 0900 self.text = text 0901 0902 def __unicode__ (self): 0903 return "{t:%d:%d:%s}" % (self.pos + (self.text,)) 0904 def __str__ (self): 0905 return self.__unicode__().encode(locale.getpreferredencoding()) 0906 0907 0908 # ---------------------------------------- 0909 # High level access. 0910 0911 class Synder (object): 0912 """ 0913 Derivator objects import sources of derivations 0914 and get queried for properties of syntagmas. 0915 0916 Lookup can be done by derivation key and property key, 0917 but also by single compound key (serialization of the previous two), 0918 to have interface and behavior similar to built-in dictionaries. 0919 0920 Basic usage is rather simple. If there are derivation files 0921 C{planets.sd} and {moons.sd}, they can be used like this:: 0922 0923 >>> sd = Synder() 0924 >>> sd.import_file("planets.sd") 0925 >>> sd.import_file("moons.sd") 0926 >>> 0927 >>> # Lookup of properties by derivation and property key. 0928 >>> sd.get2("Venus", "nom") 0929 Venera 0930 >>> sd.get2("Callisto", "nom") 0931 Kalisto 0932 >>> sd.get2("Foobar", "nom") 0933 None 0934 >>> # Lookup of properties by compound key. 0935 >>> sd["Venus-nom"] 0936 Venera 0937 >>> 0938 >>> # Iteration through properties by derivation keys. 0939 >>> for dkey in sd.dkeys(): print sd.get2(dkey, "nom") 0940 ... 0941 Venera 0942 Kalisto 0943 Merkur 0944 Jupiter 0945 … 0946 >>> # Iteration through properties by compound keys. 0947 >>> for ckey in sd: print sd[ckey] 0948 ... 0949 Venera 0950 Veneri 0951 Venerom 0952 … 0953 Merkuru 0954 Merkur 0955 Merkura 0956 … 0957 >>> # Querying for key syntagmas. 0958 >>> sd.syns("Venus") 0959 ['Venus'] 0960 >>> sd.syns("Iapetus") 0961 ['Iapetus', 'Japetus'] 0962 >>> sd.syns("Japetus") 0963 ['Iapetus', 'Japetus'] 0964 >>> 0965 >>> # Querying for property keys. 
0966 >>> sd.pkeys("Venus") 0967 ['gen', 'acc', 'nom', 'dat', 'gender'] 0968 0969 Syntax errors in derivations sources will raise L{SynderError} 0970 exceptions on import. 0971 Unresolvable conflicts in derivation keys will be reported 0972 as warning on import, and conflicted derivations will not be imported. 0973 Errors in expansions are not reported on import, but when 0974 the problematic derivation is queried; warnings are output, 0975 and C{None} (or default value) is returned for all properties. 0976 """ 0977 0978 def __init__ (self, 0979 env="", 0980 ckeysep="-", 0981 strictkey=False, 0982 dkeytf=None, dkeyitf=None, 0983 pkeytf=None, pkeyitf=None, 0984 pvaltf=None, 0985 ksyntf=None, 0986 envtf=None): 0987 """ 0988 Constructor of syntagma derivators. 0989 0990 The default resolution of derivation key conflicts, 0991 as described in module documentation, can be changed 0992 to strict resolution through C{strictkey} parameter. 0993 If C{strictkey} is C{True}, all key syntagmas must be unique. 0994 0995 Parameter C{env} is used to specify the environment from which 0996 the derivations are taken. In case no non-default environments 0997 have been used in derivations, C{env} is simply empty string. 0998 Otherwise, it can be: 0999 - a string specifying a non-default environment 1000 - a tuple specifying an environment fallback chain 1001 - a tuple of tuples, specifying more than one environment chain 1002 (Lists can also be used instead of tuples.) 1003 1004 If several environment fallback chains are given, when a property 1005 is requrested they are tried in the order of specification, 1006 and the first yielded property is returned. 1007 It is also possible to combine properties from different 1008 environment chains in a custom way, by supplying a property 1009 value transformation function (C{pvaltf} parameter). 1010 1011 Compound keys, for single-key lookups, are built by joining 1012 the derivation and property keys with a separator. 
1013 This separator can be chosen through C{ckeysep} parameter. 1014 The separator string can be contained inside a derivation key, 1015 but it must not be found inside any property key 1016 (the compound key is split from the back). 1017 1018 A myriad of I{transformation functions} can be applied by 1019 derivator object to imported derivations, through C{*tf} parameters. 1020 They are as follows (stating only default inputs, see below 1021 for more possibilities): 1022 - C{dkeytf}: applied to derivation key supplied on lookups 1023 (e.g. in L{get} or L{get2} methods). Takes the derivation 1024 key as parameter, returns either the derivation key 1025 or a tuple of the derivation key and another object. 1026 - C{dkeyitf}: applied to all derivation keys on import. 1027 Same default input-output as C{dkey}. 1028 - C{pkeytf}: like C{dkeytf}, only working analogously on 1029 property key instead of derivation key. 1030 - C{pkeyitf}: like C{dkeyitf}, only working analogously on 1031 property key instead of derivation key. 1032 - C{pvaltf}: applied to tagged segments of property values. 1033 The input to this function is a list of lists 1034 by each environment fallback chain; 1035 list for one environemnt chain consists of 2-tuples, 1036 each tuple having a list of tags as the first element, 1037 and a text segment as the second element. 1038 For example, if there is only one environment chain 1039 (e.g. C{evn=""} or C{env=("someenv", "")}, 1040 and the property value is derived to be C{foo ~tag bar} 1041 in this environment, then the argument to the function 1042 will be C{[[([''], "foo "), (['tag'], " bar")]]}. 1043 If an environemnt chain yielded no property value, 1044 its element will be C{None} instead of list of 2-tuples. 1045 The return value is the final property value string. 
1046 Note that simplification will not be applied to this 1047 value afterwards, so if desired, 1048 L{simplify()<pology.normalize.simplify>} 1049 should be manually called inside the function. 1050 - C{ksyntf}: quite similar to C{pvaltf}, only applied to 1051 tagged segments of key syntagmas. 1052 The difference is that there are no multiple environments 1053 for key syntagmas, so the input value is just one list 1054 of tagged text segments (what would be the first element 1055 of input list to C{pvaltf}). 1056 - C{envtf}: applied to environment fallback chain on lookups. 1057 Takes original environment chain as argument, 1058 returns new environment chain 1059 (in one of the forms acceptable as C{env} parameter). 1060 1061 Transformation functions can take more input arguments than 1062 the default described above, on demand. 1063 If transformation function is supplied directly, 1064 e.g. C{pvaltf=somefunc}, it is sent default inputs. 1065 Extra inputs are requested by supplying instead a tuple, where 1066 the first element is the transformation function, and the following 1067 elements are predefined keywords of available extra inputs, 1068 e.g. C{pvalf=(somefunc, "dkey", "pkrest")}. 1069 Available extra inputs by transformation function are: 1070 - C{dkeytf}: C{"self"} the derivation object. 1071 - C{pkeytf}: C{"self"}, C{"dkey"} the derivation key 1072 (original or that returned by C{dkeytf}), 1073 C{"dkrest"} the second object returned by C{dkeytf}. 1074 - C{pvaltf}: C{"self"}, C{"dkey"}, C{"pkey"} the property 1075 key (original or that returned by C{pkeytf}), 1076 C{"env"} the tuple of environment chains, C{"dkrest"}, 1077 C{"pkrest"} the second object returned by C{pkeytf}. 1078 - C{ksyntf}: C{"self"}, C{"dkey"}, C{"dkrest"}. 1079 - C{envtf}: C{"self"}, C{"dkey"}, C{"dkrest"}. 
1080 1081 @param env: environment for derivations 1082 @type env: string, (string*), ((string*)*) 1083 @param ckeysep: derivation-property key separator in compound keys 1084 @type ckeysep: string 1085 @param strictkey: whether all key syntagmas must be unique to 1086 avoid conflicts 1087 @param dkeytf: transformation function for lookup derivation keys 1088 @param dkeyitf: transformation function for imported derivation keys 1089 @param pkeytf: transformation function for lookup property keys 1090 @param pkeyitf: transformation function for imported property keys 1091 @param pvaltf: transformation fucntion for property values 1092 @param ksyntf: transformation fucntion for key syntagamas 1093 """ 1094 1095 self._env = self._normenv(env) 1096 1097 self._ckeysep = ckeysep 1098 1099 self._dkeytf = self._resolve_tf(dkeytf, ["self"]) 1100 self._dkeyitf = self._resolve_tf(dkeyitf, []) 1101 self._pkeytf = self._resolve_tf(pkeytf, ["dkey", "dkrest", "self"]) 1102 self._pkeyitf = self._resolve_tf(pkeyitf, []) 1103 self._pvaltf = self._resolve_tf(pvaltf, ["pkey", "dkey", "env", 1104 "dkrest", "pkrest", "self"]) 1105 self._ksyntf = self._resolve_tf(ksyntf, ["dkey", "dkrest", "self"]) 1106 self._envtf = self._resolve_tf(envtf, ["dkey", "dkrest", "self"]) 1107 1108 self._strictkey = strictkey 1109 1110 self._imported_srcnames = set() 1111 self._visible_srcnames = set() 1112 self._derivs_by_srcname = {} 1113 self._deriv_by_srcname_idkey = {} 1114 self._visible_deriv_by_dkey = {} 1115 self._props_by_deriv_env1 = {} 1116 self._raw_props_by_deriv_env1 = {} 1117 self._single_dkeys = set() 1118 1119 1120 def _normenv (self, env): 1121 1122 if isinstance(env, (tuple, list)): 1123 if not env or isinstance(env[0], str): 1124 env = (env,) 1125 else: 1126 env = ((env,),) 1127 1128 return env 1129 1130 1131 def _resolve_tf (self, tfspec, kneargs): 1132 1133 eaords = [0] 1134 if isinstance(tfspec, (tuple, list)): 1135 tf0, eargs = tfspec[0], list(tfspec[1:]) 1136 unkeargs = 
set(eargs).difference(kneargs) 1137 if unkeargs: 1138 raise SynderError( 1139 _("@info", 1140 "Unknown extra arguments for transformation function " 1141 "requested in derivator constructor: %(arglist)s", 1142 arglist=format_item_list(sorted(unkeargs)))) 1143 eaords.extend([kneargs.index(x) + 1 for x in eargs]) 1144 else: 1145 tf0 = tfspec 1146 1147 if tf0 is None: 1148 return None 1149 1150 def tf (*args): 1151 args0 = [args[x] for x in eaords] 1152 return tf0(*args0) 1153 1154 return tf 1155 1156 1157 def import_string (self, string, ignhid=False): 1158 """ 1159 Import string with derivations. 1160 1161 @param string: the string to parse 1162 @type string: string 1163 @param ignhid: also make hidden derivations visible if C{True} 1164 @type ignhid: bool 1165 1166 @returns: number of newly imported visible derivations 1167 @rtype: int 1168 """ 1169 1170 source = _parse_string(string) 1171 return self._process_import_visible(source, ignhid) 1172 1173 1174 def import_file (self, filename, ignhid=False): 1175 """ 1176 Import file with derivations. 
1177 1178 @param filename: the path to file to parse 1179 @type filename: string 1180 @param ignhid: also make hidden derivations visible if C{True} 1181 @type ignhid: bool 1182 1183 @returns: number of newly imported visible derivations 1184 @rtype: int 1185 """ 1186 1187 source = _parse_file(filename) 1188 return self._process_import_visible(source, ignhid) 1189 1190 1191 def _process_import_visible (self, source, ignhid): 1192 1193 nnew = self._process_import(source) 1194 nvis = self._make_visible(source, ignhid) 1195 return (nvis, nnew) 1196 1197 1198 def _process_import (self, source): 1199 1200 if source.name in self._imported_srcnames: 1201 return 0 1202 1203 self._imported_srcnames.add(source.name) 1204 1205 iderivs = [] 1206 self._derivs_by_srcname[source.name] = iderivs 1207 idmap = {} 1208 self._deriv_by_srcname_idkey[source.name] = idmap 1209 1210 # Construct wrapping derivations and file them by derivation keys. 1211 nadded = 0 1212 for rawderiv in source.derivs: 1213 1214 # Create wrapper derivation for the raw derivation. 1215 deriv = self._Deriv(rawderiv, self._dkeyitf) 1216 1217 # Eliminate internal key conflicts of this derivation. 1218 self._eliminate_conflicts(deriv, idmap, None, lambda x: x.idkeys) 1219 1220 # Register internal derivation in this source. 1221 if deriv.idkeys: 1222 iderivs.append(deriv) 1223 for idkey in deriv.idkeys: 1224 idmap[idkey] = deriv 1225 nadded += 1 1226 1227 # Import included sources. 1228 for incsource in source.incsources: 1229 nadded += self._process_import(incsource) 1230 1231 return nadded 1232 1233 1234 def _make_visible (self, source, ignhid): 1235 1236 if source.name in self._visible_srcnames: 1237 return 0 1238 1239 self._visible_srcnames.add(source.name) 1240 1241 nvis = 0 1242 1243 for deriv in self._derivs_by_srcname[source.name]: 1244 if not ignhid and all([x.hidden for x in deriv.base.syns]): 1245 continue 1246 1247 # Eliminate external key conflicts of this derivation. 
1248 self._eliminate_conflicts(deriv, self._visible_deriv_by_dkey, 1249 self._single_dkeys, lambda x: x.dkeys) 1250 1251 # Register visible derivation in this source. 1252 if deriv.dkeys: 1253 self._single_dkeys.add(tuple(deriv.dkeys)[0]) 1254 for dkey in deriv.dkeys: 1255 self._visible_deriv_by_dkey[dkey] = deriv 1256 nvis += 1 1257 1258 return nvis 1259 1260 1261 class _Deriv: 1262 1263 def __init__ (self, deriv, dkeyitf): 1264 1265 self.base = deriv 1266 1267 # Compute internal and external derivation keys from key syntagmas. 1268 self.idkeys = set() 1269 self.dkeys = set() 1270 for syn in deriv.syns: 1271 synt = "".join([x.text for x in syn.segs 1272 if isinstance(x, _SDText)]) 1273 idkey = simplify(synt) 1274 self.idkeys.add(idkey) 1275 dkeys = dkeyitf(idkey) if dkeyitf else idkey 1276 if dkeys is not None: 1277 if not isinstance(dkeys, (tuple, list)): 1278 dkeys = [dkeys] 1279 self.dkeys.update(dkeys) 1280 1281 1282 def _eliminate_conflicts (self, deriv, kmap, kskeys, keyf): 1283 1284 to_remove_keys = set() 1285 to_remove_keys_other = {} 1286 for key in keyf(deriv): 1287 oderiv = kmap.get(key) 1288 if oderiv is not None: 1289 to_remove_keys.add(key) 1290 if oderiv not in to_remove_keys_other: 1291 to_remove_keys_other[oderiv] = set() 1292 to_remove_keys_other[oderiv].add(key) 1293 1294 noconfres_oderivs = [] 1295 if self._strictkey or to_remove_keys == keyf(deriv): 1296 noconfres_oderivs.extend(list(to_remove_keys_other.keys())) 1297 else: 1298 for oderiv, keys in list(to_remove_keys_other.items()): 1299 if keyf(oderiv) == keys: 1300 noconfres_oderivs.append(oderiv) 1301 1302 if noconfres_oderivs: 1303 # Clear both internal and external keys. 
1304 deriv.dkeys.clear() 1305 deriv.idkeys.clear() 1306 eposf = lambda x: (x.base.parent.name, x.base.syns[0].pos[0]) 1307 noconfres_oderivs.sort(key=eposf) 1308 pos1 = "%s:%d" % eposf(deriv) 1309 pos2s = ["%s:%d" % eposf(x) for x in noconfres_oderivs] 1310 pos2s = "\n".join(pos2s) 1311 warning(_("@info", 1312 "Derivation at %(pos1)s eliminated due to " 1313 "key conflict with the following derivations:\n" 1314 "%(pos2list)s", 1315 pos1=pos1, pos2list=pos2s)) 1316 else: 1317 for key in to_remove_keys: 1318 keyf(deriv).remove(key) 1319 for oderiv, keys in list(to_remove_keys_other.items()): 1320 for key in keys: 1321 keyf(oderiv).remove(key) 1322 kmap.pop(key) 1323 if kskeys is not None and key in kskeys: 1324 kskeys.remove(key) 1325 kskeys.add(tuple(keyf(oderiv))[0]) 1326 1327 1328 def _resolve_dkey (self, dkey): 1329 1330 dkrest = () 1331 if self._dkeytf: 1332 dkey = self._dkeytf(dkey, self) 1333 if isinstance(dkey, tuple): 1334 dkey, dkrest = dkey[0], dkey[1:] 1335 1336 deriv = None 1337 if dkey is not None: 1338 deriv = self._visible_deriv_by_dkey.get(dkey) 1339 if deriv is None: 1340 dkey = None 1341 1342 return dkey, dkrest, deriv 1343 1344 1345 def _resolve_pkey (self, pkey, dkey, dkrest): 1346 1347 pkrest = () 1348 if self._pkeytf: 1349 pkey = self._pkeytf(pkey, dkey, dkrest, self) 1350 if isinstance(pkey, tuple): 1351 pkey, pkrest = pkey[0], pkey[1:] 1352 1353 return pkey, pkrest 1354 1355 1356 def _resolve_env (self, env, dkey, dkrest): 1357 1358 if self._envtf: 1359 env = self._envtf(env, dkey, dkrest, self) 1360 if env is not None: 1361 env = self._normenv(env) 1362 1363 return env 1364 1365 1366 def get2 (self, dkey, pkey, defval=None): 1367 """ 1368 Get property value by derivation key and property key. 
1369 1370 @param dkey: derivation key 1371 @type dkey: string 1372 @param pkey: property key 1373 @type pkey: string 1374 @param defval: the value to return if the property does not exist 1375 @type defval: string 1376 1377 @returns: the property value 1378 @rtype: string 1379 """ 1380 1381 dkey, dkrest, deriv = self._resolve_dkey(dkey) 1382 if dkey is None: 1383 return defval 1384 1385 pkey, pkrest = self._resolve_pkey(pkey, dkey, dkrest) 1386 if pkey is None: 1387 return defval 1388 1389 env = self._resolve_env(self._env, dkey, dkrest) 1390 if env is None: 1391 return defval 1392 1393 mtsegs = [] 1394 for env1 in env: 1395 tsegs = self._getprops(deriv, env1).get(pkey) 1396 mtsegs.append(tsegs) 1397 1398 if self._pvaltf: 1399 pval = self._pvaltf(mtsegs, pkey, dkey, env, 1400 dkrest, pkrest, self) 1401 else: 1402 pval = None 1403 for tsegs in mtsegs: 1404 if tsegs is not None: 1405 pval = simplify("".join([x[0] for x in tsegs])) 1406 break 1407 1408 return pval if pval is not None else defval 1409 1410 1411 def _getprops (self, deriv, env1): 1412 1413 # Try to fetch derivation from cache. 1414 props = self._props_by_deriv_env1.get((deriv, env1)) 1415 if props is not None: 1416 return props 1417 1418 # Construct raw derivation and extract key-value pairs. 1419 rprops = self._derive(deriv, env1) 1420 props = dict([(x, self._simple_segs(y[0])) for x, y in list(rprops.items()) 1421 if not y[1].canceling]) 1422 1423 # Internally transform keys if requested. 1424 if self._pkeyitf: 1425 nprops = [] 1426 for pkey, segs in list(props.items()): 1427 pkey = self._pkeyitf(pkey) 1428 if pkey is not None: 1429 nprops.append((pkey, segs)) 1430 props = dict(nprops) 1431 1432 self._props_by_deriv_env1[(deriv, env1)] = props 1433 return props 1434 1435 1436 def _derive (self, deriv, env1): 1437 1438 # Try to fetch raw derivation from cache. 1439 dprops = self._raw_props_by_deriv_env1.get((deriv, env1)) 1440 if dprops is not None: 1441 return dprops 1442 1443 # Derivator core. 
1444 dprops = {} 1445 env = None 1446 envs_by_name = dict([(x.name, x) for x in deriv.base.envs]) 1447 for env0 in reversed(env1): 1448 env = envs_by_name.get(env0) 1449 if env is None: 1450 continue 1451 for prop in env.props: 1452 fsegs = [] 1453 cprops = dict([(simplify(x.name), ([], x)) for x in prop.keys]) 1454 ownpkeys = set(cprops.keys()) 1455 for seg in prop.segs: 1456 if isinstance(seg, _SDExp): 1457 eprops = self._expand(seg, deriv, env1) 1458 if len(eprops) != 1 or list(eprops.keys())[0]: 1459 if cprops: 1460 for cpkey, csegskey in list(cprops.items()): 1461 if not csegskey[1].cut: 1462 esegskey = eprops.get(cpkey) 1463 if esegskey is not None: 1464 if not esegskey[1].cut: 1465 csegskey[0].extend(esegskey[0]) 1466 else: 1467 cprops.pop(cpkey) 1468 if not cprops: 1469 break 1470 for epkey, esegskey in list(eprops.items()): 1471 if esegskey[1].cut: 1472 cprops[epkey] = esegskey 1473 if not cprops: 1474 break 1475 else: 1476 for pkey, (esegs, key) in list(eprops.items()): 1477 csegs = esegs[:] 1478 if not key.cut: 1479 csegs[:0] = fsegs 1480 cprops[pkey] = (csegs, key) 1481 else: 1482 esegs = list(eprops.values())[0][0] 1483 if cprops: 1484 for pkey, (csegs, key) in list(cprops.items()): 1485 if not key.cut or pkey in ownpkeys: 1486 csegs.extend(esegs) 1487 else: 1488 fsegs.extend(esegs) 1489 elif cprops: 1490 for pkey, (csegs, key) in list(cprops.items()): 1491 if not key.cut or pkey in ownpkeys: 1492 csegs.append(seg) 1493 else: 1494 fsegs.append(seg) 1495 for pkey, (segs, key) in list(cprops.items()): 1496 if key.canceling and pkey in dprops: 1497 osegskey = dprops.get(pkey) 1498 if osegskey is not None and not osegskey[1].canceling: 1499 dprops.pop(pkey) 1500 cprops.pop(pkey) 1501 dprops.update(cprops) 1502 1503 # Eliminate leading and trailing empty text segments. 
1504 list(map(self._trim_segs, [x[0] for x in list(dprops.values())])) 1505 1506 self._raw_props_by_deriv_env1[(deriv, env1)] = dprops 1507 return dprops 1508 1509 1510 def _expand (self, exp, pderiv, env1): 1511 # TODO: Discover circular expansion paths. 1512 1513 # Fetch the derivation pointed to by the expansion. 1514 idkey = simplify(exp.ref) 1515 source = pderiv.base.parent 1516 deriv = self._deriv_by_srcname_idkey[source.name].get(idkey) 1517 if deriv is None: 1518 for isource in reversed(source.incsources): 1519 deriv = self._deriv_by_srcname_idkey[isource.name].get(idkey) 1520 if deriv is not None: 1521 break 1522 if deriv is None: 1523 raise SynderError( 1524 _("@info", 1525 "Expansion '%(ref)s' does not reference a known derivation.", 1526 ref=exp.ref, file=source.name, line=exp.pos[0]), 1527 5010, source.name, exp.pos) 1528 1529 # Derive the referenced derivation. 1530 props = self._derive(deriv, env1) 1531 1532 # Drop terminal properties. 1533 nprops = [] 1534 for pkey, (segs, key) in list(props.items()): 1535 if not key.terminal: 1536 nprops.append((pkey, (segs, key))) 1537 props = dict(nprops) 1538 1539 # Apply expansion mask. 1540 if exp.mask is not None: 1541 # Eliminate all obtained keys not matching the mask. 1542 # Reduce by mask those that match. 1543 nprops = [] 1544 for pkey, segskey in list(props.items()): 1545 if len(pkey) != len(exp.mask): 1546 continue 1547 mpkey = "" 1548 for c, cm in zip(pkey, exp.mask): 1549 if cm != _ch_exp_mask_pl: 1550 if cm != c: 1551 mpkey = None 1552 break 1553 else: 1554 mpkey += c 1555 if mpkey is not None: 1556 nprops.append((mpkey, segskey)) 1557 props = dict(nprops) 1558 1559 # Apply key extension. 1560 if exp.kext is not None: 1561 nprops = [] 1562 for pkey, (segs, key) in list(props.items()): 1563 npkey = exp.kext.replace(_ch_exp_kext_pl, pkey) 1564 nprops.append((npkey, (segs, key))) 1565 props = dict(nprops) 1566 1567 # Apply capitalization. 
1568 if exp.caps is not None: 1569 chcaps = first_to_upper if exp.caps else first_to_lower 1570 nprops = [] 1571 for pkey, (segs, key) in list(props.items()): 1572 chcapsed = False 1573 nsegs = [] 1574 for seg in segs: 1575 if ( not chcapsed 1576 and isinstance(seg, _SDText) and seg.text.strip() 1577 ): 1578 nseg = copy.copy(seg) 1579 nseg.text = chcaps(seg.text) 1580 chcapsed = True 1581 nsegs.append(nseg) 1582 else: 1583 nsegs.append(seg) 1584 nprops.append((pkey, (nsegs, key))) 1585 props = dict(nprops) 1586 1587 if not props: 1588 raise SynderError( 1589 _("@info", 1590 "Expansion '%(ref)s' expands into nothing.", 1591 ref=exp.ref, file=source.name, line=exp.pos[0]), 1592 5020, source.name, exp.pos) 1593 1594 return props 1595 1596 1597 def _trim_segs (self, segs): 1598 1599 for i0, di, stripf in ( 1600 (0, 1, str.lstrip), 1601 (len(segs) - 1, -1, str.rstrip), 1602 ): 1603 i = i0 1604 while i >= 0 and i < len(segs): 1605 if isinstance(segs[i], _SDText): 1606 segs[i].text = stripf(segs[i].text) 1607 if segs[i].text: 1608 break 1609 i += di 1610 1611 1612 def _simple_segs (self, segs): 1613 1614 # Add sentries. 1615 if not segs: 1616 segs = [_SDText(None, None, "")] 1617 if not isinstance(segs[0], _SDTag): 1618 segs = [_SDTag(None, None)] + segs 1619 if not isinstance(segs[-1], _SDText): 1620 segs = segs + [_SDText(None, None, "")] 1621 1622 # Construct simplified segments: [(text, [tagname...])...] 1623 tsegs = [] 1624 i = 0 1625 while i < len(segs): 1626 # Tag names for the next piece of text. 1627 tags = segs[i].names 1628 # Join contiguous text segments into single plain text. 1629 i += 1 1630 i0 = i 1631 while i < len(segs) and isinstance(segs[i], _SDText): 1632 i += 1 1633 text = "".join([x.text for x in segs[i0:i]]) 1634 # Collect simplified segment. 1635 tsegs.append((text, tags)) 1636 1637 return tsegs 1638 1639 1640 def get (self, ckey, defval=None): 1641 """ 1642 Get property value by compound key. 
1643 1644 @param ckey: compound key 1645 @type ckey: string 1646 @param defval: the value to return if the property does not exist 1647 @type defval: string 1648 1649 @returns: the property value 1650 @rtype: string 1651 """ 1652 1653 # Split the compound key into derivation and property keys. 1654 lst = ckey.rsplit(self._ckeysep, 1) 1655 if len(lst) < 2: 1656 return defval 1657 dkey, pkey = lst 1658 1659 return self.get2(dkey, pkey, defval) 1660 1661 1662 def dkeys (self, single=False): 1663 """ 1664 Get list of all derivation keys. 1665 1666 For derivations accessible through more than one derivation 1667 key, by default all of them are included in the result. 1668 If instead only a single random of those keys is wanted 1669 (i.e. strictly one key per derivation), C{single} can 1670 be set to C{True}. 1671 1672 @param single: whether to return a single key for each derivation 1673 @type single: param 1674 1675 @returns: list of derivation keys 1676 @rtype: [string*] 1677 """ 1678 1679 if not single: 1680 return list(self._visible_deriv_by_dkey.keys()) 1681 else: 1682 return self._single_dkeys 1683 1684 1685 def syns (self, dkey): 1686 """ 1687 Get list of key syntagmas by derivation key. 1688 1689 Key syntagmas are always returned in the order in which 1690 they appear in the derivation. 1691 If no derivation is found for the given key, 1692 an empty list is returned. 
1693 1694 @param dkey: derivation key 1695 @type dkey: string 1696 1697 @returns: key syntagmas 1698 @rtype: [string*] 1699 """ 1700 1701 dkey, dkrest, deriv = self._resolve_dkey(dkey) 1702 if dkey is None: 1703 return [] 1704 1705 rsyns = [] 1706 for syn in deriv.base.syns: 1707 if not syn.hidden: 1708 tsegs = self._simple_segs(syn.segs) 1709 if self._ksyntf: 1710 rsyn = self._ksyntf(tsegs, dkey, dkrest, self) 1711 else: 1712 rsyn = simplify("".join([x[0] for x in tsegs])) 1713 if rsyn is not None: 1714 rsyns.append(rsyn) 1715 1716 return rsyns 1717 1718 1719 def altdkeys (self, dkey): 1720 """ 1721 Get list of all derivation keys pointing to same entry as given key. 1722 1723 @param dkey: derivation key 1724 @type dkey: string 1725 1726 @returns: alternative derivation keys 1727 @rtype: [string*] 1728 """ 1729 1730 dkey, dkrest, deriv = self._resolve_dkey(dkey) 1731 if dkey is None: 1732 return [] 1733 1734 return deriv.dkeys 1735 1736 1737 def pkeys (self, dkey): 1738 """ 1739 Get set of property keys available for given derivation key. 1740 1741 If no derivation is found for the given key, 1742 an empty set is returned. 1743 1744 @param dkey: derivation key 1745 @type dkey: string 1746 1747 @returns: property keys 1748 @rtype: set(string*) 1749 """ 1750 1751 dkey, dkrest, deriv = self._resolve_dkey(dkey) 1752 if dkey is None: 1753 return set() 1754 1755 env = self._resolve_env(self._env, dkey, dkrest) 1756 if env is None: 1757 return set() 1758 1759 pkeys = set() 1760 for env1 in env: 1761 props = self._getprops(deriv, env1) 1762 pkeys.update(list(props.keys())) 1763 1764 return pkeys 1765 1766 1767 def props (self, dkey): 1768 """ 1769 Get dictionary of property values by property keys for 1770 given derivation key. 1771 1772 If no derivation is found for the given key, 1773 an empty dictionary is returned. 
1774 1775 @param dkey: derivation key 1776 @type dkey: string 1777 1778 @returns: property dictionary 1779 @rtype: {(string, string)*} 1780 """ 1781 1782 # TODO: Implement more efficiently. 1783 props = dict([(x, self.get2(dkey, x)) for x in self.pkeys(dkey)]) 1784 1785 return props 1786 1787 1788 def envs (self, dkey): 1789 """ 1790 Get list of all explicitly defined environments in given derivation. 1791 1792 "Explicitly" means environments mentioned in the derivation itself, 1793 and not those inherited through expansions. 1794 1795 @param dkey: derivation key 1796 @type dkey: string 1797 1798 @returns: explicit environment names 1799 @rtype: [string*] 1800 """ 1801 1802 dkey, dkrest, deriv = self._resolve_dkey(dkey) 1803 if dkey is None: 1804 return [] 1805 1806 return [x.name for x in deriv.base.envs] 1807 1808 1809 def source_name (self, dkey): 1810 """ 1811 Get the name of the source in which the derivation is found. 1812 1813 If no derivation is found for the given key, C{None} is returned. 1814 1815 @param dkey: derivation key 1816 @type dkey: string 1817 1818 @returns: name of the source 1819 @rtype: string 1820 """ 1821 1822 dkey, dkrest, deriv = self._resolve_dkey(dkey) 1823 if dkey is None: 1824 return None 1825 1826 srcname = deriv.base.parent.name.split(os.path.sep)[-1] 1827 srcname = srcname[:srcname.rfind(".")] 1828 1829 return srcname 1830 1831 1832 def source_pos (self, dkey): 1833 """ 1834 Get the position in the source where the derivation is found. 1835 1836 Position is a 3-tuple of file path, line and column numbers. 1837 If no derivation is found for the given key, C{None} is returned. 
1838 1839 @param dkey: derivation key 1840 @type dkey: string 1841 1842 @returns: source position 1843 @rtype: (string, int, int) 1844 """ 1845 1846 dkey, dkrest, deriv = self._resolve_dkey(dkey) 1847 if dkey is None: 1848 return None 1849 1850 path = deriv.base.parent.name 1851 lno, cno = deriv.base.pos 1852 1853 return path, lno, cno 1854 1855 1856 def keys (self): 1857 """ 1858 Get the list of all compound keys. 1859 1860 @returns: compound keys 1861 @rtype: [string*] 1862 """ 1863 1864 return list(self.keys()) 1865 1866 1867 def values (self): 1868 """ 1869 Get the list of all property values. 1870 1871 @returns: property values 1872 @rtype: [string*] 1873 """ 1874 1875 return list(self.values()) 1876 1877 1878 def items (self): 1879 """ 1880 Get the list of all pairs of compound keys and property values. 1881 1882 @returns: compound keys and property values 1883 @rtype: [(string, string)*] 1884 """ 1885 1886 return list(self.items()) 1887 1888 1889 def __contains__ (self, ckey): 1890 """ 1891 Check if the compound key is present in the derivator. 1892 1893 @returns: C{True} if present, C{False} otherwie 1894 @rtype: bool 1895 """ 1896 1897 return self.get(ckey) is not None 1898 1899 1900 def __getitem__ (self, ckey): 1901 """ 1902 Get property value by compound key, in dictionary notation. 1903 1904 Like L{get}, but raises C{KeyError} if key is not found. 1905 1906 @returns: property value 1907 @rtype: string 1908 """ 1909 1910 res = self.get(ckey) 1911 if res is None: 1912 raise KeyError(ckey) 1913 1914 return res 1915 1916 1917 def __iter__ (self): 1918 """ 1919 Iterate through all compound keys, in random order. 1920 1921 @returns: iterator through compound keys 1922 @rtype: iterator(string) 1923 """ 1924 1925 return iter(self.keys()) 1926 1927 1928 def iterkeys (self): 1929 """ 1930 Iterate through all compound keys, in random order. 
1931 1932 @returns: iterator through compound keys 1933 @rtype: iterator(string) 1934 """ 1935 1936 return self._Iterator(self._make_iter(lambda x: x)) 1937 1938 1939 def itervalues (self): 1940 """ 1941 Iterate through all property values, in random order. 1942 1943 @returns: iterator through property values 1944 @rtype: iteratorstring) 1945 """ 1946 1947 return self._Iterator(self._make_iter(lambda x: self.get(x))) 1948 1949 1950 def iteritems (self): 1951 """ 1952 Iterate through all pairs of compound key and property value, 1953 in random order. 1954 1955 @returns: iterator through compound key property value pairs 1956 @rtype: iterator((string, string)) 1957 """ 1958 1959 return self._Iterator(self._make_iter(lambda x: (x, self.get(x)))) 1960 1961 1962 class _Iterator (object): 1963 1964 def __init__ (self, it): 1965 self._it = it 1966 1967 def __iter__ (self): 1968 return self 1969 1970 def __next__ (self): 1971 return self._it() # expected to raise StopIteration on its own 1972 1973 1974 def _make_iter (self, keyf): 1975 1976 it = iter(self._visible_deriv_by_dkey) 1977 gdat = [None, []] # dkey, pkeys 1978 def next (): 1979 while not gdat[1]: 1980 gdat[0] = next(it) # will raise StopIteration 1981 gdat[1] = self.pkeys(gdat[0]) 1982 dkey = gdat[0] 1983 pkey = gdat[1].pop() 1984 return keyf(dkey + self._ckeysep + pkey) 1985 1986 return next 1987 1988 1989 def empty_pcache (self): 1990 1991 self._props_by_deriv_env1 = {} 1992 self._raw_props_by_deriv_env1 = {} 1993