File indexing completed on 2024-12-01 13:47:50

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Handle entity definitions.
0005 
0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0007 @license: GPLv3
0008 """
0009 
0010 import os
0011 import xml.parsers.expat
0012 
0013 from pology import PologyError, _, n_
0014 from pology.fsops import collect_files_by_ext
0015 from pology.report import warning
0016 
0017 
def parse_entities (defstr, src=None):
    """
    Extract XML entity definitions from a string.

    The string is expected to hold nothing but entity declarations
    in DTD form, with no surrounding prolog or epilogue::

      ...
      <!ENTITY foo 'Foo-fum'>
      <!ENTITY bar 'Bar-boo'>
      ...

    Every declaration reported by the parser is stored under its name,
    overwriting any value stored earlier under the same name.
    NOTE(review): XML binds the first declaration of an entity; confirm
    how Expat reports repeated declarations before relying on which
    of them ends up in the result.

    @param defstr: entity-defining string
    @type defstr: string

    @param src: name of the source, for problem reporting
    @type src: C{None} or string

    @returns: name-value pairs of parsed entities
    @rtype: dict

    @raise PologyError: if the wrapped definitions are not well-formed
    """

    # Wrap the bare declarations into a minimal complete document,
    # so that they form the internal DTD subset of a dummy root element.
    head = "<?xml version='1.0' encoding='UTF-8'?>\n<!DOCTYPE entityLoader ["
    tail = "]><done/>"
    document = head + defstr + tail

    collected = {}

    def on_entity_decl (entname, is_param, entvalue,
                        base, system_id, public_id, notation):
        # Only the name and the replacement text are of interest.
        collected[entname] = entvalue

    parser = xml.parsers.expat.ParserCreate()
    parser.EntityDeclHandler = on_entity_decl
    try:
        parser.Parse(document, True)
    except xml.parsers.expat.ExpatError as err:
        # Report the problem against the source name when there is one,
        # and against a generic string placeholder otherwise.
        if not src:
            raise PologyError(
                _("@info error report for a string",
                  "&lt;string&gt;: %(msg)s",
                  msg=err))
        raise PologyError(
            _("@info error report for a named source",
              "%(src)s: %(msg)s",
              src=src, msg=err))

    return collected
0068 
0069 
def read_entities (filepath, fcap=False):
    """
    Read XML entity definitions from given file path.

    Input argument can be a single file path, or a sequence of paths.
    Files are read as UTF-8 text, and the content of each file is parsed
    by L{parse_entities}, with the file path given as the source name.
    Results are merged in the order of paths, so that an entity found
    in a later file replaces the same-named entity from an earlier file.

    For each read entity, another entity may be added which has the first
    letter converted to upper-case, both in the entity name and value.
    See L{fcap_entities} for more details.

    @param filepath: path or paths of entity-defining file
    @type filepath: string or sequence of strings
    @param fcap: whether to add paired first-caps entities
    @type fcap: bool

    @returns: (name, value) dictionary of parsed entities
    @rtype: dict

    @see: L{parse_entities}
    """

    # A lone string is a single path, anything else a collection of paths.
    if isinstance(filepath, str):
        fnames = [filepath]
    else:
        fnames = filepath

    entities = {}
    for fname in fnames:
        # Decode explicitly as UTF-8, rather than by the locale default:
        # the definitions get wrapped into a document declared as UTF-8.
        # The context manager closes the file even if reading fails.
        with open(fname, "r", encoding="UTF-8") as ifs:
            defstr = ifs.read()
        # Parse entities.
        entities.update(parse_entities(defstr, src=fname))

    if fcap:
        fcap_entities(entities, update=True)

    return entities
0110 
0111 
def read_entities_by_env (entpathenv, recurse=True, fcap=False):
    """
    Read XML entity definitions from directory paths given by
    an environment variable.

    The value of the environment variable is split into directory paths
    on the platform path-list separator (C{os.pathsep}, which is colon
    on POSIX systems). These paths are searched for files with
    C{.entities} extension, and all found files are sent
    to L{read_entities}.
    Search through directories can be recursive or non-recursive.

    See L{fcap_entities} for use of C{fcap} parameter.

    If the environment variable is not set, a warning is output and empty
    collection of entities returned.

    @param entpathenv: environment variable that holds directory paths
    @type entpathenv: string
    @param recurse: whether to search directories recursively
    @type recurse: bool
    @param fcap: whether to add paired first-caps entities
    @type fcap: bool

    @returns: (name, value) dictionary of parsed entities
    @rtype: dict
    """

    entities = {}

    entpath = os.getenv(entpathenv)
    if entpath is None:
        warning(_("@info",
                  "Environment variable with paths to entity definitions "
                  "'%(envar)s' is not set.",
                  envar=entpathenv))
        return entities

    # Forward the recursion choice to the file collector; without this
    # the recurse parameter would have no effect on the search.
    entfilepaths = collect_files_by_ext(entpath.split(os.pathsep),
                                        "entities", recurse=recurse)
    entities.update(read_entities(entfilepaths, fcap))

    return entities
0151 
0152 
def fcap_entities (entities, update=False):
    """
    Derive first-letter-capitalized counterparts of entities.

    An entity qualifies if its name contains at least one letter and
    the first such letter is lower-case. The counterpart has that letter
    of the name turned to upper-case, and likewise the first letter of
    the value, if the value contains any letter at all.

    The counterparts are collected into a new dictionary, unless
    C{update} is requested, in which case they are added to the given
    dictionary itself (replacing same-named entries) and that
    dictionary is returned.

    @param entities: (name, value) dictionary of entities
    @type entities: dict
    @param update: whether to insert new entities into C{entities} itself
    @type update: bool

    @returns: (name, value) dictionary of upper-case entities
    @rtype: dict
    """

    def first_letter_pos (text):
        # Index of the first alphabetic character, None if there is none.
        for pos, char in enumerate(text):
            if char.isalpha():
                return pos
        return None

    target = entities if update else {}
    # When updating in place, walk over a snapshot of the pairs,
    # since the dictionary grows during the loop.
    pairs = list(entities.items()) if update else entities.items()

    for entname, entvalue in pairs:
        npos = first_letter_pos(entname)
        # Skip names without letters, and those not starting in lower-case.
        if npos is None or not entname[npos].islower():
            continue
        capname = entname[:npos] + entname[npos].upper() + entname[npos + 1:]

        # The value is capitalized only when it contains a letter.
        vpos = first_letter_pos(entvalue)
        if vpos is None:
            capvalue = entvalue
        else:
            capvalue = (entvalue[:vpos] + entvalue[vpos].upper()
                        + entvalue[vpos + 1:])

        target[capname] = capvalue

    return target
0202