File indexing completed on 2024-11-03 11:24:01
# -*- coding: UTF-8 -*-

"""
Handle entity definitions.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import os
import xml.parsers.expat

from pology import PologyError, _, n_
from pology.fsops import collect_files_by_ext
from pology.report import warning


def parse_entities(defstr, src=None):
    """
    Parse XML entity definitions from given string.

    The string should contain only entity definitions in DTD form,
    without any prolog or epilogue::

      ...
      <!ENTITY foo 'Foo-fum'>
      <!ENTITY bar 'Bar-boo'>
      ...

    If the same entity is defined several times, the last read definition
    is taken as final.

    @param defstr: entity-defining string
    @type defstr: string

    @param src: name of the source, for problem reporting
    @type src: C{None} or string

    @returns: name-value pairs of parsed entities
    @rtype: dict

    @raises PologyError: if the definitions cannot be parsed
    """

    # Equip with prolog and epilogue, so that the bare definitions
    # become the internal DTD subset of a complete XML document.
    defstr = ("<?xml version='1.0' encoding='UTF-8'?>\n"
              "<!DOCTYPE entityLoader [" + defstr + "]><done/>")

    # Parse entities.
    entities = {}

    def handler(name, is_parameter_entity, value,
                base, systemId, publicId, notationName):
        # Later declarations of the same name overwrite earlier ones.
        entities[name] = value

    p = xml.parsers.expat.ParserCreate()
    p.EntityDeclHandler = handler
    try:
        p.Parse(defstr, True)
    except xml.parsers.expat.ExpatError as inst:
        if src:
            msg = _("@info error report for a named source",
                    "%(src)s: %(msg)s",
                    src=src, msg=inst)
        else:
            msg = _("@info error report for a string",
                    "<string>: %(msg)s",
                    msg=inst)
        raise PologyError(msg) from inst

    return entities


def read_entities(filepath, fcap=False):
    """
    Read XML entity definitions from given file path.

    Input argument can be a single file path, or a sequence of paths.
    Content of each file is parsed by L{parse_entities}.
    If the same entity is defined in several files, the definition
    from the file read last is taken as final.

    For each read entity, another entity may be added which has the first
    letter converted to upper-case, both in the entity name and value.
    See L{fcap_entities} for more details.

    @param filepath: path or paths of entity-defining file
    @type filepath: string or sequence of strings
    @param fcap: whether to add paired first-caps entities
    @type fcap: bool

    @returns: (name, value) dictionary of parsed entities
    @rtype: dict

    @see: L{parse_entities}
    """

    # A single path of any kind accepted by open() is wrapped into a list;
    # anything else is taken to be a sequence of paths.
    if isinstance(filepath, (str, bytes, os.PathLike)):
        fnames = [filepath]
    else:
        fnames = filepath

    entities = {}
    for fname in fnames:
        # Scoop up file contents as text. The encoding is given explicitly
        # because parse_entities() declares the content to be UTF-8,
        # so decoding must not depend on the current locale.
        with open(fname, "r", encoding="UTF-8") as ifs:
            defstr = ifs.read()
        # Parse entities.
        entities.update(parse_entities(defstr, src=fname))

    if fcap:
        fcap_entities(entities, update=True)

    return entities


def read_entities_by_env(entpathenv, recurse=True, fcap=False):
    """
    Read XML entity definitions from directory paths given by
    an environment variable.

    Directory paths given by environment variable are searched for files with
    C{.entities} extension, and all found files are sent to L{read_entities}.
    Search through directories can be recursive or non-recursive.

    See L{fcap_entities} for use of C{fcap} parameter.

    If the environment variable is not set, a warning is output and empty
    collection of entities returned.

    @param entpathenv: environment variable that holds directory paths
    @type entpathenv: string
    @param recurse: whether to search directories recursively
    @type recurse: bool
    @param fcap: whether to add paired first-caps entities
    @type fcap: bool

    @returns: (name, value) dictionary of parsed entities
    @rtype: dict
    """

    entities = {}

    entpath = os.getenv(entpathenv)
    if entpath is None:
        warning(_("@info",
                  "Environment variable with paths to entity definitions "
                  "'%(envar)s' is not set.",
                  envar=entpathenv))
        return entities

    # NOTE(review): paths are split on a literal colon rather than
    # os.pathsep -- confirm whether platforms with a different separator
    # are meant to be supported.
    entfilepaths = collect_files_by_ext(entpath.split(":"), "entities",
                                        recurse=recurse)
    entities.update(read_entities(entfilepaths, fcap))

    return entities


def _first_letter_pos(text):
    # Index of the first alphabetic character in text,
    # or len(text) if there is none.
    p = 0
    while p < len(text) and not text[p].isalpha():
        p += 1
    return p


def fcap_entities(entities, update=False):
    """
    Create paired set of entities with first letters in upper-case.

    For each given entity, another entity may be created which has the first
    letter converted to upper-case, both in the entity name and value.
    Such entity is created only if the original entity has at least one
    letter in the name, and the first letter in the name is lower-case.

    New entities are either returned in a new dictionary, or are inserted
    into the original dictionary, which is then returned.

    @param entities: (name, value) dictionary of entities
    @type entities: dict
    @param update: whether to insert new entities into C{entities} itself
    @type update: bool

    @returns: (name, value) dictionary of upper-case entities
    @rtype: dict
    """

    if update:
        fcaps = entities
        # Iterate over a snapshot, since new entities are inserted into
        # the very dictionary being walked.
        iterents = list(entities.items())
    else:
        fcaps = {}
        iterents = entities.items()

    for name, value in iterents:
        # Upper-case entity name.
        p = _first_letter_pos(name)
        if p >= len(name):  # nothing to upper-case, skip
            continue
        if not name[p].islower():  # first letter is not lower-case, skip
            continue
        name = name[:p] + name[p].upper() + name[p + 1:]

        # Upper-case entity value, if possible.
        p = _first_letter_pos(value)
        if p < len(value):
            value = value[:p] + value[p].upper() + value[p + 1:]

        fcaps[name] = value

    return fcaps