File indexing completed on 2024-10-27 05:14:07
# -*- coding: UTF-8 -*-

"""
Escaping texts in various contexts.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import re

from pology import PologyError, _, n_
from pology.report import warning


# Characters which have a single-letter C escape, mapped to that letter.
_escapes_c = {
    "\a" : "a",
    "\b" : "b",
    "\f" : "f",
    "\n" : "n",
    "\r" : "r",
    "\t" : "t",
    "\v" : "v",
    "\"" : "\"",
    "\\" : "\\",
}

# Reverse mapping: escape letter to the character it stands for.
_unescapes_c = {y: x for x, y in _escapes_c.items()}

# Numeric escapes: introducing character mapped to the numeric base and
# the set of ASCII digits accepted in that base.
_numeric_escapes_c = {
    "x" : (16, frozenset("0123456789abcdefABCDEF")),
    "0" : (8, frozenset("01234567")),
}

def unescape_c (s):
    """
    Unescape text for C-style quoted strings.

    Single-letter escapes (C{\\n}, C{\\t}, C{\\"}, C{\\\\}, etc.) are
    converted into the characters they stand for.
    Two-digit octal and hex sequences (C{\\0OO}, C{\\xHH}) are converted
    into the characters with the corresponding code points;
    no filtering by code point value is performed.

    Invalid escape sequences raise exception. This includes unknown
    escape letters, a backslash at the very end of the text,
    and numeric sequences which are not followed by exactly two
    ASCII digits of the proper base.

    @param s: text to unescape (without wrapping quotes)
    @type s: string

    @returns: unescaped text
    @rtype: string

    @raise PologyError: if an invalid escape sequence is encountered

    @see: L{escape_c}
    """

    segs = []
    p = 0
    while True:
        pp = p
        p = s.find("\\", p)
        if p < 0:
            # No more backslashes, the rest of the text is literal.
            segs.append(s[pp:])
            break
        segs.append(s[pp:p])
        p += 1 # step over the backslash
        c = s[p:p + 1] # empty if the backslash was the last character
        ec = None
        numeric = _numeric_escapes_c.get(c)
        if numeric is not None:
            base, digits = numeric
            dd = s[p + 1:p + 3]
            # Check digits explicitly instead of relying on int(),
            # which would also accept signs, whitespace and
            # non-ASCII digits.
            if len(dd) == 2 and digits.issuperset(dd):
                ec = chr(int(dd, base))
                p += 3
        else:
            ec = _unescapes_c.get(c)
            if ec is not None:
                p += 1
        if ec is None:
            raise PologyError(
                _("@info \"C\" is the C programming language",
                  "Invalid C escape sequence after '%(snippet)s'.",
                  snippet=s[:p]))
        segs.append(ec)

    return type(s)().join(segs)


# Characters which need escaping, mapped to the complete escape sequence.
_escapes_c_wpref = {x: "\\" + y for x, y in _escapes_c.items()}

def escape_c (s):
    """
    Escape text for C-style quoted strings.

    Every character which has a single-letter C escape
    (control characters such as newline and tab, double quote, backslash)
    is replaced with that escape; all other characters are left as they are.

    @param s: text to escape
    @type s: string

    @returns: escaped text (without wrapping quotes)
    @rtype: string

    @see: L{unescape_c}
    """
    return type(s)().join(_escapes_c_wpref.get(c, c) for c in s)


# Characters whose presence makes quoting necessary. Tab and newline are
# included because the shell splits unquoted words on them.
_special_chars_sh = frozenset(r" ~`#$&*()\|[]{};'\"<>?!" + "\t\n")

def escape_sh (s):

    """
    Escape text for Unix sh-like shell.

    Escaped text may be used as a fixed argument in command line,
    i.e. the shell will not interpret any part of it in a special way.
    It is undefined which of the possible ways to escape are used
    (single quotes, double quotes, backslashes).

    Text which contains no characters special to the shell
    is returned unchanged (including the empty text).

    @param s: text to escape
    @type s: string

    @returns: escaped text
    @rtype: string
    """

    if _special_chars_sh.isdisjoint(s):
        return s

    # Inside single quotes the shell interprets nothing, whereas inside
    # double quotes $, ` and \ remain active. Hence always use single
    # quotes, and write an embedded single quote as '\'' (close the
    # quoting, add a backslash-escaped quote, open the quoting again).
    return "'" + s.replace("'", "'\\''") + "'"


def split_escaped (text, sep):
    """
    Like C{split()}, but double-separator is treated as an escape of itself.

    For example, splitting C{"a;;b;c"} on C{";"} gives C{["a;b", "c"]}.

    @param text: the text to split
    @type text: string

    @param sep: the separator
    @type sep: string

    @returns: parsed elements
    @rtype: list of strings
    """

    # Doubled separators are temporarily hidden behind a placeholder
    # character. The placeholder must not occur in the text or in the
    # separator, or else the restoring step would corrupt the result,
    # so take the first code point (starting from U+0004) absent from both.
    placeholder = next(ch for ch in map(chr, range(0x04, 0x110000))
                       if ch not in text and ch not in sep)
    elements = text.replace(sep + sep, placeholder).split(sep)
    return [x.replace(placeholder, sep) for x in elements]