pology/pology/escape.py

0001 # -*- coding: UTF-8 -*-
0002
0003 """
0004 Escaping texts in various contexts.
0005
0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0007 @license: GPLv3
0008 """
0009
0010 import re
0011
0012 from pology import PologyError, _, n_
0013 from pology.report import warning
0014
0015
# Characters with a single-letter C escape, mapped to the letter that
# follows the backslash in the escaped form.
_escapes_c = {
    "\a" : "a",
    "\b" : "b",
    "\f" : "f",
    "\n" : "n",
    "\r" : "r",
    "\t" : "t",
    "\v" : "v",
    "\"" : "\"",
    "\\" : "\\",
}

# Inverse table: letter following the backslash -> character it denotes.
_unescapes_c = {letter: char for char, letter in _escapes_c.items()}

def unescape_c (s):
    """
    Unescape text for C-style quoted strings.

    Single-character escapes (such as C{\\n} or C{\\t}) are converted
    into the characters they denote.
    Numeric sequences must consist of the introducer followed by exactly
    two ASCII digits: C{\\xHH} is read as hexadecimal and C{\\0OO} as octal,
    and each is converted into the character with that code.

    Invalid escape sequences raise exception. This includes a trailing
    backslash, an unknown escape letter, and a numeric sequence whose
    two following characters are not both digits of the expected base
    (signs and whitespace are not accepted in place of digits).

    @param s: text to unescape (without wrapping quotes)
    @type s: string

    @returns: unescaped text
    @rtype: string

    @raise PologyError: on invalid escape sequence

    @see: L{escape_c}
    """

    hex_digits = "0123456789abcdefABCDEF"
    oct_digits = "01234567"

    segs = []
    p = 0
    while True:
        pp = p
        p = s.find("\\", p)
        if p < 0:
            # No more backslashes, the rest is literal text.
            segs.append(s[pp:])
            break
        segs.append(s[pp:p])
        p += 1 # step over the backslash
        c = s[p:p + 1] # empty if the backslash was the last character
        ec = None
        if c in ("x", "0"):
            if c == "x":
                base, digits = 16, hex_digits
            else:
                base, digits = 8, oct_digits
            dd = s[p + 1:p + 3]
            # Check digits explicitly: int() alone would also accept
            # surrounding whitespace, a sign, or non-ASCII digits.
            if len(dd) == 2 and dd[0] in digits and dd[1] in digits:
                ec = chr(int(dd, base))
                p += 3
        else:
            ec = _unescapes_c.get(c)
            if ec is not None:
                p += 1
        if ec is None:
            raise PologyError(
                _("@info \"C\" is the C programming language",
                  "Invalid C escape sequence after '%(snippet)s'.",
                  snippet=s[:p]))
        segs.append(ec)

    return type(s)().join(segs)
0081
0082
# Character -> complete escape sequence (backslash plus escape letter),
# derived from the _escapes_c table.
_escapes_c_wpref = {raw: "\\" + letter for raw, letter in _escapes_c.items()}

def escape_c (s):
    """
    Escape text for C-style quoted strings.

    Every character that has an entry in the C escape table is replaced
    by its backslash-prefixed escape sequence; all other characters are
    passed through unchanged.

    @param s: text to escape
    @type s: string

    @returns: escaped text (without wrapping quotes)
    @rtype: string

    @see: L{unescape_c}
    """
    glue = type(s)()
    lookup = _escapes_c_wpref.get
    pieces = []
    for char in s:
        # Fall back to the character itself when it needs no escaping.
        pieces.append(lookup(char, char))
    return glue.join(pieces)
0098
0099
# Characters whose presence means the text must be quoted for the shell.
# Tab and newline are included because unquoted they split words/commands.
_special_chars_sh = set(" ~`#$&*()\\|[]{};'\"<>?!\t\n")

def escape_sh (s):

    """
    Escape text for Unix sh-like shell.

    Escaped text may be used as a fixed argument in command line,
    i.e. the shell will not interpret any part of it in a special way.
    It is undefined which of the possible ways to escape are used
    (single quotes, double quotes, backslashes).

    Text containing none of the shell-special characters is returned
    unchanged. This includes the empty string.
    NOTE(review): an unquoted empty string does not survive as a separate
    argument on a command line; kept as is for compatibility with
    existing callers -- confirm whether C{''} would be preferable.

    @param s: text to escape
    @type s: string

    @returns: escaped text
    @rtype: string
    """

    if not _special_chars_sh.intersection(s):
        return s

    # Within single quotes the shell takes every character literally,
    # so only the single quote itself needs handling: close the quoted
    # run, emit a backslash-escaped quote, and reopen the quoted run.
    # (Double quotes would leave $, backtick and backslash active.)
    return "'" + s.replace("'", "'\\''") + "'"
0125
0126
def split_escaped (text, sep):
    """
    Like C{split()}, but double-separator is treated as an escape of itself.

    Doubled separators are recognized by scanning the text from the left,
    and each one yields a single literal separator within an element.
    The remaining single separators delimit the elements.
    An empty separator raises C{ValueError}, same as C{split()}.

    @param text: the text to split
    @type text: string

    @param sep: the separator
    @type sep: string

    @returns: parsed elements
    @rtype: list of strings
    """

    # Cut at the doubled separators first, so no placeholder character
    # (which could collide with the content of the text) is needed.
    chunks = text.split(sep + sep)

    elements = chunks[0].split(sep)
    for chunk in chunks[1:]:
        pieces = chunk.split(sep)
        # The boundary between two chunks was an escaped separator:
        # it glues the last element so far to the first piece of this chunk.
        elements[-1] += sep + pieces[0]
        elements.extend(pieces[1:])

    return elements
0144