File indexing completed on 2024-12-01 13:47:54
# -*- coding: UTF-8 -*-

"""
Transform ASCII single and double quotes into fancy counterparts.

Documented in C{doc/user/sieving.docbook}.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import os
import re

from pology import _, n_
from pology.comments import manc_parse_flag_list
from pology.escape import split_escaped
from pology.report import report
from pology.sieve import SieveError


def setup_sieve (p):
    """
    Register the sieve description and its parameters on C{p}.

    Four string parameters are declared: C{single} and C{double}
    (a pair of quote characters given as one two-character string),
    and C{longsingle} and C{longdouble} (a comma-separated pair of
    opening and closing quote strings).

    @param p: the object on which C{set_desc} and C{add_param} are called
    """

    p.set_desc(_("@info sieve discription",
    "Transform ASCII single and double quotes into fancy counterparts."
    ))

    p.add_param("single", str,
                metavar=_("@info sieve parameter value placeholder", "QUOTES"),
                desc=_("@info sieve parameter discription",
    "Opening and closing single quote (two characters)."
    ))
    p.add_param("double", str,
                metavar=_("@info sieve parameter value placeholder", "QUOTES"),
                desc=_("@info sieve parameter discription",
    "Opening and closing double quote (two characters)."
    ))
    p.add_param("longsingle", str,
                metavar=_("@info sieve parameter value placeholder",
                          "OPEN,CLOSED"),
                desc=_("@info sieve parameter discription",
    "Opening and closing single quote longer than single character."
    ))
    p.add_param("longdouble", str,
                metavar=_("@info sieve parameter value placeholder",
                          "OPEN,CLOSED"),
                desc=_("@info sieve parameter discription",
    "Opening and closing double quote longer than single character."
    ))


# Pipe flag used to manually prevent transformation into fancy quotes.
_flag_no_fancy_quote = "no-fancy-quote"


class Sieve (object):
    """
    Replace ASCII quotes in translations with configured fancy quotes.

    Counts of replaced single and double quote pairs are accumulated
    over all processed messages and reported by C{finalize}.
    """

    def __init__ (self, params):
        """
        Read and validate the requested quote pairs.

        C{self.singles} and C{self.doubles} are set to a two-tuple of
        opening and closing quote, or left as an empty tuple when the
        corresponding replacement was not requested.

        @param params: object with C{single}, C{double}, C{longsingle}
            and C{longdouble} attributes (each C{None} when not given)

        @raises SieveError: when both the short and the long form are given
            for the same kind of quote, or when a specification does not
            consist of exactly two elements
        """

        self.nrepl_single = 0
        self.nrepl_double = 0

        # Pair of single quotes.
        self.singles = ()
        if params.single is not None and params.longsingle is not None:
            raise SieveError(
                _("@info",
                  "Both single- and multi-character replacement of "
                  "single quotes issued."))
        if params.single is not None:
            quotes = params.single
            if len(quotes) != 2:
                raise SieveError(
                    _("@info",
                      "Invalid specification of single quotes (%(quotes)s), "
                      "expected two characters.",
                      quotes=quotes))
            self.singles = (quotes[0], quotes[1])
        elif params.longsingle is not None:
            quotes = split_escaped(params.longsingle, ",")
            if len(quotes) != 2:
                raise SieveError(
                    _("@info",
                      "Invalid specification of single quotes (%(quotes)s), "
                      "expected two strings.",
                      quotes=quotes))
            self.singles = (quotes[0], quotes[1])

        # Pair of double quotes.
        self.doubles = ()
        if params.double is not None and params.longdouble is not None:
            raise SieveError(
                _("@info",
                  "Both single- and multi-character replacement of "
                  "double quotes issued."))
        if params.double is not None:
            quotes = params.double
            if len(quotes) != 2:
                raise SieveError(
                    _("@info",
                      "Invalid specification of double quotes (%(quotes)s), "
                      "expected two characters.",
                      quotes=quotes))
            self.doubles = (quotes[0], quotes[1])
        elif params.longdouble is not None:
            quotes = split_escaped(params.longdouble, ",")
            if len(quotes) != 2:
                raise SieveError(
                    _("@info",
                      "Invalid specification of double quotes '%(quotes)s', "
                      "expected two strings.",
                      quotes=quotes))
            self.doubles = (quotes[0], quotes[1])


    def process (self, msg, cat):
        """
        Replace quotes in every translation of the message, in place.

        The message is left untouched when it carries the
        C{no-fancy-quote} flag, when its context matches one of the
        special contexts, or when one of its automatic comments names
        an XML tag from the literal tag list.

        @param msg: the message to modify
        @param cat: the catalog which the message is part of (not used here)
        """

        # Skip the message when told so.
        if _flag_no_fancy_quote in manc_parse_flag_list(msg, "|"):
            return

        # Skip the message if special by context (one of meta-messages).
        if _spec_msgctxt_rx.search(msg.msgctxt or ""):
            return

        # Skip the message if auto comments identify it as literal user input.
        for cmnt in msg.auto_comment:
            cmnt = cmnt.lower()
            # - extracted by KDE's xml2pot
            if "tag:" in cmnt:
                # Take what follows the "tag:" marker itself, so that
                # an earlier colon in the comment does not derail parsing.
                tag = cmnt.partition("tag:")[2].strip()
                if tag in _xml_literal_tags:
                    return

        # Modify quotes in all translations.
        for i, text in enumerate(msg.msgstr):
            if self.singles:
                text, nrepl = equip_fancy_quotes(text, "'", self.singles)
                self.nrepl_single += nrepl
            if self.doubles:
                text, nrepl = equip_fancy_quotes(text, '"', self.doubles)
                self.nrepl_double += nrepl
            msg.msgstr[i] = text


    def finalize (self):
        """
        Report the number of replaced quote pairs, if there were any.
        """

        nrepl_both = self.nrepl_single + self.nrepl_double
        if nrepl_both > 0:
            msg = n_("@info:progress",
                     "Replaced %(num)d pair of quotes in translation "
                     "(single+double: %(nums)d+%(numd)d).",
                     "Replaced %(num)d pairs of quotes in translation "
                     "(single+double: %(nums)d+%(numd)d).",
                     num=nrepl_both,
                     nums=self.nrepl_single, numd=self.nrepl_double)
            report("===== " + msg)


# Regular expression for matching special messages by context.
_spec_msgctxt = (
    "qtdt-format",
)
_spec_msgctxt_rx = re.compile("|".join(_spec_msgctxt))

# Regular expression for matching no-modify nodes in XML markup.
_xml_literal_tags = (
    # HTML
    "tt", "code",
    # KUIT
    "icode", "bcode",
    # Docbook
    "screen", "screenco", "userinput", "code", "literal", "markup",
    "programlisting", "programlistingco", "returnvalue", "command",
    "synopsis", "cmdsynopsis", "synopfragment", "synopfragmentref",
    "guilabel", "guimenuitem", "action", "errorname",
)
# Matches the opening of a literal element; group 1 is the tag name.
_xml_literal_rx = re.compile(r"< *(%s)\b" % "|".join(_xml_literal_tags))
# Per-tag expressions matching the tail of a tag ("name>"), built once
# here so that nothing is compiled inside the scanning loop.
_xml_literal_end_rx = {
    tag: re.compile(r"\b%s *>" % re.escape(tag)) for tag in _xml_literal_tags
}


def _no_modify_length (text, i, squote, quote_open):
    """
    Get the length of the no-modify segment starting at given position.

    The checks are tried in fixed order and the first one whose
    starting condition holds decides the result, even if it then
    finds the segment unclosed.

    @param text: the text being scanned
    @type text: string

    @param i: position in the text, must be less than text length
    @type i: int

    @param squote: the simple quote
    @type squote: string

    @param quote_open: whether an opening simple quote is pending
    @type quote_open: bool

    @returns: length of the segment to skip, or 0 if none starts here
    @rtype: int
    """

    lensq = len(squote)
    ch = text[i]

    # - known XML nodes which are literal user input to computer
    m = _xml_literal_rx.match(text, i)
    if m:
        # Search from the end of the opening match, so that the name
        # inside the opening tag is never taken for the closing tag.
        m_end = _xml_literal_end_rx[m.group(1)].search(text, m.end())
        # Skip only if closed, otherwise stay put.
        return m_end.end() - i if m_end else 0

    # - within XML tags
    if ch == "<":
        ic = text.find(">", i + 1)
        # Markup only if closed, otherwise stay put.
        return ic - i + 1 if ic >= 0 else 0

    # - text in special parenthesis
    if ch in ("{", "["):
        qclose = "}" if ch == "{" else "]"
        # Look for balanced pair.
        nopen = 1
        ic = i + 1
        while ic < len(text) and nopen > 0:
            if text[ic] == ch:
                nopen += 1
            elif text[ic] == qclose:
                nopen -= 1
            ic += 1
        # Special only if closed, otherwise stay put.
        return ic - i if nopen == 0 else 0

    # - simple quotes with no text in between
    if text.startswith(squote + squote, i):
        return 2 * lensq

    # - ASCII quote just after a number, and no opening quote so far
    #   (may be a unit: inch, foot, minute, second)
    if not quote_open and ch.isdigit():
        if text.startswith(squote, i + 1):
            return 1 + lensq
        return 0

    # - simple quote in between two letters, may be a contraction
    if (    ch.isalpha()
        and text.startswith(squote, i + 1)
        and text[i + 1 + lensq:i + 2 + lensq].isalpha()
    ):
        return 1 + lensq + 1

    return 0


def equip_fancy_quotes (text, squote, fquotes):
    """
    Heuristically replace simple with fancy quotes (eg. "foo" with “foo”).

    The replacement tries to avoid quotes in markup (e.g. XML attributes),
    and other situations where the original quoting should not be touched:
    content of known literal XML elements, balanced C{{...}} and C{[...]}
    segments, two adjacent simple quotes, a quote right after a digit
    while no quote is open, and a quote between two letters.
    An opening quote which never gets closed is left as it is.

    @param text: the text to equip with fancy quotes
    @type text: string

    @param squote: the simple quote, used for both opening and closing
    @type squote: string

    @param fquotes: the opening and closing fancy quote
    @type fquotes: two-tuple of strings

    @returns: the modified text and number of fancy pairs replaced
    @rtype: string, int
    """

    # Quick check: simple quote valid, any simple quotes at all?
    if not squote or squote not in text:
        return text, 0

    lensq = len(squote)
    lentext = len(text)
    parts = [] # pieces of the resulting text, joined at the end
    nrepl = 0
    i_after_close = 0
    i_open = -1
    i = 0
    while i < lentext:

        no_mod_len = _no_modify_length(text, i, squote, i_open >= 0)

        # Advance past the end of no-modify segment if found.
        if no_mod_len > 0:
            i += no_mod_len

        # If at simple quote.
        elif text.startswith(squote, i):
            if i_open < 0:
                # No quote opened, this is opening quote.
                i_open = i # record opening position
                parts.append(text[i_after_close:i_open]) # text so far
            else:
                # Quote opened beforehand, this is closing quote.
                parts.append(fquotes[0])
                parts.append(text[i_open + lensq:i]) # quoted segment
                parts.append(fquotes[1])
                nrepl += 1 # count added fancy pair
                i_open = -1 # cancel opened state
                i_after_close = i + lensq # record position after closing

            # Advance past the simple quote.
            i += lensq

        else:
            # Nothing special, advance to next char.
            i += 1

    # Append the remaining text.
    if i_open >= 0:
        # Unpaired opening quote.
        parts.append(text[i_open:])
    else:
        # All quotes paired.
        parts.append(text[i_after_close:])

    return "".join(parts), nrepl