File indexing completed on 2024-12-01 13:47:54

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Transform ASCII single and double quotes into fancy counterparts.
0005 
0006 Documented in C{doc/user/sieving.docbook}.
0007 
0008 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0009 @license: GPLv3
0010 """
0011 
0012 import os
0013 import re
0014 
0015 from pology import _, n_
0016 from pology.comments import manc_parse_flag_list
0017 from pology.escape import split_escaped
0018 from pology.report import report
0019 from pology.sieve import SieveError
0020 
0021 
def setup_sieve (p):
    """
    Declare the description and the parameters of this sieve.

    Four optional string parameters are registered: C{single} and C{double}
    for two-character quote pairs, C{longsingle} and C{longdouble} for
    pairs given as two comma-separated strings.

    @param p: the object on which C{set_desc} and C{add_param} are called
    """

    sieve_desc = _("@info sieve discription",
    "Transform ASCII single and double quotes into fancy counterparts."
    )
    p.set_desc(sieve_desc)

    # Short form, single quotes: opening and closing character in one string.
    single_metavar = _("@info sieve parameter value placeholder", "QUOTES")
    single_desc = _("@info sieve parameter discription",
    "Opening and closing single quote (two characters)."
    )
    p.add_param("single", str, metavar=single_metavar, desc=single_desc)

    # Short form, double quotes.
    double_metavar = _("@info sieve parameter value placeholder", "QUOTES")
    double_desc = _("@info sieve parameter discription",
    "Opening and closing double quote (two characters)."
    )
    p.add_param("double", str, metavar=double_metavar, desc=double_desc)

    # Long form, single quotes: two comma-separated strings.
    longsingle_metavar = _("@info sieve parameter value placeholder",
                           "OPEN,CLOSED")
    longsingle_desc = _("@info sieve parameter discription",
    "Opening and closing single quote longer than single character."
    )
    p.add_param("longsingle", str,
                metavar=longsingle_metavar, desc=longsingle_desc)

    # Long form, double quotes.
    longdouble_metavar = _("@info sieve parameter value placeholder",
                           "OPEN,CLOSED")
    longdouble_desc = _("@info sieve parameter discription",
    "Opening and closing double quote longer than single character."
    )
    p.add_param("longdouble", str,
                metavar=longdouble_metavar, desc=longdouble_desc)
0050 
0051 
# Pipe flag used to manually prevent transformation into fancy quotes.
# Sieve.process() leaves a message untouched when this flag is among
# the flags returned by manc_parse_flag_list(msg, "|").
_flag_no_fancy_quote = "no-fancy-quote"
0054 
0055 
class Sieve (object):
    """
    Sieve which swaps ASCII quotes in translations for fancy quote pairs.

    A kind of quote (single or double) is only touched when a replacement
    pair for it was given through the sieve parameters.
    """

    def __init__ (self, params):
        """
        Read the replacement quote pairs from the sieve parameters.

        @param params: sieve parameters; the attributes C{single},
            C{longsingle}, C{double} and C{longdouble} are read

        @raises SieveError: when both the short and the long form are given
            for the same kind of quote, or when a specification does not
            consist of exactly two elements
        """

        # Counters of replaced pairs, reported by finalize().
        self.nrepl_single = 0
        self.nrepl_double = 0

        # Pair of single quotes, empty when no replacement was requested.
        self.singles = ()
        short_spec = params.single
        long_spec = params.longsingle
        if short_spec is not None and long_spec is not None:
            raise SieveError(
                _("@info",
                  "Both single- and multi-character replacement of "
                  "single quotes issued."))
        if short_spec is not None:
            # Short form: the two characters are the pair.
            if len(short_spec) != 2:
                raise SieveError(
                    _("@info",
                      "Invalid specification of single quotes (%(quotes)s), "
                      "expected two characters.",
                      quotes=short_spec))
            opening, closing = short_spec
            self.singles = (opening, closing)
        elif long_spec is not None:
            # Long form: comma-separated opening and closing strings.
            parts = split_escaped(long_spec, ",")
            if len(parts) != 2:
                raise SieveError(
                    _("@info",
                      "Invalid specification of single quotes (%(quotes)s), "
                      "expected two strings.",
                      quotes=parts))
            opening, closing = parts
            self.singles = (opening, closing)

        # Pair of double quotes, empty when no replacement was requested.
        self.doubles = ()
        short_spec = params.double
        long_spec = params.longdouble
        if short_spec is not None and long_spec is not None:
            raise SieveError(
                _("@info",
                  "Both single- and multi-character replacement of "
                  "double quotes issued."))
        if short_spec is not None:
            if len(short_spec) != 2:
                raise SieveError(
                    _("@info",
                      "Invalid specification of double quotes (%(quotes)s), "
                      "expected two characters.",
                      quotes=short_spec))
            opening, closing = short_spec
            self.doubles = (opening, closing)
        elif long_spec is not None:
            parts = split_escaped(long_spec, ",")
            if len(parts) != 2:
                raise SieveError(
                    _("@info",
                      "Invalid specification of double quotes '%(quotes)s', "
                      "expected two strings.",
                      quotes=parts))
            opening, closing = parts
            self.doubles = (opening, closing)


    def process (self, msg, cat):
        """
        Replace ASCII quotes in all translations of one message.

        The message is left untouched when it carries the no-fancy-quote
        pipe flag, when its context matches one of the special contexts,
        or when an automatic comment of the form C{tag: NAME} names one of
        the literal XML tags.

        @param msg: the message; C{msgctxt}, C{auto_comment} and C{msgstr}
            are read, and the elements of C{msgstr} are reassigned
        @param cat: not used by this sieve
        """

        # Skip the message when told so.
        flags = manc_parse_flag_list(msg, "|")
        if _flag_no_fancy_quote in flags:
            return

        # Skip the message if special by context (one of meta-messages).
        context = msg.msgctxt or ""
        if _spec_msgctxt_rx.search(context):
            return

        # Skip the message if auto comments identify it as literal user input
        # (tag comments as extracted by KDE's xml2pot).
        for raw_comment in msg.auto_comment:
            comment = raw_comment.lower()
            if "tag:" not in comment:
                continue
            # The tag name is whatever follows the first colon.
            tag_name = comment.partition(":")[2].strip()
            if tag_name in _xml_literal_tags:
                return

        # Modify quotes in all translations.
        for idx in range(len(msg.msgstr)):
            translation = msg.msgstr[idx]
            if self.singles:
                translation, npairs = equip_fancy_quotes(
                    translation, "'", self.singles)
                self.nrepl_single += npairs
            if self.doubles:
                translation, npairs = equip_fancy_quotes(
                    translation, '"', self.doubles)
                self.nrepl_double += npairs
            msg.msgstr[idx] = translation


    def finalize (self):
        """
        Report the number of replaced quote pairs, if there were any.
        """

        total = self.nrepl_single + self.nrepl_double
        if total <= 0:
            # Nothing replaced, nothing to report.
            return

        summary = n_("@info:progress",
                     "Replaced %(num)d pair of quotes in translation "
                     "(single+double: %(nums)d+%(numd)d).",
                     "Replaced %(num)d pairs of quotes in translation "
                     "(single+double: %(nums)d+%(numd)d).",
                     num=total,
                     nums=self.nrepl_single, numd=self.nrepl_double)
        report("===== " + summary)
0159 
0160 
# Regular expression for matching special messages by context.
# Sieve.process() searches it in the message context and skips the message
# on any match. The entries are joined as alternatives without escaping,
# so each one is itself a regular expression fragment.
_spec_msgctxt = (
    "qtdt-format",
)
_spec_msgctxt_rx = re.compile("|".join(_spec_msgctxt))
0166 
# Names of XML elements whose content is literal text (user input to
# computer and similar), and so must not get its quotes modified.
_xml_literal_tags = (
    # HTML
    "tt", "code",
    # KUIT
    "icode", "bcode",
    # Docbook
    "screen", "screenco", "userinput", "code", "literal", "markup",
    "programlisting", "programlistingco", "returnvalue", "command",
    "synopsis", "cmdsynopsis", "synopfragment", "synopfragmentref",
    "guilabel", "guimenuitem", "action", "errorname", 
)
# Regular expression for matching the start of a no-modify node
# in XML markup; the tag name is captured as group 1.
_xml_literal_rx = re.compile(r"< *(%s)\b" % "|".join(_xml_literal_tags))
# Regular expressions for matching the end of the closing tag of a
# no-modify node, by tag name. Compiled once here rather than inside
# the scanning loop.
_xml_literal_end_rx = dict((tag, re.compile(r"\b%s *>" % tag))
                           for tag in _xml_literal_tags)

def equip_fancy_quotes (text, squote, fquotes):
    """
    Heuristically replace simple with fancy quotes (eg. "foo" with “foo”).

    The replacement tries to avoid quotes in markup (e.g. XML attributes),
    and other situations where the original quoting should not be touched.
    The following segments are passed over without looking for quotes:
      - a complete XML node with one of the literal tags (C{<code>...</code>}
        and similar), when its closing tag is present
      - any other C{<...>} tag, including the opening tag of a literal node
        whose closing tag is missing
      - text in balanced C{{...}} or C{[...]}
      - two simple quotes in a row
      - a digit followed by the simple quote, while no quote is opened
        (may be a unit)
      - the simple quote between two letters (may be a contraction)

    Simple quotes outside of such segments are paired in order of
    appearance; an opening quote left without its closing quote
    is not replaced.

    @param text: the text to equip with fancy quotes
    @type text: string

    @param squote: the simple quote, used for both opening and closing
    @type squote: string

    @param fquotes: the opening and closing fancy quote
    @type fquotes: two-tuple of strings

    @returns: the modified text and number of fancy pairs replaced
    @rtype: string, int
    """

    # Quick check: simple quote valid, any simple quotes at all?
    if not squote or squote not in text:
        return text, 0

    lensq = len(squote)
    lentext = len(text)
    nrepl = 0
    i_after_close = 0 # position just after the last closing quote
    i_open = -1 # position of the pending opening quote, -1 if none
    segs = [] # pieces of the resulting text, joined at the end
    i = 0
    while i < lentext:

        # Calculate the length of no-modify segment if it starts here.
        no_mod_len = 0

        if text[i] == "<":
            # - known XML nodes which are literal user input to computer
            m = _xml_literal_rx.match(text, i)
            if m:
                tag = m.group(1)
                # Search past the start of the opening tag name,
                # so that the opening tag itself cannot be matched.
                m = _xml_literal_end_rx[tag].search(text, i + len(tag))
                if m: # skip whole node only if closed
                    no_mod_len = m.end() - i

            # - within XML tags
            # This also covers the opening tag of a literal node with no
            # closing tag: its attributes are still markup and their quotes
            # must not be modified.
            if no_mod_len == 0:
                ic = text.find(">", i + 1)
                if ic >= 0: # markup only if closed, otherwise stay put
                    no_mod_len = ic - i + 1

        # - text in special parenthesis
        elif text[i] in ("{", "["):
            qopen = text[i]
            if qopen == "{":
                qclose = "}"
            else:
                qclose = "]"
            # Look for balanced pair.
            nopen = 1
            ic = i + 1
            while ic < lentext and nopen > 0:
                if text[ic] == qopen:
                    nopen += 1
                elif text[ic] == qclose:
                    nopen -= 1
                ic += 1
            if nopen == 0: # special only if closed, otherwise stay put
                no_mod_len = ic - i

        # - simple quotes with no text in between
        elif text[i:i + 2 * lensq] == squote + squote:
            no_mod_len = 2 * lensq

        # - ASCII quote just after a number, and no opening quote so far
        # (may be a unit: inch, foot, minute, second)
        elif i_open < 0 and text[i].isdigit():
            if text[i + 1:i + 1 + lensq] == squote:
                no_mod_len = 1 + lensq

        # - simple quote in between two letters, may be a contraction
        elif (    text[i].isalpha()
              and text[i + 1:i + 1 + lensq] == squote
              and text[i + 1 + lensq:i + 1 + lensq + 1].isalpha()
        ):
            no_mod_len = 1 + lensq + 1

        # Advance past the end of no-modify segment if found.
        if no_mod_len > 0:
            i += no_mod_len

        # If at simple quote.
        elif text[i:i + lensq] == squote:
            if i_open < 0:
                # No quote opened, this is opening quote.
                i_open = i # record opening position
                segs.append(text[i_after_close:i_open]) # text so far
            else:
                # Quote opened beforehand, this is closing quote.
                tseg = text[i_open + lensq:i] # quoted segment
                segs.append(fquotes[0] + tseg + fquotes[1]) # fancy-quoted
                nrepl += 1 # count added fancy pair
                i_open = -1 # cancel opened state
                i_after_close = i + lensq # record position after closing

            # Advance past the simple quote
            i += lensq

        else:
            # Nothing special, advance to next char.
            i += 1

    # Append the remaining text.
    if i_open >= 0:
        # Unpaired opening quote, keep it and all after it as it was.
        segs.append(text[i_open:])
    else:
        # All quotes paired.
        segs.append(text[i_after_close:])

    return "".join(segs), nrepl
0302