File indexing completed on 2024-05-12 17:18:07
# -*- coding: UTF-8 -*-

"""
Various checks for translations into Serbian.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import re

from pology import _, n_
from pology.diff import adapt_spans
from pology.msgreport import warning_on_msg

# ----------------------------------------
# Checks for presence of naked Latin segments.

# Directive head for manual GUI references in docs.
_gui_man_dir = "~%"

# Directive head for explicit wrapping to ignore Latin.
_lat_wrap_dir = "~!"

# Segments not to be considered naked.
# Every pattern is applied in turn and its matches are removed from the text,
# so the order of entries matters (see the "must come after" notes).
_no_check_lat_rxs = (
    # - explicitly wrapped to ignore
    re.compile(_lat_wrap_dir + r"(.)(.*?)\1", re.U|re.I|re.S),
    # - format directives
    re.compile(r"%(\(\w+\))? ?\d?\$?[\d.]*[a-z]+", re.U|re.I), # C-like
    re.compile(r"%\d+", re.U|re.I), # Qt-like, must come after C-like
    # - text within these tags
    re.compile(r"<\s*(%s)\b.*?\b\1\s*>" % "|".join("""
        bcode command envar filename icode shortcut placeholder style code tt
        literal screen option keycap userinput systemitem prompt function
        foreignphrase varname programlisting token markup parameter keysym
        methodname replaceable sgmltag arg classname type package errorcode
    """.split()), re.U|re.I|re.S),
    # - some tags are requested without attributes, as otherwise
    #   Latin-content is allowed inside attributes only.
    re.compile(r"<\s*(%s)\s*>\b.*?\b\1\s*>" % "|".join("""
        email link
    """.split()), re.U|re.I|re.S),
    # - all tags (must come after the above text removed by tags)
    re.compile(r"<.*?>", re.U|re.I),
    # - entities
    re.compile(r"&[\w_:][\w\d._:-]*;", re.U|re.I),
    # - command line options
    re.compile(r"(?<!\w)--?\w[\w-]*", re.U|re.I),
    # - hex numbers
    re.compile(r"0x[\dabcdef]*", re.U|re.I),
    # - alternatives directives
    re.compile(r"~@(.)(.*?)\1(.*?)\1", re.U|re.I|re.S),
    # - extension filter, e.g. "*.png|PNG files"
    re.compile(r"^.*\*\..*\|", re.U|re.I),
    # - URLs and web links
    re.compile(r"\S+://\S*[\w&=]", re.U),
    re.compile(r"\w{3,}(\.[\w-]{2,})+", re.U),
    # - wiki stuff
    re.compile(r"\[\[[^\]]*(\||\])", re.U|re.I),
    re.compile(r"\[[^\s]*", re.U|re.I),
    re.compile(r"\{\{.*?(\||\}\})", re.U|re.I),
    # - double escapings
    re.compile(r"\\[nt]"),
)

# Additional segments not to be considered naked when original UI
# references are allowed; removed before the general patterns above.
_no_check_lat_origui_rxs = (
    # - automatic by tags
    re.compile(r"<\s*(gui[a-z]+)\b.*?\b\1\s*>", re.U|re.I|re.S), # Docbook
    re.compile(r"<\s*(interface)\b.*?\b\1\s*>", re.U|re.I|re.S), # KUIT
    # - manually wrapped
    re.compile(_gui_man_dir + r"(.)(.*?)\1", re.U|re.I|re.S),
)

# Warn on naked-Latin if this matches.
# A match starts at a Latin letter and extends over any following
# Latin letters and non-word characters.
_naked_latin_rx = re.compile(r"[a-z][a-z\W]*", re.U|re.I)

# Messages to skip by tags in auto comments.
_auto_cmnt_tag_rx = re.compile(r"^\s*Tag:\s*(%s)\s*$" % "|".join("""
    filename envar programlisting screen command option userinput cmdsynopsis
    email errorcode
""".split()), re.U|re.I)


# The hook worker.
def _naked_latin_w (msgstr, msg, cat, origui=False, sideeffect=False):
    """
    Worker shared by all naked-Latin hooks in this module.

    Removes every segment in which Latin text is sanctioned from C{msgstr}
    and looks for Latin letters in what remains.

    @param msgstr: translated text to check
    @param msg: message to which the text belongs; its C{msgctxt} and
        C{auto_comment} attributes decide whether the check is skipped
    @param cat: catalog to which the message belongs; its C{name}
        attribute is examined when deciding whether to skip the check
    @param origui: whether original UI references (C{gui*} and
        C{interface} tags, C{~%/.../} wrapping) are also sanctioned
    @param sideeffect: whether to issue a warning for each naked segment
        and return their number, instead of returning spans

    @return: number of naked Latin segments if C{sideeffect} is set,
        otherwise the list of spans produced by C{adapt_spans}
    """

    # Result for messages that are not checked at all;
    # its type follows the requested hook flavor.
    nothing_found = 0 if sideeffect else []

    # Avoid meta-messages.
    if (   msg.msgctxt in ("EMAIL OF TRANSLATORS",)
        or (    cat.name.endswith(".desktop")
            and msg.msgctxt in ("Keywords", "Query"))
    ):
        return nothing_found

    # Avoid specially tagged messages.
    for auto_cmnt in msg.auto_comment:
        if _auto_cmnt_tag_rx.search(auto_cmnt):
            return nothing_found

    # Eliminate all no-check segments.
    stripped_msgstr = msgstr
    if origui: # must come before tag removal
        for rx in _no_check_lat_origui_rxs:
            stripped_msgstr = rx.sub("", stripped_msgstr)
    for rx in _no_check_lat_rxs:
        stripped_msgstr = rx.sub("", stripped_msgstr)

    matches = list(_naked_latin_rx.finditer(stripped_msgstr))
    if sideeffect:
        # Report if any Latin text remained in stripped msgstr.
        for m in matches:
            warning_on_msg(_("@info",
                             "Naked Latin segment '%(snippet)s'.",
                             snippet=m.group(0)), msg, cat)
        return len(matches)
    else:
        # Collect and adapt offending spans.
        # The spans are positions in the stripped text; adapt_spans is given
        # both texts, presumably to map them back onto the original msgstr
        # (verify against pology.diff.adapt_spans).
        spans = [m.span() for m in matches]
        spans = adapt_spans(msgstr, stripped_msgstr, spans, merge=False)
        return spans


def naked_latin (msgstr, msg, cat):
    """
    Report spans of Latin letters outside of sanctioned contexts
    [type V3C hook].

    Latin segments are allowed within:
      - some XML-like tags, e.g. C{tt}, C{code}, C{email}, C{envar}, etc.
      - XML-like entities, e.g. C{&foo;} or C{&nbsp;}
      - format directives starting with %-character
      - command line options, e.g. C{-o} or C{--foo-bar}
      - hexadecimal number starting with C{0x}
      - extension filters, e.g. C{"*.png|ПНГ слике"}
      - alternative directives C{~@/.../.../}, e.g. C{~@/Делфин/Dolphin/}
      - links in wiki markup, e.g. C{"...на [http://foo.org страни Фуа]"}
      - templates in wiki markup, e.g. C{"{{note|Обавезно проверите...}}"}
      - explicit wrapping C{~!/.../}, e.g. C{"...наредбом ~!/grep/..."}

    @param msgstr: translated text to check
    @param msg: message to which the text belongs
    @param cat: catalog to which the message belongs

    @return: annotated spans
    """

    return _naked_latin_w(msgstr, msg, cat)


def naked_latin_origui (msgstr, msg, cat):
    """
    Like C{naked_latin}, but allowing original UI references which are
    supposed to be automatically resolved [type V3C hook].

    Original UI references are given:
      - within XML-like tags: C{gui*} (C{guilabel}, C{guimenu}, etc.),
        C{interface};
        e.g. C{"...кликните на <guibutton>Scramble Reactor</guibutton> да..."}
      - manually wrapped by C{~%/.../},
        e.g. C{"...кликните на ~%/Scramble Reactor/ да..."}

    @param msgstr: translated text to check
    @param msg: message to which the text belongs
    @param cat: catalog to which the message belongs

    @return: annotated spans
    """

    return _naked_latin_w(msgstr, msg, cat, origui=True)


def naked_latin_se (msgstr, msg, cat):
    """
    Side-effect version of C{naked_latin}, issuing warnings to stderr
    [type S3C hook].

    @param msgstr: translated text to check
    @param msg: message to which the text belongs
    @param cat: catalog to which the message belongs

    @return: number of errors
    """

    return _naked_latin_w(msgstr, msg, cat, sideeffect=True)


def naked_latin_origui_se (msgstr, msg, cat):
    """
    Side-effect version of C{naked_latin_origui}, issuing warnings to stderr
    [type S3C hook].

    @param msgstr: translated text to check
    @param msg: message to which the text belongs
    @param cat: catalog to which the message belongs

    @return: number of errors
    """

    return _naked_latin_w(msgstr, msg, cat, origui=True, sideeffect=True)