File indexing completed on 2025-02-09 07:00:46
# -*- coding: UTF-8 -*-

"""
Detect unwanted patterns in translation.

@note: This module is deprecated.
Use L{rules<pology.rules>} through C{check-rules} sieve instead.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import re
import codecs

from pology import _, n_
from pology.comments import manc_parse_flag_list
from pology.msgreport import report_on_msg, report_msg_content


def bad_patterns (rxmatch=False, casesens=True, patterns=None, fromfiles=None):
    """
    Detect unwanted patterns in text [hook factory].

    Patterns can be given both as list of strings, and as a list of file
    paths containing patterns (in each file: one pattern per line,
    strip leading and trailing whitespace, skip empty lines, #-comments).
    Either source may be omitted (left as C{None}).
    Detected patterns are reported to stdout.

    If C{rxmatch} is C{False}, patterns are matched by plain substring search,
    otherwise as regular expressions.
    If C{casesens} is True, matching is case sensitive.

    If the message has pipe flag C{no-bad-patterns}, check is skipped.

    @param rxmatch: whether to take pattern as regular expression
    @type rxmatch: bool
    @param casesens: whether the match should be case-sensitive
    @type casesens: bool
    @param patterns: patterns to match the text
    @type patterns: list of strings
    @param fromfiles: file paths from which to read patterns
    @type fromfiles: list of strings

    @return: type S3A hook
    @rtype: C{(text, msg, cat)->numerr}
    """

    patterns_str = list(patterns or [])
    # fromfiles defaults to None, which must mean "no pattern files"
    # (same convention as in the worker for bad_patterns_msg* hooks).
    for filepath in fromfiles or []:
        patterns_str.extend(_load_patterns(filepath))

    patterns_cmp = _process_patterns(rxmatch=rxmatch, casesens=casesens,
                                     patterns=patterns_str)

    def hook (text, msg, cat):
        # Manual per-message opt-out through the pipe flag.
        if _flag_no_bad_patterns in manc_parse_flag_list(msg, "|"):
            return 0

        indspans = _match_patterns(text, patterns_cmp)
        # Only the pattern index is needed here; report the pattern
        # as originally given, not its processed form.
        for pind, _span in indspans:
            pstr = patterns_str[pind]
            report_on_msg(_("@info",
                            "Bad pattern '%(pattern)s' detected.",
                            pattern=pstr), msg, cat)
        return len(indspans)

    return hook


def bad_patterns_msg (rxmatch=False, casesens=True,
                      patterns=None, fromfiles=None):
    """
    Detect unwanted patterns in translation [hook factory].

    Like L{bad_patterns}, but checks and reports on all C{msgstr}
    fields in the message.

    @return: type S4A hook
    @rtype: C{(msg, cat)->numerr}
    """

    return _bad_patterns_msg_w(rxmatch, casesens, patterns, fromfiles, False)


def bad_patterns_msg_sp (rxmatch=False, casesens=True,
                         patterns=None, fromfiles=None):
    """
    Detect unwanted patterns in translation, report spans [hook factory].

    Like L{bad_patterns_msg}, but reports parts instead of writing to stdout.

    @return: type V4A hook
    @rtype: C{(msg, cat)->parts}
    """

    return _bad_patterns_msg_w(rxmatch, casesens, patterns, fromfiles, True)


# Worker for bad_patterns_msg* hooks.
def _bad_patterns_msg_w (rxmatch, casesens, patterns, fromfiles, partrep):
    # rxmatch, casesens, patterns, fromfiles: as in the public factories.
    # partrep: if True, the hook returns the list of highlight parts;
    # otherwise it reports the message content and returns the match count.

    patterns_str = list(patterns or [])
    for filepath in fromfiles or []:
        patterns_str.extend(_load_patterns(filepath))

    patterns_cmp = _process_patterns(rxmatch=rxmatch, casesens=casesens,
                                     patterns=patterns_str)

    def hook (msg, cat):
        if _flag_no_bad_patterns in manc_parse_flag_list(msg, "|"):
            # Skipped message: still honor the return type of the hook
            # variant (parts list when reporting parts, count otherwise).
            return [] if partrep else 0

        parts = []
        nbad = 0
        for i, msgstr in enumerate(msg.msgstr):
            indspans = _match_patterns(msgstr, patterns_cmp)
            spans = []
            for pind, span in indspans:
                emsg = _("@info",
                         "Bad pattern '%(pattern)s' detected.",
                         pattern=patterns_str[pind])
                # Highlight span is (start, end, note).
                spans.append(span + (emsg,))
                nbad += 1
            if spans:
                parts.append(("msgstr", i, spans))

        if partrep:
            return parts
        if parts:
            report_msg_content(msg, cat, highlight=parts, delim=("-" * 20))
        return nbad

    return hook


# Pipe flag used to manually prevent matching for a particular message.
_flag_no_bad_patterns = "no-bad-patterns"


# Load pattern string from the file:
# one pattern per non-empty line in the file,
# leading and trailing whitespace stripped,
# #-comments possible.
def _load_patterns (filepath):

    rem_cmnt_rx = re.compile(r"#.*")
    patterns = []
    # The with-statement closes the file also when reading or decoding fails.
    with codecs.open(filepath, "r", "UTF-8") as ifl:
        for line in ifl.readlines():
            line = rem_cmnt_rx.sub("", line).strip()
            if line:
                patterns.append(line)

    return patterns


# Process given list of pattern strings.
# If rxmatch is True, patterns are compiled into regexes.
# If casesens is False, re.I flag is used in regex compilation;
# if regex patterns are not requested, plain patterns are escaped and
# compiled with re.I as well, since a merely lower-cased pattern
# would match only lower-case text.
# Case-sensitive plain patterns are returned as plain strings.
def _process_patterns (patterns, rxmatch=False, casesens=True):

    if not rxmatch and casesens:
        return list(patterns)

    rx_flags = re.U
    if not casesens:
        rx_flags |= re.I

    if rxmatch:
        return [re.compile(pattern, rx_flags) for pattern in patterns]
    # Plain pattern, case-insensitive: match it literally but ignoring case.
    return [re.compile(re.escape(pattern), rx_flags) for pattern in patterns]


# Try to match the text by all patterns in the list.
# A pattern can be either a plain string for substring search,
# or a compiled regular expression.
# Returns a list of (pattern_index, span) tuples for patterns that matched;
# only the first occurrence of each pattern is reported.
def _match_patterns (text, patterns):

    matched_patterns = []
    for i, pattern in enumerate(patterns):
        if isinstance(pattern, str):
            p = text.find(pattern)
            span = (p, p + len(pattern)) if p >= 0 else None
        else:
            m = pattern.search(text)
            span = m.span() if m else None

        if span is not None:
            matched_patterns.append((i, span))

    return matched_patterns