File indexing completed on 2024-11-03 05:12:59
# -*- coding: UTF-8 -*-

"""
Check validity of translation in catalogs within KDE Translation Project.

Documented in C{doc/user/sieving.docbook}.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import os
import re

from pology import _, n_
from pology.markup import flag_no_check_markup
from pology.escape import escape_c
from pology.msgreport import report_on_msg_hl, report_msg_content
from pology.msgreport import report_msg_to_lokalize
from pology.normalize import identify
from pology.report import report, format_item_list
from pology.sieve import add_param_poeditors
from pology.sieve import SieveError, SieveCatalogError, parse_sieve_flags
from pology.proj.kde.cattype import get_project_subdir
from pology.proj.kde.cattype import is_txt_cat, is_qt_cat, is_docbook_cat
from pology.proj.kde.cattype import is_html_cat, is_unknown_cat


def setup_sieve (p):
    """
    Declare the description and the parameters of this sieve on C{p}.

    Parameters declared: C{strict}, C{check}, C{showmsg}, followed by
    whatever L{add_param_poeditors} adds.
    """

    p.set_desc(_("@info sieve discription",
    "Check validity of messages in catalogs within KDE Translation Project."
    ))

    p.add_param("strict", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Check translations strictly: report problems in translation regardless "
    "of whether original itself is valid (default is to check translation "
    "only if original passes checks)."
    ))

    # Names of all checks registered so far, listed in the help text.
    chnames = sorted(_known_checks)
    p.add_param("check", str, seplist=True,
                metavar=_("@info sieve parameter value placeholder",
                          "KEYWORD,..."),
                desc=_("@info sieve parameter discription",
    "Run only this check instead of all (currently available: %(chklist)s). "
    "Several checks can be specified as a comma-separated list.",
    chklist=format_item_list(chnames)
    ))

    p.add_param("showmsg", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Also show the full message that had some problems."
    ))

    add_param_poeditors(p)


class Sieve (object):
    """
    Sieve running the registered KDE TP checks on translated messages.

    The set of checks is chosen per catalog in L{process_header};
    problems are counted in C{self.nproblems} and summarized in L{finalize}.
    """

    def __init__ (self, params):
        """
        Store sieve options and validate the optional check selection.

        Raises L{SieveError} if C{params.check} names a check
        which is not present in C{_known_checks}.
        """

        self.strict = params.strict
        self.showmsg = params.showmsg
        self.lokalize = params.lokalize

        # None means "no restriction", i.e. all applicable checks are run.
        self.selected_checks = None
        if params.check is not None:
            unknown_checks = [x for x in params.check
                              if x not in _known_checks]
            if unknown_checks:
                fmtchecks = format_item_list(unknown_checks)
                raise SieveError(
                    _("@info",
                      "Unknown checks selected: %(chklist)s.",
                      chklist=fmtchecks))
            self.selected_checks = set(params.check)

        # Indicators to the caller:
        self.caller_sync = False # no need to sync catalogs to the caller
        self.caller_monitored = False # no need for monitored messages

        self.nproblems = 0


    def process_header (self, hdr, cat):
        """
        Select the checks applicable to catalog C{cat} and reset
        the per-catalog cache handed to the checks.

        Raises L{SieveCatalogError} if the project subdirectory
        of the catalog cannot be determined.
        """

        # Collect catalog data for determining type.
        cname = cat.name
        csubdir = get_project_subdir(cat.filename)
        if not csubdir:
            raise SieveCatalogError(
                _("@info",
                  "Cannot determine project subdirectory "
                  "of the catalog '%(file)s'.",
                  file=cat.filename))

        # Select checks applicable to current catalog.
        self.current_checks = []

        def add_checks (names):
            # Honor the user's selection, if one was given.
            if self.selected_checks is not None:
                names = set(names).intersection(self.selected_checks)
            for name in names:
                self.current_checks.append(_known_checks[name])

        # Catalog type predicates, tried in order; first match wins.
        checks_by_type = (
            (is_txt_cat, ["nots", "keywlist"]),
            (is_qt_cat, ["qtmarkup", "qtdt", "nots"]),
            (is_docbook_cat, ["dbmarkup", "nots"]),
            (is_html_cat, ["htmlmarkup", "nots"]),
            (is_unknown_cat, []),
        )
        for is_of_type, names in checks_by_type:
            if is_of_type(cname, csubdir):
                add_checks(names)
                break
        else: # default to native KDE4 catalog
            add_checks(["kde4markup", "qtdt", "trcredits", "plrunq"])
        add_checks(["catspec"]) # to all catalogs, will select internally

        # Reset catalog progress cache, available to checks.
        self.pcache = {
            "strict": self.strict,
        }


    def process (self, msg, cat):
        """
        Run the current checks on a translated message and report
        whatever they have highlighted.
        """

        if not msg.translated:
            return

        highlight = []
        for check in self.current_checks:
            self.nproblems += check(msg, cat, self.pcache, highlight)

        if not highlight:
            return

        if self.showmsg:
            report_msg_content(msg, cat, highlight=highlight,
                               delim=("-" * 20))
        else:
            report_on_msg_hl(highlight, msg, cat)
        if self.lokalize:
            report_msg_to_lokalize(msg, cat, highlight)


    def finalize (self):
        """
        Report the total number of problems found, if any.
        """

        if self.nproblems <= 0:
            return

        if self.strict:
            msg = n_("@info:progress",
                     "Found %(num)d problem in "
                     "KDE TP translations (strict mode).",
                     "Found %(num)d problems in "
                     "KDE TP translations (strict mode).",
                     num=self.nproblems)
        else:
            msg = n_("@info:progress TP stands for Translation Project",
                     "Found %(num)d problem in KDE TP translations.",
                     "Found %(num)d problems in KDE TP translations.",
                     num=self.nproblems)
        report("===== " + msg)


# --------------------------------------
# Helpers for checks.

# Memoizer for hook factories.
class _FuncallMemoizer (object):
    """
    Cache of results of calls, keyed by the called function
    and the arguments it was called with.

    The function and all argument values must be hashable.
    """

    def __init__ (self):

        self._cache = {}

    def __call__ (self, func, *args, **kwargs):
        """
        Return C{func(*args, **kwargs)}, calling C{func} only the first
        time a given combination of function and arguments is seen.
        """

        # The function is part of the key, so that two different factories
        # called with equal arguments do not share one cached result.
        # Positional and keyword arguments are kept in separate slots,
        # so that f(("a", 1)) and f(a=1) do not collide either.
        ckey = (func, args, tuple(sorted(kwargs.items())))
        try:
            return self._cache[ckey]
        except KeyError:
            pass
        value = func(*args, **kwargs)
        self._cache[ckey] = value
        return value


# Map of checks by name,
# updated at point of definition of the check.
_known_checks = {}

# --------------------------------------
# Check for KDE4 markup.

from pology.markup import validate_kde4_l1

_tsfence = "|/|"

def _check_kde4markup (msg, cat, pcache, hl):
    """
    Validate KDE4 markup in every form of the translation.

    Appends C{("msgstr", index, spans)} to C{hl} for each invalid text
    and returns the total number of problem spans.
    """

    strict = pcache.get("strict", False)

    # Do not check markup if:
    # - the check is explicitly skipped for this message
    # - the original is bad and not running in strict mode
    if flag_no_check_markup in parse_sieve_flags(msg):
        return 0
    if not strict:
        if (   validate_kde4_l1(msg.msgid, ents=[])
            or validate_kde4_l1(msg.msgid_plural or "", ents=[])
        ):
            return 0

    nproblems = 0
    for i, msgstr in enumerate(msg.msgstr):

        # Only the part before the scripting fence is ordinary text.
        lst = msgstr.split(_tsfence, 1)
        msgstr = lst[0]
        msgscript = ""
        if len(lst) == 2:
            # FIXME: No point in checking the scripted part as it is,
            # since calls may be used to modify markup in special ways.
            # Perhaps it would work to remove calls and check what's left?
            #msgscript = lst[1]
            pass

        for text in (msgstr, msgscript):
            spans = validate_kde4_l1(text, ents=[])
            if spans:
                nproblems += len(spans)
                hl.append(("msgstr", i, spans))

    return nproblems

_known_checks["kde4markup"] = _check_kde4markup

# --------------------------------------
# Check for Qt markup.

from pology.markup import validate_qtrich_l1

def _check_qtmarkup (msg, cat, pcache, hl):
    """
    Validate Qt rich-text markup in every form of the translation.

    Appends C{("msgstr", index, spans)} to C{hl} for each invalid form
    and returns the total number of problem spans.
    """

    strict = pcache.get("strict", False)

    # Skipped on explicit request through the sieve flag.
    if flag_no_check_markup in parse_sieve_flags(msg):
        return 0

    # Outside of strict mode, a bad original excuses the translation.
    if not strict:
        bad_original = (
               validate_qtrich_l1(msg.msgid, ents=[])
            or validate_qtrich_l1(msg.msgid_plural or "", ents=[]))
        if bad_original:
            return 0

    nproblems = 0
    for i, msgstr in enumerate(msg.msgstr):
        spans = validate_qtrich_l1(msgstr, ents=[])
        if not spans:
            continue
        nproblems += len(spans)
        hl.append(("msgstr", i, spans))

    return nproblems

_known_checks["qtmarkup"] = _check_qtmarkup

# --------------------------------------
# Check for Docbook markup.

from pology.markup import check_docbook4_msg

def _check_dbmarkup (msg, cat, pcache, hl):
    """
    Validate Docbook markup of the message through the hook
    produced by L{check_docbook4_msg}.

    The hook is created once and kept in C{pcache}. Its highlight
    entries are added to C{hl}; the return value is the total number
    of spans in those entries.
    """

    hook_key = "check_dbmarkup_hook"
    check1 = pcache.get(hook_key)
    if not check1:
        check1 = check_docbook4_msg(strict=pcache.get("strict", False),
                                    entities=None)
        pcache[hook_key] = check1

    hl1 = check1(msg, cat)
    hl.extend(hl1)

    return sum(len(x[2]) for x in hl1)

_known_checks["dbmarkup"] = _check_dbmarkup

# --------------------------------------
# Check for HTML markup.

from pology.markup import validate_html_l1

def _check_htmlmarkup (msg, cat, pcache, hl):
    """
    Validate HTML markup in every form of the translation.

    Appends C{("msgstr", index, spans)} to C{hl} for each invalid form
    and returns the total number of problem spans.
    """

    strict = pcache.get("strict", False)

    # Do not check markup if:
    # - the check is explicitly skipped for this message
    # - the original is bad and not running in strict mode
    if flag_no_check_markup in parse_sieve_flags(msg):
        return 0
    if not strict:
        if (   validate_html_l1(msg.msgid, ents=[])
            or validate_html_l1(msg.msgid_plural or "", ents=[])
        ):
            return 0

    nproblems = 0
    for i, msgstr in enumerate(msg.msgstr):
        spans = validate_html_l1(msgstr, ents=[])
        if spans:
            nproblems += len(spans)
            hl.append(("msgstr", i, spans))

    return nproblems

_known_checks["htmlmarkup"] = _check_htmlmarkup

# --------------------------------------
# Check for no scripting in dumb messages.

def _check_nots (msg, cat, pcache, hl):
    """
    Report every translation form which contains the scripting fence.

    The first occurrence of the fence in each form is highlighted;
    returns the number of forms containing it.
    """

    nproblems = 0
    for i, msgstr in enumerate(msg.msgstr):
        p = msgstr.find(_tsfence)
        if p >= 0:
            nproblems += 1
            hl.append(("msgstr", i,
                       [(p, p + len(_tsfence),
                         _("@info",
                           "Dumb message, translation cannot be scripted."))]))

    return nproblems

_known_checks["nots"] = _check_nots

# --------------------------------------
# Qt datetime format messages.

_qtdt_flag = "qtdt-format"

_qtdt_clean_rx = re.compile(r"'.*?'")
_qtdt_split_rx = re.compile(r"\W+", re.U)

def _qtdt_parse (text):
    """
    Return the list of format fields in C{text}: single-quoted segments
    are removed, and the rest is split on runs of non-word characters.
    """

    text = _qtdt_clean_rx.sub("", text)
    fields = [x for x in _qtdt_split_rx.split(text) if x]
    return fields


def _is_qtdt_msg (msg):
    """
    Tell whether the message is marked as Qt date-time format,
    either in its context (case-insensitively) or among its flags.
    """

    return (   (_qtdt_flag in (msg.msgctxt or "").lower())
            or (_qtdt_flag in msg.flag))


# Worker for check_qtdt* hooks.
def _check_qtdt_w (msgstr, msg, cat):
    """
    Return a list with one C{(None, None, errmsg)} span if the set of
    format fields in C{msgstr} differs from that in the original,
    and an empty list otherwise (also for non-qtdt messages).
    """

    if not _is_qtdt_msg(msg):
        return []

    # Get format fields from the msgid.
    msgid_fmts = _qtdt_parse(msg.msgid)

    # Expect the same format fields in msgstr.
    msgstr_fmts = _qtdt_parse(msgstr)
    spans = []
    if set(msgid_fmts) != set(msgstr_fmts):
        errmsg = _("@info",
                   "Qt date-format mismatch: "
                   "original contains fields {%(fieldlist1)s} "
                   "while translation contains {%(fieldlist2)s}.",
                   fieldlist1=format_item_list(sorted(msgid_fmts)),
                   fieldlist2=format_item_list(sorted(msgstr_fmts)))
        spans.append((None, None, errmsg))

    return spans


# Pass-through test hook (for external use).
def check_qtdt (msgstr, msg, cat):
    """
    Check validity of translation if the message is a Qt date-time format
    [type S3C hook].

    Reports the problem on the message and returns C{False} if the format
    fields of the translation do not match those of the original;
    returns C{True} otherwise.

    TODO: Document further.
    """

    # This name is not among the imports at the top of the file,
    # so it is brought into scope here; without it the first reported
    # mismatch would raise NameError.
    from pology.msgreport import report_on_msg

    spans = _check_qtdt_w(msgstr, msg, cat)
    if spans:
        report_on_msg(spans[0][-1], msg, cat)
        return False
    else:
        return True


# Span-reporting test hook (for external use).
def check_qtdt_sp (msgstr, msg, cat):
    """
    Check validity of translation if the message is a Qt date-time format
    [type V3C hook].

    Span reporting version of L{check_qtdt}.
    """

    return _check_qtdt_w(msgstr, msg, cat)


# Internal check for this sieve's use.
def _check_qtdt (msg, cat, pcache, hl):
    """
    Run the Qt date-time format check on every form of the translation.

    Appends C{("msgstr", index, spans)} to C{hl} for each mismatching
    form and returns the number of such forms.
    """

    if not _is_qtdt_msg(msg):
        return 0

    nproblems = 0
    for i, msgstr in enumerate(msg.msgstr):
        spans = _check_qtdt_w(msgstr, msg, cat)
        if spans:
            nproblems += 1
            hl.append(("msgstr", i, spans))

    return nproblems

_known_checks["qtdt"] = _check_qtdt

# --------------------------------------
# Check for runtime translator data.

_trcredit_name_ctxt = "NAME OF TRANSLATORS"
_trcredit_email_ctxt = "EMAIL OF TRANSLATORS"

_trcredit_ctxts = {_trcredit_name_ctxt, _trcredit_email_ctxt}

_valid_email_rx = re.compile(r"^\S+@\S+\.\S+$", re.U)

def _check_trcredits (msg, cat, pcache, hl):
    """
    Check the translator credit messages (names and email addresses).

    The comma-separated items of each of the two messages are remembered
    in C{pcache}; once both are known, they are compared to each other.
    All errors are attached to the first translation form in C{hl};
    returns the number of errors.
    """

    if not msg.active or msg.msgctxt not in _trcredit_ctxts:
        return 0

    errors = []
    items = [x.strip() for x in msg.msgstr[0].split(",")]

    if msg.msgctxt == _trcredit_name_ctxt:
        pcache["trnames"] = items

    elif msg.msgctxt == _trcredit_email_ctxt:
        pcache["tremails"] = items

        # Check minimal validity of each non-empty address.
        for addr in items:
            if addr and not _valid_email_rx.match(addr):
                errors.append(_("@info",
                                "Invalid email address '%(email)s'.",
                                email=escape_c(addr)))

    # Check congruence between names and emails.
    names = pcache.get("trnames")
    emails = pcache.get("tremails")
    if emails and names:
        if len(names) != len(emails):
            errors.append(_("@info",
                            "Different number of translator names (%(num1)d) "
                            "and email addresses (%(num2)d).",
                            num1=len(names), num2=len(emails)))
        else:
            for i, (name, email) in enumerate(zip(names, emails), 1):
                if name or email:
                    continue
                errors.append(_("@info",
                                "Both name and email address "
                                "of translator no. %(ord)d are empty.",
                                ord=i))

    if errors:
        hl.append(("msgstr", 0, [(None, None, x) for x in errors]))

    return len(errors)

_known_checks["trcredits"] = _check_trcredits

# --------------------------------------
# Check for query placeholders in Plasma runners.

def _check_plrunq (msg, cat, pcache, hl):
    """
    Check that the C{:q:} placeholder present in the original
    is also present in the first form of the translation.

    Returns the number of errors (0 or 1), adding the error to C{hl}.
    """

    if not msg.active:
        return 0

    nerrors = 0
    if ":q:" in msg.msgid and ":q:" not in msg.msgstr[0]:
        errmsg = _("@info",
                   "Plasma runner query placeholder '%(plhold)s' "
                   "is missing in translation.",
                   plhold=":q:")
        hl.append(("msgstr", 0, [(None, None, errmsg)]))
        nerrors += 1

    return nerrors

_known_checks["plrunq"] = _check_plrunq

# --------------------------------------
# Check for proper format of keyword lists in .desktop files.

from pology.checks import check_keyword_list

_check_keywlist_hook = _FuncallMemoizer()

def _check_keywlist (msg, cat, pcache, hl):
    """
    Run the hook produced by L{check_keyword_list} on the first form
    of the translation.

    The hook is obtained through the memoizer, so that it is built
    once per value of the strictness setting.
    Returns the number of errors (0 or 1), adding the spans to C{hl}.
    """

    if not msg.active:
        return 0

    strict = pcache.get("strict", False)
    checkf = _check_keywlist_hook(check_keyword_list, strict)
    spans = checkf(msg.msgstr[0], msg, cat)
    if spans:
        nerrors = 1
        hl.append(("msgstr", 0, spans))
    else:
        nerrors = 0

    return nerrors

_known_checks["keywlist"] = _check_keywlist

# --------------------------------------
# Helpers for catalog-specific checks.

# Add a catalog-specific checks to one or more catalogs, selected by name.
# For example:
#   _add_cat_check(_check_cat_xyz, ["catfoo", "catbar"])
_known_checks_by_cat = {}
def _add_cat_check_hl (check, catspecs):
    """
    Register C{check} (called as C{check(msg, cat, pcache, hl)})
    for every catalog name in C{catspecs}, at most once per catalog.

    C{catspecs} may be a single catalog name or a sequence of names.
    """
    # A bare string is one catalog name, not a sequence of one-letter names.
    if isinstance(catspecs, str):
        catspecs = [catspecs]
    for catspec in catspecs:
        checks = _known_checks_by_cat.setdefault(catspec, [])
        if check not in checks:
            checks.append(check)

def _on_cat_hl (catspecs): # as decorator
    """
    Decorator form of L{_add_cat_check_hl}.
    """
    def dec (check):
        _add_cat_check_hl(check, catspecs)
        # Hand the function back, so that the decorated name
        # stays bound to it instead of to None.
        return check
    return dec


# Like _add_cat_check_hl, except that instead of updating the highlight,
# check function returns a single error message or a list of error messages.
def _add_cat_check (check, catspecs):
    """
    Register a catalog-specific check which reports by return value.

    C{check} is called as C{check(msg, cat, pcache)} and returns either
    nothing, a single error message, or a list of error messages.
    It is wrapped into a function which attaches the messages to the
    first translation form in the highlight and returns their number.

    C{catspecs} may be a single catalog name or a sequence of names.
    """
    if isinstance(catspecs, str):
        catspecs = [catspecs]
    def check_mod (msg, cat, pcache, hl):
        errors = check(msg, cat, pcache)
        if errors:
            if isinstance(errors, str):
                errors = [errors]
            hl.append(("msgstr", 0, [(None, None, x) for x in errors]))
            return len(errors)
        else:
            return 0
    _add_cat_check_hl(check_mod, catspecs)

def _on_cat (catspecs): # as decorator
    """
    Decorator form of L{_add_cat_check}.
    """
    def dec (check):
        _add_cat_check(check, catspecs)
        # Hand the function back, so that the decorated name
        # stays bound to it instead of to None.
        return check
    return dec


# Global check to apply appropriate catalog-specific checks.
def _check_catspec (msg, cat, pcache, hl):
    """
    Run all checks registered for the catalog named C{cat.name}
    and return the sum of the problem counts they return.
    """

    nproblems = 0
    for check in _known_checks_by_cat.get(cat.name, []):
        nproblems += check(msg, cat, pcache, hl)

    return nproblems

_known_checks["catspec"] = _check_catspec


# Checks that functional tokens are preserved in translation.
def _check_cat_match_tokens (msg, cat, pcache, tokens):
    """
    Return an error message for the first of C{tokens} which is found
    in the original but missing from some form of the translation;
    return nothing if there is no such token.
    """

    for token in tokens:
        if token in msg.msgid:
            for msgstr in msg.msgstr:
                if token not in msgstr:
                    return _("@info",
                             "Translation must contain '%(token)s'.",
                             token=token)


# Checks that translation is an ASCII identifier-like string.
def _check_cat_ascii_identifier (msg, cat, pcache):
    """
    Return an error message if some form of the translation, lowercased,
    differs from what L{identify} makes of that form;
    return nothing otherwise.
    """

    for msgstr in msg.msgstr:
        if msgstr.lower() != identify(msgstr):
            return _("@info",
                     "Translation must be composed only of ASCII letters, "
                     "numbers, and underscores, "
                     "and must not start with a number.")


# --------------------------------------
# Catalog-specific checks.

@_on_cat("kdeqt")
def _check_cat_kdeqt (msg, cat, pcache):
    """
    The layout direction message must be translated as LTR or RTL.
    """

    if msg.msgid != "QT_LAYOUT_DIRECTION":
        return None
    if msg.msgstr[0] in ("LTR", "RTL"):
        return None
    return _("@info",
             "Translation must be exactly '%(text1)s' or '%(text2)s'.",
             text1="LTR", text2="RTL")


@_on_cat("kiosktool")
def _check_cat_kiosktool (msg, cat, pcache):
    """
    The C{%action} token of the original must be kept in translation.
    """

    required_tokens = ["%action"]
    return _check_cat_match_tokens(msg, cat, pcache, required_tokens)


@_on_cat("kplatolibs")
def _check_cat_kplatolibs (msg, cat, pcache):
    """
    Messages whose context asks for letters only must have the first
    translation form pass C{str.isalpha}.
    """

    ctxt = msg.msgctxt or ""
    if "Letter(s) only" not in ctxt:
        return None
    if msg.msgstr[0].isalpha():
        return None
    return _("@info",
             "Translation must contain only letters.")


@_on_cat("libkleopatra")
def _check_cat_libkleopatra (msg, cat, pcache):
    """
    Messages whose context asks for 'yes' or 'no' must be translated
    as exactly one of those two words.
    """

    ctxt = msg.msgctxt or ""
    if "'yes' or 'no'" not in ctxt:
        return None
    if msg.msgstr[0] in ("yes", "no"):
        return None
    return _("@info",
             "Translation must be exactly '%(text1)s' or '%(text2)s'.",
             text1="yes", text2="no")


@_on_cat("libknetworkmanager")
def _check_cat_libknetworkmanager (msg, cat, pcache):
    """
    Messages whose context asks for ASCII letters and underscore
    are passed to the ASCII identifier check.
    """

    ctxt = msg.msgctxt or ""
    if "ASCII letters and underscore" not in ctxt:
        return None
    return _check_cat_ascii_identifier(msg, cat, pcache)
