File indexing completed on 2024-04-21 16:29:20

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Check validity of translation in catalogs within KDE Translation Project.
0005 
0006 Documented in C{doc/user/sieving.docbook}.
0007 
0008 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0009 @license: GPLv3
0010 """
0011 
0012 import os
0013 import re
0014 
0015 from pology import _, n_
0016 from pology.markup import flag_no_check_markup
0017 from pology.escape import escape_c
0018 from pology.msgreport import report_on_msg_hl, report_msg_content
0019 from pology.msgreport import report_msg_to_lokalize
0020 from pology.normalize import identify
0021 from pology.report import report, format_item_list
0022 from pology.sieve import add_param_poeditors
0023 from pology.sieve import SieveError, SieveCatalogError, parse_sieve_flags
0024 from pology.proj.kde.cattype import get_project_subdir
0025 from pology.proj.kde.cattype import is_txt_cat, is_qt_cat, is_docbook_cat
0026 from pology.proj.kde.cattype import is_html_cat, is_unknown_cat
0027 
0028 
def setup_sieve(p):
    """
    Declare the description and the parameters of this sieve.

    @param p: object collecting the sieve setup; its C{set_desc} and
        C{add_param} methods are called here, and it is also handed
        to C{add_param_poeditors}
    """

    p.set_desc(_("@info sieve discription",
    "Check validity of messages in catalogs within KDE Translation Project."
    ))

    strict_desc = _("@info sieve parameter discription",
    "Check translations strictly: report problems in translation regardless "
    "of whether original itself is valid (default is to check translation "
    "only if original passes checks)."
    )
    p.add_param("strict", bool, defval=False, desc=strict_desc)

    # Names of all registered checks, sorted for stable presentation.
    available = sorted(_known_checks)
    check_metavar = _("@info sieve parameter value placeholder",
                      "KEYWORD,...")
    check_desc = _("@info sieve parameter discription",
    "Run only this check instead of all (currently available: %(chklist)s). "
    "Several checks can be specified as a comma-separated list.",
    chklist=format_item_list(available)
    )
    p.add_param("check", str, seplist=True,
                metavar=check_metavar, desc=check_desc)

    showmsg_desc = _("@info sieve parameter discription",
    "Also show the full message that had some problems."
    )
    p.add_param("showmsg", bool, defval=False, desc=showmsg_desc)

    add_param_poeditors(p)
0055 
0056 
class Sieve (object):
    """
    Sieve checking translated messages in KDE Translation Project catalogs.

    For every catalog the applicable checks are selected by catalog type
    (in L{process_header}); every translated message is then run through
    these checks (in L{process}) and the detected problems are reported.
    The total number of problems is reported in L{finalize}.
    """

    def __init__(self, params):
        """
        @param params: sieve parameters; the attributes C{strict},
            C{showmsg}, C{lokalize} and C{check} are read
        @raise SieveError: if C{params.check} names a check which is not
            present in the map of known checks
        """

        self.strict = params.strict
        self.showmsg = params.showmsg
        self.lokalize = params.lokalize

        # None means "no explicit selection": all applicable checks run.
        self.selected_checks = None
        if params.check is not None:
            unknown_checks = [chname for chname in params.check
                              if chname not in _known_checks]
            if unknown_checks:
                fmtchecks = format_item_list(unknown_checks)
                raise SieveError(
                    _("@info",
                      "Unknown checks selected: %(chklist)s.",
                      chklist=fmtchecks))
            self.selected_checks = set(params.check)

        # Indicators to the caller:
        self.caller_sync = False # no need to sync catalogs to the caller
        self.caller_monitored = False # no need for monitored messages

        self.nproblems = 0

        # Per-catalog state, rebuilt by process_header(). Initialized here
        # so that process() does not fail with AttributeError if it is
        # ever invoked before a header has been processed.
        self.current_checks = []
        self.pcache = {
            "strict": self.strict,
        }


    def process_header(self, hdr, cat):
        """
        Select the checks applicable to the catalog and reset its cache.

        @param hdr: catalog header (not used here)
        @param cat: catalog; its C{name} and C{filename} are read
        @raise SieveCatalogError: if the project subdirectory of the
            catalog cannot be determined
        """

        # Collect catalog data for determining type.
        cname = cat.name
        csubdir = get_project_subdir(cat.filename)
        if not csubdir:
            raise SieveCatalogError(
                _("@info",
                  "Cannot determine project subdirectory "
                  "of the catalog '%(file)s'.",
                  file=cat.filename))

        # Select checks applicable to current catalog.
        self.current_checks = []

        def add_checks(names):
            # Filter by the user's selection while keeping the order in
            # which the names are listed, so that checks always run (and
            # report) in the same order; going through a set here would
            # make the order depend on string hashing.
            if self.selected_checks is not None:
                names = [name for name in names
                         if name in self.selected_checks]
            for name in names:
                self.current_checks.append(_known_checks[name])

        if is_txt_cat(cname, csubdir):
            add_checks(["nots", "keywlist"])
        elif is_qt_cat(cname, csubdir):
            add_checks(["qtmarkup", "qtdt", "nots"])
        elif is_docbook_cat(cname, csubdir):
            add_checks(["dbmarkup", "nots"])
        elif is_html_cat(cname, csubdir):
            add_checks(["htmlmarkup", "nots"])
        elif is_unknown_cat(cname, csubdir):
            add_checks([])
        else: # default to native KDE4 catalog
            add_checks(["kde4markup", "qtdt", "trcredits", "plrunq"])
        add_checks(["catspec"]) # to all catalogs, will select internally

        # Reset catalog progress cache, available to checks.
        self.pcache = {
            "strict": self.strict,
        }


    def process(self, msg, cat):
        """
        Run the current checks on a translated message and report problems.

        Messages which are not translated are skipped.

        @param msg: message to check
        @param cat: catalog to which the message belongs
        """

        if not msg.translated:
            return

        # Each check appends its findings to the highlight
        # and returns the number of problems it has found.
        highlight = []
        for check in self.current_checks:
            self.nproblems += check(msg, cat, self.pcache, highlight)

        if highlight:
            if self.showmsg:
                report_msg_content(msg, cat, highlight=highlight,
                                   delim=("-" * 20))
            else:
                report_on_msg_hl(highlight, msg, cat)
            if self.lokalize:
                report_msg_to_lokalize(msg, cat, highlight)


    def finalize(self):
        """
        Report the total number of problems, if any were found.
        """

        if self.nproblems > 0:
            if not self.strict:
                msg = n_("@info:progress TP stands for Translation Project",
                         "Found %(num)d problem in KDE TP translations.",
                         "Found %(num)d problems in KDE TP translations.",
                         num=self.nproblems)
            else:
                msg = n_("@info:progress",
                         "Found %(num)d problem in "
                         "KDE TP translations (strict mode).",
                         "Found %(num)d problems in "
                         "KDE TP translations (strict mode).",
                         num=self.nproblems)
            report("===== " + msg)
0162 
0163 
0164 # --------------------------------------
0165 # Helpers for checks.
0166 
# Memoizer for hook factories.
class _FuncallMemoizer (object):
    """
    Cache of function call results, keyed by the function and its arguments.

    Calling the memoizer as C{memo(func, *args, **kwargs)} returns
    C{func(*args, **kwargs)}, evaluating it only on the first call with
    the given function and arguments. The function and all arguments
    must be hashable, since they are used as the cache key.
    """

    def __init__(self):

        self._cache = {}

    def __call__(self, func, *args, **kwargs):
        """
        @param func: function to call (or whose cached result to fetch)
        @return: result of C{func(*args, **kwargs)}
        """

        # The function itself is part of the key, so that one memoizer
        # can serve several functions without mixing up their results.
        # Positional and keyword arguments are kept in separate slots,
        # so that a positional (name, value) tuple cannot be confused
        # with a keyword argument.
        ckey = (func, args, tuple(sorted(kwargs.items())))
        if ckey not in self._cache:
            self._cache[ckey] = func(*args, **kwargs)
        return self._cache[ckey]
0183 
0184 
# Map of checks by name,
# updated at point of definition of the check.
# Keys are the check keywords accepted by the sieve's "check" parameter;
# values are callables invoked as check(msg, cat, pcache, hl), which
# append their findings to the highlight list hl and return the number
# of problems found.
_known_checks = {}
0188 
0189 # --------------------------------------
0190 # Check for KDE4 markup.
0191 
0192 from pology.markup import validate_kde4_l1
0193 
_tsfence = "|/|"

def _check_kde4markup(msg, cat, pcache, hl):
    """
    Check KDE4 markup in all translations of the message.

    Nothing is checked if the message carries the no-check-markup sieve
    flag, or if the original text itself fails validation while not
    running in strict mode.

    @param msg: message to check
    @param cat: catalog of the message (not used here)
    @param pcache: catalog progress cache; the C{"strict"} key is read
    @param hl: highlight list, extended with C{("msgstr", index, spans)}
    @return: number of problems found
    """

    # Do not check markup if:
    # - the check is explicitly skipped for this message
    # - the original is bad and not running in strict mode
    if flag_no_check_markup in parse_sieve_flags(msg):
        return 0
    if not pcache.get("strict", False):
        originals = (msg.msgid, msg.msgid_plural or "")
        if any(validate_kde4_l1(text, ents=[]) for text in originals):
            return 0

    total = 0
    for index, translation in enumerate(msg.msgstr):
        # Only the part before the scripting fence is ordinary text.
        plain = translation.partition(_tsfence)[0]
        # FIXME: No point in checking the scripted part as it is,
        # since calls may be used to modify markup in special ways.
        # Perhaps it would work to remove calls and check what's left?
        scripted = ""

        for text in (plain, scripted):
            found = validate_kde4_l1(text, ents=[])
            if found:
                total += len(found)
                hl.append(("msgstr", index, found))

    return total

_known_checks["kde4markup"] = _check_kde4markup
0234 
0235 # --------------------------------------
0236 # Check for Qt markup.
0237 
0238 from pology.markup import validate_qtrich_l1
0239 
def _check_qtmarkup(msg, cat, pcache, hl):
    """
    Check Qt rich-text markup in all translations of the message.

    Nothing is checked if the message carries the no-check-markup sieve
    flag, or if the original text itself fails validation while not
    running in strict mode.

    @param msg: message to check
    @param cat: catalog of the message (not used here)
    @param pcache: catalog progress cache; the C{"strict"} key is read
    @param hl: highlight list, extended with C{("msgstr", index, spans)}
    @return: number of problems found
    """

    if flag_no_check_markup in parse_sieve_flags(msg):
        return 0
    if not pcache.get("strict", False):
        originals = (msg.msgid, msg.msgid_plural or "")
        if any(validate_qtrich_l1(text, ents=[]) for text in originals):
            return 0

    total = 0
    for index, translation in enumerate(msg.msgstr):
        found = validate_qtrich_l1(translation, ents=[])
        if not found:
            continue
        total += len(found)
        hl.append(("msgstr", index, found))

    return total

_known_checks["qtmarkup"] = _check_qtmarkup
0262 
0263 # --------------------------------------
0264 # Check for Docbook markup.
0265 
0266 from pology.markup import check_docbook4_msg
0267 
def _check_dbmarkup(msg, cat, pcache, hl):
    """
    Check Docbook markup of the message.

    The checking hook is created once per catalog progress cache and
    reused for subsequent messages.

    @param msg: message to check
    @param cat: catalog of the message
    @param pcache: catalog progress cache; C{"strict"} is read and
        the hook is stored under C{"check_dbmarkup_hook"}
    @param hl: highlight list, extended with what the hook returns
    @return: number of problems found (total number of spans)
    """

    hook_key = "check_dbmarkup_hook"
    checker = pcache.get(hook_key)
    if not checker:
        checker = check_docbook4_msg(strict=pcache.get("strict", False),
                                     entities=None)
        pcache[hook_key] = checker

    found = checker(msg, cat)
    hl.extend(found)

    # Third element of each highlight part is its list of spans.
    return sum(len(part[2]) for part in found)

_known_checks["dbmarkup"] = _check_dbmarkup
0283 
0284 # --------------------------------------
0285 # Check for HTML markup.
0286 
0287 from pology.markup import validate_html_l1
0288 
def _check_htmlmarkup(msg, cat, pcache, hl):
    """
    Check HTML markup in all translations of the message.

    Nothing is checked if the message carries the no-check-markup sieve
    flag, or if the original text itself fails validation while not
    running in strict mode.

    @param msg: message to check
    @param cat: catalog of the message (not used here)
    @param pcache: catalog progress cache; the C{"strict"} key is read
    @param hl: highlight list, extended with C{("msgstr", index, spans)}
    @return: number of problems found
    """

    if flag_no_check_markup in parse_sieve_flags(msg):
        return 0
    if not pcache.get("strict", False):
        originals = (msg.msgid, msg.msgid_plural or "")
        if any(validate_html_l1(text, ents=[]) for text in originals):
            return 0

    total = 0
    for index, translation in enumerate(msg.msgstr):
        found = validate_html_l1(translation, ents=[])
        if not found:
            continue
        total += len(found)
        hl.append(("msgstr", index, found))

    return total

_known_checks["htmlmarkup"] = _check_htmlmarkup
0311 
0312 # --------------------------------------
0313 # Check for no scripting in dumb messages.
0314 
def _check_nots(msg, cat, pcache, hl):
    """
    Check that no translation of the message contains the scripting fence.

    @param msg: message to check
    @param cat: catalog of the message (not used here)
    @param pcache: catalog progress cache (not used here)
    @param hl: highlight list; for each offending translation a part
        with the span of the first fence occurrence is appended
    @return: number of translations containing the fence
    """

    count = 0
    for index, translation in enumerate(msg.msgstr):
        pos = translation.find(_tsfence)
        if pos < 0:
            continue
        note = _("@info",
                 "Dumb message, translation cannot be scripted.")
        hl.append(("msgstr", index, [(pos, pos + len(_tsfence), note)]))
        count += 1

    return count

_known_checks["nots"] = _check_nots
0331 
0332 # --------------------------------------
0333 # Qt datetime format messages.
0334 
_qtdt_flag = "qtdt-format"

_qtdt_clean_rx = re.compile(r"'.*?'")
_qtdt_split_rx = re.compile(r"\W+", re.U)

def _qtdt_parse(text):
    """
    Extract the list of format fields from a date-time format string.

    Single-quoted segments are removed first; what remains is split
    on runs of non-word characters and empty pieces are dropped.

    @param text: format string
    @return: list of field strings, in order of appearance
    """

    unquoted = _qtdt_clean_rx.sub("", text)
    return list(filter(None, _qtdt_split_rx.split(unquoted)))
0345 
0346 
def _is_qtdt_msg(msg):
    """
    Tell whether the message is marked as a Qt date-time format.

    The marker is looked for in the lowercased message context
    and among the message flags.

    @param msg: message to examine
    @return: C{True} if the marker is found, C{False} otherwise
    """

    context = (msg.msgctxt or "").lower()
    if _qtdt_flag in context:
        return True
    return _qtdt_flag in msg.flag
0351 
0352 
# Worker for check_qtdt* hooks.
def _check_qtdt_w(msgstr, msg, cat):
    """
    Compare the sets of format fields in the original and the translation.

    @param msgstr: translation text to check
    @param msg: message to which the translation belongs
    @param cat: catalog of the message (not used here)
    @return: empty list if the message is not a Qt date-time format or
        the field sets are equal, otherwise a list with a single
        C{(None, None, errmsg)} span
    """

    if not _is_qtdt_msg(msg):
        return []

    # Fields are compared as sets: order and repetition do not matter.
    expected = _qtdt_parse(msg.msgid)
    actual = _qtdt_parse(msgstr)
    if set(expected) == set(actual):
        return []

    errmsg = _("@info",
               "Qt date-format mismatch: "
               "original contains fields {%(fieldlist1)s} "
               "while translation contains {%(fieldlist2)s}.",
               fieldlist1=format_item_list(sorted(expected)),
               fieldlist2=format_item_list(sorted(actual)))
    return [(None, None, errmsg)]
0375 
0376 
# Pass-through test hook (for external use).
def check_qtdt(msgstr, msg, cat):
    """
    Check validity of translation if the message is a Qt date-time format
    [type S3C hook].

    If the format fields of the translation do not match those of
    the original, the problem is reported on the message.

    @param msgstr: translation text to check
    @param msg: message to which the translation belongs
    @param cat: catalog of the message
    @return: C{True} if no problem was found, C{False} otherwise
    """

    # Imported here because the module-level imports do not provide
    # report_on_msg; without this the failing path raised NameError.
    from pology.msgreport import report_on_msg

    spans = _check_qtdt_w(msgstr, msg, cat)
    if not spans:
        return True

    # Last element of a span is the error message.
    report_on_msg(spans[0][-1], msg, cat)
    return False
0392 
0393 
# Span-reporting test hook (for external use).
def check_qtdt_sp(msgstr, msg, cat):
    """
    Check validity of translation if the message is a Qt date-time format
    [type V3C hook].

    Span reporting version of L{check_qtdt}: instead of reporting
    the problem, the list of error spans is returned.

    @param msgstr: translation text to check
    @param msg: message to which the translation belongs
    @param cat: catalog of the message
    @return: list of spans, empty if no problem was found
    """

    spans = _check_qtdt_w(msgstr, msg, cat)
    return spans
0404 
0405 
# Internal check for this sieve's use.
def _check_qtdt(msg, cat, pcache, hl):
    """
    Check all translations of a Qt date-time format message.

    @param msg: message to check
    @param cat: catalog of the message
    @param pcache: catalog progress cache (not used here)
    @param hl: highlight list, extended with C{("msgstr", index, spans)}
    @return: number of translations with mismatching format fields
    """

    if not _is_qtdt_msg(msg):
        return 0

    count = 0
    for index, translation in enumerate(msg.msgstr):
        found = _check_qtdt_w(translation, msg, cat)
        if not found:
            continue
        # One problem per translation, regardless of number of spans.
        count += 1
        hl.append(("msgstr", index, found))

    return count

_known_checks["qtdt"] = _check_qtdt
0423 
0424 # --------------------------------------
0425 # Check for runtime translator data.
0426 
_trcredit_name_ctxt = "NAME OF TRANSLATORS"
_trcredit_email_ctxt = "EMAIL OF TRANSLATORS"

_trcredit_ctxts = set((
    _trcredit_name_ctxt,
    _trcredit_email_ctxt,
))

_valid_email_rx = re.compile(r"^\S+@\S+\.\S+$", re.U)

def _check_trcredits(msg, cat, pcache, hl):
    """
    Check the translator credit messages (names and email addresses).

    Only active messages with one of the two credit contexts are
    examined. The comma-separated entries of the first translation are
    stored in the progress cache; email addresses are checked against
    a minimal pattern, and once both lists are cached, they are checked
    to be of same length and without pairs where both are empty.

    @param msg: message to check
    @param cat: catalog of the message (not used here)
    @param pcache: catalog progress cache; C{"trnames"} and
        C{"tremails"} are written and read
    @param hl: highlight list; a single part with all errors is
        appended if there are any
    @return: number of errors found
    """

    if not msg.active or msg.msgctxt not in _trcredit_ctxts:
        return 0

    problems = []

    if msg.msgctxt == _trcredit_name_ctxt:
        pcache["trnames"] = [part.strip()
                             for part in msg.msgstr[0].split(",")]

    elif msg.msgctxt == _trcredit_email_ctxt:
        addresses = [part.strip() for part in msg.msgstr[0].split(",")]
        pcache["tremails"] = addresses

        # Check minimal validity of addresses (empty ones are allowed).
        for address in addresses:
            if not address or _valid_email_rx.match(address):
                continue
            problems.append(_("@info",
                              "Invalid email address '%(email)s'.",
                              email=escape_c(address)))

    # Check congruence between names and emails.
    names = pcache.get("trnames")
    emails = pcache.get("tremails")
    if names and emails:
        if len(names) != len(emails):
            problems.append(_("@info",
                              "Different number of translator names "
                              "(%(num1)d) "
                              "and email addresses (%(num2)d).",
                              num1=len(names), num2=len(emails)))
        else:
            for ordinal, (name, email) in enumerate(zip(names, emails), 1):
                if name or email:
                    continue
                problems.append(_("@info",
                                  "Both name and email address "
                                  "of translator no. %(ord)d are empty.",
                                  ord=ordinal))

    if problems:
        hl.append(("msgstr", 0, [(None, None, text) for text in problems]))

    return len(problems)

_known_checks["trcredits"] = _check_trcredits
0487 
0488 # --------------------------------------
0489 # Check for query placeholders in Plasma runners.
0490 
def _check_plrunq(msg, cat, pcache, hl):
    """
    Check that the Plasma runner query placeholder is kept in translation.

    Only the first translation of an active message is examined.

    @param msg: message to check
    @param cat: catalog of the message (not used here)
    @param pcache: catalog progress cache (not used here)
    @param hl: highlight list, extended if the placeholder is missing
    @return: 1 if the placeholder is in the original but not in
        the translation, 0 otherwise
    """

    if not msg.active:
        return 0

    placeholder = ":q:"
    if placeholder not in msg.msgid or placeholder in msg.msgstr[0]:
        return 0

    errmsg = _("@info",
               "Plasma runner query placeholder '%(plhold)s' "
               "is missing in translation.",
               plhold=placeholder)
    hl.append(("msgstr", 0, [(None, None, errmsg)]))
    return 1

_known_checks["plrunq"] = _check_plrunq
0508 
0509 # --------------------------------------
0510 # Check for proper format of keyword lists in .desktop files.
0511 
0512 from pology.checks import check_keyword_list
0513 
_check_keywlist_hook = _FuncallMemoizer()

def _check_keywlist(msg, cat, pcache, hl):
    """
    Check the format of a keyword list in the first translation.

    The checking hook is obtained through the memoizer, so that it is
    constructed only once for a given strictness.

    @param msg: message to check; skipped if not active
    @param cat: catalog of the message
    @param pcache: catalog progress cache; the C{"strict"} key is read
    @param hl: highlight list, extended with C{("msgstr", 0, spans)}
    @return: 1 if the hook reported any spans, 0 otherwise
    """

    if not msg.active:
        return 0

    checker = _check_keywlist_hook(check_keyword_list,
                                   pcache.get("strict", False))
    found = checker(msg.msgstr[0], msg, cat)
    if not found:
        return 0

    hl.append(("msgstr", 0, found))
    return 1

_known_checks["keywlist"] = _check_keywlist
0533 
0534 # --------------------------------------
0535 # Helpers for catalog-specific checks.
0536 
# Add a catalog-specific check to one or more catalogs, selected by name.
# For example:
#   _add_cat_check_hl(_check_cat_xyz, ["catfoo", "catbar"])
_known_checks_by_cat = {}
def _add_cat_check_hl(check, catspecs):
    """
    Register a highlight-updating check for the given catalogs.

    Registering the same check twice for a catalog has no effect.

    @param check: check callable, invoked as C{check(msg, cat, pcache, hl)}
    @param catspecs: catalog name, or sequence of catalog names
    """

    # A bare string is a single catalog name; without this it would be
    # iterated over character by character.
    if isinstance(catspecs, str):
        catspecs = [catspecs]
    for catspec in catspecs:
        registered = _known_checks_by_cat.setdefault(catspec, [])
        if check not in registered:
            registered.append(check)
0547 
def _on_cat_hl(catspecs): # as decorator
    """
    Decorator registering a highlight-updating check for given catalogs.

    @param catspecs: catalog name, or sequence of catalog names
    @return: decorator which registers the check and returns it unchanged
    """
    # A bare string is a single catalog name, not a sequence of names.
    if isinstance(catspecs, str):
        catspecs = [catspecs]
    def dec(check):
        _add_cat_check_hl(check, catspecs)
        # Hand the function back, so that the decorated name
        # stays bound to it instead of becoming None.
        return check
    return dec
0552 
0553 
# Like _add_cat_check_hl, except that instead of updating the highlight,
# check function returns a single error message or a list of error messages.
def _add_cat_check(check, catspecs):
    """
    Register a message-returning check for the given catalogs.

    The check is wrapped into a function which converts the returned
    error message(s) into a highlight part on the first translation.

    @param check: check callable, invoked as C{check(msg, cat, pcache)};
        returns nothing, an error message, or a list of error messages
    @param catspecs: catalog name, or sequence of catalog names
    """

    def check_mod(msg, cat, pcache, hl):
        errors = check(msg, cat, pcache)
        if not errors:
            return 0
        if isinstance(errors, str):
            errors = [errors]
        hl.append(("msgstr", 0, [(None, None, text) for text in errors]))
        return len(errors)

    targets = [catspecs] if isinstance(catspecs, str) else catspecs
    _add_cat_check_hl(check_mod, targets)
0569 
def _on_cat(catspecs): # as decorator
    """
    Decorator registering a message-returning check for given catalogs.

    @param catspecs: catalog name, or sequence of catalog names
    @return: decorator which registers the check and returns it unchanged
    """
    def dec(check):
        _add_cat_check(check, catspecs)
        # Hand the function back, so that the decorated name
        # stays bound to it instead of becoming None.
        return check
    return dec
0574 
0575 
# Global check to apply appropriate catalog-specific checks.
def _check_catspec(msg, cat, pcache, hl):
    """
    Run all checks registered for the catalog of the given name.

    @param msg: message to check
    @param cat: catalog of the message; checks are looked up by its name
    @param pcache: catalog progress cache, passed on to the checks
    @param hl: highlight list, passed on to the checks
    @return: total number of problems reported by the checks
    """

    applicable = _known_checks_by_cat.get(cat.name, [])
    return sum(check(msg, cat, pcache, hl) for check in applicable)

_known_checks["catspec"] = _check_catspec
0586 
0587 
# Checks that functional tokens are preserved in translation.
def _check_cat_match_tokens(msg, cat, pcache, tokens):
    """
    Check that tokens present in the original appear in all translations.

    @param msg: message to check
    @param cat: catalog of the message (not used here)
    @param pcache: catalog progress cache (not used here)
    @param tokens: sequence of token strings to look for
    @return: error message for the first token found in the original
        but missing from some translation, or C{None} if there is none
    """

    for token in tokens:
        if token not in msg.msgid:
            continue
        if any(token not in msgstr for msgstr in msg.msgstr):
            return _("@info",
                     "Translation must contain '%(token)s'.",
                     token=token)
    return None
0598 
0599 
# Checks that translation is an ASCII identifier-like string.
def _check_cat_ascii_identifier(msg, cat, pcache):
    """
    Check that each translation is unchanged by C{identify}, up to case.

    @param msg: message to check
    @param cat: catalog of the message (not used here)
    @param pcache: catalog progress cache (not used here)
    @return: error message if the lowercased form of some translation
        differs from what C{identify} makes of it, C{None} otherwise
    """

    if all(msgstr.lower() == identify(msgstr) for msgstr in msg.msgstr):
        return None
    return _("@info",
             "Translation must be composed only of ASCII letters, "
             "numbers, and underscores, "
             "and must not start with a number.")
0609 
0610 
0611 # --------------------------------------
0612 # Catalog-specific checks.
0613 
@_on_cat("kdeqt")
def _check_cat_kdeqt(msg, cat, pcache):
    """
    Check the layout direction message of the C{kdeqt} catalog.

    @return: error message if the first translation of the
        C{QT_LAYOUT_DIRECTION} message is neither C{LTR} nor C{RTL},
        C{None} otherwise
    """

    if msg.msgid != "QT_LAYOUT_DIRECTION":
        return None
    if msg.msgstr[0] in ("LTR", "RTL"):
        return None
    return _("@info",
             "Translation must be exactly '%(text1)s' or '%(text2)s'.",
             text1="LTR", text2="RTL")
0622 
0623 
@_on_cat("kiosktool")
def _check_cat_kiosktool(msg, cat, pcache):
    """
    Check that functional tokens of the C{kiosktool} catalog are kept.

    @return: error message if a token is missing in translation,
        C{None} otherwise
    """

    required_tokens = ["%action"]
    return _check_cat_match_tokens(msg, cat, pcache, required_tokens)
0628 
0629 
@_on_cat("kplatolibs")
def _check_cat_kplatolibs(msg, cat, pcache):
    """
    Check letters-only messages of the C{kplatolibs} catalog.

    @return: error message if the message context requests letters only
        and the first translation is not purely alphabetic,
        C{None} otherwise
    """

    context = msg.msgctxt or ""
    if "Letter(s) only" not in context:
        return None
    if msg.msgstr[0].isalpha():
        return None
    return _("@info",
             "Translation must contain only letters.")
0637 
0638 
@_on_cat("libkleopatra")
def _check_cat_libkleopatra(msg, cat, pcache):
    """
    Check yes/no messages of the C{libkleopatra} catalog.

    @return: error message if the message context requests 'yes' or 'no'
        and the first translation is neither, C{None} otherwise
    """

    context = msg.msgctxt or ""
    if "'yes' or 'no'" not in context:
        return None
    if msg.msgstr[0] in ("yes", "no"):
        return None
    return _("@info",
             "Translation must be exactly '%(text1)s' or '%(text2)s'.",
             text1="yes", text2="no")
0647 
0648 
@_on_cat("libknetworkmanager")
def _check_cat_libknetworkmanager(msg, cat, pcache):
    """
    Check identifier-like messages of the C{libknetworkmanager} catalog.

    @return: result of the ASCII identifier check if the message context
        requests ASCII letters and underscore, C{None} otherwise
    """

    context = msg.msgctxt or ""
    if "ASCII letters and underscore" not in context:
        return None
    return _check_cat_ascii_identifier(msg, cat, pcache)
0654 
0655