# -*- coding: UTF-8 -*-

"""
Catalog statistics: message and word counts, etc.

Documented in C{doc/user/sieving.docbook}.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import codecs
import locale
import os
import sys

from pology import _, n_
from pology.catalog import Catalog
from pology.message import MessageUnsafe
from pology.colors import ColorString, cjoin, cinterp
from pology.comments import parse_summit_branches
from pology.diff import tdiff
from pology.fsops import collect_catalogs
from pology.getfunc import get_hook_ireq
from pology.report import report, warning, format_item_list
from pology.split import proper_words
from pology.tabulate import tabulate
from pology.sieve import SieveError


def setup_sieve (p):

    p.set_desc(_("@info sieve description",
    "Compute translation statistics.\n"
    "\n"
    "Provides a basic count of messages by type (translated, fuzzy, etc.), "
    "along with word and character counts, and some other derived "
    "statistics on request."
    ))

    p.add_param("accel", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "CHAR"),
                desc=_("@info sieve parameter description",
    "Character which is used as UI accelerator marker in text fields, "
    "to remove it before counting. "
    "If a catalog defines the accelerator marker in the header, "
    "this value overrides it."
    ))
    p.add_param("detail", bool, defval=False,
                desc=_("@info sieve parameter description",
    "Compute and display some derived statistical quantities."
    ))
    p.add_param("incomplete", bool, defval=False,
                desc=_("@info sieve parameter description",
    "List catalogs which are not fully translated, with incompleteness counts."
    ))
    p.add_param("incompfile", str,
                metavar=_("@info sieve parameter value placeholder", "FILE"),
                desc=_("@info sieve parameter description",
    "Write paths of catalogs that are not fully translated into a file, "
    "one per line."
    ))
    p.add_param("templates", str,
                metavar=_("@info sieve parameter value placeholder",
                          "FIND:REPLACE"),
                desc=_("@info sieve parameter description",
    "Count templates without a corresponding catalog (i.e. for which "
    "translation has not started yet) into the statistics. "
    "Assumes that translated catalogs and templates live in two root "
    "directories with the same structure; then for each path of an existing "
    "catalog, its directory is taken and the path to the corresponding "
    "templates directory constructed by replacing the first occurrence of "
    "FIND with REPLACE."
    ))
    p.add_param("branch", str, seplist=True,
                metavar=_("@info sieve parameter value placeholder", "BRANCH"),
                desc=_("@info sieve parameter description",
    "In summit catalogs, count in only messages belonging to the given branch. "
    "Several branches can be given as a comma-separated list."
    ))
    p.add_param("maxwords", int,
                metavar=_("@info sieve parameter value placeholder", "NUMBER"),
                desc=_("@info sieve parameter description",
    "Count in only messages which have at most this many words, "
    "either in the original or in the translation."
    ))
    p.add_param("minwords", int,
                metavar=_("@info sieve parameter value placeholder", "NUMBER"),
                desc=_("@info sieve parameter description",
    "Count in only messages which have at least this many words, "
    "either in the original or in the translation."
    ))
    p.add_param("lspan", str,
                metavar=_("@info sieve parameter value placeholder", "FROM:TO"),
                desc=_("@info sieve parameter description",
    "Count in only messages at or after line FROM, and before line TO. "
    "If FROM is empty, 0 is assumed; "
    "if TO is empty, the total number of lines is assumed."
    ))
    p.add_param("espan", str,
                metavar=_("@info sieve parameter value placeholder", "FROM:TO"),
                desc=_("@info sieve parameter description",
    "Count in only messages at or after entry FROM, and before entry TO. "
    "If FROM is empty, 0 is assumed; "
    "if TO is empty, the total number of entries is assumed."
    ))
    p.add_param("bydir", bool, defval=False,
                desc=_("@info sieve parameter description",
    "Report statistics per leaf directory in searched paths."
    ))
    p.add_param("byfile", bool, defval=False,
                desc=_("@info sieve parameter description",
    "Report statistics per catalog."
    ))
    p.add_param("wbar", bool, defval=False,
                desc=_("@info sieve parameter description",
    "Show statistics in the form of word bars."
    ))
    p.add_param("msgbar", bool, defval=False,
                desc=_("@info sieve parameter description",
    "Show statistics in the form of message bars."
    ))
    p.add_param("msgfmt", bool, defval=False,
                desc=_("@info sieve parameter description",
    "Show a minimal summary of the statistics (like msgfmt)."
    ))
    p.add_param("absolute", bool, defval=False,
                desc=_("@info sieve parameter description",
    "Scale lengths of word and message bars to the numbers they represent, "
    "rather than relative to the percentage of translation state. "
    "Useful with the '%(par1)s' and '%(par2)s' parameters, "
    "to compare sizes of different translation units.",
    par1="byfile", par2="bydir"
    ))
    p.add_param("ondiff", bool, defval=False,
                desc=_("@info sieve parameter description",
    "Split word and character counts of fuzzy messages "
    "into translated and untranslated categories (leaving zero in fuzzy), "
    "based on the difference ratio between current and previous original text."
    ))
    p.add_param("mincomp", float, defval=None,
                metavar=_("@info sieve parameter value placeholder", "RATIO"),
                desc=_("@info sieve parameter description",
    "Include in the statistics only catalogs with sufficient completeness, "
    "as the ratio of translated to other messages (a real value between 0 and 1)."
    ))
    p.add_param("filter", str, multival=True,
                metavar=_("@info sieve parameter value placeholder", "HOOK"),
                desc=_("@info sieve parameter description",
    "F1A hook specification, to filter the translation through. "
    "Several filters can be specified by repeating the parameter."
    ))


class Sieve (object):

    def __init__ (self, params):

        self.p = params

        # Templates correspondence.
        # Mapping of catalogs to templates, in the form <search>:<replace>.
        # For each catalog file path, the first <search> substring is replaced
        # by <replace>, and .po replaced with .pot, to construct its template
        # file path. All templates not found under such paths are reported.
        # Furthermore, all subdirs of these paths are searched for templates
        # without corresponding catalogs, and every such template is counted
        # as a fully untranslated PO.
        if self.p.templates:
            if ":" not in self.p.templates:
                self.tspec_srch = self.p.templates
                self.tspec_repl = ""
            else:
                self.tspec_srch, self.tspec_repl = self.p.templates.split(":", 1)

        # Turn off table display if a bar view has been selected.
        self.p.table = True
        if self.p.msgbar or self.p.wbar or self.p.msgfmt:
            self.p.table = False

        # Filenames of catalogs which are not fully translated.
        self.incomplete_catalogs = {}

        # Counted categories.
        self.count_spec = (
            ("trn",
             _("@title:row translated messages/words/characters",
               "translated")),
            ("fuz",
             _("@title:row fuzzy messages/words/characters",
               "fuzzy")),
            ("unt",
             _("@title:row untranslated messages/words/characters",
               "untranslated")),
            ("tot",
             _("@title:row total messages/words/characters",
               "total")),
            ("obs",
             _("@title:row obsolete messages/words/characters",
               "obsolete")),
        )

        # FIXME: Remove once the parameter parser can deliver
        # the requested sequence type directly.
        if self.p.branch is not None:
            self.p.branch = set(self.p.branch)

        # Parse line/entry spans.
        def parse_span (spanspec):
            lst = spanspec is not None and spanspec.split(":") or ("", "")
            if len(lst) != 2:
                raise SieveError(
                    _("@info",
                      "Wrong number of elements in span "
                      "specification '%(spec)s'.",
                      spec=spanspec))
            nlst = []
            for el in lst:
                if not el:
                    nlst.append(None)
                else:
                    try:
                        nlst.append(int(el))
                    except ValueError:
                        raise SieveError(
                            _("@info",
                              "Not an integer number in span "
                              "specification '%(spec)s'.",
                              spec=spanspec))
            return tuple(nlst)

        self.lspan = parse_span(self.p.lspan)
        self.espan = parse_span(self.p.espan)

        # Number of counts per category:
        # messages, words in original, words in translation,
        # characters in original, characters in translation.
        self.counts_per_cat = 5

        # Category counts per catalog filename.
        self.counts = {}

        # Collections of all confirmed templates and tentative template subdirs.
        self.matched_templates = {}
        self.template_subdirs = []
        if self.p.templates:
            for rpath in params.root_paths:
                if os.path.isfile(rpath):
                    rpath = os.path.dirname(rpath)
                rpath = rpath.replace(self.tspec_srch, self.tspec_repl, 1)
                self.template_subdirs.append(rpath)
        # Map of template to translation subdirs.
        self.mapped_template_subdirs = {}

        # Some indicators of metamessages.
        self.xml2po_meta_msgid = dict([(x, True) for x in
                                       ("translator-credits",)])
        self.xml2pot_meta_msgid = dict([(x, True) for x in
                                        ("ROLES_OF_TRANSLATORS",
                                         "CREDIT_FOR_TRANSLATORS")])
        self.kde_meta_msgctxt = dict([(x, True) for x in
                                      ("NAME OF TRANSLATORS",
                                       "EMAIL OF TRANSLATORS")])

        # Resolve filtering hooks.
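        # Each resolved filter is an F1A hook: a callable taking a piece of
        # text and returning the filtered text. They are resolved once here,
        # so that process() only needs to apply them to each msgstr entry.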
        self.pfilters = []
        for hreq in self.p.filter or []:
            self.pfilters.append(get_hook_ireq(hreq, abort=True))

        # Indicators to the caller:
        self.caller_sync = False # no need to sync catalogs
        self.caller_monitored = False # no need for monitored messages


    def _count_zero (self):

        return dict([(x[0], [0] * self.counts_per_cat)
                     for x in self.count_spec])


    def _count_sum (self, c1, c2):

        cs = self._count_zero()
        for cat, catname in self.count_spec:
            for i in range(self.counts_per_cat):
                cs[cat][i] = c1[cat][i] + c2[cat][i]

        return cs


    def process_header (self, hdr, cat):

        # Establish counts for this file.
        if cat.filename not in self.counts:
            self.counts[cat.filename] = self._count_zero()
        self.count = self.counts[cat.filename]

        # If template correspondence requested, handle template matching.
        if (self.p.templates
            and not cat.filename.endswith(".pot")):

            # Construct the expected template path.
            tpath = cat.filename.replace(self.tspec_srch, self.tspec_repl, 1)
            pdot = tpath.rfind(".")
            if pdot >= 0:
                tpath = tpath[:pdot] + ".pot"
            # Inform if the template does not exist.
            if not os.path.isfile(tpath):
                warning(_("@info",
                          "Expected template catalog '%(file)s' is missing.",
                          file=tpath))
            # Indicate the template has been matched.
            if tpath not in self.matched_templates:
                self.matched_templates[tpath] = True

        # Force explicitly given accelerators.
        if self.p.accel is not None:
            cat.set_accelerator(self.p.accel)


    def process (self, msg, cat):

        # Summit: if branches were given, skip the message if it does not
        # belong to any of the given branches.
        if self.p.branch:
            msg_branches = parse_summit_branches(msg)
            if not set.intersection(self.p.branch, msg_branches):
                return

        # If line/entry spans were given, skip the message if not in range.
        if self.lspan[0] is not None and msg.refline < self.lspan[0]:
            return
        if self.lspan[1] is not None and msg.refline >= self.lspan[1]:
            return
        if self.espan[0] is not None and msg.refentry < self.espan[0]:
            return
        if self.espan[1] is not None and msg.refentry >= self.espan[1]:
            return

        # Decide whether this is a metamessage:
        ismeta = False
        # - msgid in the form "@@<tag>: ..." from xml2po
        if msg.msgid.startswith("@@"):
            ps = msg.msgid.find(":")
            ismeta = (ps >= 0 and msg.msgid[2:ps].isalnum())
        # - translator credits from xml2po and xml2pot
        if (   msg.msgid in self.xml2po_meta_msgid
            or msg.msgid in self.xml2pot_meta_msgid
        ):
            ismeta = True
        # - translator credits in KDE GUI
        if msg.msgctxt in self.kde_meta_msgctxt:
            ismeta = True

        # Prepare the filtered message for counting.
        if self.pfilters:
            msg = MessageUnsafe(msg)
            for pfilter in self.pfilters:
                for i in range(len(msg.msgstr)):
                    msg.msgstr[i] = pfilter(msg.msgstr[i])

        # Count the words and characters in the original and the translation.
        # Remove shortcut markers prior to counting; do not include words
        # which do not start with a letter; remove the scripted part.
        # For plural messages, compute averages over the msgid and msgstr
        # groups, to normalize comparative counts over varying numbers
        # of plural forms.
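        # For example (hypothetical counts): a plural message whose two msgstr
        # forms have 3 and 5 proper words contributes round((3 + 5) / 2) = 4
        # words to the translation count, so catalogs with different numbers
        # of plural forms remain comparable.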
        nwords = {"orig" : 0, "tran" : 0}
        nchars = {"orig" : 0, "tran" : 0}
        msgids = [msg.msgid]
        if msg.msgid_plural is not None:
            msgids.append(msg.msgid_plural)
        for src, texts in (("orig", msgids), ("tran", msg.msgstr)):
            if ismeta: # consider metamessages as zero counts
                continue
            lnwords = [] # this group's word count, for averaging
            lnchars = [] # this group's character count, for averaging
            for text in texts:
                pf = text.find("|/|")
                if pf >= 0:
                    text = text[0:pf]
                words = proper_words(text, True, cat.accelerator(), msg.format)
                # If there are no proper words but there are some characters,
                # count one empty word, so that a fuzzy or an untranslated
                # message is not considered translated when only word counts
                # are observed.
                if not words and text:
                    words = [""]
                lnwords.append(len(words))
                lnchars.append(len("".join(words)))
            nwords[src] += int(round(float(sum(lnwords)) / len(texts)))
            nchars[src] += int(round(float(sum(lnchars)) / len(texts)))
            #nchars[src] += (nwords[src] - 1) # nominal space per each two words

        # If the number of words has been limited, skip the message if it
        # does not fall within the range.
        if self.p.maxwords is not None:
            if not (   nwords["orig"] <= self.p.maxwords
                    or nwords["tran"] <= self.p.maxwords):
                return
        if self.p.minwords is not None:
            if not (   nwords["orig"] >= self.p.minwords
                    or nwords["tran"] >= self.p.minwords):
                return

        # Split word and character counts of the fuzzy original if requested.
        nswords = {}
        nschars = {}
        if self.p.ondiff and msg.fuzzy and msg.msgid_previous is not None:
            diff, dr = tdiff(msg.msgid_previous, msg.msgid, diffr=True)
            # Reduce the difference ratio to a smaller range by some threshold.
            # Texts more different than the threshold need a full review.
            drth = 0.4
            #dr2 = dr if dr < drth else 1.0
            dr2 = min(dr / drth, 1.0)
            # Split the counts between an untranslated part (needing review)
            # and a translated part, so that the total remains the same.
            nswords.update({"trn": {}, "fuz": {}, "unt": {}})
            nschars.update({"trn": {}, "fuz": {}, "unt": {}})
            for nitems, nitems2, src in (
                (nwords, nswords, "orig"), (nwords, nswords, "tran"),
                (nchars, nschars, "orig"), (nchars, nschars, "tran"),
            ):
                num = nitems[src]
                # A difference ratio of 0 can happen if the new and old texts
                # are the same, normally when only the context has changed.
                # The counts should not be fully assigned to translated then,
                # as it should still be visible that the message needs updating.
                if dr2 > 0.0:
                    rnum = int(round(dr2 * num + 0.5)) # round up
                else:
                    rnum = 1
                rnum = min(rnum, num) # in case of rounding overflow
                nitems2["trn"][src] = num - rnum
                nitems2["fuz"][src] = 0
                nitems2["unt"][src] = rnum

        # Detect categories and add the counts.
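        # A non-obsolete message contributes to "tot" and to exactly one of
        # "trn"/"fuz"/"unt"; an obsolete message only to "obs". With ondiff
        # splitting, the per-category word/character shares are taken from
        # nswords/nschars instead of the plain counts.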
        categories = set()

        if not msg.obsolete: # do not count obsolete into totals
            self.count["tot"][0] += 1
            categories.add("tot")
            if nswords:
                categories.update(list(nswords.keys()))

        if msg.obsolete: # do not split obsolete into fuzzy/translated
            self.count["obs"][0] += 1
            categories.add("obs")
            nswords = {}
            nschars = {}
        elif msg.translated:
            self.count["trn"][0] += 1
            categories.add("trn")
        elif msg.fuzzy:
            self.count["fuz"][0] += 1
            categories.add("fuz")
            if cat.filename not in self.incomplete_catalogs:
                self.incomplete_catalogs[cat.filename] = True
        elif msg.untranslated:
            self.count["unt"][0] += 1
            categories.add("unt")
            if cat.filename not in self.incomplete_catalogs:
                self.incomplete_catalogs[cat.filename] = True

        for cat in categories:
            nwords1 = nswords.get(cat, nwords)
            nchars1 = nschars.get(cat, nchars)
            self.count[cat][1] += nwords1["orig"]
            self.count[cat][2] += nwords1["tran"]
            self.count[cat][3] += nchars1["orig"]
            self.count[cat][4] += nchars1["tran"]


    # Sort filenames as if templates-only were within language subdirs.
    def _sort_equiv_filenames (self, filenames):

        def equiv_template_path (x):
            cdir = os.path.dirname(x)
            if cdir in self.mapped_template_subdirs:
                cdir = self.mapped_template_subdirs[cdir]
                return os.path.join(cdir, os.path.basename(x))
            else:
                return x

        filenames.sort(key=lambda x: equiv_template_path(x))


    def finalize (self):

        # If template correspondence requested, handle POTs without POs.
        if self.template_subdirs:
            # Collect all catalogs in template subdirs.
            tpaths = collect_catalogs(self.template_subdirs)
            tpaths = list(filter(self.p.is_cat_included, tpaths))
            # Filter to have only POTs remain.
            tpaths = [x for x in tpaths if x.endswith(".pot")]
            # Filter to leave out matched templates.
            tpaths = [x for x in tpaths if x not in self.matched_templates]
            # Add stats on all unmatched templates.
            for tpath in tpaths:
                cat = Catalog(tpath, monitored=False)
                self.process_header(cat.header, cat)
                for msg in cat:
                    self.process(msg, cat)
            # Map template to translation subdirs.
            for tpath in tpaths:
                tsubdir = os.path.dirname(tpath)
                subdir = tsubdir.replace(self.tspec_repl, self.tspec_srch, 1)
                self.mapped_template_subdirs[tsubdir] = subdir

        # If completeness limit in effect, eliminate catalogs not passing it.
        if self.p.mincomp is not None:
            ncounts = {}
            ninccats = {}
            for filename, count in self.counts.items():
                cr = float(count["trn"][0]) / (count["tot"][0] or 1)
                if cr >= self.p.mincomp:
                    ncounts[filename] = count
                    inccat = self.incomplete_catalogs.get(filename)
                    if inccat is not None:
                        ninccats[filename] = inccat
            self.counts = ncounts
            self.incomplete_catalogs = ninccats

        # Assemble sets of total counts by requested divisions.
        count_overall = self._count_zero()
        counts_bydir = {}
        filenames_bydir = {}
        for filename, count in self.counts.items():

            count_overall = self._count_sum(count_overall, count)

            if self.p.bydir:
                cdir = os.path.dirname(filename)
                if cdir in self.mapped_template_subdirs:
                    # Pretend templates-only are within language subdir.
                    cdir = self.mapped_template_subdirs[cdir]
                if cdir not in counts_bydir:
                    counts_bydir[cdir] = self._count_zero()
                    filenames_bydir[cdir] = []
                counts_bydir[cdir] = self._count_sum(counts_bydir[cdir], count)
                filenames_bydir[cdir].append(filename)

        # Arrange the sets into an ordered list with titles.
        counts = []
        if self.p.bydir:
            cdirs = list(counts_bydir.keys())
            cdirs.sort()
            for cdir in cdirs:
                if self.p.byfile:
                    self._sort_equiv_filenames(filenames_bydir[cdir])
                    for filename in filenames_bydir[cdir]:
                        counts.append((filename, self.counts[filename], False))
                counts.append(("%s/" % cdir, counts_bydir[cdir], False))
            counts.append((_("@item:intable sum of all other entries",
                             "(overall)"), count_overall, True))

        elif self.p.byfile:
            filenames = list(self.counts.keys())
            self._sort_equiv_filenames(filenames)
            for filename in filenames:
                counts.append((filename, self.counts[filename], False))
            counts.append((_("@item:intable sum of all other entries",
                             "(overall)"), count_overall, True))

        else:
            counts.append((None, count_overall, False))

        # Indicate conspicuously up front any modifiers to the counting.
        modstrs = []
        if self.p.branch:
            fmtbranches = format_item_list(self.p.branch)
            modstrs.append(_("@item:intext",
                             "branches (%(branchlist)s)",
                             branchlist=fmtbranches))
        if self.p.maxwords is not None and self.p.minwords is None:
            modstrs.append(n_("@item:intext",
                              "at most %(num)d word",
                              "at most %(num)d words",
                              num=self.p.maxwords))
        if self.p.minwords is not None and self.p.maxwords is None:
            modstrs.append(n_("@item:intext",
                              "at least %(num)d word",
                              "at least %(num)d words",
                              num=self.p.minwords))
        if self.p.minwords is not None and self.p.maxwords is not None:
            modstrs.append(n_("@item:intext",
                              "from %(num1)d to %(num)d word",
                              "from %(num1)d to %(num)d words",
                              num1=self.p.minwords, num=self.p.maxwords))
        if self.p.lspan:
            modstrs.append(_("@item:intext",
                             "line span %(span)s",
                             span=self.p.lspan))
        if self.p.espan:
            modstrs.append(_("@item:intext",
                             "entry span %(span)s",
                             span=self.p.espan))
        if self.p.ondiff:
            modstrs.append(_("@item:intext",
                             "scaled fuzzy counts"))

        # Should titles be output in-line or on separate lines?
        self.inline = False
        maxtitlecw = 0
        if (not self.p.wbar or not self.p.msgbar or not self.p.msgfmt) and (not self.p.table):
            for title, count, summed in counts:
                if title is not None:
                    self.inline = True
                    titlecw = len(title)
                    if maxtitlecw < titlecw:
                        maxtitlecw = titlecw

        # Output statistics in the requested forms.
        for title, count, summed in counts:
            # Output the title if defined.
            if title is not None:
                if self.inline:
                    ntitle = (("%%-%ds" % maxtitlecw) % title)
                else:
                    ntitle = title
                # Must color after padding, so that the padding calculation
                # does not see the color markup.
                ntitle = _("@title",
                           "<bold>%(title)s</bold>",
                           title=ntitle)
                if self.inline:
                    report(ntitle + " ", newline=False)
                else:
                    report(ntitle)

            if self.p.table:
                self._tabular_stats(counts, title, count)
            if self.p.msgbar:
                self._msg_bar_stats(counts, title, count, summed)
            if self.p.wbar:
                self._w_bar_stats(counts, title, count, summed)
            if self.p.msgfmt:
                self._msg_simple_stats(title, count, summed)

        # Output the table of catalogs which are not fully translated,
        # if requested.
        if self.p.incomplete and self.incomplete_catalogs:
            filenames = list(self.incomplete_catalogs.keys())
            self._sort_equiv_filenames(filenames)
            data = []
            # Column of catalog filenames.
            data.append(filenames)
            # Columns of fuzzy and untranslated message counts, and their sum.
            data.append([self.counts[x]["fuz"][0] for x in filenames])
            data.append([self.counts[x]["unt"][0] for x in filenames])
            data.append([x + y for x, y in zip(data[1], data[2])])
            # Columns of words in fuzzy and untranslated messages, and their sum.
            data.append([self.counts[x]["fuz"][1] for x in filenames])
            data.append([self.counts[x]["unt"][1] for x in filenames])
            data.append([x + y for x, y in zip(data[4], data[5])])
            # Column names and formats.
            coln = [_("@title:column",
                      "catalog"),
                    _("@title:column fuzzy messages",
                      "msg/f"),
                    _("@title:column untranslated messages",
                      "msg/u"),
                    _("@title:column fuzzy and untranslated messages",
                      "msg/f+u"),
                    _("@title:column words in fuzzy messages",
                      "w/f"),
                    _("@title:column words in untranslated messages",
                      "w/u"),
                    _("@title:column words in fuzzy and untranslated messages",
                      "w/f+u")]
            maxfl = max([len(x) for x in filenames])
            dfmt = ["%%-%ds" % maxfl, "%d", "%d", "%d", "%d", "%d", "%d"]
            # Output.
            report("-")
            report(tabulate(data, coln=coln, dfmt=dfmt, space=" ", none="-",
                            colorize=True))

        # Write filenames of catalogs which are not fully translated
        # into a file, if requested.
        if self.p.incompfile:
            filenames = sorted(self.incomplete_catalogs.keys())
            cmdlenc = locale.getpreferredencoding()
            ofl = codecs.open(self.p.incompfile, "w", cmdlenc)
            ofl.writelines([x + "\n" for x in filenames])
            ofl.close()

        if modstrs:
            report(_("@item:intable",
                     "modifiers: %(modlist)s",
                     modlist=format_item_list(modstrs)))


    def _tabular_stats (self, counts, title, count):

        # Order counts in tabular form.
        selected_cats = self.count_spec
        if False and self.p.incomplete: # skip this for the moment
            # Display only fuzzy and untranslated counts.
            selected_cats = (self.count_spec[1], self.count_spec[2])
            # Skip display if complete.
            really_incomplete = True
            for tkey, tname in selected_cats:
                for col in range(self.counts_per_cat):
                    if count[tkey][col] > 0:
                        really_incomplete = False
                        break
            if really_incomplete:
                return
        data = [[count[tkey][y] for tkey, tname in selected_cats]
                for y in range(self.counts_per_cat)]

        # Derived data: message/word completion ratios.
        for col, ins in ((0, 1), (1, 3)):
            compr = []
            for tkey, tname in selected_cats:
                if tkey not in ("tot", "obs") and count["tot"][col] > 0:
                    r = float(count[tkey][col]) / count["tot"][col]
                    compr.append(r * 100)
                else:
                    compr.append(None)
            data.insert(ins, compr)

        if self.p.detail:
            # Derived data: word and character expansion factors.
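            # Expansion factor = (translation count / original count - 1),
            # shown as a signed percentage; e.g. 120 translated words against
            # 100 original words is reported as +20.0%. For characters, incsp
            # can add a nominal per-word spacing correction to both sides.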
            for o, t, ins, incsp in ((1, 2, 7, None), (3, 4, 8, (1, 2, 0.0))):
                ratio = []
                for tkey, tname in selected_cats:
                    if count[tkey][o] > 0 and count[tkey][t] > 0:
                        inct, inco = 0.0, 0.0
                        if incsp:
                            co, ct, fact = incsp
                            inco = (count[tkey][co] - 1) * fact
                            inct = (count[tkey][ct] - 1) * fact
                        r = (count[tkey][t] + inct) / (count[tkey][o] + inco)
                        ratio.append((r - 1) * 100)
                    else:
                        ratio.append(None)
                data.insert(ins, ratio)

        if self.p.detail:
            # Derived data: words per message and characters per word.
            for w, c, ins in ((0, 1, 9), (0, 2, 10), (1, 3, 11), (2, 4, 12)):
                chpw = []
                for tkey, tname in selected_cats:
                    if count[tkey][w] > 0 and count[tkey][c] > 0:
                        r = float(count[tkey][c]) / count[tkey][w]
                        chpw.append(r)
                    else:
                        chpw.append(None)
                data.insert(ins, chpw)

        # Row and column names and formats.
        rown = [tname for tkey, tname in selected_cats]
        coln = [_("@title:column messages",
                  "msg"),
                _("@title:column percentage of total messages",
                  "msg/tot"),
                _("@title:column words in original",
                  "w-or"),
                _("@title:column percentage of words to total in original",
                  "w/tot-or"),
                _("@title:column words in translation",
                  "w-tr"),
                _("@title:column characters in original",
                  "ch-or"),
                _("@title:column characters in translation",
                  "ch-tr")]
        dfmt = ["%d", "%.1f%%",
                "%d", "%.1f%%", "%d", "%d", "%d"]
        if self.p.detail:
            coln.extend([_("@title:column word efficiency",
                           "w-ef"),
                         _("@title:column character efficiency",
                           "ch-ef"),
                         _("@title:column words per message in original",
                           "w/msg-or"),
                         _("@title:column words per message in translation",
                           "w/msg-tr"),
                         _("@title:column characters per word in original",
                           "ch/w-or"),
                         _("@title:column characters per word in translation",
                           "ch/w-tr")])
            dfmt.extend(["%+.1f%%", "%+.1f%%",
                         "%.1f", "%.1f", "%.1f", "%.1f"])

        # Output the table.
        report(tabulate(data, rown=rown, coln=coln, dfmt=dfmt,
                        space=" ", none="-", colorize=True))


    def _msg_bar_stats (self, counts, title, count, summed):

        self._bar_stats(counts, title, count, summed,
                        _("@item:intable number of messages",
                          "msgs"),
                        0)


    def _w_bar_stats (self, counts, title, count, summed):

        self._bar_stats(counts, title, count, summed,
                        _("@item:intable number of words in original",
                          "w-or"),
                        1)


    def _bar_stats (self, counts, title, count, summed, dlabel, dcolumn):

        # Count categories to display and the chars/colors associated with them.
        # Note: use only characters from Latin-1.
        tspecs = (("trn", "×", "green"),
                  ("fuz", "¤", "blue"),
                  ("unt", "·", "red"))

        # Find the maximum counts overall.
        maxcounts = dict(trn=0, fuz=0, unt=0, tot=0)
        maxcounts_jumbled = maxcounts.copy()
        for otitle, ocount, osummed in counts:
            # For absolute bars, compare counts only for non-summed entries.
            if self.p.absolute and osummed:
                continue

            # Count both messages and words, for the number display padding.
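            # maxcounts_jumbled takes the maximum over both the message and
            # word columns, so that padded count widths stay the same whether
            # message bars or word bars are shown; maxcounts below tracks only
            # the displayed column and is used to scale absolute bars.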
            for tkey in maxcounts_jumbled:
                for dcol in (0, 1):
                    c = ocount[tkey][dcol]
                    if maxcounts_jumbled[tkey] < c:
                        maxcounts_jumbled[tkey] = c

            for tkey in maxcounts:
                c = ocount[tkey][dcolumn]
                if maxcounts[tkey] < c:
                    maxcounts[tkey] = c

        # Character widths of maximum count categories.
        maxcountscw = {}
        for tkey, tval in maxcounts.items():
            maxcountscw[tkey] = len(str(tval))
        maxcountscw_jumbled = {}
        for tkey, tval in maxcounts_jumbled.items():
            maxcountscw_jumbled[tkey] = len(str(tval))

        # Formatted counts by disjunct categories.
        fmt_counts = []
        for tkey, tchar, tcol in tspecs:
            cstr = str(count[tkey][dcolumn])
            if cstr == "0":
                cstr = "-"
            cfmt = ("%%%ds" % maxcountscw_jumbled[tkey]) % cstr
            if tcol is not None:
                fmt_counts.append((ColorString("<%s>%%s</%s>") % (tcol, tcol))
                                  % cfmt)
            else:
                fmt_counts.append(cfmt)
        fmt_counts = cjoin(fmt_counts, "/")

        # Maximum and nominal bar widths in characters.
        # TODO: Make parameters.
        if self.inline:
            nombarcw = 20
            maxbarcw = 50
        else:
            nombarcw = 40
            maxbarcw = 80

        def roundnear (x):
            return int(round(x, 0))

        def roundup (x):
            ix = int(x)
            if x - ix > 1e-16:
                ix += 1
            return ix

        # Compute number of cells per category.
        n_cells = {}
        if self.p.absolute:
            # Absolute bar.
            n_per_cell = 0
            for npc in (1, 2, 5,
                        10, 20, 50,
                        100, 200, 500,
                        1000, 2000, 5000,
                        10000, 20000, 50000,
                        100000, 200000, 500000):
                if npc * maxbarcw > maxcounts["tot"]:
                    n_per_cell = npc
                    break
            if not n_per_cell:
                warning(_("@info",
                          "Count too large, cannot display bar graph."))
                return
            for tkey, roundf in (("fuz", roundup), ("unt", roundup),
                                 ("tot", roundnear)):
                c = count[tkey][dcolumn]
                n_cells[tkey] = roundf(float(c) / n_per_cell)

            # Correct the situation when there are no cells.
            if n_cells["tot"] < 1:
                n_cells["tot"] = 1

            # Correct the situation when the sum of cells fuzzy+untranslated
            # goes over the total; give priority to untranslated when reducing.
            while n_cells["fuz"] + n_cells["unt"] > n_cells["tot"]:
                if n_cells["fuz"] >= n_cells["unt"]:
                    n_cells["fuz"] -= 1
                else:
                    n_cells["unt"] -= 1

            n_cells["trn"] = n_cells["tot"] - n_cells["fuz"] - n_cells["unt"]

        else:
            # Relative bar.
            if count["tot"][dcolumn] > 0:
                n_per_cell = float(nombarcw) / count["tot"][dcolumn]
            else:
                n_per_cell = 0
            for tkey in ("fuz", "unt"):
                c = count[tkey][dcolumn]
                n_cells[tkey] = roundup(c * n_per_cell)

            # When there are almost none translated, it may have happened that
            # the sum of cells fuzzy+untranslated is over nominal; reduce.
            while n_cells["fuz"] + n_cells["unt"] > nombarcw:
                if n_cells["fuz"] >= n_cells["unt"]:
                    n_cells["fuz"] -= 1
                else:
                    n_cells["unt"] -= 1

            n_cells["trn"] = nombarcw - n_cells["fuz"] - n_cells["unt"]

        # Create the bar.
        fmt_bar = []
        for tkey, tchar, tcol in tspecs:
            bar = tchar * n_cells[tkey]
            if tcol is not None:
                bar = (ColorString("<%s>%%s</%s>") % (tcol, tcol)) % bar
            fmt_bar.append(bar)
        fmt_bar = cjoin(fmt_bar)

        # Assemble final output.
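        # For absolute bars, summed ("overall") rows are reported with counts
        # only: the cell size was chosen from the per-catalog maxima, so an
        # overall bar would overflow the maximum bar width. Otherwise the bar
        # is printed between '|' delimiters after the counts and the label.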
        if not self.p.absolute or not summed:
            if count["tot"][dcolumn] == 0:
                fmt_bar = ""
            report(cinterp("%s %s |%s|", fmt_counts, dlabel, fmt_bar))
        else:
            report(cinterp("%s %s", fmt_counts, dlabel))


    def _msg_simple_stats (self, title, count, summed):
        """msgfmt-style report."""

        fmt_trn = n_("@item:intext",
                     "%(num)d translated message",
                     "%(num)d translated messages",
                     num=count["trn"][0])
        fmt_fuz = n_("@item:intext",
                     "%(num)d fuzzy translation",
                     "%(num)d fuzzy translations",
                     num=count["fuz"][0])
        fmt_unt = n_("@item:intext",
                     "%(num)d untranslated message",
                     "%(num)d untranslated messages",
                     num=count["unt"][0])
        report(_("@info composition of three previous messages",
                 "%(trn)s, %(fuz)s, %(unt)s",
                 trn=fmt_trn, fuz=fmt_fuz, unt=fmt_unt))
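
# A minimal invocation sketch (assuming the posieve front-end of Pology, with
# sieve parameters passed as repeated -s options; the path is hypothetical):
#
#   posieve stats -s byfile -s msgbar po/myproject/
#
# The available parameters correspond to those declared in setup_sieve() above.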