# File indexing completed on 2024-10-27 08:25:08
# -*- coding: UTF-8 -*-

"""
Resolve aggregate messages produced by C{msgcat}.

Documented in C{doc/user/sieving.docbook}.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

# DESIGN NOTE:
# If one of the messages is missing one of the parts that others have,
# that part is silently not added to the aggregation -- there is no explicit
# indicator to tell that it was missing.
# PO file names need not be unique either (if collected from a directory tree),
# so it is not possible to deduce this from file names; likewise for project ID.
# This means that there is no way to reconstruct complete original messages,
# so each part has to be resolved independently.

import re

from pology import _, n_
from pology.header import Header
from pology.message import Message
from pology.report import report
from pology.sieve import SieveError


def setup_sieve (p):
    """
    Declare the description and the parameters of this sieve on C{p}.

    Three boolean parameters are added, all defaulting to C{False}:
    C{first}, C{unfuzzy} and C{keepsrc}.

    @param p: object on which C{set_desc} and C{add_param} are called
        (presumably the sieve parameter collector of the framework --
        TODO confirm against the caller)
    """

    # NOTE: The "discription" spelling in the context strings is left as is
    # on purpose: these strings are passed to the translation call verbatim.
    p.set_desc(_("@info sieve discription",
    "Resolve aggregate messages produced by '%(cmd)s'.",
    cmd="msgcat"
    ))

    p.add_param("first", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Always pick the first variant (by default, aggregate messages "
    "are resolved by taking the most frequent variant)."
    ))
    p.add_param("unfuzzy", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Unfuzzy resolved messages. "
    "DANGEROUS: Use only if all messages in aggregation can be guaranteed "
    "not to be fuzzy."
    ))
    p.add_param("keepsrc", bool, defval=False,
                desc=_("@info sieve parameter discription",
    "Keep source reference on resolved messages instead of removing them."
    ))


class Sieve (object):
    """
    Sieve which replaces aggregate headers and messages by one variant.

    The number of resolved headers and messages is counted
    and reported by L{finalize}.
    """

    def __init__ (self, params):
        """
        Store sieve options and choose the variant selector.

        @param params: object with the boolean attributes C{first},
            C{unfuzzy} and C{keepsrc}

        @raise SieveError: if more than one resolution criterion is given
        """

        # Resolution criteria exclude each other, at most one may be set.
        # The list holds a single flag at present, so the check cannot
        # trigger yet; it is here for when further criteria get added.
        exclusive_picks = [params.first]
        if sum(exclusive_picks) > 1:
            raise SieveError(
                _("@info",
                  "Only one resolution criterion for "
                  "aggregate messages can be given."))

        if params.first:
            self.selvar = _selvar_first
        else:
            self.selvar = _selvar_frequent

        self.unfuzzy = params.unfuzzy
        self.keepsrc = params.keepsrc

        # Counters reported in finalize().
        self.nresolved = 0
        self.nresolvedhdr = 0


    def process_header (self, hdr, cat):
        """
        Resolve the header and, if it was modified, put it into C{cat}.

        The header is converted to a message, resolved like any other
        message, and converted back to a header only when resolution
        changed something.

        @param hdr: header to resolve (must provide C{to_msg()})
        @param cat: catalog whose C{header} attribute gets replaced
        """

        hmsg = Message(hdr.to_msg())
        if _resolve_msg(hmsg, self.selvar):
            self.nresolvedhdr += 1
            cat.header = Header(hmsg)


    def process (self, msg, cat):
        """
        Resolve the message in place.

        If the message was modified by resolution, it is also unfuzzied
        when the C{unfuzzy} option is set, and its source references
        are cleared unless the C{keepsrc} option is set.

        @param msg: message to resolve
        @param cat: not used by this method
        """

        if _resolve_msg(msg, self.selvar):
            self.nresolved += 1
            if self.unfuzzy:
                msg.unfuzzy()
            if not self.keepsrc:
                msg.source[:] = []


    def finalize (self):
        """
        Report the counts of resolved headers and messages, if non-zero.
        """

        if self.nresolvedhdr > 0:
            msg = n_("@info:progress",
                     "Resolved %(num)d aggregate header.",
                     "Resolved %(num)d aggregate headers.",
                     num=self.nresolvedhdr)
            report("===== " + msg)
        if self.nresolved > 0:
            msg = n_("@info:progress",
                     "Resolved %(num)d aggregate message.",
                     "Resolved %(num)d aggregate messages.",
                     num=self.nresolved)
            report("===== " + msg)


def _selvar_first (texts):
    """
    Select the first of the variants.

    @param texts: non-empty sequence of variant texts
    @return: the first element of C{texts}
    """

    return texts[0]


def _selvar_frequent (texts):
    """
    Select the variant which occurs most often.

    Among equally frequent variants, the one which appears earliest
    in C{texts} is taken.

    @param texts: non-empty sequence of variant texts
    @return: the selected element of C{texts}
    """

    # Positions at which each distinct text occurs, in increasing order.
    tinds_by_text = {}
    for tind, text in enumerate(texts):
        tinds_by_text.setdefault(text, []).append(tind)

    # Most occurrences first, ties broken by the earliest positions.
    best_tinds = min(tinds_by_text.values(), key=lambda x: (-len(x), x))

    return texts[best_tinds[0]]


def _resolve_msg (msg, selvar):
    """
    Resolve aggregate parts of the message in place.

    Manual comments, automatic comments and every C{msgstr} are
    resolved independently of each other. Parts which contain
    no aggregation separators are left as they are.

    @param msg: message to resolve; its C{manual_comment},
        C{auto_comment}, C{msgstr}, C{msgid} and C{modcount} are used
    @param selvar: function which selects one text out of a list of texts
    @return: whether the modification counter of the message increased
    """

    oldcount = msg.modcount

    # Comment lines are joined into a single text for resolution,
    # and the selected variant is split back into lines.
    for comment in (msg.manual_comment, msg.auto_comment):
        if comment:
            aggtext = "\n".join(comment)
            comment[:] = _resolve_aggtext(aggtext, selvar).split("\n")

    # Separator swallows trailing newline, put it based on msgid.
    # The last variant is not followed by a separator, so its trailing
    # newline must be removed explicitly; otherwise it would not compare
    # equal to same earlier variants, and would get the newline twice.
    need_trailing_nl = msg.msgid.endswith("\n")
    for i in range(len(msg.msgstr)):
        nmsgstr = _resolve_aggtext(msg.msgstr[i], selvar, strip_last_nl=True)
        # Resolved text always differs from the aggregate,
        # as at least the separators are gone.
        if need_trailing_nl and nmsgstr != msg.msgstr[i]:
            nmsgstr += "\n"
        msg.msgstr[i] = nmsgstr

    return msg.modcount > oldcount


# Aggregation separator: at least three "#-" followed by "#", a space,
# any text within the line, a space, and again at least three "#-" and "#".
# One newline on each side of the separator is matched too, if present.
_splitter_rx = re.compile(r"\n?(?:#-){3,}# .*? (?:#-){3,}#\n?")

def _resolve_aggtext (aggtext, selvar, strip_last_nl=False):
    """
    Select one variant out of the aggregate text.

    Whatever precedes the first separator is not considered a variant.

    @param aggtext: possibly aggregate text
    @param selvar: function which selects one text out of a list of texts
    @param strip_last_nl: whether to remove one trailing newline from
        the last variant, such that it gets treated same as the variants
        which had their trailing newline matched by the following separator
    @return: the selected variant as string, or C{aggtext} itself
        if it contains no separators
    """

    texts = _splitter_rx.split(aggtext)[1:]
    if not texts:
        return aggtext

    if strip_last_nl and texts[-1].endswith("\n"):
        texts[-1] = texts[-1][:-1]

    return str(selvar(texts))