File indexing completed on 2024-04-21 16:29:13
# -*- coding: UTF-8 -*-

"""
Common functionality for poediff and poepatch scripts.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3

@warning: Non-public module.
"""

import os
import time

from pology import PologyError, _
from pology.catalog import Catalog
import pology.config as pology_config
from pology.diff import msg_ediff, tdiff
from pology.merge import merge_pofile
from pology.message import MessageUnsafe


def _raise_no_inst (clssname):
    """
    Raise the error reporting that a static-only class was instantiated.

    @param clssname: name of the class, inserted into the error message
    @type clssname: string

    @raises PologyError: always
    """

    raise PologyError(
        _("@info",
          "Class '%(clss)s' only provides static attributes, "
          "objects of this type cannot be constructed.",
          clss=clssname))


# FIXME: Define message part categories in message module.
# Message part categories.
class MPC:
    """
    Message part categories: names of message fields, grouped.

    Only provides static attributes; constructing an object raises
    C{PologyError}.
    """

    # Names of current fields.
    curr_fields = [
        "msgctxt", "msgid", "msgid_plural",
    ]
    # Names of the corresponding previous fields, in the same order.
    prev_fields = [x + "_previous" for x in curr_fields]
    # (current, previous) and (previous, current) pairs of field names.
    currprev_fields = list(zip(curr_fields, prev_fields))
    prevcurr_fields = list(zip(prev_fields, curr_fields))

    def __init__ (self):
        _raise_no_inst(self.__class__.__name__)


# Syntax tokens in embedded diff catalogs.
class EDST:
    """
    Syntax tokens in embedded diff catalogs.

    Only provides static attributes; C{__init__} delegates to
    C{_raise_no_inst}, so objects of this type cannot be constructed.
    """

    hmsgctxt_field = "X-Ediff-Header-Context" # by spec
    hmsgctxt_el = "~" # by spec
    filerev_sep = " <<< " # by spec

    def __init__ (self):
        _raise_no_inst(self.__class__.__name__)


def msg_eq_fields (m1, m2, fields):
    """
    Check whether two messages are equal in the given fields.

    Each element of C{fields} is either a field name, in which case
    the same-named fields are compared, or a tuple of two field names,
    in which case the first is read from C{m1} and the second from C{m2}.
    Field values are read through the C{get} method of the messages.

    @param m1: first message, or C{None}
    @param m2: second message, or C{None}
    @param fields: field names or pairs of field names to compare

    @returns: C{True} if both messages are C{None} or all the fields
        compare equal, C{False} if exactly one message is C{None}
        or some field differs
    @rtype: bool
    """

    if m1 is None and m2 is None:
        return True
    if m1 is None or m2 is None:
        return False

    for field in fields:
        if not isinstance(field, tuple):
            field = (field, field)
        if m1.get(field[0]) != m2.get(field[1]):
            return False

    return True


def msg_copy_fields (m1, m2, fields):
    """
    Copy given fields from one message into another.

    Each element of C{fields} is either a field name (copied to
    the same-named field) or a tuple (field in C{m1}, field in C{m2}).
    If C{m1} is C{None}, values are taken from a newly constructed
    C{MessageUnsafe} instead.

    @param m1: message to read from, or C{None}
    @param m2: message to write to (modified in place)
    @param fields: field names or pairs of field names to copy
    """

    if m1 is None:
        m1 = MessageUnsafe()

    for field in fields:
        if not isinstance(field, tuple):
            field = (field, field)
        setattr(m2, field[1], m1.get(field[0]))


def msg_clear_prev_fields (m):
    """
    Set all previous fields (C{MPC.prev_fields}) of the message to C{None}.

    @param m: message to modify in place
    """

    for field in MPC.prev_fields:
        setattr(m, field, None)


def msg_cleanup (msg):
    """
    Remove previous fields if inconsistent with the message in total.

    @param msg: message to modify in place
    """

    # Non-fuzzy messages should have no previous fields.
    # msgid_previous must be present, or there must be no previous fields.
    if not msg.fuzzy or msg.msgid_previous is None:
        for field in MPC.prev_fields:
            if msg.get(field) is not None:
                setattr(msg, field, None)


def diff_cats (cat1, cat2, ecat,
               merge=True, colorize=False, wrem=True, wadd=True, noobs=False,
               upprogf=None):
    """
    Diff messages of two catalogs and add the diffs to the ediff catalog.

    Messages are paired by C{_pair_msgs} (which also cleans up previous
    fields of messages in both catalogs, and removes obsolete messages
    from them if C{noobs} is set), each pair is diffed into a temporary
    catalog, and the resulting messages are then appended to C{ecat}.

    @param cat1: old catalog
    @param cat2: new catalog
    @param ecat: catalog to which the diff messages are added
    @param merge: passed to C{_pair_msgs}; whether merging of catalogs
        may be used to find pairs
    @param colorize: passed on to C{msg_ediff}
    @param wrem: whether to include old messages without a new pair
    @param wadd: whether to include new messages without an old pair
    @param noobs: whether to drop obsolete messages before pairing
    @param upprogf: function without arguments, called repeatedly
        while processing (e.g. to update a progress indicator)

    @returns: number of message pairs for which a diff was added
    @rtype: int
    """

    upprogf = upprogf or (lambda: None)

    dpairs = _pair_msgs(cat1, cat2, merge, wrem, wadd, noobs, upprogf)

    # Order pairings such that they follow order of messages in
    # the new catalog wherever the new message exists.
    # For unpaired old messages, do heuristic analysis of any
    # renamings of source files, and then insert diffed messages
    # according to source references of old messages.
    dpairs_by2 = [x for x in dpairs if x[1]]
    dpairs_by2.sort(key=lambda x: x[1].refentry)
    dpairs_by1 = [x for x in dpairs if not x[1]]
    fnsyn = None
    if dpairs_by1:
        fnsyn = cat2.detect_renamed_sources(cat1)

    # Make the diffs.
    # Must not add diffed messages directly to global ediff catalog,
    # because then heuristic insertion would throw them all over.
    # Instead add to local ediff catalog, then copy in order to global.
    ndiffed = 0
    lecat = Catalog("", create=True, monitored=False)
    for cdpairs, cfnsyn in ((dpairs_by2, None), (dpairs_by1, fnsyn)):
        for msg1, msg2 in cdpairs:
            upprogf()
            ndiffed += _add_msg_diff(msg1, msg2, lecat, colorize, cfnsyn)
    for emsg in lecat:
        ecat.add(emsg, len(ecat))

    return ndiffed


def cats_update_effort (cat1, cat2, upprogf=None):
    """
    Estimate the effort of updating the old catalog into the new one.

    The result is the sum, over all paired and added messages where
    the new message is active, of the "nominal number of newly
    translated words" (see the comment in the function body).

    @param cat1: old catalog
    @param cat2: new catalog
    @param upprogf: function without arguments, called repeatedly
        while processing (e.g. to update a progress indicator)

    @returns: total nominal number of newly translated words
    @rtype: float
    """

    upprogf = upprogf or (lambda: None)

    dpairs = _pair_msgs(cat1, cat2, merge=True, wrem=False, wadd=True,
                        noobs=False, upprogf=upprogf)

    # The update effort of the given old-new message pair is equal
    # to "nominal number of newly translated words" (NNTW),
    # which is defined as follows:
    # - nominal length of a word in msgid is set to 6 characters (WL).
    # - number of characters in new msgid is divided by WL
    #   to give nominal number of words in new msgid (NWO)
    # - number of equal characters in old and new msgid is divided by WL
    #   to give nominal number of equal words in msgid (NEWO)
    # - number of characters in new msgstr is divided by number of
    #   characters in new msgid to give translation expansion factor (EF)
    # - number of equal characters in old and new msgstr is divided
    #   by WL*EF to give nominal number of equal words in msgstr (NEWT)
    # - character-based similarity ratio of old and new msgid
    #   (from 0.0 for no similarity to 1.0 for equality) is computed (SRO)
    # - character-based similarity ratio of old and new msgstr
    #   is computed (SRT)
    # - similarity ratio threshold is set to 0.5 (SRB)
    # - reduction due to similarity factor is computed as
    #     RSF = (min(SRO, SRT) - SRB) / (1 - SRB)
    # - nominal number of newly translated words is computed as
    #     NNTW = min(NWO - max(NEWO, NEWT) * RSF, NWO)
    #
    # Only those pairs where the new message is active are counted in.
    #
    # On plural messages, for the moment only msgid and msgstr[0]
    # are considered, and the above procedure is applied to them.
    # This underestimates the effort of updating a new plural message
    # when old message was ordinary.
    wl = 6.0
    srb = 0.5

    nntw_total = 0

    for msg1, msg2 in dpairs:
        upprogf()

        if not msg2.active:
            continue
        if msg1 is None:
            msg1 = MessageUnsafe()

        len_msgid2 = len(msg2.msgid)
        if len_msgid2 == 0:
            # NWO is zero, so NNTW cannot be positive: contributes nothing.
            # Skipping also avoids division by zero when computing EF.
            continue

        nwo = len_msgid2 / wl
        diffo, dro = tdiff(msg1.msgid, msg2.msgid, diffr=True)
        newo = len([c for t, c in diffo if t == " "]) / wl
        ef = float(len(msg2.msgstr[0])) / len_msgid2
        difft, drt = tdiff(msg1.msgstr[0], msg2.msgstr[0], diffr=True)
        if ef > 0.0:
            newt = len([c for t, c in difft if t == " "]) / (wl * ef)
        else:
            # Empty new msgstr: no text to be equal to the old one,
            # and WL*EF would be a zero divisor.
            newt = 0.0
        sro = 1.0 - dro
        srt = 1.0 - drt
        rsf = (min(sro, srt) - srb) / (1.0 - srb)
        nntw = max(min(nwo - max(newo, newt) * rsf, nwo), 0.0)

        nntw_total += nntw

    return nntw_total


def _calc_text_update_effort (text1, text2):

    # NOTE(review): looks unfinished. Nothing is returned, dr1 is unused,
    # the function is not called from the visible code, and word_ediff is
    # not among the names this file imports from pology.diff, so a call
    # would raise NameError. Confirm whether to complete or remove it.
    dr1 = 0.5
    ediff, dr = word_ediff(text1, text2, markup=True, diffr=True)


def _pair_msgs (cat1, cat2,
                merge=True, wrem=True, wadd=True, noobs=False,
                upprogf=None):
    """
    Pair old and new messages for diffing.

    Both catalogs are modified in place: previous fields of all messages
    are cleaned up with C{msg_cleanup}, and obsolete messages are removed
    if C{noobs} is set.

    @param cat1: old catalog
    @param cat2: new catalog
    @param merge: whether catalogs may be merged with C{merge_pofile}
        to find more pairs (done only if neither catalog reports
        C{created()})
    @param wrem: whether to add old messages left unpaired,
        as C{(msg1, None)}
    @param wadd: whether to add new messages left unpaired,
        as C{(None, msg2)}
    @param noobs: whether to remove obsolete messages before pairing
    @param upprogf: function without arguments, called repeatedly
        while processing

    @returns: list of C{(msg1, msg2)} tuples
    @rtype: list
    """

    upprogf = upprogf or (lambda: None)

    # Remove obsolete messages if they are not to be diffed.
    if noobs:
        for cat in (cat1, cat2):
            _rmobs_no_sync(cat)

    # Clean up inconsistencies in messages.
    for cat in (cat1, cat2):
        for msg in cat:
            upprogf()
            msg_cleanup(msg)

    # Delay inverting of catalogs until necessary.
    # The one-element list serves as a writable cache slot.
    def icat_w (cat, icat_pack):
        if icat_pack[0] is None:
            icat = Catalog("", create=True, monitored=False)
            for msg in cat:
                upprogf()
                imsg = _msg_invert_cp(msg)
                if imsg not in icat:
                    icat.add_last(imsg)
            icat_pack[0] = icat
        return icat_pack[0]

    icat1_pack = [None]
    icat1 = lambda: icat_w(cat1, icat1_pack)

    icat2_pack = [None]
    icat2 = lambda: icat_w(cat2, icat2_pack)

    # Delay merging of catalogs until necessary.
    def mcat_w (cat1, cat2, mcat_pack):
        if mcat_pack[0] is None:
            # Merge is done if requested and both catalogs exist.
            if merge and not cat1.created() and not cat2.created():
                mcat_pack[0] = merge_pofile(cat1.filename, cat2.filename,
                                            getcat=True, monitored=False,
                                            quiet=True, abort=True)
                if noobs:
                    _rmobs_no_sync(mcat_pack[0])
            else:
                mcat_pack[0] = {} # only tested for membership
        return mcat_pack[0]

    mcat12_pack = [None]
    mcat12 = lambda: mcat_w(cat1, cat2, mcat12_pack)

    mcat21_pack = [None]
    mcat21 = lambda: mcat_w(cat2, cat1, mcat21_pack)

    # Pair messages:
    # - first try to find an old message for each new
    # - then try to find a new message for each unpaired old
    # - finally add remaining unpaired messages to be diffed with None
    msgs1_paired = set()
    msgs2_paired = set()
    dpairs = []

    for msg2 in cat2:
        upprogf()
        msg1 = _get_msg_pair(msg2, cat1, icat1, mcat12)
        if msg1 and msg1 not in msgs1_paired:
            # Record pairing.
            msgs1_paired.add(msg1)
            msgs2_paired.add(msg2)
            dpairs.append((msg1, msg2))

    for msg1 in cat1:
        upprogf()
        if msg1 in msgs1_paired:
            continue
        msg2 = _get_msg_pair(msg1, cat2, icat2, mcat21)
        if msg2 and msg2 not in msgs2_paired:
            # Record pairing.
            msgs1_paired.add(msg1)
            msgs2_paired.add(msg2)
            dpairs.append((msg1, msg2))

    if wadd:
        for msg2 in cat2:
            upprogf()
            if msg2 not in msgs2_paired:
                dpairs.append((None, msg2))

    if wrem:
        for msg1 in cat1:
            upprogf()
            if msg1 not in msgs1_paired:
                dpairs.append((msg1, None))

    return dpairs


def _rmobs_no_sync (cat):
    """
    Remove obsolete messages from the catalog object.

    Messages are marked with C{remove_on_sync} and then C{sync_map}
    is called on the catalog.
    """

    for msg in cat:
        if msg.obsolete:
            cat.remove_on_sync(msg)
    cat.sync_map()


def _get_msg_pair (msg, ocat, icat, mcat):
    """
    Determine the pair of the message in the other catalog, if any.

    @param msg: message for which the pair is sought
    @param ocat: the other catalog, in which the pair is sought
    @param icat: pivot catalog of inverted messages, or a function
        without arguments returning it
    @param mcat: pivot catalog from merging (or an empty dictionary),
        or a function without arguments returning it

    @returns: the paired message from C{ocat}, or a false value if
        none was found
    """

    # If no direct match, try pivoting around any previous fields.
    # Iterate through test catalogs in this order,
    # to delay construction of those which are not necessary.
    for tcat in (ocat, icat, mcat):
        if callable(tcat):
            tcat = tcat()
        omsg = tcat.get(msg)
        if not omsg and msg.fuzzy:
            omsg = tcat.get(_msg_invert_cp(msg))
        if tcat is not ocat: # tcat is one of pivot catalogs
            # Whatever was found in the pivot catalog is looked up,
            # inverted, in the other catalog.
            omsg = ocat.get(_msg_invert_cp(omsg))
        if omsg:
            break

    return omsg


def _msg_invert_cp (msg):
    """
    Construct a lightweight message with previous and current fields
    exchanged. To be used only for lookups.

    If the message has no previous fields (C{key_previous} is C{None}),
    the result of C{set_key(msg)} on a fresh C{MessageUnsafe} is returned
    instead (presumably that fresh message with the key fields of C{msg}
    copied into it; confirm against C{set_key}).

    @param msg: message to invert, or C{None}

    @returns: the lookup message, or C{None} if C{msg} is C{None}
    """

    if msg is None:
        return None

    lmsg = MessageUnsafe()
    if msg.key_previous is None:
        return lmsg.set_key(msg)

    # Need to invert only key fields, but whadda hell.
    for fcurr, fprev in MPC.currprev_fields:
        setattr(lmsg, fcurr, msg.get(fprev))
        setattr(lmsg, fprev, msg.get(fcurr))

    return lmsg


def _add_msg_diff (msg1, msg2, ecat, colorize, fnsyn=None):
    """
    Diff the two messages and add the diff message to the catalog.

    @param msg1: old message, or C{None}
    @param msg2: new message, or C{None}
    @param ecat: catalog to which the diff message is added
    @param colorize: passed on to C{msg_ediff}
    @param fnsyn: if not C{None}, passed as C{srefsyn} when adding
        the message, instead of appending it at the end

    @returns: 1 if a diff message was added, 0 if the messages
        were considered same
    @rtype: int
    """

    # Skip diffing if old and new messages are "same".
    if msg1 and msg2 and msg1.inv == msg2.inv:
        return 0

    # Create messages for special pairings.
    msg1_s, msg2_s = _create_special_diff_pair(msg1, msg2)

    # Create the diff.
    tmsg = msg2 or msg1
    emsg = msg2_s or msg1_s
    if emsg is tmsg:
        # Copy, so that the message from the input catalog
        # is not the one passed as emsg.
        emsg = MessageUnsafe(tmsg)
    emsg = msg_ediff(msg1_s, msg2_s, emsg=emsg, ecat=ecat, colorize=colorize)

    # Add to the diff catalog.
    if fnsyn is None:
        ecat.add(emsg, len(ecat))
    else:
        ecat.add(emsg, srefsyn=fnsyn)

    return 1


def _create_special_diff_pair (msg1, msg2):
    """
    Create substitute messages for diffing fuzzy/non-fuzzy pairings.

    When exactly one of the two messages is fuzzy with previous fields,
    copies of one or both messages with rearranged current and previous
    fields are returned in place of the originals; the originals
    are not modified.

    @returns: messages to diff in place of C{msg1} and C{msg2}
        (the originals themselves when no special case applies)
    @rtype: tuple
    """

    msg1_s, msg2_s = msg1, msg2

    if not msg1 or not msg2:
        # No special cases if either message is non-existent.
        pass

    # Cases f-nf-*.
    elif msg1.fuzzy and msg1.key_previous is not None and not msg2.fuzzy:
        # Case f-nf-ecc.
        if msg_eq_fields(msg1, msg2, MPC.curr_fields):
            msg1_s = MessageUnsafe(msg1)
            msg_copy_fields(msg1, msg1_s, MPC.prevcurr_fields)
            msg_clear_prev_fields(msg1_s)
        # Case f-nf-necc.
        else:
            msg1_s = MessageUnsafe(msg1)
            msg2_s = MessageUnsafe(msg2)
            msg_copy_fields(msg1, msg1_s, MPC.prevcurr_fields)
            msg_copy_fields(msg1, msg2_s, MPC.currprev_fields)

    # Cases nf-f-*.
    elif not msg1.fuzzy and msg2.fuzzy and msg2.key_previous is not None:
        # Case nf-f-ecp.
        if msg_eq_fields(msg1, msg2, MPC.currprev_fields):
            msg2_s = MessageUnsafe(msg2)
            msg_clear_prev_fields(msg2_s)
        # Case nf-f-necp.
        else:
            msg1_s = MessageUnsafe(msg1)
            msg2_s = MessageUnsafe(msg2)
            msg_copy_fields(msg2, msg1_s, MPC.prev_fields)
            msg_copy_fields(msg2, msg2_s, MPC.currprev_fields)

    return msg1_s, msg2_s


def diff_hdrs (hdr1, hdr2, vpath1, vpath2, hmsgctxt, ecat, colorize):
    """
    Diff two catalog headers into a header diff message.

    @param hdr1: old header, or a false value if there is none
    @param hdr2: new header, or a false value if there is none
    @param vpath1: visual path of the old catalog
    @type vpath1: string
    @param vpath2: visual path of the new catalog
    @type vpath2: string
    @param hmsgctxt: context to set on the resulting message,
        identifying it as a header diff
    @param ecat: passed on to C{msg_ediff}
    @param colorize: passed on to C{msg_ediff}

    @returns: the header diff message, and whether the headers differ
    @rtype: (message, bool)
    """

    hmsg1, hmsg2 = [x and MessageUnsafe(x.to_msg()) or None
                    for x in (hdr1, hdr2)]

    ehmsg = hmsg2 and MessageUnsafe(hmsg2) or None
    ehmsg, dr = msg_ediff(hmsg1, hmsg2, emsg=ehmsg, ecat=ecat,
                          colorize=colorize, diffr=True)
    if dr == 0.0:
        # Revert to empty message if no difference between headers.
        ehmsg = MessageUnsafe()

    # Add visual paths as old/new segments into msgid.
    vpaths = [vpath1, vpath2]
    # Always use slashes as path separator, for portability of ediffs.
    vpaths = [x.replace(os.path.sep, "/") for x in vpaths]
    ehmsg.msgid = "- %s\n+ %s" % tuple(vpaths)
    # Add trailing newline if msgstr has it, again to appease msgfmt.
    if ehmsg.msgstr[0].endswith("\n"):
        ehmsg.msgid += "\n"

    # Add context identifying the diffed message as header.
    ehmsg.msgctxt = hmsgctxt

    # Add conspicuous separator at the top of the header.
    ehmsg.manual_comment.insert(0, "=" * 76)

    return ehmsg, dr > 0.0


def init_ediff_header (ehdr, hmsgctxt=EDST.hmsgctxt_el, extitle=None):
    """
    Set up the header of an ediff catalog, in place.

    User name and email are read from the C{user} section of
    Pology configuration.

    @param ehdr: header to initialize (modified in place)
    @param hmsgctxt: context of header diff messages in the catalog,
        recorded in the C{EDST.hmsgctxt_field} header field
    @param extitle: if not C{None}, extra text to put into
        the title comment
    """

    cfgsec = pology_config.section("user")
    user = cfgsec.string("name", "J. Random Translator")
    email = cfgsec.string("email", None)

    # Construct replacement lists of the same type the header uses.
    listtype = type(ehdr.title)

    if extitle is not None:
        title = "+- ediff (%s) -+" % extitle
    else:
        title = "+- ediff -+"
    ehdr.title = listtype([title])

    # Author list is left empty.
    ehdr.author = listtype([])

    ehdr.copyright = ""
    ehdr.license = ""
    ehdr.comment = listtype()

    rfv = ehdr.replace_field_value # shortcut

    rfv("Project-Id-Version", "ediff")
    ehdr.remove_field("Report-Msgid-Bugs-To")
    ehdr.remove_field("POT-Creation-Date")
    rfv("PO-Revision-Date", str(time.strftime("%Y-%m-%d %H:%M%z")))
    enc = "UTF-8" # strictly, input catalogs may have different encodings
    rfv("Content-Type", "text/plain; charset=%s" % enc)
    rfv("Content-Transfer-Encoding", "8bit")
    if email:
        translator = "%s <%s>" % (user, email)
    else:
        translator = "%s" % user
    rfv("Last-Translator", translator)
    rfv("Language-Team", "Differs")
    # FIXME: Something smarter? (Not trivial.)
    ehdr.remove_field("Plural-Forms")

    # Context of header messages in the catalog.
    ehdr.set_field(EDST.hmsgctxt_field, hmsgctxt)


def get_msgctxt_for_headers (cat):
    """
    Find a context for header diff messages not used in the catalog.

    The context is the shortest repetition of C{EDST.hmsgctxt_el}
    which does not equal C{msgctxt} of any message in the catalog.

    @param cat: catalog (or other re-iterable collection of messages)

    @returns: the context string
    @rtype: string
    """

    hmsgctxt = EDST.hmsgctxt_el
    while any(msg.msgctxt == hmsgctxt for msg in cat):
        hmsgctxt += EDST.hmsgctxt_el

    return hmsgctxt