File indexing completed on 2024-04-21 16:29:13

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Common functionality for poediff and poepatch scripts.
0005 
0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0007 @license: GPLv3
0008 
0009 @warning: Non-public module.
0010 """
0011 
0012 import os
0013 import time
0014 
0015 from pology import PologyError, _
0016 from pology.catalog import Catalog
0017 import pology.config as pology_config
0018 from pology.diff import msg_ediff, tdiff
0019 from pology.merge import merge_pofile
0020 from pology.message import MessageUnsafe
0021 
0022 
def _raise_no_inst (clssname):
    """
    Signal an attempt to instantiate a class meant only as a namespace.

    @param clssname: name of the class, inserted into the error message
    @raises PologyError: always
    """

    errmsg = _("@info",
               "Class '%(clss)s' only provides static attributes, "
               "objects of this type cannot be constructed.",
               clss=clssname)
    raise PologyError(errmsg)
0030 
0031 
0032 # FIXME: Define message part categories in message module.
0033 # Message part categories.
class MPC:
    """
    Message part categories: names of message fields grouped by role.

    The class is a plain namespace; trying to construct it raises an error
    through C{_raise_no_inst}.
    """

    # Names of the current fields.
    curr_fields = ["msgctxt", "msgid", "msgid_plural"]
    # Names of the matching previous fields, in the same order.
    prev_fields = [name + "_previous" for name in curr_fields]
    # (current, previous) name pairs.
    currprev_fields = [(curr, prev)
                       for curr, prev in zip(curr_fields, prev_fields)]
    # (previous, current) name pairs.
    prevcurr_fields = [(prev, curr) for curr, prev in currprev_fields]

    def __init__ (self):
        _raise_no_inst(type(self).__name__)
0044 
0045 
0046 # Syntax tokens in embedded diff catalogs.
class EDST:
    """
    Syntax tokens used in embedded diff catalogs.

    The class is a plain namespace; trying to construct it raises an error
    through C{_raise_no_inst}. All values are fixed by the ediff
    specification, as marked on each line.
    """

    # Name of the header field that stores the context of header messages.
    hmsgctxt_field = "X-Ediff-Header-Context" # by spec
    # Element from which the context of header messages is built.
    hmsgctxt_el = "~" # by spec
    # Separator token between file and revision.
    filerev_sep = " <<< " # by spec

    def __init__ (self):
        _raise_no_inst(type(self).__name__)
0054 
0055 
def msg_eq_fields (m1, m2, fields):
    """
    Check whether two messages agree on the given fields.

    Each element of C{fields} is either a single field name, looked up in
    both messages, or a tuple whose first element is the field name
    in C{m1} and whose second element is the field name in C{m2}.
    Field values are fetched with the C{get} method of the messages.

    @param m1: first message, or C{None}
    @param m2: second message, or C{None}
    @param fields: field names or tuples of field names to compare
    @returns: C{True} if both messages are C{None} or all listed fields
        compare equal, C{False} otherwise (including when exactly one
        message is C{None})
    """

    if m1 is None or m2 is None:
        # A missing message matches only another missing message.
        return m1 is m2

    name_pairs = (f if isinstance(f, tuple) else (f, f) for f in fields)
    return not any(m1.get(pair[0]) != m2.get(pair[1]) for pair in name_pairs)
0070 
0071 
def msg_copy_fields (m1, m2, fields):
    """
    Copy values of the given fields from one message into another.

    Each element of C{fields} is either a single field name, used on both
    sides, or a tuple whose first element names the field read from C{m1}
    and whose second element names the attribute set on C{m2}.
    When C{m1} is C{None}, a freshly constructed C{MessageUnsafe} is used
    as the source instead.

    @param m1: source message, or C{None}
    @param m2: destination message, modified in place
    @param fields: field names or tuples of field names to copy
    """

    source = MessageUnsafe() if m1 is None else m1

    for spec in fields:
        names = spec if isinstance(spec, tuple) else (spec, spec)
        value = source.get(names[0])
        setattr(m2, names[1], value)
0081 
0082 
def msg_clear_prev_fields (m):
    """
    Set every previous field of the message (see C{MPC.prev_fields})
    to C{None}.

    @param m: message to modify in place
    """

    for prev_name in MPC.prev_fields:
        setattr(m, prev_name, None)
0087 
0088 
def msg_cleanup (msg):
    """
    Remove previous fields if inconsistent with the message in total.

    Previous fields are kept only on a fuzzy message that has
    C{msgid_previous} set; in every other case all previous fields
    that are not already C{None} are reset to C{None}.

    @param msg: message to clean up in place
    """

    # Consistent state: fuzzy message with previous msgid present.
    if msg.fuzzy and msg.msgid_previous is not None:
        return

    for prev_name in MPC.prev_fields:
        if msg.get(prev_name) is None:
            continue
        setattr(msg, prev_name, None)
0098 
def diff_cats (cat1, cat2, ecat,
               merge=True, colorize=False, wrem=True, wadd=True, noobs=False,
               upprogf=None):
    """
    Diff messages of two catalogs and add the results to the ediff catalog.

    @param cat1: old catalog
    @param cat2: new catalog
    @param ecat: ediff catalog to which diffed messages are appended
    @param merge: passed on to C{_pair_msgs}
    @param colorize: passed on to C{_add_msg_diff}
    @param wrem: passed on to C{_pair_msgs}
    @param wadd: passed on to C{_pair_msgs}
    @param noobs: passed on to C{_pair_msgs}
    @param upprogf: optional callable without arguments, called once for
        every message pair processed here (and passed on to C{_pair_msgs})
    @returns: sum of the values returned by C{_add_msg_diff}
        for all message pairs
    """

    if not upprogf:
        upprogf = lambda: None

    pairs = _pair_msgs(cat1, cat2, merge, wrem, wadd, noobs, upprogf)

    # Pairs with a new message follow the order of the new catalog.
    # Pairs with only an old message are placed by source references of
    # the old message, helped by heuristic detection of renamed sources.
    with_new = sorted((pair for pair in pairs if pair[1]),
                      key=lambda pair: pair[1].refentry)
    only_old = [pair for pair in pairs if not pair[1]]
    fnsyn = cat2.detect_renamed_sources(cat1) if only_old else None

    # Diffed messages go first into a local ediff catalog, as heuristic
    # insertion directly into the global one would scatter them around;
    # afterwards they are copied in order to the global catalog.
    lecat = Catalog("", create=True, monitored=False)
    ndiffed = 0
    for batch, batch_fnsyn in ((with_new, None), (only_old, fnsyn)):
        for msg1, msg2 in batch:
            upprogf()
            ndiffed += _add_msg_diff(msg1, msg2, lecat, colorize, batch_fnsyn)

    for emsg in lecat:
        ecat.add(emsg, len(ecat))

    return ndiffed
0133 
0134 
def cats_update_effort (cat1, cat2, upprogf=None):
    """
    Estimate the effort of updating the old catalog into the new one.

    Messages are paired with C{_pair_msgs} (merging enabled, unpaired new
    messages included, unpaired old messages left out), and the estimate
    is the sum of "nominal number of newly translated words" over all
    pairs in which the new message is active.

    New messages with empty C{msgid} contribute nothing, and an empty
    new C{msgstr[0]} yields zero nominal equal words in translation,
    instead of failing on division by zero.

    @param cat1: old catalog
    @param cat2: new catalog
    @param upprogf: optional callable without arguments, called once for
        every message pair (and passed on to C{_pair_msgs})
    @returns: total nominal number of newly translated words
    """

    upprogf = upprogf or (lambda: None)

    dpairs = _pair_msgs(cat1, cat2, merge=True, wrem=False, wadd=True,
                        noobs=False, upprogf=upprogf)

    # The update effort of the given old-new message pair is equal
    # to "nominal number of newly translated words" (NNTW),
    # which is defined as follows:
    # - nominal length of a word in msgid is set to 6 characters (WL).
    # - number of characters in new msgid is divided by WL
    #   to give nominal number of words in new msgid (NWO)
    # - number of equal characters in old and new msgid is divided by WL
    #   to give nominal number of equal words in msgid (NEWO)
    # - number of characters in new msgstr is divided by number of
    #   characters in new msgid to give translation expansion factor (EF)
    # - number of equal characters in old and new msgstr is divided
    #   by WL*EF to give nominal number of equal words in msgstr (NEWT)
    # - character-based similarity ratio of old and new msgid
    #   (from 0.0 for no similarity to 1.0 for equality) is computed (SRO)
    # - character-based similarity ratio of old and new msgstr
    #   is computed (SRT)
    # - similarity ratio threshold is set to 0.5 (SRB)
    # - reduction due to similarity factor is computed as
    #   RSF = (min(SRO, SRT) - SRB) / (1 - SRB)
    # - nominal number of newly translated words is computed as
    #   NNTW = min(NWO - max(NEWO, NEWT) * RSF, NWO),
    #   additionally bounded from below by zero.
    #
    # Only those pairs where the new message is active are counted in.
    #
    # On plural messages, for the moment only msgid and msgstr[0]
    # are considered, and the above procedure applied to them.
    # This underestimates the effort of updating a new plural message
    # when old message was ordinary.

    wl = 6.0
    srb = 0.5

    nntw_total = 0

    for msg1, msg2 in dpairs:
        upprogf()

        if not msg2.active:
            continue
        if msg1 is None:
            msg1 = MessageUnsafe()

        len_msgid2 = len(msg2.msgid)
        if len_msgid2 == 0:
            # NWO is zero, hence NNTW is zero whatever the other terms;
            # skipping also avoids division by zero in EF.
            continue

        nwo = len_msgid2 / wl
        diffo, dro = tdiff(msg1.msgid, msg2.msgid, diffr=True)
        newo = sum(1 for t, c in diffo if t == " ") / wl
        ef = float(len(msg2.msgstr[0])) / len_msgid2
        difft, drt = tdiff(msg1.msgstr[0], msg2.msgstr[0], diffr=True)
        if ef > 0.0:
            newt = sum(1 for t, c in difft if t == " ") / (wl * ef)
        else:
            # Empty new msgstr[0]: nothing in it can be equal to the old.
            newt = 0.0
        sro = 1.0 - dro
        srt = 1.0 - drt
        rsf = (min(sro, srt) - srb) / (1.0 - srb)
        nntw = max(min(nwo - max(newo, newt) * rsf, nwo), 0.0)

        nntw_total += nntw

    return nntw_total
0197 
0198 
def _calc_text_update_effort (text1, text2):
    # NOTE(review): this helper looks unfinished. It returns nothing,
    # 'dr1' is never read, and 'word_ediff' is not among the names this
    # file visibly imports from pology.diff (only msg_ediff and tdiff are),
    # so calling it would presumably raise NameError -- confirm intent
    # before using or removing it.

    dr1 = 0.5
    ediff, dr = word_ediff(text1, text2, markup=True, diffr=True)
0203 
0204 
0205 
def _pair_msgs (cat1, cat2,
                merge=True, wrem=True, wadd=True, noobs=False,
                upprogf=None):
    """
    Pair messages of the old and the new catalog for diffing.

    Both catalogs are modified: previous fields inconsistent with their
    message are removed (C{msg_cleanup}), and obsolete messages are
    removed as well if C{noobs} is set.

    @param cat1: old catalog
    @param cat2: new catalog
    @param merge: whether merging of the two catalog files may be used
        as a pivot for pairing (done only if neither catalog reports
        itself as created)
    @param wrem: whether to add unpaired old messages, as C{(msg1, None)}
    @param wadd: whether to add unpaired new messages, as C{(None, msg2)}
    @param noobs: whether to remove obsolete messages before pairing
    @param upprogf: optional callable without arguments, called once for
        every message visited in any of the passes
    @returns: list of C{(msg1, msg2)} tuples
    """

    if not upprogf:
        upprogf = lambda: None

    both_cats = (cat1, cat2)

    # Remove obsolete messages if they are not to be diffed.
    if noobs:
        for cat in both_cats:
            _rmobs_no_sync(cat)

    # Clean up inconsistencies in messages.
    for cat in both_cats:
        for msg in cat:
            upprogf()
            msg_cleanup(msg)

    # Pivot catalogs (inverted and merged) are constructed only when
    # first asked for, and then kept here under their slot name.
    built = {}

    # Make a getter of the catalog with current and previous fields
    # exchanged in each message.
    def inverted_of (slot, cat):
        def fetch ():
            if built.get(slot) is None:
                icat = Catalog("", create=True, monitored=False)
                for msg in cat:
                    upprogf()
                    imsg = _msg_invert_cp(msg)
                    if imsg not in icat:
                        icat.add_last(imsg)
                built[slot] = icat
            return built[slot]
        return fetch

    # Make a getter of the catalog obtained by merging the two given.
    def merged_of (slot, cat_a, cat_b):
        def fetch ():
            if built.get(slot) is None:
                # Merge is done if requested and both catalogs exist.
                if merge and not cat_a.created() and not cat_b.created():
                    built[slot] = merge_pofile(cat_a.filename, cat_b.filename,
                                               getcat=True, monitored=False,
                                               quiet=True, abort=True)
                    if noobs:
                        _rmobs_no_sync(built[slot])
                else:
                    built[slot] = {} # only tested for membership
            return built[slot]
        return fetch

    icat1 = inverted_of("icat1", cat1)
    icat2 = inverted_of("icat2", cat2)
    mcat12 = merged_of("mcat12", cat1, cat2)
    mcat21 = merged_of("mcat21", cat2, cat1)

    # Pair messages:
    # - first try to find an old message for each new
    # - then try to find a new message for each unpaired old
    # - finally add remaining unpaired messages to be diffed with None
    paired1 = set()
    paired2 = set()
    dpairs = []

    def record (old_msg, new_msg):
        paired1.add(old_msg)
        paired2.add(new_msg)
        dpairs.append((old_msg, new_msg))

    for msg2 in cat2:
        upprogf()
        msg1 = _get_msg_pair(msg2, cat1, icat1, mcat12)
        if msg1 and msg1 not in paired1:
            record(msg1, msg2)

    for msg1 in cat1:
        upprogf()
        if msg1 in paired1:
            continue
        msg2 = _get_msg_pair(msg1, cat2, icat2, mcat21)
        if msg2 and msg2 not in paired2:
            record(msg1, msg2)

    if wadd:
        for msg2 in cat2:
            upprogf()
            if msg2 not in paired2:
                dpairs.append((None, msg2))

    if wrem:
        for msg1 in cat1:
            upprogf()
            if msg1 not in paired1:
                dpairs.append((msg1, None))

    return dpairs
0302 
0303 
def _rmobs_no_sync (cat):
    """
    Drop obsolete messages from the catalog.

    Each obsolete message is handed to C{cat.remove_on_sync}, and then
    C{cat.sync_map} is called once.

    @param cat: catalog to modify in place
    """

    for msg in cat:
        if not msg.obsolete:
            continue
        cat.remove_on_sync(msg)

    cat.sync_map()
0310 
0311 
def _get_msg_pair (msg, ocat, icat, mcat):
    """
    Determine the pair of the message in the other catalog, if any.

    The message is looked up first in the other catalog directly, then
    through the pivot catalogs C{icat} and C{mcat}, in that order.
    In each of them, a fuzzy message without a match is looked up once
    more with its current and previous fields exchanged. A match found in
    a pivot catalog is translated back into a message of C{ocat}
    by looking up its inverted form there.

    @param msg: message for which the pair is sought
    @param ocat: the other catalog
    @param icat: pivot catalog, or callable without arguments returning it
    @param mcat: pivot catalog, or callable without arguments returning it
    @returns: result of the last lookup made; a true value if a pair
        was found
    """

    found = None

    # Go through catalogs in this order, so that pivot catalogs
    # which turn out to be unnecessary never get constructed.
    for source in (ocat, icat, mcat):
        lookup = source() if callable(source) else source

        found = lookup.get(msg)
        if not found and msg.fuzzy:
            found = lookup.get(_msg_invert_cp(msg))

        if lookup is not ocat: # lookup is one of pivot catalogs
            found = ocat.get(_msg_invert_cp(found))

        if found:
            return found

    return found
0330 
0331 
def _msg_invert_cp (msg):
    """
    Construct a lightweight message with previous and current fields
    of the given message exchanged. To be used only for lookups.

    @param msg: message to invert, or C{None}
    @returns: C{None} if C{msg} is C{None}; the inverted lightweight
        message if C{msg.key_previous} is set; otherwise whatever
        C{set_key(msg)} on the lightweight message returns
        (NOTE(review): presumably the lightweight message itself carrying
        the key of C{msg} -- confirm against C{pology.message})
    """

    if msg is None:
        return None

    lmsg = MessageUnsafe()

    if msg.key_previous is None:
        # Nothing to exchange, only take over the key.
        return lmsg.set_key(msg)

    # Only key fields really need inverting, but all paired fields
    # are exchanged for simplicity.
    for curr_name, prev_name in MPC.currprev_fields:
        setattr(lmsg, curr_name, msg.get(prev_name))
        setattr(lmsg, prev_name, msg.get(curr_name))

    return lmsg
0351 
0352 
def _add_msg_diff (msg1, msg2, ecat, colorize, fnsyn=None):
    """
    Diff a pair of messages and add the diffed message to the ediff catalog.

    @param msg1: old message, or C{None}
    @param msg2: new message, or C{None}
    @param ecat: ediff catalog receiving the diffed message
    @param colorize: passed on to C{msg_ediff}
    @param fnsyn: if not C{None}, passed as C{srefsyn} when adding to
        the catalog; if C{None}, the diffed message is added at the end
    @returns: 0 if both messages exist and their C{inv} attributes
        compare equal (nothing is added), 1 otherwise
    """

    # Nothing to do if old and new messages are "same".
    unchanged = msg1 and msg2 and msg1.inv == msg2.inv
    if unchanged:
        return 0

    # Special pairings get their own messages to diff.
    old_s, new_s = _create_special_diff_pair(msg1, msg2)

    # The message receiving the diff must not be one of the originals.
    original = msg2 or msg1
    target = new_s or old_s
    if target is original:
        target = MessageUnsafe(original)
    ediffed = msg_ediff(old_s, new_s, emsg=target, ecat=ecat,
                        colorize=colorize)

    # Put the result into the diff catalog.
    if fnsyn is not None:
        ecat.add(ediffed, srefsyn=fnsyn)
    else:
        ecat.add(ediffed, len(ecat))

    return 1
0376 
0377 
def _create_special_diff_pair (msg1, msg2):
    """
    Create substitute messages for pairings which need special treatment.

    Special treatment is given when exactly one of the messages is fuzzy
    with previous key, while the other is not fuzzy. The input messages
    are never modified; substitutes are C{MessageUnsafe} copies.

    @param msg1: old message, or C{None}
    @param msg2: new message, or C{None}
    @returns: tuple of messages to diff in place of C{msg1} and C{msg2};
        the originals themselves where no substitute was needed
    """

    # No special cases if either message is non-existent.
    if not msg1 or not msg2:
        return msg1, msg2

    old_s, new_s = msg1, msg2

    old_fuzzy_prev = msg1.fuzzy and msg1.key_previous is not None
    if old_fuzzy_prev and not msg2.fuzzy:
        # Cases f-nf-*.
        same_curr = msg_eq_fields(msg1, msg2, MPC.curr_fields)
        old_s = MessageUnsafe(msg1)
        if same_curr:
            # Case f-nf-ecc.
            msg_copy_fields(msg1, old_s, MPC.prevcurr_fields)
            msg_clear_prev_fields(old_s)
        else:
            # Case f-nf-necc.
            new_s = MessageUnsafe(msg2)
            msg_copy_fields(msg1, old_s, MPC.prevcurr_fields)
            msg_copy_fields(msg1, new_s, MPC.currprev_fields)
        return old_s, new_s

    if not msg1.fuzzy and msg2.fuzzy and msg2.key_previous is not None:
        # Cases nf-f-*.
        if msg_eq_fields(msg1, msg2, MPC.currprev_fields):
            # Case nf-f-ecp.
            new_s = MessageUnsafe(msg2)
            msg_clear_prev_fields(new_s)
        else:
            # Case nf-f-necp.
            old_s = MessageUnsafe(msg1)
            new_s = MessageUnsafe(msg2)
            msg_copy_fields(msg2, old_s, MPC.prev_fields)
            msg_copy_fields(msg2, new_s, MPC.currprev_fields)

    return old_s, new_s
0414 
0415 
def diff_hdrs (hdr1, hdr2, vpath1, vpath2, hmsgctxt, ecat, colorize):
    """
    Diff two catalog headers into a header message of the ediff catalog.

    @param hdr1: old header, or a false value if there is none
    @param hdr2: new header, or a false value if there is none
    @param vpath1: visual path of the old catalog
    @param vpath2: visual path of the new catalog
    @param hmsgctxt: context to set on the resulting message
    @param ecat: ediff catalog, passed on to C{msg_ediff}
    @param colorize: passed on to C{msg_ediff}
    @returns: tuple of the diffed header message and a boolean telling
        whether the difference ratio reported by C{msg_ediff}
        is greater than zero
    """

    hmsg1 = hdr1 and MessageUnsafe(hdr1.to_msg()) or None
    hmsg2 = hdr2 and MessageUnsafe(hdr2.to_msg()) or None

    target = hmsg2 and MessageUnsafe(hmsg2) or None
    ehmsg, dr = msg_ediff(hmsg1, hmsg2, emsg=target, ecat=ecat,
                          colorize=colorize, diffr=True)
    headers_differ = dr > 0.0
    if dr == 0.0:
        # No difference between headers, fall back to empty message.
        ehmsg = MessageUnsafe()

    # Visual paths go into msgid as old/new segments, always with
    # slashes as path separator, for portability of ediffs.
    old_vpath = vpath1.replace(os.path.sep, "/")
    new_vpath = vpath2.replace(os.path.sep, "/")
    ehmsg.msgid = "- %s\n+ %s" % (old_vpath, new_vpath)
    # If msgstr ends with newline then msgid must too, to appease msgfmt.
    if ehmsg.msgstr[0].endswith("\n"):
        ehmsg.msgid += "\n"

    # The context identifies the diffed message as header.
    ehmsg.msgctxt = hmsgctxt

    # Conspicuous separator at the top of the header.
    ehmsg.manual_comment.insert(0, "=" * 76)

    return ehmsg, headers_differ
0444 
0445 
def init_ediff_header (ehdr, hmsgctxt=EDST.hmsgctxt_el, extitle=None):
    """
    Initialize the header of an ediff catalog.

    The name and email address of the user are read from the C{user}
    section of Pology configuration.

    @param ehdr: header to initialize, modified in place
    @param hmsgctxt: context of header messages in the catalog,
        stored in the header field named by C{EDST.hmsgctxt_field}
    @param extitle: optional text added in parentheses to the title
    """

    cfgsec = pology_config.section("user")
    user = cfgsec.string("name", "J. Random Translator")
    email = cfgsec.string("email", None)

    # Header comment parts are assigned using the same container type
    # which the header already uses for its title.
    listtype = type(ehdr.title)

    if extitle is not None:
        title = "+- ediff (%s) -+" % extitle
    else:
        title = "+- ediff -+"
    ehdr.title = listtype([title])

    # Authors are left empty (an earlier variant, since disabled,
    # recorded "user <email>, year." here).
    ehdr.author = listtype([])

    ehdr.copyright = ""
    ehdr.license = ""
    ehdr.comment = listtype()

    if email:
        translator = "%s <%s>" % (user, email)
    else:
        translator = "%s" % user

    rfv = ehdr.replace_field_value # shortcut

    rfv("Project-Id-Version", "ediff")
    ehdr.remove_field("Report-Msgid-Bugs-To")
    ehdr.remove_field("POT-Creation-Date")
    rfv("PO-Revision-Date", time.strftime("%Y-%m-%d %H:%M%z"))
    enc = "UTF-8" # strictly, input catalogs may have different encodings
    rfv("Content-Type", "text/plain; charset=%s" % enc)
    rfv("Content-Transfer-Encoding", "8bit")
    rfv("Last-Translator", translator)
    rfv("Language-Team", "Differs")
    # FIXME: Something smarter? (Not trivial.)
    ehdr.remove_field("Plural-Forms")

    # Context of header messages in the catalog.
    ehdr.set_field(EDST.hmsgctxt_field, hmsgctxt)
0492 
0493 
def get_msgctxt_for_headers (cat):
    """
    Find a context for header messages which no message in the catalog has.

    The context is built by repeating C{EDST.hmsgctxt_el}; the shortest
    repetition not equal to C{msgctxt} of any message in the catalog
    is taken.

    @param cat: catalog whose message contexts must be avoided
    @returns: the context string
    """

    candidate = EDST.hmsgctxt_el
    while any(candidate == msg.msgctxt for msg in cat):
        candidate += EDST.hmsgctxt_el

    return candidate
0507 
0508