File indexing completed on 2024-11-10 08:12:42
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""
Patch PO files from an embedded diff.

Documented in C{doc/user/diffpatch.docbook#sec-dpdiff}.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

try:
    import fallback_import_paths
except ImportError:
    # Optional helper module; proceed with the regular import paths.
    pass

import sys
import os
import locale
import re
from tempfile import NamedTemporaryFile

from pology import version, _, n_
from pology.colors import ColorOptionParser
from pology.report import error, warning, report
from pology.msgreport import error_on_msg, warning_on_msg
import pology.config as pology_config
from pology.fsops import str_to_unicode, mkdirpath, collect_catalogs
from pology.fsops import exit_on_exception
from pology.catalog import Catalog
from pology.message import Message, MessageUnsafe
from pology.header import Header
from pology.diff import msg_ediff, msg_ediff_to_new, msg_ediff_to_old

from pology.internal.poediffpatch import MPC, EDST
from pology.internal.poediffpatch import msg_eq_fields, msg_copy_fields
from pology.internal.poediffpatch import msg_clear_prev_fields
from pology.internal.poediffpatch import diff_cats
from pology.internal.poediffpatch import init_ediff_header
from pology.internal.poediffpatch import get_msgctxt_for_headers
from functools import reduce


# Flags added to messages in target catalogs and in the rejects file.
_flag_ediff = "ediff"
_flag_ediff_to_cur = "%s-to-cur" % _flag_ediff
_flag_ediff_to_new = "%s-to-new" % _flag_ediff
_flag_ediff_no_match = "%s-no-match" % _flag_ediff
_flags_all = (
    _flag_ediff,
    _flag_ediff_to_cur, _flag_ediff_to_new,
    _flag_ediff_no_match,
)


def main ():
    """
    Parse the command line and either apply an ediff or unembed diffs.

    Without C{-u}, no free arguments are accepted and the patch is applied
    through L{apply_ediff}. With C{-u}, every free argument is a file or
    a directory (searched with C{collect_catalogs}), and each collected
    path is passed to L{unembed_ediff}.
    """

    locale.setlocale(locale.LC_ALL, "")

    # Get defaults for command line options from global config.
    cfgsec = pology_config.section("poepatch")
    def_do_merge = cfgsec.boolean("merge", True)

    # Setup options and parse the command line.
    usage = _("@info command usage",
        "%(cmd)s [OPTIONS] [OPTIONS] < EDIFF\n"
        "%(cmd)s -u [OPTIONS] PATHS...",
        cmd="%prog")
    desc = _("@info command description",
        "Apply embedded diff of PO files as patch.")
    ver = _("@info command version",
        "%(cmd)s (Pology) %(version)s\n"
        "Copyright © 2009, 2010 "
        "Chusslove Illich (Часлав Илић) <%(email)s>",
        cmd="%prog", version=version(), email="caslav.ilic@gmx.net")

    opars = ColorOptionParser(usage=usage, description=desc, version=ver)
    opars.add_option(
        "-a", "--aggressive",
        action="store_true", dest="aggressive", default=False,
        help=_("@info command line option description",
               "Apply every message to its paired message in the target file, "
               "irrespective of whether its non-pairing parts match too."))
    opars.add_option(
        "-d", "--directory",
        metavar=_("@info command line value placeholder", "DIR"),
        dest="directory",
        help=_("@info command line option description",
               "Prepend this directory path to any resolved target file path."))
    opars.add_option(
        "-e", "--embed",
        action="store_true", dest="embed", default=False,
        help=_("@info command line option description",
               "Instead of applying resolved newer version of the message, "
               "add the full embedded diff into the target file."))
    opars.add_option(
        "-i", "--input",
        metavar=_("@info command line value placeholder", "FILE"),
        dest="input",
        help=_("@info command line option description",
               "Read the patch from the given file instead of standard input."))
    opars.add_option(
        "-n", "--no-merge",
        action="store_false", dest="do_merge", default=def_do_merge,
        help=_("@info command line option description",
               "Do not try to indirectly pair messages by merging catalogs."))
    opars.add_option(
        "-p", "--strip",
        metavar=_("@info command line value placeholder", "NUM"),
        dest="strip",
        help=_("@info command line option description",
               "Strip the smallest prefix containing NUM leading slashes from "
               "each file name found in the ediff file (like in patch(1)). "
               "If not given, only the base name of each file is taken."))
    opars.add_option(
        "-u", "--unembed",
        action="store_true", dest="unembed", default=False,
        help=_("@info command line option description",
               "Instead of applying a patch, resolve all embedded differences "
               "in given paths to newer versions of messages."))

    (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:]))

    # Could use some speedup.
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    if not op.unembed:
        if free_args:
            error(_("@info",
                    "Too many arguments in command line: %(argspec)s",
                    argspec=" ".join(free_args)))
        if op.strip and not op.strip.isdigit():
            error(_("@info",
                    "Option %(opt)s expects a positive integer value.",
                    opt="--strip"))
        apply_ediff(op)
    else:
        paths = []
        for path in free_args:
            if not os.path.exists(path):
                warning(_("@info",
                          "Path '%(path)s' does not exist.",
                          path=path))
                # Already reported; queueing it would only produce a second,
                # misleading "error reading catalog" warning later.
                continue
            if os.path.isdir(path):
                paths.extend(collect_catalogs(path))
            else:
                paths.append(path)
        for path in paths:
            unembed_ediff(path)


def apply_ediff (op):
    """
    Apply the ediff given by the options to the catalogs it refers to.

    The ediff is read from C{op.input} or, if that is not set, from
    standard input. It is split into per-catalog sections by its header
    messages, and each section is applied to its target catalog.
    Rejected parts are collected and written out as a separate ediff.

    @param op: parsed command line options; the attributes read here are
        C{input}, C{strip}, C{directory} and C{embed}, and the object is
        passed on to L{patch_header} and L{patch_messages}
    """

    # Read the ediff PO.
    dummy_stream_path = "<stdin>"
    if op.input:
        if not os.path.isfile(op.input):
            error(_("@info",
                    "Path '%(path)s' is not a file or does not exist.",
                    path=op.input))
        edfpath = op.input
        readfh = None
    else:
        edfpath = dummy_stream_path
        readfh = sys.stdin
    try:
        ecat = Catalog(edfpath, monitored=False, readfh=readfh)
    except Exception:
        error(_("@info ediff is shorthand for \"embedded difference\"",
                "Error reading ediff '%(file)s'.",
                file=edfpath))

    # Split ediff by diffed catalog into original and new file paths,
    # header message, and ordinary messages.
    hmsgctxt = ecat.header.get_field_value(EDST.hmsgctxt_field)
    if hmsgctxt is None:
        error(_("@info",
                "Header field '%(field)s' is missing in the ediff.",
                field=EDST.hmsgctxt_field))
    edsplits = []
    cehmsg = None
    smsgid = "\x00"
    ecat.add_last(MessageUnsafe(dict(msgctxt=hmsgctxt, msgid=smsgid))) # sentry
    for emsg in ecat:
        if emsg.msgctxt == hmsgctxt:
            if cehmsg:
                # Record previous section.
                edsplits.append((fpaths, cehmsg, cemsgs))
            if emsg.msgid == smsgid: # end sentry, avoid parsing below
                break

            # Mine original and new file paths out of header.
            fpaths = []
            for fpath in emsg.msgid.split("\n")[:2]:
                # Strip leading "+ "/"- "
                fpath = fpath[2:]
                # Convert to platform path separators.
                # A callable replacement is used because a replacement
                # string is an escape-processed template, and a backslash
                # separator would be an invalid escape in it.
                fpath = re.sub(r"/+", lambda m: os.path.sep, fpath)
                # Remove revision indicator.
                p = fpath.find(EDST.filerev_sep)
                if p >= 0:
                    fpath = fpath[:p]
                # Strip path and append directory as requested.
                if op.strip:
                    preflen = int(op.strip)
                    lst = fpath.split(os.path.sep, preflen)
                    if preflen + 1 == len(lst):
                        fpath = lst[preflen]
                    else:
                        fpath = os.path.basename(fpath)
                else:
                    fpath = os.path.basename(fpath)
                if op.directory and fpath:
                    fpath = os.path.join(op.directory, fpath)
                # All done.
                fpaths.append(fpath)

            cehmsg = emsg
            cemsgs = []
        else:
            cemsgs.append(emsg)

    # Prepare catalog for rejects and merges.
    rcat = Catalog("", create=True, monitored=False, wrapping=ecat.wrapping())
    init_ediff_header(rcat.header, hmsgctxt=hmsgctxt, extitle="rejects")

    # Apply diff to catalogs.
    for fpaths, ehmsg, emsgs in edsplits:
        # Open catalog for patching.
        fpath1, fpath2 = fpaths
        if fpath1:
            # Diff from an existing catalog, open it.
            if not os.path.isfile(fpath1):
                warning(_("@info",
                          "Path '%(path)s' is not a file or does not exist, "
                          "skipping it.",
                          path=fpath1))
                continue
            try:
                cat = Catalog(fpath1)
            except Exception:
                warning(_("@info",
                          "Error reading catalog '%(file)s', skipping it.",
                          file=fpath1))
                continue
        elif fpath2:
            # New catalog added in diff, create it (or open if it exists).
            try:
                mkdirpath(os.path.dirname(fpath2))
                cat = Catalog(fpath2, create=True)
                if cat.created():
                    cat.set_wrapping(ecat.wrapping())
            except Exception:
                if os.path.isfile(fpath2):
                    # fpath1 is empty in this branch, report the path
                    # that was actually being read.
                    warning(_("@info",
                              "Error reading catalog '%(file)s', skipping it.",
                              file=fpath2))
                else:
                    warning(_("@info",
                              "Cannot create catalog '%(file)s', skipping it.",
                              file=fpath2))
                continue
        else:
            error(_("@info",
                    "Both catalogs in ediff indicated not to exist."))

        # Do not try to patch catalog with embedded differences
        # (i.e. previously patched using -e).
        if cat.header.get_field_value(EDST.hmsgctxt_field) is not None:
            warning(_("@info",
                      "Catalog '%(file)s' already contains "
                      "embedded differences, skipping it.",
                      file=cat.filename))
            continue

        # Do not try to patch catalog if the patch contains
        # unresolved split differences.
        if any(_flag_ediff_to_new in x.flag for x in emsgs):
            warning(_("@info",
                      "Patch for catalog '%(file)s' contains unresolved "
                      "split differences, skipping it.",
                      file=cat.filename))
            continue

        # Patch the catalog.
        rejected_ehmsg = patch_header(cat, ehmsg, ecat, op)
        rejected_emsgs_flags = patch_messages(cat, emsgs, ecat, op)
        any_rejected = rejected_ehmsg or rejected_emsgs_flags
        if fpath2 or any_rejected:
            created = cat.created()
            # Nothing is reported when the sync reports no change.
            if cat.sync():
                if not created:
                    if any_rejected and op.embed:
                        report(_("@info:progress E is for \"with embedding\"",
                                 "Partially patched (E): %(file)s",
                                 file=cat.filename))
                    elif any_rejected:
                        report(_("@info:progress",
                                 "Partially patched: %(file)s",
                                 file=cat.filename))
                    elif op.embed:
                        report(_("@info:progress E is for \"with embedding\"",
                                 "Patched (E): %(file)s",
                                 file=cat.filename))
                    else:
                        report(_("@info:progress",
                                 "Patched: %(file)s",
                                 file=cat.filename))
                else:
                    if op.embed:
                        report(_("@info:progress E is for \"with embedding\"",
                                 "Created (E): %(file)s",
                                 file=cat.filename))
                    else:
                        report(_("@info:progress",
                                 "Created: %(file)s",
                                 file=cat.filename))
        else:
            # No new file path and nothing rejected: the patch removes
            # the catalog.
            os.unlink(fpath1)
            report(_("@info:progress",
                     "Removed: %(file)s",
                     file=fpath1))

        # If there were any rejects and reembedding is not in effect,
        # record the necessary to present them.
        if any_rejected and not op.embed:
            if not rejected_ehmsg:
                # Clean header diff.
                ehmsg.manual_comment = ehmsg.manual_comment[:1]
                ehmsg.msgstr[0] = ""
            rcat.add_last(ehmsg)
            for emsg, flag in rejected_emsgs_flags:
                # Reembed to avoid any conflicts.
                msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg)
                emsg = msg_ediff(msg1_s, msg2_s,
                                 emsg=msg2_s, ecat=rcat, enoctxt=hmsgctxt)
                if flag:
                    emsg.flag.add(flag)
                rcat.add_last(emsg)

    # If there were any rejects, write them out.
    if len(rcat) > 0:
        # Construct paths for embedded diffs of rejects.
        rsuff = "rej"
        if ecat.filename != dummy_stream_path:
            # Insert ".rej" before the last extension (or append it).
            rpath = ecat.filename
            p = rpath.rfind(".")
            if p < 0:
                p = len(rpath)
            rpath = rpath[:p] + (".%s" % rsuff) + rpath[p:]
        else:
            rpath = "stdin.%s.po" % rsuff

        rcat.filename = rpath
        rcat.sync(force=True, noobsend=True)
        report(_("@info:progress file to which rejected parts of the patch "
                 "have been written to",
                 "*** Rejects: %(file)s",
                 file=rcat.filename))


# Patch application types.
_pt_merge, _pt_insert, _pt_remove = range(3)

def patch_messages (cat, emsgs, ecat, options):
    """
    Apply ediff messages of one catalog section to the catalog.

    @param cat: catalog being patched (modified in place)
    @param emsgs: ediff messages belonging to this catalog
    @param ecat: the ediff catalog, used for message-bound error reports
    @param options: parsed command line options; C{embed} is read here
    @returns: list of C{(ediff message, flag)} pairs for every patch
        that did not apply directly
    """

    # It may happen that a single message from original catalog
    # is paired with more than one from the diff
    # (e.g. single old translated message going into two new fuzzy).
    # Therefore paired messages must be tracked, to know if patched
    # message can be merged into the existing, or it must be inserted.
    pmsgkeys = set()

    # Triplets for splitting directly unapplicable patches into two.
    # Delay building of triplets until needed for the first time.
    striplets_pack = [None]
    def striplets ():
        if striplets_pack[0] is None:
            striplets_pack[0] = build_splitting_triplets(emsgs, cat, options)
        return striplets_pack[0]

    # Check whether diffs apply, and where and how if they do.
    rejected_emsgs_flags = []
    patch_specs = []
    for emsg in emsgs:
        pspecs = msg_apply_diff(cat, emsg, ecat, pmsgkeys, striplets)
        for pspec in pspecs:
            emsg_m, flag = pspec[:2]
            if flag == _flag_ediff or options.embed:
                patch_specs.append(pspec)
            if flag != _flag_ediff:
                rejected_emsgs_flags.append((emsg_m, flag))

    # Sort accepted patches by position of application.
    # Position is None for removals of messages no longer in the catalog;
    # None is not orderable against integers in Python 3, so such specs
    # are keyed before all real positions (they are skipped below anyway).
    patch_specs.sort(key=lambda spec: -1 if spec[3] is None else spec[3])

    # Add accepted patches to catalog.
    incpos = 0
    for emsg, flag, typ, pos, msg1, msg2, msg1_s, msg2_s in patch_specs:
        if pos is not None:
            # Account for insertions and removals made so far.
            pos += incpos

        if options.embed:
            # Embedded diff may conflict one of the messages in catalog.
            # Make a new diff of special messages,
            # and embed them either into existing message in catalog,
            # or into new message.
            if typ == _pt_merge:
                tmsg = cat[pos]
                tpos = pos
            else:
                tmsg = MessageUnsafe(msg2 or {})
                tpos = None
            emsg = msg_ediff(msg1_s, msg2_s, emsg=tmsg, ecat=cat, eokpos=tpos)

        if typ == _pt_merge:
            if not options.embed:
                cat[pos].set_inv(msg2)
            else:
                cat[pos].flag.add(flag)
        elif typ == _pt_insert:
            if not options.embed:
                cat.add(Message(msg2), pos)
            else:
                cat.add(Message(emsg), pos)
                cat[pos].flag.add(flag)
            incpos += 1
        elif typ == _pt_remove:
            if pos is None:
                continue
            if not options.embed:
                cat.remove(pos)
                incpos -= 1
            else:
                cat[pos].flag.add(flag)
        else:
            error_on_msg(_("@info",
                           "Unknown patch type %(type)s.",
                           type=typ), emsg, ecat)

    return rejected_emsgs_flags


def msg_apply_diff (cat, emsg, ecat, pmsgkeys, striplets):
    """
    Determine how a single ediff message applies to the catalog.

    @param cat: catalog being patched (only inspected here)
    @param emsg: the ediff message
    @param ecat: the ediff catalog, used for message-bound error reports
    @param pmsgkeys: set of keys of catalog messages already paired
        with a patch; updated in place
    @param striplets: list of splitting triplets, or a callable that
        returns that list on demand; a matched triplet is removed from it
    @returns: list of patch specifications, each a tuple
        C{(emsg, flag, type, position, msg1, msg2, msg1_s, msg2_s)}
    """

    msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg)

    # Try to select existing message from the original messages.
    # Order is important, should try first new, then old
    # (e.g. if an old fuzzy was resolved to new after diff was made).
    msg = None
    if msg2 and msg2 in cat:
        msg = cat[msg2]
    elif msg1 and msg1 in cat:
        msg = cat[msg1]

    patch_specs = []

    # Try to apply the patch.
    if msg_patchable(msg, msg1, msg2):
        # Patch can be directly applied.
        if msg1 and msg2:
            if msg.key not in pmsgkeys:
                typ = _pt_merge
                pos = cat.find(msg)
                pmsgkeys.add(msg.key)
            else:
                typ = _pt_insert
                pos, weight = cat.insertion_inquiry(msg2)
        elif msg2: # patch adds a message
            if msg:
                typ = _pt_merge
                pos = cat.find(msg)
                pmsgkeys.add(msg.key)
            else:
                typ = _pt_insert
                pos, weight = cat.insertion_inquiry(msg2)
        elif msg1: # patch removes a message
            if msg:
                typ = _pt_remove
                pos = cat.find(msg)
                pmsgkeys.add(msg.key)
            else:
                typ = _pt_remove
                pos = None # no position to remove from
        else:
            # Cannot happen.
            error_on_msg(_("@info",
                           "Neither the old nor the new message "
                           "in the diff is indicated to exist."),
                         emsg, ecat)
        patch_specs.append((emsg, _flag_ediff, typ, pos,
                            msg1, msg2, msg1_s, msg2_s))
    else:
        # Patch cannot be applied directly,
        # try to split into old-to-current and current-to-new diffs.
        # A split needs both the old and the new message; a diff which
        # only adds or only removes a message has one of them as None.
        split_found = False
        if msg1 is not None and msg2 is not None:
            if callable(striplets):
                striplets = striplets() # delayed creation of splitting triplets
            for i, striplet in enumerate(striplets):
                m1_t, m1_ts, m2_t, m2_ts, m_t, m_ts1, m_ts2 = striplet
                if msg1.inv == m1_t.inv and msg2.inv == m2_t.inv:
                    striplets.pop(i) # remove to not slow further searches
                    split_found = True
                    break
        if split_found:
            # Construct new corresponding diffs.
            em_1c = msg_ediff(m1_ts, m_ts1, emsg=MessageUnsafe(m_t))
            em_c2 = msg_ediff(m_ts2, m2_ts, emsg=MessageUnsafe(m2_t))
            # Current-to-new can be merged or inserted,
            # and old-to-current is then inserted just before it.
            if m_t.key not in pmsgkeys:
                typ = _pt_merge
                pos = cat.find(m_t)
                pmsgkeys.add(m_t.key)
            else:
                typ = _pt_insert
                pos, weight = cat.insertion_inquiry(m2_t)
            # Order of adding patch specs here important for rejects file.
            patch_specs.append((em_1c, _flag_ediff_to_cur, _pt_insert, pos,
                                m1_t, m_t, m1_ts, m_ts1))
            patch_specs.append((em_c2, _flag_ediff_to_new, typ, pos,
                                m_t, m2_t, m_ts2, m2_ts))

    # The patch is totally rejected.
    # Will be inserted if reembedding requested, so compute insertion.
    if not patch_specs:
        typ = _pt_insert
        if msg2 is not None:
            pos, weight = cat.insertion_inquiry(msg2)
        else:
            pos = len(cat)
        patch_specs.append((emsg, _flag_ediff_no_match, typ, pos,
                            msg1, msg2, msg1_s, msg2_s))

    return patch_specs


def msg_patchable (msg, msg1, msg2):
    """
    Check whether the current message can be directly patched.

    @param msg: current message in the catalog, or C{None}
    @param msg1: old message from the diff, or C{None}
    @param msg2: new message from the diff, or C{None}
    @returns: a true value if the diff from C{msg1} to C{msg2}
        can be applied given C{msg}, a false value otherwise
    """

    # Check for cases where current message does not match old or new,
    # but there is a transformation that can also be cleanly merged.
    msg_m = msg

    # Old and new are translated, but current is fuzzy and has previous fields.
    # Transform current to its previous state, from which it may have become
    # fuzzy by merging with templates.
    if (    msg and msg.fuzzy and msg.key_previous is not None
        and msg1 and not msg1.fuzzy and msg2 and not msg2.fuzzy
    ):
        msg_m = MessageUnsafe(msg)
        msg_copy_fields(msg, msg_m, MPC.prevcurr_fields)
        msg_clear_prev_fields(msg_m)
        msg_m.fuzzy = False

    # Old is None, new is translated, and current is untranslated.
    # Add translation of new to current, since it may have been added as
    # untranslated after merging with templates.
    elif msg and msg.untranslated and not msg1 and msg2 and msg2.translated:
        msg_m = MessageUnsafe(msg)
        msg_copy_fields(msg2, msg_m, ["msgstr"])

    if msg1 and msg2:
        return msg and msg_m.inv in (msg1.inv, msg2.inv)
    elif msg2:
        return not msg or msg_m.inv == msg2.inv
    elif msg1:
        return not msg or msg_m.inv == msg1.inv
    else:
        return not msg


def resolve_diff_pair (emsg):
    """
    Resolve an ediff message into its old and new messages.

    @param emsg: the ediff message (not modified; copies are resolved)
    @returns: tuple C{(msg1, msg2, msg1_s, msg2_s)}, where C{msg1_s} and
        C{msg2_s} are the results of C{msg_ediff_to_old} and
        C{msg_ediff_to_new}, and C{msg1} and C{msg2} are those same
        messages or copies adjusted for the fuzzy/non-fuzzy special
        pairings handled below
    """

    # Recover old and new message according to diff.
    # Resolve into copies of ediff message, to preserve non-inv parts.
    emsg1 = MessageUnsafe(emsg)
    msg1_s = msg_ediff_to_old(emsg1, rmsg=emsg1)
    emsg2 = MessageUnsafe(emsg)
    msg2_s = msg_ediff_to_new(emsg2, rmsg=emsg2)

    # Resolve any special pairings.
    msg1, msg2 = msg1_s, msg2_s
    if not msg1_s or not msg2_s:
        # No special cases if either message nonexistent.
        pass

    # Cases f-nf-*.
    elif msg1_s.fuzzy and not msg2_s.fuzzy:
        # Case f-nf-ecc.
        if (    msg2_s.key_previous is None
            and not msg_eq_fields(msg1_s, msg2_s, MPC.curr_fields)
        ):
            msg1 = MessageUnsafe(msg1_s)
            msg_copy_fields(msg1_s, msg1, MPC.currprev_fields)
            msg_copy_fields(msg2_s, msg1, MPC.curr_fields)
        # Case f-nf-necc.
        elif msg2_s.key_previous is not None:
            msg1 = MessageUnsafe(msg1_s)
            msg2 = MessageUnsafe(msg2_s)
            msg_copy_fields(msg2_s, msg1, MPC.prevcurr_fields)
            msg_clear_prev_fields(msg2)

    # Cases nf-f-*.
    elif not msg1_s.fuzzy and msg2_s.fuzzy:
        # Case nf-f-ecp.
        if (    msg1_s.key_previous is None
            and not msg_eq_fields(msg1_s, msg2_s, MPC.curr_fields)
        ):
            msg2 = MessageUnsafe(msg2_s)
            msg_copy_fields(msg1_s, msg2, MPC.currprev_fields)
        # Case nf-f-necp.
        elif msg1_s.key_previous is not None:
            msg1 = MessageUnsafe(msg1_s)
            msg2 = MessageUnsafe(msg2_s)
            msg_copy_fields(msg1_s, msg2, MPC.prev_fields)
            msg_clear_prev_fields(msg1)

    return msg1, msg2, msg1_s, msg2_s


def build_splitting_triplets (emsgs, cat, options):
    """
    Build triplets for splitting patches that do not apply directly.

    Catalogs of old and new messages are assembled from the ediff
    messages and diffed against the current catalog in both directions;
    messages present in both diffs under the same current-message key
    yield a triplet.

    @param emsgs: ediff messages of one catalog section
    @param cat: the current catalog
    @param options: parsed command line options; C{do_merge} is read here
    @returns: list of tuples
        C{(m1_t, m1_ts, m2_t, m2_ts, m_t, m_ts1, m_ts2)}
    """

    # Create catalogs of old and new messages.
    cat1 = Catalog("", create=True, monitored=False)
    cat2 = Catalog("", create=True, monitored=False)
    for emsg in emsgs:
        msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg)
        if msg1:
            cat1.add_last(msg1)
        if msg2:
            cat2.add_last(msg2)
    # Make headers same, to avoid any diffs there.
    cat1.header = cat.header
    cat2.header = cat.header

    # Write created catalogs to disk if
    # msgmerge may be used on files during diffing.
    if options.do_merge:
        tmpfs = [] # to avoid garbage collection until the function returns
        for tcat, tsuff in ((cat1, "1"), (cat2, "2")):
            tmpf = NamedTemporaryFile(prefix="poepatch-split-%s-" % tsuff,
                                      suffix=".po")
            tmpfs.append(tmpf)
            tcat.filename = tmpf.name
            tcat.sync(force=True)

    # Create the old-to-current and current-to-new diffs.
    ecat_1c = Catalog("", create=True, monitored=False)
    diff_cats(cat1, cat, ecat_1c, options.do_merge, wadd=False, wrem=False)
    ecat_c2 = Catalog("", create=True, monitored=False)
    diff_cats(cat, cat2, ecat_c2, options.do_merge, wadd=False, wrem=False)

    # Mine splitting triplets out of diffs.
    # Both mappings are keyed by the key of the current message.
    sdoublets_1c = {}
    for emsg in ecat_1c:
        m1_t, m_t, m1_ts, m_ts1 = resolve_diff_pair(emsg)
        sdoublets_1c[m_t.key] = [m1_t, m1_ts, m_t, m_ts1]
    sdoublets_c2 = {}
    for emsg in ecat_c2:
        m_t, m2_t, m_ts2, m2_ts = resolve_diff_pair(emsg)
        sdoublets_c2[m_t.key] = [m_t, m_ts2, m2_t, m2_ts]
    common_keys = set(sdoublets_1c).intersection(sdoublets_c2)
    striplets = []
    for key in common_keys:
        m1_t, m1_ts, m_t, m_ts1 = sdoublets_1c[key]
        m_t, m_ts2, m2_t, m2_ts = sdoublets_c2[key]
        striplets.append((m1_t, m1_ts, m2_t, m2_ts, m_t, m_ts1, m_ts2))

    return striplets


def patch_header (cat, ehmsg, ecat, options):
    """
    Apply the header diff of one catalog section to the catalog.

    @param cat: catalog being patched (modified in place)
    @param ehmsg: the ediff header message of this section
    @param ecat: the ediff catalog (not used in this function)
    @param options: parsed command line options; C{embed} is read here
    @returns: C{None} if there was no header diff or it applied cleanly,
        otherwise the rejected C{ehmsg}
    """

    if not ehmsg.msgstr[0]: # no header diff, only metadata
        return None

    ehmsg_clean = clear_header_metadata(ehmsg)

    # Create reduced headers.
    hmsg1 = msg_ediff_to_old(ehmsg_clean)
    hmsg2 = msg_ediff_to_new(ehmsg_clean)
    hmsg = not cat.created() and cat.header.to_msg() or None
    hdrs = []
    for m in (hmsg, hmsg1, hmsg2):
        h = m is not None and reduce_header_fields(Header(m)) or None
        hdrs.append(h)
    rhdr, rhdr1, rhdr2 = hdrs

    # Decide if the header can be cleanly patched.
    clean = False
    if not rhdr:
        clean = rhdr1 or rhdr2
    else:
        clean = (rhdr1 and rhdr == rhdr1) or (rhdr2 and rhdr == rhdr2)

    if clean:
        if not options.embed:
            if hmsg2:
                cat.header = Header(hmsg2)
            else:
                # Catalog will be removed if no messages are rejected,
                # and otherwise the header should stay as-is.
                pass
        else:
            if cat.created():
                cat.header = Header(hmsg2)
            ehmsg = MessageUnsafe(ehmsg)
            ehmsg.flag.add(_flag_ediff)
            hmsgctxt = get_msgctxt_for_headers(cat)
            ehmsg.msgctxt = hmsgctxt
            cat.header.set_field(EDST.hmsgctxt_field, hmsgctxt)
            cat.add(Message(ehmsg), 0)
        return None
    else:
        return ehmsg


# Clear header diff message of metadata.
# A copy of the message is returned.
def clear_header_metadata (ehmsg):
    """
    Return a copy of the header diff message with metadata cleared.

    The first manual comment is dropped, the context is unset and
    the C{msgid} is emptied on the copy.
    """

    ehmsg = MessageUnsafe(ehmsg)
    ehmsg.manual_comment.pop(0)
    ehmsg.msgctxt = None
    ehmsg.msgid = ""

    return ehmsg


# Remove known unimportant fields from the header,
# to ignore them on comparisons.
def reduce_header_fields (hdr):
    """
    Return a copy of the header without fields ignored on comparison.
    """

    rhdr = Header(hdr)
    for field in (
        "POT-Creation-Date",
        "PO-Revision-Date",
        "Last-Translator",
        "X-Generator",
    ):
        rhdr.remove_field(field)

    return rhdr


def unembed_ediff (path, all=False, old=False):
    """
    Resolve embedded differences in the catalog at the given path.

    @param path: path of the catalog to process
    @param all: if true, also process messages carrying no ediff flag
    @param old: if true, resolve to old versions of messages
        instead of new versions
    """

    try:
        cat = Catalog(path)
    except Exception:
        warning(_("@info",
                  "Error reading catalog '%(file)s', skipping it.",
                  file=path))
        return

    hmsgctxt = cat.header.get_field_value(EDST.hmsgctxt_field)
    if hmsgctxt is not None:
        cat.header.remove_field(EDST.hmsgctxt_field)

    uehmsg = None
    unembedded = {}
    for msg in cat:
        ediff_flag = None
        for flag in _flags_all:
            if flag in msg.flag:
                ediff_flag = flag
                msg.flag.remove(flag)
        if not ediff_flag and not all:
            continue
        if ediff_flag in (_flag_ediff_no_match, _flag_ediff_to_new):
            # Throw away fully rejected embeddings, i.e. reject the patch.
            # For split-difference embeddings, throw away the current-to-new;
            # this effectively rejects the patch, which is safest thing to do.
            cat.remove_on_sync(msg)
        elif hmsgctxt is not None and msg.msgctxt == hmsgctxt:
            if uehmsg:
                warning_on_msg(_("@info",
                                 "Unembedding results in duplicate header, "
                                 "previous header at %(line)d(#%(entry)d); "
                                 "skipping it.",
                                 line=uehmsg.refline, entry=uehmsg.refentry),
                               msg, cat)
                return
            msg_ediff_to_x = msg_ediff_to_old if old else msg_ediff_to_new
            hmsg = msg_ediff_to_x(clear_header_metadata(msg))
            if hmsg.msgstr and hmsg.msgstr[0]:
                cat.header = Header(hmsg)
            cat.remove_on_sync(msg)
            uehmsg = msg
        else:
            msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(msg)
            tmsg = msg1 if old else msg2
            if tmsg is not None:
                if tmsg.key in unembedded:
                    msg_p = unembedded[tmsg.key]
                    warning_on_msg(_("@info",
                                     "Unembedding results in "
                                     "duplicate message, previous message "
                                     "at %(line)d(#%(entry)d); skipping it.",
                                     line=msg_p.refline, entry=msg_p.refentry),
                                   msg, cat)
                    return
                # Set to the selected version (old or new), which is also
                # the one whose key is recorded below.
                msg.set(Message(tmsg))
                unembedded[tmsg.key] = msg
            else:
                cat.remove_on_sync(msg)

    if cat.sync():
        report(_("@info:progress",
                 "Unembedded: %(file)s",
                 file=cat.filename))


if __name__ == '__main__':
    exit_on_exception(main)