File indexing completed on 2024-11-10 11:08:51
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""
Create embedded diffs of PO files.

Documented in C{doc/user/diffpatch.docbook#sec-dppatch}.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import filecmp
import locale
import os
import shutil
import sys

# Optional helper module; its absence is not an error.
try:
    import fallback_import_paths
except ImportError:
    pass

from pology import version, _, n_, t_
from pology.catalog import Catalog
from pology.message import MessageUnsafe
from pology.colors import ColorOptionParser, set_coloring_globals, cjoin
import pology.config as pology_config
from pology.fsops import str_to_unicode, collect_catalogs
from pology.fsops import exit_on_exception
from pology.diff import msg_ediff
from pology.report import error, warning, report, format_item_list
from pology.report import list_options
from pology.report import init_file_progress
from pology.stdcmdopt import add_cmdopt_colors
from pology.vcs import available_vcs, make_vcs

from pology.internal.poediffpatch import MPC, EDST
from pology.internal.poediffpatch import msg_eq_fields, msg_copy_fields
from pology.internal.poediffpatch import msg_clear_prev_fields
from pology.internal.poediffpatch import diff_cats, diff_hdrs
from pology.internal.poediffpatch import init_ediff_header
from pology.internal.poediffpatch import get_msgctxt_for_headers
from pology.internal.poediffpatch import cats_update_effort


def main ():
    """
    Command line entry point.

    Parses the command line, validates the given paths (either two plain
    paths, or any number of paths under a version control system),
    pairs up the catalogs to compare, and then either writes out
    the embedded diff or reports the update effort estimate.
    Temporary paths recorded during the run are removed at the end.
    """

    locale.setlocale(locale.LC_ALL, "")

    # Get defaults for command line options from global config.
    cfgsec = pology_config.section("poediff")
    def_do_merge = cfgsec.boolean("merge", True)

    # Setup options and parse the command line.
    usage = _("@info command usage",
        "%(cmd)s [OPTIONS] FILE1 FILE2\n"
        "%(cmd)s [OPTIONS] DIR1 DIR2\n"
        "%(cmd)s -c VCS [OPTIONS] [PATHS...]",
        cmd="%prog")
    desc = _("@info command description",
        "Create embedded diffs of PO files.")
    ver = _("@info command version",
        "%(cmd)s (Pology) %(version)s\n"
        "Copyright © 2009, 2010 "
        "Chusslove Illich (Часлав Илић) <%(email)s>",
        cmd="%prog", version=version(), email="caslav.ilic@gmx.net")

    showvcs = list(set(available_vcs()).difference(["none"]))
    showvcs.sort()

    opars = ColorOptionParser(usage=usage, description=desc, version=ver)
    opars.add_option(
        "-b", "--skip-obsolete",
        action="store_true", dest="skip_obsolete", default=False,
        help=_("@info command line option description",
               "Do not diff obsolete messages."))
    opars.add_option(
        "-c", "--vcs",
        metavar=_("@info command line value placeholder", "VCS"),
        dest="version_control",
        help=_("@info command line option description",
               "Paths are under version control by given VCS; "
               "can be one of: %(vcslist)s.",
               vcslist=format_item_list(showvcs)))
    opars.add_option(
        "--list-options",
        action="store_true", dest="list_options", default=False,
        help=_("@info command line option description",
               "List the names of available options."))
    opars.add_option(
        "--list-vcs",
        action="store_true", dest="list_vcs", default=False,
        help=_("@info command line option description",
               "List the keywords of known version control systems."))
    opars.add_option(
        "-n", "--no-merge",
        action="store_false", dest="do_merge", default=def_do_merge,
        help=_("@info command line option description",
               "Do not try to indirectly pair messages by merging catalogs."))
    opars.add_option(
        "-o", "--output",
        metavar=_("@info command line value placeholder", "POFILE"),
        dest="output",
        help=_("@info command line option description",
               "Output diff catalog to a file instead of stdout."))
    opars.add_option(
        "-p", "--paired-only",
        action="store_true", dest="paired_only", default=False,
        help=_("@info command line option description",
               "When two directories are diffed, ignore catalogs which "
               "are not present in both directories."))
    opars.add_option(
        "-q", "--quiet",
        action="store_true", dest="quiet", default=False,
        help=_("@info command line option description",
               "Do not display any progress info."))
    opars.add_option(
        "-Q", "--quick",
        action="store_true", dest="quick", default=False,
        help=_("@info command line option description",
               "Equivalent to %(opt)s.",
               opt="-bns"))
    opars.add_option(
        "-r", "--revision",
        metavar=_("@info command line value placeholder", "REV1[:REV2]"),
        dest="revision",
        help=_("@info command line option description",
               "Revision from which to diff to current working copy, "
               "or from first to second revision (if VCS is given)."))
    opars.add_option(
        "-s", "--strip-headers",
        action="store_true", dest="strip_headers", default=False,
        help=_("@info command line option description",
               "Do not diff headers and do not write out the top header "
               "(resulting output cannot be used as patch)."))
    # The second placeholder must be opt2, otherwise "-n" is never shown.
    opars.add_option(
        "-U", "--update-effort",
        action="store_true", dest="update_effort", default=False,
        help=_("@info command line option description",
               "Instead of outputting the diff, calculate and output "
               "an estimate of the effort that was needed to update "
               "the translation from old to new paths. "
               "Ignores %(opt1)s and %(opt2)s options.",
               opt1="-b", opt2="-n"))
    add_cmdopt_colors(opars)

    (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:]))

    if op.list_options:
        report(list_options(opars))
        sys.exit(0)
    if op.list_vcs:
        report("\n".join(showvcs))
        sys.exit(0)

    # Could use some speedup.
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    set_coloring_globals(ctype=op.coloring_type, outdep=(not op.raw_colors))

    # -Q is a shorthand for -b -n -s.
    if op.quick:
        op.do_merge = False
        op.skip_obsolete = True
        op.strip_headers = True

    # Create VCS.
    vcs = None
    if op.version_control:
        if op.version_control not in available_vcs(flat=True):
            error_wcl(_("@info",
                        "Unknown VCS '%(vcs)s' selected.",
                        vcs=op.version_control))
        vcs = make_vcs(op.version_control)

    # Sanity checks on paths.
    paths = free_args
    if not vcs:
        if len(paths) != 2:
            error_wcl(_("@info",
                        "Exactly two paths are needed for diffing."))
        for path in paths:
            if not os.path.exists(path):
                # Same translatable message as in the VCS branch below.
                error_wcl(_("@info",
                            "Path '%(path)s' does not exist.",
                            path=path))
        p1, p2 = paths
        both_files = os.path.isfile(p1) and os.path.isfile(p2)
        both_dirs = os.path.isdir(p1) and os.path.isdir(p2)
        if not (both_files or both_dirs):
            error_wcl(_("@info",
                        "Both paths must be either files or directories."))
    else:
        # Default to current working dir if no paths given.
        paths = paths or ["."]
        for path in paths:
            if not os.path.exists(path):
                error_wcl(_("@info",
                            "Path '%(path)s' does not exist.",
                            path=path))
            if not vcs.is_versioned(path):
                error_wcl(_("@info",
                            "Path '%(path)s' is not under version control.",
                            path=path))

    # Collect and pair PO files in given paths.
    # Each pair specification is in the form of
    #   ((path1, path2), (vpath1, vpath2))
    # where path* are the real paths, and vpath* the visual paths to be
    # presented in diff output.
    if not vcs:
        fpairs = collect_file_pairs(paths[0], paths[1], op.paired_only)
        pspecs = [(x, x) for x in fpairs]
    else:
        lst = op.revision.split(":", 1) if op.revision else []
        if len(lst) > 2:
            # Defensive only: with maxsplit=1 above, at most two parts
            # can result. The keyword must match the %(revlist)s
            # placeholder in the message.
            error_wcl(_("@info",
                        "Too many revisions given: %(revlist)s.",
                        revlist=format_item_list(lst)))
        elif len(lst) == 2:
            revs = lst # diff between revisions
        elif len(lst) == 1:
            revs = [lst[0], None] # diff from revision to working copy
        else:
            revs = ["", None] # diff from head to working copy
        # Replace original paths with modified/added catalogs.
        paths_nc = []
        for path in paths:
            for cpath in vcs.to_commit(path):
                if cpath.endswith((".po", ".pot")):
                    paths_nc.append(cpath)
        paths = paths_nc
        paths.sort()
        pspecs = collect_pspecs_from_vcs(vcs, paths, revs, op.paired_only)

    if not op.update_effort:
        ecat, ndiffed = diff_pairs(pspecs, op.do_merge,
                                   colorize=(not op.output),
                                   shdr=op.strip_headers,
                                   noobs=op.skip_obsolete,
                                   quiet=op.quiet)
        if ndiffed > 0:
            hmsgctxt = ecat.header.get_field_value(EDST.hmsgctxt_field)
            lines = []
            msgs = list(ecat)
            if not op.strip_headers:
                msgs.insert(0, ecat.header.to_msg())
            for msg in msgs:
                if op.strip_headers and msg.msgctxt == hmsgctxt:
                    # With stripped headers, a per-file header message is
                    # reduced to a short comment block separating files.
                    sepl = []
                    sepl += [msg.manual_comment[0]]
                    sepl += msg.msgid.split("\n")[:2]
                    lines.extend(["# %s\n" % x for x in sepl])
                    lines.append("\n")
                else:
                    lines.extend(msg.to_lines(force=True, wrapf=ecat.wrapf()))
            diffstr = cjoin(lines)[:-1] # remove last newline
            if op.output:
                # The diff is encoded explicitly, so the file must be
                # opened in binary mode; text mode rejects bytes.
                with open(op.output, "wb") as ofl:
                    ofl.write(diffstr.encode(ecat.encoding()))
            else:
                report(diffstr)
    else:
        updeff = pairs_update_effort(pspecs, quiet=op.quiet)
        ls = []
        for kw, desc, val, fmtval in updeff:
            ls.append(_("@info",
                        "%(quantity)s: %(value)s",
                        quantity=desc, value=fmtval))
        report("\n".join(ls))

    # Clean up.
    cleanup_tmppaths()


def diff_pairs (pspecs, merge,
                colorize=False, wrem=True, wadd=True, shdr=False, noobs=False,
                quiet=False):
    """
    Create the embedded diff catalog for a list of catalog pairs.

    @param pspecs: pair specifications, each of the form
        C{((path1, path2), (vpath1, vpath2))}, with real paths first
        and the paths to show in the diff second
    @param merge: passed through to C{diff_cats}
    @param colorize: passed through to C{diff_cats} and C{diff_hdrs}
    @param wrem: passed through to C{diff_cats}
    @param wadd: passed through to C{diff_cats}
    @param shdr: if true, diffed headers are not added to the returned count
    @param noobs: passed through to C{diff_cats}
    @param quiet: if true, no progress is shown

    @returns: the diff catalog and the number of diffed messages
    @rtype: (Catalog, int)
    """

    # Create diffs of messages.
    # Note: Headers will be collected and diffed after all messages,
    # to be able to check if any decoration to their message keys is needed.
    wrappings = {}
    ecat = Catalog("", create=True, monitored=False)
    hspecs = []
    ndiffed = 0
    update_progress = None
    if len(pspecs) > 1 and not quiet:
        update_progress = init_file_progress([vp[1] for fp, vp in pspecs],
                            addfmt=t_("@info:progress", "Diffing: %(file)s"))
    for fpaths, vpaths in pspecs:
        upprogf = None
        if update_progress:
            upprogf = lambda: update_progress(vpaths[1])
            upprogf()
        # Quick check if files are binary equal.
        if fpaths[0] and fpaths[1] and filecmp.cmp(*fpaths):
            continue
        cats = []
        for fpath in fpaths:
            try:
                cats.append(Catalog(fpath, create=True, monitored=False))
            except Exception:
                # Keep the offending file in place while cleaning up.
                error_wcl(_("@info",
                            "Cannot parse catalog '%(file)s'.",
                            file=fpath), norem=[fpath])
        tpos = len(ecat)
        cndiffed = diff_cats(cats[0], cats[1], ecat,
                             merge, colorize, wrem, wadd, noobs, upprogf)
        # A header is recorded only for catalogs not reporting created().
        hspecs.append(([not x.created() and x.header or None
                        for x in cats], vpaths, tpos, cndiffed))
        ndiffed += cndiffed
        # Collect and count wrapping policy used for to-catalog.
        wrapping = cats[1].wrapping()
        wrappings[wrapping] = wrappings.get(wrapping, 0) + 1
    if update_progress:
        update_progress()

    # Find appropriate length of context for header messages.
    hmsgctxt = get_msgctxt_for_headers(ecat)
    init_ediff_header(ecat.header, hmsgctxt=hmsgctxt)

    # Create diffs of headers.
    # If some of the messages were diffed,
    # header must be added even if there is no difference.
    incpos = 0
    for hdrs, vpaths, pos, cndiffed in hspecs:
        ehmsg, anydiff = diff_hdrs(hdrs[0], hdrs[1], vpaths[0], vpaths[1],
                                   hmsgctxt, ecat, colorize)
        if anydiff or cndiffed:
            # Earlier insertions shift the recorded positions.
            ecat.add(ehmsg, pos + incpos)
            incpos += 1
    # Add diffed headers to total count only if header stripping not in effect.
    if not shdr:
        ndiffed += incpos

    # Set the most used wrapping policy for the ediff catalog.
    # The stable sort keeps the last-seen policy on equal counts.
    if wrappings:
        wrapping = sorted(wrappings.items(), key=lambda x: x[1])[-1][0]
        ecat.set_wrapping(wrapping)
        if wrapping is not None:
            ecat.header.set_field("X-Wrapping", ", ".join(wrapping))

    return ecat, ndiffed


def collect_file_pairs (dpath1, dpath2, paired_only):
    """
    Collect and pair catalogs as list C{[(fpath1, fpath2)]}.

    If the first path is a file, the two given paths are the only pair.
    Otherwise catalogs are paired by subdirectory and file name.
    Where a pair cannot be found, empty string is given for path
    (unless C{paired_only} is true, when non-paired catalogs are ignored).

    @param dpath1: first file or directory path
    @param dpath2: second file or directory path
    @param paired_only: whether to skip catalogs present on one side only

    @returns: pairs of catalog paths, ordered by subdirectory and file name
    @rtype: [(string, string)*]
    """

    if os.path.isfile(dpath1):
        return [(dpath1, dpath2)]

    bysub1 = collect_and_split_fpaths(dpath1)
    bysub2 = collect_and_split_fpaths(dpath2)

    # Try to pair files by subdirectories.
    # FIXME: Can and should anything smarter be done?
    fpairs = []
    for subdir in sorted(set(bysub1) | set(bysub2)):
        flinks1 = bysub1.get(subdir, {})
        flinks2 = bysub2.get(subdir, {})
        for filename in sorted(set(flinks1) | set(flinks2)):
            fpath1 = flinks1.get(filename, "")
            fpath2 = flinks2.get(filename, "")
            if not paired_only or (fpath1 and fpath2):
                fpairs.append((fpath1, fpath2))

    return fpairs


def collect_and_split_fpaths (dpath):
    """
    Collect all catalog paths in given root, and construct mapping
    C{{subdir: {filename: path}}}, where subdir is relative to root.

    @param dpath: root directory path

    @returns: catalog paths by subdirectory and file name
    @rtype: {string: {string: string}}
    """

    # Normalize to exactly one trailing separator,
    # so that the root can be cut off as a prefix.
    dpath = dpath.rstrip(os.path.sep) + os.path.sep
    bysub = {}
    for fpath in collect_catalogs(dpath):
        if not fpath.startswith(dpath):
            error_wcl(_("@info",
                        "Internal problem with path collection (200)."))
        subdir = os.path.dirname(fpath[len(dpath):])
        bysub.setdefault(subdir, {})[os.path.basename(fpath)] = fpath

    return bysub


def collect_pspecs_from_vcs (vcs, paths, revs, paired_only):
    """
    Construct pair specifications for paths under version control.

    For each path and each non-C{None} revision, the path is exported
    from the VCS into a temporary location (recorded for later cleanup);
    a C{None} revision stands for the working copy path itself.
    Visual paths of exported catalogs are the original path with
    the revision appended after C{EDST.filerev_sep}.

    @param vcs: version control system object
    @param paths: versioned paths to diff
    @param revs: two revisions; C{None} means the working copy, and
        an empty string is resolved through C{vcs.revision}
    @param paired_only: passed through to C{collect_file_pairs}

    @returns: pair specifications, C{((path1, path2), (vpath1, vpath2))}
    @rtype: list
    """

    pspecs = []
    # FIXME: Use tempfile module.
    expref = "/tmp/poediff-export-"
    exind = 0
    for path in paths:
        expaths = {}
        for rev in revs:
            if rev is None:
                expaths[rev] = path
            else:
                expath = expref + "%d-%d-%s" % (os.getpid(), exind, rev)
                exind += 1
                if os.path.isfile(path):
                    expath += ".po"
                if not vcs.export(path, rev or None, expath):
                    error_wcl(_("@info",
                                "Cannot export path '%(path)s' "
                                "in revision '%(rev)s'.",
                                path=path, rev=rev))
                record_tmppath(expath)
                expaths[rev] = expath
        expaths = [os.path.normpath(expaths[x]) for x in revs]
        fpairs = collect_file_pairs(expaths[0], expaths[1], paired_only)
        for fpair in fpairs:
            fpaths = []
            vpaths = []
            for fpath, expath, rev in zip(fpair, expaths, revs):
                if rev is not None:
                    if not fpath:
                        fpath_m = ""
                    elif os.path.isdir(path):
                        # Map the exported file back under the original
                        # directory for presentation.
                        fpath_m = fpath[len(expath) + len(os.path.sep):]
                        fpath_m = os.path.join(path, fpath_m)
                    else:
                        fpath_m = path
                    rev_m = rev or vcs.revision(path)
                    vpath = fpath_m + EDST.filerev_sep + rev_m
                else:
                    vpath = fpath
                fpaths.append(fpath)
                vpaths.append(vpath)
            pspecs.append((fpaths, vpaths))

    return pspecs


def pairs_update_effort (pspecs, quiet=False):
    """
    Estimate the effort needed to update translation between paired catalogs.

    @param pspecs: pair specifications, each of the form
        C{((path1, path2), (vpath1, vpath2))}
    @param quiet: if true, no progress is shown

    @returns: list of C{(keyword, description, value, formatted value)}
        tuples; currently the single item C{"nntw"}, the sum of
        C{cats_update_effort} results over all pairs
    @rtype: [(string, string, float, string)*]
    """

    update_progress = None
    if len(pspecs) > 1 and not quiet:
        update_progress = init_file_progress([vp[1] for fp, vp in pspecs],
                            addfmt=t_("@info:progress", "Diffing: %(file)s"))
    nntw_total = 0.0
    for fpaths, vpaths in pspecs:
        upprogf = None
        if update_progress:
            upprogf = lambda: update_progress(vpaths[1])
            upprogf()
        # Quick check if files are binary equal.
        if fpaths[0] and fpaths[1] and filecmp.cmp(*fpaths):
            continue
        cats = []
        for fpath in fpaths:
            try:
                cats.append(Catalog(fpath, create=True, monitored=False))
            except Exception:
                # Keep the offending file in place while cleaning up.
                error_wcl(_("@info",
                            "Cannot parse catalog '%(file)s'.",
                            file=fpath), norem=[fpath])
        nntw = cats_update_effort(cats[0], cats[1], upprogf)
        nntw_total += nntw
    if update_progress:
        update_progress()

    updeff = [
        ("nntw", _("@item", "nominal newly translated words"),
         nntw_total, "%.0f" % nntw_total),
    ]
    return updeff


# Cleanup of temporary paths.
_tmppaths = set()

def record_tmppath (path):
    """
    Register a temporary path for removal by C{cleanup_tmppaths}.

    @param path: file or directory path
    """

    _tmppaths.add(path)


def cleanup_tmppaths (norem=None):
    """
    Remove all recorded temporary files and directories.

    @param norem: recorded paths which should not be removed;
        C{None} means to remove all
    """

    if norem is None:
        norem = ()
    for path in _tmppaths:
        if path in norem:
            continue
        if os.path.isfile(path):
            os.unlink(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)


def error_wcl (msg, norem=None):
    """
    Clean up temporary paths, then report the error through C{error}.

    @param msg: error message
    @param norem: recorded paths which should not be removed on cleanup;
        C{None} means to remove all
    """

    if norem is None:
        norem = set()
    elif not isinstance(norem, set):
        norem = set(norem)
    cleanup_tmppaths(norem)
    error(msg)


if __name__ == '__main__':
    exit_on_exception(main, cleanup_tmppaths)