File indexing completed on 2024-11-10 11:08:51

0001 #!/usr/bin/env python3
0002 # -*- coding: UTF-8 -*-
0003 
0004 """
0005 Create embedded diffs of PO files.
0006 
0007 Documented in C{doc/user/diffpatch.docbook#sec-dppatch}.
0008 
0009 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0010 @license: GPLv3
0011 """
0012 
0013 import filecmp
0014 import locale
0015 import os
0016 import shutil
0017 import sys
0018 
0019 try:
0020     import fallback_import_paths
0021 except:
0022     pass
0023 
0024 from pology import version, _, n_, t_
0025 from pology.catalog import Catalog
0026 from pology.message import MessageUnsafe
0027 from pology.colors import ColorOptionParser, set_coloring_globals, cjoin
0028 import pology.config as pology_config
0029 from pology.fsops import str_to_unicode, collect_catalogs
0030 from pology.fsops import exit_on_exception
0031 from pology.diff import msg_ediff
0032 from pology.report import error, warning, report, format_item_list
0033 from pology.report import list_options
0034 from pology.report import init_file_progress
0035 from pology.stdcmdopt import add_cmdopt_colors
0036 from pology.vcs import available_vcs, make_vcs
0037 
0038 from pology.internal.poediffpatch import MPC, EDST
0039 from pology.internal.poediffpatch import msg_eq_fields, msg_copy_fields
0040 from pology.internal.poediffpatch import msg_clear_prev_fields
0041 from pology.internal.poediffpatch import diff_cats, diff_hdrs
0042 from pology.internal.poediffpatch import init_ediff_header
0043 from pology.internal.poediffpatch import get_msgctxt_for_headers
0044 from pology.internal.poediffpatch import cats_update_effort
0045 
0046 
def main ():
    """
    Run the C{poediff} command line tool.

    Parses the command line, pairs up the PO files to compare (either the
    two given paths, or revisions of paths under version control), and then
    reports either the embedded diff or the update effort estimate.
    Temporary paths recorded during the run are removed before returning.
    """

    locale.setlocale(locale.LC_ALL, "")

    # Get defaults for command line options from global config.
    cfgsec = pology_config.section("poediff")
    def_do_merge = cfgsec.boolean("merge", True)

    # Setup options and parse the command line.
    usage = _("@info command usage",
        "%(cmd)s [OPTIONS] FILE1 FILE2\n"
        "%(cmd)s [OPTIONS] DIR1 DIR2\n"
        "%(cmd)s -c VCS [OPTIONS] [PATHS...]",
        cmd="%prog")
    desc = _("@info command description",
        "Create embedded diffs of PO files.")
    ver = _("@info command version",
        "%(cmd)s (Pology) %(version)s\n"
        "Copyright © 2009, 2010 "
        "Chusslove Illich (Часлав Илић) &lt;%(email)s&gt;",
        cmd="%prog", version=version(), email="caslav.ilic@gmx.net")

    # The "none" pseudo-VCS is not offered to the user.
    showvcs = list(set(available_vcs()).difference(["none"]))
    showvcs.sort()

    opars = ColorOptionParser(usage=usage, description=desc, version=ver)
    opars.add_option(
        "-b", "--skip-obsolete",
        action="store_true", dest="skip_obsolete", default=False,
        help=_("@info command line option description",
               "Do not diff obsolete messages."))
    opars.add_option(
        "-c", "--vcs",
        metavar=_("@info command line value placeholder", "VCS"),
        dest="version_control",
        help=_("@info command line option description",
               "Paths are under version control by given VCS; "
               "can be one of: %(vcslist)s.",
               vcslist=format_item_list(showvcs)))
    opars.add_option(
        "--list-options",
        action="store_true", dest="list_options", default=False,
        help=_("@info command line option description",
               "List the names of available options."))
    opars.add_option(
        "--list-vcs",
        action="store_true", dest="list_vcs", default=False,
        help=_("@info command line option description",
               "List the keywords of known version control systems."))
    opars.add_option(
        "-n", "--no-merge",
        action="store_false", dest="do_merge", default=def_do_merge,
        help=_("@info command line option description",
               "Do not try to indirectly pair messages by merging catalogs."))
    opars.add_option(
        "-o", "--output",
        metavar=_("@info command line value placeholder", "POFILE"),
        dest="output",
        help=_("@info command line option description",
               "Output diff catalog to a file instead of stdout."))
    opars.add_option(
        "-p", "--paired-only",
        action="store_true", dest="paired_only", default=False,
        help=_("@info command line option description",
               "When two directories are diffed, ignore catalogs which "
               "are not present in both directories."))
    opars.add_option(
        "-q", "--quiet",
        action="store_true", dest="quiet", default=False,
        help=_("@info command line option description",
               "Do not display any progress info."))
    opars.add_option(
        "-Q", "--quick",
        action="store_true", dest="quick", default=False,
        help=_("@info command line option description",
               "Equivalent to %(opt)s.",
               opt="-bns"))
    opars.add_option(
        "-r", "--revision",
        metavar=_("@info command line value placeholder", "REV1[:REV2]"),
        dest="revision",
        help=_("@info command line option description",
               "Revision from which to diff to current working copy, "
               "or from first to second revision (if VCS is given)."))
    opars.add_option(
        "-s", "--strip-headers",
        action="store_true", dest="strip_headers", default=False,
        help=_("@info command line option description",
               "Do not diff headers and do not write out the top header "
               "(resulting output cannot be used as patch)."))
    opars.add_option(
        "-U", "--update-effort",
        action="store_true", dest="update_effort", default=False,
        help=_("@info command line option description",
               "Instead of outputting the diff, calculate and output "
               "an estimate of the effort that was needed to update "
               "the translation from old to new paths. "
               "Ignores %(opt1)s and %(opt2)s options.",
               opt1="-b", opt2="-n"))
    add_cmdopt_colors(opars)

    (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:]))

    if op.list_options:
        report(list_options(opars))
        sys.exit(0)
    if op.list_vcs:
        report("\n".join(showvcs))
        sys.exit(0)

    # Could use some speedup.
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    set_coloring_globals(ctype=op.coloring_type, outdep=(not op.raw_colors))

    # -Q is a shorthand for -b -n -s.
    if op.quick:
        op.do_merge = False
        op.skip_obsolete = True
        op.strip_headers = True

    # Create VCS.
    vcs = None
    if op.version_control:
        if op.version_control not in available_vcs(flat=True):
            error_wcl(_("@info",
                        "Unknown VCS '%(vcs)s' selected.",
                        vcs=op.version_control))
        vcs = make_vcs(op.version_control)

    # Sanity checks on paths.
    paths = free_args
    if not vcs:
        if len(paths) != 2:
            error_wcl(_("@info",
                        "Exactly two paths are needed for diffing."))
        for path in paths:
            if not os.path.exists(path):
                error_wcl(_("@info",
                            "Path '%(path)s' does not exist.",
                            path=path))
        p1, p2 = paths
        if (not (   (os.path.isfile(p1) and (os.path.isfile(p2)))
                 or (os.path.isdir(p1) and (os.path.isdir(p2))))
        ):
            error_wcl(_("@info",
                        "Both paths must be either files or directories."))
    else:
        # Default to current working dir if no paths given.
        paths = paths or ["."]
        for path in paths:
            if not os.path.exists(path):
                error_wcl(_("@info",
                            "Path '%(path)s' does not exist.",
                            path=path))
            if not vcs.is_versioned(path):
                error_wcl(_("@info",
                            "Path '%(path)s' is not under version control.",
                            path=path))

    # Collect and pair PO files in given paths.
    # Each pair specification is in the form of
    # ((path1, path2), (vpath1, vpath2))
    # where path* are the real paths, and vpath* the visual paths to be
    # presented in diff output.
    if not vcs:
        fpairs = collect_file_pairs(paths[0], paths[1], op.paired_only)
        pspecs = [(x, x) for x in fpairs]
    else:
        # Splitting at most once means everything after the first colon
        # is taken as the second revision.
        revlst = op.revision.split(":", 1) if op.revision else []
        if len(revlst) > 2:
            # Defensive only: cannot trigger with the single split above.
            error_wcl(_("@info",
                        "Too many revisions given: %(revlist)s.",
                        revlist=format_item_list(revlst)))
        elif len(revlst) == 2:
            revs = revlst # diff between revisions
        elif len(revlst) == 1:
            revs = [revlst[0], None] # diff from revision to working copy
        else:
            revs = ["", None] # diff from head to working copy
            # Replace original paths with modified/added catalogs.
            paths_nc = []
            for path in paths:
                for cpath in vcs.to_commit(path):
                    if cpath.endswith(".po") or cpath.endswith(".pot"):
                        paths_nc.append(cpath)
            paths = paths_nc
            paths.sort()
        pspecs = collect_pspecs_from_vcs(vcs, paths, revs, op.paired_only)

    if not op.update_effort:
        ecat, ndiffed = diff_pairs(pspecs, op.do_merge,
                                   colorize=(not op.output),
                                   shdr=op.strip_headers,
                                   noobs=op.skip_obsolete,
                                   quiet=op.quiet)
        if ndiffed > 0:
            hmsgctxt = ecat.header.get_field_value(EDST.hmsgctxt_field)
            lines = []
            msgs = list(ecat)
            if not op.strip_headers:
                msgs.insert(0, ecat.header.to_msg())
            for msg in msgs:
                if op.strip_headers and msg.msgctxt == hmsgctxt:
                    # With stripped headers, a header message is reduced
                    # to a few comment lines separating the catalogs.
                    sepl = []
                    sepl += [msg.manual_comment[0]]
                    sepl += msg.msgid.split("\n")[:2]
                    lines.extend(["# %s\n" % x for x in sepl])
                    lines.append("\n")
                else:
                    lines.extend(msg.to_lines(force=True, wrapf=ecat.wrapf()))
            diffstr = cjoin(lines)[:-1] # remove last newline
            if op.output:
                # The diff is encoded explicitly, so the file must be
                # opened in binary mode; text mode rejects bytes.
                with open(op.output, "wb") as ofl:
                    ofl.write(diffstr.encode(ecat.encoding()))
            else:
                report(diffstr)
    else:
        updeff = pairs_update_effort(pspecs, quiet=op.quiet)
        ls = []
        for kw, qdesc, val, fmtval in updeff:
            ls.append(_("@info",
                        "%(quantity)s: %(value)s",
                        quantity=qdesc, value=fmtval))
        report("\n".join(ls))

    # Clean up.
    cleanup_tmppaths()
0277 
0278 
def diff_pairs (pspecs, merge,
                colorize=False, wrem=True, wadd=True, shdr=False, noobs=False,
                quiet=False):
    """
    Diff all paired catalogs into a single embedded diff catalog.

    @param pspecs: pair specifications, each of the form
        C{((path1, path2), (vpath1, vpath2))}; an empty real path stands
        for a missing catalog
    @param merge: passed through to C{diff_cats}
    @param colorize: passed through to C{diff_cats} and C{diff_hdrs}
    @param wrem: passed through to C{diff_cats}
    @param wadd: passed through to C{diff_cats}
    @param shdr: if true, diffed headers are not added to the returned count
    @param noobs: passed through to C{diff_cats}
    @param quiet: if true, no progress is shown

    @returns: the embedded diff catalog and the number of diffed messages
    """

    # Create diffs of messages.
    # Note: Headers will be collected and diffed after all messages,
    # to be able to check if any decoration to their message keys is needed.
    wrappings = {}
    ecat = Catalog("", create=True, monitored=False)
    hspecs = []
    ndiffed = 0
    update_progress = None
    # Progress is only worth showing when there is more than one pair.
    if len(pspecs) > 1 and not quiet:
        update_progress = init_file_progress([vp[1] for fp, vp in pspecs],
                            addfmt=t_("@info:progress", "Diffing: %(file)s"))
    for fpaths, vpaths in pspecs:
        upprogf = None
        if update_progress:
            upprogf = lambda: update_progress(vpaths[1])
            upprogf()
        # Quick check if files are binary equal.
        if fpaths[0] and fpaths[1] and filecmp.cmp(*fpaths):
            continue
        cats = []
        for fpath in fpaths:
            try:
                cats.append(Catalog(fpath, create=True, monitored=False))
            except Exception:
                # Only real failures are reported as parse errors;
                # KeyboardInterrupt and SystemExit propagate untouched.
                error_wcl(_("@info",
                            "Cannot parse catalog '%(file)s'.",
                            file=fpath), norem=[fpath])
        tpos = len(ecat)
        cndiffed = diff_cats(cats[0], cats[1], ecat,
                             merge, colorize, wrem, wadd, noobs, upprogf)
        # A catalog created on the spot (missing file) has no header to diff.
        hspecs.append(([not x.created() and x.header or None
                        for x in cats], vpaths, tpos, cndiffed))
        ndiffed += cndiffed
        # Collect and count wrapping policy used for to-catalog.
        wrapping = cats[1].wrapping()
        wrappings[wrapping] = wrappings.get(wrapping, 0) + 1
    if update_progress:
        update_progress()

    # Find appropriate length of context for header messages.
    hmsgctxt = get_msgctxt_for_headers(ecat)
    init_ediff_header(ecat.header, hmsgctxt=hmsgctxt)

    # Create diffs of headers.
    # If some of the messages were diffed,
    # header must be added even if there is no difference.
    incpos = 0
    for hdrs, vpaths, pos, cndiffed in hspecs:
        ehmsg, anydiff = diff_hdrs(hdrs[0], hdrs[1], vpaths[0], vpaths[1],
                                   hmsgctxt, ecat, colorize)
        if anydiff or cndiffed:
            # Positions were recorded before any header was inserted,
            # so shift by the number of headers added so far.
            ecat.add(ehmsg, pos + incpos)
            incpos += 1
    # Add diffed headers to total count only if header stripping not in effect.
    if not shdr:
        ndiffed += incpos

    # Set the most used wrapping policy for the ediff catalog.
    if wrappings:
        # Stable sort: among equally frequent policies the last seen wins.
        wrapping = sorted(wrappings.items(), key=lambda x: x[1])[-1][0]
        ecat.set_wrapping(wrapping)
        if wrapping is not None:
            ecat.header.set_field("X-Wrapping", ", ".join(wrapping))

    return ecat, ndiffed
0350 
0351 
0352 # Collect and pair catalogs as list [(fpath1, fpath2)].
0353 # Where a pair cannot be found, empty string is given for path
0354 # (unless paired_only is True, when non-paired catalogs are ignored).
def collect_file_pairs (dpath1, dpath2, paired_only):
    """
    Pair up catalogs found under two paths.

    If the first path is a file, the two paths are the single pair.
    Otherwise catalogs are matched by relative subdirectory and file name.

    @param dpath1: first file or directory path
    @param dpath2: second file or directory path
    @param paired_only: if true, catalogs without a counterpart are dropped;
        otherwise the missing side is given as empty string

    @returns: list of C{(fpath1, fpath2)} tuples, ordered by subdirectory
        and then by file name
    """

    # Two plain files make up the one and only pair.
    if os.path.isfile(dpath1):
        return [(dpath1, dpath2)]

    tree1 = collect_and_split_fpaths(dpath1)
    tree2 = collect_and_split_fpaths(dpath2)

    # Try to pair files by subdirectories.
    # FIXME: Can and should anything smarter be done?
    pairs = []
    for subdir in sorted(set(tree1) | set(tree2)):
        names1 = tree1.get(subdir, {})
        names2 = tree2.get(subdir, {})
        for name in sorted(set(names1) | set(names2)):
            left = names1.get(name, "")
            right = names2.get(name, "")
            if paired_only and not (left and right):
                continue
            pairs.append((left, right))

    return pairs
0379 
0380 
0381 # Collect all catalog paths in given root, and construct mapping
0382 # {subdir: {filename: path}}, where subdir is relative to root.
def collect_and_split_fpaths (dpath):
    """
    Map catalogs under a root directory by subdirectory and file name.

    @param dpath: root directory path

    @returns: dictionary C{{subdir: {filename: path}}}, where C{subdir}
        is relative to the root
    """

    # Normalize root to exactly one trailing separator,
    # so that stripping it as a prefix yields the relative path.
    root = dpath.rstrip(os.path.sep) + os.path.sep
    bysub = {}
    for fpath in collect_catalogs(root):
        if not fpath.startswith(root):
            error_wcl(_("@info",
                        "Internal problem with path collection (200)."))
        relpath = fpath[len(root):]
        subdir = os.path.dirname(relpath)
        bysub.setdefault(subdir, {})[os.path.basename(fpath)] = fpath

    return bysub
0398 
0399 
def collect_pspecs_from_vcs (vcs, paths, revs, paired_only):
    """
    Export versioned paths and build pair specifications for diffing.

    For each path, every non-C{None} revision is exported to a temporary
    location (recorded for later cleanup), while a C{None} revision stands
    for the path itself in the working copy.

    @param vcs: version control system object; its C{export} and
        C{revision} methods are used
    @param paths: versioned file or directory paths
    @param revs: two revisions to diff between; empty string is exported
        with no explicit revision, C{None} means the working copy
    @param paired_only: passed through to C{collect_file_pairs}

    @returns: list of C{((path1, path2), (vpath1, vpath2))} specifications,
        where visual paths of exported files carry the revision
    """

    import tempfile

    pspecs = []
    # Exports go into the system temporary directory, not hard-coded /tmp.
    # FIXME: Export names are still predictable; use tempfile.mkdtemp
    # once cleanup can deal with exports nested in a private directory.
    expref = os.path.join(tempfile.gettempdir(), "poediff-export-")
    exind = 0
    for path in paths:
        expaths = {}
        for rev in revs:
            if rev is None:
                expaths[rev] = path
            else:
                expath = expref + "%d-%d-%s" % (os.getpid(), exind, rev)
                exind += 1
                if os.path.isfile(path):
                    expath += ".po"
                if not vcs.export(path, rev or None, expath):
                    error_wcl(_("@info",
                                "Cannot export path '%(path)s' "
                                "in revision '%(rev)s'.",
                                path=path, rev=rev))
                record_tmppath(expath)
                expaths[rev] = expath
        expaths = [os.path.normpath(expaths[x]) for x in revs]
        fpairs = collect_file_pairs(expaths[0], expaths[1], paired_only)
        for fpair in fpairs:
            fpaths = []
            vpaths = []
            for fpath, expath, rev in zip(fpair, expaths, revs):
                if rev is not None:
                    # Map the exported path back to the versioned path,
                    # for presentation in the diff.
                    if not fpath:
                        fpath_m = ""
                    elif os.path.isdir(path):
                        fpath_m = fpath[len(expath) + len(os.path.sep):]
                        fpath_m = os.path.join(path, fpath_m)
                    else:
                        fpath_m = path
                    rev_m = rev or vcs.revision(path)
                    vpath = fpath_m + EDST.filerev_sep + rev_m
                else:
                    vpath = fpath
                fpaths.append(fpath)
                vpaths.append(vpath)
            pspecs.append((fpaths, vpaths))

    return pspecs
0446 
0447 
def pairs_update_effort (pspecs, quiet=False):
    """
    Sum up the update effort estimate over all paired catalogs.

    @param pspecs: pair specifications, each of the form
        C{((path1, path2), (vpath1, vpath2))}
    @param quiet: if true, no progress is shown

    @returns: list of C{(keyword, description, value, formatted value)}
        tuples, one per computed quantity
    """

    update_progress = None
    # Progress is only worth showing when there is more than one pair.
    if len(pspecs) > 1 and not quiet:
        update_progress = init_file_progress([vp[1] for fp, vp in pspecs],
                            addfmt=t_("@info:progress", "Diffing: %(file)s"))
    nntw_total = 0.0
    for fpaths, vpaths in pspecs:
        upprogf = None
        if update_progress:
            upprogf = lambda: update_progress(vpaths[1])
            upprogf()
        # Quick check if files are binary equal.
        if fpaths[0] and fpaths[1] and filecmp.cmp(*fpaths):
            continue
        cats = []
        for fpath in fpaths:
            try:
                cats.append(Catalog(fpath, create=True, monitored=False))
            except Exception:
                # Only real failures are reported as parse errors;
                # KeyboardInterrupt and SystemExit propagate untouched.
                error_wcl(_("@info",
                            "Cannot parse catalog '%(file)s'.",
                            file=fpath), norem=[fpath])
        nntw = cats_update_effort(cats[0], cats[1], upprogf)
        nntw_total += nntw
    if update_progress:
        update_progress()

    updeff = [
        ("nntw", _("@item", "nominal newly translated words"),
         nntw_total, "%.0f" % nntw_total),
    ]
    return updeff
0481 
0482 
# Cleanup of temporary paths.
# Registry of temporary files and directories created during the run;
# filled by record_tmppath() and removed from disk by cleanup_tmppaths().
_tmppaths = set()
0485 
def record_tmppath (path):
    """
    Register a temporary path for later removal by C{cleanup_tmppaths}.

    @param path: path of the temporary file or directory
    """

    _tmppaths.add(path)
0489 
0490 
def cleanup_tmppaths (norem=set()):
    """
    Remove all recorded temporary paths from disk.

    Files are unlinked and directories removed recursively;
    recorded paths which no longer exist are silently skipped.

    @param norem: recorded paths that should be left in place
    """

    for tmppath in _tmppaths:
        if tmppath not in norem:
            if os.path.isfile(tmppath):
                os.unlink(tmppath)
            elif os.path.isdir(tmppath):
                shutil.rmtree(tmppath)
0500 
0501 
def error_wcl (msg, norem=set()):
    """
    Report an error after cleaning up temporary paths.

    @param msg: error message, handed to C{error}
    @param norem: recorded temporary paths to leave in place;
        any iterable is accepted and converted to a set
    """

    keep = norem if isinstance(norem, set) else set(norem)
    cleanup_tmppaths(keep)
    error(msg)
0508 
0509 
if __name__ == '__main__':
    # Run main() through exit_on_exception, handing it cleanup_tmppaths
    # (presumably called when an exception escapes main, so that temporary
    # exports do not linger -- confirm in pology.fsops).
    exit_on_exception(main, cleanup_tmppaths)