Warning, /sdk/pology/bin/poselfmerge is written in an unsupported language. File is not indexed.
0001 #!/usr/bin/env python3
0002 # -*- coding: UTF-8 -*-
0003
0004 """
0005 Merge PO file with itself or compendium,
0006 to produce fuzzy matches on similar messages.
0007
0008 Documented in C{doc/user/misctools.docbook#sec-miselfmerge}.
0009
0010 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0011 @license: GPLv3
0012 """
0013
0014 import locale
0015 import os
0016 import shutil
0017 import sys
0018
0019 try:
0020 import fallback_import_paths
0021 except:
0022 pass
0023
0024 from pology import version, _, n_
0025 from pology.catalog import Catalog
0026 from pology.message import MessageUnsafe
0027 from pology.colors import ColorOptionParser
0028 import pology.config as pology_config
0029 from pology.fsops import collect_paths_cmdline, collect_catalogs
0030 from pology.fsops import exit_on_exception
0031 from pology.merge import merge_pofile
0032 from pology.report import report, error
0033 from pology.stdcmdopt import add_cmdopt_filesfrom, add_cmdopt_wrapping
0034 from pology.wrap import select_field_wrapping
0035
0036
def main ():
    """
    Parse the command line and self-merge every PO file given.

    Defaults for the options are read from the C{poselfmerge} section
    of the Pology configuration. Numeric options arrive as strings from
    the command line and are converted (and validated) here before
    each collected catalog is handed to L{self_merge_pofile}.
    """

    locale.setlocale(locale.LC_ALL, "")

    # Get defaults for command line options from global config.
    cfgsec = pology_config.section("poselfmerge")
    def_minwnex = cfgsec.integer("min-words-exact", 0)
    def_minasfz = cfgsec.real("min-adjsim-fuzzy", 0.0)
    def_fuzzex = cfgsec.boolean("fuzzy-exact", False)
    def_refuzz = cfgsec.boolean("rebase-fuzzies", False)

    # Setup options and parse the command line.
    usage = _("@info command usage",
        "%(cmd)s [options] POFILE...",
        cmd="%prog")
    desc = _("@info command description",
        "Merge PO file with itself or compendium, "
        "to produce fuzzy matches on similar messages.")
    ver = _("@info command version",
        "%(cmd)s (Pology) %(version)s\n"
        "Copyright © 2009, 2010 "
        "Chusslove Illich (Часлав Илић) <%(email)s>",
        cmd="%prog", version=version(), email="caslav.ilic@gmx.net")

    opars = ColorOptionParser(usage=usage, description=desc, version=ver)
    opars.add_option(
        "-A", "--min-adjsim-fuzzy",
        metavar=_("@info command line value placeholder", "RATIO"),
        action="store", dest="min_adjsim_fuzzy", default=def_minasfz,
        help=_("@info command line option description",
               "On fuzzy matches, the minimum adjusted similarity "
               "to accept the match, or else the message is left untranslated. "
               "Range is 0.0-1.0, where 0 means always to accept the match, "
               "and 1 never to accept; a practical range is 0.6-0.8."))
    opars.add_option(
        "-b", "--rebase-fuzzies",
        action="store_true", dest="rebase_fuzzies", default=def_refuzz,
        help=_("@info command line option description",
               "Before merging, clear those fuzzy messages whose predecessor "
               "(determined by previous fields) is still in the catalog."))
    opars.add_option(
        "-C", "--compendium",
        metavar=_("@info command line value placeholder", "POFILE"),
        action="append", dest="compendiums", default=[],
        help=_("@info command line option description",
               "Catalog with existing translations, to additionally use for "
               "direct and fuzzy matches. Can be repeated."))
    opars.add_option(
        "-v", "--verbose",
        action="store_true", dest="verbose", default=False,
        help=_("@info command line option description",
               "More detailed progress information."))
    opars.add_option(
        "-W", "--min-words-exact",
        metavar=_("@info command line value placeholder", "NUMBER"),
        action="store", dest="min_words_exact", default=def_minwnex,
        help=_("@info command line option description",
               "When using compendium, in case of exact match, "
               "minimum number of words that original text must have "
               "to accept translation without making it fuzzy. "
               "Zero means to always accept an exact match."))
    opars.add_option(
        "-x", "--fuzzy-exact",
        action="store_true", dest="fuzzy_exact", default=def_fuzzex,
        help=_("@info command line option description",
               "When using compendium, make all exact matches fuzzy."))
    add_cmdopt_wrapping(opars)
    add_cmdopt_filesfrom(opars)

    (op, fargs) = opars.parse_args()

    if len(fargs) < 1 and not op.files_from:
        error(_("@info", "No input files given."))

    # Could use some speedup.
    # Optional accelerator; silently skipped when the module is missing.
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    # Convert non-string options to needed types.
    # Only conversion failures are caught, so that unrelated problems
    # are not misreported as bad option values.
    # NOTE(review): error() is presumably terminating the program;
    # confirm in pology.report.
    try:
        op.min_words_exact = int(op.min_words_exact)
    except (TypeError, ValueError):
        error(_("@info",
                "Value to option %(opt)s must be an integer number, "
                "given '%(val)s' instead.",
                opt="--min-words-exact", val=op.min_words_exact))
    try:
        op.min_adjsim_fuzzy = float(op.min_adjsim_fuzzy)
    except (TypeError, ValueError):
        # The attribute name here was previously misspelled
        # (min_ajdsim_fuzzy), which raised AttributeError instead of
        # showing this message.
        error(_("@info",
                "Value to option %(opt)s must be a real number, "
                "given '%(val)s' instead.",
                opt="--min-adjsim-fuzzy", val=op.min_adjsim_fuzzy))

    # Assemble list of files.
    fnames = collect_paths_cmdline(rawpaths=fargs,
                                   filesfrom=op.files_from,
                                   respathf=collect_catalogs,
                                   abort=True)

    # Self-merge all catalogs.
    for fname in fnames:
        if op.verbose:
            report(_("@info:progress", "Self-merging: %(file)s", file=fname))
        self_merge_pofile(fname, op.compendiums,
                          op.fuzzy_exact, op.min_words_exact,
                          op.min_adjsim_fuzzy, op.rebase_fuzzies,
                          cfgsec, op)
0148
0149
def self_merge_pofile (catpath, compendiums=None,
                       fuzzex=False, minwnex=0, minasfz=0.0, refuzzy=False,
                       cfgsec=None, cmlopt=None):
    """
    Merge a PO file with a template derived from itself.

    Two temporary copies of the catalog are made next to it: a working
    copy, and a dummy template in which all non-obsolete messages are
    cleared and all obsolete messages removed. The working copy is
    merged against the dummy template and then moved over the original.
    Temporary files are removed even when a step fails.

    @param catpath: path of the PO file to self-merge (modified in place)
    @param compendiums: paths of compendium catalogs, passed to
        C{merge_pofile} as C{cmppaths}; C{None} means no compendiums
    @param fuzzex: passed to C{merge_pofile} as C{fuzzex}
    @param minwnex: passed to C{merge_pofile} as C{minwnex}
    @param minasfz: passed to C{merge_pofile} as C{minasfz}
    @param refuzzy: passed to C{merge_pofile} as C{refuzzy}
    @param cfgsec: configuration section given to
        C{select_field_wrapping}
    @param cmlopt: parsed command line options given to
        C{select_field_wrapping}
    """

    # Avoid a mutable default argument shared between calls.
    if compendiums is None:
        compendiums = []

    # Create temporary files for merging.
    ext = ".tmp-selfmerge"
    posuffix = ".po"
    catpath_mod = catpath + ext
    # Swap only a trailing ".po" for ".pot". A blanket replace would
    # also rewrite ".po" inside directory names (e.g. ".pology/") and
    # point the template at a different, possibly missing, directory.
    if catpath.endswith(posuffix):
        potpath = catpath[:-len(posuffix)] + ".pot" + ext
    else:
        potpath = catpath + ".pot" + ext

    try:
        shutil.copyfile(catpath, catpath_mod)
        shutil.copyfile(catpath, potpath)

        # Open catalog for pre-processing.
        cat = Catalog(potpath, monitored=False)

        # Decide wrapping policy.
        wrapping = select_field_wrapping(cfgsec, cat, cmlopt)

        # From the dummy template, clean all active messages and
        # remove all obsolete messages.
        for msg in cat:
            if msg.obsolete:
                cat.remove_on_sync(msg)
            else:
                msg.clear()
        cat.sync()

        # Merge with dummy template.
        merge_pofile(catpath_mod, potpath, update=True, wrapping=wrapping,
                     cmppaths=compendiums, fuzzex=fuzzex,
                     minwnex=minwnex, minasfz=minasfz, refuzzy=refuzzy,
                     abort=True)

        # Overwrite original with temporary catalog.
        shutil.move(catpath_mod, catpath)
    finally:
        # Remove whatever temporary files are still around; after
        # a successful move only the dummy template remains.
        for tmppath in (catpath_mod, potpath):
            if os.path.exists(tmppath):
                os.unlink(tmppath)
0188
0189
# Script entry point: main() is run through exit_on_exception
# only when this file is executed directly, not when imported.
if __name__ == "__main__":
    exit_on_exception(main)