File indexing completed on 2024-04-21 16:29:16

0001 # -*- coding: UTF-8 -*-
0002 
0003 """
0004 Merge PO files.
0005 
0006 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0007 @license: GPLv3
0008 """
0009 
0010 import os
0011 import shutil
0012 from tempfile import NamedTemporaryFile
0013 
0014 from pology import PologyError, _, n_
0015 from pology.catalog import Catalog
0016 from pology.diff import editprob
0017 from pology.fsops import unicode_to_str
0018 from pology.message import Message
0019 from pology.split import proper_words
0020 
0021 
def merge_pofile (catpath, tplpath,
                  outpath=None, update=False, wrapping=None,
                  fuzzymatch=True, cmppaths=None, quiet=False,
                  fuzzex=False, minwnex=0, minasfz=0.0, refuzzy=False,
                  getcat=False, monitored=True,
                  ignpotdate=False, abort=False):
    """
    Merge a PO file with the PO template.

    This function is a frontend to C{msgmerge} command,
    providing some additional features on demand.

    This function is usually used in one of four ways:
      - create a new PO file: the path is given with C{outpath} parameter
      - update the original PO file: C{update} is set to C{True}
            and C{outpath} is not given
      - only get merged catalog object: C{getcat} is set to C{True} and
            neither C{outpath} nor C{update} are issued;
            no PO file gets created or modified (except for temporaries,
            which are cleaned up on return)
      - check whether merging is possible: neither of C{outpath},
            C{update}, or C{getcat} are issued;
            if C{True} is returned, merging succeeded.

    The return value differs based on C{getcat}.
    If C{getcat} is C{False}, the return value is C{True} if merging
    succeeded (C{msgmerge} exited normally), and C{False} if not.
    If C{getcat} is C{True}, a catalog object on the merged catalog
    is returned if the merging succeeded, and C{None} if not.
    However, if C{abort} is set to C{True}, if C{msgmerge} fails
    the program aborts with an error message.

    When catalog object is returned, its sync state is undefined.
    If it needs to be in sync before use, it should be synced manually.

    C{msgmerge} is executed directly with an argument list (no shell
    is involved), so paths may contain spaces and shell metacharacters;
    they are passed to C{msgmerge} verbatim.

    @param catpath: path to PO file to merge
    @type catpath: string
    @param tplpath: path to PO template
    @type tplpath: string
    @param outpath: path to output PO file
    @type outpath: string
    @param update: whether to update the PO file in place
    @type update: bool
    @param wrapping: the wrapping policy (see the parameter of the same name
        to L{catalog constructor<catalog.Catalog>})
    @type wrapping: sequence of strings
    @param fuzzymatch: whether to perform fuzzy matching
    @type fuzzymatch: bool
    @param cmppaths: paths to compendium files to be used on merging
    @type cmppaths: sequence of strings
    @param quiet: whether C{msgmerge} should operate quietly
    @type quiet: bool
    @param fuzzex: whether to fuzzy exact matches from compendia
    @type fuzzex: bool
    @param minwnex: minimal number of words in the original in exact match
        from compendia to not fuzzy the message (a very large number
        approximates C{fuzzex} set to C{True}).
    @type minwnex: int
    @param minasfz: minimal similarity between the previous and the
        current C{msgid} (as computed by L{editprob<diff.editprob>})
        for a fuzzy match to be kept; fuzzy messages having previous
        fields and scoring below this limit are cleared.
        Values not greater than zero disable the check.
    @type minasfz: float
    @param refuzzy: whether to "rebase" fuzzy messages, i.e. remove prior
        to merging those fuzzy messages whose translated counterparts
        (determined by previous fields) still exist in the catalog.
        This puts possibly newer translation into such messages,
        or even leads to a better fuzzy match.
        Has effect only if C{fuzzymatch} is C{True}.
    @type refuzzy: bool
    @param getcat: whether to return catalog object on merged file
    @type getcat: bool
    @param monitored: if C{getcat} is in effect, whether to open catalog
        in monitoring mode (like the parameter to catalog constructor)
    @type monitored: bool
    @param ignpotdate: whether to ignore changed C{POT-Creation-Date}
        if there were no other changes, resetting it to original value
    @type ignpotdate: bool
    @param abort: whether to abort execution if C{msgmerge} fails
    @type abort: bool

    @returns: whether merging succeeded, or catalog object
    @rtype: bool or L{Catalog<catalog.Catalog>} or C{None}

    @raise PologyError: if a compendium path is not an existing file,
        or if C{msgmerge} fails while C{abort} is C{True}
    """

    # Imported here so that this function is self-contained with respect
    # to the module-level import block.
    import subprocess

    # "basic" wrapping is delegated to msgmerge itself; any other wrapping
    # policy has to be applied afterwards by re-syncing through Catalog.
    if wrapping is not None:
        wrap = "basic" in wrapping
        otherwrap = bool(set(wrapping).difference(["basic"]))
    else:
        wrap = True
        otherwrap = False

    # Store original catalog if change in template creation date
    # alone should be ignored, for check at the end.
    if ignpotdate:
        orig_cat = Catalog(catpath, monitored=False)

    # Determine which special operations are to be done.
    correct_exact_matches = cmppaths and (fuzzex or minwnex > 0)
    correct_fuzzy_matches = minasfz > 0.0
    rebase_existing_fuzzies = refuzzy and fuzzymatch

    # Pre-process catalog if necessary.
    if correct_exact_matches or rebase_existing_fuzzies:
        # Only rebasing of fuzzies changes the catalog before merging,
        # collecting of keys is read-only.
        may_modify = rebase_existing_fuzzies
        cat = Catalog(catpath, monitored=may_modify)

        # In case compendium is being used,
        # collect keys of all non-translated messages,
        # to later check which exact matches need to be fuzzied.
        # New non-translated messages can come from the template,
        # make sure these too are taken into account.
        if correct_exact_matches:
            nontrkeys = set()
            trkeys = set()
            for msg in cat:
                if not msg.translated:
                    nontrkeys.add(msg.key)
                else:
                    trkeys.add(msg.key)
            tcat = Catalog(tplpath, monitored=False)
            for msg in tcat:
                if msg.key not in trkeys:
                    nontrkeys.add(msg.key)

        # If requested, remove all untranslated messages,
        # and replace every fuzzy message which has previous fields
        # with a dummy previous translated message
        # (unless such message already exists in the catalog).
        # This way, untranslated messages will get fuzzy matched again,
        # and fuzzy messages may get updated translation.
        # However, do not do this for messages where a previous translated
        # message does already exist in the catalog, is fuzzy, and
        # does not have previous fields, since then that one will be
        # fuzzy matched and propagate its lack of previous fields.
        if rebase_existing_fuzzies:
            rebase_dummy_messages = []
            for msg in cat:
                if msg.untranslated:
                    cat.remove_on_sync(msg)
                elif msg.fuzzy and msg.msgid_previous:
                    omsgs = cat.select_by_key(msg.msgctxt_previous,
                                              msg.msgid_previous)
                    if (   not omsgs
                        or not omsgs[0].fuzzy
                        or omsgs[0].msgid_previous is not None
                    ):
                        cat.remove_on_sync(msg)
                    if not omsgs:
                        dmsg = Message()
                        dmsg.msgctxt = msg.msgctxt_previous
                        dmsg.msgid = msg.msgid_previous
                        dmsg.msgid_plural = msg.msgid_plural_previous
                        dmsg.msgstr = msg.msgstr
                        cat.add_last(dmsg)
                        rebase_dummy_messages.append(dmsg)

        # NOTE(review): this syncs the catalog opened from catpath, so
        # presumably the input file is rewritten on disk here even when
        # update is False -- confirm that this is intended.
        if may_modify:
            cat.sync()

    # Prepare temporary file if output path not given and not in update mode.
    # The reference is kept in a local so that the file lives until return.
    if not outpath and not update:
        tmpf = NamedTemporaryFile(prefix="pology-merged-", suffix=".po")
        outpath = tmpf.name

    # Merge.
    # The command is assembled as an argument list and executed without
    # a shell, so that paths with spaces or shell metacharacters are
    # handed over to msgmerge intact.
    cmdargs = ["msgmerge"]
    if not update:
        cmdargs.extend(["--output-file", outpath])
    else:
        cmdargs.extend(["--update", "--backup", "none"])
    if fuzzymatch:
        cmdargs.append("--previous")
    else:
        cmdargs.append("--no-fuzzy-matching")
    if not wrap:
        cmdargs.append("--no-wrap")
    for cmppath in (cmppaths or []):
        if not os.path.isfile(cmppath):
            raise PologyError(
                _("@info",
                  "Compendium does not exist at '%(path)s'.",
                  path=cmppath))
        cmdargs.extend(["--compendium", cmppath])
    if quiet:
        cmdargs.append("--quiet")
    cmdargs.extend([catpath, tplpath])
    try:
        mrgres = subprocess.call([unicode_to_str(arg) for arg in cmdargs])
    except OSError:
        # msgmerge could not be started at all (e.g. not installed);
        # report it as an ordinary merge failure, like a shell would.
        mrgres = 127
    if mrgres != 0:
        if abort:
            raise PologyError(
                _("@info",
                  "Cannot merge PO file '%(file1)s' with template '%(file2)s'.",
                  file1=catpath, file2=tplpath))
        return None if getcat else False

    # If the catalog had only header and no messages,
    # msgmerge will not write out anything.
    # In such case, just copy the initial file to output path.
    if outpath and not os.path.isfile(outpath):
        shutil.copyfile(catpath, outpath)
    # If both the output path has been given and update requested,
    # copy the output file over the initial file.
    if update and outpath and catpath != outpath:
        shutil.copyfile(outpath, catpath)

    # Post-process merged catalog if necessary.
    if (   getcat or otherwrap or correct_exact_matches
        or correct_fuzzy_matches or ignpotdate or rebase_existing_fuzzies
    ):
        # If fine wrapping requested and catalog should not be returned,
        # everything has to be reformatted, so no need to monitor the catalog.
        catpath1 = outpath or catpath
        monitored1 = monitored if getcat else (not otherwrap)
        cat = Catalog(catpath1, monitored=monitored1, wrapping=wrapping)

        # In case compendium is being used,
        # make fuzzy exact matches which do not pass the word limit.
        if correct_exact_matches:
            acc = cat.accelerator()
            for msg in cat:
                if (    msg.key in nontrkeys and msg.translated
                    and (   fuzzex
                         or len(proper_words(msg.msgid, accels=acc)) < minwnex)
                ):
                    msg.fuzzy = True
                    msg.msgctxt_previous = msg.msgctxt
                    msg.msgid_previous = msg.msgid
                    msg.msgid_plural_previous = msg.msgid_plural

        # Eliminate fuzzy matches not passing the adjusted similarity limit.
        if correct_fuzzy_matches:
            for msg in cat:
                if msg.fuzzy and msg.msgid_previous is not None:
                    if editprob(msg.msgid_previous, msg.msgid) < minasfz:
                        msg.clear()

        # Revert template creation date change if it was the only change.
        # The original date is put in first, and the new date is restored
        # only if the catalogs still differ in something else.
        if ignpotdate:
            fname = "POT-Creation-Date"
            orig_potdate = orig_cat.header.get_field_value(fname)
            new_potdate = cat.header.get_field_value(fname)
            cat.header.replace_field_value(fname, orig_potdate)
            if cat != orig_cat:
                cat.header.replace_field_value(fname, new_potdate)

        # Remove dummy messages added for rebasing of fuzzy messages
        # that were obsoleted instead of promoted to fuzzy.
        if rebase_existing_fuzzies:
            for dmsg in rebase_dummy_messages:
                if dmsg in cat and cat[dmsg].obsolete:
                    cat.remove_on_sync(dmsg)

        # When the catalog is returned, syncing is left to the caller.
        if not getcat:
            cat.sync(force=otherwrap)

    return cat if getcat else True
0275