File indexing completed on 2024-10-27 11:34:18
# -*- coding: UTF-8 -*-

"""
Merge PO files.

@author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
@license: GPLv3
"""

import os
import shutil
import subprocess
from tempfile import NamedTemporaryFile

from pology import PologyError, _, n_
from pology.catalog import Catalog
from pology.diff import editprob
from pology.fsops import unicode_to_str
from pology.message import Message
from pology.split import proper_words


def merge_pofile (catpath, tplpath,
                  outpath=None, update=False, wrapping=None,
                  fuzzymatch=True, cmppaths=None, quiet=False,
                  fuzzex=False, minwnex=0, minasfz=0.0, refuzzy=False,
                  getcat=False, monitored=True,
                  ignpotdate=False, abort=False):
    """
    Merge a PO file with the PO template.

    This function is a frontend to C{msgmerge} command,
    providing some additional features on demand.

    This function is usually used in one of four ways:
      - create a new PO file: the path is given with C{outpath} parameter
      - update the original PO file: C{update} is set to C{True}
        and C{outpath} is not given
      - only get merged catalog object: C{getcat} is set to C{True} and
        neither C{outpath} nor C{update} are issued;
        no PO file gets created or modified (except for temporaries,
        which are cleaned up on return)
      - check whether merging is possible: neither of C{outpath},
        C{update}, or C{getcat} are issued;
        if C{True} is returned, merging succeeded.

    The return value differs based on C{getcat}.
    If C{getcat} is C{False}, the return value is C{True} if merging
    succeeded (C{msgmerge} exited normally), and C{False} if not.
    If C{getcat} is C{True}, a catalog object on the merged catalog
    is returned if the merging succeeded, and C{None} if not.
    However, if C{abort} is set to C{True}, if C{msgmerge} fails
    the program aborts with an error message.

    When catalog object is returned, its sync state is undefined.
    If it needs to be in sync before use, it should be synced manually.

    C{msgmerge} is executed directly with an argument list (no shell
    is involved), so paths containing spaces or shell metacharacters
    are passed through verbatim.

    @param catpath: path to PO file to merge
    @type catpath: string
    @param tplpath: path to PO template
    @type tplpath: string
    @param outpath: path to output PO file
    @type outpath: string
    @param update: whether to update the PO file in place
    @type update: bool
    @param wrapping: the wrapping policy (see the parameter of the same name
        to L{catalog constructor<catalog.Catalog>})
    @type wrapping: sequence of strings
    @param fuzzymatch: whether to perform fuzzy matching
    @type fuzzymatch: bool
    @param cmppaths: paths to compendium files to be used on merging
    @type cmppaths: sequence of strings
    @param quiet: whether C{msgmerge} should operate quietly
    @type quiet: bool
    @param fuzzex: whether to fuzzy exact matches from compendia
    @type fuzzex: bool
    @param minwnex: minimal number of words in the original in exact match
        from compendia to not fuzzy the message (a very large number
        approximates C{fuzzex} set to C{True}).
    @type minwnex: int
    @param minasfz: minimal adjusted similarity between the previous and
        the current original text (as computed by L{editprob<diff.editprob>})
        for a fuzzy match to be kept; fuzzy messages with previous fields
        scoring below this value are cleared. Values not greater than 0.0
        disable the check.
    @type minasfz: float
    @param refuzzy: whether to "rebase" fuzzy messages, i.e. remove prior
        to merging those fuzzy messages whose translated counterparts
        (determined by previous fields) still exist in the catalog.
        This puts possibly newer translation into such messages,
        or even leads to a better fuzzy match.
    @type refuzzy: bool
    @param getcat: whether to return catalog object on merged file
    @type getcat: bool
    @param monitored: if C{getcat} is in effect, whether to open catalog
        in monitoring mode (like the parameter to catalog constructor)
    @type monitored: bool
    @param ignpotdate: whether to ignore changed C{POT-Creation-Date}
        if there were no other changes, resetting it to original value
    @type ignpotdate: bool
    @param abort: whether to abort execution if C{msgmerge} fails
    @type abort: bool

    @returns: whether merging succeeded, or catalog object
    @rtype: bool or L{Catalog<catalog.Catalog>} or C{None}

    @raise PologyError: if a compendium path does not point to a file,
        or if C{abort} is C{True} and C{msgmerge} fails
    """

    if wrapping is not None:
        wrap = "basic" in wrapping
        # Any wrapping keyword other than "basic" cannot be handled by
        # msgmerge itself, and requires reformatting through the catalog.
        otherwrap = set(wrapping).difference(["basic"])
    else:
        wrap = True
        otherwrap = False

    # Store original catalog if change in template creation date
    # alone should be ignored, for check at the end.
    if ignpotdate:
        orig_cat = Catalog(catpath, monitored=False)

    # Determine which special operations are to be done.
    correct_exact_matches = cmppaths and (fuzzex or minwnex > 0)
    correct_fuzzy_matches = minasfz > 0.0
    rebase_existing_fuzzies = refuzzy and fuzzymatch

    # Pre-process catalog if necessary.
    if correct_exact_matches or rebase_existing_fuzzies:
        # Only rebasing of fuzzies changes the catalog before merging.
        may_modify = rebase_existing_fuzzies
        cat = Catalog(catpath, monitored=may_modify)

        # In case compendium is being used,
        # collect keys of all non-translated messages,
        # to later check which exact matches need to be fuzzied.
        # New non-translated messages can come from the template,
        # make sure these too are taken into account.
        if correct_exact_matches:
            nontrkeys = set()
            trkeys = set()
            for msg in cat:
                if not msg.translated:
                    nontrkeys.add(msg.key)
                else:
                    trkeys.add(msg.key)
            tcat = Catalog(tplpath, monitored=False)
            for msg in tcat:
                if msg.key not in trkeys:
                    nontrkeys.add(msg.key)

        # If requested, remove all untranslated messages,
        # and replace every fuzzy message which has previous fields
        # with a dummy previous translated message
        # (unless such message already exists in the catalog).
        # This way, untranslated messages will get fuzzy matched again,
        # and fuzzy messages may get updated translation.
        # However, do not do this for messages where a previous translated
        # message does already exist in the catalog, is fuzzy, and
        # does not have previous fields, since then that one will be
        # fuzzy matched and propagate its lack of previous fields.
        if rebase_existing_fuzzies:
            rebase_dummy_messages = []
            for msg in cat:
                if msg.untranslated:
                    cat.remove_on_sync(msg)
                elif msg.fuzzy and msg.msgid_previous:
                    omsgs = cat.select_by_key(msg.msgctxt_previous,
                                              msg.msgid_previous)
                    if (   not omsgs
                        or not omsgs[0].fuzzy
                        or omsgs[0].msgid_previous is not None
                    ):
                        cat.remove_on_sync(msg)
                        if not omsgs:
                            dmsg = Message()
                            dmsg.msgctxt = msg.msgctxt_previous
                            dmsg.msgid = msg.msgid_previous
                            dmsg.msgid_plural = msg.msgid_plural_previous
                            dmsg.msgstr = msg.msgstr
                            cat.add_last(dmsg)
                            rebase_dummy_messages.append(dmsg)

        if may_modify:
            cat.sync()

    # Prepare temporary file if output path not given and not in update mode.
    # The file object must stay referenced until return,
    # since the temporary file is removed once the object is closed.
    if not outpath and not update:
        tmpf = NamedTemporaryFile(prefix="pology-merged-", suffix=".po")
        outpath = tmpf.name

    # Merge.
    # The command is assembled as an argument list and run without a shell,
    # so that paths with spaces or shell metacharacters are neither
    # split nor interpreted.
    cmdargs = ["msgmerge"]
    if not update:
        cmdargs.extend(["--output-file", outpath])
    else:
        cmdargs.append("--update")
        cmdargs.extend(["--backup", "none"])
    if fuzzymatch:
        cmdargs.append("--previous")
    else:
        cmdargs.append("--no-fuzzy-matching")
    if not wrap:
        cmdargs.append("--no-wrap")
    for cmppath in (cmppaths or []):
        if not os.path.isfile(cmppath):
            raise PologyError(
                _("@info",
                  "Compendium does not exist at '%(path)s'.",
                  path=cmppath))
        cmdargs.extend(["--compendium", cmppath])
    if quiet:
        cmdargs.append("--quiet")
    cmdargs.extend([catpath, tplpath])
    try:
        # NOTE(review): unicode_to_str is applied per argument, where
        # the original applied it to the whole command line; this
        # assumes it leaves byte strings unchanged -- confirm.
        mrgres = subprocess.call([unicode_to_str(x) for x in cmdargs])
    except OSError:
        # msgmerge is missing or cannot be executed; report it as
        # an ordinary merge failure, like the shell-based call did
        # through its non-zero exit status.
        mrgres = -1
    if mrgres != 0:
        if abort:
            raise PologyError(
                _("@info",
                  "Cannot merge PO file '%(file1)s' with template '%(file2)s'.",
                  file1=catpath, file2=tplpath))
        return None if getcat else False

    # If the catalog had only header and no messages,
    # msgmerge will not write out anything.
    # In such case, just copy the initial file to output path.
    # NOTE(review): in temporary-output mode the file already exists
    # (created empty by NamedTemporaryFile), so this fallback cannot
    # trigger there -- confirm msgmerge behavior on header-only catalogs.
    if outpath and not os.path.isfile(outpath):
        shutil.copyfile(catpath, outpath)
    # If both the output path has been given and update requested,
    # copy the output file over the initial file.
    if update and outpath and catpath != outpath:
        shutil.copyfile(outpath, catpath)

    # Post-process merged catalog if necessary.
    if (   getcat or otherwrap or correct_exact_matches
        or correct_fuzzy_matches or ignpotdate or rebase_existing_fuzzies
    ):
        # If fine wrapping requested and catalog should not be returned,
        # everything has to be reformatted, so no need to monitor the catalog.
        catpath1 = outpath or catpath
        monitored1 = monitored if getcat else (not otherwrap)
        cat = Catalog(catpath1, monitored=monitored1, wrapping=wrapping)

        # In case compendium is being used,
        # make fuzzy exact matches which do not pass the word limit.
        if correct_exact_matches:
            acc = cat.accelerator()
            for msg in cat:
                if (    msg.key in nontrkeys and msg.translated
                    and (   fuzzex
                         or len(proper_words(msg.msgid, accels=acc)) < minwnex)
                ):
                    msg.fuzzy = True
                    msg.msgctxt_previous = msg.msgctxt
                    msg.msgid_previous = msg.msgid
                    msg.msgid_plural_previous = msg.msgid_plural

        # Eliminate fuzzy matches not passing the adjusted similarity limit.
        if correct_fuzzy_matches:
            for msg in cat:
                if msg.fuzzy and msg.msgid_previous is not None:
                    if editprob(msg.msgid_previous, msg.msgid) < minasfz:
                        msg.clear()

        # Revert template creation date change if it was the only change.
        # The original date is put in tentatively; if the catalogs still
        # differ, something else changed too and the new date is restored.
        if ignpotdate:
            fname = "POT-Creation-Date"
            orig_potdate = orig_cat.header.get_field_value(fname)
            new_potdate = cat.header.get_field_value(fname)
            cat.header.replace_field_value(fname, orig_potdate)
            if cat != orig_cat:
                cat.header.replace_field_value(fname, new_potdate)

        # Remove dummy messages added for rebasing of fuzzy messages
        # that were obsoleted instead of promoted to fuzzy.
        if rebase_existing_fuzzies:
            for dmsg in rebase_dummy_messages:
                if dmsg in cat and cat[dmsg].obsolete:
                    cat.remove_on_sync(dmsg)

        # When the catalog is returned, syncing is left to the caller.
        if not getcat:
            cat.sync(force=otherwrap)

    return cat if getcat else True