Warning, /sdk/pology/bin/pomtrans is written in an unsupported language. File is not indexed.
0001 #!/usr/bin/env python3
0002 # -*- coding: UTF-8 -*-
0003
0004 """
0005 Perform machine translation of PO files.
0006
0007 Documented in C{doc/user/lingo.docbook#sec-lgmtrans}.
0008
0009 @author: Chusslove Illich (Часлав Илић) <caslav.ilic@gmx.net>
0010 @license: GPLv3
0011 """
0012
0013 try:
0014 import fallback_import_paths
0015 except:
0016 pass
0017
0018 import locale
0019 import subprocess
0020 import sys
0021 import os
0022
0023 from pology import datadir, version, _, n_
0024 from pology.catalog import Catalog
0025 from pology.colors import ColorOptionParser
0026 import pology.config as pology_config
0027 from pology.entities import read_entities
0028 from pology.fsops import collect_catalogs, collect_system
0029 from pology.fsops import str_to_unicode
0030 from pology.fsops import exit_on_exception
0031 from pology.message import MessageUnsafe
0032 from pology.remove import remove_accel_msg
0033 from pology.report import report, error, warning
0034 from pology.resolve import resolve_entities_simple
0035
0036
def main ():
    """
    Parse the command line and machine-translate the given PO files.

    The first free argument selects the translation service (a key of
    C{_known_transervs}); all remaining free arguments are handed over
    as paths to the translation routine.
    Original text is translated directly, unless a parallel compendium
    (C{-c}) or parallel catalogs (C{-p}) are requested, in which case
    existing translations into another language are used as the source.
    """

    locale.setlocale(locale.LC_ALL, "")

    # Get defaults for command line options from global config.
    # NOTE(review): the section is fetched, but no option below reads
    # a default from it yet.
    cfgsec = pology_config.section("pomtrans")

    # Setup options and parse the command line.
    usage = _("@info command usage",
        "%(cmd)s [OPTIONS] TRANSERV PATHS...",
        cmd="%prog")
    desc = _("@info command description",
        "Perform machine translation of PO files.")
    ver = _("@info command version",
        "%(cmd)s (Pology) %(version)s\n"
        "Copyright © 2009, 2010 "
        "Chusslove Illich (Часлав Илић) <%(email)s>",
        cmd="%prog", version=version(), email="caslav.ilic@gmx.net")

    opars = ColorOptionParser(usage=usage, description=desc, version=ver)
    opars.add_option(
        "-a", "--accelerator", dest="accel",
        metavar=_("@info command line value placeholder", "CHAR"),
        help=_("@info command line option description",
               "Accelerator marker character used in messages. "
               "Detected from catalogs if not given."))
    opars.add_option(
        "-c", "--parallel-compendium", dest="parcomp",
        metavar=_("@info command line value placeholder", "FILE"),
        help=_("@info command line option description",
               "Translate from translation to another language, "
               "found in compendium file at the given path."))
    opars.add_option(
        "-l", "--list-transervs",
        action="store_true", dest="list_transervs", default=False,
        # Wrapped in _() like every other option description.
        help=_("@info command line option description",
               "List available translation services."))
    opars.add_option(
        "-m", "--flag-%s" % _flag_mtrans,
        action="store_true", dest="flag_mtrans", default=False,
        help=_("@info command line option description",
               "Add '%(flag)s' flag to translated messages.",
               flag=_flag_mtrans))
    opars.add_option(
        "-M", "--translation-mode", dest="tmode",
        metavar=_("@info command line value placeholder", "MODE"),
        help=_("@info command line option description",
               "Translation mode for the chosen translation service. "
               "Overrides the default translation mode constructed "
               "based on source and target language. "
               "Mode string format is translation service dependent."))
    opars.add_option(
        "-n", "--no-fuzzy-flag",
        action="store_false", dest="flag_fuzzy", default=True,
        help=_("@info command line option description",
               "Do not add '%(flag)s' flag to translated messages.",
               flag="fuzzy"))
    opars.add_option(
        "-p", "--parallel-catalogs", dest="parcats",
        metavar=_("@info command line value placeholder", "SEARCH:REPLACE"),
        help=_("@info command line option description",
               "Translate from translation to another language "
               "found in parallel catalogs. "
               "For given target catalog path, the path to parallel catalog "
               "is constructed by replacing once SEARCH with REPLACE."))
    opars.add_option(
        "-s", "--source-lang", dest="slang",
        metavar=_("@info command line value placeholder", "LANG"),
        help=_("@info command line option description",
               "Source language code. "
               "Detected from catalogs if not given."))
    opars.add_option(
        "-t", "--target-lang", dest="tlang",
        metavar=_("@info command line value placeholder", "LANG"),
        help=_("@info command line option description",
               "Target language code. "
               "Detected from catalogs if not given."))
    opars.add_option(
        "-T", "--transerv-bin", dest="transerv_bin",
        metavar=_("@info command line value placeholder", "PATH"),
        help=_("@info command line option description",
               "Custom path to translation service executable "
               "(where applicable)."))
    opars.add_option(
        "-d", "--data-directory", dest="data_directory",
        metavar=_("@info command line value placeholder", "FOLDER"),
        help=_("@info command line option description",
               "Custom path to a translation data directory (where applicable)."))

    (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:]))

    # Listing the services needs no further arguments.
    if op.list_transervs:
        report("\n".join(sorted(_known_transervs)))
        sys.exit(0)

    # NOTE(review): error() is assumed to terminate the program;
    # the statements following each call rely on that.
    if not free_args:
        error(_("@info",
                "Translation service not specified."))
    transervkey = free_args.pop(0)
    if transervkey not in _known_transervs:
        error(_("@info",
                "Translation service '%(serv)s' not known.",
                serv=transervkey))

    # Bind parsed options to the service constructor, so that translation
    # routines only need to supply the language pair.
    tsbuilder_wopts = _known_transervs[transervkey]

    def tsbuilder (slang, tlang):
        return tsbuilder_wopts(slang, tlang, op)

    paths = free_args
    if not op.parcomp and not op.parcats:
        translate_direct(paths, tsbuilder, op)
    else:
        translate_parallel(paths, tsbuilder, op)
0158
0159
def translate_direct (paths, tsbuilder, options):
    """
    Translate untranslated messages directly from their original text.

    @param paths: paths from which catalogs are collected
    @param tsbuilder: callable C{(slang, tlang)} returning an object
        with a C{translate(texts)} method
    @param options: parsed command line options; C{accel}, C{slang} and
        C{tlang} are read here, the rest by the helpers called
    """

    for catpath in collect_catalogs(paths):

        # Collect messages and texts to translate.
        cat = Catalog(catpath)
        if options.accel is not None: # force explicitly given accelerator
            cat.set_accelerator(options.accel)
        texts = []
        msgs = []
        for msg in cat:
            if not to_translate(msg, options):
                continue
            # Accelerator markers are stripped on an unsafe copy,
            # so that the catalog message itself is left untouched.
            msgf = MessageUnsafe(msg)
            remove_accel_msg(msgf, cat)
            texts.append(msgf.msgid)
            if msg.msgid_plural is not None:
                texts.append(msgf.msgid_plural)
            msgs.append(msg)

        # Translate collected texts.
        # The service is requested only when there is something to send,
        # so a catalog with nothing to translate neither needs a
        # detectable language nor triggers service construction.
        if texts:
            slang = options.slang or "en"
            transerv = get_transerv(slang, options.tlang, cat, cat, tsbuilder)
            texts_tr = transerv.translate(texts)
        else:
            texts_tr = []
        # A result of unexpected length cannot be matched back to messages,
        # so it is treated the same as an outright failure.
        if texts_tr is None or len(texts_tr) != len(texts):
            warning(_("@info",
                      "Translation service failure on '%(file)s'.",
                      file=catpath))
            continue
        enc = cat.encoding()
        texts_tr = [reduce_for_encoding(text, enc) for text in texts_tr]

        # Put translated texts into messages.
        singlepls = cat.plural_indices_single()
        tr_iter = iter(texts_tr)
        for msg in msgs:
            # Texts were queued as msgid, then msgid_plural if present.
            msgid_tr = next(tr_iter)
            msgid_plural_tr = None
            if msg.msgid_plural is not None:
                msgid_plural_tr = next(tr_iter)
            if not msgid_tr:
                continue # service produced nothing for this message
            if msg.msgid_plural is not None:
                for i in range(len(msg.msgstr)):
                    if i in singlepls:
                        msg.msgstr[i] = msgid_tr
                    else:
                        msg.msgstr[i] = msgid_plural_tr
            else:
                msg.msgstr[0] = msgid_tr
            decorate(msg, options)

        sync_rep(cat, msgs)
0213
0214
def translate_parallel (paths, tsbuilder, options):
    """
    Translate untranslated messages from translations into another language.

    Source translations are looked up first in the parallel catalog
    (path derived from the target catalog path through the
    C{SEARCH:REPLACE} specification in C{options.parcats}),
    then in the compendium given by C{options.parcomp}.

    @param paths: paths from which target catalogs are collected
    @param tsbuilder: callable C{(slang, tlang)} returning an object
        with a C{translate(texts)} method
    @param options: parsed command line options; C{parcats}, C{parcomp},
        C{slang} and C{tlang} are read here
    """

    pathrepl = options.parcats
    comppath = options.parcomp
    slang = options.slang
    tlang = options.tlang

    # NOTE(review): error() is assumed to terminate the program.
    ccat = None
    if comppath is not None:
        if not os.path.isfile(comppath):
            error(_("@info",
                    "Compendium '%(file)s' does not exist.",
                    file=comppath))
        ccat = Catalog(comppath, monitored=False)

    pathsrch = None
    if pathrepl is not None:
        lst = pathrepl.split(":")
        if len(lst) != 2:
            error(_("@info",
                    "Invalid search and replace specification '%(spec)s'.",
                    spec=pathrepl))
        pathsrch, pathrepl = lst

    for catpath in collect_catalogs(paths):

        # Open parallel catalog if it exists.
        pcat = None
        if pathrepl is not None:
            pcatpath = catpath.replace(pathsrch, pathrepl, 1)
            if catpath == pcatpath:
                error(_("@info",
                        "Parallel catalog and target catalog are same files "
                        "for '%(file)s'.",
                        file=catpath))
            if os.path.isfile(pcatpath):
                pcat = Catalog(pcatpath, monitored=False)

        # If there is neither the parallel catalog nor the compendium,
        # skip processing current target catalog.
        if not pcat and not ccat:
            continue

        # Collect messages and texts to translate.
        cat = Catalog(catpath)
        pmsgs, psmsgs, ptexts = [], [], []
        cmsgs, csmsgs, ctexts = [], [], []
        for msg in cat:
            if not to_translate(msg, options):
                continue
            # Priority: parallel catalog, then compendium.
            for scat, msgs, smsgs, texts in (
                (pcat, pmsgs, psmsgs, ptexts),
                (ccat, cmsgs, csmsgs, ctexts),
            ):
                if scat and msg in scat:
                    smsg = scat[msg]
                    if smsg.translated:
                        msgs.append(msg)
                        smsgs.append(smsg)
                        texts.extend(smsg.msgstr)
                        break

        # Translate collected texts.
        # A service is requested only for a source that contributed texts:
        # a missing source catalog is None and cannot be asked for its
        # language when the source language was not given explicitly.
        texts_tr = []
        for texts, scat in ((ptexts, pcat), (ctexts, ccat)):
            if not texts:
                texts_tr.append([])
                continue
            transerv = get_transerv(slang, tlang, scat, cat, tsbuilder)
            res = transerv.translate(texts)
            # A result of unexpected length cannot be matched back
            # to messages; treat it as a failure too.
            if res is None or len(res) != len(texts):
                texts_tr = None
                break
            texts_tr.append(res)
        if texts_tr is None:
            warning(_("@info",
                      "Translation service failure on '%(file)s'.",
                      file=catpath))
            continue
        ptexts_tr, ctexts_tr = texts_tr

        # Put translated texts into messages.
        # For plural messages, assume 1-1 match to parallel language.
        enc = cat.encoding()
        for msgs, smsgs, texts in (
            (pmsgs, psmsgs, ptexts_tr),
            (cmsgs, csmsgs, ctexts_tr),
        ):
            tr_iter = iter(texts)
            for msg, smsg in zip(msgs, smsgs):
                # One translated text was queued per source msgstr form.
                mtexts = [reduce_for_encoding(next(tr_iter), enc)
                          for unused in smsg.msgstr]
                for i in range(len(msg.msgstr)):
                    # Target forms beyond those of the source
                    # reuse the last source form.
                    msg.msgstr[i] = mtexts[i] if i < len(mtexts) else mtexts[-1]
                decorate(msg, options)

        sync_rep(cat, pmsgs + cmsgs)
0309
0310
def to_translate (msg, options):
    """
    Tell whether the message should be machine-translated.

    @param msg: message to examine; its C{untranslated} attribute is
        the sole criterion
    @param options: parsed command line options (currently not consulted)
    @returns: value of C{msg.untranslated}
    """

    needs_translation = msg.untranslated
    return needs_translation
0314
0315
# Name of the flag marking machine-translated messages.
_flag_mtrans = "mtrans"


def decorate (msg, options):
    """
    Set flags on a freshly machine-translated message.

    The message is first unfuzzied, and then made fuzzy again if
    C{options.flag_fuzzy} is set; the C{_flag_mtrans} flag is added
    if C{options.flag_mtrans} is set.

    @param msg: message to modify in place
    @param options: parsed command line options
    """

    # Start from a clean state, dropping any previous fuzzy stuff.
    msg.unfuzzy()

    if options.flag_fuzzy:
        msg.fuzzy = True

    if options.flag_mtrans:
        msg.flag.add(_flag_mtrans)
0325
0326
# Translation services created so far, keyed by (source, target) language.
_transervs = {}


def get_transerv (slang, tlang, scat, tcat, tsbuilder):
    """
    Return the translation service for the given language pair.

    A service is built through C{tsbuilder} on the first request for
    a given pair and served from the module-level cache afterwards.
    A language that is not given is read from the C{Language} header
    field of the corresponding catalog; C{error()} is called if that
    yields nothing either.

    @param slang: source language code, or a false value to detect it
    @param tlang: target language code, or a false value to detect it
    @param scat: source catalog, consulted only when C{slang} is not given
    @param tcat: target catalog, consulted only when C{tlang} is not given
    @param tsbuilder: callable C{(slang, tlang)} creating a new service
    @returns: the cached or newly created service object
    """

    slang = slang or scat.header.get_field_value("Language")
    if not slang:
        error(_("@info",
                "Cannot determine language of source catalog '%(file)s'.",
                file=scat.filename))

    tlang = tlang or tcat.header.get_field_value("Language")
    if not tlang:
        error(_("@info",
                "Cannot determine language of target catalog '%(file)s'.",
                file=tcat.filename))

    pair = (slang, tlang)
    try:
        return _transervs[pair]
    except KeyError:
        service = _transervs[pair] = tsbuilder(slang, tlang)
        return service
0353
0354
def sync_rep (cat, mmsgs):
    """
    Sync the catalog and report it if the sync indicated a change.

    @param cat: catalog to sync
    @param mmsgs: messages handled in this catalog; only their count
        is used, in the report line
    """

    if not cat.sync():
        return

    line = "! %s (%s)" % (cat.filename, len(mmsgs))
    report(line)
0359
0360
def reduce_for_encoding (text, enc):
    """
    Replace characters not representable in the encoding by question marks.

    Every run of characters which cannot be encoded is replaced by
    an equal number of C{?} characters, so that the length of the text
    is preserved.

    @param text: text to reduce
    @param enc: name of the target encoding
    @returns: text that can be encoded in C{enc}
    """

    while True:
        try:
            text.encode(enc)
        except UnicodeEncodeError as e:
            # The exception carries the bounds of the offending run as
            # attributes; exception objects cannot be indexed.
            start, end = e.start, e.end
            filler = "?" * (end - start)
            if text[start:end] == filler:
                # The question mark itself is not encodable; give up
                # rather than loop forever.
                return text
            text = text[:start] + filler + text[end:]
        else:
            # Encoded cleanly, nothing (more) to replace.
            return text
0372
0373
0374 # ----------------------------------------
0375 # Apertium -- a free/open-source machine translation platform
0376 # http://www.apertium.org/
0377
class Translator_apertium (object):
    """
    Translation service running the Apertium command line tool.
    """

    def __init__ (self, slang, tlang, options):
        """
        Check that the executable can be started and prepare its command line.

        @param slang: source language code
        @param tlang: target language code
        @param options: parsed command line options; C{transerv_bin},
            C{tmode} and C{data_directory} are read
        """

        cmdpath = options.transerv_bin or "apertium"
        try:
            # Only the ability to start the executable is checked.
            # Output is discarded through DEVNULL, since pipes which
            # nobody reads may block the child once they fill up.
            subprocess.call(cmdpath,
                            stdout=subprocess.DEVNULL,
                            stderr=subprocess.DEVNULL)
        except OSError:
            error(_("@info Apertium is machine translation software",
                    "Apertium executable not found at '%(path)s'.",
                    path=cmdpath))

        if options.tmode is not None:
            mode = options.tmode
        else:
            mode = "%s-%s" % (slang, tlang)

        optional_parameters = ""
        if options.data_directory:
            optional_parameters = "-d %s" % options.data_directory

        # NOTE(review): paths are inserted unquoted, so a path containing
        # whitespace may not survive, depending on how collect_system()
        # splits the command line -- confirm before adding quoting.
        self.cmdline = "%s -u -f html-noent %s %s" % (
            cmdpath, optional_parameters, mode)

        entpath = os.path.join(datadir(), "spec", "html.entities")
        self.htmlents = read_entities(entpath)


    def _run (self, instr):
        """
        Feed the text to Apertium and return its output.

        @returns: output text, or C{None} (after issuing a warning)
            if the exit code was not zero
        """

        res = collect_system(self.cmdline, instr=instr)
        if res[2] != 0:
            warning(_("@info",
                      "Executing Apertium failed:\n%(output)s",
                      output=res[0]))
            # ...really res[0], error is output to stdout. Tsk.
            return None
        return res[0]


    def translate (self, texts):
        """
        Translate the texts with a single Apertium run.

        @param texts: list of texts to translate
        @returns: list of translated texts, of same length as C{texts},
            or C{None} on failure
        """

        # Nothing to send; joining and splitting an empty list would be
        # misreported as one translation received for none requested.
        if not texts:
            return []

        # Serialize texts to send to Apertium in one go.
        # Separate texts with an inplace tag followed by dot,
        # to have each text interpreted as standalone sentence.
        # FIXME: Any way to really translate each text in turn,
        # without it being horribly slow?
        tag = "<br>"
        nsep = 1
        sep = tag * nsep + "."
        # Determine shortest separator not occurring in any of the texts.
        while any(sep in text for text in texts):
            nsep += 1
            sep = tag * nsep + "."
        stext = sep.join(texts)

        # Translate empty string to test language pair.
        # Otherwise, if a lot of text is sent and language pair not good,
        # Apertium may just signal broken pipe.
        if self._run("") is None:
            return None

        output = self._run(stext)
        if output is None:
            return None

        texts_tr = output.split(sep)
        if len(texts_tr) != len(texts):
            warning(_("@info",
                      "Apertium reported wrong number of translations, "
                      "%(num1)d instead of %(num2)d.",
                      num1=len(texts_tr), num2=len(texts)))
            return None

        return [resolve_entities_simple(x, self.htmlents) for x in texts_tr]
0456
0457
0458 # ----------------------------------------
0459 # Google Translate
0460 # http://translate.google.com
0461
0462 # Communication code derived from py-gtranslate library
0463 # http://code.google.com/p/py-gtranslate/
0464
0465 # Updated for v2.0 API by Víctor R. Rodríguez Domínguez
0466 # http://vrdominguez.es
0467
0468
class Translator_google (object):
    """
    Translation service querying the Google Translate v2 web API.
    """

    def __init__ (self, slang, tlang, options):
        """
        Set up the language pair and read the API key from user configuration.

        @param slang: source language code
        @param tlang: target language code
        @param options: parsed command line options; if C{tmode} is given,
            it overrides the language pair and must have the form
            C{SOURCE|TARGET}
        """

        if options.tmode is not None:
            (self.lang_in, self.lang_out) = options.tmode.split('|')
        else:
            self.lang_in = slang
            self.lang_out = tlang

        self.apikey = pology_config.section("pomtrans").string("google-api-key")


    def translate (self, texts):
        """
        Translate the texts, issuing one web request per text.

        @param texts: list of texts to translate
        @returns: list of translated texts, of same length as C{texts};
            a text whose request or response handling failed
            is represented by an empty string
        """

        import urllib.error
        import urllib.parse
        import urllib.request
        # Prefer simplejson as before, but fall back on the standard
        # library module instead of demanding an extra package.
        try:
            import simplejson as jsonmod
        except ImportError:
            import json as jsonmod

        if not texts:
            return []

        quote = urllib.parse.quote_plus
        baseurl = "https://www.googleapis.com/language/translate/v2"
        # NOTE(review): the "target" parameter is given twice, the second
        # time with the value "json", which looks like a slip for another
        # parameter name; kept as found until the intent is confirmed.
        baseparams = (("key", self.apikey), ("source", self.lang_in),
                      ("target", self.lang_out), ("target","json"))
        # The part of the query which is same for all texts.
        basequery = "&".join("%s=%s" % (p, quote(v)) for p, v in baseparams)

        texts_tr = []
        for text in texts:
            execurl = "%s?%s&q=%s" % (baseurl, basequery, quote(text))
            try:
                # urlopen() replaces the long deprecated FancyURLopener;
                # the with-statement closes the connection in any case.
                with urllib.request.urlopen(execurl) as resp:
                    res = jsonmod.load(resp)
                text_tr = str(res["data"]["translations"][0]["translatedText"])
            except Exception:
                # Best effort: a failed request or an unexpected response
                # leaves this text without translation.
                text_tr = ""
            texts_tr.append(text_tr)

        return texts_tr
0511
0512
0513 # ----------------------------------------
0514
# Registry of translation service classes, keyed by service name.
_known_transervs = {}


def _init ():
    """
    Register every module-level object named C{Translator_<name>}
    in C{_known_transervs}, under the key C{<name>}.
    """

    prefix = "Translator_"
    found = {
        name[len(prefix):]: obj
        for name, obj in list(globals().items())
        if name.startswith(prefix)
    }
    _known_transervs.update(found)


_init()
0523
0524
# Script entry point: main() is not called directly but handed over to
# exit_on_exception() (imported from pology.fsops), which presumably
# reports an uncaught exception and exits -- see that function for details.
if __name__ == '__main__':
    exit_on_exception(main)