File indexing completed on 2024-04-28 17:02:23

0001 #!/usr/bin/env python3
0002 # -*- coding: utf-8 -*-
0003 
0004 # Obtains a list of proper words (words that begin with a capital letter or
0005 # contain an intermediate capital letter)
0006 # that are not yet included in the local dictionary.
0007 # It is a tool that helps to complete the local dictionary.
0008 # The code is adapted from the Serbian team Pology scripts by Chusslove Illich.
0009 
0010 import fallback_import_paths
0011 
0012 import sys
0013 import os
0014 import re
0015 import locale
0016 import enchant
0017 
0018 from pology import version, _, n_
0019 from pology.catalog import Catalog
0020 from pology.colors import ColorOptionParser
0021 from pology.fsops import str_to_unicode, collect_catalogs
0022 from pology.fsops import collect_paths_cmdline
0023 from pology.split import proper_words
0024 # from pology.msgreport import warning_on_msg, report_msg_content
0025 # from pology.report import report, warning, error, format_item_list
0026 from pology.stdcmdopt import add_cmdopt_filesfrom
0027 
0028 
def _main():
    """Command-line entry point.

    Builds the option parser, resolves the PO paths given on the command
    line through collect_paths_cmdline(), passes every resulting path to
    extract_proper_words() and then prints, in sorted order, each word
    returned by the local dictionary's session_dict().
    """
    # Adopt the locale configured in the user's environment.
    locale.setlocale(locale.LC_ALL, "")

    # Translatable texts shown by the option parser.
    usage_text = _(
        "@info command usage",
        "%(cmd)s [OPTIONS] VCS [POPATHS...]",
        cmd="%prog",
    )
    description_text = _(
        "@info command description",
        "Obtains a list of proper words from the message text ",
    )
    version_text = _(
        "@info command version",
        u"%(cmd)s (Pology) %(version)s\n"
        u"Copyright ©  2011 "
        u"Javier Viñal <%(email)s>",
        cmd="%prog",
        version=version(),
        email="fjvinal@gmail.com",
    )

    parser = ColorOptionParser(
        usage=usage_text,
        description=description_text,
        version=version_text,
    )
    add_cmdopt_filesfrom(parser)

    options, free_args = parser.parse_args(str_to_unicode(sys.argv[1:]))

    # Collect PO files in given paths.
    catalog_paths = collect_paths_cmdline(
        rawpaths=free_args,
        filesfrom=options.files_from,
        elsecwd=True,
        respathf=collect_catalogs,
        abort=True,
    )

    # English is checked as well, so that words already known to the
    # English dictionary are not reported.
    dict_en = enchant.Dict("en")
    dict_local = enchant.Dict("es")

    for catalog_path in catalog_paths:
        extract_proper_words(catalog_path, dict_en, dict_local)

    dict_en.close()

    # The session word list holds everything extract_proper_words() added.
    collected_words = sorted(dict_local.session_dict())
    for collected_word in collected_words:
        print(collected_word)

    dict_local.session_dict(clear=True)
    dict_local.close()
0067 
0068 _ent_proper_word = re.compile("^\w*?[A-Z]\w*$")
0069 
def extract_proper_words (path, dict_en, dict_local):
    """Record the unknown proper words found in one PO catalog.

    The catalog at ``path`` is opened with Catalog and every translation
    text of every message is split with proper_words(). Each word that
    matches ``_ent_proper_word`` and is rejected by both ``dict_en.check()``
    and ``dict_local.check()`` is added to the session word list of
    ``dict_local`` through ``dict_local.session_dict(word)``.

    All entries of ``msg.msgstr`` are scanned, not only the first one, so
    that a proper word appearing only in another plural form is not missed.

    Parameters:
        path: path of the PO file to read.
        dict_en: English spell-checker object; only check() is called.
        dict_local: spell-checker object of the target language; check()
            and session_dict() are called.

    Returns:
        None. The result is the words added to ``dict_local``.
    """
    cat = Catalog(path)

    for msg in cat:
        accels = cat.accelerator()
        for text in msg.msgstr:
            for word in proper_words(text, True, accels, msg.format):
                if not _ent_proper_word.match(word):
                    continue
                # Convert once; the spell-checkers are given a plain str.
                plain_word = str(word)
                if dict_en.check(plain_word) or dict_local.check(plain_word):
                    continue
                dict_local.session_dict(plain_word)
0081 
0082 
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    _main()
0085