File indexing completed on 2024-04-28 17:02:23
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Obtains a list of proper words (words that begin with a capital letter or
# contain an intermediate capital letter)
# that are not yet included in the local dictionary.
# It is a tool that helps to complete the local dictionary.
# The code is adapted from the Servian team pology scripts by Chusslove Illich.

import fallback_import_paths

import sys
import os
import re
import locale
import enchant

from pology import version, _, n_
from pology.catalog import Catalog
from pology.colors import ColorOptionParser
from pology.fsops import str_to_unicode, collect_catalogs
from pology.fsops import collect_paths_cmdline
from pology.split import proper_words
# from pology.msgreport import warning_on_msg, report_msg_content
# from pology.report import report, warning, error, format_item_list
from pology.stdcmdopt import add_cmdopt_filesfrom


def _main():
    """Run the command-line tool.

    Parses the command line, collects the PO catalogs found under the given
    paths (or the current directory when none is given), feeds every catalog
    to extract_proper_words() and finally prints, one per line and sorted,
    whatever dict_local.session_dict() returns.
    """

    locale.setlocale(locale.LC_ALL, "")

    # The free arguments are all handed to collect_paths_cmdline() as paths,
    # so the usage line must not advertise any other positional argument.
    usage = _("@info command usage",
              "%(cmd)s [OPTIONS] [POPATHS...]",
              cmd="%prog")
    desc = _("@info command description",
             "Obtains a list of proper words from the message text ")
    ver = _("@info command version",
            u"%(cmd)s (Pology) %(version)s\n"
            u"Copyright © 2011 "
            u"Javier Viñal <%(email)s>",
            cmd="%prog", version=version(), email="fjvinal@gmail.com")

    opars = ColorOptionParser(usage=usage, description=desc, version=ver)
    add_cmdopt_filesfrom(opars)

    (options, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:]))

    # Collect PO files in given paths.
    popaths = collect_paths_cmdline(rawpaths=free_args,
                                    filesfrom=options.files_from,
                                    elsecwd=True,
                                    respathf=collect_catalogs,
                                    abort=True)

    dict_en = enchant.Dict("en")
    dict_local = enchant.Dict("es")

    for path in popaths:
        extract_proper_words(path, dict_en, dict_local)

    # NOTE(review): close() and session_dict() do not appear to be part of
    # the upstream PyEnchant Dict API -- presumably this script relies on a
    # patched/extended binding; confirm before porting.
    dict_en.close()
    for word in sorted(dict_local.session_dict()):
        print(word)
    dict_local.session_dict(clear=True)
    dict_local.close()


# A single word made only of \w characters that contains at least one ASCII
# capital letter, in any position.
# NOTE(review): [A-Z] is ASCII-only, so a word whose only capital is an
# accented letter (e.g. "Ángel") is not matched -- confirm this is intended.
_ent_proper_word = re.compile(r"^\w*?[A-Z]\w*$")


def extract_proper_words(path, dict_en, dict_local):
    """Register the unknown proper words of one PO catalog.

    Opens the catalog at `path` and, for every message, splits the first
    translation (msgstr[0] only) with pology's proper_words(). Each resulting
    word that matches _ent_proper_word and is rejected by both
    dict_en.check() and dict_local.check() is passed to
    dict_local.session_dict().

    path       -- path of the PO file to read
    dict_en    -- dictionary object used for the English check
    dict_local -- dictionary object used for the local-language check; it
                  also receives the collected words
    """

    cat = Catalog(path)

    for msg in cat:
        words = proper_words(msg.msgstr[0], True, cat.accelerator(),
                             msg.format)
        for word in words:
            if not _ent_proper_word.match(word):
                continue
            # Convert once; the same text is used for both checks and for
            # the registration call.
            text = str(word)
            if not dict_en.check(text) and not dict_local.check(text):
                dict_local.session_dict(text)


if __name__ == '__main__':
    _main()