# File indexing completed on 2024-04-14 05:37:53
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""
Create Pology rules from the KOffice KWord autocorrect xml file.
This script is intended to be run standalone.

Usage::
    python create_rules_from_koffice_autocorrect.py <autocorrect file> <output rule file>

@author: Sébastien Renard <sebastien.renard@digitalfox.org>
@license: GPLv3
"""

import re
import sys
from codecs import open
import locale


def main():
    """Convert an autocorrect file into a rule file.

    Reads the input path from ``sys.argv[1]`` and writes the rules to the
    path given in ``sys.argv[2]``; both files are handled as UTF-8. Each
    input line that (once stripped) starts with an
    ``<item find="..." replace="..." />`` element produces one rule, unless
    its ``find`` value is one of the hard-coded exceptions.

    When the number of command line arguments is wrong, ``usage()`` is
    called, which prints a help line and exits with status 1.
    """
    locale.setlocale(locale.LC_ALL, "")

    if len(sys.argv) != 3:
        usage()

    # Regexp to find autocorrect items
    regexp = re.compile(r'<item find="(.*?)" replace="(.*?)" />')

    # TODO: exceptions should be in a separated file, not hardcoded.
    exceptions = frozenset(["http:/", "http://", "etc...", "language"])

    # TODO: check file is readable
    # TODO: check path is writable
    # The input is opened first, so a missing input file does not leave an
    # empty output file behind. The ``with`` statement closes both handles
    # even when an error occurs while reading or writing.
    with open(sys.argv[1], "r", "utf-8") as kofficeFile, \
            open(sys.argv[2], "w", "utf-8") as ruleFile:
        # Header
        ruleFile.write("# Generated rules from KOffice autocorrect file\n")
        ruleFile.write("# by the KOffice project (http://www.koffice.org)\n")
        ruleFile.write("# License: GPLv3\n\n")

        for line in kofficeFile:
            match = regexp.match(line.strip())
            if not match:
                continue
            find = match.group(1)
            replace = match.group(2)
            if find not in exceptions:
                ruleFile.write(u'[&lwb;%s&rwb;]\nhint="%s => %s (d\'après le fichier de correction de KOffice)"\n\n' % (find, find, replace))

        # Footer
        ruleFile.write("\n#End of rule file\n")


def usage():
    """Print the expected command line and exit with status 1."""
    print("\t%s <autocorrect file> <output rule file>" % sys.argv[0])
    sys.exit(1)


if __name__ == '__main__':
    main()