File indexing completed on 2024-04-14 05:37:53

0001 #!/usr/bin/env python3
0002 # -*- coding: UTF-8 -*-
0003 
0004 """
0005 Create Pology rules from the KOffice KWord autocorrect xml file.
0006 This script is intended to be run standalone.
0007 
0008 Usage::
0009     python create_rules_from_koffice_autocorrect.py <autocorrect file> <output rule file>
0010 
0011 @author: Sébastien Renard <sebastien.renard@digitalfox.org>
0012 @license: GPLv3
0013 """
0014 
0015 import re
0016 import sys
0017 from codecs import open
0018 import locale
0019 
def main():
    """Convert a KOffice autocorrect XML file into a Pology rule file.

    The input path is read from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``. When the argument count is not exactly two, ``usage()``
    is called, which prints the expected command line and exits.

    Every input line that (once stripped) starts with an
    ``<item find="..." replace="..." />`` element produces one rule in the
    output file, unless its ``find`` value is listed in the hard-coded
    exceptions. Both files are read and written as UTF-8.

    Raises:
        IOError/OSError: if either file cannot be opened, read or written.
    """
    try:
        locale.setlocale(locale.LC_ALL, "")
    except locale.Error as error:
        # All file I/O below uses an explicit UTF-8 encoding, so an
        # unsupported locale in the environment is no reason to abort.
        sys.stderr.write("Warning: cannot set locale (%s)\n" % error)

    if len(sys.argv) != 3:
        usage()

    # Regexp to find autocorrect items
    regexp = re.compile(r'<item find="(.*?)" replace="(.*?)" />')

    #TODO: exceptions should be in a separated file, not hardcoded.
    exceptions = frozenset(("http:/", "http://", "etc...", "language"))

    #TODO: check input file is readable and output path is writable
    # The encoding is passed by keyword so the call is valid both for
    # codecs.open (imported at the top of this file) and the builtin open.
    # The ``with`` statement closes both files even if an error occurs.
    with open(sys.argv[1], "r", encoding="utf-8") as kofficeFile, \
            open(sys.argv[2], "w", encoding="utf-8") as ruleFile:
        #Header
        ruleFile.write("# Generated rules from KOffice autocorrect file\n")
        ruleFile.write("# by the KOffice project (http://www.koffice.org)\n")
        ruleFile.write("# License: GPLv3\n\n")

        for line in kofficeFile:
            match = regexp.match(line.strip())
            if not match:
                continue
            find, replace = match.group(1), match.group(2)
            if find in exceptions:
                continue
            # NOTE(review): ``find`` is written verbatim between the word
            # boundary markers and both values verbatim inside the quoted
            # hint; confirm whether special characters need escaping.
            ruleFile.write(u'[&lwb;%s&rwb;]\nhint="%s => %s (d\'après le fichier de correction de KOffice)"\n\n' % (find, find, replace))

        #Footer
        ruleFile.write("\n#End of rule file\n")
0051 
def usage():
    """Print the expected command line and terminate with exit status 1."""
    script_name = sys.argv[0]
    print("\t%s <autocorrect file> <output rule file>" % script_name)
    # Equivalent to sys.exit(1): signals a usage error to the caller.
    raise SystemExit(1)
0055 
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()