File indexing completed on 2025-02-02 04:54:48
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#***************************************************************************
# SPDX-FileCopyrightText: 2013 Volker Paul <volker.paul@v-paul.de>
# SPDX-License-Identifier: GPL-2.0-or-later
#***************************************************************************/

# Usage:
# 0. Prepare .acst2qif.cfg according to your needs
# 1. Get account statement from your bank as PDF file
# 2. python acst2qif.py <options>
# 3. import account_statement.qif into KMyMoney using File - Import - QIF
# (May be used with other QIF-importing applications as well, but tested only with KMyMoney)
# Investment transactions are not yet implemented.

# Step by step
# Monthly do:
# For each account:
# - acquire account statement as PDF
# - put it in the dir specified in the account's section; ordered chronologically
# Run acst2qif.py without arguments so it uses default .acst2qif.cfg config file.
# It produces output in outfile specified in [General] section.
# Import outfile in KMyMoney.
# Check each account in KMyMoney against PDF account statement:
# - If you missed an account statement, there will be a difference between
#   KMyMoney and the statement's balance.
# - If you import an account statement twice, KMyMoney will most likely report it.

# Needs pdftotext in the path. Tested under Linux only.

# Caveat: This script completely relies on the configuration file and the regexps in it.
# A basic understanding of Python regexps is required to use it.
# If you have problems writing regexps, maybe I can help.
# This script stores the PDF file converted to text in an .acst file.
# You can run the script with an .acst file as input instead of the PDF file.
# Send me the .acst file, your current .cfg file and a description of
# what you expect the script to do.
# Note that I can't write all regexps for you, I can only help you
# find errors and provide examples.

__author__ = "Volker Paul"
__copyright__ = "Copyright 2013, Volker Paul"
__license__ = "GPL 2"
__maintainer__ = "Volker Paul"
__email__ = "volker.paul@v-paul.de"
__docformat__ = 'restructuredtext'
__status__ = "Production"
# NOTE(review): main() hands __version__ to OptionParser, but the name was
# never defined, so the script died with a NameError on startup.
# "0.1" is a placeholder -- confirm the real version number.
__version__ = "0.1"

import copy
import datetime
import os
import os.path
import re
import subprocess
import sys
import textwrap
from optparse import OptionParser

try:
    import ConfigParser
except ImportError:  # the module is called configparser on Python 3
    import configparser as ConfigParser


def uc(s):
    """Try to get around "'ascii' codec can't encode character xyz".

    A byte string is decoded as UTF-8 and returned as text. Anything that
    is not a byte string, or that is not valid UTF-8, is returned unchanged.
    """
    if not isinstance(s, bytes):
        return s
    try:
        return s.decode('utf-8')
    except UnicodeDecodeError:
        return s


def getFileList(dir, encoding='utf-8'):
    """Get a list of all PDF files (only filenames, not complete paths)
    in given directory. Subdirectories and their contents are ignored.
    Output is sorted alphabetically.

    Returns None (after printing an error) if the directory does not exist
    or cannot be read. The encoding parameter is accepted for backward
    compatibility and is not used.
    """
    if not os.path.isdir(dir):
        print("ERROR: No such directory: " + dir)
        return None
    try:
        names = sorted(os.listdir(dir))
    except OSError as err:
        print("ERROR: Cannot read directory %s: %s" % (dir, err))
        return None
    return [uc(name) for name in names
            if name.endswith(".pdf")
            and not os.path.isdir(os.path.join(dir, name))]


def outputTransaction(output, tdate, text, amount, categoryDic):
    """Write one QIF cash transaction record to output.

    output      -- writable file-like object
    tdate       -- transaction date (object with strftime)
    text        -- memo text; runs of whitespace are collapsed to one blank
    amount      -- amount string as produced by getAmount()
    categoryDic -- maps regexps to category names, see guessCategory()

    Returns 0 if a category was found for the transaction, 1 otherwise,
    so callers can sum up the number of uncategorized transactions.
    """
    text = ' '.join(text.split())
    date = tdate.strftime("%d.%m.%Y")
    output.write("!Type:Cash\n")
    output.write('D' + date + '\n')
    output.write('M' + text + '\n')
    output.write('T' + amount + '\n')
    categ = guessCategory(text, categoryDic)
    if categ:
        output.write('L' + categ + '\n')
        nc = 0
    else:
        print("No category for: %s %s %s" % (date, text[:40], amount))
        nc = 1
    output.write('^\n')
    return nc


def getAmount(rawAmount, credit_regexp, debit_regexp):
    """Recognize credits by credit_regexp, debits by debit_regexp.
    These regular expressions also split up the amount in integer and
    fractional part (named groups 'int' and 'frac').

    Returns the amount as a string like '+1234.56' or '-12.00'.
    Returns '' (after printing an error) if neither regexp matches.
    """
    sign = '+'
    m = re.match(credit_regexp, rawAmount)
    if not m:
        sign = '-'
        m = re.match(debit_regexp, rawAmount)
    if not m:
        print("ERROR: Can't get amount from string: '%s'" % rawAmount)
        print(" Matches neither credit_regexp: %s" % credit_regexp)
        print(" nor debit_regexp: %s" % debit_regexp)
        return ''
    # Only '.' is removed: it is the thousands separator in the integer part.
    int_part = m.group('int').replace('.', '')
    return sign + int_part + '.' + m.group('frac')


def guessCategory(text, categoryDic):
    """Get category resp. account name from text.
    Try to match with value from an entry of categoryDic.

    The keys of categoryDic are regexps that are matched against the
    beginning of text (re.match); the value of the first matching entry
    is returned. Returns None if no entry matches.
    """
    for regexp, name in categoryDic.items():
        if re.match(regexp, text):
            return name
    return None


def convert(inpath, output, options, config, accountName, type, categoryDic):
    """Convert one account statement into QIF transactions.

    inpath      -- statement file; a '.pdf' file is first converted with
                   pdftotext into a '.acst' text file next to it, any
                   other file is read as text directly
    output      -- writable file-like object receiving the QIF records
    options     -- object with a boolean 'verbose' attribute
    config      -- config parser holding a section named like type
    accountName -- only used in the summary message
    type        -- name of the config section with ihead_regexp,
                   iadd_regexp, credit_regexp, debit_regexp, categoryDic
    categoryDic -- category regexps of the individual account; it is
                   updated in place with the entries of the account type

    Line types:
      ihead  item head line, contains date, transfer type and value
      iadd   item additional details
      other  other line, to be ignored
    """
    textfile = inpath
    (root, ext) = os.path.splitext(textfile)
    if ext == ".pdf":
        pdffile = textfile
        textfile = root + ".acst"
        proc = subprocess.Popen(["pdftotext", "-layout", pdffile, textfile],
                                stdout=subprocess.PIPE)
        proc.communicate()
        if proc.returncode != 0:
            # Do not go on: a stale .acst file from an earlier run could
            # otherwise be imported again without anybody noticing.
            print("ERROR: pdftotext failed with exit code %d for file: %s"
                  % (proc.returncode, uc(pdffile)))
            return
    # SECURITY NOTE: eval() executes arbitrary Python code taken from the
    # configuration file. Only use configuration files you trust.
    categoryDicAdd = eval(config.get(type, 'categoryDic'))
    # Some categoryDic entries come from the individual account, some from the account type.
    categoryDic.update(categoryDicAdd)
    ihead_re = re.compile(config.get(type, 'ihead_regexp'))
    iadd_re = re.compile(config.get(type, 'iadd_regexp'))
    credit_regexp = config.get(type, 'credit_regexp')
    debit_regexp = config.get(type, 'debit_regexp')
    now = datetime.date.today()
    year = now.year
    text = ''
    tdate = None
    amount = ''
    count = 0
    noCat = 0
    with open(textfile, 'r') as f:
        for l in f:
            if options.verbose:
                sys.stdout.write("line:  " + l)
            m = ihead_re.match(l)
            if m:
                if text:  # There is old text, output it first.
                    noCat += outputTransaction(output, tdate, text, amount, categoryDic)
                    text = ''
                    count += 1
                if options.verbose:
                    print("HEAD LINE:  %s" % l)
                day = int(m.group('day'))
                month = int(m.group('month'))
                # The year group is optional; without it the last known year is used.
                if m.groupdict().get('year') is not None:
                    year = int(m.group('year'))
                    if year < 100:
                        year = 2000 + year
                tdate = datetime.date(year, month, day)
                # A date in the future means the statement is from last year.
                if tdate > now:
                    tdate = datetime.date(year - 1, month, day)
                text = m.group('detail').strip()
                rawAmount = m.group('amount')
                if options.verbose:
                    print('rawAmount: "%s"' % rawAmount)
                amount = getAmount(rawAmount, credit_regexp, debit_regexp)
                if options.verbose:
                    print("amount: %s" % amount)
                    print("date: %s  text: %s  rawAmount: %s" % (tdate, text, rawAmount))
                continue
            m = iadd_re.match(l)
            if m:
                addedtext = m.group(1)
                if options.verbose:
                    print("ADDED TEXT: %s" % addedtext)
                if text and addedtext:
                    text += ' ' + addedtext.strip()
                continue
            if text:  # If we still have some text and data from an entry above, output it now.
                noCat += outputTransaction(output, tdate, text, amount, categoryDic)
                text = ''
                count += 1
    # The file may end directly after a head or detail line; without this
    # the last transaction of such a file would be lost.
    if text:
        noCat += outputTransaction(output, tdate, text, amount, categoryDic)
        count += 1
    print("%d transactions, %d without category, in account %s, file: %s"
          % (count, noCat, accountName, uc(textfile)))


def main():
    """Parse the command line, read the configuration file and convert the
    statement of every configured (or requested) account into one QIF file.
    """
    usage = textwrap.dedent("""
        %prog [options]
        Converter of ACcount STatements to QIF format.
        Needs configuration file by default in ~/.acst2qif.cfg,
        see comments there.
        Typical usage after setting up directories
        and adapting the configuration file:
        1. Get account statements from your banks as PDF files,
        save them to directories set up above.
        2. Run this program (usually without arguments).
        3. Import file following "Results written to: "
        into KMyMoney or other financial software.
        4. Check results in financial software.
        """)[1:-1]
    parser = OptionParser(version="%prog " + __version__, usage=usage)
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      default=False, help="be verbose")
    parser.add_option("-l", "--list", action="store_true", dest="listAccounts",
                      default=False, help="only list accounts in config file and quit")
    parser.add_option("-i", "--input", default=None, dest="input", help="input file (list)")
    parser.add_option("-o", "--output", default=None, dest="output", help="output file")
    parser.add_option("-a", "--account", dest="account", help="account (list)")
    parser.add_option("-c", "--configfile", default=os.path.expanduser("~/.acst2qif.cfg"),
                      dest="configfile", help="configuration file, default ~/.acst2qif.cfg")
    (options, args) = parser.parse_args()
    if args:
        parser.print_help()
        sys.exit(1)
    config = ConfigParser.RawConfigParser()
    # read() returns the list of files it could parse; an empty list means
    # the configuration file is missing or unreadable.
    if not config.read(options.configfile):
        print("ERROR: Cannot read configuration file: %s" % options.configfile)
        sys.exit(1)
    if options.listAccounts:
        print("Accounts:  %s" % config.get("General", "accounts"))
        sys.exit(0)
    accountString = options.account or config.get("General", "accounts")
    accounts = [a.strip() for a in accountString.split(',')]
    outfile = options.output or config.get("General", "outfile")
    # User can specify a list of input files (.pdf or .acst) explicitly (exactly as many as accounts).
    filelist = [f.strip() for f in options.input.split(',')] if options.input else None
    # If no filelist is specified, the latest file from the account's directory is taken as input.
    # Checked before the output file is opened, so that a wrong command
    # line does not truncate an existing output file.
    if filelist and len(filelist) != len(accounts):
        print("There must be as many files (given %d) as accounts (%d)!"
              % (len(filelist), len(accounts)))
        sys.exit(2)
    with open(outfile, 'w') as output:
        for i, account in enumerate(accounts):
            accountName = config.get(account, "name")
            qifAccountType = config.get(account, "qif_account_type")
            accountType = config.get(account, "type")
            output.write('!Account\n')
            output.write('N%s\n' % accountName)
            output.write('T%s\n' % qifAccountType)
            output.write('^\n')
            if filelist:
                inpath = filelist[i]
            else:
                statementDir = uc(config.get(account, "dir"))
                fl = getFileList(statementDir)
                if not fl:
                    print("ERROR: No input file")
                    return
                # Files are sorted alphabetically, the last one is the latest.
                inpath = os.path.join(statementDir, uc(fl[-1]))
            # SECURITY NOTE: eval() executes arbitrary Python code taken from
            # the configuration file. Only use configuration files you trust.
            categoryDic = eval(config.get(account, 'categoryDic'))
            convert(inpath, output, options, config, accountName, accountType, categoryDic)
    print("Results written to: %s" % outfile)


if __name__ == "__main__":
    main()