File indexing completed on 2024-07-21 04:36:26

0001 #!/usr/bin/env python
0002 # -*- coding: utf-8 -*-
0003 
0004 #***************************************************************************
0005 # SPDX-FileCopyrightText: 2013 Volker Paul <volker.paul@v-paul.de>
0006 # SPDX-License-Identifier: GPL-2.0-or-later
0007 #***************************************************************************/
0008 
0009 # Usage:
0010 # 0. Prepare .acst2qif.cfg according to your needs
0011 # 1. Get account statement from your bank as PDF file
0012 # 2. python acst2qif.py <options>
0013 # 3. import account_statement.qif into KMyMoney using File - Import - QIF
0014 # (May be used with other QIF-importing applications as well, but tested only with KMyMoney)
0015 # Investment transactions are not yet implemented.
0016 
0017 # Step by step
0018 # Monthly do:
0019 # For each account:
0020 # - acquire account statement as PDF
0021 # - put it in the dir specified in the account's section; ordered chronologically
0022 # Run acst2qif.py without arguments so it uses the default .acst2qif.cfg config file.
0023 # It produces output in outfile specified in [General] section.
0024 # Import outfile in KMyMoney.
0025 # Check each account in KMyMoney against PDF account statement:
0026 # - If you missed an account statement, there will be a difference between
0027 #   KMyMoney and the statement's balance.
0028 # - If you import an account statement twice, KMyMoney will most likely report it.
0029 
0030 # Needs pdftotext in the path. Tested under Linux only.
0031 
0032 # Caveat: This script completely relies on the configuration file and the regexps in it.
0033 # A basic understanding of Python regexps is required to use it.
0034 # If you have problems writing regexps, maybe I can help.
0035 # This script stores the PDF file converted to text in an .acst file.
0036 # You can run the script with an .acst file as input instead of the PDF file.
0037 # Send me the .acst file, your current .cfg file and a description of 
0038 # what you expect the script to do.
0039 # Note that I can't write all regexps for you, I can only help you 
0040 # find errors and provide examples.
0041 
0042 __author__     = "Volker Paul"
0043 __copyright__  = "Copyright 2013, Volker Paul"
0044 __license__    = "GPL 2"
0045 __maintainer__ = "Volker Paul"
0046 __email__      = "volker.paul@v-paul.de"
0047 __docformat__  = 'restructuredtext'
0048 __status__     = "Production"
0049 
0050 import sys, os, os.path, copy, re, textwrap, datetime, subprocess
0051 from optparse import OptionParser
0052 import ConfigParser
0053 
def uc(s):
    """Try to get around "'ascii' codec can't encode character xyz".

    Decode a UTF-8 byte string to Unicode text.

    Anything that is not a byte string (already decoded text, None, ...)
    is returned unchanged, and so is a byte string that is not valid UTF-8.
    """
    if not isinstance(s, (bytes, bytearray)):
        return s
    try:
        return s.decode('utf-8')
    except UnicodeDecodeError:
        # Best effort only: hand back the raw bytes instead of failing.
        return s
0064 
def getFileList(dir, encoding='utf-8'):
    """Return the names of the PDF files located directly in a directory.

    Only bare file names whose name ends with ".pdf" are returned (no
    paths); subdirectories and their contents are ignored.  The names are
    sorted alphabetically and passed through uc().

    Prints an error and returns None if dir is not a directory.
    The encoding parameter is accepted but not used here.
    """
    if not os.path.isdir(dir):
        print("ERROR: No such directory: " + dir)
        return None
    for top, _subdirs, names in os.walk(dir):
        if top != dir:
            continue
        # Only the top level of the walk is of interest.
        names.sort()
        pdfNames = []
        for name in names:
            if name.endswith(".pdf"):
                pdfNames.append(uc(name))
        return pdfNames
0077 
def outputTransaction(output, tdate, text, amount, categoryDic):
    """Write one transaction to output as a QIF "Cash" record.

    output       file-like object the record is written to
    tdate        transaction date (formatted as DD.MM.YYYY)
    text         memo text; runs of whitespace are collapsed to one blank
    amount       amount string, written verbatim after 'T'
    categoryDic  regexp -> category mapping handed to guessCategory()

    Returns 0 if a category was found for the memo, 1 otherwise (in which
    case a "No category" message is printed and no 'L' line is written).
    """
    memo = ' '.join(text.split())
    output.write("!Type:Cash\n")
    dateString = tdate.strftime("%d.%m.%Y")
    output.write('D' + dateString + '\n')
    output.write('M' + memo + '\n')
    output.write('T' + amount + '\n')
    category = guessCategory(memo, categoryDic)
    if not category:
        print("No category for: %s %s %s" % (dateString, memo[:40], amount))
        uncategorized = 1
    else:
        output.write('L' + category + '\n')
        uncategorized = 0
    # '^' terminates the QIF record.
    output.write('^\n')
    return uncategorized
0095 
def getAmount(rawAmount, credit_regexp, debit_regexp):
    """Convert a raw amount string into a signed amount like '+1234.56'.

    Credits are recognized by credit_regexp, debits by debit_regexp; the
    credit pattern is tried first.  Both regular expressions are matched at
    the start of rawAmount and must define the named groups 'int' (integer
    part) and 'frac' (fractional part).

    Returns '+' or '-' followed by the integer part, a '.' and the
    fractional part.  If neither pattern matches, an error is printed and
    the empty string is returned.
    """
    sign = '+'
    m = re.match(credit_regexp, rawAmount)
    if not m:
        sign = '-'
        m = re.match(debit_regexp, rawAmount)
    if not m:
        print("ERROR: Can't get amount from string: '%s'" % (rawAmount,))
        print("  Matches neither credit_regexp: %s" % (credit_regexp,))
        print("  nor debit_regexp: %s" % (debit_regexp,))
        return ''
    # Only '.' (used as thousands separator) is removed from the integer part.
    int_part = m.group('int').replace('.', '')
    return sign + int_part + '.' + m.group('frac')
0113 
def guessCategory(text, categoryDic):
    """Get category resp. account name from text.

    categoryDic maps regular expressions to category names.  Each regexp is
    matched against the start of text (re.match semantics); the name of the
    first matching entry, in the dict's iteration order, is returned.
    Returns None if no entry matches.
    """
    # items() rather than iteritems(): identical here and not tied to Python 2.
    for regexp, name in categoryDic.items():
        if re.match(regexp, text):
            return name
    return None
0122 
def convert(inpath, output, options, config, accountName, type, categoryDic):
    """Convert one account statement into QIF transactions.

    inpath       statement file; a ".pdf" file is first converted with
                 "pdftotext -layout" into a ".acst" text file next to it,
                 any other file is read as text directly
    output       file-like object receiving the QIF records
    options      parsed command line options; only options.verbose is read
    config       configuration parser containing the section named by type
    accountName  account name, used only in the summary message
    type         name of the config section describing the statement format
                 (ihead_regexp, iadd_regexp, credit_regexp, debit_regexp,
                 categoryDic)
    categoryDic  regexp -> category dict of the account; it is updated in
                 place with the categoryDic of the type section

    Line types:
    ihead   item head line, contains date, transfer type and value
    iadd    item additional details, appended to the current item's text
    other   any other line; ends the current item and is otherwise ignored

    Prints a summary line with the number of transactions written.
    """
    textfile = inpath
    (root, ext) = os.path.splitext(textfile)
    if ext == ".pdf":
        pdffile = textfile
        textfile = root + ".acst"
        subprocess.Popen(["pdftotext", "-layout", pdffile, textfile],
                         stdout=subprocess.PIPE).communicate()
    # NOTE(review): eval() executes whatever the config file contains; this is
    # only acceptable as long as the config file is trusted.
    # Some categoryDic entries come from the individual account, some from
    # the account type.
    categoryDic.update(eval(config.get(type, 'categoryDic')))
    ihead_re = re.compile(config.get(type, 'ihead_regexp'))
    iadd_re = re.compile(config.get(type, 'iadd_regexp'))
    now = datetime.date.today()
    year = now.year
    # text doubles as the "transaction pending" flag: a head line whose
    # detail is empty does not start a transaction.
    text = ''
    count = 0
    noCat = 0
    with open(textfile, 'r') as f:
        for l in f:
            if options.verbose:
                sys.stdout.write("line:  " + l)
            m = ihead_re.match(l)
            if m:
                if text:  # There is old text, output it first.
                    noCat += outputTransaction(output, tdate, text, amount, categoryDic)
                    text = ''
                    count += 1
                if options.verbose:
                    print("HEAD LINE:  %s" % l)
                day = int(m.group('day'))
                month = int(m.group('month'))
                # The 'year' group is optional in ihead_regexp; it may also be
                # declared but not take part in the match (value None).
                yearText = m.groupdict().get('year')
                if yearText is not None:
                    year = int(yearText)
                    if year < 100:
                        year = 2000 + year
                tdate = datetime.date(year, month, day)
                # A date in the future is taken to belong to the previous year.
                if tdate > now:
                    tdate = datetime.date(year - 1, month, day)
                text = m.group('detail').strip()
                rawAmount = m.group('amount')
                if options.verbose:
                    print('rawAmount: "%s"' % rawAmount)
                amount = getAmount(rawAmount, config.get(type, 'credit_regexp'),
                                   config.get(type, 'debit_regexp'))
                if options.verbose:
                    print("amount: %s" % amount)
                    print("date: %s    text: %s    rawAmount: %s" % (tdate, text, rawAmount))
                continue
            m = iadd_re.match(l)
            if m:
                addedtext = m.group(1)
                if options.verbose:
                    print("ADDED TEXT: %s" % addedtext)
                if text and addedtext:
                    text += ' ' + addedtext.strip()
                continue
            if text:  # Any other line ends the entry above, output it now.
                noCat += outputTransaction(output, tdate, text, amount, categoryDic)
                text = ''
                count += 1
        if text:
            # The file ended while an entry was still pending; without this
            # the last transaction of such a file would be lost.
            noCat += outputTransaction(output, tdate, text, amount, categoryDic)
            text = ''
            count += 1
    print("%d transactions, %d without category, in account %s, file: %s" % (count, noCat, accountName, uc(textfile)))
0192 
def main():
    """Command line entry point: convert account statements to one QIF file.

    Reads the configuration file, determines the accounts to process (option
    -a or the "accounts" entry of the [General] section) and the output file
    (option -o or the "outfile" entry).  For every account an "!Account"
    header is written, followed by the transactions produced by convert().
    The input is either the file given with -i for that account or the
    alphabetically last PDF file in the account's "dir".

    Exits with status 1 on unexpected positional arguments, 0 after listing
    the accounts (-l) and 2 if the number of input files does not match the
    number of accounts.
    """
    usage = textwrap.dedent("""
        %prog [options]
        Converter of ACcount STatements to QIF format.
        Needs configuration file by default in ~/.acst2qif.cfg, 
        see comments there.
        Typical usage after setting up directories 
        and adapting the configuration file:
        1. Get account statements from your banks as PDF files, 
           save them to directories set up above.
        2. Run this program (usually without arguments).
        3. Import file following "Results written to: " 
           into KMyMoney or other financial software.
        4. Check results in financial software.
    """)[1:-1]
    # The module metadata does not define __version__; fall back to a
    # placeholder instead of failing with a NameError.
    version = globals().get("__version__", "unknown")
    parser = OptionParser(version="%prog " + version, usage=usage)
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
        default=False, help="be verbose")
    parser.add_option("-l", "--list", action="store_true", dest="listAccounts",
        default=False, help="only list accounts in config file and quit")
    parser.add_option("-i", "--input", default=None, dest="input", help="input file (list)")
    parser.add_option("-o", "--output", default=None, dest="output", help="output file")
    parser.add_option("-a", "--account", dest="account", help="account (list)")
    parser.add_option("-c", "--configfile", default=os.path.expanduser("~/.acst2qif.cfg"),
        dest="configfile", help="configuration file, default ~/.acst2qif.cfg")
    (options, args) = parser.parse_args()
    if args:
        parser.print_help()
        sys.exit(1)
    config = ConfigParser.RawConfigParser()
    config.read(options.configfile)
    if options.listAccounts:
        print("Accounts:  %s" % config.get("General", "accounts"))
        sys.exit(0)
    accountString = options.account or config.get("General", "accounts")
    accounts = [a.strip() for a in accountString.split(',')]
    outfile = options.output or config.get("General", "outfile")
    # User can specify a list of input files (.pdf or .acst) explicitly
    # (exactly as many as accounts).
    filelist = [f.strip() for f in options.input.split(',')] if options.input else None
    # Validate before opening the output file so that a wrong invocation does
    # not truncate an existing result file.
    if filelist and len(filelist) != len(accounts):
        print("There must be as many files (given %d) as accounts (%d)!" % (len(filelist), len(accounts)))
        sys.exit(2)
    with open(outfile, 'w') as output:
        for i, account in enumerate(accounts):
            accountName = config.get(account, "name")
            qifAccountType = config.get(account, "qif_account_type")
            accountType = config.get(account, "type")
            output.write('!Account\n')
            output.write('N%s\n' % accountName)
            output.write('T%s\n' % qifAccountType)
            output.write('^\n')
            if filelist:
                inpath = filelist[i]
            else:
                # If no filelist is specified, the alphabetically last file
                # from the account's directory is taken as input.
                statementDir = uc(config.get(account, "dir"))
                fl = getFileList(statementDir)
                if not fl:
                    print("ERROR: No input file")
                    return
                inpath = os.path.join(statementDir, uc(fl[-1]))
            # NOTE(review): eval() executes whatever the config file contains;
            # this is only acceptable as long as the config file is trusted.
            categoryDic = eval(config.get(account, 'categoryDic'))
            convert(inpath, output, options, config, accountName, accountType, categoryDic)
    print("Results written to: %s" % outfile)
0262 
# Run the converter only when this file is executed as a script, so that
# importing the module does not start a conversion.
if __name__ == "__main__":
    main()