# Reconstructed from the patch for commit
# d39b0edefea4c36386e68fe6250dd1898b248b3e ("Initial release",
# Jorgen Schaefer, Sat, 18 Aug 2012 20:27:10 +0200), which created
# gnucashxml.py.
"""
Read-only object model and parser for GNU Cash v2 XML files.

Use from_filename() for a gzip-compressed file on disk or parse() for
an already opened file object containing the XML.  Both return a Book
holding the commodities, the account tree and the transactions.
"""

import collections
import decimal
import gzip

from xml.etree import ElementTree

try:
    from dateutil.parser import parse as parse_date
except ImportError:
    # dateutil stays the preferred parser.  This stand-in only exists so
    # that the module is still importable (and the object model usable)
    # when dateutil is not installed.
    import datetime

    def parse_date(timestr):
        """
        Parse a "YYYY-MM-DD HH:MM:SS +ZZZZ" timestamp.

        NOTE(review): assumes every ts:date value uses this layout;
        anything else raises ValueError here, whereas dateutil is far
        more lenient.
        """
        return datetime.datetime.strptime(timestr.strip(),
                                          "%Y-%m-%d %H:%M:%S %z")


# XML namespace prefixes in ElementTree's "{uri}" notation.
_GNC = '{http://www.gnucash.org/XML/gnc}'
_BOOK = '{http://www.gnucash.org/XML/book}'
_CMDTY = '{http://www.gnucash.org/XML/cmdty}'
_ACT = '{http://www.gnucash.org/XML/act}'
_TRN = '{http://www.gnucash.org/XML/trn}'
_TS = '{http://www.gnucash.org/XML/ts}'
_SPLIT = '{http://www.gnucash.org/XML/split}'


class Book(object):
    """
    A book is the main container for GNU Cash data.

    It doesn't really do anything at all by itself, except to have
    a reference to the accounts, transactions, and commodities.
    """
    def __init__(self, guid, transactions=None, root_account=None,
                 commodities=None):
        self.guid = guid
        self.transactions = transactions or []
        self.root_account = root_account
        self.commodities = commodities or []

    def __repr__(self):
        return "<Book {}>".format(self.guid)

    def walk(self):
        """Walk the account tree, starting at the root account."""
        return self.root_account.walk()

    def find_account(self, name):
        """
        Return the first account called `name`, or None if there is none.

        Accounts are visited in the order produced by walk().
        """
        for account, children, splits in self.walk():
            if account.name == name:
                return account
        return None


class Commodity(object):
    """
    A commodity is something that's stored in GNU Cash accounts.

    Consists of a name (or id) and a space (namespace).
    """
    def __init__(self, name, space=None):
        self.name = name
        self.space = space

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<Commodity {}:{}>".format(self.space, self.name)


class Account(object):
    """
    An account is part of a tree structure of accounts and contains splits.
    """
    def __init__(self, name, guid, actype, parent=None,
                 commodity=None, commodity_scu=None):
        self.name = name
        self.guid = guid
        self.actype = actype
        self.parent = parent
        self.children = []
        self.commodity = commodity
        self.commodity_scu = commodity_scu
        self.splits = []

    def __repr__(self):
        return "<Account {}>".format(self.guid)

    def walk(self):
        """
        Generate splits in this account tree by walking the tree.

        For each account, it yields a 3-tuple (account, subaccounts, splits).
        Accounts are visited breadth-first, starting with this account.

        You can modify the list of subaccounts, but should not modify
        the list of splits.  The subaccount list is a copy that is only
        queued after the caller has seen it, so removing entries from it
        prunes those subtrees from the walk.
        """
        # A deque makes taking the next account O(1); slicing a list
        # copied the whole queue on every step.
        pending = collections.deque([self])
        while pending:
            acc = pending.popleft()
            children = list(acc.children)
            yield (acc, children, acc.splits)
            pending.extend(children)

    def get_all_splits(self):
        """Return the splits of this account and all subaccounts, sorted."""
        split_list = []
        for account, children, splits in self.walk():
            split_list.extend(splits)
        return sorted(split_list)


class Transaction(object):
    """
    A transaction is a balanced group of splits.
    """

    def __init__(self, guid=None, currency=None,
                 date=None, date_entered=None,
                 description=None, splits=None):
        self.guid = guid
        self.currency = currency
        self.date = date
        self.date_entered = date_entered
        self.description = description
        self.splits = splits or []

    def __repr__(self):
        return u"<Transaction {}>".format(self.guid)

    def __lt__(self, other):
        # For sorted() only: transactions are ordered by their date.
        if isinstance(other, Transaction):
            return self.date < other.date
        # Previously this branch fell through and returned None.
        return False


class Split(object):
    """
    A split is one entry in a transaction.
    """

    def __init__(self, guid=None, memo=None,
                 reconciled_state=None, value=None,
                 quantity=None, account=None, transaction=None):
        self.guid = guid
        self.reconciled_state = reconciled_state
        self.value = value
        self.quantity = quantity
        self.account = account
        self.transaction = transaction
        self.memo = memo

    def __repr__(self):
        return "<Split {}>".format(self.guid)

    def __lt__(self, other):
        # For sorted() only: splits are ordered like their transactions.
        if isinstance(other, Split):
            return self.transaction < other.transaction
        # Previously this branch fell through and returned None.
        return False


##################################################################
# XML file parsing

def from_filename(filename):
    """Parse a GNU Cash file and return a Book object."""
    # parse() reads the whole document, so the file can be closed as
    # soon as it returns.
    with gzip.open(filename, "rb") as fobj:
        return parse(fobj)


def parse(fobj):
    """
    Parse GNU Cash XML data from a file object and return a Book object.

    Raises ValueError if the root element is not 'gnc-v2' or if it
    does not contain a book element.
    """
    tree = ElementTree.parse(fobj)
    root = tree.getroot()
    if root.tag != 'gnc-v2':
        raise ValueError("File stream was not a valid GNU Cash v2 XML file")
    book = root.find(_GNC + "book")
    if book is None:
        raise ValueError("GNU Cash v2 XML file does not contain a book")
    return _book_from_tree(book)


def _book_from_tree(tree):
    """Build a Book from a gnc:book element."""
    guid = tree.find(_BOOK + 'id').text

    commodities = []
    commoditydict = {}
    for child in tree.findall(_GNC + 'commodity'):
        comm = _commodity_from_tree(child)
        commodities.append(comm)
        commoditydict[(comm.space, comm.name)] = comm

    root_account = None
    accounts = []
    accountdict = {}
    parentdict = {}
    for child in tree.findall(_GNC + 'account'):
        parent_guid, acc = _account_from_tree(child, commoditydict)
        if acc.actype == 'ROOT':
            root_account = acc
        accounts.append(acc)
        accountdict[acc.guid] = acc
        parentdict[acc.guid] = parent_guid
    # Link the tree in a second pass so that a child may appear in the
    # file before its parent.  Going through the accounts in file order
    # keeps the order of each children list deterministic.
    for acc in accounts:
        if acc.parent is None and acc.actype != 'ROOT':
            parent = accountdict[parentdict[acc.guid]]
            acc.parent = parent
            parent.children.append(acc)

    transactions = [_transaction_from_tree(child, accountdict, commoditydict)
                    for child in tree.findall(_GNC + 'transaction')]
    # Ignored:
    # - gnc:schedxaction
    # - gnc:template-transactions
    # - gnc:count-data
    return Book(guid=guid,
                transactions=transactions,
                root_account=root_account,
                commodities=commodities)


def _commodity_from_tree(tree):
    """Build a Commodity from a gnc:commodity element."""
    name = tree.find(_CMDTY + 'id').text
    space = tree.find(_CMDTY + 'space').text
    # Ignored:
    # - cmdty:get_quotes
    # - cmdty:quote_tz
    # - cmdty:source
    # - cmdty:name
    # - cmdty:xcode
    # - cmdty:fraction
    return Commodity(name=name, space=space)


def _account_from_tree(tree, commoditydict):
    """
    Build an Account from a gnc:account element.

    Returns a 2-tuple (parent_guid, account).  The account's parent is
    not set here; the caller links the tree once all accounts are known.
    For the ROOT account, parent_guid and the commodity fields are None.
    """
    name = tree.find(_ACT + 'name').text
    guid = tree.find(_ACT + 'id').text
    actype = tree.find(_ACT + 'type').text
    if actype == 'ROOT':
        parent_guid = None
        commodity = None
        commodity_scu = None
    else:
        parent_guid = tree.find(_ACT + 'parent').text
        commodity_space = tree.find(_ACT + 'commodity/' +
                                    _CMDTY + 'space').text
        commodity_name = tree.find(_ACT + 'commodity/' +
                                   _CMDTY + 'id').text
        commodity_scu = tree.find(_ACT + 'commodity-scu').text
        commodity = commoditydict[(commodity_space, commodity_name)]
    # We ignore act:slots
    return parent_guid, Account(name=name,
                                guid=guid,
                                actype=actype,
                                commodity=commodity,
                                commodity_scu=commodity_scu)


def _transaction_from_tree(tree, accountdict, commoditydict):
    """
    Build a Transaction, including its splits, from a gnc:transaction
    element.

    Each split is also appended to the splits list of its account.
    """
    guid = tree.find(_TRN + "id").text
    currency_space = tree.find(_TRN + "currency/" +
                               _CMDTY + "space").text
    currency_name = tree.find(_TRN + "currency/" +
                              _CMDTY + "id").text
    currency = commoditydict[(currency_space, currency_name)]
    date = parse_date(tree.find(_TRN + "date-posted/" +
                                _TS + "date").text)
    date_entered = parse_date(tree.find(_TRN + "date-entered/" +
                                        _TS + "date").text)
    description = tree.find(_TRN + "description").text
    transaction = Transaction(guid=guid,
                              currency=currency,
                              date=date,
                              date_entered=date_entered,
                              description=description)

    for subtree in tree.findall(_TRN + "splits/" + _TRN + "split"):
        transaction.splits.append(
            _split_from_tree(subtree, accountdict, transaction))

    return transaction


def _split_from_tree(tree, accountdict, transaction):
    """
    Build a Split from a trn:split element.

    The split is registered with its account (looked up by guid in
    accountdict) and then returned; adding it to the transaction is
    left to the caller.
    """
    guid = tree.find(_SPLIT + "id").text
    # The memo element is optional.
    memo = tree.find(_SPLIT + "memo")
    if memo is not None:
        memo = memo.text
    reconciled_state = tree.find(_SPLIT + "reconciled-state").text
    value = _parse_number(tree.find(_SPLIT + "value").text)
    quantity = _parse_number(tree.find(_SPLIT + "quantity").text)
    account_guid = tree.find(_SPLIT + "account").text
    account = accountdict[account_guid]
    entry = Split(guid=guid,
                  memo=memo,
                  reconciled_state=reconciled_state,
                  value=value,
                  quantity=quantity,
                  account=account,
                  transaction=transaction)
    account.splits.append(entry)
    return entry


def _parse_number(numstring):
    """Convert a "numerator/denominator" string into a Decimal."""
    num, denom = numstring.split("/")
    return decimal.Decimal(num) / decimal.Decimal(denom)