summary refs log tree commit diff
diff options
context:
space:
mode:
author Jorgen Schaefer <forcer@forcix.cx> 2012-08-18 20:27:10 +0200
committer Jorgen Schaefer <forcer@forcix.cx> 2012-08-18 20:27:10 +0200
commit d39b0edefea4c36386e68fe6250dd1898b248b3e (patch)
tree 5376b19cf73962101a8d5864632093742cd0d749
Initial release
-rw-r--r-- gnucashxml.py 298
1 files changed, 298 insertions, 0 deletions
diff --git a/gnucashxml.py b/gnucashxml.py
new file mode 100644
index 0000000..877645d
--- /dev/null
+++ b/gnucashxml.py
@@ -0,0 +1,298 @@
+import decimal
+import gzip
+
+from dateutil.parser import parse as parse_date
+from xml.etree import ElementTree
+
+
class Book(object):
    """
    Top-level container for the data of one GNU Cash book.

    A book has no behaviour of its own beyond holding references to
    the root of the account tree, the transactions, and the
    commodities.
    """
    def __init__(self, guid, transactions=None, root_account=None,
                 commodities=None):
        self.guid = guid
        self.root_account = root_account
        # Falsy arguments (None or an empty list) become a fresh list.
        self.transactions = transactions if transactions else []
        self.commodities = commodities if commodities else []

    def __repr__(self):
        return "<Book {}>".format(self.guid)

    def walk(self):
        """Walk the account tree; delegates to ``root_account.walk()``."""
        root = self.root_account
        return root.walk()

    def find_account(self, name):
        """
        Return the first account, in walk order, whose name equals
        ``name``. Returns None when no account matches.
        """
        matching = (account
                    for account, _children, _splits in self.walk()
                    if account.name == name)
        return next(matching, None)
+
+
class Commodity(object):
    """
    Something that GNU Cash accounts can hold.

    Identified by a name (or id) together with a space (namespace).
    """
    def __init__(self, name, space=None):
        self.space = space
        self.name = name

    def __repr__(self):
        return "<Commodity {}:{}>".format(self.space, self.name)

    def __str__(self):
        # The plain string form is just the name, without the namespace.
        return self.name
+
+
class Account(object):
    """
    One node in the tree of accounts; it holds the splits booked to it.
    """
    def __init__(self, name, guid, actype, parent=None,
                 commodity=None, commodity_scu=None):
        # Identity of the account.
        self.guid = guid
        self.name = name
        self.actype = actype
        # What the account holds.
        self.commodity = commodity
        self.commodity_scu = commodity_scu
        # Tree links and contents; both lists start empty and are
        # filled in by whoever builds the tree.
        self.parent = parent
        self.children = []
        self.splits = []

    def __repr__(self):
        return "<Account {}>".format(self.guid)

    def walk(self):
        """
        Walk the subtree rooted at this account, breadth first.

        Yields a 3-tuple (account, subaccounts, splits) per account.

        ``subaccounts`` is a copy of the account's child list. Its
        contents at the time the generator is resumed decide which
        accounts are visited later, so the caller may modify it.
        ``splits`` is the account's own list and should not be modified.
        """
        pending = [self]
        cursor = 0
        while cursor < len(pending):
            current = pending[cursor]
            cursor += 1
            subaccounts = list(current.children)
            yield (current, subaccounts, current.splits)
            # Read the list only after the caller had a chance to edit it.
            pending.extend(subaccounts)

    def get_all_splits(self):
        """Return a sorted list of every split in this account's subtree."""
        collected = [split
                     for _account, _subaccounts, splits in self.walk()
                     for split in splits]
        collected.sort()
        return collected
+
+
class Transaction(object):
    """
    A transaction is a balanced group of splits.

    Transactions order by their ``date`` attribute so that lists of
    them can be passed to sorted().
    """

    def __init__(self, guid=None, currency=None,
                 date=None, date_entered=None,
                 description=None, splits=None):
        self.guid = guid
        self.currency = currency
        self.date = date
        self.date_entered = date_entered
        self.description = description
        # A falsy argument (None or an empty list) becomes a fresh list.
        self.splits = splits or []

    def __repr__(self):
        return u"<Transaction {}>".format(self.guid)

    def __lt__(self, other):
        """
        Compare two transactions by ``date`` (for sorted() only).

        Returns NotImplemented when ``other`` is not a Transaction, so
        Python can try the reflected comparison or raise TypeError.
        The previous code fell off the end and returned None here.
        """
        if not isinstance(other, Transaction):
            return NotImplemented
        return self.date < other.date
+
+
class Split(object):
    """
    A split is one entry in a transaction.

    Splits order by the transaction they belong to so that lists of
    them can be passed to sorted().
    """

    def __init__(self, guid=None, memo=None,
                 reconciled_state=None, value=None,
                 quantity=None, account=None, transaction=None):
        self.guid = guid
        self.reconciled_state = reconciled_state
        self.value = value
        self.quantity = quantity
        self.account = account
        self.transaction = transaction
        self.memo = memo

    def __repr__(self):
        return "<Split {}>".format(self.guid)

    def __lt__(self, other):
        """
        Compare two splits by their ``transaction`` (for sorted() only).

        Returns NotImplemented when ``other`` is not a Split, so Python
        can try the reflected comparison or raise TypeError. The
        previous code fell off the end and returned None here.
        """
        if not isinstance(other, Split):
            return NotImplemented
        return self.transaction < other.transaction
+
+
+
+##################################################################
+# XML file parsing
+
def from_filename(filename):
    """
    Parse a GNU Cash file and return a Book object.

    The file is opened with gzip.open() in binary mode. The handle is
    closed when parsing finishes, whether it succeeds or raises.
    """
    with gzip.open(filename, "rb") as fobj:
        return parse(fobj)
+
+
def parse(fobj):
    """
    Read GNU Cash XML from the file object ``fobj`` and build a Book.

    Raises ValueError when the root element is not ``gnc-v2``.
    """
    root = ElementTree.parse(fobj).getroot()
    if root.tag == 'gnc-v2':
        book_node = root.find("{http://www.gnucash.org/XML/gnc}book")
        return _book_from_tree(book_node)
    raise ValueError("File stream was not a valid GNU Cash v2 XML file")
+
+
def _book_from_tree(tree):
    """
    Build a Book from a ``gnc:book`` element.

    Reads the book id, then the commodity, account and transaction
    elements found by ``tree.findall``, and links each non-ROOT
    account to its parent.
    """
    gnc = '{http://www.gnucash.org/XML/gnc}'

    guid = tree.find('{http://www.gnucash.org/XML/book}id').text

    # Commodities, plus a lookup keyed by (space, name).
    commodities = [_commodity_from_tree(node)
                   for node in tree.findall(gnc + 'commodity')]
    commoditydict = {}
    for commodity in commodities:
        commoditydict[(commodity.space, commodity.name)] = commodity

    # First pass: create every account and remember its parent's guid.
    root_account = None
    accountdict = {}
    parentdict = {}
    for node in tree.findall(gnc + 'account'):
        parent_guid, account = _account_from_tree(node, commoditydict)
        accountdict[account.guid] = account
        parentdict[account.guid] = parent_guid
        if account.actype == 'ROOT':
            root_account = account

    # Second pass: all accounts exist now, so parents can be resolved.
    for account in accountdict.values():
        if account.actype == 'ROOT' or account.parent is not None:
            continue
        parent = accountdict[parentdict[account.guid]]
        account.parent = parent
        parent.children.append(account)

    transactions = [
        _transaction_from_tree(node, accountdict, commoditydict)
        for node in tree.findall(gnc + 'transaction')
    ]

    # Not read:
    # '{http://www.gnucash.org/XML/gnc}schedxaction'
    # '{http://www.gnucash.org/XML/gnc}template-transactions'
    # '{http://www.gnucash.org/XML/gnc}count-data'
    return Book(guid=guid,
                transactions=transactions,
                root_account=root_account,
                commodities=commodities)
+
+
def _commodity_from_tree(tree):
    """Build a Commodity from the ``cmdty:id`` and ``cmdty:space`` children."""
    cmdty = '{http://www.gnucash.org/XML/cmdty}'

    commodity_id = tree.find(cmdty + 'id').text
    namespace = tree.find(cmdty + 'space').text
    # Child elements that are not read here:
    # - cmdty:get_quotes
    # - cmdty:quote_tz
    # - cmdty:source
    # - cmdty:name
    # - cmdty:xcode
    # - cmdty:fraction
    return Commodity(name=commodity_id, space=namespace)
+
+
def _account_from_tree(tree, commoditydict):
    """
    Build an Account from an account element.

    Returns a 2-tuple ``(parent_guid, account)``. For an account of
    type ROOT the parent guid, commodity and commodity_scu are all
    None. Otherwise the commodity is looked up in ``commoditydict``
    by ``(space, id)``.
    """
    act = '{http://www.gnucash.org/XML/act}'
    cmdty = '{http://www.gnucash.org/XML/cmdty}'

    def text_of(path):
        # Text content of the element found at ``path`` under ``tree``.
        return tree.find(path).text

    name = text_of(act + 'name')
    guid = text_of(act + 'id')
    actype = text_of(act + 'type')

    parent_guid = None
    commodity = None
    commodity_scu = None
    if actype != 'ROOT':
        parent_guid = text_of(act + 'parent')
        commodity_space = text_of(act + 'commodity/' + cmdty + 'space')
        commodity_name = text_of(act + 'commodity/' + cmdty + 'id')
        commodity_scu = text_of(act + 'commodity-scu')
        commodity = commoditydict[(commodity_space, commodity_name)]

    # act:slots is not read.
    account = Account(name=name,
                      guid=guid,
                      actype=actype,
                      commodity=commodity,
                      commodity_scu=commodity_scu)
    return parent_guid, account
+
+
def _transaction_from_tree(tree, accountdict, commoditydict):
    """
    Build a Transaction, including its splits, from a transaction element.

    The currency is looked up in ``commoditydict`` by ``(space, id)``.
    Both dates are parsed with ``parse_date``. Each split is built by
    ``_split_from_tree`` and appended to ``transaction.splits``.

    ``accountdict`` is passed through to ``_split_from_tree``.
    """
    trn = '{http://www.gnucash.org/XML/trn}'
    cmdty = '{http://www.gnucash.org/XML/cmdty}'
    ts = '{http://www.gnucash.org/XML/ts}'

    guid = tree.find(trn + "id").text
    currency_space = tree.find(trn + "currency/" +
                               cmdty + "space").text
    currency_name = tree.find(trn + "currency/" +
                              cmdty + "id").text
    currency = commoditydict[(currency_space, currency_name)]
    date = parse_date(tree.find(trn + "date-posted/" +
                                ts + "date").text)
    date_entered = parse_date(tree.find(trn + "date-entered/" +
                                        ts + "date").text)
    description = tree.find(trn + "description").text
    transaction = Transaction(guid=guid,
                              currency=currency,
                              date=date,
                              date_entered=date_entered,
                              description=description)

    # The transaction must exist first: each split keeps a reference to it.
    for subtree in tree.findall(trn + "splits/" + trn + "split"):
        new_split = _split_from_tree(subtree, accountdict, transaction)
        transaction.splits.append(new_split)

    return transaction
+
+
def _split_from_tree(tree, accountdict, transaction):
    """
    Build a Split from a split element.

    The split's account is looked up in ``accountdict`` by guid. The
    new split is appended to that account's ``splits`` list and then
    returned. The memo is None when the element has no memo child.
    """
    ns = '{http://www.gnucash.org/XML/split}'

    guid = tree.find(ns + "id").text
    memo_node = tree.find(ns + "memo")
    memo = None if memo_node is None else memo_node.text
    reconciled_state = tree.find(ns + "reconciled-state").text
    value = _parse_number(tree.find(ns + "value").text)
    quantity = _parse_number(tree.find(ns + "quantity").text)
    account = accountdict[tree.find(ns + "account").text]

    new_split = Split(guid=guid,
                      memo=memo,
                      reconciled_state=reconciled_state,
                      value=value,
                      quantity=quantity,
                      account=account,
                      transaction=transaction)
    account.splits.append(new_split)
    return new_split
+
+
+def _parse_number(numstring):
+ num, denum = numstring.split("/")
+ return decimal.Decimal(num) / decimal.Decimal(denum)