Converting xml to dictionary using ElementTree
The following XML-to-Python-dict snippet converts elements as well as attributes, following this XML-to-JSON "specification":
from collections import defaultdict


def etree_to_dict(t):
    """Convert an ElementTree element into a nested dict.

    The result is a single-key dict ``{t.tag: value}`` where ``value`` is:

    * ``None`` for an element with no attributes, no children and no text;
    * the stripped text for an element that has only text (an empty string
      if that text is pure whitespace);
    * otherwise a dict holding attributes under ``'@name'`` keys, non-blank
      text under ``'#text'``, and child elements under their tag. A tag that
      occurs more than once maps to a list of its values in document order.

    Tail text (text that follows a child element) is not captured.
    """
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        # Group the converted children by tag so repeated tags become a list.
        dd = defaultdict(list)
        for dc in map(etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v) for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            # The value is already a dict: only record non-blank text.
            if text:
                d[t.tag]['#text'] = text
        else:
            d[t.tag] = text
    return d
It is used like this:
# Example: parse a small document covering every element shape handled by
# etree_to_dict (empty, text-only, attribute-only, attribute + text, distinct
# children, repeated children, mixed text + child) and pretty-print the result.
from pprint import pprint
# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the C
# accelerator automatically when it is available.
from xml.etree import ElementTree as ET

# Whitespace between elements is insignificant here: etree_to_dict strips text.
e = ET.XML('''
<root>
  <e />
  <e>text</e>
  <e name="value" />
  <e name="value">text</e>
  <e> <a>text</a> <b>text</b> </e>
  <e> <a>text</a> <a>text</a> </e>
  <e> text <a>text</a> </e>
</root>
''')

d = etree_to_dict(e)
pprint(d)
The output of this example (as per above-linked "specification") should be:
{'root': {'e': [None,
                'text',
                {'@name': 'value'},
                {'#text': 'text', '@name': 'value'},
                {'a': 'text', 'b': 'text'},
                {'a': ['text', 'text']},
                {'#text': 'text', 'a': 'text'}]}}
Not necessarily pretty, but it is unambiguous, and simpler XML inputs result in simpler JSON. :)
Update
If you want to do the reverse, emit an XML string from a JSON/dict, you can use:
try:
    basestring
except NameError:  # python3
    basestring = str


def dict_to_etree(d):
    """Build an ElementTree element from a single-rooted nested dict.

    ``d`` must be a dict with exactly one key (the root tag). Within a body:

    * a falsy value (``None``, ``''``, ``{}``) produces an empty element;
    * a string becomes the element's text;
    * a dict is expanded key by key: ``'#text'`` sets the text, ``'@name'``
      sets attribute ``name``, a list value creates one child element per
      item, and any other value creates a single child element.

    Returns the root element. Malformed input (wrong types, an unknown
    ``'#...'`` key, a root dict without exactly one key) raises
    ``AssertionError``; note that these checks are ``assert`` statements and
    are therefore skipped when Python runs with ``-O``.
    """
    def _to_etree(d, root):
        if not d:
            pass
        elif isinstance(d, basestring):
            root.text = d
        elif isinstance(d, dict):
            for k, v in d.items():
                assert isinstance(k, basestring)
                if k.startswith('#'):
                    # '#text' is the only '#'-prefixed key that is accepted.
                    assert k == '#text' and isinstance(v, basestring)
                    root.text = v
                elif k.startswith('@'):
                    assert isinstance(v, basestring)
                    root.set(k[1:], v)
                elif isinstance(v, list):
                    # A list means the tag repeats: one child per item.
                    for e in v:
                        _to_etree(e, ET.SubElement(root, k))
                else:
                    _to_etree(v, ET.SubElement(root, k))
        else:
            # Always fails for unsupported types, reporting the offender.
            assert d == 'invalid type', (type(d), d)

    assert isinstance(d, dict) and len(d) == 1
    tag, body = next(iter(d.items()))
    node = ET.Element(tag)
    _to_etree(body, node)
    return node


print(ET.tostring(dict_to_etree(d)))
# Convert element ``t`` into a dict with three kinds of keys:
#   * ``t.tag``   -> list of the converted child elements (empty for a leaf),
#   * ``'@name'`` -> value of attribute ``name``,
#   * ``'text'``  -> ``t.text`` (``None`` when the element has no text).
# The children are built as a real list (``map`` is lazy on Python 3) by
# iterating ``t`` directly and attributes are read with ``.items()``, so this
# works with both lxml and the standard-library ElementTree.
def etree_to_dict(t):
    d = {t.tag: [etree_to_dict(child) for child in t]}
    d.update(('@' + k, v) for k, v in t.attrib.items())
    d['text'] = t.text  # overwrites the child list if the tag itself is 'text'
    return d
Call as
# Parse the file and convert its root element.
# NOTE(review): ``etree`` is presumably ``lxml.etree`` imported elsewhere - confirm.
tree = etree.parse("some_file.xml")
etree_to_dict(tree.getroot())
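This works as long as you don't actually have an element whose tag is text (attributes are stored under '@'-prefixed keys, so an attribute named text does not collide); if you do, then change the d['text'] = t.text line in the function body to use a different key. Also, you can't handle mixed content with this.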
(Tested on LXML.)
Based on @larsmans, if you don't need attributes, this will give you a tighter dictionary --
def etree_to_dict(t):
    """Convert element ``t`` to ``{tag: children-or-text}``, ignoring attributes.

    The value is the list of converted child elements when there are any,
    otherwise the element's text (``None`` if it has none). Text on an
    element that also has children is dropped.
    """
    # Build a real list: on Python 3 a ``map`` object is always truthy, so
    # ``map(...) or t.text`` would never fall back to the text.
    return {t.tag: [etree_to_dict(child) for child in t] or t.text}