How to convert an XML string to a dictionary? (python)

How to convert an XML string to a dictionary?


xmltodict (full disclosure: I wrote it) does exactly that:

# The document to convert, kept as a single-line string.
person_xml = """<?xml version="1.0" ?><person>  <name>john</name>  <age>20</age></person>"""
xmltodict.parse(person_xml)
# {u'person': {u'age': u'20', u'name': u'john'}}


This is a great module that someone created. I've used it several times. http://code.activestate.com/recipes/410469-xml-as-dictionary/

Here is the code from the website just in case the link goes bad.

# ``xml.etree.cElementTree`` was removed in Python 3.9; the plain
# ``ElementTree`` module is importable on every version.
from xml.etree import ElementTree


class XmlListConfig(list):
    """List built from a sequence of elements.

    Each element in ``aList`` contributes at most one item:

    * an ``XmlDictConfig`` if it has exactly one child, or its first two
      children have different tags;
    * a nested ``XmlListConfig`` if its first two children share a tag;
    * its stripped text if it has no children and the text is non-empty.

    Elements with no children and no (non-blank) text are skipped.
    """

    def __init__(self, aList):
        for element in aList:
            # ``len(element)`` is the number of child elements. Truth-testing
            # an Element directly is deprecated, so the count is explicit.
            if len(element):
                # treat like dict
                if len(element) == 1 or element[0].tag != element[1].tag:
                    self.append(XmlDictConfig(element))
                # treat like list
                else:
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)


class XmlDictConfig(dict):
    '''
    Dictionary view of an element: its attributes plus one entry per child.

    Example usage:

    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)

    Or, if you want to use an XML string:

    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)

    And then use xmldict for what it is... a dict.

    Note: entries are keyed by child tag, so when several children of
    ``parent_element`` share a tag, later ones overwrite earlier ones.
    '''

    def __init__(self, parent_element):
        if parent_element.items():
            self.update(dict(parent_element.items()))
        for element in parent_element:
            # Explicit child count instead of the deprecated ``if element:``.
            if len(element):
                # treat like dict - we assume that if the first two tags
                # in a series are different, then they are all different.
                if len(element) == 1 or element[0].tag != element[1].tag:
                    aDict = XmlDictConfig(element)
                # treat like list - we assume that if the first two tags
                # in a series are the same, then the rest are the same.
                else:
                    # here, we put the list in dictionary; the key is the
                    # tag name the list elements all share in common, and
                    # the value is the list itself
                    aDict = {element[0].tag: XmlListConfig(element)}
                # if the tag has attributes, add those to the dict
                if element.items():
                    aDict.update(dict(element.items()))
                self.update({element.tag: aDict})
            # this assumes that if you've got an attribute in a tag,
            # you won't be having any text. This may or may not be a
            # good idea -- time will tell. It works for the way we are
            # currently doing XML configuration files...
            elif element.items():
                self.update({element.tag: dict(element.items())})
            # finally, if there are no child tags and no attributes, extract
            # the text
            else:
                self.update({element.tag: element.text})

Example usage:

# Build the dictionary from an XML file on disk.
tree = ElementTree.parse('your_file.xml')
root = tree.getroot()
xmldict = XmlDictConfig(root)

Or, if you want to use an XML string:

# Build the dictionary from XML held in a string (``xml_string``).
root = ElementTree.XML(xml_string)
xmldict = XmlDictConfig(root)


The following XML-to-Python-dict snippet parses elements as well as attributes, following this XML-to-JSON "specification". It is the most general solution handling all cases of XML.

from collections import defaultdict


def etree_to_dict(t):
    """Convert an ElementTree element into a one-key ``{tag: body}`` dict.

    ``body`` depends on what the element contains:

    * ``None`` -- no children, no attributes, no text;
    * the stripped text -- text only (no children, no attributes);
    * a dict otherwise, holding
        - one key per child tag (children converted recursively; children
          sharing a tag are collected into a list in document order),
        - one ``'@' + name`` key per attribute,
        - ``'#text'`` with the stripped text, if it is non-empty.

    Only ``t.text`` is read; text that follows a child element (its
    ``tail``) is not included in the result.
    """
    # Start with an empty body: a dict if attributes will be added, else None.
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        # Group converted children by tag so repeated tags become lists.
        dd = defaultdict(list)
        for dc in map(etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v) for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            # Body is already a dict; only record text that is not blank.
            if text:
                d[t.tag]['#text'] = text
        else:
            # Text-only element: the body is the text itself.
            d[t.tag] = text
    return d

It is used like this:

from pprint import pprint
# ``cElementTree`` was removed in Python 3.9; ``ElementTree`` works everywhere.
from xml.etree import ElementTree as ET

# One <e> child per case covered by the conversion: empty, text only,
# attribute only, attribute + text, distinct children, repeated children,
# and text mixed with a child.
e = ET.XML('''<root>  <e />  <e>text</e>  <e name="value" />  <e name="value">text</e>  <e> <a>text</a> <b>text</b> </e>  <e> <a>text</a> <a>text</a> </e>  <e> text <a>text</a> </e></root>''')

pprint(etree_to_dict(e))

The output of this example (as per above-linked "specification") should be:

{'root': {'e': [None,                'text',                {'@name': 'value'},                {'#text': 'text', '@name': 'value'},                {'a': 'text', 'b': 'text'},                {'a': ['text', 'text']},                {'#text': 'text', 'a': 'text'}]}}

Not necessarily pretty, but it is unambiguous, and simpler XML inputs result in simpler JSON. :)


Update

If you want to do the reverse (emit an XML string from a JSON/dict), you can use:

try:
    basestring
except NameError:  # python3
    basestring = str


def dict_to_etree(d):
    """Serialize a one-key ``{tag: body}`` dict to XML.

    ``body`` may be:

    * falsy (``None``, ``''``, ``{}``) -- produces an empty element;
    * a string -- becomes the element's text;
    * a dict, whose keys are interpreted as
        - ``'#text'``: the element's text (value must be a string),
        - ``'@name'``: attribute ``name`` (value must be a string),
        - anything else: a child tag; a list value produces one child
          per item, any other value produces a single child.

    Returns whatever ``ET.tostring`` returns for the root element (a byte
    string on Python 3).

    Raises ``TypeError`` for a body that is neither a string nor a dict.
    The remaining checks use ``assert`` and raise ``AssertionError``; note
    that assertions are skipped when Python runs with ``-O``.
    """
    def _to_etree(d, root):
        # Fill ``root`` in place from the body ``d``.
        if not d:
            pass
        elif isinstance(d, basestring):
            root.text = d
        elif isinstance(d, dict):
            for k, v in d.items():
                assert isinstance(k, basestring)
                if k.startswith('#'):
                    assert k == '#text' and isinstance(v, basestring)
                    root.text = v
                elif k.startswith('@'):
                    assert isinstance(v, basestring)
                    root.set(k[1:], v)
                elif isinstance(v, list):
                    # Repeated tag: one child element per list item.
                    for e in v:
                        _to_etree(e, ET.SubElement(root, k))
                else:
                    _to_etree(v, ET.SubElement(root, k))
        else:
            raise TypeError('invalid type: ' + str(type(d)))

    # The top level must name exactly one root element.
    assert isinstance(d, dict) and len(d) == 1
    tag, body = next(iter(d.items()))
    node = ET.Element(tag)
    _to_etree(body, node)
    return ET.tostring(node)


# Example input in the dict layout described in the docstring above.
d = {'root': {'e': [None,
                    'text',
                    {'@name': 'value'},
                    {'#text': 'text', '@name': 'value'},
                    {'a': 'text', 'b': 'text'},
                    {'a': ['text', 'text']},
                    {'#text': 'text', 'a': 'text'}]}}
pprint(dict_to_etree(d))