Sorting XML in python etree Sorting XML in python etree xml xml

Sorting XML in python etree


Using ElementTree you can do this:

import xml.etree.ElementTree as ET


def sortchildrenby(parent, attr):
    """Sort the direct children of *parent* in place by attribute *attr*.

    Only the immediate children are reordered; deeper levels are untouched.
    Children that lack the attribute are keyed as the empty string, so they
    sort first instead of raising ``TypeError`` when ``None`` is compared
    with ``str`` on Python 3. The sort is stable, so children with equal
    keys keep their original relative order.

    :param parent: element whose children are reordered (slice assignment
        is used, so the element object itself is kept).
    :param attr: name of the attribute whose value is the sort key.
    """
    parent[:] = sorted(parent, key=lambda child: child.get(attr, ''))


if __name__ == '__main__':
    # Guarded so that importing this module does not touch the filesystem.
    tree = ET.parse('input.xml')
    root = tree.getroot()

    # Sort the first level, then the children of each first-level element.
    sortchildrenby(root, 'NAME')
    for child in root:
        sortchildrenby(child, 'NAME')

    tree.write('output.xml')


If you want to sort recursively, handling comments and sorting by all attributes:

#!/usr/bin/env python
# encoding: utf-8
from __future__ import print_function

import io
import logging


def get_node_key(node, attr=None):
    """Return the sorting key of an XML node, built from tag and attributes.

    :param node: element exposing ``tag``, ``attrib`` and ``get``.
    :param attr: attribute name to sort on; if ``None``, the values of all
        attributes are used, taken in attribute-name order.
    :returns: ``'tag'`` when there is nothing to add, otherwise
        ``'tag:value[:value...]'``.
    """
    tag = '%s' % node.tag
    if attr is None:
        # Join the tag with the same ':' separator as the values so the
        # all-attributes key has the same shape as the single-attribute key
        # and the tag cannot run into the first value.
        values = [node.get(name) for name in sorted(node.attrib)]
        return ':'.join([tag] + values)
    if attr in node.attrib:
        return '%s:%s' % (tag, node.get(attr))
    return tag


def sort_children(node, attr=None):
    """Recursively sort the children of *node* in place.

    Children are ordered by ``get_node_key(child, attr)``; if *attr* is
    ``None`` the key is built from all attributes.

    :param node: element whose subtree is sorted.
    :param attr: attribute name to sort on, or ``None`` for all attributes.
    """
    if not isinstance(node.tag, str):  # PYTHON 2: use basestring instead
        # Not a regular tag (a comment or other non-element data):
        # there is nothing to sort below it.
        return
    # Sort the direct children ...
    node[:] = sorted(node, key=lambda child: get_node_key(child, attr))
    # ... then recurse into each of them.
    for child in node:
        sort_children(child, attr)


def sort(unsorted_file, sorted_file, attr=None):
    """Sort the XML in *unsorted_file* and save the result to *sorted_file*.

    :param unsorted_file: path (or file object) of the XML document to read.
    :param sorted_file: path of the file to write the sorted document to.
    :param attr: attribute name to sort on, or ``None`` for all attributes.
    """
    # lxml is only needed here (parsing and pretty-printing); importing it
    # locally keeps the helpers above usable with any ElementTree-compatible
    # element implementation.
    from lxml import etree

    tree = etree.parse(unsorted_file)
    root = tree.getroot()
    sort_children(root, attr)
    sorted_unicode = etree.tostring(root,
                                    pretty_print=True,
                                    encoding='unicode')
    # The serialized text has no XML declaration, so readers will assume
    # UTF-8: write it explicitly as UTF-8 rather than the locale encoding.
    with io.open(sorted_file, 'w', encoding='utf-8') as output_fp:
        output_fp.write('%s' % sorted_unicode)
    # Log the destination path, not the whole document.
    logging.info('written sorted file %s', sorted_file)

Note: I am using lxml.etree (http://lxml.de/tutorial.html)