ElementTree iterparse strategy
Here's one possible approach: we maintain a path list and peek backwards to find the parent node(s).
# Stream-parse the document while tracking the tags of all currently open
# elements, so that an element's ancestors can be inspected when it closes.
# NOTE(review): `ET` is presumably xml.etree.ElementTree and `file_path` the
# path of the XML file to parse; both are defined outside this snippet.
path = []
for event, elem in ET.iterparse(file_path, events=("start", "end")):
    if event == 'start':
        # Entering an element: push its tag onto the stack of open tags.
        path.append(elem.tag)
    elif event == 'end':
        # process the tag
        if elem.tag == 'name':
            # At this point `path` still holds this element's own tag plus
            # the tags of every enclosing element.
            if 'members' in path:
                print('member')
            else:
                print('nonmember')
        # Leaving the element: remove its tag from the stack.
        path.pop()
pulldom is excellent for this. You get a SAX stream. You can iterate through the stream, and when you find a node that you are interested in, load that node into a DOM fragment.
import xml.dom.pulldom as pulldom

import xpath  # from http://code.google.com/p/py-dom-xpath/

# Pull parse events from the file one at a time; only the <family> elements
# are expanded into DOM fragments.
events = pulldom.parse('families.xml')
for event, node in events:
    if event == pulldom.START_ELEMENT and node.tagName == 'family':
        # Read the rest of this element from the stream into `node`.
        events.expandNode(node)
        # node now contains a dom fragment
        family_name = xpath.findvalue('name', node)
        members = xpath.findvalues('members/name', node)
        print('family name: {0}, members: {1}'.format(family_name, members))
output:
family name: Simpson, members: [u'Hommer', u'Marge', u'Bart']
family name: Griffin, members: [u'Peter', u'Brian', u'Meg']