How to filter nested JSON and preserve branch hierarchy for matches?
I think this does what you want.
from pprint import pprint

# Sample taxonomy: a list of root terms. Each term has an 'id' and a 'name',
# and may carry nested 'subterms' and/or a list of 'synonyms'.
tree = [
    {'id': 20,
     'name': 'education',
     'subterms': [
         {'id': 21, 'name': 'schools', 'synonyms': []},
         {'id': 22, 'name': 'schoolbooks', 'synonyms': ['literature']},
         {'id': 23, 'name': 'higher education', 'synonyms': ['university']},
         {'id': 25, 'name': 'conference', 'synonyms': ['lecture']},
     ]},
    {'id': 26,
     'name': 'health',
     'subterms': [
         {'id': 27, 'name': 'health issues', 'synonyms': []},
         {'id': 28, 'name': 'nutrition', 'synonyms': []},
         {'id': 29, 'name': 'medicine', 'synonyms': []},
     ]},
    {'id': 1,
     'name': 'business',
     'subterms': [
         {'id': 2,
          'name': 'industry',
          'subterms': [
              {'id': 21, 'name': 'service', 'synonyms': []},
              {'id': 21, 'name': 'agriculture', 'synonyms': []},
          ],
          'synonyms': []},
         {'id': 3, 'name': 'professions', 'synonyms': ['jobs']},
     ]},
]


def filter_by_name(node, names):
    """Prune a term tree down to the branches that lead to a match.

    A term matches when its 'name' or any of its 'synonyms' appears in
    ``names``. A term is kept when it matches itself or when at least one
    of its descendants is kept, so the path from the root to every match
    is preserved.

    Args:
        node: Either a list of term dicts or a single term dict. A term
            dict must have a 'name' key; 'subterms' and 'synonyms' are
            optional and default to empty lists.
        names: Iterable of names/synonyms to search for.

    Returns:
        For a list: a new list with the kept terms (possibly empty).
        For a dict: a shallow copy of the term whose 'subterms' key holds
        the filtered children (always present, possibly empty), or None
        when neither the term nor any descendant matches. The input is
        not modified.
    """
    if isinstance(node, list):
        # Build a real list instead of using filter(): on Python 3 filter()
        # returns a lazy iterator that is always truthy, which would make
        # the "any kept children?" check below succeed for every term.
        pruned = (filter_by_name(child, names) for child in node if child)
        return [kept for kept in pruned if kept]

    subterms = filter_by_name(node.get('subterms', []), names)
    labels = set([node['name']] + node.get('synonyms', []))

    # Keep the term if it matches directly or if it is an ancestor of a match.
    if labels.intersection(names) or subterms:
        return dict(node, subterms=subterms)
    return None


pprint(filter_by_name(tree, ['business']))
pprint(filter_by_name(tree, ['literature']))
pprint(filter_by_name(tree, ['literature', 'agriculture']))
Result:
[{'id': 1, 'name': 'business', 'subterms': []}]
[{'id': 20, 'name': 'education', 'subterms': [{'id': 22, 'name': 'schoolbooks', 'subterms': [], 'synonyms': ['literature']}]}]
[{'id': 20, 'name': 'education', 'subterms': [{'id': 22, 'name': 'schoolbooks', 'subterms': [], 'synonyms': ['literature']}]}, {'id': 1, 'name': 'business', 'subterms': [{'id': 2, 'name': 'industry', 'subterms': [{'id': 21, 'name': 'agriculture', 'subterms': [], 'synonyms': []}], 'synonyms': []}]}]