Return a list of all variable names in a nested Python dict/JSON document in dot notation
def dot_notation(obj, prefix=''):
    """Yield the dot/bracket-notation name of every leaf in a nested structure.

    Dict keys are joined to their parent with '.', list positions are
    appended as '[i]'.  Any value that is neither a dict nor a list is a
    leaf and yields the name accumulated so far.

    Args:
        obj: The (possibly nested) dict, list or leaf value to walk.
        prefix: Name accumulated for ``obj`` so far; '' at the top level.

    Yields:
        One string per leaf value, in traversal order.  Empty dicts and
        lists yield nothing.
    """
    # Leaf: the accumulated prefix is the full name.
    if not isinstance(obj, (dict, list)):
        yield prefix
        return

    if isinstance(obj, dict):
        # Only insert a separator when there is a parent name to separate from.
        stem = prefix + '.' if prefix else prefix
        labelled = ((stem + str(key), value) for key, value in obj.items())
    else:
        labelled = (
            (prefix + '[' + str(position) + ']', value)
            for position, value in enumerate(obj)
        )

    for label, child in labelled:
        for name in dot_notation(child, label):
            yield name
Example:
>>> list(dot_notation(x))
['a', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']
This is a fun one. I solved it using recursion.
def parse(d):
    """Return the dot/bracket-notation names of all leaves in a nested dict.

    Args:
        d: The top-level dict to walk.

    Returns:
        A list with one entry per leaf value, in traversal order.
    """
    return parse_dict(d)


def parse_dict(d):
    """Collect the leaf names found under the dict ``d``.

    Names of nested entries are strings built with ``str.format``; a key
    whose value is neither a dict nor a list is appended as-is.
    """
    items = []
    # items() works on both Python 2 and Python 3; iteritems() is Python 2 only.
    for key, val in d.items():
        if isinstance(val, dict):
            # use dot notation for dicts
            items += ['{}.{}'.format(key, vals) for vals in parse_dict(val)]
        elif isinstance(val, list):
            # use bracket notation for lists
            items += ['{}{}'.format(key, vals) for vals in parse_list(val)]
        else:
            # just use the key for everything else
            items.append(key)
    return items


def parse_list(l):
    """Collect the bracket-notation suffixes for the elements of list ``l``.

    Each returned string starts with '[idx]' and is meant to be appended
    directly to the name of the list itself.
    """
    items = []
    for idx, val in enumerate(l):
        if isinstance(val, dict):
            items += ['[{}].{}'.format(idx, vals) for vals in parse_dict(val)]
        elif isinstance(val, list):
            items += ['[{}]{}'.format(idx, vals) for vals in parse_list(val)]
        else:
            # A scalar element is addressed by its position, not its value.
            items.append('[{}]'.format(idx))
    return items
Here is my result:
>>> parse(x)
['a', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']
EDIT
Here it is again using generators, because I liked the answer by F.J.
def parse(d):
    """Return the dot/bracket-notation names of all leaves in a nested dict.

    Args:
        d: The top-level dict to walk.

    Returns:
        A list with one entry per leaf value, in traversal order.
    """
    return list(parse_dict(d))


def parse_dict(d):
    """Yield the leaf names found under the dict ``d``.

    Names of nested entries are strings built with ``str.format``; a key
    whose value is neither a dict nor a list is yielded as-is.
    """
    # items() works on both Python 2 and Python 3; iteritems() is Python 2 only.
    for key, val in d.items():
        if isinstance(val, dict):
            # use dot notation for dicts
            for item in parse_dict(val):
                yield '{}.{}'.format(key, item)
        elif isinstance(val, list):
            # use bracket notation
            for item in parse_list(val):
                yield '{}{}'.format(key, item)
        else:
            # lowest level - just use the key
            yield key


def parse_list(l):
    """Yield the bracket-notation suffixes for the elements of list ``l``.

    Each yielded string starts with '[idx]' and is meant to be appended
    directly to the name of the list itself.
    """
    for idx, val in enumerate(l):
        if isinstance(val, dict):
            for item in parse_dict(val):
                yield '[{}].{}'.format(idx, item)
        elif isinstance(val, list):
            for item in parse_list(val):
                yield '[{}]{}'.format(idx, item)
        else:
            # A scalar element is addressed by its position, not its value.
            yield '[{}]'.format(idx)
The same result:
>>> parse(x)
['a', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']
If the top level of your object can be a list (an array, in JSON terminology), your output format doesn't work: for example, given `["foo", "bar"]` you'd logically return `['[0]', '[1]']`, which probably isn't what you want. You can solve this by also passing in the name of the object, using a slight modification of F.J's answer:
def paths(container, name):
    """Yield the full path of every leaf reachable from ``container``.

    List elements are addressed as ``name[i]`` and dict entries as
    ``name.key``; any other value is a leaf and yields ``name`` itself.

    Args:
        container: The list, dict or leaf value to walk.
        name: The path that refers to ``container``.

    Yields:
        One path string per leaf, in traversal order.  Empty lists and
        dicts yield nothing.
    """
    if isinstance(container, list):
        labelled = (
            ("%s[%d]" % (name, index), item)
            for index, item in enumerate(container)
        )
    elif isinstance(container, dict):
        labelled = (
            ("%s.%s" % (name, key), value)
            for key, value in container.items()
        )
    else:
        # Leaf: the name passed in is already the complete path.
        yield name
        return

    # Explicit delegation loop keeps this usable before Python 3.3.
    for label, child in labelled:
        for leaf_path in paths(child, label):
            yield leaf_path
Usage:
>>> list(paths(x, "x"))
['x.a', 'x.b.c', 'x.d[0].e', 'x.d[0].f', 'x.d[1].e', 'x.d[1].f']
>>> list(paths(["foo", "bar"], "array"))
['array[0]', 'array[1]']
Python 3.3 introduces the `yield from` syntax, which makes this a little cleaner:
def paths(container, name):
    """Yield the full path of every leaf reachable from ``container``.

    List elements are addressed as ``name[i]`` and dict entries as
    ``name.key``; any other value is a leaf and yields ``name`` itself.

    Args:
        container: The list, dict or leaf value to walk.
        name: The path that refers to ``container``.

    Yields:
        One path string per leaf, in traversal order.  Empty lists and
        dicts yield nothing.
    """
    # Leaf: the name passed in is already the complete path.
    if not isinstance(container, (list, dict)):
        yield name
        return

    if isinstance(container, list):
        for position, item in enumerate(container):
            yield from paths(item, "%s[%d]" % (name, position))
        return

    for key, value in container.items():
        yield from paths(value, "%s.%s" % (name, key))