Return a list of all variable names in a Python nested dict/JSON document in dot notation (tags: json)

Return a list of all variable names in a python nested dict/json document in dot notation


def dot_notation(obj, prefix=''):
    """Yield the path of every leaf value nested inside ``obj``.

    Dict keys are appended to the path with a ``.`` separator (no leading
    dot when the path is still empty) and list positions are appended as
    ``[i]``. Any value that is neither a dict nor a list is a leaf and
    yields the path accumulated so far, so a bare scalar yields ``prefix``
    itself. Empty dicts and lists yield nothing.
    """
    if isinstance(obj, dict):
        # Only insert the separator when there is already a path to extend.
        base = prefix + '.' if prefix else prefix
        for key, child in obj.items():
            child_prefix = base + str(key)
            for path in dot_notation(child, child_prefix):
                yield path
        return

    if isinstance(obj, list):
        for index, child in enumerate(obj):
            child_prefix = prefix + '[' + str(index) + ']'
            for path in dot_notation(child, child_prefix):
                yield path
        return

    # Leaf value: the accumulated prefix is its full path.
    yield prefix

Example:

>>> list(dot_notation(x))
['a', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']


This is a fun one. I solved it using recursion.

def parse(d):
    """Return a list with the path of every leaf value in the nested dict ``d``.

    Nested dict keys are joined with ``.`` and list elements are addressed
    with ``[index]``, e.g. ``'d[0].e'``.
    """
    return parse_dict(d)


def parse_dict(d):
    """Return the leaf paths found below the dict ``d``.

    Paths are relative to ``d``: each one starts with one of its keys. A key
    whose value is neither a dict nor a list is appended unchanged.
    """
    items = []
    # ``items()`` works on both Python 2 and 3; ``iteritems()`` is Python 2 only.
    for key, val in d.items():
        if isinstance(val, dict):
            # use dot notation for dicts
            items.extend('{}.{}'.format(key, sub) for sub in parse_dict(val))
        elif isinstance(val, list):
            # use bracket notation for lists
            items.extend('{}{}'.format(key, sub) for sub in parse_list(val))
        else:
            # just use the key for everything else
            items.append(key)
    return items


def parse_list(l):
    """Return the leaf paths found below the list ``l``.

    Paths are relative to ``l``: each one starts with ``[index]``.
    """
    items = []
    for idx, val in enumerate(l):
        if isinstance(val, dict):
            items.extend('[{}].{}'.format(idx, sub) for sub in parse_dict(val))
        elif isinstance(val, list):
            items.extend('[{}]{}'.format(idx, sub) for sub in parse_list(val))
        else:
            # A scalar element is addressed by its position, not its value.
            items.append('[{}]'.format(idx))
    return items

Here is my result:

>>> parse(x)
['a', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']

EDIT

Here it is again using generators, because I liked the answer by F.J:

def parse(d):
    """Return a list with the path of every leaf value in the nested dict ``d``.

    Nested dict keys are joined with ``.`` and list elements are addressed
    with ``[index]``, e.g. ``'d[0].e'``.
    """
    return list(parse_dict(d))


def parse_dict(d):
    """Yield the leaf paths found below the dict ``d``.

    Paths are relative to ``d``: each one starts with one of its keys. A key
    whose value is neither a dict nor a list is yielded unchanged.
    """
    # ``items()`` works on both Python 2 and 3; ``iteritems()`` is Python 2 only.
    for key, val in d.items():
        if isinstance(val, dict):
            # use dot notation for dicts
            for item in parse_dict(val):
                yield '{}.{}'.format(key, item)
        elif isinstance(val, list):
            # use bracket notation
            for item in parse_list(val):
                yield '{}{}'.format(key, item)
        else:
            # lowest level - just use the key
            yield key


def parse_list(l):
    """Yield the leaf paths found below the list ``l``.

    Paths are relative to ``l``: each one starts with ``[index]``.
    """
    for idx, val in enumerate(l):
        if isinstance(val, dict):
            for item in parse_dict(val):
                yield '[{}].{}'.format(idx, item)
        elif isinstance(val, list):
            for item in parse_list(val):
                yield '[{}]{}'.format(idx, item)
        else:
            # A scalar element is addressed by its position, not its value.
            yield '[{}]'.format(idx)

The same result:

>>> parse(x)
['a', 'b.c', 'd[0].e', 'd[0].f', 'd[1].e', 'd[1].f']


If the top level of your object can be a list (an array, in JSON terminology), your output format doesn't work: e.g., for ["foo", "bar"] you'd logically return ['[0]', '[1]'], which probably isn't what you want. You can solve this by also passing in the name of the object to a slight modification of F.J's answer:

def paths(container, name):
    """Yield the path of every leaf value in ``container``, rooted at ``name``.

    List elements extend the path with ``[index]`` and dict entries extend
    it with ``.key``. A value that is neither a list nor a dict is a leaf
    and yields ``name`` as-is.
    """
    is_list = isinstance(container, list)
    if not is_list and not isinstance(container, dict):
        # Leaf value: the accumulated name is the complete path.
        yield name
        return

    if is_list:
        for index, item in enumerate(container):
            item_name = "%s[%d]" % (name, index)
            for found in paths(item, item_name):
                yield found
    else:
        for key, item in container.items():
            item_name = "%s.%s" % (name, key)
            for found in paths(item, item_name):
                yield found

Usage:

>>> list(paths(x, "x"))
['x.a', 'x.b.c', 'x.d[0].e', 'x.d[0].f', 'x.d[1].e', 'x.d[1].f']
>>> list(paths(["foo", "bar"], "array"))
['array[0]', 'array[1]']

Python 3.3 introduced the `yield from` syntax, which makes this a little cleaner:

def paths(container, name):
    """Yield the path of every leaf value in ``container``, rooted at ``name``.

    List elements extend the path with ``[index]`` and dict entries extend
    it with ``.key``. A value that is neither a list nor a dict is a leaf
    and yields ``name`` as-is.
    """
    if isinstance(container, list):
        children = (
            ("%s[%d]" % (name, i), element)
            for i, element in enumerate(container)
        )
    elif isinstance(container, dict):
        children = (
            ("%s.%s" % (name, k), element)
            for k, element in container.items()
        )
    else:
        # Leaf value: the accumulated name is the complete path.
        yield name
        return

    # Children are produced lazily, so traversal order matches a direct loop.
    for child_name, child in children:
        yield from paths(child, child_name)