Writing Nested JSON Dictionary List To CSV Writing Nested JSON Dictionary List To CSV json json

Writing Nested JSON Dictionary List To CSV


I'm sure someone will come by with a much more elegant solution. That being said:

You have a few problems.

  • You have inconsistent entries with the fields you want to align.
  • Even if you pad your data you have intermediate lists that need flattened out.
  • Then you still have separated data that needs to be merged together.
  • DictWriter AFAIK expects it's data in the format of [{'column': 'entry'},{'column': 'entry'} so even if you do all the previous steps you're still not in the right format.

So let's get started.

For the first two parts we can combine.

def pad_list(lst, size, padding=None):    # we wouldn't have to make a copy but I prefer to    # avoid the possibility of getting bitten by mutability    _lst = lst[:]    for _ in range(len(lst), size):        _lst.append(padding)    return _lst# this expects already parsed json datadef flatten(json_data):    lst = []    for dct in json_data:        # here we're just setting a max size of all dict entries        # this is in case the shorter entry is in the first iteration        max_size = 0        # we initialize a dict for each of the list entries        # this is in case you have inconsistent lengths between lists        flattened = dict()        for k, v in dct.items():            entries = list(next(iter(v), dict()).values())            flattened[k] = entries            max_size = max(len(entries), max_size)        # here we append the padded version of the keys for the dict        lst.append({k: pad_list(v, max_size) for k, v in flattened.items()})    return lst

So now we have a flattened, list of dicts whos values are lists of consistent length. Essentially:

[    {        "Basic_Information_Source": [            "image1.png",            "PNG",            "RGB",            574,            262,            277274        ],        "Basic_Information_Destination": [            "image1_dst.png",            "PNG",            "RGB",            574,            262,            277539        ],        "Values": [            75.05045463635267,            0.006097560975609756,            0.045083481733371615,            0.008639858263904898,            None,            None        ]    }]

But this list has multiple dicts that need to be merged, not just one.

So we need to merge.

# this should be self explanatorydef merge(flattened):    merged = dict()    for dct in flattened:        for k, v in dct.items():            if k not in merged:                merged[k] = []            merged[k].extend(v)    return merged

This gives us something close to this:

{    "Basic_Information_Source": [        "image1.png",        "PNG",        "RGB",        574,        262,        277274,        "image2.png",        "PNG",        "RGB",        1600,        1066,        1786254    ],    "Basic_Information_Destination": [        "image1_dst.png",        "PNG",        "RGB",        574,        262,        277539,        "image2_dst.png",        "PNG",        "RGB",        1600,        1066,        1782197    ],    "Values": [        75.05045463635267,        0.006097560975609756,        0.045083481733371615,        0.008639858263904898,        None,        None,        85.52662890580055,        0.0005464352720450282,        0.013496113910369758,        0.003800236380811839,        None,        None    ]}

But wait, we still need to format it for the writer.

Our data needs to be in the format of [{'column_1': 'entry', column_2: 'entry'},{'column_1': 'entry', column_2: 'entry'}

So we format:

def format_for_writer(merged):    formatted = []    for k, v in merged.items():        for i, item in enumerate(v):            # on the first pass this will append an empty dict            # on subsequent passes it will be ignored            # and add keys into the existing dict            if i >= len(formatted):                formatted.append(dict())            formatted[i][k] = item    return formatted

So finally, we have a nice clean formatted data structure we can just hand to our writer function.

def convert_csv(formatted):    keys = formatted[0].keys()    with open('test.csv', 'w', encoding='utf8', newline='')  as output_file:        dict_writer = csv.DictWriter(output_file, keys)        dict_writer.writeheader()        dict_writer.writerows(formatted)

Full code with json string:

import jsonimport csvjson_raw = """\[    {        "Basic_Information_Source": [            {                "Image": "image1.png",                "Image_Format": "PNG",                "Image_Mode": "RGB",                "Image_Width": 574,                "Image_Height": 262,                "Image_Size": 277274            }        ],        "Basic_Information_Destination": [            {                "Image": "image1_dst.png",                "Image_Format": "PNG",                "Image_Mode": "RGB",                "Image_Width": 574,                "Image_Height": 262,                "Image_Size": 277539            }        ],        "Values": [            {                "Value1": 75.05045463635267,                "Value2": 0.006097560975609756,                "Value3": 0.045083481733371615,                "Value4": 0.008639858263904898            }        ]    },    {        "Basic_Information_Source": [            {                "Image": "image2.png",                "Image_Format": "PNG",                "Image_Mode": "RGB",                "Image_Width": 1600,                "Image_Height": 1066,                "Image_Size": 1786254            }        ],        "Basic_Information_Destination": [            {                "Image": "image2_dst.png",                "Image_Format": "PNG",                "Image_Mode": "RGB",                "Image_Width": 1600,                "Image_Height": 1066,                "Image_Size": 1782197            }        ],        "Values": [            {                "Value1": 85.52662890580055,                "Value2": 0.0005464352720450282,                "Value3": 0.013496113910369758,                "Value4": 0.003800236380811839            }        ]    }]"""def pad_list(lst, size, padding=None):    _lst = lst[:]    for _ in range(len(lst), size):        _lst.append(padding)    return _lstdef flatten(json_data):    lst = []    for dct in json_data:        max_size = 0        flattened = dict()        for k, v in dct.items():            entries = list(next(iter(v), dict()).values())            flattened[k] = entries            max_size = max(len(entries), max_size)        lst.append({k: pad_list(v, max_size) for k, v in flattened.items()})    return lstdef merge(flattened):    merged = dict()    for dct in flattened:        for k, v in dct.items():            if k not in merged:                merged[k] = []            merged[k].extend(v)    return mergeddef format_for_writer(merged):    formatted = []    for k, v in merged.items():        for i, item in enumerate(v):            if i >= len(formatted):                formatted.append(dict())            formatted[i][k] = item    return formatteddef convert_csv(formatted):    keys = formatted[0].keys()    with open('test.csv', 'w', encoding='utf8', newline='')  as output_file:        dict_writer = csv.DictWriter(output_file, keys)        dict_writer.writeheader()        dict_writer.writerows(formatted)def main():    json_data = json.loads(json_raw)    flattened = flatten(json_data)    merged = merge(flattened)    formatted = format_for_writer(merged)    convert_csv(formatted)if __name__ == '__main__':    main()