Convert Pandas Dataframe to nested JSON Convert Pandas Dataframe to nested JSON json json

Convert Pandas Dataframe to nested JSON


UPDATE:

# Build one JSON record per station: the five station columns become the
# record's scalar fields, and the station's rows are nested under
# 'Tide-Data' as a list of {'timestamp': ..., 'tide': ...} dicts.
j = (
    df.groupby(['ID', 'Location', 'Country', 'Latitude', 'Longitude'])
    # One list of row-dicts per group; only the non-key columns are kept.
    .apply(lambda group: group[['timestamp', 'tide']].to_dict('records'))
    # Move the group keys out of the index and back into regular columns.
    .reset_index()
    # The unnamed apply() result comes back as column 0; give it a real name.
    .rename(columns={0: 'Tide-Data'})
    .to_json(orient='records')
)

Result (formatted):

In [103]: print(json.dumps(json.loads(j), indent=2, sort_keys=True))[  {    "Country": "FRA",    "ID": 1,    "Latitude": 48.383,    "Location": "BREST",    "Longitude": -4.495,    "Tide-Data": [      {        "tide": 6905.0,        "timestamp": "1807-01-01"      },      {        "tide": 6931.0,        "timestamp": "1807-02-01"      },      {        "tide": 6896.0,        "timestamp": "1807-03-01"      },      {        "tide": 6953.0,        "timestamp": "1807-04-01"      },      {        "tide": 7043.0,        "timestamp": "1807-05-01"      }    ]  },  {    "Country": "DEU",    "ID": 7,    "Latitude": 53.867,    "Location": "CUXHAVEN 2",    "Longitude": 8.717,    "Tide-Data": [      {        "tide": 7093.0,        "timestamp": "1843-01-01"      },      {        "tide": 6688.0,        "timestamp": "1843-02-01"      },      {        "tide": 6493.0,        "timestamp": "1843-03-01"      },      {        "tide": 6723.0,        "timestamp": "1843-04-01"      },      {        "tide": 6533.0,        "timestamp": "1843-05-01"      }    ]  },  {    "Country": "DEU",    "ID": 8,    "Latitude": 53.899,    "Location": "WISMAR 2",    "Longitude": 11.458,    "Tide-Data": [      {        "tide": 6957.0,        "timestamp": "1848-07-01"      },      {        "tide": 6944.0,        "timestamp": "1848-08-01"      },      {        "tide": 7084.0,        "timestamp": "1848-09-01"      },      {        "tide": 6898.0,        "timestamp": "1848-10-01"      },      {        "tide": 6859.0,        "timestamp": "1848-11-01"      }    ]  },  {    "Country": "NLD",    "ID": 9,    "Latitude": 51.918,    "Location": "MAASSLUIS",    "Longitude": 4.25,    "Tide-Data": [      {        "tide": 6880.0,        "timestamp": "1848-02-01"      },      {        "tide": 6700.0,        "timestamp": "1848-03-01"      },      {        "tide": 6775.0,        "timestamp": "1848-04-01"      },      {        "tide": 6580.0,        "timestamp": "1848-05-01"      },      {        "tide": 6685.0,        
"timestamp": "1848-06-01"      }    ]  },  {    "Country": "USA",    "ID": 10,    "Latitude": 37.807,    "Location": "SAN FRANCISCO",    "Longitude": -122.465,    "Tide-Data": [      {        "tide": 6909.0,        "timestamp": "1854-07-01"      },      {        "tide": 6940.0,        "timestamp": "1854-08-01"      },      {        "tide": 6961.0,        "timestamp": "1854-09-01"      },      {        "tide": 6952.0,        "timestamp": "1854-10-01"      },      {        "tide": 6952.0,        "timestamp": "1854-11-01"      }    ]  }]

OLD answer:

You can do it using the groupby(), apply() and to_json() methods:

# Build one JSON record per station, with the station's readings nested under
# 'Tide-Data' as a {timestamp: tide} mapping.
#
# The group keys are deliberately left in the index (no as_index=False):
# apply() returns one dict per group, and the steps below rely on that result
# being a Series indexed by the group keys, so that reset_index() restores the
# keys as columns and names the values column 0.
# NOTE(review): on recent pandas versions, as_index=False appears to make
# apply() return a DataFrame here, in which case no column 0 exists and the
# rename silently does nothing -- confirm against the pandas version in use.
#
# Because the nested value is a dict keyed by timestamp, repeated timestamps
# within one group keep only the last tide value.
j = (
    df.groupby(['ID', 'Location', 'Country', 'Latitude', 'Longitude'])
    .apply(lambda x: dict(zip(x.timestamp, x.tide)))
    .reset_index()
    .rename(columns={0: 'Tide-Data'})
    .to_json(orient='records')
)

Output:

In [112]: print(json.dumps(json.loads(j), indent=2, sort_keys=True))[  {    "Country": "FRA",    "ID": 1,    "Latitude": 48.383,    "Location": "BREST",    "Longitude": -4.495,    "Tide-Data": {      "1807-01-01": 6905.0,      "1807-02-01": 6931.0,      "1807-03-01": 6896.0,      "1807-04-01": 6953.0,      "1807-05-01": 7043.0    }  },  {    "Country": "DEU",    "ID": 7,    "Latitude": 53.867,    "Location": "CUXHAVEN 2",    "Longitude": 8.717,    "Tide-Data": {      "1843-01-01": 7093.0,      "1843-02-01": 6688.0,      "1843-03-01": 6493.0,      "1843-04-01": 6723.0,      "1843-05-01": 6533.0    }  },  {    "Country": "DEU",    "ID": 8,    "Latitude": 53.899,    "Location": "WISMAR 2",    "Longitude": 11.458,    "Tide-Data": {      "1848-07-01": 6957.0,      "1848-08-01": 6944.0,      "1848-09-01": 7084.0,      "1848-10-01": 6898.0,      "1848-11-01": 6859.0    }  },  {    "Country": "NLD",    "ID": 9,    "Latitude": 51.918,    "Location": "MAASSLUIS",    "Longitude": 4.25,    "Tide-Data": {      "1848-02-01": 6880.0,      "1848-03-01": 6700.0,      "1848-04-01": 6775.0,      "1848-05-01": 6580.0,      "1848-06-01": 6685.0    }  },  {    "Country": "USA",    "ID": 10,    "Latitude": 37.807,    "Location": "SAN FRANCISCO",    "Longitude": -122.465,    "Tide-Data": {      "1854-07-01": 6909.0,      "1854-08-01": 6940.0,      "1854-09-01": 6961.0,      "1854-10-01": 6952.0,      "1854-11-01": 6952.0    }  }]

P.S. If you don't care about indentation, you can write directly to a JSON file:

# Same nesting as a {timestamp: tide} mapping per station, but written
# straight to a file instead of being returned as a string (no pretty-printing).
#
# The group keys are deliberately left in the index (no as_index=False):
# apply() returns one dict per group, and reset_index() / rename(columns={0: ...})
# rely on that result being a Series indexed by the group keys.
# NOTE(review): on recent pandas versions, as_index=False appears to make
# apply() return a DataFrame here, so column 0 would not exist -- confirm
# against the pandas version in use.
(
    df.groupby(['ID', 'Location', 'Country', 'Latitude', 'Longitude'])
    .apply(lambda x: dict(zip(x.timestamp, x.tide)))
    .reset_index()
    .rename(columns={0: 'Tide-Data'})
    .to_json('/path/to/file_name.json', orient='records')
)