How to implement custom indentation when pretty-printing with the JSON module? How to implement custom indentation when pretty-printing with the JSON module? python python

How to implement custom indentation when pretty-printing with the JSON module?


(Note: The code in this answer only works with json.dumps(), which returns a JSON formatted string, but not with json.dump(), which writes directly to file-like objects. There's a modified version of it that works with both in my answer to the question Write two-dimensional list to JSON file.)

Updated

Below is a version of my original answer that has been revised several times. Unlike the original, which I posted only to show how to get the first idea in J.F.Sebastian's answer to work, and which, like his, returned a non-indented string representation of the object, the latest updated version returns the Python object JSON formatted in isolation.

The keys of each coordinate dict will appear in sorted order, as per one of the OP's comments, but only if a sort_keys=True keyword argument is specified in the initial json.dumps() call driving the process, and it no longer changes the object's type to a string along the way. In other words, the actual type of the "wrapped" object is now maintained.

I think not understanding the original intent of my post resulted in a number of folks downvoting it—so, primarily for that reason, I have "fixed" and improved my answer several times. The current version is a hybrid of my original answer coupled with some of the ideas @Erik Allik used in his answer, plus useful feedback from other users shown in the comments below this answer.

The following code appears to work unchanged in both Python 2.7.16 and 3.7.4.

import json
import re


class NoIndent(object):
    """Value wrapper marking an object to be serialized without indentation."""

    def __init__(self, value):
        self.value = value


class MyEncoder(json.JSONEncoder):
    """JSON encoder that emits NoIndent-wrapped values on a single line.

    While the regular (possibly indented) encoding runs, every NoIndent
    instance is replaced by a placeholder string built from FORMAT_SPEC and
    the instance's id().  After encoding, each quoted placeholder is swapped
    for the plain json.dumps() rendering of the wrapped value.

    Only encode() performs the substitution, so this works with
    json.dumps() but not with json.dump(), which goes through iterencode().
    """

    FORMAT_SPEC = '@@{}@@'
    regex = re.compile(FORMAT_SPEC.format(r'(\d+)'))
    # Placeholder together with the JSON string quotes that surround it in
    # the encoded output; used for the single-pass substitution in encode().
    _quoted_regex = re.compile('"{}"'.format(FORMAT_SPEC.format(r'(\d+)')))

    def __init__(self, **kwargs):
        # Save copy of any keyword argument values needed for use here.
        self.__sort_keys = kwargs.get('sort_keys', None)
        # Maps id(NoIndent instance) -> instance for every wrapper seen by
        # default().  Looking ids up here (instead of turning an integer
        # back into an object pointer) means text that merely looks like a
        # placeholder can never be dereferenced as a memory address.
        self._wrapped = {}
        super(MyEncoder, self).__init__(**kwargs)

    def default(self, obj):
        """Return a placeholder string for NoIndent objects.

        Any other type is delegated to the base class, which raises
        TypeError for objects it cannot serialize.
        """
        if isinstance(obj, NoIndent):
            obj_id = id(obj)
            self._wrapped[obj_id] = obj
            return self.FORMAT_SPEC.format(obj_id)
        return super(MyEncoder, self).default(obj)

    def encode(self, obj):
        """Return the JSON text for obj with NoIndent values left unindented."""
        try:
            json_repr = super(MyEncoder, self).encode(obj)  # Default JSON.
            # One pass over the text: each quoted placeholder is replaced by
            # the JSON representation of the corresponding wrapped object.
            return self._quoted_regex.sub(self._expand_placeholder, json_repr)
        finally:
            # Do not keep the wrapped objects alive after this call.
            self._wrapped.clear()

    def _expand_placeholder(self, match):
        """Return the compact JSON for a matched placeholder.

        Placeholders whose id was not registered by default() are ordinary
        string data that happens to look like a marker; they are returned
        unchanged.
        """
        no_indent = self._wrapped.get(int(match.group(1)))
        if no_indent is None:
            return match.group(0)
        return json.dumps(no_indent.value, sort_keys=self.__sort_keys)


if __name__ == '__main__':
    from string import ascii_lowercase as letters

    data_structure = {
        'layer1': {
            'layer2': {
                'layer3_1': NoIndent([{"x":1,"y":7}, {"x":0,"y":4}, {"x":5,"y":3},
                                      {"x":6,"y":9},
                                      {k: v for v, k in enumerate(letters)}]),
                'layer3_2': 'string',
                'layer3_3': NoIndent([{"x":2,"y":8,"z":3}, {"x":1,"y":5,"z":4},
                                      {"x":6,"y":9,"z":8}]),
                'layer3_4': NoIndent(list(range(20))),
            }
        }
    }

    print(json.dumps(data_structure, cls=MyEncoder, sort_keys=True, indent=2))

Output:

{  "layer1": {    "layer2": {      "layer3_1": [{"x": 1, "y": 7}, {"x": 0, "y": 4}, {"x": 5, "y": 3}, {"x": 6, "y": 9}, {"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "k": 10, "l": 11, "m": 12, "n": 13, "o": 14, "p": 15, "q": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25}],      "layer3_2": "string",      "layer3_3": [{"x": 2, "y": 8, "z": 3}, {"x": 1, "y": 5, "z": 4}, {"x": 6, "y": 9, "z": 8}],      "layer3_4": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]    }  }}


A bodge, but once you have the string from dumps(), you can perform a regular expression substitution on it, if you're sure of the format of its contents. Something along the lines of:

# Encode with normal indentation first, then collapse every object that has
# exactly two single-character keys with unsigned integer values (such as
# {"x": 1, "y": 7}) onto one line, dropping the whitespace around it.
s = json.dumps(data_structure, indent=2)
# Raw string so that \s and \d reach the regex engine without relying on
# Python's handling of unrecognized string escapes.
s = re.sub(r'\s*{\s*"(.)": (\d+),\s*"(.)": (\d+)\s*}(,?)\s*', r'{"\1":\2,"\3":\4}\5', s)


The following solution seems to work correctly on Python 2.7.x. It uses a workaround taken from Custom JSON encoder in Python 2.7 to insert plain JavaScript code to avoid custom-encoded objects ending up as JSON strings in the output by using a UUID-based replacement scheme.

import json
import uuid


class NoIndent(object):
    """Wrapper marking a value that should be serialized without indentation."""

    def __init__(self, value):
        self.value = value


class NoIndentEncoder(json.JSONEncoder):
    """JSON encoder that renders NoIndent-wrapped values on a single line.

    default() serializes each wrapped value immediately (with every encoder
    option except ``indent``) and returns a UUID-based placeholder string in
    its place; encode() then substitutes the stored JSON text for each
    quoted placeholder in the final output.
    """

    def __init__(self, *args, **kwargs):
        super(NoIndentEncoder, self).__init__(*args, **kwargs)
        # Options forwarded to the nested json.dumps() calls.  'indent' is
        # dropped so wrapped values stay on one line; pop() with a default
        # keeps this working when no indent keyword was supplied at all.
        self.kwargs = dict(kwargs)
        self.kwargs.pop('indent', None)
        # Maps placeholder key -> already-serialized JSON text.
        self._replacement_map = {}

    def default(self, o):
        """Return a placeholder for NoIndent objects; defer others to the base."""
        if isinstance(o, NoIndent):
            key = uuid.uuid4().hex
            self._replacement_map[key] = json.dumps(o.value, **self.kwargs)
            return "@@%s@@" % (key,)
        else:
            return super(NoIndentEncoder, self).default(o)

    def encode(self, o):
        """Return the JSON text for o with placeholders expanded."""
        try:
            result = super(NoIndentEncoder, self).encode(o)
            # items() exists on both Python 2 and Python 3 dicts.
            for k, v in self._replacement_map.items():
                result = result.replace('"@@%s@@"' % (k,), v)
            return result
        finally:
            # Entries are only meaningful for this call; do not accumulate
            # them if the encoder instance is reused.
            self._replacement_map.clear()

Then this

# Only the list wrapped in NoIndent is kept on one line; everything else is
# indented normally.
obj = {
    "layer1": {
        "layer2": {
            "layer3_2": "string",
            "layer3_1": NoIndent([{"y": 7, "x": 1}, {"y": 4, "x": 0}, {"y": 3, "x": 5}, {"y": 9, "x": 6}])
        }
    }
}
# Parenthesized single-argument form is valid on both Python 2 and Python 3.
print(json.dumps(obj, indent=2, cls=NoIndentEncoder))

produces the following output:

{  "layer1": {    "layer2": {      "layer3_2": "string",       "layer3_1": [{"y": 7, "x": 1}, {"y": 4, "x": 0}, {"y": 3, "x": 5}, {"y": 9, "x": 6}]    }  }}

It also correctly passes all options (except indent) e.g. sort_keys=True down to the nested json.dumps call.

# sort_keys=True is forwarded to the nested json.dumps() call, so the keys
# of the wrapped dicts are sorted as well.
obj = {
    "layer1": {
        "layer2": {
            "layer3_1": NoIndent([{"y": 7, "x": 1, }, {"y": 4, "x": 0}, {"y": 3, "x": 5, }, {"y": 9, "x": 6}]),
            "layer3_2": "string",
        }
    }
}
# Parenthesized single-argument form is valid on both Python 2 and Python 3.
print(json.dumps(obj, indent=2, sort_keys=True, cls=NoIndentEncoder))

correctly outputs:

{  "layer1": {    "layer2": {      "layer3_1": [{"x": 1, "y": 7}, {"x": 0, "y": 4}, {"x": 5, "y": 3}, {"x": 6, "y": 9}],       "layer3_2": "string"    }  }}

It can also be combined with e.g. collections.OrderedDict:

from collections import OrderedDict

# The wrapped OrderedDict is serialized by a nested json.dumps() call, so
# its insertion order ("b" before "a") is kept in the one-line output.
obj = {
    "layer1": {
        "layer2": {
            "layer3_2": "string",
            "layer3_3": NoIndent(OrderedDict([("b", 1), ("a", 2)]))
        }
    }
}
# Parenthesized single-argument form is valid on both Python 2 and Python 3.
print(json.dumps(obj, indent=2, cls=NoIndentEncoder))

outputs:

{  "layer1": {    "layer2": {      "layer3_3": {"b": 1, "a": 2},       "layer3_2": "string"    }  }}

UPDATE: In Python 3, there is no iteritems. You can replace encode with this:

def encode(self, o):
    """Encode o, then swap each quoted placeholder for its stored JSON text.

    Replacement texts are read from self._replacement_map, keyed by the
    placeholder key embedded between the "@@" markers.
    """
    encoded = super(NoIndentEncoder, self).encode(o)
    for key, compact_json in self._replacement_map.items():
        placeholder = '"@@%s@@"' % (key,)
        encoded = encoded.replace(placeholder, compact_json)
    return encoded