Extended dict-like subclass to support casting and JSON dumping without extras [json]

Extended dict-like subclass to support casting and JSON dumping without extras


Since the real problem here is really json.dumps's default encoder's inability to consider MutableMapping (or ruamel.yaml.comments.CommentedMap in your real-world example) as a dict, instead of telling people to set the default parameter of json.dumps to your json_default function like you mentioned, you can use functools.partial to make json_default a default value for the default parameter of json.dumps so that people don't have to do anything differently when they use your package:

import json
from functools import partial

# Rebind json.dumps so that json_default is used as the ``default`` hook
# whenever the caller does not pass one.  A caller-supplied ``default=``
# keyword still takes precedence, because keywords given at call time
# override those stored in the partial object.
json.dumps = partial(json.dumps, default=json_default)

Or if you need to allow people to specify their own default parameter or even their own json.JSONEncoder subclass, you can use a wrapper around json.dumps so that it wraps the default function specified by the default parameter and the default method of the custom encoder specified by the cls parameter, whichever one is specified:

import functools
import inspect
import json

try:
    from collections.abc import MutableMapping
except ImportError:  # Python 2 keeps the ABCs directly in ``collections``
    from collections import MutableMapping


class override_json_default:
    """Decorator that makes a ``json.dumps``-like function serialise
    ``MutableMapping`` instances as plain dicts.

    Calling an instance with a function returns a wrapper that, on every
    call:

    * wraps a user-supplied ``default`` function so that mappings are
      converted with ``dict(o)``;
    * wraps the ``default`` method of the encoder class passed as ``cls``
      (or of ``json.JSONEncoder`` when neither ``default`` nor ``cls`` is
      given) so that mappings the encoder rejects are converted with
      ``dict(o)``.

    NOTE: the encoder class is patched in place (its ``default`` attribute
    is replaced), so the effect is process-wide for that class.
    """

    # keep track of the default methods that have already been wrapped
    # so we don't wrap them again
    _wrapped_defaults = set()

    def __call__(self, func):
        """Return a wrapper around *func* with mapping-aware defaults."""
        sig = inspect.signature(func)

        def override_default(default_func):
            """Wrap a ``default=`` callable taking the object to encode."""
            def default_wrapper(o):
                try:
                    o = default_func(o)
                except TypeError:
                    # A conventional hook raises TypeError for types it
                    # does not know; only swallow that for mappings, which
                    # are converted below.
                    if not isinstance(o, MutableMapping):
                        raise
                if isinstance(o, MutableMapping):
                    o = dict(o)
                return o
            return default_wrapper

        def override_default_method(default_func):
            """Wrap a ``JSONEncoder.default`` method (takes self and o)."""
            def default_wrapper(self, o):
                try:
                    return default_func(self, o)
                except TypeError:
                    if isinstance(o, MutableMapping):
                        return dict(o)
                    raise
            return default_wrapper

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Normalise the call so 'default' and 'cls' can be looked up by
            # name however the caller passed them.
            bound = sig.bind(*args, **kwargs)
            bound.apply_defaults()
            default = bound.arguments.get('default')
            if default is not None:
                bound.arguments['default'] = override_default(default)
            encoder = bound.arguments.get('cls')
            if default is None and encoder is None:
                # Nothing customised: fall back to patching the stock
                # encoder so mappings are still handled.
                bound.arguments['cls'] = encoder = json.JSONEncoder
            if encoder is not None:
                method = encoder.default
                if method not in self._wrapped_defaults:
                    method = override_default_method(method)
                    self._wrapped_defaults.add(method)
                encoder.default = method
            return func(*bound.args, **bound.kwargs)

        return wrapper


json.dumps = override_json_default()(json.dumps)

so that the following test code with both a custom default function and a custom encoder that handle datetime objects, as well as one without a custom default or encoder:

import json
from datetime import datetime


def datetime_encoder(o):
    """``default=`` hook: ISO-format datetimes, pass anything else through.

    Non-datetime objects are returned unchanged rather than rejected, so
    any further conversion is left to whatever wraps this hook.
    """
    if isinstance(o, datetime):
        return o.isoformat()
    return o


class DateTimeEncoder(json.JSONEncoder):
    """JSONEncoder that serialises datetime objects as ISO-8601 strings."""

    def default(self, o):
        """Return the ISO string for datetimes; defer to the base class otherwise."""
        if isinstance(o, datetime):
            return o.isoformat()
        return super(DateTimeEncoder, self).default(o)


def dump(data):
    """Print *data* as items, as a dict cast, and as JSON in three ways.

    NOTE: removes the key ``'c'`` from *data* before the last JSON dump, so
    the argument is modified.  Errors are printed, not raised, because this
    is demonstration code.
    """
    print(list(data.items()))
    try:
        print('cast:', dict(**data))
    except Exception as e:
        print('ERROR:', e)
    try:
        print('json with custom default:', json.dumps(data, default=datetime_encoder))
        print('json wtih custom encoder:', json.dumps(data, cls=DateTimeEncoder))
        # Drop the datetime so the plain json.dumps call has nothing it
        # cannot encode.
        del data['c']
        print('json without datetime:', json.dumps(data))
    except Exception as e:
        print('ERROR:', e)


t = T(a=1, b=2, c=datetime.now())
dump(t)

would all give the proper output:

[('a', 1), ('b', 2), ('c', datetime.datetime(2018, 9, 15, 23, 59, 25, 575642)), ('default', 'DEFAULT')]
cast: {'a': 1, 'b': 2, 'c': datetime.datetime(2018, 9, 15, 23, 59, 25, 575642), 'default': 'DEFAULT'}
json with custom default: {"a": 1, "b": 2, "c": "2018-09-15T23:59:25.575642", "default": "DEFAULT"}
json wtih custom encoder: {"a": 1, "b": 2, "c": "2018-09-15T23:59:25.575642", "default": "DEFAULT"}
json without datetime: {"a": 1, "b": 2, "default": "DEFAULT"}

As pointed out in the comments, the above code uses inspect.signature, which is not available until Python 3.3, and even then, inspect.BoundArguments.apply_defaults is not available until Python 3.5, and the funcsigs package, a backport of Python 3.3's inspect.signature, does not have the apply_defaults method either. To make the code as backward-compatible as possible, you can simply copy and paste the code of Python 3.5+'s inspect.BoundArguments.apply_defaults to your module and assign it as an attribute of inspect.BoundArguments after importing funcsigs as necessary:

import inspect
from collections import OrderedDict

if not hasattr(inspect, 'signature'):
    # No native inspect.signature: graft the funcsigs backport's public
    # names (listed in its __all__) onto the inspect module.
    import funcsigs
    for attr in funcsigs.__all__:
        setattr(inspect, attr, getattr(funcsigs, attr))


def apply_defaults(self):
    """Backport of ``inspect.BoundArguments.apply_defaults``.

    Fills ``self.arguments`` with values for parameters that were not
    bound: the parameter's default when it has one, ``()`` for ``*args``
    and ``{}`` for ``**kwargs``.  Parameters with neither are left out.
    The result keeps the signature's parameter order.
    """
    arguments = self.arguments
    new_arguments = []
    for name, param in self._signature.parameters.items():
        try:
            new_arguments.append((name, arguments[name]))
        except KeyError:
            # Read the sentinels off the Parameter object itself so this
            # works whether the classes come from inspect or from funcsigs
            # (funcsigs is not imported when inspect.signature exists).
            if param.default is not param.empty:
                val = param.default
            elif param.kind is param.VAR_POSITIONAL:
                val = ()
            elif param.kind is param.VAR_KEYWORD:
                val = {}
            else:
                continue
            new_arguments.append((name, val))
    self.arguments = OrderedDict(new_arguments)


if not hasattr(inspect.BoundArguments, 'apply_defaults'):
    inspect.BoundArguments.apply_defaults = apply_defaults


The answers to Q1 and Q2 are "You cannot" and "No", respectively.

In short: you cannot add a key on-the-fly within Python and have JSON output as well (without patching json.dumps or providing a default to it).

The reason for that is that for JSON to work at all, you need to make your class a subclass of dict (or some other object implemented at the C level) so that its call of PyDict_Check() returns non-zero (which means the tp_flags field in the object header has the Py_TPFLAGS_DICT_SUBCLASS bit set).

The cast (dict(**data)) first does this check on the C level as well (in dictobject.c:dict_merge). But there is a difference in how things proceed from there. When dumping JSON the code actually iterates over the key/values using routines provided by the subclass if these are available.

In contrast, the cast doesn't check whether there is any subclassing going on and copies the values from the C level implementation (dict, ruamel.ordereddict, etc.).

When casting something that is not a subclass of dict, then the normal Python class level interface (__iter__) is called to get the key/value pairs. This is why subclassing MutableMapping makes casting work, but unfortunately it breaks JSON dumping.

It will not suffice to create a stripped down C level class that returns non-zero on PyDict_Check(), as the casting will iterate on the C level over that class' keys and values.

The only way to implement this transparently is by implementing a C level dict-like class that does the on-the-fly insertion of the key default and its value. It has to do so by faking a length that is one bigger than the actual number of entries and somehow implement indexing at the C level of ma_keys and ma_values to have that extra item. If possible at all, that is going to be hard, as dict_merge assumes fixed knowledge about quite a bit of the internals of the source object.

An alternative for fixing json.dumps is to fix dict_merge, but the latter would affect a lot of code negatively in speed, so that is less likely to happen (and also would not be done retroactively on older versions of Python either).


You can approach the problem in a completely different way. Instead of trying to produce a value when the key 'default' is requested on the fly, you can initialize the dict with the key 'default' set to your desired value, and then protect the value of the 'default' key by overriding all the methods that can potentially alter the content of the dict so that the value of the key 'default' is never altered:

class T(dict):
    """dict subclass whose ``'default'`` key always maps to ``'DEFAULT'``.

    The key is inserted at construction time and every mutating method is
    overridden so that the entry can be neither changed nor removed.
    Attempts to do so are silently ignored.  Because this is a real dict,
    both ``dict(**t)`` and ``json.dumps(t)`` see the ``'default'`` entry.
    """

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as ``dict`` and force ``'default'``."""
        super(T, self).__init__(*args, **kwargs)
        # Go through dict.__setitem__: our own refuses to write 'default'.
        super(T, self).__setitem__('default', 'DEFAULT')

    def __setitem__(self, key, value):
        """Set *key*, ignoring writes to ``'default'``."""
        if key != 'default':
            super(T, self).__setitem__(key, value)

    def __delitem__(self, key):
        """Delete *key*, ignoring deletion of ``'default'``."""
        if key != 'default':
            super(T, self).__delitem__(key)

    def __ior__(self, other):
        """Route ``t |= other`` through update() so ``'default'`` stays protected."""
        self.update(other)
        return self

    def clear(self):
        """Remove every entry except ``'default'``."""
        super(T, self).clear()
        super(T, self).__setitem__('default', 'DEFAULT')

    def pop(self, key, *args):
        """Like ``dict.pop`` (optional positional default), but ``'default'``
        is returned without being removed."""
        if key == 'default':
            return self[key]
        return super(T, self).pop(key, *args)

    def popitem(self):
        """Pop an item other than ``'default'``.

        Raises KeyError when ``'default'`` is the only entry left; the
        protected entry is kept in that case.
        """
        key, value = super(T, self).popitem()
        if key != 'default':
            return key, value
        try:
            # 'default' came off first; hand out the next item instead.
            return super(T, self).popitem()
        finally:
            # Put 'default' back whether or not another item existed.
            super(T, self).__setitem__(key, value)

    def update(self, other=(), **kwargs):
        """Like ``dict.update``, but any ``'default'`` entry is dropped.

        *other* may be a mapping or an iterable of key/value pairs and is
        never modified.
        """
        # Normalise into a fresh dict so every input form is filtered the
        # same way and the caller's object is left untouched.
        incoming = dict(other, **kwargs)
        incoming.pop('default', None)
        super(T, self).update(incoming)