How to change json encoding behaviour for serializable python object?

前端 未结 13 1170
无人及你
无人及你 2020-12-02 09:47

It is easy to change the format of an object which is not JSON serializable eg datetime.datetime.

My requirement, for debugging purposes, is to alter the way some cu

相关标签:
13条回答
  • 2020-12-02 10:08

    You should be able to override JSONEncoder.encode():

    class MyEncoder(JSONEncoder):
      """JSONEncoder that wraps attribute-carrying dict/list objects for debugging.

      A dict or list (subclass) instance that has an instance ``__dict__`` is
      encoded as ``{"orig": <the container>, "attrs": <its instance attributes>}``.
      Everything else, including plain dicts and lists, is encoded normally.
      """

      def encode(self, o):
        """Return the JSON text for ``o``.

        Only the top-level object passed in is inspected; nested values are
        encoded by the base class.
        """
        # Plain dict/list instances have no __dict__, so vars() would raise
        # TypeError for them; getattr lets those fall through untouched.
        attrs = getattr(o, '__dict__', None)
        if isinstance(o, (dict, list)) and attrs is not None:
          # Call JSONEncoder.encode directly: going through self.encode()
          # would re-enter this override and loop forever.
          return JSONEncoder.encode(self, {'orig': o, 'attrs': attrs})
        return JSONEncoder.encode(self, o)
    

    and then if you want to patch it into json.dumps it looks from http://docs.buildbot.net/latest/reference/json-pysrc.html like you'll need to replace json._default_encoder with an instance of MyEncoder.

    0 讨论(0)
  • 2020-12-02 10:09

    The answer by FastTurtle might be a much cleaner solution.

    Here's something close to what you want based on the technique as explained in my question/answer: Overriding nested JSON encoding of inherited default supported objects like dict, list

    import json
    import datetime
    
    
    class mDict(dict):
        """A dict whose instances can also carry arbitrary attributes."""
    
    
    class mList(list):
        """A list whose instances can also carry arbitrary attributes."""
    
    
    class JsonDebugEncoder(json.JSONEncoder):
        """Encoder that tags mDict/mList values and keeps their instance attributes.

        An mDict is written as ``{"__mDict__": {"orig": <items>, "attr": {...}}}``
        and an mList as ``{"__mList__": {"orig": <items>, "attr": {...}}}``.
        datetime values are written as ISO 8601 strings.

        NOTE(review): the tagging lives in the private ``_iterencode`` hook and
        only works if the base class provides and calls that method -- confirm
        for the json implementation in use; otherwise only ``default`` applies.
        """

        def _iterencode(self, o, markers=None):
            """Yield the JSON text chunks for ``o``, tagging mDict/mList values."""
            base_iterencode = super(JsonDebugEncoder, self)._iterencode

            if isinstance(o, mDict):
                tag = '__mDict__'
            elif isinstance(o, mList):
                tag = '__mList__'
            else:
                for chunk in base_iterencode(o, markers=markers):
                    yield chunk
                return

            yield '{"' + tag + '": {"orig": '
            # The base implementation encodes the container's own items.
            for chunk in base_iterencode(o, markers):
                yield chunk
            # Emit all attributes as ONE JSON object. Writing a separate
            # {"key": value} per attribute is only valid JSON when there is
            # exactly one attribute.
            yield ', "attr": {'
            for index, (key, value) in enumerate(o.__dict__.items()):
                if index:
                    yield ', '
                # encode() escapes quotes/backslashes in the attribute name.
                yield self.encode(key) + ': '
                for chunk in base_iterencode(value, markers):
                    yield chunk
            yield '}}}'

        def default(self, obj):
            """Return an ISO 8601 string for datetimes.

            Raises:
                TypeError: for any other unsupported type (raised by the base
                    class), instead of silently encoding it as ``null``.
            """
            if isinstance(obj, datetime.datetime):
                return obj.isoformat()
            return super(JsonDebugEncoder, self).default(obj)
    
    
    class JsonDebugDecoder(json.JSONDecoder):
        """Decoder that rebuilds mDict/mList objects from their tagged JSON form.

        A JSON object containing the key ``"__mDict__"`` or ``"__mList__"`` is
        replaced by an mDict/mList built from the ``"orig"`` and ``"attr"``
        members stored under that key.
        """

        def decode(self, s):
            """Parse ``s`` and convert every tagged object in the result."""
            parsed = super(JsonDebugDecoder, self).decode(s)
            return self.recursiveObjectDecode(parsed)

        def recursiveObjectDecode(self, obj):
            """Return ``obj`` with tagged dicts replaced, walking dicts and lists in place."""
            if isinstance(obj, dict):
                decoders = (("__mList__", self.mListDecode),
                            ("__mDict__", self.mDictDecode))
                for placeholder, decoder in decoders:
                    if placeholder in obj:      # We assume it's supposed to be converted
                        return decoder(obj[placeholder])
                # Walk the children only after ALL placeholders were checked,
                # so they are decoded exactly once.
                for k in obj:
                    obj[k] = self.recursiveObjectDecode(obj[k])
            elif isinstance(obj, list):
                for index, item in enumerate(obj):
                    obj[index] = self.recursiveObjectDecode(item)
            return obj

        def mDictDecode(self, o):
            """Build an mDict from ``o['orig']`` (items) and ``o['attr']`` (attributes)."""
            res = mDict()
            # .items() works on both Python 2 and Python 3.
            for key, value in o['orig'].items():
                res[key] = self.recursiveObjectDecode(value)
            for key, value in o['attr'].items():
                res.__dict__[key] = self.recursiveObjectDecode(value)
            return res

        def mListDecode(self, o):
            """Build an mList from ``o['orig']`` (items) and ``o['attr']`` (attributes)."""
            res = mList()
            for value in o['orig']:
                res.append(self.recursiveObjectDecode(value))
            for key, value in o['attr'].items():
                res.__dict__[key] = self.recursiveObjectDecode(value)
            return res
    
    
    def test_debug_json():
        """Round-trip a sample structure through the debug encoder/decoder.

        Prints the encoded JSON text, then the object decoded from it.
        """
        games = mList(['mario','contra','tetris'])
        games.src = 'console'
        scores = mDict({'dp':10,'pk':45})
        scores.processed = "unprocessed"
        test_json = { 'games' : games, 'scores' : scores ,'date': datetime.datetime.now() }
        jsonDump = json.dumps(test_json, cls=JsonDebugEncoder)
        # print(...) with a single argument prints the same text on Python 2
        # and, unlike the print statement, is valid syntax on Python 3.
        print(jsonDump)
        test_pyObject = json.loads(jsonDump, cls=JsonDebugDecoder)
        print(test_pyObject)
    
    # Run the demo only when executed as a script.
    if __name__ == '__main__':
        test_debug_json()
    

    This results in:

    {"date": "2013-05-06T22:28:08.967000", "games": {"__mList__": {"orig": ["mario", "contra", "tetris"], "attr": {"src": "console"}}}, "scores": {"__mDict__": {"orig": {"pk": 45, "dp": 10}, "attr": {"processed": "unprocessed"}}}}
    

    This way you can encode it and decode it back to the python object it came from.

    EDIT:

    Here's a version that actually encodes it to the output you wanted and can decode it as well. Whenever a dictionary contains 'orig' and 'attr' it will check if 'orig' contains a dictionary or a list, if so it will respectively convert the object back to the mDict or mList.

    import json
    import datetime
    
    
    class mDict(dict):
        """dict subclass used to attach extra attributes to a mapping."""
    
    
    class mList(list):
        """list subclass used to attach extra attributes to a sequence."""
    
    
    class JsonDebugEncoder(json.JSONEncoder):
        """Encoder that writes mDict/mList values together with their attributes.

        An mDict or mList is written as ``{"orig": <items>, "attr": {...}}``.
        datetime values are written as ISO 8601 strings.

        NOTE(review): the wrapping lives in the private ``_iterencode`` hook and
        only works if the base class provides and calls that method -- confirm
        for the json implementation in use; otherwise only ``default`` applies.
        """

        def _iterencode(self, o, markers=None):
            """Yield the JSON text chunks for ``o``, wrapping mDict/mList values."""
            base_iterencode = super(JsonDebugEncoder, self)._iterencode

            if not isinstance(o, (mDict, mList)):
                for chunk in base_iterencode(o, markers=markers):
                    yield chunk
                return

            yield '{"orig": '
            # The base implementation encodes the container's own items.
            for chunk in base_iterencode(o, markers):
                yield chunk
            # Emit all attributes as ONE JSON object. Writing a separate
            # {"key": value} per attribute is only valid JSON when there is
            # exactly one attribute.
            yield ', "attr": {'
            for index, (key, value) in enumerate(o.__dict__.items()):
                if index:
                    yield ', '
                # encode() escapes quotes/backslashes in the attribute name.
                yield self.encode(key) + ': '
                for chunk in base_iterencode(value, markers):
                    yield chunk
            yield '}}'

        def default(self, obj):
            """Return an ISO 8601 string for datetimes.

            Raises:
                TypeError: for any other unsupported type (raised by the base
                    class), instead of silently encoding it as ``null``.
            """
            if isinstance(obj, datetime.datetime):    # Encode datetime
                return obj.isoformat()
            return super(JsonDebugEncoder, self).default(obj)
    
    
    class JsonDebugDecoder(json.JSONDecoder):
        """Decoder that turns ``{"orig": ..., "attr": ...}`` objects back into mDict/mList.

        A JSON object with both an ``"orig"`` and an ``"attr"`` member becomes an
        mList when ``"orig"`` is a list and an mDict when it is an object. Any
        other value is left as parsed.
        """

        def decode(self, s):
            """Parse ``s`` and convert every wrapped object in the result."""
            parsed = super(JsonDebugDecoder, self).decode(s)
            return self.recursiveObjectDecode(parsed)

        def recursiveObjectDecode(self, obj):
            """Return ``obj`` with wrapped dicts replaced, walking dicts and lists in place."""
            if isinstance(obj, dict):
                if "orig" in obj and "attr" in obj:
                    original = obj["orig"]
                    if isinstance(original, list):
                        return self.mListDecode(obj)
                    if isinstance(original, dict):
                        return self.mDictDecode(obj)
                for k in obj:
                    obj[k] = self.recursiveObjectDecode(obj[k])
            elif isinstance(obj, list):
                for index, item in enumerate(obj):
                    obj[index] = self.recursiveObjectDecode(item)
            return obj

        def mDictDecode(self, o):
            """Build an mDict from ``o['orig']`` (items) and ``o['attr']`` (attributes)."""
            res = mDict()
            # .items() works on both Python 2 and Python 3.
            for key, value in o['orig'].items():
                res[key] = self.recursiveObjectDecode(value)
            for key, value in o['attr'].items():
                res.__dict__[key] = self.recursiveObjectDecode(value)
            return res

        def mListDecode(self, o):
            """Build an mList from ``o['orig']`` (items) and ``o['attr']`` (attributes)."""
            res = mList()
            for value in o['orig']:
                res.append(self.recursiveObjectDecode(value))
            for key, value in o['attr'].items():
                res.__dict__[key] = self.recursiveObjectDecode(value)
            return res
    
    
    def test_debug_json():
        """Round-trip a sample structure through the debug encoder/decoder.

        Prints the encoded JSON text, the decoded object, and the ``src``
        attribute restored on the decoded 'games' list.
        """
        games = mList(['mario','contra','tetris'])
        games.src = 'console'
        scores = mDict({'dp':10,'pk':45})
        scores.processed = "unprocessed"
        test_json = { 'games' : games, 'scores' : scores ,'date': datetime.datetime.now() }
        jsonDump = json.dumps(test_json, cls=JsonDebugEncoder)
        # print(...) with a single argument prints the same text on Python 2
        # and, unlike the print statement, is valid syntax on Python 3.
        print(jsonDump)
        test_pyObject = json.loads(jsonDump, cls=JsonDebugDecoder)
        print(test_pyObject)
        print(test_pyObject['games'].src)
    
    # Run the demo only when executed as a script.
    if __name__ == '__main__':
        test_debug_json()
    

    Here's some more info about the output:

    # Encoded
    {"date": "2013-05-06T22:41:35.498000", "games": {"orig": ["mario", "contra", "tetris"], "attr": {"src": "console"}}, "scores": {"orig": {"pk": 45, "dp": 10}, "attr": {"processed": "unprocessed"}}}
    
    # Decoded ('games' contains the mList with the src attribute and 'scores' contains the mDict processed attribute)
    # Note that printing the python objects doesn't directly show the processed and src attributes, as seen below.
    {u'date': u'2013-05-06T22:41:35.498000', u'games': [u'mario', u'contra', u'tetris'], u'scores': {u'pk': 45, u'dp': 10}}
    

    Sorry for any bad naming conventions, it's a quick setup. ;)

    Note: The datetime doesn't get decoded back to the python representation. Implementing that could be done by checking for any dict key that is called 'date' and contains a valid string representation of a datetime.

    0 讨论(0)
  • 2020-12-02 10:09

    Why can't you just create a new object type to pass to the encoder? Try:

    class MStuff(object):
        """Plain wrapper object that stores the wrapped value in ``content``."""

        def __init__(self, content):
            # The wrapped value; json_debug_handler reports it under 'orig'.
            self.content = content
    
    class mDict(MStuff):
        """MStuff wrapper intended for dict content."""
    
    class mList(MStuff):
        """MStuff wrapper intended for list content."""
    
    def json_debug_handler(obj):
        """Fallback serializer for ``json.dumps(default=...)`` that logs what it receives.

        Prints the type of ``obj``, then returns:
        - the ISO 8601 string for a datetime,
        - ``{'orig': content, 'attrs': {...}}`` for an MStuff instance, where
          ``attrs`` holds its instance attributes except ``content`` and any
          name starting with an underscore,
        - ``None`` for everything else.
        """
        for line in ("object received:", type(obj), "\n\n"):
            print(line)

        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        if not isinstance(obj, MStuff):
            return None

        extras = {}
        for name, value in obj.__dict__.items():
            if name.startswith("_") or name == "content":
                continue
            extras[name] = value
        return {'orig': obj.content, 'attrs': extras}
    

    You could add validation on the mDict and mList if desired.

    0 讨论(0)
  • 2020-12-02 10:10

    Along the lines of FastTurtle's suggestion, but requiring somewhat less code and much deeper monkeying, you can override isinstance itself, globally. This is probably Not A Good Idea, and may well break something. But it does work, in that it produces your required output, and it's quite simple.

    First, before json is imported anywhere, monkey-patch the builtins module to replace isinstance with one that lies, just a little bit, and only in a specific context:

    # Keep a reference to the real builtin; the replacement below delegates to it.
    _original_isinstance = isinstance
    
    def _isinstance(obj, class_or_tuple):
        """Replacement for ``isinstance`` that can hide the builtin bases of mList/mDict.

        When the ``'_make_iterencode' in globals()`` test passes and ``obj`` is
        an mList or mDict instance, the result is True only if that custom class
        itself is listed in ``class_or_tuple``; a check against ``list`` or
        ``dict`` alone therefore returns False. In every other case the call is
        forwarded unchanged to the original ``isinstance``.

        NOTE(review): ``globals()`` returns the namespace of the module that
        defines this function, not the caller's. Confirm ``_make_iterencode``
        is really present in that namespace; otherwise the special branch
        never runs.
        """
        if '_make_iterencode' in globals():
            # Normalize a single class to a 1-tuple so ``in`` works below.
            if not _original_isinstance(class_or_tuple, tuple):
                class_or_tuple = (class_or_tuple,)
            for custom in mList, mDict:
                if _original_isinstance(obj, custom):
                    # Exact membership test: base classes are not considered.
                    return custom in class_or_tuple
        return _original_isinstance(obj, class_or_tuple)
    
    # Install the replacement globally, in whichever builtins module exists.
    try:
        import builtins # Python 3
    except ImportError:
        import __builtin__ as builtins # Python 2
    builtins.isinstance = _isinstance
    

    Then, create your custom encoder, implementing your custom serialization and forcing the use of _make_iterencode (since the c version won't be affected by the monkeypatching):

    class CustomEncoder(json.JSONEncoder):
        """Encoder that serializes datetimes, mDict and mList via ``default``."""

        def iterencode(self, o, _one_shot = False):
            """Delegate to the base class, always passing ``_one_shot=False``."""
            parent = super(CustomEncoder, self)
            return parent.iterencode(o, _one_shot=False)

        def default(self, obj):
            """Return a serializable stand-in for ``obj``.

            datetimes become ISO 8601 strings; mDict/mList become
            ``{'orig': <plain copy>, 'attrs': <instance attributes>}``;
            anything else yields ``None``.
            """
            if isinstance(obj, datetime.datetime):
                return obj.isoformat()
            if isinstance(obj, mDict):
                plain = dict(obj)
            elif isinstance(obj, mList):
                plain = list(obj)
            else:
                return None
            return {'orig': plain, 'attrs': vars(obj)}
    

    And that's really all there is to it! Output from Python 3 and Python 2 below.

    Python 3.6.3 (default, Oct 10 2017, 21:06:48)
    ...
    >>> from test import test_debug_json
    >>> test_debug_json()
    {"games": {"orig": ["mario", "contra", "tetris"], "attrs": {"src": "console"}}, "scores": {"orig": {"dp": 10, "pk": 45}, "attrs": {"processed": "unprocessed"}}, "date": "2018-01-27T13:56:15.666655"}
    
    Python 2.7.13 (default, May  9 2017, 12:06:13)
    ...
    >>> from test import test_debug_json
    >>> test_debug_json()
    {"date": "2018-01-27T13:57:04.681664", "games": {"attrs": {"src": "console"}, "orig": ["mario", "contra", "tetris"]}, "scores": {"attrs": {"processed": "unprocessed"}, "orig": {"pk": 45, "dp": 10}}}
    
    0 讨论(0)
  • 2020-12-02 10:13

    If you only need serialization and not deserialization, you can process the object before passing it to json.dumps. See the example below:

    import datetime
    import json
    
    
    def is_inherited_from(obj, objtype):
        """Return True when ``obj`` is an instance of a proper subclass of ``objtype``.

        An object whose exact type is ``objtype`` gives False, as does an
        object that is not an instance of ``objtype`` at all.
        """
        if not isinstance(obj, objtype):
            return False
        return type(obj) != objtype
    
    
    def process_object(data):
        """Return a copy of ``data`` in which container subclasses are made explicit.

        Lists, tuples and dicts are rebuilt recursively. An instance of a
        *subclass* of list, tuple or dict is replaced by
        ``{"orig": <plain list/tuple/dict copy>, "attrs": <instance attributes>}``,
        which is itself processed recursively. Any other value is returned as is.
        """
        if isinstance(data, list):
            plain = list
        elif isinstance(data, tuple):
            plain = tuple
        elif isinstance(data, dict):
            plain = dict
        else:
            return data

        if type(data) is not plain:
            wrapped = {
                # Convert with the matching builtin: dict(data) keeps the
                # values, whereas list(data) on a dict keeps only its keys.
                "orig": plain(data),
                # Subclasses using __slots__ have no __dict__; treat that
                # as "no attributes" instead of letting vars() raise.
                "attrs": getattr(data, "__dict__", {}),
            }
            return process_object(wrapped)

        if plain is dict:
            return {key: process_object(value) for key, value in data.items()}
        return plain(process_object(item) for item in data)
    
    
    def json_debug_handler(obj):
        """Fallback serializer for ``json.dumps(default=...)``.

        Prints a marker for every object received and returns the ISO 8601
        string for a datetime, or ``None`` for anything else.
        """
        print("object received:")
        print("\n\n")
        return obj.isoformat() if isinstance(obj, datetime.datetime) else None
    
    
    class mDict(dict):
        """dict subclass; instances may carry extra attributes."""
    
    
    class mList(list):
        """list subclass; instances may carry extra attributes."""
    
    
    def test_debug_json():
        """Build a sample structure, pre-process it and print its JSON form."""
        console_games = mList(['mario', 'contra', 'tetris'])
        console_games.src = 'console'
        score_table = mDict({'dp': 10, 'pk': 45})
        score_table.processed = "unprocessed"
        payload = {
            'games': console_games,
            'scores': score_table,
            'date': datetime.datetime.now(),
        }
        # Container subclasses are expanded first; the handler then only has
        # to deal with values json cannot serialize itself (the datetime).
        prepared = process_object(payload)
        print(json.dumps(prepared, default=json_debug_handler))
    
    
    # Run the demo only when executed as a script.
    if __name__ == '__main__':
        test_debug_json()
    

    The output of the same is

    {"games": {"orig": ["mario", "contra", "tetris"], "attrs": {"src": "console"}}, "scores": {"orig": ["dp", "pk"], "attrs": {"processed": "unprocessed"}}, "date": "2018-01-24T12:59:36.581689"}

    It is also possible to override the JSONEncoder, but since it uses nested functions, doing so would be complex and would require the techniques discussed in the question below:

    Can you patch *just* a nested function with closure, or must the whole outer function be repeated?

    Since you want to keep things simple, I would not suggest going that route

    0 讨论(0)
  • 2020-12-02 10:13

    I changed the priority of the default resolver and the output of the default iterators to achieve your goal.

    1. Change the default resolver priority so that it is executed before all of the standard type checks:

      Inherits the json.JSONEncoder and overrides the iterencode() method.

      All values should be wrapped in the ValueWrapper type so that they are not resolved by the default standard resolvers.

    2. Change the default iterator output:

      Implement three custom wrapper classes: ValueWrapper, ListWrapper, and DictWrapper. ListWrapper implements __iter__(), and DictWrapper implements __iter__(), items() and iteritems().

    import datetime
    import json
    
    class DebugJsonEncoder(json.JSONEncoder):
        """JSONEncoder that offers every wrapped value to the ``default`` hook first.

        ``iterencode`` wraps the input in a ValueWrapper and swaps ``self.default``
        for a resolver that unwraps such objects, so the original ``default`` hook
        sees each wrapped value before list/dict handling is applied.
        """

        def iterencode(self, o, _one_shot=False):
            """Encode ``o`` with the wrapping resolver installed as ``self.default``.

            ``_one_shot`` is accepted but ignored: ``False`` is always passed to
            the base implementation.

            Note: ``self.default`` is reassigned on every call, so on a reused
            instance each new resolver closes over the previous one.
            """
            default_resolver = self.default
            # Replacement for self.default: unwraps ValueWrapper objects and lets
            # the original resolver handle their payload first.
            def _resolve(o):
                if isinstance(o, ValueWrapper):
                    # Ask the original resolver first. The stock encoder only
                    # calls ``default`` after all of its standard type checks
                    # have failed, but here the custom resolver should get the
                    # value before those checks.
                    # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L442
                    result = default_resolver(o.data)
                    # A non-None result means the original resolver handled it.
                    if (o.data is not None) and (result is not None):
                        return result
                    # Otherwise re-wrap containers so their elements are also
                    # handed out as ValueWrapper objects.
                    elif isinstance(o.data, (list, tuple)):
                        return ListWrapper(o.data)
                    elif isinstance(o.data, dict):
                        return DictWrapper(o.data)
                    else:
                        return o.data
                else:
                    return default_resolver(o)
    
            # Install the wrapping resolver as this encoder's ``default`` hook.
            # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L161
            self.default = _resolve
            # The input is wrapped in a ValueWrapper so that it is not consumed by
            # the standard type handling before the resolver sees it.
            # The last argument (_one_shot) must be False because the encoding is
            # meant to go through _make_iterencode().
            # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L259
            return json.JSONEncoder.iterencode(self, _resolve(ValueWrapper(o)), False)
    
    
    class ValueWrapper:
        """Opaque holder that keeps the wrapped object in ``data``."""

        def __init__(self, o):
            self.data = o
    
    class ListWrapper(ValueWrapper, list):
        """List stand-in whose iteration yields each element inside a ValueWrapper."""

        def __init__(self, o):
            """Wrap the list or tuple ``o``."""
            ValueWrapper.__init__(self, o)
            # Fill the list base as well. With an empty base the wrapper is
            # falsy and has length 0, so a consumer that tests emptiness before
            # iterating (as json's list encoder appears to) would treat every
            # wrapped list as empty.
            list.__init__(self, o)

        # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L307
        def __iter__(self):
            """Yield every element of the wrapped sequence as a ValueWrapper."""
            for chunk in self.data:
                yield ValueWrapper(chunk)
    
    class DictWrapper(ValueWrapper, dict):
        """Dict stand-in whose item iteration yields each value inside a ValueWrapper."""

        def __init__(self, d):
            """Wrap the dict ``d``."""
            # Initialise the ValueWrapper side too, so ``data`` exists like on
            # every other ValueWrapper.
            ValueWrapper.__init__(self, d)
            dict.__init__(self, d)

        def __iter__(self):
            """Yield ``(key, ValueWrapper(value))`` pairs (not bare keys)."""
            return self.items()

        # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L361
        def items(self):
            """Yield ``(key, ValueWrapper(value))`` for every entry."""
            for key, value in dict.items(self):
                yield key, ValueWrapper(value)

        # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L363
        def iteritems(self):
            """Python 2 spelling of :meth:`items`; yields the same pairs.

            Delegates to ``items`` so it also works where ``dict.iteritems``
            does not exist.
            """
            return self.items()
    
    
    def json_debug_handler(obj):
        """Fallback serializer for ``json.dumps(default=...)`` that logs what it receives.

        Prints the type of ``obj``, then returns:
        - the ISO 8601 string for a datetime,
        - ``{'orig': obj, 'attrs': vars(obj)}`` for an mDict or mList,
        - ``None`` for everything else.
        """
        print("object received:")
        # print(...) call instead of the print statement: same output on
        # Python 2, valid syntax on Python 3, consistent with the lines around it.
        print(type(obj))
        print("\n\n")
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        elif isinstance(obj, (mDict, mList)):
            return {'orig': obj, 'attrs': vars(obj)}
        else:
            return None
    
    
    class mDict(dict):
        """dict subclass that accepts extra instance attributes."""
    
    class mList(list):
        """list subclass that accepts extra instance attributes."""
    
    
    def test_debug_json():
        """Encode a sample structure with DebugJsonEncoder and print the JSON text."""
        console_games = mList(['mario','contra','tetris'])
        console_games.src = 'console'
        score_table = mDict({'dp':10,'pk':45})
        score_table.processed = "unprocessed"
        payload = {
            'games': console_games,
            'scores': score_table,
            'date': datetime.datetime.now(),
            'default': None,
        }
        encoded = json.dumps(payload, cls=DebugJsonEncoder, default=json_debug_handler)
        print(encoded)
    
    # Run the demo only when executed as a script.
    if __name__ == '__main__':
        test_debug_json()
    
    0 讨论(0)
提交回复
热议问题