It is easy to change the format of an object which is not JSON serializable eg datetime.datetime.
My requirement, for debugging purposes, is to alter the way some cu
You should be able to override JSONEncoder.encode():
class MyEncoder(JSONEncoder):
    """Encoder that reports a top-level dict/list together with its attributes.

    Only the object handed to ``encode()`` is wrapped; whatever is nested
    inside it is serialized by the regular ``JSONEncoder`` machinery.
    """

    def encode(self, o):
        """Return the JSON text for *o*.

        A dict or list (or subclass instance) becomes
        ``{"orig": <the container>, "attrs": <its instance attributes>}``.
        Anything else is encoded unchanged.
        """
        if isinstance(o, (dict, list)):
            # Plain dict/list objects have no __dict__, so vars(o) would raise
            # TypeError; report an empty attribute mapping for them instead.
            attrs = getattr(o, '__dict__', {})
            # directly call JSONEncoder rather than infinite-looping through self.encode()
            return JSONEncoder.encode(self, {'orig': o, 'attrs': attrs})
        return JSONEncoder.encode(self, o)
Then, if you want to patch it into json.dumps, it looks from http://docs.buildbot.net/latest/reference/json-pysrc.html like you'll need to replace json._default_encoder with an instance of MyEncoder.
The answer by FastTurtle might be a much cleaner solution.
Here's something close to what you want based on the technique as explained in my question/answer: Overriding nested JSON encoding of inherited default supported objects like dict, list
import json
import datetime
class mDict(dict):
    """``dict`` subclass whose instances can carry extra attributes."""
class mList(list):
    """``list`` subclass whose instances can carry extra attributes."""
class JsonDebugEncoder(json.JSONEncoder):
    """JSON encoder that keeps the instance attributes of ``mDict``/``mList``.

    An ``mDict`` is written as
    ``{"__mDict__": {"orig": <items>, "attr": <instance attributes>}}`` and an
    ``mList`` as ``{"__mList__": {"orig": <elements>, "attr": ...}}``.
    ``datetime.datetime`` values are written as ISO 8601 strings.
    """

    def iterencode(self, o, _one_shot=False):
        """Tag every ``mDict``/``mList`` inside *o*, then encode normally.

        The structure is rewritten up front instead of hooking the encoder's
        private ``_iterencode`` method: the stock encoder treats dict/list
        subclasses as plain containers and never hands them to ``default``.
        Letting the base class serialize the tagged copy also guarantees
        valid JSON for any number of attributes (including none).
        """
        tagged = self._tag(o)
        return super(JsonDebugEncoder, self).iterencode(tagged, _one_shot)

    def _tag(self, o):
        """Return a copy of *o* built from plain containers with tags added.

        NOTE: a container that contains itself makes this recurse until the
        interpreter's recursion limit is hit.
        """
        if isinstance(o, dict):
            plain = dict((key, self._tag(value)) for key, value in o.items())
            if isinstance(o, mDict):
                return {"__mDict__": {"orig": plain,
                                      "attr": self._tag(vars(o))}}
            return plain
        if isinstance(o, (list, tuple)):
            plain = [self._tag(value) for value in o]
            if isinstance(o, mList):
                return {"__mList__": {"orig": plain,
                                      "attr": self._tag(vars(o))}}
            return plain
        return o

    def default(self, obj):
        """Serialize ``datetime.datetime``; reject anything else.

        Raises:
            TypeError: via the base class, for unsupported objects (falling
                through would silently encode them as ``null``).
        """
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return super(JsonDebugEncoder, self).default(obj)
class JsonDebugDecoder(json.JSONDecoder):
    """Decoder that turns ``__mDict__``/``__mList__`` tags back into objects."""

    def decode(self, s):
        """Parse *s* as JSON, then rebuild every tagged ``mDict``/``mList``."""
        obj = super(JsonDebugDecoder, self).decode(s)
        return self.recursiveObjectDecode(obj)

    def recursiveObjectDecode(self, obj):
        """Walk *obj* and replace tagged dicts by the objects they describe.

        Untagged dicts and lists are updated in place and returned; any other
        value is returned unchanged.
        """
        if isinstance(obj, dict):
            decoders = (("__mList__", self.mListDecode),
                        ("__mDict__", self.mDictDecode))
            for placeholder, decoder in decoders:
                if placeholder in obj:  # We assume it's supposed to be converted
                    return decoder(obj[placeholder])
            # No tag present: an ordinary dict, so only its values are decoded.
            for k in obj:
                obj[k] = self.recursiveObjectDecode(obj[k])
        elif isinstance(obj, list):
            for index, item in enumerate(obj):
                obj[index] = self.recursiveObjectDecode(item)
        return obj

    def mDictDecode(self, o):
        """Build an ``mDict`` from a payload with ``orig`` and ``attr`` keys."""
        res = mDict()
        # .items() instead of .iteritems(): the latter is missing on Python 3.
        for key, value in o['orig'].items():
            res[key] = self.recursiveObjectDecode(value)
        for key, value in o['attr'].items():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res

    def mListDecode(self, o):
        """Build an ``mList`` from a payload with ``orig`` and ``attr`` keys."""
        res = mList()
        for value in o['orig']:
            res.append(self.recursiveObjectDecode(value))
        for key, value in o['attr'].items():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res
def test_debug_json():
    """Encode a sample structure, print it, decode it again and print that."""
    games = mList(['mario', 'contra', 'tetris'])
    games.src = 'console'
    scores = mDict({'dp': 10, 'pk': 45})
    scores.processed = "unprocessed"
    test_json = {'games': games, 'scores': scores, 'date': datetime.datetime.now()}
    jsonDump = json.dumps(test_json, cls=JsonDebugEncoder)
    # print(...) with one argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(jsonDump)
    test_pyObject = json.loads(jsonDump, cls=JsonDebugDecoder)
    print(test_pyObject)


if __name__ == '__main__':
    test_debug_json()
This results in:
{"date": "2013-05-06T22:28:08.967000", "games": {"__mList__": {"orig": ["mario", "contra", "tetris"], "attr": {"src": "console"}}}, "scores": {"__mDict__": {"orig": {"pk": 45, "dp": 10}, "attr": {"processed": "unprocessed"}}}}
This way you can encode it and decode it back to the python object it came from.
EDIT:
Here's a version that actually encodes it to the output you wanted and can decode it as well. Whenever a dictionary contains 'orig' and 'attr' it will check if 'orig' contains a dictionary or a list, if so it will respectively convert the object back to the mDict or mList.
import json
import datetime
class mDict(dict):
    """Dictionary type that additionally accepts instance attributes."""
class mList(list):
    """List type that additionally accepts instance attributes."""
class JsonDebugEncoder(json.JSONEncoder):
    """JSON encoder that writes ``mDict``/``mList`` as ``orig`` plus ``attr``.

    Each ``mDict`` or ``mList`` becomes
    ``{"orig": <plain contents>, "attr": <instance attributes>}``.
    ``datetime.datetime`` values are written as ISO 8601 strings.
    """

    def iterencode(self, o, _one_shot=False):
        """Rewrite every ``mDict``/``mList`` inside *o*, then encode normally.

        The stock encoder serializes dict/list subclasses as plain containers
        without consulting ``default``, so the wrapping is done beforehand.
        The base class then produces the text, which keeps the output valid
        JSON regardless of how many attributes an object has.
        """
        wrapped = self._wrap(o)
        return super(JsonDebugEncoder, self).iterencode(wrapped, _one_shot)

    def _wrap(self, o):
        """Return a plain-container copy of *o* with custom objects wrapped.

        NOTE: a container that contains itself makes this recurse until the
        interpreter's recursion limit is hit.
        """
        if isinstance(o, dict):
            plain = dict((key, self._wrap(value)) for key, value in o.items())
            if isinstance(o, mDict):  # Encode mDict
                return {"orig": plain, "attr": self._wrap(vars(o))}
            return plain
        if isinstance(o, (list, tuple)):
            plain = [self._wrap(value) for value in o]
            if isinstance(o, mList):  # Encode mList
                return {"orig": plain, "attr": self._wrap(vars(o))}
            return plain
        return o

    def default(self, obj):
        """Serialize ``datetime.datetime``; reject anything else.

        Raises:
            TypeError: via the base class, for unsupported objects (falling
                through would silently encode them as ``null``).
        """
        if isinstance(obj, datetime.datetime):  # Encode datetime
            return obj.isoformat()
        return super(JsonDebugEncoder, self).default(obj)
class JsonDebugDecoder(json.JSONDecoder):
    """Decoder that rebuilds ``mDict``/``mList`` from ``orig``/``attr`` dicts."""

    def decode(self, s):
        """Parse *s* as JSON, then convert every ``orig``/``attr`` wrapper."""
        obj = super(JsonDebugDecoder, self).decode(s)
        return self.recursiveObjectDecode(obj)

    def recursiveObjectDecode(self, obj):
        """Walk *obj*, converting wrapper dicts to ``mList``/``mDict``.

        A dict counts as a wrapper when it has both ``orig`` and ``attr`` keys
        and ``orig`` is a list (-> ``mList``) or a dict (-> ``mDict``). Other
        dicts and lists are updated in place; other values pass through.
        """
        if isinstance(obj, dict):
            if "orig" in obj and "attr" in obj:
                orig = obj["orig"]
                if isinstance(orig, list):
                    return self.mListDecode(obj)
                if isinstance(orig, dict):
                    return self.mDictDecode(obj)
            for k in obj:
                obj[k] = self.recursiveObjectDecode(obj[k])
        elif isinstance(obj, list):
            for index, item in enumerate(obj):
                obj[index] = self.recursiveObjectDecode(item)
        return obj

    def mDictDecode(self, o):
        """Build an ``mDict`` from a wrapper with ``orig`` and ``attr`` keys."""
        res = mDict()
        # .items() instead of .iteritems(): the latter is missing on Python 3.
        for key, value in o['orig'].items():
            res[key] = self.recursiveObjectDecode(value)
        for key, value in o['attr'].items():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res

    def mListDecode(self, o):
        """Build an ``mList`` from a wrapper with ``orig`` and ``attr`` keys."""
        res = mList()
        for value in o['orig']:
            res.append(self.recursiveObjectDecode(value))
        for key, value in o['attr'].items():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res
def test_debug_json():
    """Encode a sample structure, decode it again and print both results."""
    games = mList(['mario', 'contra', 'tetris'])
    games.src = 'console'
    scores = mDict({'dp': 10, 'pk': 45})
    scores.processed = "unprocessed"
    test_json = {'games': games, 'scores': scores, 'date': datetime.datetime.now()}
    jsonDump = json.dumps(test_json, cls=JsonDebugEncoder)
    # print(...) with one argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(jsonDump)
    test_pyObject = json.loads(jsonDump, cls=JsonDebugDecoder)
    print(test_pyObject)
    # The attribute survives the round trip even though printing the list
    # itself does not show it.
    print(test_pyObject['games'].src)


if __name__ == '__main__':
    test_debug_json()
Here's some more info about the output:
# Encoded
{"date": "2013-05-06T22:41:35.498000", "games": {"orig": ["mario", "contra", "tetris"], "attr": {"src": "console"}}, "scores": {"orig": {"pk": 45, "dp": 10}, "attr": {"processed": "unprocessed"}}}
# Decoded ('games' contains the mList with the src attribute and 'scores' contains the mDict processed attribute)
# Note that printing the python objects doesn't directly show the processed and src attributes, as seen below.
{u'date': u'2013-05-06T22:41:35.498000', u'games': [u'mario', u'contra', u'tetris'], u'scores': {u'pk': 45, u'dp': 10}}
Sorry for any bad naming conventions, it's a quick setup. ;)
Note: The datetime doesn't get decoded back to the python representation. Implementing that could be done by checking for any dict key that is called 'date' and contains a valid string representation of a datetime.
Why can't you just create a new object type to pass to the encoder? Try:
class MStuff(object):
    """Holder object that keeps the value it was given in ``content``."""

    def __init__(self, content):
        # Stored as-is; no copy and no validation is performed.
        self.content = content
class mDict(MStuff):
    """``MStuff`` subclass that adds nothing; presumably wraps dict content."""
class mList(MStuff):
    """``MStuff`` subclass that adds nothing; presumably wraps list content."""
def json_debug_handler(obj):
    """Fallback serializer that logs *obj*'s type and converts known kinds.

    Returns the ISO string for a ``datetime.datetime``, an
    ``{'orig': ..., 'attrs': ...}`` dict for an ``MStuff`` instance, and
    ``None`` for everything else.
    """
    print("object received:")
    print(type(obj))
    print("\n\n")
    if isinstance(obj, datetime.datetime):
        return obj.isoformat()
    if not isinstance(obj, MStuff):
        return None
    # Report every instance attribute except private ones and the payload.
    extras = dict(
        (name, value)
        for name, value in obj.__dict__.items()
        if not (name.startswith("_") or name == "content")
    )
    return {'orig': obj.content, 'attrs': extras}
You could add validation on the mDict and mList if desired.
Along the lines of FastTurtle's suggestion, but requiring somewhat less code and much deeper monkeying, you can override isinstance itself, globally. This is probably Not A Good Idea, and may well break something. But it does work, in that it produces your required output, and it's quite simple.
First, before json is imported anywhere, monkey-patch the builtins module to replace isinstance with one that lies, just a little bit, and only in a specific context:
import sys

# Keep a handle on the genuine builtin before it is replaced.
_original_isinstance = isinstance


def _isinstance(obj, class_or_tuple):
    """``isinstance`` stand-in that hides the base types of ``mList``/``mDict``.

    When the caller's module defines ``_make_iterencode`` (as ``json.encoder``
    does), an ``mList``/``mDict`` instance only matches its own class, so the
    encoder stops treating it as a plain list/dict. Every other call is
    answered by the real ``isinstance``.
    """
    # globals() here would be *this* module's namespace no matter who calls,
    # so the caller's namespace is read from its stack frame instead.
    # NOTE: sys._getframe is a CPython implementation detail.
    if '_make_iterencode' in sys._getframe(1).f_globals:
        if not _original_isinstance(class_or_tuple, tuple):
            class_or_tuple = (class_or_tuple,)
        for custom in mList, mDict:
            if _original_isinstance(obj, custom):
                return custom in class_or_tuple
    return _original_isinstance(obj, class_or_tuple)
# Find the module that holds the built-in functions under either major Python
# version, then install the wrapper in place of the real isinstance.
try:
    import builtins # Python 3
except ImportError:
    import __builtin__ as builtins # Python 2
# From here on, code that resolves ``isinstance`` through the builtins module
# gets the wrapper.
builtins.isinstance = _isinstance
Then, create your custom encoder, implementing your custom serialization and forcing the use of _make_iterencode (since the C version won't be affected by the monkeypatching):
class CustomEncoder(json.JSONEncoder):
    """Encoder for ``datetime``, ``mDict`` and ``mList`` via ``default``."""

    def iterencode(self, o, _one_shot = False):
        """Encode *o*, always passing ``_one_shot=False`` to the base class.

        The caller's ``_one_shot`` value is deliberately ignored.
        """
        parent = super(CustomEncoder, self)
        return parent.iterencode(o, _one_shot=False)

    def default(self, obj):
        """Return a serializable stand-in for *obj*, or ``None`` if unknown."""
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        # Each custom container is reported as its plain contents plus its
        # instance attributes.
        for custom, plain in ((mDict, dict), (mList, list)):
            if isinstance(obj, custom):
                return {'orig': plain(obj), 'attrs': vars(obj)}
        return None
And that's really all there is to it! Output from Python 3 and Python 2 below.
Python 3.6.3 (default, Oct 10 2017, 21:06:48)
...
>>> from test import test_debug_json
>>> test_debug_json()
{"games": {"orig": ["mario", "contra", "tetris"], "attrs": {"src": "console"}}, "scores": {"orig": {"dp": 10, "pk": 45}, "attrs": {"processed": "unprocessed"}}, "date": "2018-01-27T13:56:15.666655"}
Python 2.7.13 (default, May 9 2017, 12:06:13)
...
>>> from test import test_debug_json
>>> test_debug_json()
{"date": "2018-01-27T13:57:04.681664", "games": {"attrs": {"src": "console"}, "orig": ["mario", "contra", "tetris"]}, "scores": {"attrs": {"processed": "unprocessed"}, "orig": {"pk": 45, "dp": 10}}}
If you are only looking for serialization and not deserialization, then you can process the object before sending it to json.dumps. See the example below:
import datetime
import json
def is_inherited_from(obj, objtype):
    """Tell whether *obj* is an instance of *objtype* without being exactly it."""
    if not isinstance(obj, objtype):
        return False
    # __mro__[0] is the object's own class.
    return type(obj).__mro__[0] != objtype
def process_object(data):
    """Return a copy of *data* in which container subclasses are expanded.

    An instance of a list, tuple or dict *subclass* is replaced by
    ``{"orig": <plain contents>, "attrs": <instance attributes>}``. Plain
    lists, tuples and dicts are rebuilt with their members processed
    recursively. Any other value is returned unchanged.
    """
    for base in (list, tuple, dict):
        if not isinstance(data, base):
            continue
        if is_inherited_from(data, base):
            return process_object({
                # base(data) keeps a dict's values; list(a_dict) would keep
                # only its keys.
                "orig": base(data),
                # Subclasses declaring __slots__ (e.g. namedtuples) have no
                # __dict__, which would make vars() raise TypeError.
                "attrs": getattr(data, "__dict__", {}),
            })
        if base is dict:
            return dict((key, process_object(value)) for key, value in data.items())
        return base(process_object(item) for item in data)
    return data
def json_debug_handler(obj):
    """Announce the call, then return the ISO string for a datetime.

    Any other kind of object yields ``None``.
    """
    print("object received:")
    print("\n\n")
    return obj.isoformat() if isinstance(obj, datetime.datetime) else None
class mDict(dict):
    """Dict subclass; exists so attributes can be attached to instances."""
class mList(list):
    """List subclass; exists so attributes can be attached to instances."""
def test_debug_json():
    """Pre-process a sample structure and print its JSON form."""
    game_titles = mList(['mario', 'contra', 'tetris'])
    game_titles.src = 'console'
    score_table = mDict({'dp': 10, 'pk': 45})
    score_table.processed = "unprocessed"
    sample = {
        'games': game_titles,
        'scores': score_table,
        'date': datetime.datetime.now(),
    }
    # Subclass instances are expanded first; the datetime is left for the
    # default= hook to convert.
    print(json.dumps(process_object(sample), default=json_debug_handler))


if __name__ == '__main__':
    test_debug_json()
The output of the same is
{"games": {"orig": ["mario", "contra", "tetris"], "attrs": {"src": "console"}}, "scores": {"orig": ["dp", "pk"], "attrs": {"processed": "unprocessed"}}, "date": "2018-01-24T12:59:36.581689"}
It is also possible to override JSONEncoder, but since it uses nested functions, that would be complex and would require the techniques discussed in the question below:
Can you patch *just* a nested function with closure, or must the whole outer function be repeated?
Since you want to keep things simple, I would not suggest going that route.
I changed the default resolver priority and the default iterator outputs to achieve your purpose.
1. Change the default resolver priority so that it is executed before all of the standard type checks: inherit from json.JSONEncoder and override the iterencode() method. All values should be wrapped in the ValueWrapper type, so that they are not resolved by the default standard resolvers.
2. Change the default iterator output: implement three custom wrapper classes, ValueWrapper, ListWrapper, and DictWrapper. ListWrapper implements __iter__(), and DictWrapper implements __iter__(), items() and iteritems().
import datetime
import json
class DebugJsonEncoder(json.JSONEncoder):
    """Encoder that offers every value to the ``default`` hook first.

    Values are wrapped in ``ValueWrapper`` so that they reach ``default``
    instead of being serialized directly as standard types.
    """

    def iterencode(self, o, _one_shot=False):
        """Encode *o*, consulting the current ``self.default`` for each value.

        ``self.default`` is replaced by the local ``_resolve`` function as a
        side effect of this call. The ``_one_shot`` argument is ignored;
        ``False`` is always passed to the base class.
        """
        # Remember the resolver that is in place when this call starts.
        default_resolver = self.default
        # Rewrites the default resolver, self.default(), with the custom resolver.
        # It will process the Wrapper classes.
        def _resolve(o):
            if isinstance(o, ValueWrapper):
                # Call the custom resolver before the others. _make_iterencode()
                # only calls the custom resolver after all of the standard type
                # checks have failed, but we want the custom resolver to run
                # ahead of those checks.
                # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L442
                result = default_resolver(o.data)
                # A non-None answer from the resolver wins.
                if (o.data is not None) and (result is not None):
                    return result
                # Otherwise containers are re-wrapped so that their members
                # come back through this function too.
                elif isinstance(o.data, (list, tuple)):
                    return ListWrapper(o.data)
                elif isinstance(o.data, dict):
                    return DictWrapper(o.data)
                else:
                    return o.data
            else:
                return default_resolver(o)
        # Re-assign the default resolver self.default with the custom resolver.
        # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L161
        self.default = _resolve
        # The input value must be wrapped in ValueWrapper so that it is not
        # resolved by the standard resolvers.
        # The last argument, _one_shot, must be False: we want to encode with
        # _make_iterencode().
        # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L259
        return json.JSONEncoder.iterencode(self, _resolve(ValueWrapper(o)), False)
class ValueWrapper():
    """
    Opaque box around an arbitrary object, exposed as ``data``.
    """

    def __init__(self, o):
        # Keep a reference to the wrapped object; nothing is copied.
        self.data = o
class ListWrapper(ValueWrapper, list):
    """
    List view of a sequence whose iteration yields wrapped elements.
    """

    def __init__(self, o):
        ValueWrapper.__init__(self, o)
        # Fill the underlying list as well. Left empty, the wrapper is falsy
        # and has length 0, and the json encoder emits "[]" for a falsy list
        # without ever iterating it.
        list.__init__(self, o)

    # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L307
    def __iter__(self):
        """Yield each wrapped element inside a ``ValueWrapper``."""
        for chunk in self.data:
            yield ValueWrapper(chunk)
class DictWrapper(ValueWrapper, dict):
    """
    Dict copy of a mapping whose item iteration yields wrapped values.
    """

    def __init__(self, d):
        # Set ``data`` like every other ValueWrapper, so code that reads
        # ``.data`` from a ValueWrapper instance also works on this class.
        ValueWrapper.__init__(self, d)
        dict.__init__(self, d)

    def __iter__(self):
        """Yield ``(key, ValueWrapper(value))`` pairs (not bare keys)."""
        for key, value in dict.items(self):
            yield key, ValueWrapper(value)

    # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L361
    def items(self):
        """Yield ``(key, ValueWrapper(value))`` pairs."""
        for key, value in dict.items(self):
            yield key, ValueWrapper(value)

    # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L363
    def iteritems(self):
        """Python 2 spelling of ``items()``; yields the same pairs.

        Delegates to ``items()`` because ``dict.iteritems`` does not exist on
        Python 3.
        """
        return self.items()
def json_debug_handler(obj):
    """Log the type of *obj* and return a serializable stand-in for it.

    Returns the ISO string for a ``datetime.datetime``, an
    ``{'orig': obj, 'attrs': vars(obj)}`` dict for an ``mDict``/``mList``,
    and ``None`` for anything else.
    """
    print("object received:")
    # Function-call form: the bare ``print type(obj)`` statement is a
    # SyntaxError on Python 3, unlike the print(...) calls around it.
    print(type(obj))
    print("\n\n")
    if isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj, (mDict, mList)):
        return {'orig': obj, 'attrs': vars(obj)}
    else:
        return None
class mDict(dict):
    """A dict that, being a subclass, can also hold instance attributes."""
class mList(list):
    """A list that, being a subclass, can also hold instance attributes."""
def test_debug_json():
    """Serialize a sample structure with DebugJsonEncoder and print it."""
    game_titles = mList(['mario', 'contra', 'tetris'])
    game_titles.src = 'console'
    score_table = mDict({'dp': 10, 'pk': 45})
    score_table.processed = "unprocessed"
    sample = {
        'games': game_titles,
        'scores': score_table,
        'date': datetime.datetime.now(),
        'default': None,
    }
    encoded = json.dumps(sample, cls=DebugJsonEncoder, default=json_debug_handler)
    print(encoded)


if __name__ == '__main__':
    test_debug_json()