python json.dumps 详解,TypeError: Object of type 'xxx' is not JSON serializable
一、原始问题
1.执行如下代码
import json
def test_dumps():  # Demo: reproduces the "is not JSON serializable" TypeError shown in the traceback.
    data={"keys":"string",1:[2,3],"dict":{"a":"b"},"key_bytes":b'123'}  # "key_bytes" holds a bytes value
    ans=json.dumps(data)
    print(ans)  # not reached: the json.dumps call above raises

if __name__ == "__main__":
    test_dumps()
2.对于如上代码,我们会遇到如下错误
Traceback (most recent call last): File "test_dumps.py", line 8, in <module> test_dumps() File "test_dumps.py", line 4, in test_dumps ans=json.dumps(data) File "/usr/lib/python3.6/json/__init__.py", line 231, in dumps return _default_encoder.encode(obj) File "/usr/lib/python3.6/json/encoder.py", line 199, in encode chunks = self.iterencode(o, _one_shot=True) File "/usr/lib/python3.6/json/encoder.py", line 257, in iterencode return _iterencode(o, 0) File "/usr/lib/python3.6/json/encoder.py", line 180, in default o.__class__.__name__) TypeError: Object of type 'bytes' is not JSON serializable
二、代码追踪
1.dumps函数
针对以上问题,我们一步一步看源码,进入到json.dumps源码,可以看到如下内容,这里删除了源码中的注释。可以看到是通过JSONEncoder这个类的encode方法来编码输入的obj数据
def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        default=None, sort_keys=False, **kw):
    """Encode ``obj`` and return whatever the encoder's ``encode`` returns.

    When every option is left at its default value, the shared
    ``_default_encoder`` (defined outside this excerpt) is reused.
    Otherwise a new encoder is built from the given options, using ``cls``
    or ``JSONEncoder`` when ``cls`` is None, and ``obj`` is passed to its
    ``encode`` method.
    """
    # cached encoder
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        default is None and not sort_keys and not kw):
        return _default_encoder.encode(obj)
    if cls is None:
        cls = JSONEncoder
    # All options, including any extra keyword arguments, are forwarded to
    # the encoder's constructor.
    return cls(
        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
        separators=separators, default=default, sort_keys=sort_keys,
        **kw).encode(obj)
2.encode函数实现
再次进入到encode中,如果数据o是字符串,则有两种编码方式。
一种(encode_basestring_ascii)只输出ASCII字符:非ASCII字符(例如中文)会被自动转义为\uXXXX形式的Unicode转义序列,比如会把"中"转化为"\u4e2d"。在输出的文本里这是6个ASCII字符;标准的JSON解析器在解析时会把它还原为"中",但如果其他语言或程序只是把它当作普通文本处理(没有经过JSON解析),就会将这个看为6个字符,而非一个中文字符"中"。
另一种(encode_basestring)不转义非ASCII字符,而是原样保留(例如"中"仍然输出为"中")。注意它返回的仍然是str,而不是二进制的bytes。
在这个函数中,主要是利用self.iterencode这个方法处理数据。
def encode(self, o):
    """Return ``o`` encoded as a single string.

    A ``str`` input is encoded directly: by ``encode_basestring_ascii``
    when ``self.ensure_ascii`` is true, otherwise by ``encode_basestring``.
    Any other input is passed to ``self.iterencode`` and the resulting
    chunks are joined together.
    """
    # This is for extremely simple cases and benchmarks.
    if isinstance(o, str):
        if self.ensure_ascii:
            return encode_basestring_ascii(o)
        else:
            return encode_basestring(o)
    chunks = self.iterencode(o, _one_shot=True)
    if not isinstance(chunks, (list, tuple)):
        # iterencode returned something other than a list/tuple;
        # materialize it before joining.
        chunks = list(chunks)
    return ''.join(chunks)
3.核心处理函数_make_iterencode,其中包含了可扩展的_default
真正要编码的数据的部分是如下,可以看到在_iterencode中各个if条件处理数据,其中dict和list又单独写了一个函数处理,而如果数据不在if中,会调用_default处理。
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__str__,
    ):
    """Build and return the ``_iterencode`` generator function.

    ``_iterencode`` yields the encoded pieces of an object. Types it does not
    recognise are first converted by calling ``_default`` and the result is
    then encoded recursively.
    """

    if _indent is not None and not isinstance(_indent, str):
        # A non-string indent is used as a repeat count for spaces.
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        """Only the signature is shown; the body is omitted in this excerpt."""

    def _iterencode_dict(dct, _current_indent_level):
        """Only the signature is shown; the body is omitted in this excerpt."""

    def _iterencode(o, _current_indent_level):
        # Dispatch on the type of ``o``. None/True/False are matched by
        # identity, and True/False are tested before the int branch.
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            # Unrecognised type: this is the extension point. ``_default``
            # must return something encodable, otherwise it raises.
            if markers is not None:
                # ``markers`` records the ids of objects currently being
                # converted, so an object seen again is reported as a cycle.
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                # Conversion finished; the object is no longer "in progress".
                del markers[markerid]
    return _iterencode
三、解决问题
再次回到开始的问题,我们需要重写json.JSONEncoder中的default函数,这个default函数就是上述提到的_default函数,在default中添加处理bytes类型,修改后代码如下。
import json

import numpy as np


class Encoder(json.JSONEncoder):
    """JSONEncoder that also serializes NumPy values and byte strings.

    ``default`` is only consulted for objects the stock encoder does not
    handle itself, so the encoding of str, int, float, list, tuple and dict
    values is unchanged.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Args:
            obj: A value the stock encoder could not serialize.

        Returns:
            A (nested) list for ``np.ndarray``, a plain Python scalar for
            NumPy scalar types, or a ``str`` for ``bytes``/``bytearray``.

        Raises:
            UnicodeDecodeError: If a byte string is not valid UTF-8.
            TypeError: For any other type (raised by the base class).
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            # NumPy scalars such as np.int64 or np.float32 are not plain
            # int/float objects; .item() converts them to Python scalars.
            return obj.item()
        if isinstance(obj, (bytes, bytearray)):
            # Strict decoding on purpose: invalid UTF-8 raises instead of
            # being silently altered.
            return obj.decode('utf-8')
        # Let the base class raise TypeError for everything else.
        return super().default(obj)


def test_dumps():
    """Serialize the sample dict with ``Encoder`` and print the JSON text."""
    data = {"keys": "string", 1: [2, 3], "dict": {"a": "b"}, "key_bytes": b'123'}
    ans = json.dumps(data, cls=Encoder)
    print(ans)


if __name__ == "__main__":
    test_dumps()
再次运行,可以获得如下结果,成功解决问题
{"keys": "string", "1": [2, 3], "dict": {"a": "b"}, "key_bytes": "123"}