Python 序列化模块的速度比较
# -*- coding: utf-8 -*-
# @Time : 2019-04-01 17:41
# @Author : cxa
# @File : dictest.py
# @Software: PyCharm
import time
import pickle
import marshal
import ujson
import msgpack
def test(data, method):
    """Benchmark one serializer by timing repeated dumps and loads of ``data``.

    The object is serialized 10000 times, then the last serialized payload
    is deserialized 10000 times. The elapsed time of each phase (in seconds)
    is printed to stdout.

    Args:
        data: Object to serialize; it must be supported by the chosen
            serializer.
        method: Name of the serializer to benchmark: 'pickle', 'marshal',
            'ujson' or 'msgpack'.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Resolve the module lazily so only the requested serializer is touched.
    if method == 'pickle':
        codec = pickle
    elif method == 'marshal':
        codec = marshal
    elif method == 'ujson':
        codec = ujson
    elif method == "msgpack":
        codec = msgpack
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # on ``dumps`` further down.
        raise ValueError(
            'unsupported serialization method: {!r}'.format(method))

    dumps = codec.dumps
    loads = codec.loads
    loop = 10000

    # perf_counter() is the clock intended for measuring short durations;
    # time.time() can jump if the system clock is adjusted.
    start = time.perf_counter()
    for _ in range(loop):
        s = dumps(data)
    print('{} dumps time cost: {}'.format(
        method, time.perf_counter() - start))

    # Only the payload produced above is loaded, so no untrusted input is
    # ever handed to pickle/marshal here.
    start = time.perf_counter()
    for _ in range(loop):
        loads(s)
    print('{} loads time cost: {}'.format(
        method, time.perf_counter() - start))
def main():
    """Build the sample dictionary and benchmark every serializer on it.

    The sample maps zero-padded five-digit string keys (``i % 80``) to lists
    of ``i / 80`` values for ``i`` in ``range(10000)``. The number of keys is
    printed, then ``test`` is run once per serializer name.
    """
    bucket_count = 80
    data = {}
    for number in range(10000):
        key = '%05d' % (number % bucket_count)
        # Create the bucket on first sight, then append to it.
        data.setdefault(key, []).append(number / bucket_count)
    print('data:', len(data))

    # Benchmark each serializer in turn on the same data set.
    for method in ('pickle', 'marshal', 'ujson', 'msgpack'):
        test(data, method)
# Run the benchmark only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
结果(每种方法各执行 10000 次 dumps / loads 的总耗时,单位:秒):
pickle dumps time cost: 2.6049489974975586
pickle loads time cost: 3.6289799213409424
marshal dumps time cost: 2.2469120025634766
marshal loads time cost: 2.907557964324951
ujson dumps time cost: 8.108527183532715
ujson loads time cost: 4.873885869979858
msgpack dumps time cost: 2.671412944793701
msgpack loads time cost: 3.2492690086364746
注意:
千万不要对不信任的数据使用 pickle.load() 或 pickle.loads()。
pickle在加载时有一个副作用就是它会自动加载相应模块并构造实例对象。
但是某个坏人如果知道pickle的工作原理,
他就可以创建一个恶意的数据导致Python执行随意指定的系统命令。
因此,一定要保证pickle只在相互之间可以认证对方的解释器之间内部使用。