python中 jsonschema 与 schema 效率比较

前面几篇文章总结了python中jsonschema与schema的用法,这里测试一下两者的效率:

上代码:

import time

from jsonschema import validate, draft7_format_checker
from jsonschema.exceptions import SchemaError, ValidationError
# Alias kept because the `schema` import below rebinds the bare name SchemaError.
from jsonschema.exceptions import SchemaError as JsonSchemaError
from schema import Schema, And, Optional, SchemaError, Regex


def tags_check(tags_list):
    """Return True when ``tags_list`` has 1 to 5 entries, each of length >= 2."""
    tag_count = len(tags_list)
    if not 1 <= tag_count <= 5:
        return False
    # Every individual tag must have a length of at least two.
    return all(len(tag) >= 2 for tag in tags_list)


def id_generator(start=1):
    """Yield ``start``, ``start + 1``, ``start + 2``, ... without end."""
    current = start
    while True:
        yield current
        current += 1


class DataFactory(object):
    """Builds sample book records, each with a fresh sequential id."""

    def __init__(self):
        # Shared id source; every create_data() call consumes one value.
        self.id_g = id_generator()

    def create_data(self):
        """Return a new book record dict whose id, name, info and price derive from the next id."""
        book_id = next(self.id_g)
        extra_info = {
            "info1": "1111",
            "info2": "2222"
        }
        return {
            "id": book_id,
            "name": "jarvis手册%d" % book_id,
            "info": "贾维斯平台使用手册%d" % book_id,
            "price": 5.5 + book_id,
            "tags": ["jar"],
            "date": "2019-5-25",
            "others": extra_info
        }


# JSON Schema (draft-07) describing a book record, used by the jsonschema benchmark.
schema1 = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "book info",
    "description": "some information about book",
    "type": "object",
    "properties": {
        "id": {
            "description": "The unique identifier for a book",
            "type": "integer",
            "minimum": 1
        },
        "name": {
            "description": "book name",
            "type": "string",
            "minLength": 3,
            "maxLength": 30
        },
        "info": {
            "description": "simple information about book",
            "type": "string",
            "minLength": 10,
            "maxLength": 60
        },
        "price": {
            "description": "book price",
            "type": "number",
            "multipleOf": 0.5,
            "minimum": 5.0,
            "maximum": 111111.0,
        },
        "tags": {
            "type": "array",
            # Keyword names must be spelled exactly: unrecognised keywords are
            # ignored by validators, so a misspelling silently disables the rule.
            # "items" applies the sub-schema to every element of the array.
            "items": {
                "type": "string",
                "minLength": 2
            },
            "minItems": 1,
            "maxItems": 5,
        },
        "date": {
            "description": "书籍出版日期",
            "type": "string",
            "format": "date",
        },
        "bookcoding": {
            "description": "书籍编码",
            "type": "string",
            "pattern": "^[A-Z]+[a-zA-Z0-9]{12}$"
        },
        "others": {
            "description": "其他信息",
            "type": "object",
            "properties": {
                "info1": {
                    "type": "string"
                },
                "info2": {
                    "type": "string"
                }
            }
        }
    },
    "required": [
        "id", "name", "info", "price", "tags"
    ]
}

# Rules for the `schema` library covering the same book record as schema1.
# The error messages state the limits that the checks actually enforce, and the
# price bounds are inclusive to agree with schema1's "minimum"/"maximum".
schema2 = {
    "id": And(int, lambda x: 1 <= x, error="id必须是整数,大于等于1"),
    "name": And(str, lambda s: 3 <= len(s) <= 30, error="name长度3-30"),
    "info": And(str, lambda s: 10 <= len(s) <= 60, error="info信息出错"),
    "price": And(float, lambda x: (5.0 <= x <= 111111.0) and (x % 0.5 == 0), error="price必须是5.0到111111.0之间的小数,且能被0.5整除"),
    "tags": And(list, tags_check, error="tags出错"),
    Optional("date"): And(str, error="日期格式出错"),
    Optional("bookcoding"): And(str, Regex("^[A-Z]+[a-zA-Z0-9]{12}$"), error="书籍编码出错"),
    Optional("others"): {
        "info1": str,
        "info2": str
    },
}


def time_jsonschema(data):
    """Measure how long jsonschema takes to validate every record in ``data``.

    Each record is checked against ``schema1`` with the draft-07 format checker.
    Failures are printed rather than raised, so the whole batch is processed.

    Args:
        data: iterable of records (dicts) to validate.

    Returns:
        Elapsed time in seconds as a float.
    """
    # perf_counter() is the clock intended for measuring short intervals.
    start_time = time.perf_counter()
    for json_data in data:
        try:
            validate(instance=json_data, schema=schema1, format_checker=draft7_format_checker)
        except JsonSchemaError as e:
            # JsonSchemaError is jsonschema's SchemaError; the bare name
            # SchemaError refers to the `schema` library's exception in this file.
            print("验证模式出错:{}\n提示信息:{}".format(" --> ".join(str(i) for i in e.path), e.message))
        except ValidationError as e:
            # Path entries may be array indexes (ints), so stringify before joining.
            print("出错字段:{}\n提示信息:{}".format(" --> ".join(str(i) for i in e.path), e.message))
    return time.perf_counter() - start_time


def time_schema(data):
    """Measure how long the `schema` library takes to validate every record in ``data``.

    Failures are printed rather than raised, so the whole batch is processed.

    Args:
        data: iterable of records (dicts) to validate.

    Returns:
        Elapsed time in seconds as a float.
    """
    # perf_counter() is the clock intended for measuring short intervals.
    start_time = time.perf_counter()
    for json_data in data:
        try:
            # NOTE: the Schema object is rebuilt for every record, so its
            # construction cost is part of the measured time.
            Schema(schema2).validate(json_data)
        except SchemaError as e:
            print(e)
    return time.perf_counter() - start_time


if __name__ == "__main__":
    # Build the sample records once so both libraries validate identical input.
    factory = DataFactory()
    data_list = [factory.create_data() for _ in range(10000)]
    t1 = time_jsonschema(data_list)
    t2 = time_schema(data_list)
    # A very fast run can be reported as 0.0 seconds; avoid ZeroDivisionError.
    ratio = t1 / t2 if t2 else float("inf")
    print("jsonschema:schema = {}:{} = {}:1\n".format(t1, t2, ratio))


结果分析:

# 10条数据时:
jsonschema:schema = 0.012000083923339844:0.0019941329956054688 = 5.517694882831181:1
# 100条数据时:
jsonschema:schema = 0.10173273086547852:0.023936033248901367 = 4.180191742616664:1
# 1000条数据时:
jsonschema:schema = 0.9435069561004639:0.2263953685760498 = 4.127518805860752:1
# 10000条数据时:
jsonschema:schema = 9.319035053253174:2.2689626216888428 = 4.1371787451116295:1

数据在10条的时候,多次测试,最终结果不稳定,耗时比在6.0、5.5、3.6左右,波动较大。
数据在100条的时候,多次测试,最终结果比较稳定,耗时比在3.85—4.3之间。
数据在1000条的时候,多次测试,最终结果的耗时比在4.0—4.2之间。
数据在10000条的时候,由于每次测试时间都比较长,故测试次数相对比较少,但耗时比在4.1左右。

试验次数不是很多,基于上面代码和测试数据,schema 的校验速度大约是 jsonschema 的 4 倍(即耗时约为 jsonschema 的 1/4)

posted @ 2019-07-26 15:27  长安223  阅读(474)  评论(0)  编辑  收藏  举报