in 运算符的性能测试

# ------------------------------------in 运算符的性能测试------------------------------------
def save_data(max_exponent=7):
    """Generate distinct random floats and write them to two array files.

    A pool of ``10 ** max_exponent + 10 ** (max_exponent - 1) // 2``
    distinct floats is built, shuffled, and split into two files in the
    current working directory (raw ``array('d')`` machine format):

    * ``not_selected.arr`` -- the first ``10 ** (max_exponent - 1) // 2``
      values; none of them occurs in ``selected.arr``.
    * ``selected.arr`` -- the remaining ``10 ** max_exponent`` values.

    Args:
        max_exponent: Power of ten giving the number of values written to
            ``selected.arr``. Must be at least 1. Defaults to 7.

    Raises:
        ValueError: If ``max_exponent`` is smaller than 1.
    """
    import random
    from array import array

    if max_exponent < 1:
        raise ValueError('max_exponent must be >= 1, got %r' % (max_exponent,))

    haystack_len = 10 ** max_exponent
    needles_len = 10 ** (max_exponent - 1)
    absent_len = needles_len // 2
    sample_len = haystack_len + absent_len

    def reciprocal():
        # random.random() draws from [0.0, 1.0); skip an exact 0.0 so the
        # division below can never raise ZeroDivisionError.
        value = random.random()
        while not value:
            value = random.random()
        return 1 / value

    # A set silently drops duplicates, so the first pass may come up short.
    sample = {reciprocal() for _ in range(sample_len)}
    print('最初的样本: %d 个元素' % len(sample))

    # Top the sample up until it holds exactly sample_len distinct values.
    while len(sample) < sample_len:
        sample.add(reciprocal())

    print('最终的样本: %d 个元素' % len(sample))

    sample = array('d', sample)
    random.shuffle(sample)

    # The two slices are disjoint and every value is unique, so nothing
    # written to not_selected.arr can also appear in selected.arr.
    not_selected = sample[:absent_len]
    print('未选中的样本: %d samples' % len(not_selected))
    print('将未选中的样本写入 not_selected.arr')
    with open('not_selected.arr', 'wb') as fp:
        not_selected.tofile(fp)

    selected = sample[absent_len:]
    print('选中的样本: %d samples' % len(selected))
    print('写入selected.arr')
    with open('selected.arr', 'wb') as fp:
        selected.tofile(fp)


def test_in(container_type, max_exponent=7):
    """Time the ``in`` operator against haystacks of growing size.

    For every size ``10 ** n`` with ``n`` from 3 through ``max_exponent``,
    the first ``size`` doubles of ``selected.arr`` are loaded into a
    container of the requested type (for ``dict`` the values become keys).
    The needles are all values of ``not_selected.arr`` plus every
    ``size // 500``-th value of the loaded selection. The search loop is
    timed five times with ``timeit.repeat`` and the fastest run is printed
    as ``|size|seconds``. The setup code runs before each of the five
    timings and prints one progress line per run.

    Both data files are read from the current working directory;
    ``save_data`` writes them. ``selected.arr`` must hold at least
    ``10 ** max_exponent`` doubles.

    Args:
        container_type: Builtin container to test, given either by name
            (``'dict'``, ``'set'``, ``'list'``) or as the type itself.
        max_exponent: Largest power of ten used as haystack size. Values
            below 3 produce no measurements. Defaults to 7.
    """
    import timeit

    # The setup code is generated as text, so a type object has to be
    # turned into the builtin name that refers to it.
    if isinstance(container_type, str):
        type_name = container_type
    else:
        type_name = container_type.__name__

    setup_template = '''
from array import array
selected = array('d')
with open('selected.arr', 'rb') as fp:
    selected.fromfile(fp, {size})

if {container_type} is dict:
    haystack = dict.fromkeys(selected, 1)
else:
    haystack = {container_type}(selected)

print(type(haystack), end=' ')
print('haystack: %10d' % len(haystack), end=' ')

needles = array('d')
with open('not_selected.arr', 'rb') as fp:
    needles.frombytes(fp.read())

needles.extend(selected[::{size}//500])
print('needles: %10d' % len(needles), end=' ')
'''
    search_stmt = '''
found = 0
for n in needles:
    if n in haystack:
        found += 1
print(' found: %10d' % found)
'''
    for exponent in range(3, max_exponent + 1):
        size = 10 ** exponent
        setup = setup_template.format(container_type=type_name, size=size)
        timings = timeit.repeat(stmt=search_stmt, setup=setup,
                                repeat=5, number=1)
        # The minimum is the run least disturbed by other system activity.
        print('|{:{}d}|{:f}'.format(size, max_exponent + 1, min(timings)))


# Despite its "test_" prefix this is a benchmark driver, not a unit test;
# the flag keeps pytest from collecting it.
test_in.__test__ = False


if __name__ == '__main__':
    # Uncomment on the first run to create the test data files:
    # save_data()
    test_in(container_type='dict')  # alternatives: 'set', 'list'
posted @ 2020-06-20 16:40  怀心抱素  阅读(301)  评论(0)  编辑  收藏  举报