# ------------------------------------in 运算符的性能测试------------------------------------
def save_data(max_exponent=7):
    """Generate distinct random floats and save them to two binary files.

    A sample of ``10 ** max_exponent + 10 ** (max_exponent - 1) // 2``
    distinct floats is built, shuffled and split in two:

    * ``not_selected.arr`` receives the first ``10 ** (max_exponent - 1) // 2``
      values;
    * ``selected.arr`` receives the remaining ``10 ** max_exponent`` values.

    Both files are written to the current working directory as raw C doubles
    (``array('d').tofile``). Progress messages are printed to stdout.

    Args:
        max_exponent: Power of ten giving the number of values stored in
            ``selected.arr``. Must be an integer >= 1. Defaults to 7.

    Raises:
        ValueError: If ``max_exponent`` is smaller than 1.
    """
    import random
    from array import array

    if max_exponent < 1:
        raise ValueError('max_exponent must be >= 1, got %r' % (max_exponent,))

    haystack_len = 10 ** max_exponent
    needles_len = 10 ** (max_exponent - 1)
    sample_len = haystack_len + needles_len // 2

    # random.random() returns a value in [0.0, 1.0), so 1.0 - random.random()
    # lies in (0.0, 1.0] and the division can never hit zero.
    sample = {1 / (1.0 - random.random()) for _ in range(sample_len)}
    print('最初的样本: %d 个元素' % len(sample))

    # The set silently drops duplicated random numbers; top it up until the
    # sample is complete.
    while len(sample) < sample_len:
        sample.add(1 / (1.0 - random.random()))
    print('最终的样本: %d 个元素' % len(sample))

    sample = array('d', sample)
    random.shuffle(sample)

    split_at = needles_len // 2

    not_selected = sample[:split_at]
    print('未选中的样本: %d samples' % len(not_selected))
    print('将未选中的样本写入 not_selected.arr')
    with open('not_selected.arr', 'wb') as fp:
        not_selected.tofile(fp)

    selected = sample[split_at:]
    print('选中的样本: %d samples' % len(selected))
    print('写入selected.arr')
    with open('selected.arr', 'wb') as fp:
        selected.tofile(fp)
def test_in(container_type):
import timeit
SETUP = '''
from array import array
selected = array('d')
with open('selected.arr', 'rb') as fp:
selected.fromfile(fp, {size})
if {container_type} is dict:
haystack = dict.fromkeys(selected, 1)
else:
haystack = {container_type}(selected)
print(type(haystack), end=' ')
print('haystack: %10d' % len(haystack), end=' ')
needles = array('d')
with open('not_selected.arr', 'rb') as fp:
needles.frombytes(fp.read())
needles.extend(selected[::{size}//500])
print('needles: %10d' % len(needles), end=' ')
'''
TEST = '''
found = 0
for n in needles:
if n in haystack:
found += 1
print(' found: %10d' % found)
'''
MAX_EXPONENT = 7
for n in range(3, MAX_EXPONENT + 1):
size = 10 ** n
setup = SETUP.format(container_type=container_type, size=size)
test = TEST.format()
tt = timeit.repeat(stmt=test, setup=setup, repeat=5, number=1)
print('|{:{}d}|{:f}'.format(size, MAX_EXPONENT + 1, min(tt)))
if __name__ == '__main__':
    # Run save_data() once beforehand to create the test data files.
    # Other container names to benchmark here: 'set', 'list'.
    test_in(container_type='dict')