1.安装:
pip install pybloom
或者从 PyPI 页面下载安装包:
https://pypi.python.org/pypi/pybloom/1.0.2
2.使用:
from pybloom import BloomFilter

# Minimal usage example: seed a Bloom filter with known items, then use it
# to tell previously-seen items from new ones.

# Filter sized for 10000 entries with an error rate of 0.001.
bl = BloomFilter(capacity=10000, error_rate=0.001)

# Load every already-known item into the filter.
for i in datalist:
    bl.add(i)

# Report items the filter already contains; remember the ones it does not.
for i in newdata:
    if i in bl:
        # Single-argument call form prints the same text on Python 2 and 3.
        print('has this data')
    else:
        bl.add(i)
-----------
# Collect the distinct network locations (hosts) of the URLs listed in every
# file under `path`, appending each newly seen host to the 'allfile' log.
# Hosts already recorded in 'allfile' by earlier runs are loaded first so
# they are not written again.
try:
    # NOTE(review): capacity is fixed at 1000 entries; confirm this is large
    # enough for the expected number of distinct hosts.
    bl = BloomFilter(capacity=1000, error_rate=0.001)
    with open('allfile', 'a+') as fd:
        # The initial read position of an 'a+' stream is platform dependent
        # (Python 3 starts at end of file), so rewind before reading.
        fd.seek(0)
        for x in fd:
            # Drop the newline so stored entries compare equal to the bare
            # netloc values tested below.
            bl.add(x.strip('\n'))
        # Return to the end of the file before switching from reading to
        # appending.
        fd.seek(0, 2)

        if os.path.isdir(path):
            filelist = os.listdir(path)
            for i in filelist:
                with open(os.path.join(path, i), 'r') as fdd:
                    for c in fdd:
                        con = c.strip('\n')
                        url = urlparse(con)
                        print(url.netloc)
                        if url.netloc not in bl:
                            bl.add(url.netloc)
                            fd.write(url.netloc + '\n')
                            # Flush per entry so the log stays current even
                            # if a later file fails.
                            fd.flush()
        elif os.path.isfile(path):
            print('file..')
except Exception as e:
    # Best-effort script: report the failure instead of propagating it.
    print(str(e))
------------