hello cc

python pybloom

Posted on 2017-12-22 03:31  星际海盗  阅读(541)  评论(0)  编辑  收藏  举报

1.安装:

pip install pybloom

or:

https://pypi.python.org/pypi/pybloom/1.0.2

2.使用:

from pybloom import BloomFilter

# Basic usage example. `datalist` and `newdata` are not defined in this
# snippet; supply your own iterables of items.

# Filter with a capacity of 10000 entries and an error rate of 0.001.
bl = BloomFilter(capacity=10000, error_rate=0.001)

# Load the already-known items into the filter.
for i in datalist:
    bl.add(i)

# Report items the filter already contains; record the ones it does not.
for i in newdata:
    if i in bl:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print('has this data')
    else:
        bl.add(i)

 -----------

	# Collect the distinct hosts (netloc) of the URLs listed in every file
	# under `path`, appending each newly seen host to 'allfile'.
	# `path`, `os`, `urlparse` and `BloomFilter` come from the enclosing code,
	# which is not part of this fragment.
	try:
		# Filter with a capacity of 1000 entries and an error rate of 0.001.
		bl = BloomFilter(capacity=1000, error_rate=0.001)
		with open('allfile','a+') as fd:
			# 'a+' may leave the read position at end of file; rewind so the
			# hosts saved by earlier runs are actually loaded.
			fd.seek(0)
			for saved in fd.readlines():
				# Drop the newline added on write, otherwise the stored entry
				# never equals the bare url.netloc tested below.
				bl.add(saved.rstrip('\n'))
			if os.path.isdir(path):
				for name in os.listdir(path):
					with open(os.path.join(path, name),'r') as fdd:
						for line in fdd:
							url = urlparse(line.strip('\n'))
							print(url.netloc)
							if url.netloc not in bl:
								bl.add(url.netloc)
								fd.write(url.netloc+'\n')
								# Flush per host so progress survives an
								# interruption part-way through.
								fd.flush()
			elif os.path.isfile(path):
				# Single-file input is only reported, not processed.
				print('file..')
	except Exception as e:
		# Best-effort: report the failure and carry on instead of raising.
		print(str(e))

  

------------

hello man