[原创]大数据:布隆过滤器C#版简单实现。
/// <summary>
/// A simple Bloom filter for strings, backed by a <see cref="BitArray"/>.
/// Each item is mapped to <see cref="BitIndexCount"/> bit positions that are drawn from a
/// <see cref="Random"/> seeded with the item's hash code, so the same item always maps to
/// the same positions for a given hash code.
/// </summary>
/// <remarks>
/// NOTE(review): positions derive from <see cref="object.GetHashCode"/>; if the runtime
/// randomizes string hash codes per process, the bit array is only meaningful inside the
/// process that built it - confirm before persisting <see cref="_BloomArray"/>.
/// </remarks>
public class BloomFilter
{
    /// <summary>Bit vector of the filter; a set bit means some added item mapped to it.</summary>
    public BitArray _BloomArray;

    /// <summary>Number of bits in the filter (m).</summary>
    public Int64 BloomArryLength { get; }

    /// <summary>Number of items the filter is expected to hold (n); only used for the estimate.</summary>
    public Int64 DataArrayLeng { get; }

    /// <summary>Number of bit positions probed per item (k).</summary>
    public Int64 BitIndexCount { get; }

    /// <summary>
    /// Initializes the filter.
    /// </summary>
    /// <param name="BloomArryLength">Size of the bit array; must be positive.</param>
    /// <param name="DataArrayLeng">Expected number of items to be added.</param>
    /// <param name="bitIndexCount">Number of hash positions per item.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="BloomArryLength"/> is zero or negative.
    /// </exception>
    public BloomFilter(int BloomArryLength, int DataArrayLeng, int bitIndexCount)
    {
        if (BloomArryLength <= 0)
        {
            // An empty bit array cannot be indexed, so fail here rather than on first use.
            throw new ArgumentOutOfRangeException(nameof(BloomArryLength), "The bit array length must be positive.");
        }

        _BloomArray = new BitArray(BloomArryLength);
        this.BloomArryLength = BloomArryLength;
        this.DataArrayLeng = DataArrayLeng;
        this.BitIndexCount = bitIndexCount;
    }

    /// <summary>Adds <paramref name="str"/> to the filter by setting all of its bit positions.</summary>
    /// <param name="str">Item to add.</param>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public void Add(string str)
    {
        Random positions = CreatePositionSource(str);
        for (int i = 0; i < BitIndexCount; i++)
        {
            _BloomArray[NextPosition(positions)] = true;
        }
    }

    /// <summary>
    /// Tests whether <paramref name="str"/> may have been added.
    /// </summary>
    /// <param name="str">Item to look up.</param>
    /// <returns>
    /// <c>false</c> if at least one of the item's bit positions is clear (the item was never
    /// added); <c>true</c> if all of them are set (the item was added, or it is a false positive).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public bool isExist(string str)
    {
        Random positions = CreatePositionSource(str);
        for (int i = 0; i < BitIndexCount; i++)
        {
            if (!_BloomArray[NextPosition(positions)])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Returns the hash code of <paramref name="value"/>.</summary>
    /// <param name="value">Object to hash.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    public int GetHashCode(object value)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        return value.GetHashCode();
    }

    /// <summary>
    /// Estimates the false-positive probability as (1 - e^(-k * n / m)) ^ k, where k is
    /// <see cref="BitIndexCount"/>, n is <see cref="DataArrayLeng"/> and m is
    /// <see cref="BloomArryLength"/>.
    /// </summary>
    public double getFalsePositiveProbability()
    {
        // (1 - e^(-k * n / m)) ^ k
        return Math.Pow((1 - Math.Exp(-BitIndexCount * (double)DataArrayLeng / BloomArryLength)), BitIndexCount);
    }

    // Builds the pseudo-random sequence shared by Add and isExist so both probe identical positions.
    private Random CreatePositionSource(string str)
    {
        if (str == null)
        {
            throw new ArgumentNullException(nameof(str));
        }

        return new Random(GetHashCode(str));
    }

    // Draws the next bit position in [0, BloomArryLength).
    private int NextPosition(Random positions)
    {
        // Random.Next(max) already excludes max, so passing the full length reaches every bit;
        // subtracting one here would leave the last bit permanently unused.
        // The cast is safe because the length was supplied to the constructor as an int.
        return positions.Next((int)BloomArryLength);
    }
}
/// <summary>
/// Demo driver: fills a Bloom filter with the decimal strings of 0..DataArrayLeng-1, then
/// reads queries from standard input and prints the lookup result, the elapsed stopwatch
/// ticks and the estimated false-positive probability for each one.
/// Typing "e" or reaching the end of input stops the loop.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // 200,000,000 bits, 50,000,000 expected items, 3 bit positions per item.
    Bloom_Filter.BloomFilter bloom = new Bloom_Filter.BloomFilter(200000000, 50000000, 3);
    for (int i = 0; i < bloom.DataArrayLeng; i++)
    {
        bloom.Add(i.ToString());
    }

    while (true)
    {
        var c = Console.ReadLine();

        // ReadLine yields null once the input stream is exhausted (e.g. redirected input);
        // treat that like the explicit "e" exit command instead of passing null to the filter.
        if (c == null || c == "e")
        {
            break;
        }

        Stopwatch sw = Stopwatch.StartNew();
        var temp = bloom.isExist(c);
        sw.Stop();
        Console.WriteLine($"查找:{c}\n结果:{temp}\n总耗时:{sw.ElapsedTicks}\n错误概率:{bloom.getFalsePositiveProbability()}");
    }
}
结果:使用内存约27MB,单次查找耗时一般在100以内(注意:程序输出的是Stopwatch.ElapsedTicks计时周期数,而不是毫秒)。