自己改编的布隆选择器。。

布隆过滤器的原理就不介绍了,到网上搜搜会找到的。这里说一下最佳的选取参数。P是需要的误差率,n是读取的数据条数。

系统首先要计算需要的内存大小m bits:

clip_image002[60]

 

再由m,n得到hash function的个数:

clip_image002[52]

 

如果误差率在万分之一,n是一亿条,则需要19亿比特内存,13个hash函数。下面是我参考别人的布隆过滤器,修改的。。进攻参考。。

 

public class BloomFilter
    {
        //  BitArray 初始分配2^29个bit
        private static int DEFAULT_SIZE = 1 << 30;
        //不同哈希函数的种子,一般应取质数
        private static int[] seeds = new int[] { 5, 7, 11, 13, 31, 37, 61 };
        private BitArray bits = new BitArray(DEFAULT_SIZE);
        //哈希函数对象
        private SimpleHash[] func = new SimpleHash[seeds.Length];

        public BloomFilter()
        {
            for (int i = 0; i < seeds.Length; i++)
            {
                func[i] = new SimpleHash(DEFAULT_SIZE, seeds[i]);
            }
        }

        /// <summary>
        /// 长度不小于默认的长度
        /// </summary>
        /// <param name="size"></param>
        public BloomFilter(int size)
        {
            if (size % 2 == 0 && size>= DEFAULT_SIZE)
            {
                for (int i = 0; i < seeds.Length; i++)
                {
                    func[i] = new SimpleHash(size, seeds[i]);
                }
            }
            else
            {
                throw new Exception("布隆过滤器长度错误");
            }
        }

        // 将字符串标记到bits中
        public void add(String value)
        {
            foreach (SimpleHash f in func)
            {
                bits.Set(f.hash(value), true);
            }
        }

        //判断字符串是否已经被bits标记
        public Boolean contains(String value)
        {
            if (value == null)
            {
                return false;
            }
            Boolean ret = true;
            foreach (SimpleHash f in func)
            {
                ret = ret && bits.Get(f.hash(value));
            }
            return ret;
        }

        /* 哈希函数类 */
        public class SimpleHash
        {
            private int size;
            private int seed;

            public SimpleHash(int size, int seed)
            {
                this.size = size;
                this.seed = seed;
            }

            //hash函数,采用简单的加权和hash
            public int hash(String value)
            {
                long hash = seed;

                for (int i = 0; i < value.Length; i++)
                {
                    if ((i & 1) == 0)
                    {
                        hash ^= ((hash << 7) ^ value[i] ^ (hash >> 3));
                    }
                    else
                    {
                        hash ^= (~((hash << 11) ^ value[i] ^ (hash >> 5)));
                    }
                }
                unchecked
                {
                    return size - Math.Abs((int)hash % (size / 2)) - 1;
                }
            }
        }
    }

posted @ 2012-09-13 15:57  doo  阅读(239)  评论(0编辑  收藏  举报