自己改编的布隆过滤器(Bloom Filter)。

/// <summary>
/// A Bloom filter over strings: a fixed-size bit array plus several seeded
/// hash functions. <see cref="contains"/> never reports a false negative for
/// a value that was passed to <see cref="add"/>, but it may report a false
/// positive for a value that was never added.
/// </summary>
public class BloomFilter
{
    // Seeds for the individual hash functions (primes). The number of hash
    // functions a filter can use is capped by the length of this list.
    private static readonly List<int> seeds = new List<int> { 5, 37, 73, 31, 17, 47, 67, 29, 13, 19, 89, 43, 7, 53, 41, 61, 23, 71, 11, 79, 83, 59, 97 };

    // Number of hash functions actually in use.
    private readonly int _seedcount;
    // Size of the bit array, in bits.
    private readonly int _memsize;
    private readonly BitArray bits;
    // One hash function object per seed in use.
    private readonly List<SimpleHash> func = new List<SimpleHash>();

    /// <summary>
    /// Creates a filter sized for an expected number of items and a target
    /// false-positive rate.
    /// </summary>
    /// <param name="ncount">Expected number of stored items; must be positive.</param>
    /// <param name="correction">Target false-positive rate; must be strictly between 0 and 1.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="ncount"/> is not positive, or <paramref name="correction"/>
    /// is not inside (0, 1) (this includes NaN).
    /// </exception>
    /// <exception cref="OverflowException">
    /// The required number of bits does not fit in an <see cref="int"/>.
    /// </exception>
    public BloomFilter(int ncount, double correction)
    {
        if (ncount <= 0)
        {
            throw new ArgumentOutOfRangeException("ncount", "The expected item count must be positive.");
        }
        // Written as a negated conjunction so that NaN is rejected as well.
        if (!(correction > 0.0 && correction < 1.0))
        {
            throw new ArgumentOutOfRangeException("correction", "The error rate must be strictly between 0 and 1.");
        }

        // Bits needed per stored item: -ln(p) / (ln 2)^2.
        double ln2 = Math.Log(2);
        double bitsPerItem = -Math.Log(correction) / (ln2 * ln2);

        // About 0.7 (~ ln 2) hash functions per bit-per-item, clamped to
        // [1, seeds.Count]: asking for more functions than there are seeds
        // used to throw; now the filter simply uses every available seed.
        int wanted = Convert.ToInt32(0.7 * bitsPerItem);
        _seedcount = Math.Min(Math.Max(wanted, 1), seeds.Count);

        // At least one bit, so the hash functions always have a valid range.
        _memsize = Math.Max(Convert.ToInt32(ncount * bitsPerItem), 1);

        bits = new BitArray(_memsize);
        for (int i = 0; i < _seedcount; i++)
        {
            func.Add(new SimpleHash(_memsize, seeds[i]));
        }
    }

    /// <summary>Marks a string as present in the filter.</summary>
    /// <param name="value">The string to record; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    public void add(String value)
    {
        if (value == null)
        {
            throw new ArgumentNullException("value");
        }
        foreach (SimpleHash f in func)
        {
            bits.Set(f.hash(value), true);
        }
    }

    /// <summary>Tests whether a string may have been added to the filter.</summary>
    /// <param name="value">The string to look up.</param>
    /// <returns>
    /// <c>false</c> if the value is null or at least one of its bits is unset
    /// (it was never added); <c>true</c> if every bit is set (it was added, or
    /// this is a false positive).
    /// </returns>
    public Boolean contains(String value)
    {
        if (value == null)
        {
            return false;
        }
        foreach (SimpleHash f in func)
        {
            // A single unset bit proves the value was never added.
            if (!bits.Get(f.hash(value)))
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// A seeded string hash that maps a string to an index in [0, size).
    /// </summary>
    public class SimpleHash
    {
        private readonly int size;
        private readonly int seed;

        /// <summary>Creates a hash function for a bit array of the given size.</summary>
        /// <param name="size">Number of addressable bits; must be positive.</param>
        /// <param name="seed">Initial value of the hash state.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not positive.</exception>
        public SimpleHash(int size, int seed)
        {
            if (size <= 0)
            {
                throw new ArgumentOutOfRangeException("size", "The hash range must be positive.");
            }
            this.size = size;
            this.seed = seed;
        }

        /// <summary>Hashes a string to a bit index.</summary>
        /// <param name="value">The string to hash; must not be null.</param>
        /// <returns>An index in the range [0, size).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        public int hash(String value)
        {
            if (value == null)
            {
                throw new ArgumentNullException("value");
            }

            long hash = seed;
            unchecked
            {
                // Shift/xor mixing; even and odd character positions use
                // different shift amounts.
                for (int i = 0; i < value.Length; i++)
                {
                    if ((i & 1) == 0)
                    {
                        hash ^= ((hash << 7) ^ value[i] ^ (hash >> 3));
                    }
                    else
                    {
                        hash ^= (~((hash << 11) ^ value[i] ^ (hash >> 5)));
                    }
                }

                // Reduce to [0, size) using the whole bit array. The '%'
                // result can be negative, so shift it into range before the
                // final reduction. (The previous reduction modulo size/2
                // could only ever reach the upper half of the array.)
                return (int)(((hash % size) + size) % size);
            }
        }
    }
}

posted @ 2012-09-18 11:02  doo  阅读(195)  评论(0编辑  收藏  举报