Bloom Filter 算法在搜索引擎中一般用于 URL 去重(参见 http://wiki.commerce.net/wiki/URL_History_Bloom_Filters 或 http://tianchunfeng.spaces.live.com/blog/cns!819E33AA1808A272!288.entry ),其具体原理请参考:http://blog.csdn.net/jiaomeng/ 。这里我只给出 C# 实现:
/// <summary>
/// A Bloom filter: a fixed-size bit set that answers "possibly present" or
/// "definitely absent" for items that were previously added.
/// </summary>
/// <remarks>
/// Each item is mapped to <c>hashcount</c> bit positions derived from two base
/// hashes: <c>item.GetHashCode()</c> and an AP string hash of that hash code's
/// decimal text. The filter can report false positives but, for a given item
/// hash, never false negatives.
/// </remarks>
/// <typeparam name="T">Type of the items stored in the filter.</typeparam>
public class BloomFilter<T>
{
    private readonly BitArray _bitArray;
    private readonly int _hashcount;
    private int _count = 0;

    /// <summary>
    /// Creates an empty filter.
    /// </summary>
    /// <param name="size">Number of bits in the filter; must be positive.</param>
    /// <param name="hashcount">Number of bit positions probed per item; must be positive.</param>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// <paramref name="size"/> or <paramref name="hashcount"/> is less than 1.
    /// </exception>
    public BloomFilter(int size, int hashcount)
    {
        // A zero-bit filter would divide by zero when reducing hashes modulo the size.
        if (size <= 0)
        {
            throw new System.ArgumentOutOfRangeException("size", "The filter must contain at least one bit.");
        }

        // With no probes, Contains would report every item as present.
        if (hashcount <= 0)
        {
            throw new System.ArgumentOutOfRangeException("hashcount", "At least one hash probe is required.");
        }

        _bitArray = new BitArray(size, false);
        _hashcount = hashcount;
    }

    /// <summary>
    /// Adds an item to the filter by setting each of its probe bits.
    /// </summary>
    /// <param name="item">The item to add; must not be null.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
    public void Add(T item)
    {
        int h1;
        int h2;
        GetInitialProbe(item, out h1, out h2);

        bool changed = false;
        for (int i = 0; i < _hashcount; i++)
        {
            if (!_bitArray[h1])
            {
                _bitArray[h1] = true;
                changed = true;
            }

            NextProbe(i, ref h1, ref h2);
        }

        // Only additions that set at least one new bit are counted, so re-adding
        // an item (or adding one that is already a false positive) does not
        // increase Count.
        if (changed)
        {
            _count++;
        }
    }

    /// <summary>
    /// Tests whether an item may have been added to the filter.
    /// </summary>
    /// <param name="item">The item to look up; must not be null.</param>
    /// <returns>
    /// <c>false</c> if at least one probe bit is clear (the item was never added);
    /// <c>true</c> if every probe bit is set (the item was added, or is a false positive).
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
    public bool Contains(T item)
    {
        int h1;
        int h2;
        GetInitialProbe(item, out h1, out h2);

        for (int i = 0; i < _hashcount; i++)
        {
            if (!_bitArray[h1])
            {
                return false;
            }

            NextProbe(i, ref h1, ref h2);
        }

        return true;
    }

    /// <summary>
    /// Computes the secondary hash of an item: the AP string hash of the
    /// decimal text of <c>item.GetHashCode()</c>.
    /// </summary>
    /// <param name="item">The item to hash; must not be null.</param>
    /// <returns>The secondary hash code.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
    protected int Hash(T item)
    {
        if (item == null)
        {
            throw new System.ArgumentNullException("item");
        }

        return HashOfHashCode(item.GetHashCode());
    }

    /// <summary>
    /// String hash function (AP Hash Function).
    /// </summary>
    /// <param name="str">The string to hash; must not be null.</param>
    /// <returns>The low 32 bits of the accumulated 64-bit hash.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
    protected int Hash(string str)
    {
        if (str == null)
        {
            throw new System.ArgumentNullException("str");
        }

        long hash = 0;

        for (int i = 0; i < str.Length; i++)
        {
            // Even and odd character positions are mixed with different shifts.
            if ((i & 1) == 0)
            {
                hash ^= ((hash << 7) ^ str[i] ^ (hash >> 3));
            }
            else
            {
                hash ^= (~((hash << 11) ^ str[i] ^ (hash >> 5)));
            }
        }

        unchecked
        {
            return (int)hash;
        }
    }

    /// <summary>
    /// Gets the number of <see cref="Add"/> calls that set at least one new bit.
    /// This approximates the number of distinct items in the filter.
    /// </summary>
    public int Count
    {
        get
        {
            return _count;
        }
    }

    /// <summary>
    /// Gets the number of bits in the filter.
    /// </summary>
    public int SizeBits
    {
        get
        {
            return _bitArray.Length;
        }
    }

    /// <summary>
    /// Gets the size of the filter's bit set in bytes, rounded up to a whole byte.
    /// </summary>
    public int SizeBytes
    {
        get
        {
            // BitArray.Length is a bit count. Divide before adjusting so that
            // lengths close to int.MaxValue cannot overflow.
            int bits = _bitArray.Length;
            return (bits / 8) + ((bits % 8) == 0 ? 0 : 1);
        }
    }

    // Hashes the decimal text of a hash code. The text is formatted with the
    // invariant culture so the result does not depend on the current thread's
    // culture (the negative sign is culture-specific).
    private int HashOfHashCode(int hashCode)
    {
        return Hash(hashCode.ToString(System.Globalization.CultureInfo.InvariantCulture));
    }

    // Computes the first probe position (h1) and the initial step (h2) for an item,
    // both reduced into [0, bit count).
    private void GetInitialProbe(T item, out int h1, out int h2)
    {
        if (item == null)
        {
            throw new System.ArgumentNullException("item");
        }

        uint bits = (uint)_bitArray.Count;
        int primary = item.GetHashCode();
        int secondary = HashOfHashCode(primary);

        unchecked
        {
            // Reinterpret as unsigned so negative hash codes map to valid indexes.
            h1 = (int)((uint)primary % bits);
            h2 = (int)((uint)secondary % bits);
        }
    }

    // Advances to the next probe: the position moves by the current step, and the
    // step grows by the probe index so successive probes do not form a fixed stride.
    private void NextProbe(int i, ref int h1, ref int h2)
    {
        uint bits = (uint)_bitArray.Count;

        unchecked
        {
            // Both operands are below 2^31, so the unsigned sums cannot wrap.
            h1 = (int)(((uint)h1 + (uint)h2) % bits);
            h2 = (int)(((uint)h2 + (uint)i) % bits);
        }
    }
}
如果大家发现程序有问题,请及时地反馈给我(nsharp at 8u8.com),谢谢。