(3) 插入元素(k4,v4,14)
k4的哈希码为14,14 % 11 = 3,而索引3处已被k3占据,所以使用二度哈希重新计算地址:步长为1 + ((14 >> 5) + 1) % 10 = 2,得到新地址为(3 + 2) % 11 = 5。索引3处存在冲突,所以需要将该处(k3)hash_coll的高位置为“1”。
$(12)_{10} = (00000000000000000000000000001100)_2$,高位置“1”后:
$(10000000000000000000000000001100)_2 = (-2147483636)_{10}$
最终效果如图8.8所示。
(4) 删除元素k1和k2
Hashtable在删除一个存在冲突的元素时(hash_coll为负数),会把这个元素的key指向数组buckets,同时将该元素的hash_coll的低31位全部置“0”而保留最高位,由于原hash_coll为负数,所以最高位为“1”。
$(10000000000000000000000000000000)_2 = (-2147483648)_{10}$
单凭hash_coll的值是否为-2147483648无法判断某个索引处是否为空,因为当一个哈希码为0的元素存在冲突时,它的hash_coll的值同样也为-2147483648,这也是要把key指向buckets的原因。这里把key指向buckets并且hash_coll值为-2147483648的空位称为“有冲突空位”。如图8.8所示,当k1被删除后,索引1处的空位就是有冲突空位。
Hashtable在删除一个不存在冲突的元素时(hash_coll为正数),会把键和值都设为null,hash_coll的值设为0。这种没有冲突的空位称为“无冲突空位”,如图8.9所示,k2被删除后索引2处就属于无冲突空位,当一个Hashtable被初始化后,buckets数组中的所有位置都是无冲突空位。
哈希表通过关键字查找元素时,首先计算出键的哈希地址,然后通过这个哈希地址直接访问数组的相应位置并对比两个键值,如果相同,则查找成功并返回;如果不同,则根据hash_coll的值来决定下一步操作。当hash_coll为0或正数时,表明没有冲突,此时查找失败;当hash_coll为负数时,表明存在冲突,此时需通过二度哈希继续计算哈希地址进行查找,如此反复直到找到相应的键值表明查找成功,如果在查找过程中遇到hash_coll为0或正数,或者计算二度哈希的次数等于哈希表长度,则查找失败。由此可知,将hash_coll的高位设为冲突位主要是为了提高查找速度,避免无意义地多次计算二度哈希的情况。
Hashtable的代码实现
哈希表的实现较为复杂,为了简化代码,本例忽略了部分出错判断,在测试时请不要将key值设为空(null)。
using System;

/// <summary>
/// A simplified open-addressing hash table that resolves collisions by double hashing.
/// </summary>
/// <remarks>
/// Each slot stores the key's 31-bit hash code in the low bits of <c>hash_coll</c> and uses
/// the top bit as a collision flag: the flag is set when some other key probed past the slot.
/// A slot whose key is <c>null</c> is an empty slot that was never probed past; a slot whose
/// key is the <c>buckets</c> array itself is an empty slot left behind by a removed entry
/// that had its collision flag set. Null keys are not validated by this class.
/// </remarks>
public class Hashtable
{
    /// <summary>One slot of the table.</summary>
    private struct bucket
    {
        public Object key;      // Key; null or the buckets array itself marks an empty slot.
        public Object val;      // Value stored for the key.
        public int hash_coll;   // Low 31 bits: hash code. Top bit: collision flag.
    }

    // Mask selecting the 31 hash-code bits of hash_coll.
    private const int HashMask = 0x7FFFFFFF;

    // Top bit of hash_coll; set when another key has probed past the slot.
    private const int CollisionBit = unchecked((int)0x80000000);

    private bucket[] buckets;   // Slot array holding the table's data.
    private int count;          // Number of stored entries.
    private int loadsize;       // Entry count at which the table is expanded.
    private float loadFactor;   // Effective load factor (never above 0.72).

    /// <summary>Creates a table with the minimum size (11 slots).</summary>
    public Hashtable() : this(0, 1.0f) { }

    /// <summary>Creates a table sized for <paramref name="capacity"/> entries.</summary>
    /// <param name="capacity">Expected number of entries.</param>
    /// <param name="loadFactor">Requested load factor, from 0.1 to 1.0; values above 0.72 are reduced to 0.72.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="loadFactor"/> is outside 0.1..1.0 (or is NaN).
    /// </exception>
    public Hashtable(int capacity, float loadFactor)
    {
        if (!(loadFactor >= 0.1f && loadFactor <= 1.0f))
        {
            // Two-argument overload: the single-string overload would treat the message as the parameter name.
            throw new ArgumentOutOfRangeException(
                "loadFactor", "填充因子必须在0.1~1之间");
        }
        this.loadFactor = loadFactor > 0.72f ? 0.72f : loadFactor;

        // Derive the slot count from the capacity; the table never has fewer than 11 slots.
        double rawsize = capacity / this.loadFactor;
        int hashsize = (rawsize > 11)
            ? HashHelpers.GetPrime((int)rawsize)
            : 11;
        buckets = new bucket[hashsize];
        loadsize = (int)(this.loadFactor * hashsize);
    }

    /// <summary>Adds a new entry.</summary>
    /// <param name="key">Key of the entry.</param>
    /// <param name="value">Value of the entry.</param>
    /// <exception cref="ArgumentException">An equal key is already stored.</exception>
    public virtual void Add(Object key, Object value)
    {
        Insert(key, value, true);
    }

    /// <summary>Computes the hash code and the double-hashing parameters for a key.</summary>
    /// <param name="key">Key to hash.</param>
    /// <param name="hashsize">Number of slots in the table being probed.</param>
    /// <param name="seed">Receives h1, the starting hash (equal to the returned hash code).</param>
    /// <param name="incr">Receives h2, the probe step, in the range 1..hashsize-1.</param>
    /// <returns>The key's hash code with the sign bit cleared.</returns>
    private uint InitHash(Object key, int hashsize,
        out uint seed, out uint incr)
    {
        // Clear the sign bit (this is a mask, not an absolute value); unchecked so that
        // negative hash codes do not throw when compiled with overflow checking.
        uint hashcode = unchecked((uint)GetHash(key)) & (uint)HashMask;
        seed = hashcode;
        incr = (uint)(1 + (((seed >> 5) + 1) % ((uint)hashsize - 1)));
        return hashcode;
    }

    /// <summary>Gets or sets the value stored for a key.</summary>
    /// <param name="key">Key to look up or assign.</param>
    /// <returns>The stored value, or <c>null</c> when the key is not found.</returns>
    public virtual Object this[Object key]
    {
        get
        {
            uint seed;      // h1
            uint incr;      // h2
            uint hashcode = InitHash(key, buckets.Length,
                out seed, out incr);
            int ntry = 0;   // Number of re-probes performed so far.
            bucket b;
            int bn = (int)(seed % (uint)buckets.Length);    // First probe position.
            do
            {
                b = buckets[bn];
                if (b.key == null)
                {
                    // Empty slot that was never probed past: the key is not in the table.
                    return null;
                }
                if (((b.hash_coll & HashMask) == hashcode) &&
                    KeyEquals(b.key, key))
                {
                    return b.val;
                }
                bn = (int)(((long)bn + incr) % (uint)buckets.Length);   // Next probe position.
                // Continue only while the slot just examined has its collision flag set.
            } while (b.hash_coll < 0 && ++ntry < buckets.Length);
            return null;
        }
        set
        {
            Insert(key, value, false);
        }
    }

    /// <summary>Grows the table to a prime size obtained from twice the current slot count.</summary>
    private void expand()
    {
        int rawsize = HashHelpers.GetPrime(buckets.Length * 2);
        rehash(rawsize);
    }

    /// <summary>Re-inserts every stored entry into a new slot array of the given size.</summary>
    /// <param name="newsize">Number of slots in the new array.</param>
    private void rehash(int newsize)
    {
        bucket[] newBuckets = new bucket[newsize];
        for (int nb = 0; nb < buckets.Length; nb++)
        {
            bucket oldb = buckets[nb];
            // Skip both kinds of empty slot.
            if ((oldb.key != null) && (oldb.key != buckets))
            {
                // Collision flags are recomputed for the new array, so only the hash bits are carried over.
                putEntry(newBuckets, oldb.key, oldb.val,
                    oldb.hash_coll & HashMask);
            }
        }
        buckets = newBuckets;
        loadsize = (int)(loadFactor * newsize);
    }

    /// <summary>Stores one entry of the old array into <paramref name="newBuckets"/>.</summary>
    /// <param name="newBuckets">Slot array being filled.</param>
    /// <param name="key">Key of the entry.</param>
    /// <param name="nvalue">Value of the entry.</param>
    /// <param name="hashcode">Hash code of the key with the sign bit cleared.</param>
    private void putEntry(bucket[] newBuckets, Object key,
        Object nvalue, int hashcode)
    {
        uint seed = (uint)hashcode;                                 // h1
        uint incr = (uint)(1 + (((seed >> 5) + 1) %
            ((uint)newBuckets.Length - 1)));                        // h2
        int bn = (int)(seed % (uint)newBuckets.Length);             // First probe position.
        do
        {
            // Either kind of empty slot can take the entry.
            if ((newBuckets[bn].key == null) ||
                (newBuckets[bn].key == buckets))
            {
                newBuckets[bn].val = nvalue;
                newBuckets[bn].key = key;
                newBuckets[bn].hash_coll |= hashcode;   // OR keeps a collision flag that is already set.
                return;
            }
            // The slot is occupied: flag it as probed past.
            if (newBuckets[bn].hash_coll >= 0)
            {
                newBuckets[bn].hash_coll |= CollisionBit;
            }
            bn = (int)(((long)bn + incr) % (uint)newBuckets.Length);    // Next probe position.
        } while (true);
    }

    /// <summary>Returns the hash code used for a key.</summary>
    /// <param name="key">Key to hash.</param>
    /// <returns>The result of <c>key.GetHashCode()</c>.</returns>
    protected virtual int GetHash(Object key)
    {
        return key.GetHashCode();
    }

    /// <summary>Tests whether a stored key equals a lookup key.</summary>
    /// <param name="item">Key stored in a slot; may be <c>null</c>.</param>
    /// <param name="key">Key being searched for.</param>
    /// <returns><c>false</c> when <paramref name="item"/> is <c>null</c>; otherwise <c>item.Equals(key)</c>.</returns>
    protected virtual bool KeyEquals(Object item, Object key)
    {
        return item == null ? false : item.Equals(key);
    }

    /// <summary>Writes a new entry into slot <paramref name="bn"/> and increments the entry count.</summary>
    private void FillSlot(int bn, Object key, Object nvalue, uint hashcode)
    {
        buckets[bn].val = nvalue;
        buckets[bn].key = key;
        buckets[bn].hash_coll |= (int)hashcode;     // OR keeps a collision flag that is already set.
        count++;
    }

    /// <summary>Adds a new entry or replaces the value of an existing one.</summary>
    /// <param name="key">Key of the entry.</param>
    /// <param name="nvalue">Value to store.</param>
    /// <param name="add">
    /// <c>true</c> to add (an existing equal key is an error);
    /// <c>false</c> to set (an existing equal key has its value replaced).
    /// </param>
    /// <exception cref="ArgumentException"><paramref name="add"/> is <c>true</c> and an equal key exists.</exception>
    /// <exception cref="InvalidOperationException">No empty slot was found on the probe sequence.</exception>
    private void Insert(Object key, Object nvalue, bool add)
    {
        // Expand once the number of entries reaches the allowed limit.
        if (count >= loadsize)
        {
            expand();
        }
        uint seed;                  // h1
        uint incr;                  // h2
        uint hashcode = InitHash(key, buckets.Length, out seed, out incr);
        int ntry = 0;               // Number of re-probes performed so far.
        int emptySlotNumber = -1;   // First collided-empty slot seen, or -1.
        int bn = (int)(seed % (uint)buckets.Length);    // First probe position.
        do
        {
            // A collided-empty slot is reusable, but probing must continue past it
            // to make sure an equal key is not stored further along the sequence.
            if (emptySlotNumber == -1 && (buckets[bn].key == buckets) &&
                (buckets[bn].hash_coll < 0))
            {
                emptySlotNumber = bn;
            }
            // A never-probed-past empty slot ends the sequence: the key is not present.
            if (buckets[bn].key == null)
            {
                FillSlot(emptySlotNumber != -1 ? emptySlotNumber : bn,
                    key, nvalue, hashcode);
                return;
            }
            // An equal key is already stored.
            if (((buckets[bn].hash_coll & HashMask) == hashcode) &&
                KeyEquals(buckets[bn].key, key))
            {
                if (add)
                {
                    throw new ArgumentException("添加了重复的键值!");
                }
                buckets[bn].val = nvalue;
                return;
            }
            // Flag the occupied slot as probed past, unless the entry will be stored
            // in an earlier reusable slot anyway.
            if (emptySlotNumber == -1)
            {
                if (buckets[bn].hash_coll >= 0)
                {
                    buckets[bn].hash_coll |= CollisionBit;
                }
            }
            bn = (int)(((long)bn + incr) % (uint)buckets.Length);   // Next probe position.
        } while (++ntry < buckets.Length);

        // No never-probed-past empty slot was seen, and no equal key either. If a
        // collided-empty slot was recorded, reuse it instead of reporting failure.
        if (emptySlotNumber != -1)
        {
            FillSlot(emptySlotNumber, key, nvalue, hashcode);
            return;
        }
        throw new InvalidOperationException("添加失败!");
    }

    /// <summary>Removes the entry with the given key; does nothing when the key is not found.</summary>
    /// <param name="key">Key of the entry to remove.</param>
    public virtual void Remove(Object key)
    {
        uint seed;      // h1
        uint incr;      // h2
        uint hashcode = InitHash(key, buckets.Length, out seed, out incr);
        int ntry = 0;   // Number of re-probes performed so far.
        bucket b;
        int bn = (int)(seed % (uint)buckets.Length);    // First probe position.
        do
        {
            b = buckets[bn];
            if (((b.hash_coll & HashMask) == hashcode) &&
                KeyEquals(b.key, key))
            {
                // Keep only the collision flag; clear the hash bits.
                buckets[bn].hash_coll &= CollisionBit;
                if (buckets[bn].hash_coll != 0)
                {
                    // Other keys probed past this slot: mark it as a collided-empty slot
                    // by pointing the key at the buckets array.
                    buckets[bn].key = buckets;
                }
                else
                {
                    buckets[bn].key = null;
                }
                buckets[bn].val = null;     // Release the reference to the value.
                count--;
                return;
            }
            bn = (int)(((long)bn + incr) % (uint)buckets.Length);   // Next probe position.
        } while (b.hash_coll < 0 && ++ntry < buckets.Length);
    }

    /// <summary>Returns one line per slot describing the table's contents.</summary>
    /// <returns>
    /// For an occupied slot: index, key, value and hash_coll. For an empty slot: index and
    /// hash_coll. A null value is printed as an empty field.
    /// </returns>
    public override string ToString()
    {
        System.Text.StringBuilder sb = new System.Text.StringBuilder();
        for (int i = 0; i < buckets.Length; i++)
        {
            if (buckets[i].key != null && buckets[i].key != buckets)
            {
                // A stored value may be null; print it as an empty field instead of throwing.
                Object val = buckets[i].val;
                sb.AppendFormat("{0,-5}{1,-8}{2,-8}{3,-8}\r\n",
                    i.ToString(), buckets[i].key.ToString(),
                    val == null ? string.Empty : val.ToString(),
                    buckets[i].hash_coll.ToString());
            }
            else
            {
                sb.AppendFormat("{0,-21}{1,-8}\r\n", i.ToString(),
                    buckets[i].hash_coll.ToString());
            }
        }
        return sb.ToString();
    }

    /// <summary>Gets the number of entries stored in the table.</summary>
    public virtual int Count
    {
        get { return count; }
    }
}
Hashtable和ArrayList的实现有相似的地方,比如两者都是以数组为基础做进一步的抽象而来,两者都可以成倍地自动扩展容量。