散列
选择键值,冲突的时候采取不同的策略
散列函数:
简单的散列函数:
1 int hash(const string & key,int tableSize) 2 { 3 int hashVal = 0; 4 for(int i = 0; i < key.length();++i) 5 { 6 hashVal + = key[i]; 7 } 8 return hashVal % tableSize; 9 }
比较好的散列函数:
1 int hash( const string & key,int tableSize ) 2 { 3 int hashVal = 0; 4 for(int i = 0; i <key.length();++i) 5 { 6 hashVal = 37*hashVal + key[i]; 7 } 8 hashVal %= tableSize; 9 if(hashVal < 0) 10 hashVal +=tableSize; 11 12 return hashVal; 13 }
键的长度和性质 影响选择。
分离链接法
分离链接散列表的类构架
1 template <typename HashedObj> 2 class HashTable 3 { 4 public: 5 explicit HashTable( int size = 101); 6 bool contains (const HashedObj & x) const; 7 void makeEmpty(); 8 void insert(const HashedObj & x); 9 void remove(const HashedObj & x); 10 private: 11 vector<list<HashedObj> > theLists; 12 int currentSize; 13 void rehash(); 14 int myhash( const HashedObj & x) const; 15 }; 16 int hash(const string & key); 17 int hash(int key);
散列表myhash的成员函数:
1 int myhash(const HashedObj & x) const 2 { 3 int hashVal = hash(x); 4 hashVal %= theLists.size(); 5 if(hashVal < 0) 6 hashVal += theLists.size(); 7 return hashVal; 8 }
使用name成员为键,提供的散列函数实例:
1 class Employee 2 { 3 public: 4 const string & getName() const 5 { 6 return name; 7 } 8 bool operator==( const Employee & rhs ) const 9 { 10 return getName() == rhs.getName(); 11 } 12 bool operator!=(const Employee & rhs) const 13 { 14 return !(*this == rhs); 15 } 16 private: 17 string name; 18 double salary; 19 int seniority; 20 }; 21 int hash(const Employee & item) 22 { 23 return hash(item.getName()); 24 }
实现makeEmpty contains remove:
1 void makeEmpty() 2 { 3 for(int i =0;i<theLists.size();i++) 4 theLists[i].clear(); 5 } 6 bool contains(const HashedObj & x) const 7 { 8 const list<HashedObj> & whichList = theLists[myhash(x)]; 9 return find(whichList.begin(),whichList.end(),x)!=whichList.end(); 10 } 11 bool remove(const HashedObj & x) const 12 { 13 list<HashedObj> & whichList = theLists[myhash(x)]; 14 list<HashedObj>::iterator itr = find(whichList.begin(),whichList.end(),x); 15 16 if(itr == whichList.end()) 17 return false; 18 19 whichList.erase(itr); 20 --currentSize; 21 return true; 22 }
分离散列表的insert实例
1 bool insert(const HashedObj & x) 2 { 3 list<HashedObj> & whichList = theLists[myhash(x)]; 4 if(find(whichList.begin(),whichList.end(),x)!=whichList.end()) 5 return false; 6 whichList.push_back(x); 7 if(++currentSize > theLists.size()) 8 rehash(); 9 10 return true; 11 }
装填因子:散列表中的元素个数 与 散列表大小的 比值
执行一次查找所需的时间:计算散列函数值所需要的常数时间加上遍历表所用的时间
不使用链表的散列表:
当冲突发生时,直接寻找下一单元
<线性探测>
<平方探测>
使用探测策略的散列表的类接口
1 template <typename HashedObj> 2 class HashedObj 3 { 4 public: 5 explicit HashTable(int size = 101); 6 bool contains(const HashedObj & x) const; 7 void makeEmpty(); 8 bool insert(const HashedObj & x); 9 bool remove(const HashedObj & x); 10 enum EntryType{ACTIVE,EMPTY,DELETED}; 11 private: 12 struct HashEntry 13 { 14 HashedObj element; 15 EntryType info; 16 17 HashEntry(const HashedObj & e = HashedObj(),EntryType i = EMPTY):element(e),info(i){} 18 }; 19 vector<HashEntry> array; 20 int currentSize; 21 bool isActive(int currentPos) const; 22 int findPos(const HashedObj & x) const; 23 void rehash(); 24 int myhash(const HashedObj & x) const; 25 };
初始化平方探测散列表
1 explicit HashTable(int size = 101):array(nextPrime(size)) 2 { 3 makeEmpty(); 4 } 5 void makeEmpty() 6 { 7 currentSize = 0; 8 for(int i = 0 ; i < array.size(); i++) 9 array[i].info = EMPTY; 10 }
使用平方探测进行散列的contains findPos isActive
1 bool contains(const HashedObj & x) const 2 { 3 return isActive(findPos(x)); 4 } 5 int findPos(const HashedObj & x) const 6 { 7 int offset = 1; 8 int currentPos = myhash(x); 9 10 while(array[currentPos].info != EMPTY && array[currentPos].element != x) 11 { 12 currentPos += offset; 13 offset += 2; 14 if(currentPos >= array.size()) 15 currentPos -= array.size(); 16 } 17 return currentPos; 18 } 19 bool isActive(int currentPos) const 20 { 21 return array[currentPos].info == ACTIVE; 22 }
使用平方探测的insert remove
bool insert( const HashedObj & x) { int currentPos = findPos(x); if(isActive( currentPos )) return false; array[currentPos] = HashEntry(x,ACTIVE); if(++currnetSize>array.size()/2) rehash(); } bool remove(const HashedObj & x) { int currentPos = findPos(x); if(!isActive(currentPos)) return false; array[currentPos].info = DELETED; return true; }
<双散列>
对分离散列表的再散列
1 void rehash() 2 { 3 vector<HashEntry> oldArray = array; 4 array.size(nextPrime(2*oldArray.size())); 5 for(int j = 0; j < array.size(); j++) 6 { 7 array[j].info = EMPTY; 8 } 9 currentSize = 0; 10 for(int i = 0; i < array.size(); i++) 11 { 12 if(oldArray[i].info == ACTIVE) 13 insert(oldArray[i].element); 14 } 15 }
对探测散列表的再散列
1 void rehash() 2 { 3 vector<list<HashedObj> > oldLists = theLists; 4 theLists.resize( nextPrime( 2*theLists.size() ) ); 5 for(int j = 0; j < theLists.size(); j++) 6 { 7 theLists[j].clear(); 8 } 9 currentSize = 0; 10 for(int i = 0; i < oldLists.size(); i++) 11 { 12 list<HashedObj>::iterator itr = oldLists[i].begin(); 13 while( itr != oldLists[i].end( ) ) 14 insert(*itr++); 15 } 16 }