hash_table
C++11 之前,哈希容器并未进入 C++ 标准,而是以非标准扩展的形式提供(源自 SGI STL):hash_set、hash_multiset、hash_map、hash_multimap;
C++11 起,它们以 std::unordered_set、std::unordered_multiset、std::unordered_map、std::unordered_multimap 的名字进入标准库,称为无序容器,其实现比之前的版本复杂很多。
标准库的hash容器的底层数据结构自然是基于hash表,自己参考旧版本实现hashtable。
hashtable的数据结构原理大致如下图:
#ifndef __MY_HASH_TABLE_H_
#define __MY_HASH_TABLE_H_

#include <algorithm>
#include <cstddef>
#include <functional>
#include <iterator>
#include <utility>
#include <vector>

/// One element of a bucket's singly linked chain.
template <typename Value>
struct Hashtable_node
{
    // Initializers follow declaration order (next, then value).
    Hashtable_node(const Value& val) : next(nullptr), value(val) {}

    Hashtable_node* next;  ///< Next node in the same bucket, or nullptr.
    Value value;           ///< The stored element.
};

// Forward declarations: the table and its iterator refer to each other.
template <typename Value, typename Key, typename Hash, typename ExtractKey, typename Equal>
class MyHashtable;
template <typename Value, typename Key, typename Hash, typename ExtractKey, typename Equal>
struct Hashtable_iterator;

/// Forward iterator over every element of a MyHashtable.
///
/// Walks the current bucket chain, then moves on to the next non-empty
/// bucket. There is no decrement: chains are singly linked.
template <typename Value, typename Key, typename Hash, typename ExtractKey, typename Equal>
struct Hashtable_iterator
{
    typedef MyHashtable<Value, Key, Hash, ExtractKey, Equal> Hashtable;
    typedef Hashtable_iterator<Value, Key, Hash, ExtractKey, Equal> iterator;
    typedef Hashtable_node<Value> Node;

    typedef std::forward_iterator_tag iterator_category;
    typedef std::ptrdiff_t difference_type;
    typedef Value value_type;
    typedef Value& reference;
    typedef Value* pointer;

    Node* m_curNode;         ///< Node currently pointed at; nullptr means end().
    Hashtable* m_hashtable;  ///< Table being iterated.

    Hashtable_iterator(Node* node, Hashtable* hashtable)
        : m_curNode(node), m_hashtable(hashtable) {}
    Hashtable_iterator() : m_curNode(nullptr), m_hashtable(nullptr) {}

    /// Dereference; the iterator must not be end().
    reference operator*() const { return m_curNode->value; }
    pointer operator->() const { return &(operator*()); }

    /// Pre-increment. Incrementing end() leaves the iterator unchanged.
    iterator& operator++()
    {
        if (!m_curNode)
        {
            return *this;
        }
        const Node* previous = m_curNode;
        m_curNode = m_curNode->next;  // first try the rest of the chain
        if (!m_curNode)
        {
            // Chain exhausted: scan forward for the next non-empty bucket.
            const std::size_t bucket_total = m_hashtable->m_buckets.size();
            std::size_t bucket = m_hashtable->get_bkt_num(previous->value, bucket_total);
            while (!m_curNode && ++bucket < bucket_total)
            {
                m_curNode = m_hashtable->m_buckets[bucket];
            }
        }
        return *this;
    }

    /// Post-increment: returns the position held before advancing.
    iterator operator++(int)
    {
        iterator previous = *this;
        ++*this;
        return previous;
    }

    // Two iterators are equal when they point at the same node.
    bool operator==(const iterator& it) const { return m_curNode == it.m_curNode; }
    bool operator!=(const iterator& it) const { return m_curNode != it.m_curNode; }
};

// Note: assumes long is at least 32 bits.
enum { num_primes = 28 };

// Bucket counts are taken from this table. A bucket index is the hash value
// modulo the bucket count, and a prime modulus reduces collisions.
static const unsigned long prime_list[num_primes] =
{
    53ul,         97ul,         193ul,        389ul,        769ul,
    1543ul,       3079ul,       6151ul,       12289ul,      24593ul,
    49157ul,      98317ul,      196613ul,     393241ul,     786433ul,
    1572869ul,    3145739ul,    6291469ul,    12582917ul,   25165843ul,
    50331653ul,   100663319ul,  201326611ul,  402653189ul,  805306457ul,
    1610612741ul, 3221225473ul, 4294967291ul
};

/// Returns the first prime in prime_list that is >= n, or the largest
/// entry of the table when n exceeds every entry.
inline unsigned long next_prime(unsigned long n)
{
    const unsigned long* first = prime_list;
    const unsigned long* last = prime_list + static_cast<int>(num_primes);
    const unsigned long* pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}

/// Separate-chaining hash table.
///
/// @tparam Value       stored element type
/// @tparam Key         key type extracted from a Value
/// @tparam Hash        functor: Key -> hash value
/// @tparam ExtractKey  functor: Value -> Key
/// @tparam Equal       functor: (Key, Key) -> bool
///
/// The functors are invoked from const member functions, so their call
/// operators must be const-callable.
template <typename Value, typename Key, typename Hash, typename ExtractKey, typename Equal>
class MyHashtable
{
    // The iterator walks m_buckets directly when it moves between buckets.
    friend struct Hashtable_iterator<Value, Key, Hash, ExtractKey, Equal>;

private:
    typedef Hashtable_node<Value> Node;

public:
    typedef Hashtable_iterator<Value, Key, Hash, ExtractKey, Equal> iterator;
    typedef Value value_type;
    typedef Value& reference;
    typedef Value* pointer;
    typedef Key key_type;

public:
    /// Creates an empty table with at least n buckets, using
    /// default-constructed functors.
    MyHashtable(std::size_t n)
        : m_hash(Hash()),
          m_equals(Equal()),
          m_get_key(ExtractKey()),
          m_num_elements(0)
    {
        initialize_buckets(n);
    }

    /// Creates an empty table with at least n buckets, using the supplied
    /// (possibly stateful) functors.
    MyHashtable(std::size_t n, const Hash& hash, const Equal& equals,
                const ExtractKey& get_key = ExtractKey())
        : m_hash(hash),
          m_equals(equals),
          m_get_key(get_key),
          m_num_elements(0)
    {
        initialize_buckets(n);
    }

    /// Deep copy. The functors are copied as well, so the copy hashes and
    /// compares exactly like the source.
    MyHashtable(const MyHashtable& hashtable)
        : m_hash(hashtable.m_hash),
          m_equals(hashtable.m_equals),
          m_get_key(hashtable.m_get_key),
          m_num_elements(0)
    {
        copy_from(hashtable);
    }

    /// Copy-and-swap assignment: *this is untouched if the copy throws.
    MyHashtable& operator=(const MyHashtable& hashtable)
    {
        if (&hashtable != this)
        {
            MyHashtable replacement(hashtable);
            swap(replacement);
        }
        return *this;
    }

    ~MyHashtable() { clear(); }

    /// Number of stored elements.
    std::size_t size() const { return m_num_elements; }
    /// True when no element is stored.
    bool empty() const { return size() == 0; }

    /// Exchanges the complete state (functors, buckets, element count).
    void swap(MyHashtable& hashtable)
    {
        std::swap(m_hash, hashtable.m_hash);
        std::swap(m_equals, hashtable.m_equals);
        std::swap(m_get_key, hashtable.m_get_key);
        m_buckets.swap(hashtable.m_buckets);
        std::swap(m_num_elements, hashtable.m_num_elements);
    }

    /// Iterator to the head of the first non-empty bucket, or end().
    iterator begin()
    {
        for (std::size_t n = 0; n < m_buckets.size(); ++n)
        {
            if (m_buckets[n])
            {
                return iterator(m_buckets[n], this);
            }
        }
        return end();
    }

    /// Past-the-end iterator (null node).
    iterator end() { return iterator(nullptr, this); }

    /// Current number of buckets.
    std::size_t bucket_count() const { return m_buckets.size(); }

    /// Insert for unique-key containers.
    /// @return the position of the element with that key, and whether obj
    ///         was actually inserted.
    std::pair<iterator, bool> insert_unique(const value_type& obj)
    {
        resize(m_num_elements + 1);
        return insert_unique_noresize(obj);
    }

    /// Insert for multi containers; always inserts.
    iterator insert_equal(const value_type& obj)
    {
        resize(m_num_elements + 1);
        return insert_equal_noresize(obj);
    }

    /// Bucket index of obj in a table that has n buckets.
    std::size_t get_bkt_num(const value_type& obj, std::size_t n) const
    {
        const key_type& key = m_get_key(obj);
        return m_hash(key) % n;
    }

    /// Iterator to the first element whose key equals key, or end().
    iterator find(const key_type& key)
    {
        Node* node = m_buckets[m_hash(key) % m_buckets.size()];
        while (node && !m_equals(m_get_key(node->value), key))
        {
            node = node->next;
        }
        return iterator(node, this);
    }

    /// Number of elements whose key equals key.
    std::size_t count(const key_type& key) const
    {
        const std::size_t n = m_hash(key) % m_buckets.size();
        std::size_t result = 0;
        for (const Node* cur = m_buckets[n]; cur; cur = cur->next)
        {
            if (m_equals(m_get_key(cur->value), key))
            {
                ++result;
            }
        }
        return result;
    }

    /// Returns the stored element whose key matches obj's key, inserting a
    /// copy of obj first when there is none.
    reference find_or_insert(const value_type& obj)
    {
        resize(m_num_elements + 1);
        return *insert_unique_noresize(obj).first;
    }

    /// Erases every element whose key equals key.
    /// @return the number of elements removed.
    std::size_t erase(const key_type& key)
    {
        const std::size_t n = m_hash(key) % m_buckets.size();
        std::size_t erased = 0;

        // key may refer to an element stored in this table (t.erase(*it)),
        // so matching nodes are only unlinked during the scan and destroyed
        // once no further comparison against key is needed.
        Node* unlinked = nullptr;
        Node** link = &m_buckets[n];
        while (Node* cur = *link)
        {
            if (m_equals(m_get_key(cur->value), key))
            {
                *link = cur->next;
                cur->next = unlinked;
                unlinked = cur;
                ++erased;
                --m_num_elements;
            }
            else
            {
                link = &cur->next;
            }
        }
        while (unlinked)
        {
            Node* next = unlinked->next;
            delete unlinked;
            unlinked = next;
        }
        return erased;
    }

    /// Erases the element it points at; does nothing when it is end().
    void erase(const iterator& it)
    {
        Node* target = it.m_curNode;
        if (!target)
        {
            return;
        }
        const std::size_t n = get_bkt_num(target->value, m_buckets.size());
        for (Node** link = &m_buckets[n]; *link; link = &(*link)->next)
        {
            if (*link == target)
            {
                *link = target->next;
                delete target;
                --m_num_elements;
                return;
            }
        }
    }

private:
    /// Deletes every node reachable from buckets and nulls each slot.
    static void free_chains(std::vector<Node*>& buckets)
    {
        for (std::size_t i = 0; i < buckets.size(); ++i)
        {
            Node* cur = buckets[i];
            while (cur)
            {
                Node* next = cur->next;
                delete cur;
                cur = next;
            }
            buckets[i] = nullptr;
        }
    }

    /// Sets up an all-empty bucket array whose size is the first prime >= n.
    void initialize_buckets(std::size_t n)
    {
        const std::size_t bucket_total = next_prime(n);
        m_buckets.assign(bucket_total, static_cast<Node*>(nullptr));
        m_num_elements = 0;
    }

    /// Replaces the contents with a deep copy of hashtable's chains,
    /// keeping element order within each bucket.
    ///
    /// The copy is built aside and committed only on success, so a throwing
    /// element copy leaks nothing and leaves *this unchanged.
    void copy_from(const MyHashtable& hashtable)
    {
        std::vector<Node*> fresh(hashtable.m_buckets.size(), static_cast<Node*>(nullptr));
        try
        {
            for (std::size_t i = 0; i < hashtable.m_buckets.size(); ++i)
            {
                Node** tail = &fresh[i];
                for (const Node* src = hashtable.m_buckets[i]; src; src = src->next)
                {
                    *tail = new Node(src->value);
                    tail = &(*tail)->next;
                }
            }
        }
        catch (...)
        {
            free_chains(fresh);
            throw;
        }
        free_chains(m_buckets);
        m_buckets.swap(fresh);
        m_num_elements = hashtable.m_num_elements;
    }

    /// Destroys all nodes; the bucket array keeps its size.
    void clear()
    {
        free_chains(m_buckets);
        m_num_elements = 0;
    }

    /// Grows the bucket array when num_elements_hint exceeds the current
    /// bucket count, relinking (not copying) every node into its new bucket.
    void resize(std::size_t num_elements_hint)
    {
        const std::size_t old_n = m_buckets.size();
        if (num_elements_hint <= old_n)
        {
            return;
        }
        const std::size_t n = next_prime(num_elements_hint);
        if (n <= old_n)
        {
            return;  // already at the largest prime in the table
        }
        std::vector<Node*> tmp(n, static_cast<Node*>(nullptr));
        for (std::size_t bucket = 0; bucket < old_n; ++bucket)
        {
            Node* first = m_buckets[bucket];
            while (first)
            {
                const std::size_t new_bucket = get_bkt_num(first->value, n);
                // Detach from the old chain, push to the front of the new one.
                m_buckets[bucket] = first->next;
                first->next = tmp[new_bucket];
                tmp[new_bucket] = first;
                first = m_buckets[bucket];
            }
        }
        m_buckets.swap(tmp);
    }

    /// Unique insert without growing the bucket array.
    std::pair<iterator, bool> insert_unique_noresize(const value_type& obj)
    {
        const std::size_t n = get_bkt_num(obj, m_buckets.size());
        Node* first = m_buckets[n];
        for (Node* cur = first; cur; cur = cur->next)
        {
            if (m_equals(m_get_key(cur->value), m_get_key(obj)))
            {
                return std::pair<iterator, bool>(iterator(cur, this), false);
            }
        }
        Node* created = new Node(obj);
        created->next = first;
        m_buckets[n] = created;
        ++m_num_elements;
        return std::pair<iterator, bool>(iterator(created, this), true);
    }

    /// Multi insert without growing the bucket array. The new node is placed
    /// directly after the first element with an equal key, so equal keys
    /// stay adjacent; otherwise it becomes the bucket head.
    iterator insert_equal_noresize(const value_type& obj)
    {
        const std::size_t n = get_bkt_num(obj, m_buckets.size());
        Node* first = m_buckets[n];
        for (Node* cur = first; cur; cur = cur->next)
        {
            if (m_equals(m_get_key(cur->value), m_get_key(obj)))
            {
                Node* created = new Node(obj);
                created->next = cur->next;
                cur->next = created;
                ++m_num_elements;
                return iterator(created, this);
            }
        }
        Node* created = new Node(obj);
        created->next = first;
        m_buckets[n] = created;
        ++m_num_elements;
        return iterator(created, this);
    }

private:
    Hash m_hash;                                   ///< Key -> hash value
    Equal m_equals;                                ///< Key equality predicate
    ExtractKey m_get_key;                          ///< Value -> Key
    std::vector<Hashtable_node<Value>*> m_buckets; ///< Head node of each chain
    std::size_t m_num_elements;                    ///< Total stored elements
};

#endif//__MY_HASH_TABLE_H_