hash_table

C++11之前,哈希容器并不属于C++标准,而是各标准库实现(如SGI STL)提供的扩展:hash_set、hash_multiset、hash_map、hash_multimap;

C++11之后,标准库正式提供了对应的容器,名字是std::unordered_set、std::unordered_multiset、std::unordered_map、std::unordered_multimap,称之为无序容器,它们的实现比之前的要复杂很多。

标准库的hash容器的底层数据结构自然是基于hash表,下面参考旧版本的实现,自己动手实现一个hashtable。

hashtable的数据结构原理大致如下(原文此处的示意图已缺失):buckets是一个存放节点指针的vector,每个bucket是一条单链表的表头,哈希值对bucket个数取余后相同的元素挂在同一条链表上。

 

 

 

  1 #ifndef __MY_HASH_TABLE_H_
  2 #define __MY_HASH_TABLE_H_
#include <algorithm>
#include <cstddef>
#include <functional>
#include <iterator>
#include <utility>
#include <vector>
  6 // hash节点
  7 template <typename Value>
  8 struct Hashtable_node
  9 {
 10     Hashtable_node(Value val):value(val), next(nullptr) {}
 11     Hashtable_node*        next;
 12     Value                value;
 13 };
 14 
 15 // 先声明模板类和迭代器
 16 template<typename Value, typename Key, typename Hash, typename ExtractKey, typename Equal>
 17 class MyHashtable;
 18 template <typename Value, typename Key, typename Hash, typename ExtractKey, typename Equal>
 19 struct Hashtable_iterator;
 20 
 21 // 定义迭代器
 22 template <typename Value, typename Key, typename Hash, typename ExtractKey, typename Equal>
 23 struct Hashtable_iterator 
 24 {
 25     // 类型声明
 26     typedef MyHashtable<Value, Key, Hash, ExtractKey, Equal>        Hashtable;
 27     typedef Hashtable_iterator<Value, Key, Hash, ExtractKey, Equal>    iterator;
 28     typedef Hashtable_node<Value>                                    Node;
 29 
 30     typedef Value value_type;
 31     typedef Value& reference;
 32     typedef Value* pointer;
 33 
 34     Node*                    m_curNode;     // 指向的自身节点
 35     Hashtable*                m_hashtable;  // 指向的hash表对象
 36 
 37     // 构造函数
 38     Hashtable_iterator(Node* node,Hashtable* hashtable) : m_curNode(node), m_hashtable(hashtable) {}
 39     Hashtable_iterator() : m_curNode(nullptr), m_hashtable(nullptr) {}
 40 
 41     // 重载操作符,注意hashtable的迭代器无自减操作
 42     reference operator*() const { return m_curNode->value; }
 43     pointer operator->() const { return &(operator*()); }
 44     iterator& operator++() // 前++
 45     {
 46         const Node* old = m_curNode;
 47         m_curNode = m_curNode->next; // 先链表自增
 48         if (!m_curNode) 
 49         {   // 如果桶只有一个节点,就取下一个桶, 循环直到取得非空的节点
 50             size_t bucket = m_hashtable->get_bkt_num(old->value, m_hashtable->m_buckets.size());
 51             while (!m_curNode && ++bucket < m_hashtable->m_buckets.size())
 52             {
 53                 m_curNode = m_hashtable->m_buckets[bucket];
 54             }
 55         }
 56         return *this;
 57     }
 58     iterator operator++(int) // 后++
 59     {
 60         iterator __tmp = *this;
 61         ++*this;
 62         return __tmp;
 63     }
 64     bool operator==(const iterator& it) const
 65     {
 66         return m_curNode == it.m_curNode;
 67     }
 68     bool operator!=(const iterator& it) const
 69     {
 70         return m_curNode != it.m_curNode;
 71     }
 72 };
 73 
 74 // Note: assumes long is at least 32 bits.
 75 enum { num_primes = 28 };
 76 
 77 // 质数表,bucket的个数从该表中取,因为bucket下标值是哈希值取余bucket的size,如果size为质数,会减少冲突
 78 static const unsigned long prime_list[num_primes] =
 79 {
 80   53ul,         97ul,         193ul,       389ul,       769ul,
 81   1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
 82   49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
 83   1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
 84   50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
 85   1610612741ul, 3221225473ul, 4294967291ul
 86 };
 87 
 88 // 在质数表中查找第一个大于或等于n的质数
 89 inline unsigned long next_prime(unsigned long n)
 90 {
 91     const unsigned long* first = prime_list;
 92     const unsigned long* last = prime_list + (int)num_primes;
 93     const unsigned long* pos = std::lower_bound(first, last, n);
 94     return pos == last ? *(last - 1) : *pos;
 95 }
 96 
 97 // 定义模板类
 98 template <typename Value, typename Key, typename Hash, typename ExtractKey, typename Equal>
 99 class MyHashtable
100 {
101 private:
102     typedef Hashtable_node<Value> Node;
103 public:
104     typedef Hashtable_iterator<Value, Key, Hash, ExtractKey, Equal> iterator;
105     typedef Value    value_type;
106     typedef Value&    reference;
107     typedef Value*    pointer;
108     typedef Key        key_type;
109 public:
110     MyHashtable(size_t n) 
111       : m_hash(Hash()),
112         m_equals(Equal()),
113         m_get_key(ExtractKey()),
114         m_num_elements(0)
115     {
116         initialize_buckets(n);
117     }
118     MyHashtable(const MyHashtable& hashtable)
119         : m_num_elements(0)
120     {
121         copy_from(hashtable);
122     }
123 
124     MyHashtable& operator= (const MyHashtable& hashtable)
125     {
126         if (&hashtable != this)
127         {
128             clear();
129             copy_from(hashtable);
130         }
131         return *this;
132     }
133 
134     // 析构函数
135     ~MyHashtable() { clear(); }
136 
137     // 获取size
138     size_t size() const { return m_num_elements; }
139     // 判断是否为空
140     bool empty() const { return size() == 0; }
141     // 交换函数
142     void swap(MyHashtable& hashtable)
143     {
144         m_buckets.swap(hashtable.m_buckets);
145         std::swap(m_num_elements, hashtable.m_num_elements);
146     }
147     // 指向第一个元素的迭代器
148     iterator begin()
149     {
150         for (size_t n = 0; n < m_buckets.size(); ++n)
151         { // 找到第一个非空的bucket
152             if (m_buckets[n])
153             {
154                 return iterator(m_buckets[n], this);
155             }
156         }
157         return end();
158     }
159     iterator end() { return iterator(0, this); } // 直接指向空指针的迭代器
160     // 获取bucket的个数
161     size_t bucket_count() const { return m_buckets.size(); }
162 
163     // 用于非multi容器的插入
164     std::pair<iterator, bool> insert_unique(const value_type& obj)
165     {
166         resize(m_num_elements + 1);
167         return insert_unique_noresize(obj);
168     }
169     // 用于multi容器的插入
170     iterator insert_equal(const value_type& obj)
171     {
172         resize(m_num_elements + 1);
173         return insert_equal_noresize(obj);
174     }
175     // 获取bucket下标
176     size_t get_bkt_num(const value_type& obj, size_t n) const
177     {
178         const key_type& key = m_get_key(obj);
179         return m_hash(key) % n;
180     }
181     // 查找key值对应的迭代器
182     iterator find(const key_type& key)
183     {
184         size_t n = m_hash(key) % m_buckets.size();
185         Node* first;
186         for (first = m_buckets[n]; first && !m_equals(m_get_key(first->value), key); first = first->next)
187         { }
188         iterator it = iterator(first,this);
189         return it;
190     }
191     // 返回容器中key值节点的个数
192     size_t count(const key_type& key) const
193     {
194         const size_t n = m_hash(key) % m_buckets.size();
195         size_t result = 0;
196         for (const Node* cur = m_buckets[n]; cur; cur = cur->next)
197         {
198             if (m_equals(m_get_key(cur->value), key))
199             {
200                 ++result;
201             }
202         }
203         return result;
204     }
205     // 查找obj,如果找到就返回,如果找不到就插入obj
206     reference find_or_insert(const value_type& obj)
207     {
208         resize(m_num_elements + 1);
209         size_t n = get_bkt_num(obj,m_buckets.size());
210         Node* first = m_buckets[n];
211 
212         for (Node* cur = first; cur; cur = cur->next)
213         {
214             if (m_equals(m_get_key(cur->value), m_get_key(obj)))
215             {
216                 return cur->value;
217             }
218         }
219 
220         Node* tmp = new Node(obj);
221         tmp->next = first;
222         m_buckets[n] = tmp;
223         ++m_num_elements;
224         return tmp->value;
225     }
226 
227     // 删除所有key值节点,返回key值节点的个数
228     size_t erase(const key_type& key)
229     {
230         const size_t n = m_hash(key) % m_buckets.size();
231         Node* first = m_buckets[n];
232         size_t erased = 0;
233 
234         if (first) 
235         {
236             Node* cur = first;
237             Node* next = cur->next;
238             while (next) 
239             {
240                 if (m_equals(m_get_key(next->value), key)) 
241                 {
242                     cur->next = next->next;
243                     delete next;
244                     next = cur->next;
245                     ++erased;
246                     --m_num_elements;
247                 }
248                 else 
249                 {
250                     cur = next;
251                     next = cur->next;
252                 }
253             }
254             if (m_equals(m_get_key(first->value), key)) 
255             {
256                 m_buckets[n] = first->next;
257                 delete first;
258                 ++erased;
259                 --m_num_elements;
260             }
261         }
262         return erased;
263     }
264     // 删除迭代器
265     void erase(const iterator& it)
266     {
267         Node* p = it.m_curNode;
268         if (p) 
269         {
270             const size_t n = get_bkt_num(p->value,m_buckets.size());
271             Node* cur = m_buckets[n];
272             if (cur == p) 
273             {
274                 m_buckets[n] = cur->next;
275                 delete cur;
276                 --m_num_elements;
277             }
278             else 
279             {
280                 Node* next = cur->next;
281                 while (next)
282                 {
283                     if (next == p) 
284                     {
285                         cur->next = next->next;
286                         delete next;
287                         --m_num_elements;
288                         break;
289                     }
290                     else {
291                         cur = next;
292                         next = cur->next;
293                     }
294                 }
295             }
296         }
297     }
298 private:
299     // 初始化buckets
300     void initialize_buckets(size_t n)
301     {
302         // 这里桶的数量取质数,减少取余桶数量的冲突
303         const size_t nBuckets = next_prime(n); 
304         m_buckets.reserve(nBuckets);
305         // 全部初始化为空指针
306         m_buckets.insert(m_buckets.end(), nBuckets, (Node*)0);
307         m_num_elements = 0;
308     }
309     // 将hashtable的bucket拷贝到本地
310     void copy_from(const MyHashtable& hashtable)
311     {
312         m_buckets.clear();
313         m_buckets.reserve(hashtable.m_buckets.size());
314         m_buckets.insert(m_buckets.end(), hashtable.m_buckets.size(), (Node*)0);
315         for (size_t i = 0; i < hashtable.m_buckets.size(); ++i)
316         {
317             const Node* curNode = hashtable.m_buckets[i];
318             if (curNode)
319             {
320                 Node* copy = new Node(curNode->value);
321                 m_buckets[i] = copy;
322                 for (Node* next = curNode->next; next;curNode = next, next = curNode->next) 
323                 {
324                     copy->next = new Node(next->value);
325                     copy = copy->next;
326                 }
327             }
328         }
329         m_num_elements = hashtable.m_num_elements;
330     }
331     // 释放所有节点
332     void clear()
333     {
334         for (size_t i = 0; i < m_buckets.size(); ++i)
335         {
336             Node* curNode = m_buckets[i];
337             while (curNode != 0) 
338             {
339                 Node* next = curNode->next;
340                 delete curNode;
341                 curNode = next;
342             }
343             m_buckets[i] = 0;
344         }
345         m_num_elements = 0;
346     }
347     // 判读bucket的size是否够用,不够重新申请个vector
348     void  resize(size_t num_elements_hint)
349     {
350         const size_t old_n = m_buckets.size();
351         if (num_elements_hint > old_n)
352         {
353             const size_t n = next_prime(num_elements_hint);
354             if (n > old_n)
355             {
356                 std::vector<Node*> tmp(n, (Node*)(0));
357                  for (size_t bucket = 0; bucket < old_n; ++bucket)
358                  {
359                     Node* first = m_buckets[bucket];
360                     while (first)
361                     {
362                       size_t new_bucket = get_bkt_num(first->value, n);
363                       m_buckets[bucket] = first->next;
364                       first->next = tmp[new_bucket];
365                       tmp[new_bucket] = first;
366                       first = m_buckets[bucket];
367                     }
368                   }
369                   m_buckets.swap(tmp);
370             }
371         }
372     }
373     // 非multi插入
374     std::pair<iterator, bool> insert_unique_noresize(const value_type& obj)
375     {
376         const size_t n = get_bkt_num(obj, m_buckets.size());
377         Node* first = m_buckets[n];
378         for (Node* curNode = first; curNode; curNode = curNode->next)
379         {
380             if (m_equals(m_get_key(curNode->value), m_get_key(obj)))
381             {
382                 return std::pair<iterator, bool>(iterator(curNode, this), false);
383             }
384         }
385         Node* tmp = new Node(obj);
386         tmp->next = first;
387         m_buckets[n] = tmp;
388         ++m_num_elements;
389         return std::pair<iterator, bool>(iterator(tmp, this), true);
390     }
391     // multi参入
392     iterator insert_equal_noresize(const value_type& obj)
393     {
394         const size_t n = get_bkt_num(obj, m_buckets.size());
395         Node* first = m_buckets[n];
396 
397         for (Node* curNode = first; curNode; curNode = curNode->next)
398         {
399             if (m_equals(m_get_key(curNode->value), m_get_key(obj)))
400             {
401                 Node*  tmp = new Node(obj);
402                 tmp->next = curNode->next;
403                 curNode->next = tmp;
404                 ++m_num_elements;
405                 return iterator(tmp, this);
406             }
407         }
408         Node* tmp = new Node(obj);
409         tmp->next = first;
410         m_buckets[n] = tmp;
411         ++m_num_elements;
412         return iterator(tmp, this);
413     }
414 private:
415     Hash                                m_hash;
416     Equal                                m_equals;
417     ExtractKey                            m_get_key;
418     std::vector<Hashtable_node<Value>*> m_buckets;
419     size_t                                m_num_elements;
420 };
421 #endif//__MY_HASH_TABLE_H_

 

posted @ 2021-02-25 20:40  ho966  阅读(90)  评论(0编辑  收藏  举报