std::unordered_map

std::unordered_map

  版本XcodeDefault.xctoolchain/usr/include/c++/v1

1:unordered_map typedef

   例子:typedef std::unordered_map<std::string, int> 

    模板参数:

 1 template <class _Key, class _Tp, class _Hash = hash<_Key>, class _Pred = equal_to<_Key>,
 2           class _Alloc = allocator<pair<const _Key, _Tp> > >
 3 class _LIBCPP_TEMPLATE_VIS unordered_map
 4 {
 5 public:
 6     // types
 7     typedef _Key                                           key_type;
 8     typedef _Tp                                            mapped_type;
 9     typedef _Hash                                          hasher;
10     typedef _Pred                                          key_equal;
11     typedef _Alloc                                         allocator_type;
12     typedef pair<const key_type, mapped_type>              value_type;
13     typedef value_type&                                    reference;
14     typedef const value_type&                              const_reference;
15     static_assert((is_same<value_type, typename allocator_type::value_type>::value),
16                   "Invalid allocator::value_type");
17 
18 private:
19     typedef __hash_value_type<key_type, mapped_type>                 __value_type;
20     typedef __unordered_map_hasher<key_type, __value_type, hasher>   __hasher;
21     typedef __unordered_map_equal<key_type, __value_type, key_equal> __key_equal;
22     typedef typename __rebind_alloc_helper<allocator_traits<allocator_type>,
23                                                  __value_type>::type __allocator_type;
24 
25     typedef __hash_table<__value_type, __hasher,
26                          __key_equal,  __allocator_type>   __table;
27 
28     __table __table_;
29 
30     ......
31       
32 }
  • key_type -> _Key -> std::string
  • mapped_type -> _Tp -> int
  • hasher -> _Hash = hash<_Key>   -> hash<std::string> 
  • key_equal -> _Pred = equal_to<_Key>  -> equal_to<std::string>
  • allocator_type -> _Alloc = allocator<pair<const _Key, _Tp> > -> allocator<pair<const std::string, int> > 

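  unordered_map内部持有__hash_table对象(成员__table_)。按照上面private部分的typedef,std::unordered_map<std::string, int>实例化出的__hash_table类型是

  __hash_table<

    __hash_value_type<std::string, int>,

    __unordered_map_hasher<std::string, __hash_value_type<std::string, int>, hash<std::string> >,

    __unordered_map_equal<std::string, __hash_value_type<std::string, int>, equal_to<std::string> >,

    __rebind_alloc_helper<allocator_traits<allocator<pair<const std::string, int> > >, __hash_value_type<std::string, int> >::type

  >

  忽略这几层包装后,可以近似理解为 __hash_table<pair<const std::string, int>, hash<std::string>, equal_to<std::string>, allocator<pair<const std::string, int> > >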

 1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 2 class __hash_table
 3 {
 4 public:
 5     typedef _Tp    value_type;
 6     typedef _Hash  hasher;
 7     typedef _Equal key_equal;
 8     typedef _Alloc allocator_type;
 9 
10 private:
11     typedef unique_ptr<__next_pointer[], __bucket_list_deleter> __bucket_list;
12     // --- Member data begin ---
13     __bucket_list                                         __bucket_list_;
14     __compressed_pair<__first_node, __node_allocator>     __p1_;
15     __compressed_pair<size_type, hasher>                  __p2_;
16     __compressed_pair<float, key_equal>                   __p3_;
17     // --- Member data end ---
18     
19    ......
20   
21 }

  __hash_table内部持有4个成员变量,

  • __bucket_list_,__next_pointer数组,储存插入节点node,内部含有多个bucket(node节点的集合),以node节点的形式链式组织
  • __p1_,head node  --  node分配器;
  • __p2_,node总数量 -- hash key size_t计算器;每成功插入一个node,node总数量+1
  • __p3_,负载因子 -- 数据比较器;负载因子调整bucket的数量(rehash方法),数据比较器用于比较参数和bucket node中_Key是否相同(因为bucket是链式存储,在hash key size_t映射到bucket index之后,会从bucket的头node开始,逐一比较node是否和参数相同)

  模板推导出类型后,就可以得知unordered_map的几个关键要点

  • __p2_->second, hash<std::string>,提供string到hash key size_t的计算
  • __bucket_list_,unordered_map的存储区
  • __p3_->first, 负载因子, rehash,决定bucket数量
  • hash key size_t -> bucket index, __constrain_hash
  • __p3_ -> second, equal_to<std::string>,数据的比较器

2: 散列计算器,string -> hash

  hash<std::string>, 是hash模板针对basic_string的偏特化(随std::string一起实现)。提供operator()操作符,作为计算hash数值的入口方法

 1 template <class _CharT, class _Allocator>
 2 struct _LIBCPP_TEMPLATE_VIS
 3     hash<basic_string<_CharT, char_traits<_CharT>, _Allocator> >
 4     : public unary_function<
 5           basic_string<_CharT, char_traits<_CharT>, _Allocator>, size_t>
 6 {
 7     size_t
 8     operator()(const basic_string<_CharT, char_traits<_CharT>, _Allocator>& __val) const _NOEXCEPT
 9     { return __do_string_hash(__val.data(), __val.data() + __val.size()); }
10 };

  hash<std::string>::operator()   调用 __do_string_hash

  __do_string_hash 调用 __murmur2_or_cityhash<size_t>::operator()(const void* __key, _Size __len)

  __murmur2_or_cityhash<size_t>::operator()(const void* __key, _Size __len) 按照字符串长度__len,分成若干种情况(0~16、17~32、33~64、大于64字节)分别计算

 1 template <class _Size>
 2 _Size
 3 __murmur2_or_cityhash<_Size, 64>::operator()(const void* __key, _Size __len)
 4 {
 5   const char* __s = static_cast<const char*>(__key);
 6   if (__len <= 32) {
 7     if (__len <= 16) {
 8       return __hash_len_0_to_16(__s, __len);
 9     } else {
10       return __hash_len_17_to_32(__s, __len);
11     }
12   } else if (__len <= 64) {
13     return __hash_len_33_to_64(__s, __len);
14   }
15 
16   // For strings over 64 bytes we hash the end first, and then as we
17   // loop we keep 56 bytes of state: v, w, x, y, and z.
18   _Size __x = __loadword<_Size>(__s + __len - 40);
19   _Size __y = __loadword<_Size>(__s + __len - 16) +
20               __loadword<_Size>(__s + __len - 56);
21   _Size __z = __hash_len_16(__loadword<_Size>(__s + __len - 48) + __len,
22                           __loadword<_Size>(__s + __len - 24));
23   pair<_Size, _Size> __v = __weak_hash_len_32_with_seeds(__s + __len - 64, __len, __z);
24   pair<_Size, _Size> __w = __weak_hash_len_32_with_seeds(__s + __len - 32, __y + __k1, __x);
25   __x = __x * __k1 + __loadword<_Size>(__s);
26 
27   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
28   __len = (__len - 1) & ~static_cast<_Size>(63);
29   do {
30     __x = __rotate(__x + __y + __v.first + __loadword<_Size>(__s + 8), 37) * __k1;
31     __y = __rotate(__y + __v.second + __loadword<_Size>(__s + 48), 42) * __k1;
32     __x ^= __w.second;
33     __y += __v.first + __loadword<_Size>(__s + 40);
34     __z = __rotate(__z + __w.first, 33) * __k1;
35     __v = __weak_hash_len_32_with_seeds(__s, __v.second * __k1, __x + __w.first);
36     __w = __weak_hash_len_32_with_seeds(__s + 32, __z + __w.second,
37                                         __y + __loadword<_Size>(__s + 16));
38     std::swap(__z, __x);
39     __s += 64;
40     __len -= 64;
41   } while (__len != 0);
42   return __hash_len_16(
43       __hash_len_16(__v.first, __w.first) + __shift_mix(__y) * __k1 + __z,
44       __hash_len_16(__v.second, __w.second) + __x);
45 }

  举例,__hash_len_0_to_16

 1 static _Size __hash_len_0_to_16(const char* __s, _Size __len)
 2      _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
 3   {
 4     if (__len > 8) {
 5       const _Size __a = __loadword<_Size>(__s);
 6       const _Size __b = __loadword<_Size>(__s + __len - 8);
 7       return __hash_len_16(__a, __rotate_by_at_least_1(__b + __len, __len)) ^ __b;
 8     }
 9     if (__len >= 4) {
10       const uint32_t __a = __loadword<uint32_t>(__s);
11       const uint32_t __b = __loadword<uint32_t>(__s + __len - 4);
12       return __hash_len_16(__len + (__a << 3), __b);
13     }
14     if (__len > 0) {
15       const unsigned char __a = __s[0];
16       const unsigned char __b = __s[__len >> 1];
17       const unsigned char __c = __s[__len - 1];
18       const uint32_t __y = static_cast<uint32_t>(__a) +
19                            (static_cast<uint32_t>(__b) << 8);
20       const uint32_t __z = __len + (static_cast<uint32_t>(__c) << 2);
21       return __shift_mix(__y * __k2 ^ __z * __k3) * __k2;
22     }
23     return __k2;
24   }

   同理,其余类型type均实现hash<type>::operator()方法

 

3:构造bucket

  __p3_->first, 负载因子, rehash,决定bucket数量

 1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 2 _LIBCPP_INLINE_VISIBILITY
 3 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
 4 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare(
 5     size_t __hash, value_type& __value)
 6 {
 7     size_type __bc = bucket_count();
 8 
 9     if (__bc != 0)
10     {
11         size_t __chash = __constrain_hash(__hash, __bc);
12         __next_pointer __ndptr = __bucket_list_[__chash];
13         if (__ndptr != nullptr)
14         {
15             for (__ndptr = __ndptr->__next_; __ndptr != nullptr &&
16                                              __constrain_hash(__ndptr->__hash(), __bc) == __chash;
17                                                      __ndptr = __ndptr->__next_)
18             {
19                 if (key_eq()(__ndptr->__upcast()->__value_, __value))
20                     return __ndptr;
21             }
22         }
23     }
24     if (size()+1 > __bc * max_load_factor() || __bc == 0)
25     {
26         rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
27                                      size_type(ceil(float(size() + 1) / max_load_factor()))));
28     }
29     return nullptr;
30 }

  插入node时,如果满足公式

  size()+1 > __bc * max_load_factor() || __bc == 0,则调用rehash方法,传入的新bucket数量为

  _VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), size_type(ceil(float(size() + 1) / max_load_factor())))

  hash_table默认构造函数,提供的负载因子是1;因此第一次插入时(__bc == 0,size() == 0),rehash传入的参数为1  

 1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 2 void
 3 __hash_table<_Tp, _Hash, _Equal, _Alloc>::rehash(size_type __n)
 4 _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
 5 {
 6     if (__n == 1)
 7         __n = 2;
 8     else if (__n & (__n - 1))
 9         __n = __next_prime(__n);
10     size_type __bc = bucket_count();
11     if (__n > __bc)
12         __rehash(__n);
13     else if (__n < __bc)
14     {
15         __n = _VSTD::max<size_type>
16               (
17                   __n,
18                   __is_hash_power2(__bc) ? __next_hash_pow2(size_t(ceil(float(size()) / max_load_factor()))) :
19                                            __next_prime(size_t(ceil(float(size()) / max_load_factor())))
20               );
21         if (__n < __bc)
22             __rehash(__n);
23     }
24 }

  rehash内部接收到__n == 1,调整__n = 2。然后调用__rehash方法创建2个bucket

 1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 2 void
 3 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc)
 4 {
 5 #if _LIBCPP_DEBUG_LEVEL >= 2
 6     __get_db()->__invalidate_all(this);
 7 #endif  // _LIBCPP_DEBUG_LEVEL >= 2
 8     __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc();
 9     __bucket_list_.reset(__nbc > 0 ?
10                       __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr);
11     __bucket_list_.get_deleter().size() = __nbc;
12     if (__nbc > 0)
13     {
14         for (size_type __i = 0; __i < __nbc; ++__i)
15             __bucket_list_[__i] = nullptr;
16         __next_pointer __pp = __p1_.first().__ptr();
17         __next_pointer __cp = __pp->__next_;
18         if (__cp != nullptr)
19         {
20             size_type __chash = __constrain_hash(__cp->__hash(), __nbc);
21             __bucket_list_[__chash] = __pp;
22             size_type __phash = __chash;
23             for (__pp = __cp, __cp = __cp->__next_; __cp != nullptr;
24                                                            __cp = __pp->__next_)
25             {
26                 __chash = __constrain_hash(__cp->__hash(), __nbc);
27                 if (__chash == __phash)
28                     __pp = __cp;
29                 else
30                 {
31                     if (__bucket_list_[__chash] == nullptr)
32                     {
33                         __bucket_list_[__chash] = __pp;
34                         __pp = __cp;
35                         __phash = __chash;
36                     }
37                     else
38                     {
39                         __next_pointer __np = __cp;
40                         for (; __np->__next_ != nullptr &&
41                                key_eq()(__cp->__upcast()->__value_,
42                                         __np->__next_->__upcast()->__value_);
43                                                            __np = __np->__next_)
44                             ;
45                         __pp->__next_ = __np->__next_;
46                         __np->__next_ = __bucket_list_[__chash]->__next_;
47                         __bucket_list_[__chash]->__next_ = __cp;
48 
49                     }
50                 }
51             }
52         }
53     }
54 }

 

4:插入操作(碰撞冲突,链地址法)

 hash key size_t定位到bucket index的计算方法  

1 inline _LIBCPP_INLINE_VISIBILITY
2 size_t
3 __constrain_hash(size_t __h, size_t __bc)
4 {
5     return !(__bc & (__bc - 1)) ? __h & (__bc - 1) :
6         (__h < __bc ? __h : __h % __bc);
7 }

  第一个参数为hash值,第二个参数为bucket数量

  !(__bc & (__bc - 1))  ->  满足表达式为true,则__bc为2的N次方

  __h & (__bc - 1) -> __bc - 1的二进制形如0b111(低位全为1),按位与即取__h的低N位作为bucket index,结果等价于__h % __bc

  (__h < __bc ? __h : __h % __bc) -> 如果__h < __bc,则直接获取__h 作为存储地址;否则,取模运算为存储地址,__h % __bc(除留余数法)

  插入bucket之前,先探测bucket内是否已经存在相同key的node。

  bucket内部采用链表存储node,从bucket的第一个node开始顺序遍历,只要node的hash值经过__constrain_hash后仍然落在当前bucket(== __chash),就使用key_eq比较具体数值;相同则返回该node

 1 // Prepare the container for an insertion of the value __value with the hash
 2 // __hash. This does a lookup into the container to see if __value is already
 3 // present, and performs a rehash if necessary. Returns a pointer to the
 4 // existing element if it exists, otherwise nullptr.
 5 //
 6 // Note that this function does forward exceptions if key_eq() throws, and never
 7 // mutates __value or actually inserts into the map.
 8 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 9 _LIBCPP_INLINE_VISIBILITY
10 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
11 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare(
12     size_t __hash, value_type& __value)
13 {
14     size_type __bc = bucket_count();
15 
16     if (__bc != 0)
17     {
18         size_t __chash = __constrain_hash(__hash, __bc);
19         __next_pointer __ndptr = __bucket_list_[__chash];
20         if (__ndptr != nullptr)
21         {
22             for (__ndptr = __ndptr->__next_; __ndptr != nullptr &&
23                                              __constrain_hash(__ndptr->__hash(), __bc) == __chash;
24                                                      __ndptr = __ndptr->__next_)
25             {
26                 if (key_eq()(__ndptr->__upcast()->__value_, __value))
27                     return __ndptr;
28             }
29         }
30     }
31     if (size()+1 > __bc * max_load_factor() || __bc == 0)
32     {
33         rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
34                                      size_type(ceil(float(size() + 1) / max_load_factor()))));
35     }
36     return nullptr;
37 }

   如果未发现相同key的node(__node_insert_unique_prepare返回nullptr),则插入节点

 1 // Insert the node __nd into the container by pushing it into the right bucket,
 2 // and updating size(). Assumes that __nd->__hash is up-to-date, and that
 3 // rehashing has already occurred and that no element with the same key exists
 4 // in the map.
 5 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 6 _LIBCPP_INLINE_VISIBILITY
 7 void
 8 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_perform(
 9     __node_pointer __nd) _NOEXCEPT
10 {
11     size_type __bc = bucket_count();
12     size_t __chash = __constrain_hash(__nd->__hash(), __bc);
13     // insert_after __bucket_list_[__chash], or __first_node if bucket is null
14     __next_pointer __pn = __bucket_list_[__chash];
15     if (__pn == nullptr)
16     {
17         __pn =__p1_.first().__ptr();
18         __nd->__next_ = __pn->__next_;
19         __pn->__next_ = __nd->__ptr();
20         // fix up __bucket_list_
21         __bucket_list_[__chash] = __pn;
22         if (__nd->__next_ != nullptr)
23             __bucket_list_[__constrain_hash(__nd->__next_->__hash(), __bc)] = __nd->__ptr();
24     }
25     else
26     {
27         __nd->__next_ = __pn->__next_;
28         __pn->__next_ = __nd->__ptr();
29     }
30     ++size();
31 }

  将新建节点插入bucket头部

  __nd->__next_ = __pn->__next_;

  __pn->__next_ = __nd->__ptr();   

 

5:查找操作

  __p3_ -> second, equal_to<std::string>,数据的比较器 

 1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 2 template <class _Key>
 3 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 4 __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k)
 5 {
 6     size_t __hash = hash_function()(__k);
 7     size_type __bc = bucket_count();
 8     if (__bc != 0)
 9     {
10         size_t __chash = __constrain_hash(__hash, __bc);
11         __next_pointer __nd = __bucket_list_[__chash];
12         if (__nd != nullptr)
13         {
14             for (__nd = __nd->__next_; __nd != nullptr &&
15                 (__nd->__hash() == __hash
16                   || __constrain_hash(__nd->__hash(), __bc) == __chash);
17                                                            __nd = __nd->__next_)
18             {
19                 if ((__nd->__hash() == __hash)
20                     && key_eq()(__nd->__upcast()->__value_, __k))
21 #if _LIBCPP_DEBUG_LEVEL >= 2
22                     return iterator(__nd, this);
23 #else
24                     return iterator(__nd);
25 #endif
26             }
27         }
28     }
29     return end();
30 }

  查找方法:

  • 生成入参hash key size_t : size_t __hash= hash_function()(__k); 
  • 获取bucket数量:size_type __bc = bucket_count();
  • 生成bucket index:size_t __chash = __constrain_hash(__hash, __bc);
  • 获取bucket头节点指针:__next_pointer __nd = __bucket_list_[__chash];
  • 循环比较node hash key size_t 和 入参hash key size_t
  • 比较入参 和 node key:key_eq()(__nd->__upcast()->__value_, __k)
  • 返回结果

 

posted on 2020-08-12 21:16  炽离  阅读(1791)  评论(0编辑  收藏  举报

导航