hashtable ~

开链法:在每个表格元素中维护一个list,冲突元素添加到list中;

其他方法可以参考:index

一、hashtable的桶和节点

桶数组用 vector 实现,vector 的每个元素(即一个桶)指向一条由节点组成的单向链表;

// Basic hash table node: one link of a bucket's singly linked chain.
template <typename Value>
class _hashtable_node {
public:
    _hashtable_node* next;   // following node in the same chain
    Value val;               // element stored in this node
};

二、hashtable的迭代器

定义:

//hash table 的迭代器
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
struct _hashtable_iterator{
    typedef hashtable<Value,Key,HashFcn,ExtractKey,EqualKey> hashtable;
    typedef _hashtable_iterator<Value,Key,HashFcn,ExtractKey,EqualKey> iterator;
    typedef _hashtable_iterator<Value,Key,HashFcn,ExtractKey,EqualKey> const_iterator;
    typedef _hashtable_node<Value> node;
    
    typedef std::forward_iterator_tag iterator_category;
    typedef Value value_type;
    typedef ptrdiff_t difference_type;
    typedef size_t size_type;
    typedef Value& reference;
    typedef Value* pointer;

    node *cur;              // 迭代器目前所指节点
    hashtable* ht;           // 保持与容器的关联关系

    _hashtable_iterator(node *n, hashtable *tab) : cur(n), ht(tab) {}
    _hashtable_iterator() {}
    reference operator*() const { return cur->val; }
    pointer &operator->() const { return &(operator*()); }
    iterator &operator++();
    iterator operator++(int);
    bool operator==(const iterator &it) const { return cur == it->cur; }
    bool operator!=(const iterator &it) const { return cur != it->cur; }
};

迭代器自增操作:(无自减操作 )

// Pre-increment: moves to the next node of the current chain, or, when the chain
// ends, to the head of the first later bucket that is not empty.
// The current node is dereferenced, so the iterator must refer to an element.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
_hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey> &
_hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey>::operator++() {
    const node *const prev = cur;
    cur = cur->next;
    if (cur)
        return *this;                               // still inside the same chain

    // Chain exhausted: start from the bucket that held the previous node and
    // look at the following buckets until one has a chain (or none is left).
    size_type bucket = ht->bkt_num(prev->val);
    for (++bucket; bucket < ht->bucket_count(); ++bucket) {
        cur = ht->buckets[bucket];
        if (cur)
            break;
    }
    return *this;
}

// Post-increment: advances the iterator and returns a copy of its previous state.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
_hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey>
_hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey>::operator++(int) {
    iterator before(*this);
    operator++();
    return before;
}

三、hashtable的数据结构

buckets(桶数组)通过 vector 实现,vector 中的每个元素指向一条由 _hashtable_node 组成的链表。
定义:

// 模板参数:Value(实值类型)、Key(键值类型)、HashFcn(散列函数类型)
// ExtractKey(从节点中取出键值的方法,为函数或仿函数)、EqualKey(判断键值是否相同,为函数或仿函数)
// Alloc(空间配置器,这里为了简化,没有使用该参数)
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
class hashtable{
public:
    typedef HashFcn hasher;          
    typedef EqualKey key_equal;        
    typedef size_t size_type;
    typedef Value value_type;
    typedef Key key_type;
    typedef _hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey> iterator;
    typedef _hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey> const_iterator;
private:
    hasher hash;                              // 散列函数
    key_equal equals;                          // 比较函数
    ExtractKey get_key;                          // 获取键的函数

    typedef _hashtable_node<Value> node;

    std::vector<node *> buckets;                // 桶的数据内容
    size_type num_elements;
    void initialize_buckets(size_type);
    size_type bkt_num(const value_type &obj, size_type n) const{
        return bkt_num_key(get_key(obj), n);
    }

    size_type bkt_num(const value_type &obj) const{
        return bkt_num_key(get_key(obj));
    }
    size_type bkt_num_key(const key_type &key, size_type n) const{
        return hash(key) % n;
    }
    size_type bkt_num_key(const key_type &key) const{
        return bkt_num_key(key, bucket_count());
    }

    void resize(size_type new_size);

public:
    hashtable(size_type n, const HashFcn &hf, const key_equal &eql)
            : hash(hf), equals(eql), get_key(ExtractKey()), num_elements(0){
        initialize_buckets(n);
    }

    size_type bucket_count() const { return buckets.size(); }
    size_type max_bucket_count() const { return _stl_prime_list[_stl_num_primes - 1];}

    node *new_node(const value_type &obj){
        node *tmp = new node();
        tmp->next = 0;
        tmp->val = obj;
        return tmp;
    }

    void delete_node(node *n){
        delete &(n->val);
        delete n;
    }

public:
    std::pair<iterator, bool> insert_unique(const value_type &obj);
    std::pair<iterator, bool> insert_unique_noresize(const value_type &obj);

    iterator insert_equal(const value_type &obj);
    iterator insert_equal_noresize(const value_type &obj);

    iterator find(const key_type &obj);
    size_type count(const key_type &obj);
    size_type size() const { return num_elements; }
    int elems_in_buckets(const int i);
};

hashtable的大小选择,使用28个质数作为大小,要扩展时,选择“最接近某数并大于某数”的质数(为了减少哈希冲突);

static const int _stl_num_primes = 28;  // number of entries in the table below
// Ascending table of the bucket counts the hash table may use.
static const unsigned long _stl_prime_list[_stl_num_primes] =
{
        53,         97,           193,         389,       769,
        1543,       3079,         6151,        12289,     24593,
        49157,      98317,        196613,      393241,    786433,
        1572869,    3145739,      6291469,     12582917,  25165843,
        50331653,   100663319,    201326611,   402653189, 805306457,
        1610612741, 3221225473ul, 4294967291ul
};
// Returns the first table entry that is strictly greater than n.
// When no entry is greater than n, the largest entry is returned.
inline unsigned long _std_next_prime(unsigned long n){
    for(int i=0; i<_stl_num_primes; ++i)
        if(n < _stl_prime_list[i])
            return _stl_prime_list[i];
    return _stl_prime_list[_stl_num_primes - 1];
}

hashtable构造函数:

// Constructor excerpt (a member of class hashtable): stores the hash and key-equality
// functors, default-constructs the key extractor and starts with zero elements.
hashtable(size_type n, const HashFcn &hf, const key_equal &eql)
            : hash(hf), equals(eql), get_key(ExtractKey()), num_elements(0){
     initialize_buckets(n);  // create the initial buckets; n is the requested size hint
}

// Creates the initial bucket array: picks the bucket count with _std_next_prime(n),
// appends that many empty chains to `buckets` and resets the element counter.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
void hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::initialize_buckets(size_type n) {
    const size_type prime_count = _std_next_prime(n);   // first table prime greater than n
    node *const empty_chain = 0;

    buckets.reserve(prime_count);                             // allocate once
    buckets.insert(buckets.end(), prime_count, empty_chain);  // every bucket starts empty
    num_elements = 0;
}

四、插入操作和表格重整

插入前需要判断是否需要表格重整(插入后的元素个数大于桶个数时需要)。
插入代码:

// Inserts obj when no equal element is stored yet. The bucket array is given the
// chance to grow for one more element before the insertion takes place.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
std::pair<typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::iterator, bool>
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::insert_unique(const value_type &obj) {
    const size_type size_after_insert = num_elements + 1;
    resize(size_after_insert);              // rehashes when the table has to grow
    return insert_unique_noresize(obj);     // duplicates are rejected here
}

// Inserts obj without touching the bucket count.
// Returns (iterator to the stored element, true) after an insertion, or
// (iterator to the element already holding that key, false) when the key exists.
// Keys are compared with `equals` on the extracted keys, the same test that
// find() and count() apply, so the container needs no operator== on Value.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
std::pair<typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::iterator, bool>
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::insert_unique_noresize(const value_type &obj) {
    const size_type new_loc = bkt_num(obj);      // target bucket
    node *const head = buckets[new_loc];         // current head of that bucket's chain
    for(node *cur = head; cur; cur = cur->next){
        if(equals(get_key(cur->val), get_key(obj)))      // key already present
            return std::pair<iterator, bool>(iterator(cur, this), false);
    }
    node *tmp = new_node(obj);                   // no duplicate: link the new node in as chain head
    tmp->next = head;
    buckets[new_loc] = tmp;
    ++ num_elements;
    return std::pair<iterator, bool>(iterator(tmp, this), true);
}

表格重整代码:

// Grows the bucket array when new_size (the element count the caller is about to
// reach) exceeds the current bucket count, and moves every node into the bucket
// it belongs to under the new bucket count. Nodes are relinked, not copied.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
void hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::resize(size_type new_size) {
    const size_type old_size = bucket_count();   // current number of buckets
    if(new_size <= old_size)                     // enough buckets already
        return;

    const size_type n = _std_next_prime(new_size);   // bucket count to grow to
    if(n <= old_size)                            // prime table exhausted: cannot grow further
        return;

    std::vector<node *> tmp (n, (node *)0);      // the larger, still empty bucket array
    for(size_type i = 0; i < old_size; ++ i){
        node *first = buckets[i];
        while(first) {
            // The index must be taken modulo the NEW bucket count n, because
            // that is the modulus every later lookup will use.
            const size_type new_loc = bkt_num(first->val, n);
            buckets[i] = first->next;            // unlink from the old chain
            first->next = tmp[new_loc];          // push onto the new chain
            tmp[new_loc] = first;
            first = buckets[i];
        }
    }
    buckets.swap(tmp);                           // the table now uses the new buckets
}

获取元素所在桶:

// Bucket index of an element for a table with n buckets: extracts the key and
// forwards to bkt_num_key.
size_type bkt_num(const value_type &obj, size_type n) const{
    return bkt_num_key(get_key(obj), n);
}

// Bucket index of an element for the current bucket count.
size_type bkt_num(const value_type &obj) const{
    return bkt_num_key(get_key(obj));
}
// Bucket index of a key for a table with n buckets: hash value modulo n.
size_type bkt_num_key(const key_type &key, size_type n) const{
    return hash(key) % n;             // the actual computation; the other overloads end up here
}
// Bucket index of a key for the current bucket count.
size_type bkt_num_key(const key_type &key) const{
    return bkt_num_key(key, bucket_count());
}

查找

// Looks up the first element whose key equals `key`.
// The returned iterator refers to that element, or holds a null node when the
// key's bucket contains no match.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::iterator
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::find(const key_type &key) {
    node *hit = buckets[bkt_num_key(key)];
    // Walk the chain until it ends or a node with an equal key shows up.
    while(hit && !equals(get_key(hit->val), key))
        hit = hit->next;
    return iterator(hit, this);
}

统计键值等于给定 key 的元素个数

// Counts the elements whose key equals `key`; only the key's own bucket is scanned.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::size_type
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::count(const key_type &key) {
    int matches = 0;
    for(node *cur = buckets[bkt_num_key(key)]; cur; cur = cur->next){
        if(equals(get_key(cur->val), key))
            ++matches;
    }
    return matches;
}

五、全部代码

//
// Created by fwx on 2021/11/3.
//
#include <cstddef>
#include <iterator>
#include <utility>
#include <vector>
#ifndef HASHTABLE_HASHTABLE_H
#define HASHTABLE_HASHTABLE_H

// Number of entries in the bucket-count table.
static const int _stl_num_primes = 28;
// Ascending table of the bucket counts the hash table may use.
static const unsigned long _stl_prime_list[_stl_num_primes] =
{
        53,        97,        193,       389,       769,        1543,         3079,
        6151,      12289,     24593,     49157,     98317,      196613,       393241,
        786433,    1572869,   3145739,   6291469,   12582917,   25165843,     50331653,
        100663319, 201326611, 402653189, 805306457, 1610612741, 3221225473ul, 4294967291ul
};

// Returns the first table entry that is strictly greater than n.
// When no entry is greater than n, the largest entry is returned.
inline unsigned long _std_next_prime(unsigned long n){
    const unsigned long *const last = _stl_prime_list + (_stl_num_primes - 1);
    const unsigned long *candidate = _stl_prime_list;
    // Stop at the first entry above n; the last entry is the fallback either way.
    while(candidate != last && !(n < *candidate))
        ++candidate;
    return *candidate;
}

// Basic hash table node: one link of a bucket's singly linked chain.
template <typename Value>
class _hashtable_node {
public:
    _hashtable_node* next;   // following node in the same chain
    Value val;               // element stored in this node
};

template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
class hashtable;

//hash table 的迭代器
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
struct _hashtable_iterator{
    typedef hashtable<Value,Key,HashFcn,ExtractKey,EqualKey> hashtable;
    typedef _hashtable_iterator<Value,Key,HashFcn,ExtractKey,EqualKey> iterator;
    typedef _hashtable_iterator<Value,Key,HashFcn,ExtractKey,EqualKey> const_iterator;
    typedef _hashtable_node<Value> node;
    
    typedef std::forward_iterator_tag iterator_category;
    typedef Value value_type;
    typedef ptrdiff_t difference_type;
    typedef size_t size_type;
    typedef Value& reference;
    typedef Value* pointer;

    node *cur;
    hashtable* ht;

    _hashtable_iterator(node *n, hashtable *tab) : cur(n), ht(tab) {}
    _hashtable_iterator() {}
    reference operator*() const { return cur->val; }
    pointer &operator->() const { return &(operator*()); }
    iterator &operator++();
    iterator operator++(int);
    bool operator==(const iterator &it) const { return cur == it->cur; }
    bool operator!=(const iterator &it) const { return cur != it->cur; }
};


template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
class hashtable{
public:
    typedef HashFcn hasher;
    typedef EqualKey key_equal;
    typedef size_t size_type;
    typedef Value value_type;
    typedef Key key_type;
    typedef _hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey> iterator;
    typedef _hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey> const_iterator;
private:
    hasher hash;
    key_equal equals;
    ExtractKey get_key;

    typedef _hashtable_node<Value> node;

    std::vector<node *> buckets;
    size_type num_elements;
    void initialize_buckets(size_type);
    size_type bkt_num(const value_type &obj, size_type n) const{
        return bkt_num_key(get_key(obj), n);
    }

    size_type bkt_num(const value_type &obj) const{
        return bkt_num_key(get_key(obj));
    }
    size_type bkt_num_key(const key_type &key, size_type n) const{
        return hash(key) % n;
    }
    size_type bkt_num_key(const key_type &key) const{
        return bkt_num_key(key, bucket_count());
    }

    void resize(size_type new_size);

public:
    hashtable(size_type n, const HashFcn &hf, const key_equal &eql)
            : hash(hf), equals(eql), get_key(ExtractKey()), num_elements(0){
    }

    size_type bucket_count() const { return buckets.size(); }
    size_type max_bucket_count() const { return _stl_prime_list[_stl_num_primes - 1];}

    node *new_node(const value_type &obj){
        node *tmp = new node();
        tmp->next = 0;
        tmp->val = obj;
        return tmp;
    }

    void delete_node(node *n){
        delete &(n->val);
        delete n;
    }

public:
    std::pair<iterator, bool> insert_unique(const value_type &obj);
    std::pair<iterator, bool> insert_unique_noresize(const value_type &obj);

    iterator insert_equal(const value_type &obj);
    iterator insert_equal_noresize(const value_type &obj);

    iterator find(const key_type &obj);
    size_type count(const key_type &obj);
    size_type size() const { return num_elements; }
    int elems_in_buckets(const int i);
};

// Returns the length of the chain stored in bucket i.
// The index is used as given; no range check is performed.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
int hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::elems_in_buckets(const int i) {
    int length = 0;
    for(node *cur = buckets[i]; cur; cur = cur->next)
        ++length;
    return length;
}

// Pre-increment: moves to the next node of the current chain, or, when the chain
// ends, to the head of the first later bucket that is not empty.
// The current node is dereferenced, so the iterator must refer to an element.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
_hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey> &
_hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey>::operator++() {
    const node *const prev = cur;
    cur = cur->next;
    if (cur)
        return *this;                               // still inside the same chain

    // Chain exhausted: start from the bucket that held the previous node and
    // look at the following buckets until one has a chain (or none is left).
    size_type bucket = ht->bkt_num(prev->val);
    for (++bucket; bucket < ht->bucket_count(); ++bucket) {
        cur = ht->buckets[bucket];
        if (cur)
            break;
    }
    return *this;
}

// Post-increment: advances the iterator and returns a copy of its previous state.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
_hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey>
_hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey>::operator++(int) {
    iterator before(*this);
    operator++();
    return before;
}

// Creates the initial bucket array: picks the bucket count with _std_next_prime(n),
// appends that many empty chains to `buckets` and resets the element counter.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
void hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::initialize_buckets(size_type n) {
    const size_type prime_count = _std_next_prime(n);   // first table prime greater than n
    node *const empty_chain = 0;

    buckets.reserve(prime_count);                             // allocate once
    buckets.insert(buckets.end(), prime_count, empty_chain);  // every bucket starts empty
    num_elements = 0;
}

// Inserts obj, duplicates allowed. The bucket array is given the chance to grow
// for one more element before the insertion takes place.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::iterator
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::insert_equal(const value_type &obj) {
    const size_type size_after_insert = num_elements + 1;
    resize(size_after_insert);              // rehashes when the table has to grow
    return insert_equal_noresize(obj);
}

// Inserts obj without touching the bucket count; duplicates are allowed.
// When the bucket already holds an element with an equal key, the new node is
// linked right after it so equal keys stay adjacent; otherwise the new node
// becomes the head of the chain.
// Returns an iterator to the newly inserted element.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::iterator
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::insert_equal_noresize(const value_type &obj) {
    const size_type new_loc = bkt_num(obj);      // target bucket
    node *const head = buckets[new_loc];
    for(node *cur = head; cur; cur = cur->next){
        // Same key test as find() and count(): `equals` on the extracted keys.
        if(equals(get_key(cur->val), get_key(obj))){
            node *tmp = new_node(obj);
            tmp->next = cur->next;               // splice in behind the equal element
            cur->next = tmp;
            ++ num_elements;
            return iterator(tmp, this);
        }
    }
    node *tmp = new_node(obj);                   // key not present: new chain head
    tmp->next = head;
    buckets[new_loc] = tmp;
    ++ num_elements;
    return iterator(tmp, this);
}

// Inserts obj when no equal element is stored yet. The bucket array is given the
// chance to grow for one more element before the insertion takes place.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
std::pair<typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::iterator, bool>
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::insert_unique(const value_type &obj) {
    const size_type size_after_insert = num_elements + 1;
    resize(size_after_insert);              // rehashes when the table has to grow
    return insert_unique_noresize(obj);     // duplicates are rejected here
}

// Inserts obj without touching the bucket count.
// Returns (iterator to the stored element, true) after an insertion, or
// (iterator to the element already holding that key, false) when the key exists.
// Keys are compared with `equals` on the extracted keys, the same test that
// find() and count() apply, so the container needs no operator== on Value.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
std::pair<typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::iterator, bool>
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::insert_unique_noresize(const value_type &obj) {
    const size_type new_loc = bkt_num(obj);      // target bucket
    node *const head = buckets[new_loc];         // current head of that bucket's chain
    for(node *cur = head; cur; cur = cur->next){
        if(equals(get_key(cur->val), get_key(obj)))      // key already present
            return std::pair<iterator, bool>(iterator(cur, this), false);
    }
    node *tmp = new_node(obj);                   // no duplicate: link the new node in as chain head
    tmp->next = head;
    buckets[new_loc] = tmp;
    ++ num_elements;
    return std::pair<iterator, bool>(iterator(tmp, this), true);
}

// Grows the bucket array when new_size (the element count the caller is about to
// reach) exceeds the current bucket count, and moves every node into the bucket
// it belongs to under the new bucket count. Nodes are relinked, not copied.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
void hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::resize(size_type new_size) {
    const size_type old_size = bucket_count();   // current number of buckets
    if(new_size <= old_size)                     // enough buckets already
        return;

    const size_type n = _std_next_prime(new_size);   // bucket count to grow to
    if(n <= old_size)                            // prime table exhausted: cannot grow further
        return;

    std::vector<node *> tmp (n, (node *)0);      // the larger, still empty bucket array
    for(size_type i = 0; i < old_size; ++ i){
        node *first = buckets[i];
        while(first) {
            // The index must be taken modulo the NEW bucket count n, because
            // that is the modulus every later lookup will use.
            const size_type new_loc = bkt_num(first->val, n);
            buckets[i] = first->next;            // unlink from the old chain
            first->next = tmp[new_loc];          // push onto the new chain
            tmp[new_loc] = first;
            first = buckets[i];
        }
    }
    buckets.swap(tmp);                           // the table now uses the new buckets
}

// Looks up the first element whose key equals `key`.
// The returned iterator refers to that element, or holds a null node when the
// key's bucket contains no match.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::iterator
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::find(const key_type &key) {
    node *hit = buckets[bkt_num_key(key)];
    // Walk the chain until it ends or a node with an equal key shows up.
    while(hit && !equals(get_key(hit->val), key))
        hit = hit->next;
    return iterator(hit, this);
}

// Counts the elements whose key equals `key`; only the key's own bucket is scanned.
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey>
typename hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::size_type
hashtable<Value, Key, HashFcn, ExtractKey, EqualKey>::count(const key_type &key) {
    int matches = 0;
    for(node *cur = buckets[bkt_num_key(key)]; cur; cur = cur->next){
        if(equals(get_key(cur->val), key))
            ++matches;
    }
    return matches;
}

#endif //HASHTABLE_HASHTABLE_H
posted @   fwx  阅读(37)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!
点击右上角即可分享
微信分享提示