STL标准库-一个万用的hash function

技术在于交流、沟通。本文为博主原创文章,转载请注明出处并保持作品的完整性。

在前面我介绍过hash的使用,本次主要介绍一下Hash Function

Hash Function 即用来获得 hash code 的函数。容器根据得到的 hash code 把元素放到指定的 bucket 中;为了保证 hash 的效率,应尽量避免碰撞,因此 hash function 所产生的 hash code 应足够"乱"(分布足够均匀)。

下面介绍一个万用的hash function及其测试代码

首先我们创建一个客户类,它有三个成员变量:名(mFirstName)、姓(mLastName)、年龄(mAge)

/// A customer record used as the key type in the hash-table demo.
/// All three fields, including the age, are stored as text.
class Customer
{
public:
    std::string mFirstName;
    std::string mLastName;
    std::string mAge;

    /// Builds a customer from a first name, a last name and an age.
    /// The arguments are taken by value and moved into the members, so
    /// passing temporaries does not cost an extra string copy.
    Customer(std::string firstName, std::string lastName, std::string age)
        : mFirstName(std::move(firstName)),
          mLastName(std::move(lastName)),
          mAge(std::move(age))
    {
    }

    /// Two customers are equal when all three fields match.
    /// Unordered containers compare keys with std::equal_to by default,
    /// which requires this operator to exist.
    bool operator==(const Customer& c) const
    {
        return mFirstName == c.mFirstName
            && mLastName == c.mLastName
            && mAge == c.mAge;
    }
};

下面我们来创建hash function

复制代码
/// Hash functor for Customer, suitable as the Hash template argument of
/// the unordered containers. It folds the std::hash of every field into
/// a single seed value, one field at a time.
class CustomerHash
{
public:
    /// Hashes a customer from its first name, last name and age, in that order.
    std::size_t operator()(const Customer& c) const
    {
        return hash_val(c.mFirstName, c.mLastName, c.mAge);
    }

    /// Entry point of the generic hasher: starts from a zero seed, mixes in
    /// every argument (C++11 variadic template) and returns the final seed
    /// as the hash code.
    template <typename... Fields>
    std::size_t hash_val(const Fields&... args) const
    {
        std::size_t result = 0;
        hash_value(result, args...);
        return result;
    }

    /// Recursive step: mixes the first argument into the seed, then
    /// recurses on the remaining ones.
    template <typename Head, typename... Tail>
    void hash_value(std::size_t& seed,
                    const Head& firstArg,
                    const Tail&... args) const
    {
        hash_combine(seed, firstArg);
        hash_value(seed, args...);
    }

    /// Terminating step, selected when exactly one argument is left.
    template <typename Last>
    void hash_value(std::size_t& seed, const Last& val) const
    {
        hash_combine(seed, val);
    }

    /// Mixes the std::hash of one value into the seed.
    template <typename Value>
    void hash_combine(std::size_t& seed, const Value& val) const
    {
        const std::size_t hashed = std::hash<Value>()(val);
        // 0x9e3779b9 is the golden-ratio constant; the two shifts make the
        // contribution of each value depend on the seed built so far.
        seed ^= hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }
};
复制代码

 测试代码

复制代码
int main(int argc, char *argv[])
{
    unordered_multiset<Customer, CustomerHash> set;

    set.insert(Customer("a", "b", "1"));
    set.insert(Customer("c", "d", "2"));
    set.insert(Customer("e", "f", "3"));
    set.insert(Customer("g", "h", "4"));

    int myBucket_count = set.bucket_count();//返回有多少个篮子
    cout << set.bucket_count() << endl;

    CustomerHash hh;
    cout << "bucket postion of " << hh(Customer("a", "b", "1")) %myBucket_count << endl;//取余后 得出落在哪个篮子上
    cout << "bucket postion of " << hh(Customer("c", "d", "2")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("e", "f", "3")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("a", "b", "4")) %myBucket_count << endl;

    for (int i = 0; i< myBucket_count; i++)
    {//检测落在哪个篮子上
        cout << "bucket at #: " << i << "has: " << set.bucket_size(i) << endl;
    }
    return 0;
}
复制代码

 

测试结果

  

全部测试代码

复制代码
#include <iostream>
#include <string>
#include <unordered_set>
#include <utility>

using namespace std;

/// A customer record used as the key type in the hash-table demo.
/// All three fields, including the age, are stored as text.
class Customer
{
public:
    std::string mFirstName;
    std::string mLastName;
    std::string mAge;

    /// Builds a customer from a first name, a last name and an age.
    /// The arguments are taken by value and moved into the members, so
    /// passing temporaries does not cost an extra string copy.
    Customer(std::string firstName, std::string lastName, std::string age)
        : mFirstName(std::move(firstName)),
          mLastName(std::move(lastName)),
          mAge(std::move(age))
    {
    }

    /// Two customers are equal when all three fields match.
    /// The return type is spelled out explicitly: a declaration without
    /// one is not valid standard C++.
    bool operator==(const Customer& c) const
    {
        return mFirstName == c.mFirstName
            && mLastName == c.mLastName
            && mAge == c.mAge;
    }
};

/// Hash functor for Customer, suitable as the Hash template argument of
/// the unordered containers. It folds the std::hash of every field into
/// a single seed value, one field at a time.
class CustomerHash
{
public:
    /// Hashes a customer from its first name, last name and age, in that order.
    std::size_t operator()(const Customer& c) const
    {
        return hash_val(c.mFirstName, c.mLastName, c.mAge);
    }

    /// Entry point of the generic hasher: starts from a zero seed, mixes in
    /// every argument and returns the final seed as the hash code.
    template <typename... Fields>
    std::size_t hash_val(const Fields&... args) const
    {
        std::size_t result = 0;
        hash_value(result, args...);
        return result;
    }

    /// Recursive step: mixes the first argument into the seed, then
    /// recurses on the remaining ones.
    template <typename Head, typename... Tail>
    void hash_value(std::size_t& seed,
                    const Head& firstArg,
                    const Tail&... args) const
    {
        hash_combine(seed, firstArg);
        hash_value(seed, args...);
    }

    /// Terminating step, selected when exactly one argument is left.
    template <typename Last>
    void hash_value(std::size_t& seed, const Last& val) const
    {
        hash_combine(seed, val);
    }

    /// Mixes the std::hash of one value into the seed.
    template <typename Value>
    void hash_combine(std::size_t& seed, const Value& val) const
    {
        const std::size_t hashed = std::hash<Value>()(val);
        // The additive constant and the two shifts make the contribution of
        // each value depend on the seed accumulated so far.
        seed ^= hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }
};

int main(int argc, char *argv[])
{
    unordered_multiset<Customer, CustomerHash> set;

    set.insert(Customer("a", "b", "1"));
    set.insert(Customer("c", "d", "2"));
    set.insert(Customer("e", "f", "3"));
    set.insert(Customer("g", "h", "4"));

    int myBucket_count = set.bucket_count();
    cout << set.bucket_count() << endl;

    CustomerHash hh;
    cout << "bucket postion of " << hh(Customer("a", "b", "1")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("c", "d", "2")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("e", "f", "3")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("a", "b", "4")) %myBucket_count << endl;

    for (int i = 0; i< myBucket_count; i++)
    {
        cout << "bucket at #: " << i << "has: " << set.bucket_size(i) << endl;
    }
    return 0;
}
View Code
复制代码

这是一个万用的 hash function:在自定义 hash function 时,可以直接照搬上面的写法,把需要参与计算的成员依次传给 hash_val 即可。

参考:侯捷《STL源码剖析》

posted @   WangZijian  阅读(517)  评论(0)    收藏  举报
编辑推荐:
· MySQL下200GB大表备份,利用传输表空间解决停服发版表备份问题
· 记一次 .NET某固高运动卡测试 卡慢分析
· 微服务架构学习与思考:微服务拆分的原则
· 记一次 .NET某云HIS系统 CPU爆高分析
· 如果单表数据量大,只能考虑分库分表吗?
阅读排行:
· 7 个最近很火的开源项目「GitHub 热点速览」
· DeepSeekV3:写代码很强了
· 记一次 .NET某固高运动卡测试 卡慢分析
· Visual Studio 2022 v17.13新版发布:强化稳定性和安全,助力 .NET 开发提
· MySQL下200GB大表备份,利用传输表空间解决停服发版表备份问题
点击右上角即可分享
微信分享提示