无聊写的HashTable~

写了个HashTable的模板类, 因为C++不是很专业所以写的有点挫, 模仿SGI STL的HashTable写的。

SGI STL用的vector, 我木有用, 主要是想锻炼下。~ 

 

在数据增长到千万级的时候,内存占用居然达到了坑爹的1.2G, 我一直以为是内存泄露,结果实测以后发现确实就是要占用这么多空间~。。

希望能找到什么优化方法。~

 

模板类:

/************************************************************************
* @file      : HashTable.h
* @author    : kevin <zhangtaopy-kevin@hotmail.com>
* @date      : 2012/9/6 9:52:25
* @brief     : 
*
* $Id: $
/************************************************************************/
#ifndef __HASHTABLE_H__
#define __HASHTABLE_H__

#include <stdlib.h>
#include <memory.h>
#include <Windows.h>

// -------------------------------------------------------------------------
static const int number_of_primes = 28;
static const unsigned long prime_list[number_of_primes] = 
{
	53,				97,				193,			389,			769,
	1543,			3079,			6151,			12289,			24593,
	49157,			98317,			196613,			393241,			786433,
	1572869,		3145739,		6291469,		12582917,		25165843,
	50331653,		100663319,		201326611,		402653189,		805306457,
	1610612741,		3221225473ul,	4294967291ul	
};

template <class Value>
struct __hash_node
{
	__hash_node* next;
	Value val;
};

template <class Value, class Hashfunc, class EqualKey>
class HashTable
{
public:
	typedef Hashfunc hasher;
	typedef EqualKey key_equal;
	typedef __hash_node<Value> node;

public:
	~HashTable();
	HashTable(unsigned long ulSize);
	HashTable(unsigned long ulSize, const Hashfunc& hf, const EqualKey& eq);
	void init_bucket(unsigned long ulSize);
	void resize(bool isUnique);
	int	 insert_unique(const Value& obj);
	int	 insert_equal(const Value& obj);
	int  insert_unique_noresize(const Value& obj);
	int	 insert_equal_noresize(const Value& obj);
	int  find(const Value& obj);

private:
	unsigned long	m_ulFullSize;
	unsigned long	m_ulCurSize;
	hasher			m_hash;
	key_equal		m_equals;
	node**			m_bucket;
};


unsigned long LowerBound(const unsigned long* primelist, int n, unsigned long key)
{
	int start = 0;
	int end = n-1;

	if(n == 0)
		return -1;

	while(start <= end)
	{
		int mid = (end - start)/2 + start;
		if(key > primelist[mid])
		{
			start = mid + 1;
		}
		else if(key < primelist[mid])
		{
			end = mid - 1;
		}
		else
		{
			return primelist[mid];
		}
	}
	if(end != n-1)
		return primelist[end+1];
	else
		return primelist[n];
}


template <class V, class Hf, class Eq>
HashTable<V,Hf,Eq>::HashTable(unsigned long ulSize, const Hf& hf, const Eq& eq):m_ulCurSize(0),m_bucket(NULL),m_ulFullSize(0)
{
	init_bucket(ulSize);
}

template <class V, class Hf, class Eq>
HashTable<V,Hf,Eq>::HashTable(unsigned long ulSize):m_ulCurSize(0),m_bucket(NULL),m_ulFullSize(0)
{
	init_bucket(ulSize);
}

template <class V, class Hf, class Eq>
HashTable<V,Hf,Eq>::~HashTable()
{
	if(m_bucket != NULL)
		free(m_bucket);
}


template <class V>
__hash_node<V>* create_node(const V& obj)
{
		__hash_node<V>* p = new __hash_node<V>;
		p->next = NULL;
		p->val = obj;

		return p;
}


template <class V>
void destory_node(__hash_node<V>* p)
{
	delete p;
}


template <class V, class Hf, class Eq>
void HashTable<V,Hf,Eq>::init_bucket(unsigned long ulSize)
{
	m_ulFullSize = LowerBound(prime_list, number_of_primes, ulSize);
	m_bucket = (node**)malloc(sizeof(node*) * m_ulFullSize);
	memset(m_bucket,	0,	sizeof(node*) * m_ulFullSize);
}


template <class V, class Hf, class Eq>
void HashTable<V,Hf,Eq>::resize(bool isUnique)
{
	if(m_ulCurSize > m_ulFullSize && m_ulCurSize < prime_list[number_of_primes-1])
	{
		node**			oldbucket = NULL;
		node			*p = NULL,*q = NULL;
		unsigned long	ulSize = 0;

		printf("resize() cursize:%ld , fullsize:%ld,\n", m_ulCurSize, m_ulFullSize);
		oldbucket = m_bucket;
		ulSize = m_ulFullSize;
		m_ulCurSize = 0;
		m_ulFullSize = LowerBound(prime_list, number_of_primes, m_ulFullSize + 1);
		m_bucket = (node**)malloc(sizeof(node*) * m_ulFullSize);
		memset(m_bucket,	0,	sizeof(node*) * m_ulFullSize);
		//将原来的所有元素移到新的bucket中
		for(unsigned long i=0; i< ulSize; i++)
		{
			if(oldbucket[i] == NULL)
				continue;
			q = oldbucket[i];
			do
			{
				if(isUnique)
					insert_unique_noresize(q->val);
				else
					insert_equal_noresize(q->val);
				p = q;
				q = q->next;
				destory_node(p);
			}while(q != NULL);
		}
		if(oldbucket != NULL)
			free(oldbucket);
	}
}


template <class V, class Hf, class Eq>
int HashTable<V,Hf,Eq>::insert_unique_noresize(const V& obj)
{
	unsigned long key = m_hash(obj, m_ulFullSize);
	node *p = NULL, *q = NULL;
	bool isExist = false;

	if(m_bucket[key] == NULL)
	{
		m_bucket[key] = create_node(obj);
	}
	else
	{
		q = m_bucket[key];
		do
		{
			if(m_equals(q->val, obj))
			{
				//已经存在相同的节点了。
				isExist = true;
				break;
			}
			p = q;
			q = q->next;
		}while(q != NULL);

		if(!isExist)
		{
			p->next = create_node(obj);
		}
	}

	m_ulCurSize ++;
	return 1;
}

template <class V, class Hf, class Eq>
int HashTable<V,Hf,Eq>::insert_unique(const V& obj)
{
	insert_unique_noresize(obj);
	resize(true);
	return 1;
}

template <class V, class Hf, class Eq>
int HashTable<V,Hf,Eq>::insert_equal_noresize(const V& obj)
{
	unsigned long key = m_hash(obj, m_ulFullSize);
	node *p = NULL, *q = NULL;

	p = create_node(obj);
	if(m_bucket[key] == NULL)
	{
		m_bucket[key] = p;
	}
	else
	{
		p->next = m_bucket[key]->next;
		m_bucket[key]->next = p;
	}

	m_ulCurSize ++;
	return 1;
}

template <class V, class Hf, class Eq>
int HashTable<V,Hf,Eq>::insert_equal(const V& obj)
{
	insert_equal_noresize(obj);
	resize(false);
	return 1;
}

template <class V, class Hf, class Eq>
int HashTable<V,Hf,Eq>::find(const V& obj)
{
	unsigned long key = m_hash(obj, m_ulFullSize);
	node *p = NULL;

	if(m_bucket[key] != NULL)
	{
		p = m_bucket[key];
		do
		{
			if(m_equals(p->val, obj))
				return 1;
			p = p->next;
		}while(p != NULL);
	}
	else
		return 0;

	return 0;

}

#endif /* __HASHTABLE_H__ */

  

 

测试代码:

 

#include "stdafx.h"
#include "HashTable.h"

struct st 
{
	int key;
	int value;
};

struct st_equal
{
	bool operator() (const st& first, const st& second) const
	{
		return first.value == second.value;
	}
};

struct st_hash
{
	unsigned long operator() (const st& first, const unsigned long n) const
	{
		return first.value % n;
	}
};

int _tmain(int argc, _TCHAR* argv[])
{
	HashTable<st, st_hash, st_equal> myHashTable(10);
	st tmp;
	int count = 0;
	for(unsigned long i = 0; i<4294967291; i++)
	{
		tmp.key = 10;
		tmp.value = i;
		myHashTable.insert_unique(tmp);
	}
	tmp.value = 10;
	myHashTable.find(tmp);
	tmp.value = 64;
	myHashTable.find(tmp);
	return 0;
}

  

posted @ 2012-09-06 22:07  Kevin!=NULL  阅读(729)  评论(0编辑  收藏  举报