分布式缓存系统 Memcached 哈希表操作

memcached 中有两张hash 表，一个是“主hash 表”（primary_hashtable），另外一个是“原hash 表”（old_hashtable）。一般情况下都在主表中接受操作，在插入新item时判断是否需要进行扩容；每次操作的时候，先会检测表是否正处于扩展(expanding)状态，如果是，且该key对应的bucket尚未迁移，则在原表中进行操作，否则在主表中进行操作；当扩容完成后，所有操作再转移到主表中进行。在扩容时，采取逐步迁移策略：即每次只从原表中迁移一个bucket节点的item到新主表中，进行逐步迁移。

总的来看，这与Redis中的hash操作几乎一致。因此不再做详细讲解，具体分析见代码注释。

分布式缓存系统 Memcached

/*
 * Initialise the primary hash table.
 *
 * hashtable_init: requested hash power (the table gets hashsize(hashpower)
 *                 bucket slots). When it is zero the current value of
 *                 hashpower is left untouched (the original author's note
 *                 says that default is 16 -- not visible here, confirm).
 *
 * The process exits with EXIT_FAILURE if the bucket array cannot be
 * allocated. On success the hash-related fields of the global stats are
 * refreshed while holding the stats lock.
 */
void assoc_init(const int hashtable_init) {
    if (hashtable_init != 0) {
        hashpower = hashtable_init;
    }

    /* One pointer-sized, zero-initialised slot per bucket. */
    primary_hashtable = calloc(hashsize(hashpower), sizeof(void *));
    if (primary_hashtable == NULL) {
        fprintf(stderr, "Failed to init hashtable.\n");
        exit(EXIT_FAILURE);
    }

    /* The global statistics are only written between STATS_LOCK/STATS_UNLOCK. */
    STATS_LOCK();
    stats.hash_power_level = hashpower;
    stats.hash_bytes = hashsize(hashpower) * sizeof(void *);
    STATS_UNLOCK();
}

/*
 * Look up the item stored under `key`.
 *
 * key / nkey: key bytes and their length.
 * hv:         precomputed hash value of the key.
 *
 * Returns the matching item, or NULL when no item in the selected bucket
 * has the same key length and key bytes.
 */
item *assoc_find(const char *key, const size_t nkey, const uint32_t hv) {
    item *cursor;

    if (!expanding) {
        /* No expansion in progress: only the primary table is in use. */
        cursor = primary_hashtable[hv & hashmask(hashpower)];
    } else {
        /* hashpower was already incremented when expansion began, so the
         * old table is indexed with the previous power (hashpower - 1). */
        const unsigned int oldbucket = hv & hashmask(hashpower - 1);

        if (oldbucket >= expand_bucket) {
            /* This bucket has not been migrated yet. */
            cursor = old_hashtable[oldbucket];
        } else {
            /* Already migrated: its items now live in the primary table. */
            cursor = primary_hashtable[hv & hashmask(hashpower)];
        }
    }

    /* Walk the bucket chain; depth counts the nodes skipped before the
     * match (or the whole chain length when nothing matches). */
    int depth = 0;
    for (; cursor != NULL; cursor = cursor->h_next, ++depth) {
        if (nkey == cursor->nkey && memcmp(key, ITEM_key(cursor), nkey) == 0) {
            break;
        }
    }

    MEMCACHED_ASSOC_FIND(key, nkey, depth);
    return cursor;
}

/*
 * Internal helper: returns the address of the link that points at the item
 * stored under `key` -- either the bucket head slot or the h_next field of
 * the preceding item. If no item matches, the returned link holds NULL
 * (i.e. *result == 0 means "not found").
 *
 * Having the link rather than the item lets the caller unlink the item by
 * overwriting *result with the item's h_next.
 */
static item** _hashitem_before (const char *key, const size_t nkey, const uint32_t hv) {
    item **slot;

    if (!expanding) {
        slot = &primary_hashtable[hv & hashmask(hashpower)];
    } else {
        /* During expansion, buckets at or beyond expand_bucket are still
         * served from the old table. */
        const unsigned int oldbucket = hv & hashmask(hashpower - 1);

        if (oldbucket >= expand_bucket) {
            slot = &old_hashtable[oldbucket];
        } else {
            slot = &primary_hashtable[hv & hashmask(hashpower)];
        }
    }

    /* Advance link by link until the chain ends or the key matches. */
    for (;;) {
        item *candidate = *slot;

        if (candidate == NULL) {
            break;
        }
        if (nkey == candidate->nkey && memcmp(key, ITEM_key(candidate), nkey) == 0) {
            break;
        }
        slot = &candidate->h_next;
    }
    return slot;
}

/*
 * Grows the hash table to the next power of 2.
 *
 * The current primary table becomes old_hashtable and a new, zeroed primary
 * table with hashsize(hashpower + 1) slots is allocated. On success the
 * expansion state (hashpower, expanding, expand_bucket) and the hash stats
 * are updated; no items are moved here. If the allocation fails the previous
 * primary table is put back and nothing else changes.
 */
static void assoc_expand(void) {
    old_hashtable = primary_hashtable;
    primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));

    if (primary_hashtable == NULL) {
        /* Bad news, but we can keep running. */
        primary_hashtable = old_hashtable;
        return;
    }

    if (settings.verbose > 1) {
        fprintf(stderr, "Hash table expansion starting\n");
    }

    hashpower++;
    expanding = true;
    expand_bucket = 0;   /* migration starts from the first old bucket */

    STATS_LOCK();
    stats.hash_power_level = hashpower;
    /* Both tables are alive during expansion, so the new one is added on top. */
    stats.hash_bytes += hashsize(hashpower) * sizeof(void *);
    stats.hash_is_expanding = 1;
    STATS_UNLOCK();
}

/*
 * Request a hash table expansion: marks the request as started and signals
 * maintenance_cond. Does nothing if a request has already been started.
 */
static void assoc_start_expand(void) {
    if (!started_expanding) {
        started_expanding = true;
        pthread_cond_signal(&maintenance_cond);
    }
}

/*
 * Note: this isn't an assoc_update. The key must not already exist to call this.
 *
 * Pushes `it` onto the head of the bucket chain selected by the hash value
 * `hv`, increments the item count, and requests an expansion once the count
 * exceeds 1.5x the number of buckets (and no expansion is running).
 *
 * Always returns 1.
 */
int assoc_insert(item *it, const uint32_t hv) {
    item **head;

    if (!expanding) {
        head = &primary_hashtable[hv & hashmask(hashpower)];
    } else {
        /* hashpower was incremented when the expansion began, so the old
         * table is indexed with hashpower - 1. */
        const unsigned int oldbucket = hv & hashmask(hashpower - 1);

        if (oldbucket >= expand_bucket) {
            /* Bucket not migrated yet: insert into the old table. */
            head = &old_hashtable[oldbucket];
        } else {
            head = &primary_hashtable[hv & hashmask(hashpower)];
        }
    }

    /* Link the new item in front of the current chain. */
    it->h_next = *head;
    *head = it;

    hash_items++;

    /* Decide whether an expansion should be requested. */
    if (!expanding && hash_items > (hashsize(hashpower) * 3) / 2) {
        assoc_start_expand();
    }

    MEMCACHED_ASSOC_INSERT(ITEM_key(it), it->nkey, hash_items);
    return 1;
}

/*
 * Remove the item stored under `key` from its bucket chain. The item is only
 * unlinked here; this function does not free it.
 *
 * key / nkey: key bytes and their length.
 * hv:         precomputed hash value of the key.
 */
void assoc_delete(const char *key, const size_t nkey, const uint32_t hv) {
    /* Link (bucket head slot or previous item's h_next) pointing at the item. */
    item **link = _hashitem_before(key, nkey, hv);
    item *victim = *link;

    if (victim == NULL) {
        /* Note: we never actually get here. the callers don't delete things
           they can't find. */
        assert(*link != 0);
        return;
    }

    hash_items--;   /* total number of items in the hash table */

    /* The DTrace probe cannot be triggered as the last instruction
     * due to possible tail-optimization by the compiler
     */
    MEMCACHED_ASSOC_DELETE(key, nkey, hash_items);

    item *successor = victim->h_next;
    victim->h_next = NULL;   /* probably pointless, but whatever. */
    *link = successor;
}

// Thread routine that migrates items from old_hashtable into primary_hashtable
// while an expansion is in progress, then sleeps until the next expansion is
// requested. The original author notes that start_assoc_maintenance_thread()
// creates the thread running this function (that function is not shown here).
// Each locked pass moves at most hash_bulk_move buckets (one by default
// according to the original note -- confirm against the definition).
// The arg parameter is not used; the function always returns NULL.
static void *assoc_maintenance_thread(void *arg) {

while (do_run_maintenance_thread) {
int ii = 0;

/* Lock the cache, and bulk move multiple buckets to the new
* hash table. */
item_lock_global();
mutex_lock(&cache_lock);

// Stops early as soon as the last old bucket has been migrated (expanding cleared).
for (ii = 0; ii < hash_bulk_move && expanding; ++ii) {
item *it, *next;
int bucket;

// Re-link every item of the current old bucket onto the head of its new chain.
for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
// Save the successor first: h_next is overwritten below.
next = it->h_next;

// Recompute the hash and derive the bucket index in the enlarged table.
bucket = hash(ITEM_key(it), it->nkey) & hashmask(hashpower);
it->h_next = primary_hashtable[bucket];
primary_hashtable[bucket] = it;
}

// Once a bucket has been migrated, clear it in the old table.
old_hashtable[expand_bucket] = NULL;

expand_bucket++;
// Expansion is finished when every bucket of the old table has been moved.
if (expand_bucket == hashsize(hashpower - 1)) {
expanding = false;
free(old_hashtable);
STATS_LOCK();
// The old table is gone, so its slots no longer count towards hash_bytes.
stats.hash_bytes -= hashsize(hashpower - 1) * sizeof(void *);
stats.hash_is_expanding = 0;
STATS_UNLOCK();
if (settings.verbose > 1)
fprintf(stderr, "Hash table expansion done\n");
}
}

mutex_unlock(&cache_lock);
item_unlock_global();

if (!expanding) {
/* finished expanding. tell all threads to use fine-grained locks */
switch_item_lock_type(ITEM_LOCK_GRANULAR);
slabs_rebalancer_resume();
/* We are done expanding.. just wait for next invocation */
mutex_lock(&cache_lock);
started_expanding = false;
// NOTE(review): the wait is not wrapped in a predicate loop, so any wakeup
// (including a spurious one, or one sent to stop the thread) appears to
// fall through to assoc_expand() below -- confirm this is intended.
pthread_cond_wait(&maintenance_cond, &cache_lock);
/* Before doing anything, tell threads to use a global lock */
mutex_unlock(&cache_lock);
slabs_rebalancer_pause();
switch_item_lock_type(ITEM_LOCK_GLOBAL);
mutex_lock(&cache_lock);
// Allocate the larger table; on success the next loop iteration starts migrating.
assoc_expand();
mutex_unlock(&cache_lock);
}
}
return NULL;
}

posted on 2016-01-17 23:39 duanxz 阅读(763) 评论(0) 编辑收藏举报