memcached(八)-- set指令内部实现
在一个set指令传输到memcached后:
set abc 0 3600 5 hello
memcached是怎样执行set/add等指令的呢?
memcached启动后,首先要监听端口;监听到连接上有对应的动作后,再执行指令。从源码来看,调用堆栈是这样的:
main_base从启动到监听 main——>(初始化main_base了)server_sockets——>server_socket——> conn_new——>event_handler——>drive_machine——> try_read_command——>process_command——>(process_get_command/process_update_command/process_arithmetic_command/process_delete_command/process_touch_command/process_stat); (flush_all/version/quit/shutdown/slabs/lru_crawler/tocrawl/sleep/verbosity/最后ERROR) 自定义协议类型,在try_read_command中根据字符编码,判定是否二进制协议。 二进制协议的话执行dispatch_bin_command
drive_machine是memcached的主循环函数。根据connection的状态,决定接下来的动作。也会把当前动作执行后,改变为某个状态,从而执行下一个动作。
connection的状态有这些(见memcached.h):
/**
 * States a connection can be in. The numeric values are implicit and
 * consecutive, starting at 0 with conn_listening.
 */
enum conn_states {
    conn_listening = 0, /**< socket that listens for incoming connections */
    conn_new_cmd,       /**< get the connection ready for the next command */
    conn_waiting,       /**< wait for the socket to become readable */
    conn_read,          /**< read in a command line */
    conn_parse_cmd,     /**< attempt to parse a command from the input buffer */
    conn_write,         /**< write out a simple response */
    conn_nread,         /**< read in a fixed number of bytes */
    conn_swallow,       /**< discard unneeded bytes without storing them */
    conn_closing,       /**< the connection is being closed */
    conn_mwrite,        /**< write out many items one after another */
    conn_closed,        /**< the connection has been closed */

    conn_max_state      /**< largest state value (used for assertions) */
};
nread还细分了几个宏,区分不同的指令:
/*
 * Command codes that distinguish the storage commands handled in the
 * conn_nread state. Each directive must sit on its own line: a #define
 * extends to the end of the line, so placing several on one line would
 * define only the first macro.
 */
#define NREAD_ADD 1
#define NREAD_SET 2
#define NREAD_REPLACE 3
#define NREAD_APPEND 4
#define NREAD_PREPEND 5
#define NREAD_CAS 6
set指令对应的就是NREAD_SET,值为2。
在set命令中,分为2个动作:
set abc 0 3600 5 /*conn_parse_cmd*/ hello /*conn_nread*/
针对set命令来说,第一个动作是分配空间,第二个动作是关联hash桶。
分配空间
源码中,针对文本协议,process_update_command函数就是执行set/add等指令的。
else if ((ntokens == 6 || ntokens == 7) && ((strcmp(tokens[COMMAND_TOKEN].value, "add") == 0 && (comm = NREAD_ADD)) || (strcmp(tokens[COMMAND_TOKEN].value, "set") == 0 && (comm = NREAD_SET)) || (strcmp(tokens[COMMAND_TOKEN].value, "replace") == 0 && (comm = NREAD_REPLACE)) || (strcmp(tokens[COMMAND_TOKEN].value, "prepend") == 0 && (comm = NREAD_PREPEND)) || (strcmp(tokens[COMMAND_TOKEN].value, "append") == 0 && (comm = NREAD_APPEND)) )) { process_update_command(c, tokens, ntokens, comm, false); } else if ((ntokens == 7 || ntokens == 8) && (strcmp(tokens[COMMAND_TOKEN].value, "cas") == 0 && (comm = NREAD_CAS))) { process_update_command(c, tokens, ntokens, comm, true); }
跟着堆栈走, 分配空间的代码是:
/* Allocate an item for this key: the expiry goes through realtime() first,
 * and vlen is passed as the byte count of the value. */
it = item_alloc(key, nkey, flags, realtime(exptime), vlen);
item_alloc在thread.c中(其实就是通过worker线程执行代码逻辑),方法的参数说明一下:
/*
 * Allocates a new item by delegating to do_item_alloc().
 *
 * @param key     the key string
 * @param nkey    length of the key string
 * @param flags   the second argument of the command; 0 in many cases
 * @param exptime expiration time as a rel_time_t.
 *                NOTE(review): the original note claimed millisecond
 *                precision; memcached's rel_time_t is normally expressed
 *                in seconds -- confirm against memcached.h.
 * @param nbytes  the fourth argument of the command: length of the value
 *
 * @return pointer to the allocated space (the specific chunk)
 */
item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) {
    /* do_item_alloc handles its own locks; its last argument is passed as 0 */
    return do_item_alloc(key, nkey, flags, exptime, nbytes, 0);
}
会调用到item.c中的“do_item_alloc”函数。这个函数是分配内存空间给这个对象的最关键部分:
/** * @param key 字符串 * @param nkey 字符串长度 * @param flags 命令中的第二个参数 很多情况下都系0 * @param exptime 过期时间,精确到毫秒 * @param nbytes 命令中的最后一个参数的长度 * @param cur_hv 在thread.c的item_alloc方法中, 设置成0了 */ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv)
在这个函数中, 关键步骤如下:
- 先要知道这个对象应该放到哪个slab中
size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); if (settings.use_cas) { ntotal += sizeof(uint64_t); } //计算长度,得知这个item应该在哪个slab unsigned int id = slabs_clsid(ntotal);
得知在哪个slab:
/*
 * Maps an item size to the index of the slab class that should hold it.
 *
 * Walks slabclass[] upward from POWER_SMALLEST and returns the first index
 * whose .size is not smaller than `size`.
 *
 * @param size  number of bytes the item needs
 * @return the slabclass index, or 0 when `size` is 0 or when the item is
 *         larger than the class at power_largest
 */
unsigned int slabs_clsid(const size_t size) {
    int idx;

    if (size == 0) {
        return 0;
    }

    for (idx = POWER_SMALLEST; size > slabclass[idx].size; idx++) {
        if (idx == power_largest) {
            /* won't fit in the biggest slab */
            return 0;
        }
    }
    return idx;
}
slabs_clsid返回slabclass下标,告知这个对象属于哪一个slab。
PS:memcached启动时,会初始化slabclass[]这个数组,其中后一个slabclass元素的size一定比前一个元素的size更大。
- LRU判定
指定的slabclass可能已经存满数据,这时分配空间和LRU淘汰的逻辑如下:
/**
 * Each pass:
 *   1. run one LRU pull first (only when no LRU maintainer thread is used);
 *   2. try to allocate. Leave the loop as soon as space is obtained,
 *      otherwise pull from the LRU tails and retry, at most 5 passes in all.
 */
for (i = 0; i < 5; i++) {
    /* Try to reclaim memory first. The original note states that
     * lru_maintainer_thread is false by default. */
    if (!settings.lru_maintainer_thread) {
        lru_pull_tail(id, COLD_LRU, 0, false, cur_hv);
    }

    it = slabs_alloc(ntotal, id, &total_chunks);
    if (settings.expirezero_does_not_evict) {
        total_chunks -= noexp_lru_size(id);
    }

    /* got a chunk: nothing more to do */
    if (it != NULL) {
        break;
    }

    /* allocation failed: pull from the LRU tails before the next pass */
    if (settings.lru_maintainer_thread) {
        lru_pull_tail(id, HOT_LRU, total_chunks, false, cur_hv);
        lru_pull_tail(id, WARM_LRU, total_chunks, false, cur_hv);
        lru_pull_tail(id, COLD_LRU, total_chunks, true, cur_hv);
    } else {
        lru_pull_tail(id, COLD_LRU, 0, true, cur_hv);
    }
}
- slabs.c中,分配空间
static void *do_slabs_alloc(const size_t size, unsigned int id, unsigned int *total_chunks) { slabclass_t *p; void *ret = NULL; item *it = NULL; if (id < POWER_SMALLEST || id > power_largest) { MEMCACHED_SLABS_ALLOCATE_FAILED(size, 0); return NULL; } p = &slabclass[id]; assert(p->sl_curr == 0 || ((item *)p->slots)->slabs_clsid == 0); *total_chunks = p->slabs * p->perslab; /* fail unless we have space at the end of a recently allocated page, we have something on our freelist, or we could allocate a new page */ if (! (p->sl_curr != 0 || do_slabs_newslab(id) != 0)) { /* We don't have more memory available */ ret = NULL; } else if (p->sl_curr != 0) { /* return off our freelist */ it = (item *)p->slots; p->slots = it->next; if (it->next) it->next->prev = 0; /* Kill flag and initialize refcount here for lock safety in slab * mover's freeness detection. */ it->it_flags &= ~ITEM_SLABBED; it->refcount = 1; p->sl_curr--; ret = (void *)it; } if (ret) { p->requested += size; MEMCACHED_SLABS_ALLOCATE(size, id, p->size, ret); } else { MEMCACHED_SLABS_ALLOCATE_FAILED(size, id); } return ret; }
sl_curr记录当前这个slabclass还有多少个可用的位置。slots是由空闲item组成的链表(free list),通过item的next/prev指针把每个空闲“位置”串联起来。
最后回到process_update_command。
- ITEM_data这个宏用来取得item中存放数据(value)的区域的指针。
- 把分配空间的指针放到当前connection,
- connection的状态修改为"conn_nread"状态。
drive_machine循环会根据这个connection的状态,进入conn_nread分支的代码执行。
/* Attach the allocated item to the connection and switch it to conn_nread. */
c->item = it;
c->ritem = ITEM_data(it);   /* read position: the item's data area */
c->rlbytes = it->nbytes;    /* byte count taken from the item */
c->cmd = comm;              /* command code (NREAD_*) */
conn_set_state(c, conn_nread); /* the conn_nread state handles what comes next */
关联hashTable
在drive_machine中,conn_nread状态会执行complete_nread方法——>complete_nread_ascii(这里不讨论二进制协议)——>store_item(worker线程实现)——>do_store_item。
在上一步分配空间时,对象还没有与hashtable关联。此外,还要根据指令类型判定如何处理旧对象:比如set指令,如果key已经存在,就用新对象替换掉旧对象。
/*
 * Stores item `it` according to the storage command `comm` and reports the
 * outcome.
 *
 * @param it    the item carrying the incoming key and data
 * @param comm  command code: NREAD_ADD, NREAD_SET, NREAD_REPLACE,
 *              NREAD_APPEND, NREAD_PREPEND or NREAD_CAS
 * @param c     the connection issuing the command; its thread stats and
 *              c->cas are updated here
 * @param hv    hash value handed through to do_item_get(), item_replace(),
 *              do_item_link() and do_item_alloc()
 *
 * @return STORED     when the item was linked or replaced an existing one;
 *         NOT_STORED when add finds an existing item, when
 *                    replace/append/prepend finds none, or when the merged
 *                    item for append/prepend cannot be allocated;
 *         NOT_FOUND  when cas finds no existing item;
 *         EXISTS     when the CAS values do not match.
 */
enum store_item_type do_store_item(item *it, int comm, conn *c, const uint32_t hv) {
    char *key = ITEM_key(it);
    /* look the key up through the hash table */
    item *old_it = do_item_get(key, it->nkey, hv);
    enum store_item_type stored = NOT_STORED;

    item *new_it = NULL;
    int flags;

    if (old_it != NULL && comm == NREAD_ADD) {
        /* an item already exists, so the add does not succeed */
        /* add only adds a nonexistent item, but promote to head of LRU */
        do_item_update(old_it);
    } else if (!old_it && (comm == NREAD_REPLACE
        || comm == NREAD_APPEND || comm == NREAD_PREPEND))
    {
        /* no existing item for replace / append / prepend */
        /* replace only replaces an existing value; don't store */
    } else if (comm == NREAD_CAS) {
        /* ----- cas handling: begin ----- */
        /* validate cas operation */
        if (old_it == NULL) {
            /* LRU expired */
            stored = NOT_FOUND;
            pthread_mutex_lock(&c->thread->stats.mutex);
            c->thread->stats.cas_misses++;
            pthread_mutex_unlock(&c->thread->stats.mutex);
        } else if (ITEM_get_cas(it) == ITEM_get_cas(old_it)) {
            /* cas validates */
            /* it and old_it may belong to different classes. */
            /* I'm updating the stats for the one that's getting pushed out */
            pthread_mutex_lock(&c->thread->stats.mutex);
            c->thread->stats.slab_stats[ITEM_clsid(old_it)].cas_hits++;
            pthread_mutex_unlock(&c->thread->stats.mutex);

            item_replace(old_it, it, hv);
            stored = STORED;
        } else {
            pthread_mutex_lock(&c->thread->stats.mutex);
            c->thread->stats.slab_stats[ITEM_clsid(old_it)].cas_badval++;
            pthread_mutex_unlock(&c->thread->stats.mutex);

            if (settings.verbose > 1) {
                fprintf(stderr, "CAS: failure: expected %llu, got %llu\n",
                        (unsigned long long)ITEM_get_cas(old_it),
                        (unsigned long long)ITEM_get_cas(it));
            }
            stored = EXISTS;
        }
        /* ----- cas handling: end ----- */
    } else {
        /* everything else: the cases that are expected to store */
        /*
         * Append - combine new and old record into single one. Here it's
         * atomic and thread-safe.
         */
        if (comm == NREAD_APPEND || comm == NREAD_PREPEND) {
            /* ----- append / prepend: begin ----- */
            /*
             * Validate CAS
             */
            if (ITEM_get_cas(it) != 0) {
                /* CAS must be equal */
                if (ITEM_get_cas(it) != ITEM_get_cas(old_it)) {
                    stored = EXISTS;
                }
            }

            if (stored == NOT_STORED) {
                /* build the combined item */
                /* we have it and old_it here - alloc memory to hold both */
                /* flags was already lost - so recover them from ITEM_suffix(old_it) */
                flags = (int) strtol(ITEM_suffix(old_it), (char **) NULL, 10);

                new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime,
                                       it->nbytes + old_it->nbytes - 2 /* CRLF */, hv);

                if (new_it == NULL) {
                    /* SERVER_ERROR out of memory */
                    if (old_it != NULL)
                        do_item_remove(old_it);

                    return NOT_STORED;
                }

                /* copy data from it and old_it to new_it */
                if (comm == NREAD_APPEND) {
                    memcpy(ITEM_data(new_it), ITEM_data(old_it), old_it->nbytes);
                    memcpy(ITEM_data(new_it) + old_it->nbytes - 2 /* CRLF */,
                           ITEM_data(it), it->nbytes);
                } else {
                    /* NREAD_PREPEND */
                    memcpy(ITEM_data(new_it), ITEM_data(it), it->nbytes);
                    memcpy(ITEM_data(new_it) + it->nbytes - 2 /* CRLF */,
                           ITEM_data(old_it), old_it->nbytes);
                }

                it = new_it;
            }
            /* ----- append / prepend: end ----- */
        }

        if (stored == NOT_STORED) {
            if (old_it != NULL)
                item_replace(old_it, it, hv);
            else
                do_item_link(it, hv);

            c->cas = ITEM_get_cas(it);

            stored = STORED;
        }
    }

    /* hand back the references taken on the old and the merged item */
    if (old_it != NULL)
        do_item_remove(old_it);
    if (new_it != NULL)
        do_item_remove(new_it);

    if (stored == STORED) {
        c->cas = ITEM_get_cas(it);
    }

    return stored;
}
参考
memcached connection的状态:http://www.cnblogs.com/ilfmonday/p/memcached_conn.html
posted on 2015-07-08 19:48 Emmerich.Luang 阅读(514) 评论(0) 编辑 收藏 举报