md.c-记录
红黑树的操作:
/*
 * Insert a new entry into the rbtree.
 *
 * root:   pointer to the tree root (its ->rb_node field is read)
 * new:    pointer to the entry to insert
 * member: name of the struct rb_node field embedded in *new
 * compar: callable invoked as compar(new, existing); a negative result
 *         descends to the left child, a positive one to the right child
 *         and zero means the two entries are equal
 *
 * Evaluates to NULL on success. If an entry comparing equal to `new` is
 * already in the tree, nothing is inserted and the expression evaluates
 * to that existing entry.
 *
 * `root` and `new` are captured once, so arguments with side effects are
 * evaluated a single time. Relies on GNU statement expressions and typeof.
 */
#define rb_insert(root, new, member, compar)				\
({									\
	typeof(root) __root = (root);					\
	typeof(new) __new = (new);					\
	struct rb_node **__n = &__root->rb_node, *__parent = NULL;	\
	typeof(new) __old = NULL, __data;				\
									\
	while (*__n) {							\
		__data = rb_entry(*__n, typeof(*__new), member);	\
		int __cmp = compar(__new, __data);			\
									\
		__parent = *__n;					\
		if (__cmp < 0)						\
			__n = &((*__n)->rb_left);			\
		else if (__cmp > 0)					\
			__n = &((*__n)->rb_right);			\
		else {							\
			__old = __data;					\
			break;						\
		}							\
	}								\
									\
	if (__old == NULL) {						\
		/* Add new node and rebalance tree. */			\
		rb_link_node(&__new->member, __parent, __n);		\
		rb_insert_color(&__new->member, __root);		\
	}								\
									\
	__old;								\
})
在rbtree中查找一个节点:
/*
 * Search for a value in the rbtree.
 *
 * root:   pointer to the tree root
 * key:    pointer to an entry holding the value to look for
 * member: name of the struct rb_node field embedded in *key
 * compar: callable invoked as compar(key, existing)
 *
 * Evaluates to the entry equal to `key` if there is one. When the key is
 * not found, this evaluates to the next greater entry. Note, if key is
 * greater than the greatest entry, the first entry of the tree is returned
 * (wrap-around).
 *
 * For an empty tree the result is NULL.
 *
 * `root` and `key` are captured once, so arguments with side effects are
 * evaluated a single time. Relies on GNU statement expressions and typeof.
 */
#define rb_nsearch(root, key, member, compar)				\
({									\
	typeof(root) __root = (root);					\
	typeof(key) __key = (key);					\
	struct rb_node *__n = __root->rb_node;				\
	typeof(key) __ret = NULL, __data;				\
									\
	while (__n) {							\
		__data = rb_entry(__n, typeof(*__key), member);		\
		int __cmp = compar(__key, __data);			\
									\
		if (__cmp < 0) {					\
			/* Candidate for "next greater"; look for a closer one. */ \
			__ret = __data;					\
			__n = __n->rb_left;				\
		} else if (__cmp > 0)					\
			__n = __n->rb_right;				\
		else {							\
			__ret = __data;					\
			break;						\
		}							\
	}								\
	/* Key is beyond the greatest entry: wrap around to the first. */ \
	if (!__ret && !RB_EMPTY_ROOT(__root))				\
		__ret = rb_entry(rb_first(__root), typeof(*__key), member); \
	__ret;								\
})
创建vdisks:一个物理disk的记录就是一棵红黑子树,如果删除该disk,那么对应的是删除整棵子树:
1 static void create_vdisks(const struct disk *disk) 2 { 3 //计算disk路径的hash值 4 uint64_t hval = sd_hash(disk->path, strlen(disk->path)); 5 const struct sd_node *n = &sys->this_node; 6 uint64_t node_hval; 7 int nr; 8 // 判断当前使用的模式:Disk mode for cluster 9 if (is_cluster_diskmode(&sys->cinfo)) { 10 // node的hash值 11 node_hval = sd_hash(&n->nid, offsetof(typeof(n->nid), io_addr)); 13 hval = fnv_64a_64(node_hval, hval); 14 // nr即将disk按照4G大小划分后的个数 15 nr = DIV_ROUND_UP(disk->space, WEIGHT_MIN); 16 if (0 == n->nid.port) 17 return; 18 } else 19 // 按照16M空间大小划分磁盘空间后的个数 20 nr = vdisk_number(disk); 21 // 上述主要是获取到vdisk的个数,接下来将vdisk信息存储在rbtree中 22 for (int i = 0; i < nr; i++) { 23 struct vdisk *v = xmalloc(sizeof(*v)); 25 hval = sd_hash_next(hval); 26 v->hash = hval; 27 v->disk = disk; 28 if (unlikely(vdisk_insert(v))) // 在rbtree中插入vdisk,树的根在md.vroot这样的全局变量中 29 panic("vdisk hash collison"); 30 } 31 }
添加一个物理磁盘:
1 /* We don't need lock at init stage */
2 bool md_add_disk(const char *path, bool purge)
3 {
4 struct disk *new;
5 // 是否已经存在
6 if (path_to_disk(path)) {
7 sd_err("duplicate path %s", path);
8 return false;
9 }
10 // 创建相应的文件夹目录
11 if (xmkdir(path, sd_def_dmode) < 0) {
12 sd_err("can't mkdir for %s, %m", path);
13 return false;
14 }
15
16 new = xmalloc(sizeof(*new));
17 pstrcpy(new->path, PATH_MAX, path);
18 trim_last_slash(new->path);
19 new->space = init_path_space(new->path, purge);
20 if (!new->space) {
21 free(new);
22 return false;
23 }
24 // 创建相应的vdisks
25 create_vdisks(new);
26 rb_insert(&md.root, new, rb, disk_cmp);
27 md.space += new->space;
28 md.nr_disks++;
29
30 sd_info("%s, vdisk nr %d, total disk %d", new->path, vdisk_number(new),md.nr_disks);
31 return true;
32 }
与之相反的是删除vdisk:
1 static inline void vdisk_free(struct vdisk *v) 2 { 3 // 删除树的结构 4 rb_erase(&v->rb, &md.vroot); 5 free(v); 6 } 7 8 static void remove_vdisks(const struct disk *disk) 9 { 10 uint64_t hval = sd_hash(disk->path, strlen(disk->path)); 11 const struct sd_node *n = &sys->this_node; 12 uint64_t node_hval; 13 int nr; 14 15 if (is_cluster_diskmode(&sys->cinfo)) { 16 node_hval = sd_hash(&n->nid, offsetof(typeof(n->nid), io_addr)); 17 hval = fnv_64a_64(node_hval, hval); 18 nr = DIV_ROUND_UP(disk->space, WEIGHT_MIN); 19 } else 20 nr = vdisk_number(disk); 21 22 for (int i = 0; i < nr; i++) { 23 struct vdisk *v; 24 25 hval = sd_hash_next(hval); 26 v = hval_to_vdisk(hval); 27 sd_assert(v->hash == hval); 28 // 逐个释放vdisk 29 vdisk_free(v); 30 } 31 }
删除物理disk操作:
1 static void remove_vdisks(const struct disk *disk) 2 { 3 uint64_t hval = sd_hash(disk->path, strlen(disk->path)); 4 const struct sd_node *n = &sys->this_node; 5 uint64_t node_hval; 6 int nr; 7 8 if (is_cluster_diskmode(&sys->cinfo)) { 9 node_hval = sd_hash(&n->nid, offsetof(typeof(n->nid), io_addr)); 10 hval = fnv_64a_64(node_hval, hval); 11 nr = DIV_ROUND_UP(disk->space, WEIGHT_MIN); 12 } else 13 nr = vdisk_number(disk); 14 15 for (int i = 0; i < nr; i++) { 16 struct vdisk *v; 17 18 hval = sd_hash_next(hval); 19 v = hval_to_vdisk(hval); 20 sd_assert(v->hash == hval); 21 22 vdisk_free(v); 23 } 24 } 25 26 27 static inline void md_remove_disk(struct disk *disk) 28 { 29 sd_info("%s from multi-disk array", disk->path); 30 rb_erase(&disk->rb, &md.root); 31 md.nr_disks--; 32 remove_vdisks(disk); 33 free(disk); 34 } 35 36 static inline void md_del_disk(const char *path) 37 { 38 struct disk *disk = path_to_disk(path); 39 40 if (!disk) { 41 sd_err("invalid path %s", path); 42 return; 43 } 44 md_remove_disk(disk); 45 }
更新node的disks信息;该函数仅在定义了HAVE_DISKVNODES(即配置为disk vnodes模式)时才有实际内容,否则是一个空函数:
#ifdef HAVE_DISKVNODES
/*
 * Refresh sys->this_node.disks from the current set of disks, then rebuild
 * the vdisks of every disk.
 *
 * Does nothing when `sys` is not set. At most DISK_MAX entries are filled
 * in, which is the number of entries cleared by the memset below.
 */
void update_node_disks(void)
{
	const struct disk *disk;
	int i = 0;
	bool rb_empty;

	if (!sys)
		return;

	memset(sys->this_node.disks, 0, sizeof(struct disk_info) * DISK_MAX);
	sd_read_lock(&md.lock);
	rb_for_each_entry(disk, &md.root, rb) {
		/* Never write past the DISK_MAX entries cleared above. */
		if (i < DISK_MAX) {
			sys->this_node.disks[i].disk_id =
				sd_hash(disk->path, strlen(disk->path));
			sys->this_node.disks[i].disk_space = disk->space;
		}
		i++;
	}
	sd_rw_unlock(&md.lock);

	if (i > DISK_MAX)
		sd_warn("too many disks, only the first DISK_MAX are reported");

	sd_write_lock(&md.lock);
	/*
	 * Sample the state of the vdisk tree while holding the write lock so
	 * that it cannot change between the check and the rebuild.
	 */
	rb_empty = RB_EMPTY_ROOT(&md.vroot);
	rb_for_each_entry(disk, &md.root, rb) {
		/* With an empty vdisk tree there is nothing to remove. */
		if (!rb_empty)
			remove_vdisks(disk);
		create_vdisks(disk);
	}
	sd_rw_unlock(&md.lock);
}
#else
/* Without HAVE_DISKVNODES there is nothing to update. */
void update_node_disks(void)
{
}
#endif
本地磁盘插拔操作:
1 static int do_plug_unplug(char *disks, bool plug) 2 { 3 const char *path; 4 int old_nr, new_nr, ret = SD_RES_UNKNOWN; 5 6 sd_write_lock(&md.lock); 7 old_nr = md.nr_disks; 8 path = strtok(disks, ","); 9 do { 10 if (plug) { 11 if (!md_add_disk(path, true)) 12 sd_err("failed to add %s", path); 13 } else { 14 md_del_disk(path); 15 } 16 } while ((path = strtok(NULL, ","))); 17 new_nr = md.nr_disks; 18 19 /* If no disks change, bail out */ 20 if (old_nr == new_nr) 21 goto out; 22 23 ret = SD_RES_SUCCESS; 24 out: 25 sd_rw_unlock(&md.lock); 26 27 if (ret == SD_RES_SUCCESS) { 28 if (new_nr > 0) { 29 update_node_disks(); 30 kick_recover(); 31 } else { 32 sd_warn("no disks plugged, going down"); 33 leave_cluster(); 34 sys->cinfo.status = SD_STATUS_KILLED; 35 } 36 } 37 38 return ret; 39 } 40 41 int md_plug_disks(char *disks) 42 { 43 return do_plug_unplug(disks, true); 44 } 45 46 int md_unplug_disks(char *disks) 47 { 48 return do_plug_unplug(disks, false); 49 }
oid到vdisk的映射:
1 /* If v1_hash < hval <= v2_hash, then oid is resident in v2, 在rbtree中寻找其位置*/ 2 static struct vdisk *hval_to_vdisk(uint64_t hval) 3 { 4 struct vdisk dummy = { .hash = hval }; 5 6 return rb_nsearch(&md.vroot, &dummy, rb, vdisk_cmp); 7 } 8 /* 将oid的hash值作为入参 */ 9 static struct vdisk *oid_to_vdisk(uint64_t oid) 10 { 11 return hval_to_vdisk(sd_hash_oid(oid)); 12 }