sheep.h
1 static inline uint64_t 2 node_disk_to_vnodes(const struct sd_node *n, struct rb_root *vroot) 3 { 4 5 uint64_t node_hval = sd_hash(&n->nid, offsetof(typeof(n->nid), 6 io_addr)); 7 uint64_t hval, disk_vnodes, total = 0; 8 9 for (int j = 0; j < DISK_MAX; j++) { 10 if (!n->disks[j].disk_id) 11 continue; 12 hval = fnv_64a_64(node_hval, n->disks[j].disk_id); 13 disk_vnodes = DIV_ROUND_UP(n->disks[j].disk_space, WEIGHT_MIN); 14 total += disk_vnodes; 15 for (int k = 0; k < disk_vnodes; k++) { 16 hval = sd_hash_next(hval); 17 struct sd_vnode *v = xmalloc(sizeof(*v)); 18 v->hash = hval; 19 v->node = n; 20 if (unlikely(rb_insert(vroot, v, rb, vnode_cmp))) 21 panic("vdisk hash collison"); 22 } 23 } 24 return total; 25 } 26 // 计算该节点上的disk一共需要对应多少个vnode 27 static inline void 28 disks_to_vnodes(struct rb_root *nroot, struct rb_root *vroot) 29 { 30 struct sd_node *n; 31 32 rb_for_each_entry(n, nroot, rb) 33 n->nr_vnodes = node_disk_to_vnodes(n, vroot); 34 }
nodes到vnodes的映射:
1 static inline void 2 node_to_vnodes(const struct sd_node *n, struct rb_root *vroot) 3 { 4 uint64_t hval = sd_hash(&n->nid, offsetof(typeof(n->nid), 5 io_addr)); 6 // 逐个记录vnode,vnode信息包含:vnode的hash值,vnode所归属的node的信息 7 for (int i = 0; i < n->nr_vnodes; i++) { 8 struct sd_vnode *v = xmalloc(sizeof(*v)); 9 10 hval = sd_hash_next(hval); 11 v->hash = hval; 12 v->node = n; 13 if (unlikely(rb_insert(vroot, v, rb, vnode_cmp))) 14 panic("vdisk hash collison"); 15 } 16 } 17 18 static inline void 19 nodes_to_vnodes(struct rb_root *nroot, struct rb_root *vroot) 20 { 21 struct sd_node *n; 22 // 从nroot出发逐个去为node分配vnodes 23 rb_for_each_entry(n, nroot, rb) 24 node_to_vnodes(n, vroot); 25 }
sheep.h文件
1 // vnode的结构体:包含所依附的node和hash信息 2 struct sd_vnode { 3 struct rb_node rb; 4 const struct sd_node *node; 5 uint64_t hash; 6 }; 7 // vnode基本信息 8 struct vnode_info { 9 struct rb_root vroot; // vnode的红黑树根 10 struct rb_root nroot; // node的红黑树根 11 int nr_nodes; // 映射的nodes的个数 ? 12 int nr_zones; // 映射的分区的个数,应该一样啊 ? 13 refcnt_t refcnt; // 引用计数 14 }; 15 16 static inline void sd_init_req(struct sd_req *req, uint8_t opcode) 17 { 18 memset(req, 0, sizeof(*req)); 19 req->opcode = opcode; 20 req->proto_ver = opcode < 0x80 ? SD_PROTO_VER : SD_SHEEP_PROTO_VER; 21 } 22 // 判断两个vnode是否为同一个zone 23 static inline int same_zone(const struct sd_vnode *v1, 24 const struct sd_vnode *v2) 25 { 26 return v1->node->zone == v2->node->zone; 27 } 28 // 判断是否是同一个vnode 29 static inline int vnode_cmp(const struct sd_vnode *node1, 30 const struct sd_vnode *node2) 31 { 32 return intcmp(node1->hash, node2->hash); 33 } 34 /* If v1_hash < oid_hash <= v2_hash, then oid is resident on v2 */ 35 static inline struct sd_vnode * 36 oid_to_first_vnode(uint64_t oid, struct rb_root *root) 37 { 38 struct sd_vnode dummy = { 39 .hash = sd_hash_oid(oid), 40 }; 41 return rb_nsearch(root, &dummy, rb, vnode_cmp); 42 } 43 /* Replica are placed along the ring one by one with different zones */ 44 static inline void oid_to_vnodes(uint64_t oid, struct rb_root *root, 45 int nr_copies, 46 const struct sd_vnode **vnodes) 47 { 48 const struct sd_vnode *next = oid_to_first_vnode(oid, root); 49 50 vnodes[0] = next; 51 for (int i = 1; i < nr_copies; i++) { 52 next: 53 next = rb_entry(rb_next(&next->rb), struct sd_vnode, rb); 54 if (!next) /* Wrap around */ 55 next = rb_entry(rb_first(root), struct sd_vnode, rb); 56 if (unlikely(next == vnodes[0])) 57 panic("can't find a valid vnode"); 58 for (int j = 0; j < i; j++) 59 if (same_zone(vnodes[j], next)) 60 goto next; 61 vnodes[i] = next; 62 } 63 } 64 65 static inline const struct sd_vnode * 66 oid_to_vnode(uint64_t oid, struct rb_root *root, int copy_idx) 67 { 68 const struct sd_vnode *vnodes[SD_MAX_COPIES]; 69 70 oid_to_vnodes(oid, root, copy_idx + 1, vnodes); 71 72 return vnodes[copy_idx]; 73 } 74 75 static inline const struct sd_node * 76 oid_to_node(uint64_t oid, struct rb_root *root, int copy_idx) 77 { 78 const struct sd_vnode *vnode; 79 80 vnode = oid_to_vnode(oid, root, copy_idx); 81 82 return vnode->node; 83 } 84 // 从oid获取其所在的vnode,返回值在nodes参数中 85 static inline void oid_to_nodes(uint64_t oid, struct rb_root *root, 86 int nr_copies, 87 const struct sd_node **nodes) 88 { 89 const struct sd_vnode *vnodes[SD_MAX_COPIES]; 90 91 oid_to_vnodes(oid, root, nr_copies, vnodes); 92 for (int i = 0; i < nr_copies; i++) 93 nodes[i] = vnodes[i]->node; 94 } 95 96 static inline int oid_cmp(const uint64_t *oid1, const uint64_t *oid2) 97 { 98 return intcmp(*oid1, *oid2); 99 } 100 101 static inline int node_id_cmp(const struct node_id *node1, 102 const struct node_id *node2) 103 { 104 int cmp = memcmp(node1->addr, node2->addr, sizeof(node1->addr)); 105 if (cmp != 0) 106 return cmp; 107 108 return intcmp(node1->port, node2->port); 109 } 110 111 static inline int node_cmp(const struct sd_node *node1, 112 const struct sd_node *node2) 113 { 114 return node_id_cmp(&node1->nid, &node2->nid); 115 } 116 117 static inline int oid_entry_cmp(const struct oid_entry *entry1, 118 const struct oid_entry *entry2) 119 { 120 return node_cmp(entry1->node, entry2->node); 121 } 122 123 static inline bool node_eq(const struct sd_node *a, const struct sd_node *b) 124 { 125 return node_cmp(a, b) == 0; 126 } 127 // 为node的disks分配vnode,记录在rbtree中 128 static inline uint64_t 129 node_disk_to_vnodes(const struct sd_node *n, struct rb_root *vroot) 130 { 131 132 uint64_t node_hval = sd_hash(&n->nid, offsetof(typeof(n->nid), 133 io_addr)); 134 uint64_t hval, disk_vnodes, total = 0; 135 136 for (int j = 0; j < DISK_MAX; j++) { 137 if (!n->disks[j].disk_id) 138 continue; 139 hval = fnv_64a_64(node_hval, n->disks[j].disk_id); 140 disk_vnodes = DIV_ROUND_UP(n->disks[j].disk_space, WEIGHT_MIN); 141 total += disk_vnodes; 142 for (int k = 0; k < disk_vnodes; k++) { 143 hval = sd_hash_next(hval); 144 struct sd_vnode *v = xmalloc(sizeof(*v)); 145 v->hash = hval; 146 v->node = n; 147 if (unlikely(rb_insert(vroot, v, rb, vnode_cmp))) 148 panic("vdisk hash collison"); 149 } 150 } 151 return total; 152 } 153 // 一个节点对应的vnode的个数 154 static inline void 155 disks_to_vnodes(struct rb_root *nroot, struct rb_root *vroot) 156 { 157 struct sd_node *n; 158 159 rb_for_each_entry(n, nroot, rb) 160 n->nr_vnodes = node_disk_to_vnodes(n, vroot); 161 } 162 // 非disk模式下:disk到vnode的映射 163 static inline void 164 node_to_vnodes(const struct sd_node *n, struct rb_root *vroot) 165 { 166 uint64_t hval = sd_hash(&n->nid, offsetof(typeof(n->nid), 167 io_addr)); 168 169 for (int i = 0; i < n->nr_vnodes; i++) { 170 struct sd_vnode *v = xmalloc(sizeof(*v)); 171 172 hval = sd_hash_next(hval); 173 v->hash = hval; 174 v->node = n; 175 if (unlikely(rb_insert(vroot, v, rb, vnode_cmp))) 176 panic("vdisk hash collison"); 177 } 178 } 179 180 static inline void 181 nodes_to_vnodes(struct rb_root *nroot, struct rb_root *vroot) 182 { 183 struct sd_node *n; 184 185 rb_for_each_entry(n, nroot, rb) 186 node_to_vnodes(n, vroot); 187 } 188 189 // 判断cluster的模式:diskmode或者非disk 190 static inline bool is_cluster_diskmode(const struct cluster_info *cinfo) 191 { 192 return (cinfo->flags & SD_CLUSTER_FLAG_DISKMODE) > 0; 193 } 194 195 static inline bool is_cluster_autovnodes(const struct cluster_info *cinfo) 196 { 197 return (cinfo->flags & SD_CLUSTER_FLAG_AUTO_VNODES) > 0; 198 } 199 200 static inline size_t count_data_objs(const struct sd_inode *inode) 201 { 202 return DIV_ROUND_UP(inode->vdi_size, 203 (1UL << inode->block_size_shift)); 204 } 205 206 static inline __attribute__((used)) void __sd_proto_build_bug_ons(void) 207 { 208 /* never called, only for checking BUILD_BUG_ON()s */ 209 BUILD_BUG_ON(sizeof(struct sd_req) != SD_REQ_SIZE); 210 BUILD_BUG_ON(sizeof(struct sd_rsp) != SD_RSP_SIZE); 211 } 212 213 #define SD_FORMAT_VERSION 0x0006 214 #define SD_CONFIG_SIZE 40 215 216 struct sheepdog_config { 217 uint64_t ctime; 218 uint16_t flags; 219 uint8_t copies; 220 uint8_t default_store[STORE_LEN]; 221 uint8_t shutdown; 222 uint8_t copy_policy; 223 uint8_t block_size_shift; 224 uint16_t version; 225 uint64_t space; 226 };