Creating a Ceph CRUSH map for tiered hot/cold data storage on HDD and SSD disks
Ceph CRUSH
A Ceph cluster's mon servers maintain five maps:
1. Monitor map: the monitor map
2. OSD map: the OSD map
3. PG map: the placement group map
4. CRUSH map: Controlled Replication Under Scalable Hashing, a controlled, replicated and scalable consistent-hashing placement algorithm
5. MDS map: the CephFS metadata map
CRUSH map: when a new pool is created, a new set of PG-to-OSD mappings is generated from the OSD map, and these PGs are used to store the pool's data.
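Each of these maps can be inspected from the command line with standard ceph subcommands, for example (output omitted):
# view each of the maps maintained by the mons
ceph mon dump           # monitor map
ceph osd dump           # OSD map
ceph pg dump pgs_brief  # PG map (brief form)
ceph osd crush dump     # CRUSH map (JSON)
ceph fs dump            # MDS / CephFS map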
How the CRUSH algorithm selects target nodes:
Five bucket algorithms are available: Uniform, List, Tree, Straw and Straw2. Early releases used the straw algorithm invented by the Ceph project's founder; it has since been superseded by the community-optimized straw2.
straw (draw-straws algorithm):
Drawing straws means picking the longest straw, and a straw's length corresponds to an OSD's weight. When a pool is created, OSDs are assigned to its PGs; the straw algorithm iterates over the currently available OSDs and prefers the winning draw, so that higher-weight OSDs receive more PGs and therefore store more data.
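To see how these weights translate into placement, the exported CRUSH map can be dry-run with crushtool; a minimal sketch, assuming rule 0 is the default replicated rule and /tmp/cm is just a scratch path:
# export the in-use CRUSH map and simulate 3-replica placement under rule 0;
# --show-utilization prints how many inputs land on each OSD, which tracks the weights
ceph osd getcrushmap -o /tmp/cm
crushtool -i /tmp/cm --test --rule 0 --num-rep 3 --show-utilization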
Manually editing the CRUSH map
Export the CRUSH map
root@ceph-deploy:/opt/crush# ceph osd getcrushmap -o ./crushmap-v3
Decompile the map to text
root@ceph-deploy:/opt/crush# crushtool -d ./crushmap-v3 > ./crushmap-v3.txt
Adding the HDD (spinning disk) rule
Add the custom configuration
Add a CRUSH map rule for pools backed by HDD disks, excluding each storage node's SSD disk. The excluded disks are osd.4, osd.9, osd.14 and osd.19.
Three kinds of entries are added:
1. bucket: the host-to-OSD layout
2. root: the mapping of hosts (and their OSDs) into a root
3. rules: the pool placement rule
# add host buckets for hdd-backed pools
############hdd buckets###############
host ceph-hddnode1 {
id -13 # do not change unnecessarily
id -14 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
}
host ceph-hddnode2 {
id -15 # do not change unnecessarily
id -16 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
}
host ceph-hddnode3 {
id -17 # do not change unnecessarily
id -18 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
}
host ceph-hddnode4 {
id -19 # do not change unnecessarily
id -20 class hdd # do not change unnecessarily
# weight 0.472
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.080
}
root default {
id -1 # do not change unnecessarily
id -2 class hdd # do not change unnecessarily
# weight 1.935
alg straw2
hash 0 # rjenkins1
item ceph-node1 weight 0.488
item ceph-node2 weight 0.488
item ceph-node3 weight 0.488
item ceph-node4 weight 0.471
}
# add the root (OSD mapping) for hdd-backed pools
########/hdd root/########
root hdd {
id -21 # do not change unnecessarily
id -22 class hdd # do not change unnecessarily
# weight 1.935
alg straw2
hash 0 # rjenkins1
item ceph-hddnode1 weight 0.488
item ceph-hddnode2 weight 0.488
item ceph-hddnode3 weight 0.488
item ceph-hddnode4 weight 0.471
}
# add the replication rule for hdd-backed pools
######/hdd rules/######
rule hdd_replicated_rule {
id 30
type replicated
min_size 1
max_size 12
step take hdd # take the hdd root defined above
step chooseleaf firstn 0 type host
step emit
}
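The bucket ids chosen above (-13 through -22) and the rule id 30 must not collide with ids already present in the map; the ids currently in use can be listed from the decompiled text before editing, for example:
# list bucket ids already defined in the decompiled map
grep -nE 'id -[0-9]+' ./crushmap-v3.txt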
The complete CRUSH map now looks like this:
root@ceph-deploy:/opt/crush# cat crushmap-v3.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root
# buckets
host ceph-node1 {
id -3 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
item osd.4 weight 0.098
}
host ceph-node2 {
id -5 # do not change unnecessarily
id -6 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
item osd.9 weight 0.098
}
host ceph-node3 {
id -7 # do not change unnecessarily
id -8 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
item osd.14 weight 0.098
}
host ceph-node4 {
id -9 # do not change unnecessarily
id -10 class hdd # do not change unnecessarily
# weight 0.472
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.080
item osd.19 weight 0.098
}
# add host buckets for hdd-backed pools
############hdd buckets###############
host ceph-hddnode1 {
id -13 # do not change unnecessarily
id -14 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
}
host ceph-hddnode2 {
id -15 # do not change unnecessarily
id -16 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
}
host ceph-hddnode3 {
id -17 # do not change unnecessarily
id -18 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
}
host ceph-hddnode4 {
id -19 # do not change unnecessarily
id -20 class hdd # do not change unnecessarily
# weight 0.472
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.080
}
root default {
id -1 # do not change unnecessarily
id -2 class hdd # do not change unnecessarily
# weight 1.935
alg straw2
hash 0 # rjenkins1
item ceph-node1 weight 0.488
item ceph-node2 weight 0.488
item ceph-node3 weight 0.488
item ceph-node4 weight 0.471
}
# add the root (OSD mapping) for hdd-backed pools
########/hdd root/########
root hdd {
id -21 # do not change unnecessarily
id -22 class hdd # do not change unnecessarily
# weight 1.935
alg straw2
hash 0 # rjenkins1
item ceph-hddnode1 weight 0.488
item ceph-hddnode2 weight 0.488
item ceph-hddnode3 weight 0.488
item ceph-hddnode4 weight 0.471
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 12
step take default
step chooseleaf firstn 0 type host
step emit
}
# add the replication rule for hdd-backed pools
######/hdd rules/######
rule hdd_replicated_rule {
id 30
type replicated
min_size 1
max_size 12
step take hdd # take the hdd root defined above
step chooseleaf firstn 0 type host
step emit
}
rule erasure-code {
id 1
type erasure
min_size 3
max_size 4
step set_chooseleaf_tries 5
step set_choose_tries 100
step take default
step chooseleaf indep 0 type host
step emit
}
# end crush map
Compile the text file back into binary CRUSH format
root@ceph-deploy:/opt/crush# crushtool -c ./crushmap-v3.txt -o crushmap-v4
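Before injecting the compiled map, it can be worth decompiling it again and diffing it against the edited text to confirm nothing was lost in the round trip (the diff may only show whitespace or ordering normalization):
# optional sanity check: decompile the new binary map and compare with the edited text
crushtool -d ./crushmap-v4 -o ./crushmap-v4.txt
diff ./crushmap-v3.txt ./crushmap-v4.txt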
Inject the new CRUSH map
root@ceph-deploy:/opt/crush# ceph osd setcrushmap -i ./crushmap-v4
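If the new map causes problems, the map exported earlier can be re-injected to roll back:
# roll back to the previously exported map if needed
ceph osd setcrushmap -i ./crushmap-v3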
Verify the CRUSH rules
root@ceph-deploy:/opt/crush# ceph osd crush rule dump
[
{
"rule_id": 0,
"rule_name": "replicated_rule",
"ruleset": 0,
"type": 1,
"min_size": 1,
"max_size": 12,
"steps": [
{
"op": "take",
"item": -1,
"item_name": "default"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": 1,
"rule_name": "erasure-code",
"ruleset": 1,
"type": 3,
"min_size": 3,
"max_size": 4,
"steps": [
{
"op": "set_chooseleaf_tries",
"num": 5
},
{
"op": "set_choose_tries",
"num": 100
},
{
"op": "take",
"item": -1,
"item_name": "default"
},
{
"op": "chooseleaf_indep",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": 30,
"rule_name": "hdd_replicated_rule",
"ruleset": 30,
"type": 1,
"min_size": 1,
"max_size": 12,
"steps": [
{
"op": "take",
"item": -21,
"item_name": "hdd"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
}
]
Applying the rule to an existing pool
Note: applying a new rule to an existing pool immediately triggers PG data migration!
cephadmin@ceph-deploy:~$ ceph osd pool set cephfs-data crush_rule hdd_replicated_rule
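The rule now assigned to the pool can be confirmed with:
# show the crush rule currently used by the pool
ceph osd pool get cephfs-data crush_rule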
Test: create a pool that uses the hdd rule
The command format for creating a pool with a specific rule is:
osd pool create <pool> [<pg_num:int>] [<pgp_num:int>] [replicated|erasure] [<erasure_code_profile>] [<rule>]
By default, if no rule is specified when creating a pool, the CRUSH map's default replicated_rule is used.
Create the hdd pool, specifying the hdd rule:
root@ceph-deploy:/opt/crush# ceph osd pool create hddpool 32 32 hdd_replicated_rule
Verify the PG mappings
The PG mappings of hddpool should not include any of the SSD OSDs (osd.4, 9, 14, 19).
The lookups below return no SSD OSDs, so hddpool was created successfully.
root@ceph-deploy:/opt/crush# ceph pg ls-by-pool hddpool | awk '{print $1,$2,$15}'|grep 4
20.4 0 [1,7,13]p1
20.14 0 [5,16,2]p5
root@ceph-deploy:/opt/crush# ceph pg ls-by-pool hddpool | awk '{print $1,$2,$15}'|grep 9
20.9 0 [7,11,15]p7
20.19 0 [2,16,8]p2
root@ceph-deploy:/opt/crush# ceph pg ls-by-pool hddpool | awk '{print $1,$2,$15}'|grep 14
20.14 0 [5,16,2]p5
root@ceph-deploy:/opt/crush# ceph pg ls-by-pool hddpool | awk '{print $1,$2,$15}'|grep 19
20.19 0 [2,16,8]p2
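As a stricter check (a plain grep for 4 also matches PG ids such as 20.4 and 20.14), all OSD ids appearing in the acting sets can be listed and compared against the excluded ones; a sketch, assuming the acting set is still the 15th column as above:
# list every OSD id that occurs in any acting set of hddpool; 4, 9, 14 and 19 should be absent
ceph pg ls-by-pool hddpool | awk 'NR>1 {print $15}' | grep -owE '[0-9]+' | sort -un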
The complete PG mappings are as follows:
root@ceph-deploy:/opt/crush# ceph pg ls-by-pool hddpool | awk '{print $1,$2,$15}'
PG OBJECTS ACTING
20.0 0 [3,12,16]p3
20.1 0 [0,8,17]p0
20.2 0 [3,13,5]p3
20.3 0 [16,13,2]p16
20.4 0 [1,7,13]p1
20.5 0 [1,18,5]p1
20.6 0 [10,18,6]p10
20.7 0 [17,8,10]p17
20.8 0 [13,7,15]p13
20.9 0 [7,11,15]p7
20.a 0 [2,5,13]p2
20.b 0 [0,8,12]p0
20.c 0 [15,5,12]p15
20.d 0 [8,15,0]p8
20.e 0 [10,6,1]p10
20.f 0 [2,16,13]p2
20.10 0 [13,18,1]p13
20.11 0 [11,16,8]p11
20.12 0 [18,13,1]p18
20.13 0 [6,0,15]p6
20.14 0 [5,16,2]p5
20.15 0 [12,5,2]p12
20.16 0 [18,5,1]p18
20.17 0 [11,6,0]p11
20.18 0 [3,6,17]p3
20.19 0 [2,16,8]p2
20.1a 0 [1,8,11]p1
20.1b 0 [8,16,11]p8
20.1c 0 [15,6,2]p15
20.1d 0 [16,5,12]p16
20.1e 0 [10,5,15]p10
20.1f 0 [15,5,13]p15
Adding the SSD (solid-state disk) rule
Add the custom configuration
Add a CRUSH map rule for pools backed by SSD disks. The SSD disks are osd.4, osd.9, osd.14 and osd.19; since the SSDs are simulated with HDDs, their device class remains hdd.
Three kinds of entries are added:
1. bucket: the host-to-OSD layout
2. root: the mapping of hosts (and their OSDs) into a root
3. rules: the pool placement rule
############ssd buckets###############
host ceph-ssdnode1 {
id -33 # do not change unnecessarily
id -34 class hdd # do not change unnecessarily; the class stays hdd because the SSD is simulated on an HDD
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.4 weight 0.098
}
host ceph-ssdnode2 {
id -35 # do not change unnecessarily
id -36 class hdd # do not change unnecessarily; the class stays hdd because the SSD is simulated on an HDD
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.9 weight 0.098
}
host ceph-ssdnode3 {
id -37 # do not change unnecessarily
id -38 class hdd # do not change unnecessarily; the class stays hdd because the SSD is simulated on an HDD
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.14 weight 0.098
}
host ceph-ssdnode4 {
id -39 # do not change unnecessarily
id -40 class hdd # do not change unnecessarily; the class stays hdd because the SSD is simulated on an HDD
# weight 0.472
alg straw2
hash 0 # rjenkins1
item osd.19 weight 0.098
}
########/ssd root/########
root ssd {
id -41 # do not change unnecessarily
id -42 class hdd # do not change unnecessarily; the class stays hdd because the SSD is simulated on an HDD
# weight 1.935
alg straw2
hash 0 # rjenkins1
item ceph-ssdnode1 weight 0.488
item ceph-ssdnode2 weight 0.488
item ceph-ssdnode3 weight 0.488
item ceph-ssdnode4 weight 0.488
}
######/ssd rules/######
rule ssd_replicated_rule {
id 50
type replicated
min_size 1
max_size 12
step take ssd # take the ssd root defined above
step chooseleaf firstn 0 type host
step emit
}
The complete CRUSH map now looks like this:
root@ceph-deploy:/opt/crush# cat crushmap-v3.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root
# buckets
host ceph-node1 {
id -3 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
item osd.4 weight 0.098
}
host ceph-node2 {
id -5 # do not change unnecessarily
id -6 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
item osd.9 weight 0.098
}
host ceph-node3 {
id -7 # do not change unnecessarily
id -8 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
item osd.14 weight 0.098
}
host ceph-node4 {
id -9 # do not change unnecessarily
id -10 class hdd # do not change unnecessarily
# weight 0.472
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.080
item osd.19 weight 0.098
}
############/hdd buckets/###############
host ceph-hddnode1 {
id -13 # do not change unnecessarily
id -14 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
}
host ceph-hddnode2 {
id -15 # do not change unnecessarily
id -16 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
}
host ceph-hddnode3 {
id -17 # do not change unnecessarily
id -18 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
}
host ceph-hddnode4 {
id -19 # do not change unnecessarily
id -20 class hdd # do not change unnecessarily
# weight 0.472
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.080
}
############/ssd buckets/###############
host ceph-ssdnode1 {
id -33 # do not change unnecessarily
id -34 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.4 weight 0.098
}
host ceph-ssdnode2 {
id -35 # do not change unnecessarily
id -36 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.9 weight 0.098
}
host ceph-ssdnode3 {
id -37 # do not change unnecessarily
id -38 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.14 weight 0.098
}
host ceph-ssdnode4 {
id -39 # do not change unnecessarily
id -40 class hdd # do not change unnecessarily
# weight 0.472
alg straw2
hash 0 # rjenkins1
item osd.19 weight 0.098
}
root default {
id -1 # do not change unnecessarily
id -2 class hdd # do not change unnecessarily
# weight 1.935
alg straw2
hash 0 # rjenkins1
item ceph-node1 weight 0.488
item ceph-node2 weight 0.488
item ceph-node3 weight 0.488
item ceph-node4 weight 0.471
}
########/hdd root/########
root hdd {
id -21 # do not change unnecessarily
id -22 class hdd # do not change unnecessarily
# weight 1.935
alg straw2
hash 0 # rjenkins1
item ceph-hddnode1 weight 0.488
item ceph-hddnode2 weight 0.488
item ceph-hddnode3 weight 0.488
item ceph-hddnode4 weight 0.471
}
########/ssd root/########
root ssd {
id -41 # do not change unnecessarily
id -42 class hdd # do not change unnecessarily
# weight 1.935
alg straw2
hash 0 # rjenkins1
item ceph-ssdnode1 weight 0.488
item ceph-ssdnode2 weight 0.488
item ceph-ssdnode3 weight 0.488
item ceph-ssdnode4 weight 0.488
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 12
step take default
step chooseleaf firstn 0 type host
step emit
}
######/hdd rules/######
rule hdd_replicated_rule {
id 30
type replicated
min_size 1
max_size 12
step take hdd
step chooseleaf firstn 0 type host
step emit
}
######/ssd rules/######
rule ssd_replicated_rule {
id 50
type replicated
min_size 1
max_size 12
step take ssd
step chooseleaf firstn 0 type host
step emit
}
rule erasure-code {
id 1
type erasure
min_size 3
max_size 4
step set_chooseleaf_tries 5
step set_choose_tries 100
step take default
step chooseleaf indep 0 type host
step emit
}
# end crush map
Compile the text file back into binary CRUSH format
root@ceph-deploy:/opt/crush# crushtool -c ./crushmap-v3.txt -o crushmap-v5
Inject the new CRUSH map
root@ceph-deploy:/opt/crush# ceph osd setcrushmap -i ./crushmap-v5
Verify the CRUSH rules
root@ceph-deploy:/opt/crush# ceph osd crush rule dump
[
{
"rule_id": 0,
"rule_name": "replicated_rule",
"ruleset": 0,
"type": 1,
"min_size": 1,
"max_size": 12,
"steps": [
{
"op": "take",
"item": -1,
"item_name": "default"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": 1,
"rule_name": "erasure-code",
"ruleset": 1,
"type": 3,
"min_size": 3,
"max_size": 4,
"steps": [
{
"op": "set_chooseleaf_tries",
"num": 5
},
{
"op": "set_choose_tries",
"num": 100
},
{
"op": "take",
"item": -1,
"item_name": "default"
},
{
"op": "chooseleaf_indep",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": 30,
"rule_name": "hdd_replicated_rule",
"ruleset": 30,
"type": 1,
"min_size": 1,
"max_size": 12,
"steps": [
{
"op": "take",
"item": -21,
"item_name": "hdd"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": 50,
"rule_name": "ssd_replicated_rule",
"ruleset": 50,
"type": 1,
"min_size": 1,
"max_size": 12,
"steps": [
{
"op": "take",
"item": -41,
"item_name": "ssd"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
}
]
Applying the rule to an existing pool
Note: applying a new rule to an existing pool immediately triggers PG data migration!
cephadmin@ceph-deploy:~$ ceph osd pool set cephfs-data crush_rule ssd_replicated_rule
Test: create a pool that uses the ssd rule
The command format for creating a pool with a specific rule is:
osd pool create <pool> [<pg_num:int>] [<pgp_num:int>] [replicated|erasure] [<erasure_code_profile>] [<rule>]
By default, if no rule is specified when creating a pool, the CRUSH map's default replicated_rule is used.
Create the ssd pool, specifying the ssd rule:
root@ceph-deploy:/opt/crush# ceph osd pool create ssdpool 32 32 ssd_replicated_rule
Verify the PG mappings
The PG mappings of ssdpool should include the SSD OSDs (osd.4, 9, 14, 19).
The lookup below does include the SSD OSDs, so ssdpool was created successfully.
root@ceph-deploy:/opt/crush# ceph pg ls-by-pool ssdpool | awk '{print $1,$2,$15}'
Testing the pools
Testing with RBD
Enable the rbd application on the pool
root@ceph-deploy:/opt/crush# ceph osd pool application enable ssdpool rbd
enabled application 'rbd' on pool 'ssdpool'
Create an image
root@ceph-deploy:/opt/crush# rbd create mysql-volume --size 100G --pool ssdpool --image-format 2 --image-feature layering
root@ceph-deploy:/opt/crush# rbd ls --pool ssdpool -l
NAME SIZE PARENT FMT PROT LOCK
mysql-volume 100 GiB 2
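The image can be inspected further if desired:
# show the image's size, object size, features and block-name prefix
rbd info ssdpool/mysql-volume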
Using the RBD image from a client
0. Create and authorize a regular user
# create the user
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth add client.lxhssd mon 'allow r' osd 'allow rwx pool=ssdpool'
# verify the user
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth get client.lxhssd
[client.lxhssd]
key = AQDJf6JjqumMARAAwtJaKMCCSiI4AnpD4KAiHA==
caps mon = "allow r"
caps osd = "allow rwx pool=ssdpool"
exported keyring for client.lxhssd
# create a keyring file for the user
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-authtool --create-keyring ceph.client.lxhssd.keyring
# export the user's credentials into the keyring
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth get client.lxhssd -o ceph.client.lxhssd.keyring
# verify the keyring file
cephadmin@ceph-deploy:~/ceph-cluster$ cat ceph.client.lxhssd.keyring
[client.lxhssd]
key = AQDJf6JjqumMARAAwtJaKMCCSiI4AnpD4KAiHA==
caps mon = "allow r"
caps osd = "allow rwx pool=ssdpool"
1. Copy ceph.conf and the user's keyring to the client
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.lxhssd.keyring root@192.168.100.80:/etc/ceph/
2. Verify cluster access from the client
[root@ansible ~]# ceph --user lxhssd -s
3. Map the RBD image on the client
[root@ansible ceph]# rbd --user lxhssd -p ssdpool map mysql-volume
4. Format and mount the device on the client
[root@ansible ceph]# mkfs.xfs /dev/rbd0
[root@ansible ceph]# mkdir /data
[root@ansible ceph]# mount /dev/rbd0 /data
[root@ansible ceph]# df -h /data
Filesystem Size Used Avail Use% Mounted on
/dev/rbd0 100G 33M 100G 1% /data
Copy a file as a test
[root@ansible ceph]# tar zcf etc.tar.gz /etc
[root@ansible ceph]# cp etc.tar.gz /data/
[root@ansible ceph]# ll /data
total 22196
-rw-r--r-- 1 root root 22728360 Dec 21 11:47 etc.tar.gz
[root@ansible ceph]# df -h /data
Filesystem Size Used Avail Use% Mounted on
/dev/rbd0 100G 55M 100G 1% /data
Notes on customizing the CRUSH map
After a downed OSD is restarted with service ceph-osd@{osd.id} start, it comes back under the default host bucket instead of the customized one.
Add the following setting so that OSDs do not automatically rewrite the CRUSH map when they start:
cephadmin@ceph-deploy:~/ceph-cluster$ vim ceph.conf
[osd]
osd crush update on start = false
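The setting only takes effect on the OSD nodes once the updated ceph.conf reaches them; a sketch, assuming a ceph-deploy-managed cluster (on Octopus and later the option can instead be set in the centralized config database):
# push the updated ceph.conf to the storage nodes (ceph-deploy assumed)
ceph-deploy --overwrite-conf config push ceph-node1 ceph-node2 ceph-node3 ceph-node4
# alternative on newer releases: set it cluster-wide without editing ceph.conf
ceph config set osd osd_crush_update_on_start false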
Then move osd.9 in the CRUSH map back under its host bucket in root ssd:
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd crush move osd.9 host=ceph-ssdnode3 root=ssd
After this, neither an OSD restart nor a node outage will move the OSD back under the default root.
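The placement can be double-checked in the OSD tree, which shows each custom root and the hosts and OSDs beneath it:
# verify osd.9 now sits under root ssd and stays there across restarts
ceph osd tree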
This article is from cnblogs, author: PunchLinux. Please credit the original link when reposting: https://www.cnblogs.com/punchlinux/p/17072854.html