15 Advanced Ceph Parameter Configuration (Repost)

Advanced Ceph Parameter Configuration

Adjusting the Namespaces

See the official documentation

cd cluster/examples/kubernetes/ceph

export ROOK_OPERATOR_NAMESPACE="rook-ceph"
export ROOK_CLUSTER_NAMESPACE="rook-ceph"

sed -i.bak \
    -e "s/\(.*\):.*# namespace:operator/\1: $ROOK_OPERATOR_NAMESPACE # namespace:operator/g" \
    -e "s/\(.*\):.*# namespace:cluster/\1: $ROOK_CLUSTER_NAMESPACE # namespace:cluster/g" \
    -e "s/\(.*serviceaccount\):.*:\(.*\) # serviceaccount:namespace:operator/\1:$ROOK_OPERATOR_NAMESPACE:\2 # serviceaccount:namespace:operator/g" \
    -e "s/\(.*serviceaccount\):.*:\(.*\) # serviceaccount:namespace:cluster/\1:$ROOK_CLUSTER_NAMESPACE:\2 # serviceaccount:namespace:cluster/g" \
    -e "s/\(.*\): [-_A-Za-z0-9]*\.\(.*\) # driver:namespace:operator/\1: $ROOK_OPERATOR_NAMESPACE.\2 # driver:namespace:operator/g" \
    -e "s/\(.*\): [-_A-Za-z0-9]*\.\(.*\) # driver:namespace:cluster/\1: $ROOK_CLUSTER_NAMESPACE.\2 # driver:namespace:cluster/g" \
  common.yaml operator.yaml cluster.yaml # add other files or change these as desired for your config

# You need to use `apply` for all Ceph clusters after the first if you have only one Operator
kubectl apply -f common.yaml -f operator.yaml -f cluster.yaml # add other files as desired for your config

Collecting Container Logs

See the official documentation

[root@m1 rbd]# cat /tmp/log.sh
#!/bin/bash

log_path="/var/log/rook"
DATE=`date +%F`

# make sure the target log directory exists
mkdir -pv ${log_path}

for p in $(kubectl -n rook-ceph get pods -o jsonpath='{.items[*].metadata.name}')
do
    for c in $(kubectl -n rook-ceph get pod ${p} -o jsonpath='{.spec.containers[*].name}')
    do
        echo "BEGIN logs from pod: ${p} ${c}"
        kubectl -n rook-ceph logs -c ${c} ${p} | tee ${log_path}/${c}_${p}_${DATE}.logs
        echo "END logs from pod: ${p} ${c}"
    done
done
[root@m1 rbd]# ls /var/log/rook/ -lh
total 11M
-rw-r--r-- 1 root root   64 Dec  2 16:35 ceph-crash_rook-ceph-crashcollector-192.168.100.133-778bbd9bc5-slv77_2022-12-02.logs
-rw-r--r-- 1 root root  329 Dec  2 16:35 ceph-crash_rook-ceph-crashcollector-192.168.100.134-55bffbcd86-8db2m_2022-12-02.logs
-rw-r--r-- 1 root root   64 Dec  2 16:35 ceph-crash_rook-ceph-crashcollector-192.168.100.135-568bff4f85-ftdvx_2022-12-02.logs
-rw-r--r-- 1 root root   64 Dec  2 16:35 ceph-crash_rook-ceph-crashcollector-192.168.100.136-55fdc6f5bd-kqtgh_2022-12-02.logs
-rw-r--r-- 1 root root   64 Dec  2 16:35 ceph-crash_rook-ceph-crashcollector-192.168.100.137-9c7cb5f7-svz9z_2022-12-02.logs
-rw-r--r-- 1 root root  517 Dec  2 16:35 csi-attacher_csi-cephfsplugin-provisioner-8658f67749-jxshb_2022-12-02.logs
-rw-r--r-- 1 root root  517 Dec  2 16:34 csi-attacher_csi-cephfsplugin-provisioner-8658f67749-jxshb_.logs
-rw-r--r-- 1 root root  734 Dec  2 16:35 csi-attacher_csi-cephfsplugin-provisioner-8658f67749-whmrx_2022-12-02.logs
-rw-r--r-- 1 root root 1.0K Dec  2 16:35 csi-attacher_csi-rbdplugin-provisioner-94f699d86-bh4fv_2022-12-02.logs
-rw-r--r-- 1 root root  728 Dec  2 16:35 csi-attacher_csi-rbdplugin-provisioner-94f699d86-p6vpm_2022-12-02.logs
......
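
If these logs should be collected regularly, the script can simply be scheduled with cron. The entry below is only an illustration and assumes the script stays at /tmp/log.sh as shown above:

# illustrative crontab entry: collect the rook-ceph container logs every day at 01:00
0 1 * * * /bin/bash /tmp/log.sh >> /var/log/rook/collect_cron.log 2>&1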

Collecting OSD Information

Ceph exposes a lot of configuration information through admin sockets; for example, ceph --admin-daemon /var/run/ceph/mon-node-1.socket config show returns the configuration of the m1 mon, and config set / config get can be used to change and read configuration parameters temporarily. Since Rook runs the daemons in containers, we need to enter the corresponding container to access this information, as shown below

[root@m1 rbd]# kubectl -n rook-ceph exec -it rook-ceph-mon-a-6cfc46ccd8-xrmzn -- bash
[root@rook-ceph-mon-a-6cfc46ccd8-xrmzn /]# ceph --admin-daemon /var/run/ceph/ceph-mon.a.asok config show | head
{
    "name": "mon.a",
    "cluster": "ceph",
    "admin_socket": "/var/run/ceph/ceph-mon.a.asok",
    "admin_socket_mode": "",
    "allow_ansi": "Terminal",
    "auth_client_required": "cephx, none",
    "auth_cluster_required": "cephx",
    "auth_debug": "false",
    "auth_mon_ticket_ttl": "43200.000000",

Likewise, since the OSDs also run as containers in the cluster, you can enter an OSD container to inspect the relevant information

[root@m1 rbd]# kubectl -n rook-ceph exec -it rook-ceph-osd-1-5866f9f558-jq994 -- bash
[root@rook-ceph-osd-1-5866f9f558-jq994 /]# ls -l /var/lib/ceph/osd/ceph-1/
total 28
lrwxrwxrwx 1 ceph ceph 93 Dec  1 05:29 block -> /dev/ceph-4a84daae-a3c8-40a3-822b-100c8e47d8d2/osd-block-da31a738-a678-4aa1-b238-5ce69ba5aaa0
-rw------- 1 ceph ceph 37 Dec  1 05:29 ceph_fsid
-rw------- 1 ceph ceph 37 Dec  1 05:29 fsid
-rw------- 1 ceph ceph 55 Dec  1 05:29 keyring
-rw------- 1 ceph ceph  6 Dec  1 05:29 ready
-rw------- 1 ceph ceph  3 Dec  1 05:29 require_osd_release
-rw------- 1 ceph ceph 10 Dec  1 05:29 type
-rw------- 1 ceph ceph  2 Dec  1 05:29 whoami
[root@rook-ceph-osd-1-5866f9f558-jq994 /]# lsblk -f
NAME                                                                              FSTYPE      LABEL           UUID                                   MOUNTPOINT
sda                                                                                                                                                  
|-sda1                                                                            xfs                         fef3e1da-62e1-41eb-b1cb-19021cce0cf5   
`-sda2                                                                            LVM2_member                 wzFi9m-QD1s-G4Yi-SBN4-7ud9-8wYk-NGx1dS 
  |-centos-root                                                                   xfs                         9886a31d-694b-4957-ad05-247edc04dd88   /var/lib/ceph/osd/ceph-1
  `-centos-swap                                                                   swap                        239e607c-1a07-44d5-ae89-1b415e160ff8   
sdb                                                                               LVM2_member                 3EQj3n-Gkb8-aDVN-iTJj-JrpZ-mhjU-Yyph3G 
`-ceph--4a84daae--a3c8--40a3--822b--100c8e47d8d2-osd--block--da31a738--a678--4aa1--b238--5ce69ba5aaa0
                                                                                                                                                     
sr0                                                                               iso9660     CentOS 7 x86_64 2020-11-03-14-55-29-00    

The official script for inspecting OSD information

See the official documentation

# Get OSD Pods
# This uses the example/default cluster name "rook-ceph"
OSD_PODS=$(kubectl get pods --all-namespaces -l \
  app=rook-ceph-osd,rook_cluster=rook-ceph -o jsonpath='{.items[*].metadata.name}')

# Find node and drive associations from OSD pods
for pod in $(echo ${OSD_PODS})
do
 echo "Pod:  ${pod}"
 echo "Node: $(kubectl -n rook-ceph get pod ${pod} -o jsonpath='{.spec.nodeName}')"
 kubectl -n rook-ceph exec ${pod} -- sh -c '\
  for i in /var/lib/ceph/osd/ceph-*; do
    [ -f ${i}/ready ] || continue
    echo -ne "-$(basename ${i}) "
    echo $(lsblk -n -o NAME,SIZE ${i}/block 2> /dev/null || \
    findmnt -n -v -o SOURCE,SIZE -T ${i}) $(cat ${i}/type)
  done | sort -V
  echo'
done

Configuring Pool Parameters

Ceph provides parameters for tuning pools, i.e. ceph osd pool set size|pg_num|pgp_num and so on, for example adjusting the replica count

[root@m1 rbd]# ceph osd pool get testpool1 size
size: 3
[root@m1 rbd]# ceph osd pool set testpool1 size 2
set pool 13 size to 2
[root@m1 rbd]# ceph osd pool get testpool1 size
size: 2

Besides the replica count, another important pool parameter is the number of PGs. PG is short for placement group; together with PGP it describes how data is distributed, with the CRUSH algorithm mapping the PGs onto different OSDs. An unreasonably small PG count leads to uneven data distribution across the OSDs, so a sensible value needs to be set (a common sizing rule of thumb is sketched at the end of this section). By default the PG autoscaler is already enabled, as shown below

[root@m1 rbd]# ceph osd pool autoscale-status
POOL                           SIZE  TARGET SIZE  RATE  RAW CAPACITY   RATIO  TARGET RATIO  EFFECTIVE RATIO  BIAS  PG_NUM  NEW PG_NUM  AUTOSCALE  
device_health_metrics            0                 3.0        249.9G  0.0000                                  1.0       1              on         
replicapool                   1454M                3.0        249.9G  0.0171                                  1.0      32              on         
myfs-metadata                 3252k                3.0        249.9G  0.0000                                  4.0      32              on         
myfs-data0                   108.4M                3.0        249.9G  0.0013                                  1.0      32              on         
my-store.rgw.control             0                 3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.meta             7525                 3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.log             222.5k                3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.buckets.index   830.3k                3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.buckets.non-ec   1352                 3.0        249.9G  0.0000                                  1.0       8              on         
.rgw.root                     3927                 3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.buckets.data     3314                 3.0        249.9G  0.0000                                  1.0      32              on         
evescn_test                  900.9k                3.0        249.9G  0.0000                                  1.0      32              on         
testpool1                        0                 2.0        249.9G  0.0000                                  1.0      16              on         
testpool2                        0                 2.0        249.9G  0.0000                                  1.0      16              on  

Ceph adjusts the PG count automatically based on the amount of data. In production, however, the PG count is generally set manually, so the autoscaler needs to be turned off

[root@m1 rbd]# ceph osd pool get testpool1 pg_autoscale_mode
pg_autoscale_mode: on

[root@m1 rbd]# ceph osd pool set testpool1 pg_autoscale_mode off
set pool 13 pg_autoscale_mode to off

[root@m1 rbd]# ceph osd pool get testpool1 pg_autoscale_mode
pg_autoscale_mode: off

After turning it off, the PG and PGP counts have to be set manually

[root@m1 rbd]# ceph osd pool set testpool1 pg_num 32
set pool 13 pg_num to 32

[root@m1 rbd]# ceph osd pool set testpool1 pgp_num 32
set pool 13 pgp_num to 32

[root@m1 rbd]# ceph osd pool autoscale-status
POOL                           SIZE  TARGET SIZE  RATE  RAW CAPACITY   RATIO  TARGET RATIO  EFFECTIVE RATIO  BIAS  PG_NUM  NEW PG_NUM  AUTOSCALE  
device_health_metrics            0                 3.0        249.9G  0.0000                                  1.0       1              on         
replicapool                   1454M                3.0        249.9G  0.0171                                  1.0      32              on         
myfs-metadata                 3252k                3.0        249.9G  0.0000                                  4.0      32              on         
myfs-data0                   108.4M                3.0        249.9G  0.0013                                  1.0      32              on         
my-store.rgw.control             0                 3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.meta             7525                 3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.log             222.5k                3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.buckets.index   830.3k                3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.buckets.non-ec   1352                 3.0        249.9G  0.0000                                  1.0       8              on         
.rgw.root                     3927                 3.0        249.9G  0.0000                                  1.0       8              on         
my-store.rgw.buckets.data     3314                 3.0        249.9G  0.0000                                  1.0      32              on         
evescn_test                  900.9k                3.0        249.9G  0.0000                                  1.0      32              on         
testpool1                        0                 2.0        249.9G  0.0000                                  1.0      32              off        
testpool2                        0                 2.0        249.9G  0.0000                                  1.0      32              on   
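
When choosing pg_num by hand, a commonly cited rule of thumb (not from this post, only a starting point) is: total PGs ≈ (number of OSDs × 100) / replica size, rounded to a power of two and then divided among the data-heavy pools. A minimal sketch using the 5 OSDs of this cluster:

#!/bin/bash
# Rough PG sizing sketch (illustrative values, not an official formula)
num_osds=5        # this cluster has 5 OSDs
replica_size=3
total=$(( num_osds * 100 / replica_size ))    # 166
# round down to the nearest power of two
pg=1
while [ $(( pg * 2 )) -le ${total} ]; do pg=$(( pg * 2 )); done
echo "suggested total PGs across all pools: ${pg}"    # 128
# then apply per pool, e.g.:
# ceph osd pool set testpool1 pg_num 32
# ceph osd pool set testpool1 pgp_num 32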

Adjusting MON Parameters

There are generally two ways to adjust Ceph parameters:

  • Temporary: use config set to change a parameter online; the change takes effect immediately but only lasts until the daemon restarts
  • Permanent: edit the ceph.conf configuration file; after the change, the affected daemons have to be restarted

Let's look at an example first. When a pool is created it is assigned pg_num and pgp_num values, which default to 32

[root@m1 rbd]# kubectl -n rook-ceph exec -it rook-ceph-mon-a-6cfc46ccd8-xrmzn -- bash

[root@rook-ceph-mon-a-6cfc46ccd8-xrmzn /]# ceph --admin-daemon /var/run/ceph/ceph-mon.a.asok config show | grep pg_num
    "mgr_debug_aggressive_pg_num_changes": "false",
    "mon_max_pool_pg_num": "65536",
    "mon_warn_on_pool_pg_num_not_power_of_two": "true",
    "osd_pool_default_pg_num": "32",    # 每个 pool 默认的 pg_num 为 32
    "rgw_rados_pool_pg_num_min": "8",

If a pool is created now, its default pg_num and pgp_num are both 32, which can be verified as follows

[root@m1 rbd]# ceph osd pool create pool1
pool 'pool1' created
[root@m1 rbd]# ceph osd pool get pool1 pg_num
pg_num: 32
[root@m1 rbd]# ceph osd pool get pool1 pgp_num
pgp_num: 32

Adjust the parameter, changing the value from 32 to 16

[root@rook-ceph-mon-a-6cfc46ccd8-xrmzn /]# ceph --admin-daemon /var/run/ceph/ceph-mon.a.asok config set osd_pool_default_pg_num 16
{
    "success": "osd_pool_default_pg_num = '16' (not observed, change may require restart) "
}
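
Instead of exec'ing into every mon pod by hand, the same admin-socket change can be pushed to all mons with a small loop. This is only a sketch: it assumes the app=rook-ceph-mon pod label and that each mon pod contains exactly one ceph-mon.*.asok socket, as seen above:

# sketch: apply the same temporary setting on every mon pod
for p in $(kubectl -n rook-ceph get pods -l app=rook-ceph-mon -o jsonpath='{.items[*].metadata.name}')
do
    echo "setting osd_pool_default_pg_num on ${p}"
    kubectl -n rook-ceph exec ${p} -- sh -c \
        'ceph --admin-daemon /var/run/ceph/ceph-mon.*.asok config set osd_pool_default_pg_num 16'
done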

Use the same method (or the loop above) to change the parameter on the other two mons. Once they have all been updated, test as follows

[root@m1 rbd]# ceph osd pool create pool2
pool 'pool2' created
[root@m1 rbd]# ceph osd pool get pool2 pg_num
pg_num: 16
[root@m1 rbd]# ceph osd pool get pool2 pgp_num
pgp_num: 16

Preventing Accidental Pool Deletion

By default pools can be deleted. This is controlled by the mon_allow_pool_delete switch, which is true here, so pools can be removed

[root@rook-ceph-mon-a-6cfc46ccd8-xrmzn /]# ceph --admin-daemon /var/run/ceph/ceph-mon.a.asok config show | grep delete
    "mon_allow_pool_delete": "true",

So the pool can be deleted

[root@m1 rbd]# ceph osd pool rm pool1 pool1 --yes-i-really-really-mean-it
pool 'pool1' removed

To prevent accidental deletion, turn the switch off; this needs to be done on all three mon daemons

[root@rook-ceph-mon-a-6cfc46ccd8-xrmzn /]# ceph --admin-daemon /var/run/ceph/ceph-mon.a.asok config set mon_allow_pool_delete false
{
    "success": "mon_allow_pool_delete = 'false' "
}

Once it is disabled, attempting to delete a pool again fails with an error saying that pool deletion is disabled

[root@m1 rbd]# ceph osd pool rm pool2 pool2 --yes-i-really-really-mean-it
Error EPERM: pool deletion is disabled; you must first set the mon_allow_pool_delete config option to true before you can destroy a pool

Customizing the Ceph Configuration

Changes made with config set are only effective temporarily. To make a configuration permanent, the ceph.conf configuration file needs to be modified; in Rook this is managed through the rook-config-override ConfigMap, for example:

[root@m1 rbd]# cat overide.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: rook-config-override
  namespace: rook-ceph
data:
  config: |
    [global]
    osd crush update on start = false
    osd pool default size = 2
    mon_allow_pool_delete = true
    osd_pool_default_pg_num = 64

Apply it to the cluster

[root@m1 rbd]# kubectl apply -f overide.yaml
Warning: resource configmaps/rook-config-override is missing the kubectl.kubernetes.io/last-applied-configuration annotation which is required by kubectl apply. kubectl apply should only be used on resources created declaratively by either kubectl create --save-config or kubectl apply. The missing annotation will be patched automatically.
configmap/rook-config-override configured

Check the ConfigMap

[root@m1 rbd]# kubectl get configmaps -n rook-ceph rook-config-override -o yaml
apiVersion: v1
data:
  config: |
    [global]
    osd crush update on start = false
    osd pool default size = 2
    mon_allow_pool_delete = true
    osd_pool_default_pg_num = 64
kind: ConfigMap
......

Tips for Restarting Components

After modifying rook-config-override, the ceph.conf inside the containers is automatically updated with the contents of the ConfigMap, as shown below

[root@rook-ceph-mon-a-6cfc46ccd8-xrmzn /]# cat /etc/ceph/ceph.conf 
[global]
osd crush update on start = false
osd pool default size = 2
mon_allow_pool_delete = true
osd_pool_default_pg_num = 64

However, the configuration does not take effect just because the file has been updated. For it to take effect, the corresponding components have to be restarted, including the MON, MGR, RGW, MDS and OSD daemons. Restart only one pod at a time: wait until the pod has started, check the cluster with ceph -s, and only restart the next process once Ceph is healthy again. For example, restart a mon process as follows

[root@m1 rbd]# kubectl -n rook-ceph delete pods rook-ceph-mon-a-6cfc46ccd8-xrmzn 

Watch the state of the Ceph cluster and wait until it is healthy before restarting the next monitor process

[root@m1 rbd]# ceph -s
  cluster:
    id:     17a413b5-f140-441a-8b35-feec8ae29521
    health: HEALTH_WARN
            2 daemons have recently crashed
 
  services:
    mon: 3 daemons, quorum b,d,e (age 3s)
    mgr: a(active, since 61m)
    mds: myfs:2 {0=myfs-d=up:active,1=myfs-b=up:active} 2 up:standby-replay
    osd: 5 osds: 5 up (since 4m), 5 in (since 26h)
    rgw: 2 daemons active (my.store.a, my.store.b)
 
  task status:
 
  data:
    pools:   15 pools, 289 pgs
    objects: 910 objects, 1.5 GiB
    usage:   10 GiB used, 240 GiB / 250 GiB avail
    pgs:     289 active+clean
 
  io:
    client:   2.1 KiB/s rd, 4 op/s rd, 0 op/s wr

In addition, restarting OSDs needs special care because it can involve data migration. Make sure the cluster state is active+clean before moving on to the next restart, so that the restarts do not trigger large-scale data movement and affect normal workloads. Below are the restart recommendations from the official documentation (a minimal one-at-a-time OSD restart loop is sketched after the list)

  • mons: ensure all three mons are online and healthy before restarting each mon pod, one at a time.
  • mgrs: the pods are stateless and can be restarted as needed, but note that this will disrupt the Ceph dashboard during restart.
  • OSDs: restart your OSD pods by deleting them, one at a time, and running ceph -s between each restart to ensure the cluster goes back to “active/clean” state.
  • RGW: the pods are stateless and can be restarted as needed.
  • MDS: the pods are stateless and can be restarted as needed.
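
As a companion to the OSD recommendation above, here is a minimal one-at-a-time restart sketch. It is not from the original post; it assumes the app=rook-ceph-osd pod label and that the ceph CLI works on this host as in the earlier examples, and it simply waits until ceph -s no longer reports recovering or degraded PGs before deleting the next pod:

#!/bin/bash
# sketch: restart OSD pods one by one, waiting for the cluster to return to active+clean
for p in $(kubectl -n rook-ceph get pods -l app=rook-ceph-osd -o jsonpath='{.items[*].metadata.name}')
do
    echo "restarting ${p}"
    kubectl -n rook-ceph delete pod ${p}
    sleep 30    # give the replacement pod time to start and PG states time to settle
    # keep waiting while any PG is not active+clean
    while ceph -s | grep -qE 'degraded|misplaced|peering|recover|backfill'
    do
        echo "waiting for PGs to return to active+clean ..."
        sleep 10
    done
done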

After the mon, mgr, rgw, mds and osd components have been restarted, check the configuration again; you can see it has taken effect

[root@m1 rbd]# kubectl -n rook-ceph exec -it rook-ceph-mon-b-7486b4b679-hbsng -- bash

[root@rook-ceph-mon-b-7486b4b679-hbsng /]# ceph --admin-daemon /var/run/ceph/ceph-mon.b.asok config show | grep  osd_pool_default_pg_num
    "osd_pool_default_pg_num": "64",

[root@rook-ceph-mon-b-7486b4b679-hbsng /]# ceph --admin-daemon /var/run/ceph/ceph-mon.b.asok config show | grep "osd_pool_default_size"
    "osd_pool_default_size": "2",

Ceph Tuning in Practice

apiVersion: v1
kind: ConfigMap
metadata:
  name: rook-config-override
  namespace: rook-ceph
data:
  config: |
    [global]
    osd crush update on start = false
    osd pool default size = 2
    mon_allow_pool_delete = true
    osd_pool_default_pg_num = 32
    mon_max_pg_per_osd = 250            # maximum number of PGs per OSD; warn when exceeded
    mon_osd_full_ratio = 0.95           # at 95% utilization an OSD is full and writes are blocked
    mon_osd_nearfull_ratio = 0.85       # warn when an OSD is nearly full
    
    [osd]
    osd_recovery_op_priority = 1        # priority of recovery operations, default 3
    osd_recovery_max_active = 1         # number of PGs recovering concurrently per OSD, default 0
    osd_max_backfills = 1               # maximum number of concurrent backfills per OSD
    osd_recovery_max_chunk = 1048576    # maximum chunk size during recovery, default 8388608
    osd_scrub_begin_hour = 1            # hour at which scrub consistency checks may start, default 0
    osd_scrub_end_hour = 6              # hour at which scrub consistency checks must end, default 24

Adjusting the CRUSH Structure

The crushmap is how Ceph decides where data is placed. The default crushmap is usually fine, but some scenarios call for adjustments, such as:

  • Data placement: e.g. a mixed SSD + HDD environment where the SSD pools and HDD pools need to be separated and served to different workloads
  • Weight assignment: OSDs are given a weight based on their capacity by default, but data is never perfectly even; weights can be adjusted when utilization is unbalanced
  • OSD affinity: adjust the primary affinity that controls which OSD handles primary writes (a short example follows this list)
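
For the OSD affinity point, a small illustrative example (not from the original post): primary affinity is a value between 0.0 and 1.0 that lowers the probability of an OSD being chosen as the primary for its PGs:

# make osd.3 less likely to be elected primary for its PGs
# (1.0 is the default; 0 means never primary unless there is no alternative)
ceph osd primary-affinity osd.3 0.5
# verify in the PRI-AFF column
ceph osd tree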

Adjusting data placement, as in the mixed SSD/HDD scenario, is relatively complex; refer to this blog post


For example, if an OSD's utilization becomes too high it reports nearfull at 85%, and at that point more OSDs should be added to the cluster. If the other OSDs are lightly used, you can instead adjust the OSD weights as needed to trigger a redistribution of the data, as follows:

[root@m1 rbd]# ceph osd df
ID  CLASS  WEIGHT   REWEIGHT  SIZE     RAW USE  DATA      OMAP     META      AVAIL    %USE  VAR   PGS  STATUS
 0    hdd  0.04880   1.00000   50 GiB  1.9 GiB   937 MiB  600 KiB  1023 MiB   48 GiB  3.83  0.96  172      up
 1    hdd  0.04880   1.00000   50 GiB  1.9 GiB   967 MiB  732 KiB  1023 MiB   48 GiB  3.89  0.98  178      up
 2    hdd  0.04880   1.00000   50 GiB  2.2 GiB   1.2 GiB  369 KiB  1024 MiB   48 GiB  4.48  1.12  160      up
 3    hdd  0.04880   1.00000   50 GiB  1.9 GiB   909 MiB  2.2 MiB  1022 MiB   48 GiB  3.78  0.95  185      up
 4    hdd  0.04880   1.00000   50 GiB  2.0 GiB  1001 MiB  2.1 MiB  1022 MiB   48 GiB  3.96  0.99  172      up
                       TOTAL  250 GiB   10 GiB   5.0 GiB  5.9 MiB   5.0 GiB  240 GiB  3.99                   
MIN/MAX VAR: 0.95/1.12  STDDEV: 0.25

[root@m1 rbd]# ceph osd crush reweight osd.3 0.8
reweighted item id 3 name 'osd.3' to 0.8 in crush map

[root@m1 rbd]# ceph osd df
ID  CLASS  WEIGHT   REWEIGHT  SIZE     RAW USE  DATA      OMAP     META      AVAIL    %USE  VAR   PGS  STATUS
 0    hdd  0.04880   1.00000   50 GiB  1.9 GiB   937 MiB  600 KiB  1023 MiB   48 GiB  3.83  0.96  173      up
 1    hdd  0.04880   1.00000   50 GiB  1.9 GiB   967 MiB  732 KiB  1023 MiB   48 GiB  3.89  0.98  167      up
 2    hdd  0.04880   1.00000   50 GiB  2.2 GiB   1.2 GiB  369 KiB  1024 MiB   48 GiB  4.48  1.12  170      up
 3    hdd  0.79999   1.00000   50 GiB  1.9 GiB   909 MiB  2.2 MiB  1022 MiB   48 GiB  3.78  0.95  206      up
 4    hdd  0.04880   1.00000   50 GiB  2.0 GiB  1001 MiB  2.1 MiB  1022 MiB   48 GiB  3.96  0.99  151      up
                       TOTAL  250 GiB   10 GiB   5.0 GiB  5.9 MiB   5.0 GiB  240 GiB  3.99     

After the adjustment, the data is rebalanced automatically

[root@m1 rbd]# ceph -s
  cluster:
    id:     17a413b5-f140-441a-8b35-feec8ae29521
    health: HEALTH_WARN
            Degraded data redundancy: 815/2730 objects degraded (29.853%), 72 pgs degraded
            2 daemons have recently crashed
 
  services:
    mon: 3 daemons, quorum b,d,e (age 8m)
    mgr: a(active, since 69m)
    mds: myfs:2 {0=myfs-d=up:active,1=myfs-b=up:active} 2 up:standby-replay
    osd: 5 osds: 5 up (since 12m), 5 in (since 26h); 57 remapped pgs
    rgw: 2 daemons active (my.store.a, my.store.b)
 
  task status:
 
  data:
    pools:   15 pools, 289 pgs
    objects: 910 objects, 1.5 GiB
    usage:   10 GiB used, 240 GiB / 250 GiB avail
    pgs:     815/2730 objects degraded (29.853%)
             465/2730 objects misplaced (17.033%)
             190 active+clean
             41  active+recovery_wait+degraded
             31  active+recovery_wait+undersized+degraded+remapped
             24  active+remapped+backfill_wait
             2   active+recovery_wait+remapped
             1   active+recovering
 
  io:
    recovery: 0 B/s, 2 keys/s, 0 objects/s

Customizing the OSD Network

Ceph provides two separate networks for different purposes:

  • public network: the public-facing network, used to connect to the Ceph cluster and establish data channels
  • cluster network: the data network, used for Ceph-internal heartbeats and data replication

They can be set in the rook-config-override ConfigMap, for example:

apiVersion: v1
data:
  config: |
    [global]
    # set these in the rook-config-override ConfigMap
    public network =  10.0.7.0/24
    cluster network = 10.0.10.0/24
    public addr = ""
    cluster addr = ""

By default these two networks are combined. If the host has two different NICs, they can be separated. To do so, the network first has to be set to hostNetwork, which means the container network and the host network share the same network stack. This can only be changed when Rook initializes the cluster; the setting is in the cluster.yaml file

[root@m1 rbd]# cd ../../
[root@m1 ceph]# vim cluster.yaml
 71   network:
 72     # enable host networking
 73     #provider: host
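
After the cluster has been (re)initialized with host networking and the override above, the settings actually in effect can be checked from inside a mon container with the same admin-socket approach used earlier (the pod name below is just a placeholder):

# placeholder pod name; use the mon pod name from `kubectl -n rook-ceph get pods`
kubectl -n rook-ceph exec -it rook-ceph-mon-a-xxxxxxxxxx -- sh -c \
    'ceph --admin-daemon /var/run/ceph/ceph-mon.*.asok config show | grep -E "public_network|cluster_network"'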

Adjusting the Failure Domain

Ceph supports setting a failure domain per pool. What is a failure domain? It is the scope of failure that can be tolerated when something goes wrong. Ceph supports several different failure domain types; common ones are:

  • datacenter: data center level, e.g. three replicas placed in three different data centers
  • rack: rack level, e.g. three replicas placed in three different racks
  • host: host level, e.g. three replicas placed on three different hosts; this is the default rule
  • osd: disk level, e.g. three replicas placed on three different disks

The failure domain can be defined when the pool is created; below we create a pool whose failure domain is osd

# edit the configuration
[root@m1 ceph]# grep -v "[.*#]" pool.yaml 

apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: test-domain
  namespace: rook-ceph
spec:
  failureDomain: osd # failure domain
  replicated:
    size: 3
    requireSafeReplicaSize: true

After creating it, it can be verified with the following commands

[root@m1 ceph]# kubectl apply -f pool.yaml 
cephblockpool.ceph.rook.io/test-domain created

# list the underlying pools
[root@m1 ceph]# ceph osd lspools
1 device_health_metrics
2 replicapool
3 myfs-metadata
4 myfs-data0
5 my-store.rgw.control
6 my-store.rgw.meta
7 my-store.rgw.log
8 my-store.rgw.buckets.index
9 my-store.rgw.buckets.non-ec
10 .rgw.root
11 my-store.rgw.buckets.data
12 evescn_test
13 testpool1
14 testpool2
16 pool2
17 test-domain

# check the pool's crush_rule
[root@m1 ceph]# ceph osd pool get test-domain  crush_rule
crush_rule: test-domain

# check the failure domain type
[root@m1 ceph]# ceph osd crush rule dump test-domain
{
    "rule_id": 12,
    "rule_name": "test-domain",
    "ruleset": 12,
    "type": 1,
    "min_size": 1,
    "max_size": 10,
    "steps": [
        {
            "op": "take",
            "item": -1,
            "item_name": "default"
        },
        {
            "op": "choose_firstn",
            "num": 0,
            "type": "osd"     # 故障域类型
        },
        {
            "op": "emit"
        }
    ]
}

With an osd failure domain the data is spread across three different disks regardless of whether those disks sit on the same host, so there is a risk of data loss. When the failure domain needs to be changed for whatever reason, it can be adjusted as follows. First, create a CRUSH rule associated with the desired failure domain

[root@m1 ceph]# ceph osd crush rule create-replicated happylau-rule default host

[root@m1 ceph]# ceph osd crush rule dump happylau-rule
{
    "rule_id": 13,
    "rule_name": "happylau-rule",
    "ruleset": 13,
    "type": 1,
    "min_size": 1,
    "max_size": 10,
    "steps": [
        {
            "op": "take",
            "item": -1,
            "item_name": "default"
        },
        {
            "op": "chooseleaf_firstn",
            "num": 0,
            "type": "host"    # 创建 happylau-rule 的规则,故障域为 host
        },
        {
            "op": "emit"
        }
    ]
}

After creating the rule, apply it to the corresponding pool

[root@m1 ceph]# ceph osd pool set test-domain crush_rule happylau-rule
set pool 17 crush_rule to happylau-rule
[root@m1 ceph]# ceph osd pool get test-domain crush_rule
crush_rule: happylau-rule