
Environment preparation

Prepare three hosts, each with three disks.
master1-admin is the admin node: 192.168.1.161
node1-monitor is the monitor node: 192.168.1.162
node2-osd is the OSD (object storage) node: 192.168.1.163
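If you want to confirm the spare disks are visible, lsblk lists them on each host. Note that in this walkthrough the OSDs are later created on plain directories (/var/local/osd1), so the extra disks are not actually partitioned here.

# list the block devices on each host
lsblk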

Set the hostnames

# On master1-admin
hostnamectl set-hostname master1-admin
# On node1-monitor
hostnamectl set-hostname node1-monitor
# On node2-osd
hostnamectl set-hostname node2-osd

Edit the hosts file

# On all hosts, add the following to /etc/hosts
192.168.1.161 master1-admin
192.168.1.162 node1-monitor
192.168.1.163 node2-osd
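The entries can be appended on every host in one step, for example with a heredoc (a minimal sketch; skip any names that are already present in /etc/hosts):

cat >> /etc/hosts << 'EOF'
192.168.1.161 master1-admin
192.168.1.162 node1-monitor
192.168.1.163 node2-osd
EOF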

Install base packages on all hosts

yum -y install wget net-tools nfs-utils lrzsz gcc gcc-c++ make cmake libxml2-devel openssl-devel curl curl-devel unzip sudo ntp libaio-devel ncurses-devel autoconf automake zlib-devel python-devel epel-release openssh-server
yum -y install deltarpm

Disable the firewall on all hosts

systemctl stop firewalld && systemctl disable firewalld
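A quick check that the firewall really is off and stays off after a reboot:

systemctl is-active firewalld    # should print: inactive
systemctl is-enabled firewalld   # should print: disabled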

Configure passwordless SSH from master1-admin to the other nodes

echo -e "\n"|ssh-keygen -t rsa -N ""
ssh-copy-id root@192.168.1.162
ssh-copy-id root@192.168.1.163
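To confirm the keys were copied correctly, each of the following should print the remote hostname without asking for a password (a quick sketch):

for ip in 192.168.1.162 192.168.1.163; do
  ssh root@$ip hostname
done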

Time synchronization

# On master1-admin
ntpdate cn.pool.ntp.org
systemctl start ntpd && systemctl enable ntpd

# On node1-monitor
ntpdate master1-admin
Cron job:
*/5 * * * * /usr/sbin/ntpdate master1-admin

# On node2-osd
ntpdate master1-admin
Cron job:
*/5 * * * * /usr/sbin/ntpdate master1-admin
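The cron entry can be installed non-interactively on both nodes, for example (a sketch; it appends to whatever crontab already exists):

(crontab -l 2>/dev/null; echo "*/5 * * * * /usr/sbin/ntpdate master1-admin") | crontab -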

Install ceph-deploy

# On master1-admin
[root@master1-admin ~]# yum install -y yum-utils && yum-config-manager --add-repo https://dl.fedoraproject.org/pub/epel/7/x86_64 && yum install --nogpgcheck -y epel-release && rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-7 && rm /etc/yum.repos.d/dl.fedoraproject.org*
[root@master1-admin ~]# cat /etc/yum.repos.d/ceph.repo
[Ceph-SRPMS]
name=Ceph SRPMS packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-jewel/el7/SRPMS/
enabled=1
gpgcheck=0
type=rpm-md
[Ceph-aarch64]
name=Ceph aarch64 packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-jewel/el7/aarch64/
enabled=1
gpgcheck=0
type=rpm-md
[Ceph-noarch]
name=Ceph noarch packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-jewel/el7/noarch/
enabled=1
gpgcheck=0
type=rpm-md
[Ceph-x86_64]
name=Ceph x86_64 packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-jewel/el7/x86_64/
enabled=1
gpgcheck=0
type=rpm-md
[root@master1-admin ~]# yum update -y
[root@master1-admin ~]# yum install ceph-deploy -y
[root@master1-admin ~]# yum install yum-plugin-priorities -y
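Before continuing, it is worth confirming that ceph-deploy is installed and on the PATH:

[root@master1-admin ~]# ceph-deploy --version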

Set up the cluster

# Create a directory to hold the configuration files generated by ceph-deploy
[root@master1-admin ~]# mkdir -p /data/ceph-deploy && cd /data/ceph-deploy
# Create the cluster and designate the monitor node
ceph-deploy new node1-monitor
# Change the default replica count in the Ceph config from 3 to 1, so that active + clean can be reached with only 1 OSD. Add osd pool default size = 1 to the [global] section:
sed -i '$a\osd pool default size = 1' ceph.conf
# Install Ceph on all of the nodes
ceph-deploy install master1-admin node1-monitor node2-osd
# Error: [ceph_deploy][ERROR ] RuntimeError: NoSectionError: No section: 'ceph'
# Fix:
yum remove ceph-release -y
# Reinstall
ceph-deploy install master1-admin node1-monitor node2-osd
# Create the initial monitor(s) and gather all of the keys
ceph-deploy mon create-initial
# /data/ceph-deploy now contains a number of keyrings
[root@master1-admin ceph-deploy]# ll
total 284
-rw-------  1 root root    113 Oct 25 17:17 ceph.bootstrap-mds.keyring
-rw-------  1 root root     71 Oct 25 17:17 ceph.bootstrap-mgr.keyring
-rw-------  1 root root    113 Oct 25 17:17 ceph.bootstrap-osd.keyring
-rw-------  1 root root    113 Oct 25 17:17 ceph.bootstrap-rgw.keyring
-rw-------  1 root root    129 Oct 25 17:17 ceph.client.admin.keyring
-rw-r--r--  1 root root    230 Oct 25 16:48 ceph.conf
-rw-r--r--. 1 root root 188718 Oct 25 17:17 ceph-deploy-ceph.log
-rw-------. 1 root root     73 Oct 25 15:54 ceph.mon.keyring
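If the keyrings do not show up, check that the monitor daemon is actually running on the monitor node (a sketch; with systemd the unit name is derived from the host's short name):

# on node1-monitor
systemctl status ceph-mon@node1-monitor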

Add OSDs to Ceph and activate them

# Ceph OSD
# OSD stands for Object Storage Device; its main jobs are storing, replicating, rebalancing and recovering data
# On the OSD node node2-osd
mkdir /var/local/osd1
chmod 777 /var/local/osd1
# Back on master1-admin, prepare the OSD
ceph-deploy osd prepare node2-osd:/var/local/osd1
# Back on master1-admin, activate the OSD
ceph-deploy osd activate node2-osd:/var/local/osd1
# Copy the config file and admin key to the admin node and the Ceph nodes; run on master1-admin
ceph-deploy admin master1-admin node1-monitor node2-osd
# On master1-admin, node1-monitor and node2-osd
chmod +r /etc/ceph/ceph.client.admin.keyring
# Check the cluster status
ceph health
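ceph health only prints a one-line summary; for more detail on the monitor, OSD and PG state you can also run:

ceph -s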

Scale out: add two more OSDs

# Install the Ceph packages on node3-osd and node4-osd with ceph-deploy; run on master1-admin
ceph-deploy install --no-adjust-repos node3-osd node4-osd
# On the OSD nodes node3-osd and node4-osd
mkdir /var/local/osd1
chmod 777 /var/local/osd1
# Back on master1-admin, prepare the OSDs
ceph-deploy osd prepare node3-osd:/var/local/osd1 node4-osd:/var/local/osd1
# Back on master1-admin, activate the OSDs
ceph-deploy osd activate node3-osd:/var/local/osd1 node4-osd:/var/local/osd1
# Copy ceph.conf and the admin key to /etc/ceph on node3-osd and node4-osd with ceph-deploy, so that the ceph CLI
# can be used on those nodes without specifying the monitor address and ceph.client.admin.keyring; run on master1-admin
ceph-deploy admin node3-osd node4-osd
# On node3-osd and node4-osd
chmod +r /etc/ceph/ceph.client.admin.keyring
# Check the cluster status
ceph health

Error: HEALTH_WARN too few PGs per OSD (21 < min 30)

[root@master1-admin ceph-deploy]# ceph health
HEALTH_WARN too few PGs per OSD (21 < min 30)
[root@master1-admin ceph-deploy]# ceph osd pool ls detail
pool 0 'rbd' replicated size 1 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 1 flags hashpspool stripe_width 0

[root@master1-admin ceph-deploy]# rados lspools
rbd
[root@master1-admin ceph-deploy]# ceph osd pool set rbd pg_num 128
set pool 0 pg_num to 128
[root@master1-admin ceph-deploy]# ceph osd pool set rbd pgp_num 128
set pool 0 pgp_num to 128
[root@master1-admin ceph-deploy]# ceph health
HEALTH_OK
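The numbers are consistent with how the warning is calculated: the monitors divide the total number of PG replicas by the number of OSDs and compare the result with the minimum of 30.

# PGs per OSD ≈ pg_num * replica size / number of OSDs
# before: 64  * 1 / 3 ≈ 21  -> below the minimum of 30, hence the warning
# after : 128 * 1 / 3 ≈ 42  -> above the minimum, HEALTH_OK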

Remove OSDs

# Check the OSD tree
[root@master1-admin ceph-deploy]# ceph osd tree
ID WEIGHT  TYPE NAME          UP/DOWN REWEIGHT PRIMARY-AFFINITY 
-1 0.29128 root default                                         
-2 0.09709     host node2-osd                                   
 0 0.09709         osd.0           up  1.00000          1.00000 
-3 0.09709     host node3-osd                                   
 1 0.09709         osd.1           up  1.00000          1.00000 
-4 0.09709     host node4-osd                                   
 2 0.09709         osd.2           up  1.00000          1.00000
# Stop the OSD daemons
systemctl stop ceph-osd@1
systemctl stop ceph-osd@2
# Mark the OSDs out and down
# This tells the monitors that these OSDs can no longer serve data, so their data is recovered onto the remaining OSDs
ceph osd out 1
ceph osd out 2
ceph osd down 1
ceph osd down 2
# Remove the OSDs from the CRUSH map
ceph osd crush remove osd.1
ceph osd crush remove osd.2
# Remove the OSDs from the cluster
ceph osd rm 1
ceph osd rm 2
# Remove the OSDs' authentication keys, i.e. delete their entries from the cluster's auth database
ceph auth del osd.1
ceph auth del osd.2
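Afterwards, osd.1 and osd.2 should no longer appear in the output of:

ceph osd tree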