查看集群状态
[root@storage-ceph01 ~]# ceph -s
cluster:
id: 4d8fec26-e363-4753-b60f-49d69ab44cab
health: HEALTH_WARN
Reduced data availability: 1 pg inactive
services:
mon: 3 daemons, quorum storage-ceph03,storage-ceph02,storage-ceph01 (age 3m)
mgr: storage-ceph01(active, since 3m), standbys: storage-ceph02, storage-ceph03
mds: 3 up:standby
osd: 5 osds: 5 up (since 83s), 5 in (since 85s)
data:
pools: 1 pools, 1 pgs
objects: 0 objects, 0 B
usage: 5.1 GiB used, 340 GiB / 345 GiB avail
pgs: 100.000% pgs unknown
1 unknown
progress:
Rebalancing after osd.3 marked in (98s)
[............................]
[root@storage-ceph01 ~]# ceph health detail
HEALTH_WARN Reduced data availability: 1 pg inactive
[WRN] PG_AVAILABILITY: Reduced data availability: 1 pg inactive
pg 1.0 is stuck inactive for 5m, current state unknown, last acting []
查看处于卡住（stuck）状态的 pg 信息
[root@storage-ceph01 ~]# ceph pg dump_stuck inactive
ok
PG_STAT STATE UP UP_PRIMARY ACTING ACTING_PRIMARY
1.0 unknown [] -1 [] -1
查看 pg 的详细状态
[root@storage-ceph01 ~]# ceph pg 1.0 query
Error ENOENT: i don't have pgid 1.0
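重新创建 pg（pg 1.0 的 acting 集合为空且无法查询，只能强制重建。注意：force-create-pg 会创建一个空的 pg，该 pg 中原有的数据将无法恢复；此处存储池中没有任何对象（0 objects），因此可以安全执行）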
[root@storage-ceph01 ~]# ceph osd force-create-pg 1.0 --yes-i-really-mean-it
pg 1.0 now creating, ok
验证 pg 状态（应为 active+clean）
[root@storage-ceph01 ~]# ceph pg 1.0 query | head
{
"snap_trimq": "[]",
"snap_trimq_len": 0,
"state": "active+clean",
"epoch": 100,
"up": [
3,
0,
1
],
检查集群状态
[root@storage-ceph01 ~]# ceph -s
cluster:
id: 4d8fec26-e363-4753-b60f-49d69ab44cab
health: HEALTH_OK
services:
mon: 3 daemons, quorum storage-ceph03,storage-ceph02,storage-ceph01 (age 13m)
mgr: storage-ceph01(active, since 13m), standbys: storage-ceph02, storage-ceph03
mds: 3 up:standby
osd: 5 osds: 5 up (since 11m), 5 in (since 11m)
data:
pools: 1 pools, 1 pgs
objects: 0 objects, 0 B
usage: 5.1 GiB used, 340 GiB / 345 GiB avail
pgs: 1 active+clean