daemons have recently crashed
Official documentation
- https://docs.ceph.com/en/latest/rados/operations/health-checks/#recent-crash
- https://docs.ceph.com/en/latest/mgr/crash/?highlight=crash
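Before drilling into individual crashes, the warning itself shows up in overall cluster health; a quick check (commands only, output omitted):
ceph -s              # cluster status; HEALTH_WARN with "daemons have recently crashed"
ceph health detail   # lists the individual RECENT_CRASH entries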
List the crashes
ceph@ceph-deploy:~/ceph-cluster$ ceph crash ls
ID ENTITY NEW
2021-09-07T19:49:25.439268Z_f11b2d43-6b0e-42cc-92df-912b640c1ff2 mon.ceph-mon-03
2021-09-07T19:49:49.499661Z_1f780239-faaa-4836-98a6-950dd0f8a4a8 mon.ceph-mon-02
2021-09-09T09:51:12.851909Z_e05836d7-a7a9-4e58-8ef8-6628c76edc03 mon.ceph-mon-03
2021-09-09T10:01:33.228488Z_989dfa43-13a0-4980-846e-610448a92294 mon.ceph-mon-02
2021-09-10T20:06:58.913744Z_efa01200-df2e-4611-9291-1ef23c1184c5 mon.ceph-mon-03 *
List new crashes
ceph@ceph-deploy:~/ceph-cluster$ ceph crash ls-new
ID ENTITY NEW
2021-09-10T20:06:58.913744Z_efa01200-df2e-4611-9291-1ef23c1184c5 mon.ceph-mon-03 *
View details of a specific crash
ceph@ceph-deploy:~/ceph-cluster$ ceph crash info 2021-09-10T20:06:58.913744Z_efa01200-df2e-4611-9291-1ef23c1184c5
{
"backtrace": [
"/lib/x86_64-linux-gnu/libpthread.so.0(+0x12980) [0x7fcdbbe18980]",
"gsignal()",
"abort()",
"/usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0x8c957) [0x7fcdbb905957]",
"/usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0x92ae6) [0x7fcdbb90bae6]",
"/usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0x92b21) [0x7fcdbb90bb21]",
"/usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0x92d54) [0x7fcdbb90bd54]",
"/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4(+0x1424b) [0x7fcdbc03924b]",
"tc_new()",
"(rocksdb::Arena::AllocateNewBlock(unsigned long)+0x4e) [0x562c2e8e2f2e]",
"(rocksdb::Arena::AllocateFallback(unsigned long, bool)+0xcb) [0x562c2e8e2b1b]",
"(rocksdb::Arena::AllocateAligned(unsigned long, unsigned long, rocksdb::Logger*)+0x1c2) [0x562c2e8e2ea6]",
"(rocksdb::ConcurrentArena::AllocateAligned(unsigned long, unsigned long, rocksdb::Logger*)::{lambda()#1}::operator()() const+0x37) [0x562c2e806595]",
"(char* rocksdb::ConcurrentArena::AllocateImpl<rocksdb::ConcurrentArena::AllocateAligned(unsigned long, unsigned long, rocksdb::Logger*)::{lambda()#1}>(unsigned long, bool, rocksdb::ConcurrentArena::AllocateAligned(unsigned long, unsigned long, rocksdb::Logger*)::{lambda()#1} const&)+0x14c) [0x562c2e808806]",
"(rocksdb::ConcurrentArena::AllocateAligned(unsigned long, unsigned long, rocksdb::Logger*)+0xbd) [0x562c2e806655]",
"(rocksdb::InlineSkipList<rocksdb::MemTableRep::KeyComparator const&>::AllocateNode(unsigned long, int)+0x58) [0x562c2e8e6ffe]",
"(rocksdb::InlineSkipList<rocksdb::MemTableRep::KeyComparator const&>::AllocateKey(unsigned long)+0x31) [0x562c2e8e68c5]",
"/usr/bin/ceph-mon(+0xc6170d) [0x562c2e8e570d]",
"(rocksdb::MemTable::Add(unsigned long, rocksdb::ValueType, rocksdb::Slice const&, rocksdb::Slice const&, bool, rocksdb::MemTablePostProcessInfo*, void**)+0x131) [0x562c2e801faf]",
"(rocksdb::MemTableInserter::PutCFImpl(unsigned int, rocksdb::Slice const&, rocksdb::Slice const&, rocksdb::ValueType)+0x355) [0x562c2e8acdd1]",
"(rocksdb::MemTableInserter::PutCF(unsigned int, rocksdb::Slice const&, rocksdb::Slice const&)+0x4e) [0x562c2e8ad566]",
"(rocksdb::WriteBatchInternal::Iterate(rocksdb::WriteBatch const*, rocksdb::WriteBatch::Handler*, unsigned long, unsigned long)+0x515) [0x562c2e8a665b]",
"(rocksdb::WriteBatch::Iterate(rocksdb::WriteBatch::Handler*) const+0xa5) [0x562c2e8a612b]",
"(rocksdb::WriteBatchInternal::InsertInto(rocksdb::WriteThread::WriteGroup&, unsigned long, rocksdb::ColumnFamilyMemTables*, rocksdb::FlushScheduler*, rocksdb::TrimHistoryScheduler*, bool, unsigned long, rocksdb::DB*, bool, bool, bool)+0x22d) [0x562c2e8a9c29]",
"(rocksdb::DBImpl::WriteImpl(rocksdb::WriteOptions const&, rocksdb::WriteBatch*, rocksdb::WriteCallback*, unsigned long*, unsigned long, bool, unsigned long*, unsigned long, rocksdb::PreReleaseCallback*)+0x1886) [0x562c2e7428f6]",
"(rocksdb::DBImpl::Write(rocksdb::WriteOptions const&, rocksdb::WriteBatch*)+0x59) [0x562c2e740fd5]",
"(RocksDBStore::submit_common(rocksdb::WriteOptions&, std::shared_ptr<KeyValueDB::TransactionImpl>)+0x7c) [0x562c2e69b20c]",
"(RocksDBStore::submit_transaction_sync(std::shared_ptr<KeyValueDB::TransactionImpl>)+0xa1) [0x562c2e69bcc1]",
"(MonitorDBStore::apply_transaction(std::shared_ptr<MonitorDBStore::Transaction>)+0x70f) [0x562c2e3e7def]",
"(Paxos::store_state(MMonPaxos*)+0x4c5) [0x562c2e5322f5]",
"(Paxos::handle_commit(boost::intrusive_ptr<MonOpRequest>)+0x14b) [0x562c2e5366eb]",
"(Paxos::dispatch(boost::intrusive_ptr<MonOpRequest>)+0x433) [0x562c2e53e813]",
"(Monitor::dispatch_op(boost::intrusive_ptr<MonOpRequest>)+0x1362) [0x562c2e430ec2]",
"(Monitor::_ms_dispatch(Message*)+0x50a) [0x562c2e431aba]",
"(Dispatcher::ms_dispatch2(boost::intrusive_ptr<Message> const&)+0x58) [0x562c2e460068]",
"(DispatchQueue::entry()+0x11ea) [0x7fcdbd19fd2a]",
"(DispatchQueue::DispatchThread::entry()+0xd) [0x7fcdbd24ceed]",
"/lib/x86_64-linux-gnu/libpthread.so.0(+0x76db) [0x7fcdbbe0d6db]",
"clone()"
],
"ceph_version": "16.2.5",
"crash_id": "2021-09-10T20:06:58.913744Z_efa01200-df2e-4611-9291-1ef23c1184c5",
"entity_name": "mon.ceph-mon-03",
"os_id": "ubuntu",
"os_name": "Ubuntu",
"os_version": "18.04.5 LTS (Bionic Beaver)",
"os_version_id": "18.04",
"process_name": "ceph-mon",
"stack_sig": "0e8ae1a4c4f0b6025d69ef669a7de2953728fb20c318e860f8ada6de0c856798",
"timestamp": "2021-09-10T20:06:58.913744Z",
"utsname_hostname": "ceph-mon-03",
"utsname_machine": "x86_64",
"utsname_release": "4.15.0-156-generic",
"utsname_sysname": "Linux",
"utsname_version": "#163-Ubuntu SMP Thu Aug 19 23:31:58 UTC 2021"
}
Clear a specific warning
ceph@ceph-deploy:~/ceph-cluster$ ceph crash archive 2021-09-10T20:06:58.913744Z_efa01200-df2e-4611-9291-1ef23c1184c5
Clear all warnings
ceph@ceph-deploy:~/ceph-cluster$ ceph crash archive-all
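How long a crash counts as "recent" is controlled by the crash module's warn_recent_interval option (two weeks by default). A hedged sketch for tuning or disabling the warning; the exact option path may differ between releases:
ceph config set mgr mgr/crash/warn_recent_interval 0   # 0 is commonly used to disable the RECENT_CRASH warning entirely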
osds down
Official documentation
- https://docs.ceph.com/en/latest/rados/operations/health-checks/#osd-down
Confirm which OSD is down
ceph@ceph-deploy:/etc/ceph$ ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.17537 root default
-3 0.05846 host ceph-node-01
0 hdd 0.01949 osd.0 up 1.00000 1.00000
1 hdd 0.01949 osd.1 up 1.00000 1.00000
2 hdd 0.01949 osd.2 up 1.00000 1.00000
-5 0.05846 host ceph-node-02
3 hdd 0.01949 osd.3 up 1.00000 1.00000
4 hdd 0.01949 osd.4 up 1.00000 1.00000
5 hdd 0.01949 osd.5 up 1.00000 1.00000
-7 0.05846 host ceph-node-03
6 hdd 0.01949 osd.6 down 0 1.00000
7 hdd 0.01949 osd.7 up 1.00000 1.00000
8 hdd 0.01949 osd.8 up 1.00000 1.00000
Check the log
root@ceph-node-03:~# tail -f /var/log/ceph/ceph-osd.6.log
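If the OSD process is being killed by the kernel OOM killer (a common cause of flapping OSDs on under-provisioned hosts), the kernel log on the affected node will usually show it; a minimal sketch, assuming osd.6 on ceph-node-03 as above:
dmesg -T | grep -iE 'out of memory|oom|killed process'                      # look for OOM kills
free -h                                                                     # remaining memory on the node
grep -iE 'abort|error|heartbeat' /var/log/ceph/ceph-osd.6.log | tail -n 20  # recent errors in the OSD log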
Solution
- OSDs going down while reading and writing large files may indicate that the host is under-provisioned; consider increasing its CPU and memory (see the sketch below).
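If the host cannot be resized right away, a stopgap is to bring the OSD back up and cap its memory; osd_memory_target is a standard BlueStore option, but the 2 GiB value below is only an example, not a recommendation:
systemctl restart ceph-osd@6                         # on ceph-node-03: restart the down OSD
ceph config set osd.6 osd_memory_target 2147483648   # example: limit osd.6 to ~2 GiB
ceph osd tree                                        # confirm osd.6 is back up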
Client issues
mount(2) system call failed: Structure needs cleaning.
Solution
- Unmap the RBD image
- Delete the RBD image
- Create a new RBD image
- Map the new RBD image
- Create a filesystem on the mapped device (a freshly created image has none)
- mount /dev/rbd0 /rbd (the full sequence is sketched below)
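A minimal sketch of the full sequence, assuming the image is rbd-data/img1, maps back to /dev/rbd0, is mounted at /rbd, and uses ext4; the pool/image names, size, and filesystem are placeholders:
umount /rbd                           # unmount the corrupted filesystem
rbd unmap /dev/rbd0                   # unmap the image from the client
rbd rm rbd-data/img1                  # delete the damaged image
rbd create rbd-data/img1 --size 10G   # recreate it (size is an example)
rbd map rbd-data/img1                 # map it again, typically back to /dev/rbd0
mkfs.ext4 /dev/rbd0                   # a new image has no filesystem, so create one
mount /dev/rbd0 /rbd
Note that recreating the image discards its data; since "Structure needs cleaning" points to filesystem corruption, running fsck (or xfs_repair for XFS) against the mapped device first is a less destructive alternative.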