mongodb从库无法启动一例(replication_recovery.cpp)
环境:
OS:centos 7
mongodb:4.4.22
背景:
1主1从1仲裁的副本集环境,从库的IP由192.168.1.104修改为192.168.1.107后,在主库上执行了如下操作,之后发现从库无法启动:
myrepl:PRIMARY> rs.remove("192.168.1.104:29001")
myrepl:PRIMARY> conf=rs.conf()
myrepl:PRIMARY> conf.members[1].host="192.168.1.107:29001"
myrepl:PRIMARY> rs.reconfig(conf,{"force":true})
myrepl:PRIMARY> rs.add({ host: "192.168.1.107:29001", priority: 0, votes: 0 })
报错信息:
{"t":{"$date":"2023-07-16T20:44:39.569-04:00"},"s":"F", "c":"REPL", "id":40313, "ctx":"initandlisten","msg":"Applied op oplogApplicationStartPoint not found","attr":{"oplogApplicationStartPoint":{"":{"$timestamp":{"t":1689326833,"i":1}}},"topOfOplog":{"":{"$timestamp":{"t":1689325501,"i":1}}}}}
{"t":{"$date":"2023-07-16T20:44:39.569-04:00"},"s":"F", "c":"-", "id":23091, "ctx":"initandlisten","msg":"Fatal assertion","attr":{"msgid":40313,"file":"src/mongo/db/repl/replication_recovery.cpp","line":632}}
{"t":{"$date":"2023-07-16T20:44:39.569-04:00"},"s":"F", "c":"-", "id":23092, "ctx":"initandlisten","msg":"\n\n***aborting after fassert() failure\n\n"}
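原因:节点启动做恢复时,需要从oplogApplicationStartPoint(时间戳1689326833)开始回放oplog,但在本地oplog中找不到这条记录(日志显示本地oplog的最新记录topOfOplog为1689325501),即oplog被覆盖了,找不到相应的数据,节点无法自行恢复,需要重新初始化该节点: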
处理步骤:
将原data目录改名备份(相当于清空数据目录),新建一个空的data目录,然后重新启动该节点,让它重新做初始同步
[root@localhost mongodb]# cd /home/middle/mongodb
[root@localhost mongodb]# mv data bak_data
[root@localhost mongodb]# mkdir data
[root@localhost conf]# /usr/local/services/mongodb/bin/mongod -f /home/middle/mongodb/conf/mongo.cnf
这个时候查看集群状态
myrepl:PRIMARY> rs.status()
{
"set" : "myrepl",
"date" : ISODate("2023-07-17T00:54:53.232Z"),
"myState" : 1,
"term" : NumberLong(4),
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 1,
"votingMembersCount" : 2,
"writableVotingMembersCount" : 1,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1689555291, 1),
"t" : NumberLong(4)
},
"lastCommittedWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1689555291, 1),
"t" : NumberLong(4)
},
"readConcernMajorityWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
"appliedOpTime" : {
"ts" : Timestamp(1689555291, 1),
"t" : NumberLong(4)
},
"durableOpTime" : {
"ts" : Timestamp(1689555291, 1),
"t" : NumberLong(4)
},
"lastAppliedWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
"lastDurableWallTime" : ISODate("2023-07-17T00:54:51.641Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1689555251, 1),
"electionCandidateMetrics" : {
"lastElectionReason" : "electionTimeout",
"lastElectionDate" : ISODate("2023-07-17T00:43:31.531Z"),
"electionTerm" : NumberLong(4),
"lastCommittedOpTimeAtElection" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"lastSeenOpTimeAtElection" : {
"ts" : Timestamp(1689328811, 1),
"t" : NumberLong(3)
},
"numVotesNeeded" : 2,
"priorityAtElection" : 1,
"electionTimeoutMillis" : NumberLong(10000),
"numCatchUpOps" : NumberLong(0),
"newTermStartDate" : ISODate("2023-07-17T00:43:31.548Z"),
"wMajorityWriteAvailabilityDate" : ISODate("2023-07-17T00:43:31.655Z")
},
"members" : [
{
"_id" : 0,
"name" : "192.168.1.102:29001",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 695,
"optime" : {
"ts" : Timestamp(1689555291, 1),
"t" : NumberLong(4)
},
"optimeDate" : ISODate("2023-07-17T00:54:51Z"),
"lastAppliedWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
"lastDurableWallTime" : ISODate("2023-07-17T00:54:51.641Z"),
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1689554611, 1),
"electionDate" : ISODate("2023-07-17T00:43:31Z"),
"configVersion" : 23819,
"configTerm" : -1,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 3,
"name" : "192.168.1.105:29001",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 688,
"lastHeartbeat" : ISODate("2023-07-17T00:54:52.042Z"),
"lastHeartbeatRecv" : ISODate("2023-07-17T00:54:53.123Z"),
"pingMs" : NumberLong(1),
"lastHeartbeatMessage" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 23819,
"configTerm" : -1
},
{
"_id" : 4,
"name" : "192.168.1.107:29001",
"health" : 1,
"state" : 5,
"stateStr" : "STARTUP2",
"uptime" : 61,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastAppliedWallTime" : ISODate("1970-01-01T00:00:00Z"),
"lastDurableWallTime" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2023-07-17T00:54:52.518Z"),
"lastHeartbeatRecv" : ISODate("2023-07-17T00:54:52.412Z"),
"pingMs" : NumberLong(5),
"lastHeartbeatMessage" : "",
"syncSourceHost" : "192.168.1.102:29001",
"syncSourceId" : 0,
"infoMessage" : "",
"configVersion" : 23819,
"configTerm" : -1
}
],
"ok" : 1
}
目前该节点处于STARTUP2状态
STARTUP2:节点在整个初始同步(initial sync)过程中都处于这个状态
同步数据的过程中,新节点的状态stateStr 为STARTUP2,待同步完成后会变为SECONDARY.