3.实现redis哨兵,模拟master故障场景
3.实现redis哨兵,模拟master故障场景
实验拓扑图
3.1 哨兵的准备:实现主从复制架构
哨兵的前提是已经实现了一个redis的主从复制的运行环境,从而实现一个一主两从基于哨兵的高可用redis架构。
注意: master 的配置文件中也必须配置 masterauth,且必须和所有 slave 的 masterauth 相同(故障转移后原 master 会变为 slave,需要用它向新 master 认证)
所有主从节点的redis.conf中关键配置
3.2 准备主从环境配置
#在所有主从节点执行
#在所有主从节点执行
[root@centos8 ~]#dnf -y install redis
[root@centos8 ~]#vim /etc/redis.conf
bind 0.0.0.0
masterauth "123456"
requirepass "123456"
#或者非交互执行
[root@centos8 ~]#sed -i.bak -e 's/bind 127.0.0.1/bind 0.0.0.0/' -e 's/^# masterauth .*/masterauth 123456/' -e 's/^# requirepass .*/requirepass 123456/' /etc/redis.conf
#在所有从节点执行
[root@centos8 ~]#echo "replicaof 10.0.0.8 6379" >> /etc/redis.conf
#在所有主从节点执行
[root@centos8 ~]#systemctl enable --now redis
master服务器状态
[root@CentOS8 ~]# hostnamectl set-hostname redis-master;exit
[root@master ~]# redis-cli -a 123456
127.0.0.1:6379> info replication
# Replication
role:master
connected_slaves:2
slave0:ip=10.0.0.18,port=6379,state=online,offset=1218,lag=0
slave1:ip=10.0.0.28,port=6379,state=online,offset=1218,lag=1
master_replid:be9ca408e9c9f8ca1a74de85dc71bf65dc3dba06
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:1218
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:1218
配置slave1
[root@CentOS8 ~]# hostnamectl set-hostname redis-slave1;exit
[root@redis-slave1 ~]# redis-cli -a 123456
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
127.0.0.1:6379> REPLICAOF 10.0.0.8 6379
OK
127.0.0.1:6379> CONFIG SET masterauth "123456"
OK
127.0.0.1:6379> info replication
# Replication
role:slave
master_host:10.0.0.8
master_port:6379
master_link_status:up
master_last_io_seconds_ago:0
master_sync_in_progress:0
slave_repl_offset:966
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:be9ca408e9c9f8ca1a74de85dc71bf65dc3dba06
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:966
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:966
配置slave2
[root@redis-slave2 ~]# redis-cli -a 123456
127.0.0.1:6379> REPLICAOF 10.0.0.8 6379
OK
127.0.0.1:6379> CONFIG SET masterauth "123456"
OK
127.0.0.1:6379> info replication
# Replication
role:slave
master_host:10.0.0.8
master_port:6379
master_link_status:up
master_last_io_seconds_ago:10
master_sync_in_progress:0
slave_repl_offset:1134
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:be9ca408e9c9f8ca1a74de85dc71bf65dc3dba06
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:1134
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1051
repl_backlog_histlen:84
127.0.0.1:6379>
3.2.3.2 编辑哨兵的配置文件
sentinel配置
Sentinel实际上是一个特殊的redis服务器,支持部分redis指令,但很多指令并不支持。默认监听在26379/tcp端口。
哨兵可以不和Redis服务器部署在一起,但一般部署在一起以节约成本
所有redis节点使用相同的以下示例的配置文件
#如果是编译安装,在源码目录有sentinel.conf,复制到安装目录即可,
#如:/apps/redis/etc/sentinel.conf
[root@centos8 ~]#vim /etc/redis-sentinel.conf
bind 0.0.0.0
port 26379
daemonize yes
pidfile "redis-sentinel.pid"
logfile "sentinel_26379.log"
dir "/tmp" #工作目录
sentinel monitor mymaster 10.0.0.8 6379 2
#mymaster是集群的名称,此行指定当前mymaster集群中master服务器的地址和端口
#2为法定人数(quorum),即至少有几个sentinel认为master down了才进行故障转移,一般此值取所有
#sentinel节点(总数一般是>=3的奇数,如:3,5,7等)的一半以上的整数值,比如,总数是3,即3/2=1.5,
#向上取整为2,是判定master的ODOWN(客观下线)的依据
sentinel auth-pass mymaster 123456
#mymaster集群中master的密码,注意此行必须放在对应的sentinel monitor mymaster行的后面
sentinel down-after-milliseconds mymaster 30000
#(SDOWN)判断mymaster集群中所有节点的主观下线的时间,单位:毫秒,建议3000
sentinel parallel-syncs mymaster 1
#发生故障转移后,可以同时向新master同步数据的slave的数量,数字越小总同步时间越长,但可以减轻新
#master的负载压力
sentinel failover-timeout mymaster 180000
#所有slaves指向新的master所需的超时时间,单位:毫秒
sentinel deny-scripts-reconfig yes #禁止修改脚本
logfile /var/log/redis/sentinel.log
三个哨兵服务器的配置都如下
[root@master ~]#grep -vE "^#|^$" /etc/redis-sentinel.conf
port 26379
daemonize no
pidfile "/var/run/redis-sentinel.pid"
logfile "/var/log/redis/sentinel.log"
dir "/tmp"
sentinel monitor mymaster 10.0.0.8 6379 2 #修改此行
sentinel auth-pass mymaster 123456 #增加此行
sentinel down-after-milliseconds mymaster 3000 #修改此行
sentinel parallel-syncs mymaster 1
sentinel failover-timeout mymaster 180000
sentinel deny-scripts-reconfig yes
#以下内容自动生成,不需要修改
sentinel myid 50547f34ed71fd48c197924969937e738a39975b
#此行自动生成必须唯一,修改此值需重启redis和sentinel服务
.....
# Generated by CONFIG REWRITE
protected-mode no
supervised systemd
sentinel leader-epoch mymaster 0
sentinel known-replica mymaster 10.0.0.28 6379
sentinel known-replica mymaster 10.0.0.18 6379
sentinel current-epoch 0
[root@master ~]#scp /etc/redis-sentinel.conf 10.0.0.18:/etc/
[root@master ~]#scp /etc/redis-sentinel.conf 10.0.0.28:/etc/
[root@redis-master ~]# systemctl start redis-sentinel.service
[root@redis-slave1 ~]# systemctl start redis-sentinel.service
[root@redis-slave2 ~]# systemctl start redis-sentinel.service
3.2.3.3 启动哨兵
三台哨兵服务器都要启动
#确保每个哨兵主机myid不同
#确保每个哨兵主机myid不同
[root@redis-master ~]# vim /etc/redis-sentinel.conf
sentinel myid 8c13e9663e38ddfaa9660af24551e3644a4ead87
[root@redis-slave1 ~]# vim /etc/redis-sentinel.conf
sentinel myid abcc57337ea96ee21277b98e715baf130028aa1e
[root@redis-slave2 ~]# vim /etc/redis-sentinel.conf
sentinel myid e2968b16bc36824ee1918aab53181621a2427730
[root@redis-master ~]#systemctl enable --now redis-sentinel.service
[root@redis-slave1 ~]#systemctl enable --now redis-sentinel.service
[root@redis-slave2 ~]#systemctl enable --now redis-sentinel.service
3.2.3.4 验证哨兵端口
[root@redis-master ~]# ss -ntl
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
LISTEN 0 5 127.0.0.1:631 0.0.0.0:*
LISTEN 0 128 127.0.0.1:6010 0.0.0.0:*
LISTEN 0 128 127.0.0.1:6011 0.0.0.0:*
LISTEN 0 128 0.0.0.0:6379 0.0.0.0:*
LISTEN 0 128 0.0.0.0:26379 0.0.0.0:*
3.2.3.5 查看哨兵日志
master的哨兵日志
[root@redis-master ~]# tail -f /var/log/redis/sentinel.log
2531:X 24 Aug 2022 12:14:35.719 # WARNING: The TCP backlog setting of 511 cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of 128.
2531:X 24 Aug 2022 12:14:35.725 # Sentinel ID is 8c13e9663e38ddfaa9660af24551e3644a4ead87
2531:X 24 Aug 2022 12:14:35.725 # +monitor master mymaster 10.0.0.8 6379 quorum 2
2531:X 24 Aug 2022 12:14:35.726 * +slave slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:14:35.726 * +slave slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:14:36.610 * +sentinel sentinel abcc57337ea96ee21277b98e715baf130028aa1e 10.0.0.18 26379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:14:37.673 * +sentinel sentinel e2968b16bc36824ee1918aab53181621a2427730 10.0.0.28 26379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:15:25.891 * +reboot master mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:15:36.107 * +reboot slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:15:46.146 * +reboot slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
slave的哨兵日志
[root@redis-slave1 ~]# tail -f /var/log/redis/sentinel.log
2396:X 24 Aug 2022 12:11:50.616 # WARNING: The TCP backlog setting of 511 cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of 128.
2396:X 24 Aug 2022 12:11:50.623 # Sentinel ID is abcc57337ea96ee21277b98e715baf130028aa1e
2396:X 24 Aug 2022 12:11:50.623 # +monitor master mymaster 10.0.0.8 6379 quorum 2
2396:X 24 Aug 2022 12:11:50.625 * +slave slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.8 6379
2396:X 24 Aug 2022 12:11:50.626 * +slave slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
2396:X 24 Aug 2022 12:11:51.555 * +sentinel sentinel e2968b16bc36824ee1918aab53181621a2427730 10.0.0.28 26379 @ mymaster 10.0.0.8 6379
2396:X 24 Aug 2022 12:14:38.887 * +sentinel sentinel 8c13e9663e38ddfaa9660af24551e3644a4ead87 10.0.0.8 26379 @ mymaster 10.0.0.8 6379
2396:X 24 Aug 2022 12:15:21.499 * +reboot master mymaster 10.0.0.8 6379
2396:X 24 Aug 2022 12:15:31.534 * +reboot slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.8 6379
2396:X 24 Aug 2022 12:15:51.646 * +reboot slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
[root@redis-slave2 ~]# tail -f /var/log/redis/sentinel.log
2408:X 24 Aug 2022 12:11:35.081 # WARNING: The TCP backlog setting of 511 cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of 128.
2408:X 24 Aug 2022 12:11:35.087 # Sentinel ID is e2968b16bc36824ee1918aab53181621a2427730
2408:X 24 Aug 2022 12:11:35.087 # +monitor master mymaster 10.0.0.8 6379 quorum 2
2408:X 24 Aug 2022 12:11:35.088 * +slave slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.8 6379
2408:X 24 Aug 2022 12:11:35.089 * +slave slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
2408:X 24 Aug 2022 12:11:52.322 * +sentinel sentinel abcc57337ea96ee21277b98e715baf130028aa1e 10.0.0.18 26379 @ mymaster 10.0.0.8 6379
2408:X 24 Aug 2022 12:14:38.585 * +sentinel sentinel 8c13e9663e38ddfaa9660af24551e3644a4ead87 10.0.0.8 26379 @ mymaster 10.0.0.8 6379
2408:X 24 Aug 2022 12:15:26.079 * +reboot master mymaster 10.0.0.8 6379
2408:X 24 Aug 2022 12:15:36.084 * +reboot slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.8 6379
2408:X 24 Aug 2022 12:15:46.091 * +reboot slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
3.2.3.6 当前sentinel状态
在sentinel状态中尤其要注意最后一行:其中的master IP、slave数量和sentinel数量必须与实际的服务器数量完全相符。
[root@redis-master ~]# redis-cli -p 26379
127.0.0.1:26379> info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=10.0.0.8:6379,slaves=2,sentinels=3
3.2.3.7 停止Redis Master 节点测试故障转移
[root@redis-master ~]# killall redis-server
查看各节点上哨兵信息:
[root@redis-master ~]# redis-cli -a 123456 -p 26379
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
127.0.0.1:26379> info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=10.0.0.28:6379,slaves=2,sentinels=3
故障转移时sentinel的信息:
[root@redis-master ~]# tail -20 /var/log/redis/sentinel.log
2531:X 24 Aug 2022 12:27:29.928 # +failover-state-select-slave master mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:29.981 # +selected-slave slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:29.981 * +failover-state-send-slaveof-noone slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:30.053 * +failover-state-wait-promotion slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:30.390 # +promoted-slave slave 10.0.0.28:6379 10.0.0.28 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:30.390 # +failover-state-reconf-slaves master mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:30.445 * +slave-reconf-sent slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:30.960 # -odown master mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:31.414 * +slave-reconf-inprog slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:31.414 * +slave-reconf-done slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:31.500 # +failover-end master mymaster 10.0.0.8 6379
2531:X 24 Aug 2022 12:27:31.501 # +switch-master mymaster 10.0.0.8 6379 10.0.0.28 6379
2531:X 24 Aug 2022 12:27:31.504 * +slave slave 10.0.0.18:6379 10.0.0.18 6379 @ mymaster 10.0.0.28 6379
2531:X 24 Aug 2022 12:27:31.504 * +slave slave 10.0.0.8:6379 10.0.0.8 6379 @ mymaster 10.0.0.28 6379
2531:X 24 Aug 2022 12:27:34.573 # +sdown slave 10.0.0.8:6379 10.0.0.8 6379 @ mymaster 10.0.0.28 6379
3.2.3.8 故障转移后的redis配置文件会被自动修改
故障转移后redis.conf中的replicaof行的master IP会被修改
[root@redis-slave1 ~]#grep ^replicaof /etc/redis.conf
replicaof 10.0.0.28 6379
哨兵配置文件的sentinel monitor IP 同样也会被修改
[root@redis-slave1 ~]# grep "^[a-z]" /etc/redis-sentinel.conf
port 26379
daemonize no
pidfile "/var/run/redis-sentinel.pid"
logfile "/var/log/redis/sentinel.log"
dir "/tmp"
sentinel myid abcc57337ea96ee21277b98e715baf130028aa1e
sentinel deny-scripts-reconfig yes
sentinel monitor mymaster 10.0.0.28 6379 2
sentinel down-after-milliseconds mymaster 3000
sentinel auth-pass mymaster 123456
sentinel config-epoch mymaster 1
sentinel leader-epoch mymaster 1
protected-mode no
supervised systemd
sentinel known-replica mymaster 10.0.0.18 6379
sentinel known-replica mymaster 10.0.0.8 6379
sentinel known-sentinel mymaster 10.0.0.8 26379 8c13e9663e38ddfaa9660af24551e3644a4ead87
sentinel known-sentinel mymaster 10.0.0.28 26379 e2968b16bc36824ee1918aab53181621a2427730
sentinel current-epoch 1
[root@redis-slave2 ~]# grep "^[a-z]" /etc/redis-sentinel.conf
port 26379
daemonize no
pidfile "/var/run/redis-sentinel.pid"
logfile "/var/log/redis/sentinel.log"
dir "/tmp"
sentinel myid e2968b16bc36824ee1918aab53181621a2427730
sentinel deny-scripts-reconfig yes
sentinel monitor mymaster 10.0.0.28 6379 2
sentinel down-after-milliseconds mymaster 3000
sentinel auth-pass mymaster 123456
sentinel config-epoch mymaster 1
protected-mode no
supervised systemd
sentinel leader-epoch mymaster 1
sentinel known-replica mymaster 10.0.0.8 6379
sentinel known-replica mymaster 10.0.0.18 6379
sentinel known-sentinel mymaster 10.0.0.8 26379 8c13e9663e38ddfaa9660af24551e3644a4ead87
sentinel known-sentinel mymaster 10.0.0.18 26379 abcc57337ea96ee21277b98e715baf130028aa1e
sentinel current-epoch 1
3.2.3.9 当前 redis状态
新的master 状态
[root@redis-slave2 ~]# redis-cli -a 123456
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
127.0.0.1:6379> INFO replication
# Replication
role:master #提升为master
connected_slaves:1
slave0:ip=10.0.0.18,port=6379,state=online,offset=532461,lag=1
master_replid:fbf7ab274dcabbcebac985ffb23dd980d23728bc
master_replid2:89fbb7d25d9edc34150201a10b949f9e7a6435fb
master_repl_offset:532608
second_repl_offset:142442
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:5450
repl_backlog_histlen:527159
另一个slave指向新的master
[root@redis-slave1 ~]# redis-cli -a 123456
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
127.0.0.1:6379> INFO replication
# Replication
role:slave
master_host:10.0.0.28
master_port:6379
master_link_status:up
master_last_io_seconds_ago:2
master_sync_in_progress:0
slave_repl_offset:545030
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:fbf7ab274dcabbcebac985ffb23dd980d23728bc
master_replid2:89fbb7d25d9edc34150201a10b949f9e7a6435fb
master_repl_offset:545030
second_repl_offset:142442
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:2408
repl_backlog_histlen:542623
3.2.3.10 恢复故障的原master重新加入redis集群
[root@redis-master ~]#cat /etc/redis.conf
#sentinel会自动修改下面行指向新的master
# Generated by CONFIG REWRITE
replicaof 10.0.0.28 6379
在原 master上观察状态
[root@redis-master ~]# redis-cli -a 123456
127.0.0.1:6379> info replication
# Replication
role:slave
master_host:10.0.0.28
master_port:6379
master_link_status:up
master_last_io_seconds_ago:1
master_sync_in_progress:0
slave_repl_offset:727724
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:fbf7ab274dcabbcebac985ffb23dd980d23728bc
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:727724
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:623475
repl_backlog_histlen:104250
观察新master上状态和日志
[root@redis-slave2 ~]# redis-cli -a 123456
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
127.0.0.1:6379> INFO replication
# Replication
role:master
connected_slaves:2
slave0:ip=10.0.0.18,port=6379,state=online,offset=738687,lag=0
slave1:ip=10.0.0.8,port=6379,state=online,offset=738554,lag=1
master_replid:fbf7ab274dcabbcebac985ffb23dd980d23728bc
master_replid2:89fbb7d25d9edc34150201a10b949f9e7a6435fb
master_repl_offset:738952
second_repl_offset:142442
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:5450
repl_backlog_histlen:733503
[root@redis-slave2 ~]# tail -f /var/log/redis/sentinel.log
2711:X 24 Aug 2022 12:35:22.806 # WARNING: The TCP backlog setting of 511 cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of 128.
2711:X 24 Aug 2022 12:35:22.806 # Sentinel ID is e2968b16bc36824ee1918aab53181621a2427730
2711:X 24 Aug 2022 12:35:22.806 # +monitor master mymaster 10.0.0.28 6379 quorum 2
2711:X 24 Aug 2022 12:35:25.861 # +sdown slave 10.0.0.8:6379 10.0.0.8 6379 @ mymaster 10.0.0.28 6379
2711:X 24 Aug 2022 13:03:07.891 # -sdown slave 10.0.0.8:6379 10.0.0.8 6379 @ mymaster 10.0.0.28 6379
2711:X 24 Aug 2022 13:08:21.371 # +sdown slave 10.0.0.8:6379 10.0.0.8 6379 @ mymaster 10.0.0.28 6379
2711:X 24 Aug 2022 13:08:25.195 # +sdown sentinel 8c13e9663e38ddfaa9660af24551e3644a4ead87 10.0.0.8 26379 @ mymaster 10.0.0.28 6379
2711:X 24 Aug 2022 13:08:58.813 * +reboot slave 10.0.0.8:6379 10.0.0.8 6379 @ mymaster 10.0.0.28 6379
2711:X 24 Aug 2022 13:08:58.887 # -sdown slave 10.0.0.8:6379 10.0.0.8 6379 @ mymaster 10.0.0.28 6379
2711:X 24 Aug 2022 13:09:04.539 # -sdown sentinel 8c13e9663e38ddfaa9660af24551e3644a4ead87 10.0.0.8 26379 @ mymaster 10.0.0.28 6379