redis cluster + sentinel详细过程和错误处理三主三备三哨兵

redis cluster + sentinel详细过程和错误处理三主三备三哨兵
1、基本架构
192.168.70.215 7001 Master + sentinel 27001
192.168.70.216 7002 Master + sentinel 27002
192.168.70.217 7003 Master + sentinel 27003

192.168.71.213 7004 Slave
192.168.71.214 7005 Slave
192.168.71.215 7006 Slave

2、内核参数
vi /etc/sysctl.conf
vm.overcommit_memory = 1
sysctl -p #修改后执行,使内核参数立即生效

3、软件包
yum -y install gcc automake autoconf libtool make telnet ruby-devel ruby-irb ruby-libs ruby-rdoc ruby rubygems-devel rubygems
gem install redis #忘了这一步会导致使用redis-trib.rb创建集群的时候报错

4、编译安装Redis
cd ~
wget http://download.redis.io/releases/redis-3.2.8.tar.gz
tar -zxf redis-3.2.8.tar.gz
cd redis-3.2.8
make
make install PREFIX=/usr/local/redis
mkdir /usr/local/redis/etc
mkdir /usr/local/redis/db;
cp redis.conf /usr/local/redis/etc/
cp ./src/redis-trib.rb /usr/local/redis/bin/

5、修改ruby脚本(client.rb),否则无法创建集群
修改DEFAULTS里面的password一项,我们这里用的是"password",一定要加引号。----但这次没改也OK了?
[root@localhost src]# find / -name client.rb
/usr/lib/ruby/gems/1.8/gems/redis-3.2.1/lib/redis/client.rb
/usr/lib/ruby/1.8/xmlrpc/client.rb
[root@localhost src]#
[root@localhost src]# vi /usr/lib/ruby/gems/1.8/gems/redis-3.2.1/lib/redis/client.rb
require "redis/errors"
require "socket"
require "cgi"
class Redis
class Client
DEFAULTS = {
:url => lambda { ENV["REDIS_URL"] },
:scheme => "redis",
:host => "127.0.0.1",
:port => 6379,
:path => nil,
:timeout => 5.0,
:connect_timeout => 5.0,
:password => nil,
:db => 0,
:driver => nil,
:id => nil,
:tcp_keepalive => 0,
:reconnect_attempts => 1,
:inherit_socket => false
}

def options
Marshal.load(Marshal.dump(@options))
end

def scheme
@options[:scheme]
end

def host
@options[:host]
end

def port
@options[:port]
end

def path
@options[:path]
end

def timeout
"/usr/lib/ruby/gems/1.8/gems/redis-3.2.1/lib/redis/client.rb" 574L, 14329C written

6、redis.conf配置文件修改,密码为password,每台都要修改下
vi /usr/local/redis/etc/redis.conf
#bind 127.0.0.1 #这里注释掉,否则会导致其它节点无法与本节点通信
#protected-mode yes #默认值为yes,需要改成下面的no
protected-mode no #需要不同服务器的节点连通,这个就要设置为no
port 7001 #不同机器端口不一样,分别用7001-7006 #如果用一样的端口,创建集群会出问题
daemonize yes
pidfile /var/run/redis_7001.pid #不同机器PID文件不一样,分别用7001-7006 #可以用同一个,为了统一还是建议改成不同的
logfile "/var/log/redis7001.log"
dir /usr/local/redis/db/
#masterauth password #保持注释状态,不改
#requirepass password #暂不设置;改成password测试时应用访问被拒绝,原因分析中
appendonly yes
cluster-enabled yes
cluster-config-file nodes-7001.conf #不同机器nodes.conf文件不一样,分别用7001-7006 #原因同pidfile
cluster-node-timeout 15000

启动Redis测试,及时查看日志,每台都要启动
/usr/local/redis/bin/redis-server /usr/local/redis/etc/redis.conf
tail -f /var/log/redis7001.log

启动正常后备份配置文件,以便以后比较
cp -p redis.conf redis.conf.bak

7、集群启动
[root@localhost src]# /usr/local/redis/bin/redis-trib.rb create --replicas 1 192.168.70.215:7001 192.168.70.216:7002 192.168.70.217:7003 192.168.71.213:7004 192.168.71.214:7005 192.168.71.215:7006
>>> Creating cluster
[ERR] Sorry, can't connect to node 192.168.70.215:7001
上面的错误可能很眼熟,原因五花八门,稍有不慎就会出现。这次的处理办法是:requirepass不设置密码(不知道会不会影响安全?后续研究),并果断把appendonly.aof、dump.rdb、nodes*.conf删掉(命令见第10节),然后重新创建集群:
[root@localhost src]# /usr/local/redis/bin/redis-trib.rb create --replicas 1 192.168.70.215:7001 192.168.70.216:7002 192.168.70.217:7003 192.168.71.213:7004 192.168.71.214:7005 192.168.71.215:7006
>>> Creating cluster
>>> Performing hash slots allocation on 6 nodes...
Using 3 masters:
192.168.70.217:7003
192.168.70.216:7002
192.168.70.215:7001
Adding replica 192.168.71.215:7006 to 192.168.70.217:7003
Adding replica 192.168.71.214:7005 to 192.168.70.216:7002
Adding replica 192.168.71.213:7004 to 192.168.70.215:7001
M: 1ef4062a283192c7c380b09b8ebb61df1889b70a 192.168.70.215:7001
slots:10923-16383 (5461 slots) master
M: a4cc1f873ae86c5ae00286be9895ea08e729922f 192.168.70.216:7002
slots:5461-10922 (5462 slots) master
M: e14d62d50e08c3298423317afd472c1c94eea0ff 192.168.70.217:7003
slots:0-5460 (5461 slots) master
S: 1e8b9ff1d3f1e53e8f732701609ed7ca63b53c91 192.168.71.213:7004
replicates 1ef4062a283192c7c380b09b8ebb61df1889b70a
S: 2e845f8bb7f60dec7b7a2e8162f76678202bd6de 192.168.71.214:7005
replicates a4cc1f873ae86c5ae00286be9895ea08e729922f
S: 4f67f5dc2c6693146dc92fd4307ea93a6762c7a2 192.168.71.215:7006
replicates e14d62d50e08c3298423317afd472c1c94eea0ff
Can I set the above configuration? (type 'yes' to accept): yes
>>> Nodes configuration updated
>>> Assign a different config epoch to each node
>>> Sending CLUSTER MEET messages to join the cluster
Waiting for the cluster to join....
>>> Performing Cluster Check (using node 192.168.70.215:7001)
M: 1ef4062a283192c7c380b09b8ebb61df1889b70a 192.168.70.215:7001
slots:10923-16383 (5461 slots) master
1 additional replica(s)
S: 2e845f8bb7f60dec7b7a2e8162f76678202bd6de 192.168.71.214:7005
slots: (0 slots) slave
replicates a4cc1f873ae86c5ae00286be9895ea08e729922f
M: e14d62d50e08c3298423317afd472c1c94eea0ff 192.168.70.217:7003
slots:0-5460 (5461 slots) master
1 additional replica(s)
M: a4cc1f873ae86c5ae00286be9895ea08e729922f 192.168.70.216:7002
slots:5461-10922 (5462 slots) master
1 additional replica(s)
S: 1e8b9ff1d3f1e53e8f732701609ed7ca63b53c91 192.168.71.213:7004
slots: (0 slots) slave
replicates 1ef4062a283192c7c380b09b8ebb61df1889b70a
S: 4f67f5dc2c6693146dc92fd4307ea93a6762c7a2 192.168.71.215:7006
slots: (0 slots) slave
replicates e14d62d50e08c3298423317afd472c1c94eea0ff
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
[root@localhost src]#

集群查看和数据插入
[root@localhost ~]# /usr/local/redis/bin/redis-cli -h 192.168.70.215 -c -p 7001 -a 123456
192.168.70.215:7001> cluster nodes
2e845f8bb7f60dec7b7a2e8162f76678202bd6de 192.168.71.214:7005 slave a4cc1f873ae86c5ae00286be9895ea08e729922f 0 1534474964068 5 connected
e14d62d50e08c3298423317afd472c1c94eea0ff 192.168.70.217:7003 master - 0 1534474966068 3 connected 0-5460
a4cc1f873ae86c5ae00286be9895ea08e729922f 192.168.70.216:7002 master - 0 1534474965068 2 connected 5461-10922
1ef4062a283192c7c380b09b8ebb61df1889b70a 192.168.70.215:7001 myself,master - 0 0 1 connected 10923-16383
1e8b9ff1d3f1e53e8f732701609ed7ca63b53c91 192.168.71.213:7004 slave 1ef4062a283192c7c380b09b8ebb61df1889b70a 0 1534474963068 4 connected
4f67f5dc2c6693146dc92fd4307ea93a6762c7a2 192.168.71.215:7006 slave e14d62d50e08c3298423317afd472c1c94eea0ff 0 1534474967068 6 connected
192.168.70.215:7001>
192.168.70.216:7002> set q qq
-> Redirected to slot [11958] located at 192.168.70.215:7001
OK
192.168.70.215:7001> get q
"qq"
192.168.70.215:7001> set w ww
-> Redirected to slot [3696] located at 192.168.70.217:7003
OK
192.168.70.217:7003> get w
"ww"
192.168.70.217:7003> set e ee
-> Redirected to slot [15363] located at 192.168.70.215:7001
OK
192.168.70.215:7001> get e
"ee"
192.168.70.215:7001> set r rr
-> Redirected to slot [7893] located at 192.168.70.216:7002
OK
192.168.70.216:7002> get r
"rr"
192.168.70.216:7002>

8、sentinel.conf参数配置
vi /usr/local/redis/etc/sentinel.conf
protected-mode no
port 27001 #不同Sentinel端口不一样,分别用27001-27003

sentinel monitor mymaster1 192.168.70.215 7001 2
sentinel monitor mymaster2 192.168.70.216 7002 2
sentinel monitor mymaster3 192.168.70.217 7003 2

sentinel down-after-milliseconds mymaster1 10000
sentinel down-after-milliseconds mymaster2 10000
sentinel down-after-milliseconds mymaster3 10000

sentinel parallel-syncs mymaster1 1
sentinel parallel-syncs mymaster2 1
sentinel parallel-syncs mymaster3 1

sentinel failover-timeout mymaster1 15000
sentinel failover-timeout mymaster2 15000
sentinel failover-timeout mymaster3 15000

daemonize yes
logfile "/var/log/sentinel.log"

9、哨兵启动
/usr/local/redis/bin/redis-sentinel /usr/local/redis/etc/sentinel.conf
tail -f /var/log/sentinel.log

10、常用命令和问题处理
--清理redis进程,快速重测
[root@localhost src]# ps -ef|grep redis
[root@localhost src]# kill $(ps -ef | grep redis | grep -v grep | awk '{print $2}')
--删除旧的文件,上次写进去的一些东西,配置改变后有可能影响正常启动
[root@localhost src]# find / -name appendonly.aof
/usr/local/redis/db/appendonly.aof
/home/redis/redisdb/appendonly.aof
[root@localhost src]# find / -name dump.rdb
/usr/local/redis/db/dump.rdb
/home/redis/redisdb/dump.rdb
[root@localhost src]# find / -name 'nodes*.conf'
/usr/local/redis/db/nodes-7001.conf
[root@localhost src]# rm -Rf /usr/local/redis/db/dump.rdb /usr/local/redis/db/appendonly.aof /usr/local/redis/db/nodes-700*.conf
有可能用到
ulimit -n 10032
echo "vm.overcommit_memory=1" >> /etc/sysctl.conf #用>>追加,用>会覆盖掉sysctl.conf里原有的配置
echo 511 > /proc/sys/net/core/somaxconn

posted @ 2018-08-17 13:07  ritchy  阅读(706)  评论(0)  编辑  收藏  举报