K8s部署Redis高可用主从集群(1主2从)+哨兵模式+动态持久化数据
1.引言
基于 K8s,搭建部署 Redis 哨兵模式集群(1 主 2 从,共 3 个节点)。当 Master 宕机时,哨兵会自动选举一个 Slave 充当新的 Master;原来宕机的 Master 恢复后,会自动降级为 Slave,从而实现集群的高可用。持久化数据选择用 storageclass 动态创建 PV 存储;若未安装动态存储,需先安装:安装NFS动态存储~
2.文件结构
[root@k8s-master01 redis-cluster-3master]# tree
.
├── 1-ns.yaml
├── 2-configmap-redis.yaml
├── 3-configmap-probe.yaml
├── 4-svc-account-rolebinding.yaml
├── 5-svc.yaml
├── 6-statefulset.yaml
└── run.sh
3.配置清单
1)1-ns.yaml
# Namespace that holds every Redis HA object below.
apiVersion: v1
kind: Namespace
metadata:
  name: redis-cluster
2)2-configmap-redis.yaml
# ConfigMap providing the base redis/sentinel configuration plus the init
# script that the config-init initContainer runs to render /data/conf/*.
apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-configmap
  namespace: redis-cluster
  labels:
    app: redis
data:
  redis.conf: |
    dir "/data"
    maxmemory 0
    maxmemory-policy volatile-lru
    min-slaves-max-lag 5
    min-slaves-to-write 1
    rdbchecksum yes
    rdbcompression yes
    repl-diskless-sync yes
    save 900 1
  sentinel.conf: |
    dir "/data"
    sentinel down-after-milliseconds mymaster 10000
    sentinel failover-timeout mymaster 180000
    sentinel parallel-syncs mymaster 5
  init.sh: |
    HOSTNAME="$(hostname)"
    INDEX="${HOSTNAME##*-}"
    # Ask the sentinel service for the current master. This runs before
    # 'set -eu' on purpose: on first bootstrap the lookup fails and MASTER
    # must end up empty instead of aborting the whole init script.
    MASTER="$(redis-cli -h redis -p 26379 sentinel get-master-addr-by-name mymaster | grep -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')"
    MASTER_GROUP="mymaster"
    QUORUM="2"
    REDIS_CONF=/data/conf/redis.conf
    REDIS_PORT=6379
    SENTINEL_CONF=/data/conf/sentinel.conf
    SENTINEL_PORT=26379
    SERVICE=redis-headless
    set -eu
    # Write this pod's sentinel id and the monitored master into sentinel.conf.
    sentinel_update() {
      echo "Updating sentinel config"
      eval MY_SENTINEL_ID="\${SENTINEL_ID_$INDEX}"
      sed -i "1s/^/sentinel myid $MY_SENTINEL_ID\\n/" "$SENTINEL_CONF"
      sed -i "2s/^/sentinel monitor $MASTER_GROUP $1 $REDIS_PORT $QUORUM \\n/" "$SENTINEL_CONF"
      echo "sentinel announce-ip $ANNOUNCE_IP" >> "$SENTINEL_CONF"
      echo "sentinel announce-port $SENTINEL_PORT" >> "$SENTINEL_CONF"
    }
    # Point redis.conf at the given master and announce this pod's address.
    redis_update() {
      echo "Updating redis config"
      echo "slaveof $1 $REDIS_PORT" >> "$REDIS_CONF"
      echo "slave-announce-ip $ANNOUNCE_IP" >> "$REDIS_CONF"
      echo "slave-announce-port $REDIS_PORT" >> "$REDIS_CONF"
    }
    copy_config() {
      cp /readonly-config/redis.conf "$REDIS_CONF"
      cp /readonly-config/sentinel.conf "$SENTINEL_CONF"
    }
    # No sentinel-known master yet: redis-0 becomes master, others slave to it.
    setup_defaults() {
      echo "Setting up defaults"
      if [ "$INDEX" = "0" ]; then
        echo "Setting this pod as the default master"
        redis_update "$ANNOUNCE_IP"
        sentinel_update "$ANNOUNCE_IP"
        # redis-0 is the master: strip the slaveof line redis_update just added.
        sed -i "s/^.*slaveof.*//" "$REDIS_CONF"
      else
        DEFAULT_MASTER="$(getent hosts "redis-0.$SERVICE" | awk '{ print $1 }')"
        if [ -z "$DEFAULT_MASTER" ]; then
          echo "Unable to resolve host" >&2
          exit 1
        fi
        echo "Setting default slave config.."
        redis_update "$DEFAULT_MASTER"
        sentinel_update "$DEFAULT_MASTER"
      fi
    }
    # A master is known to sentinel: verify it is alive, otherwise force failover.
    find_master() {
      echo "Attempting to find master"
      if [ "$(redis-cli -h "$MASTER" ping)" != "PONG" ]; then
        echo "Can't ping master, attempting to force failover"
        if redis-cli -h "$SERVICE" -p "$SENTINEL_PORT" sentinel failover "$MASTER_GROUP" | grep -q 'NOGOODSLAVE' ; then
          setup_defaults
          return 0
        fi
        sleep 10
        MASTER="$(redis-cli -h "$SERVICE" -p "$SENTINEL_PORT" sentinel get-master-addr-by-name "$MASTER_GROUP" | grep -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')"
        if [ "$MASTER" ]; then
          sentinel_update "$MASTER"
          redis_update "$MASTER"
        else
          echo "Could not failover, exiting..." >&2
          exit 1
        fi
      else
        echo "Found reachable master, updating config"
        sentinel_update "$MASTER"
        redis_update "$MASTER"
      fi
    }
    mkdir -p /data/conf/
    echo "Initializing config.."
    copy_config
    ANNOUNCE_IP=$(getent hosts "redis-$INDEX.$SERVICE" | awk '{ print $1 }')
    if [ -z "$ANNOUNCE_IP" ]; then
      # Fix: the original line was missing 'echo' and tried to execute the
      # message text as a command before exiting.
      echo "Could not resolve the announce ip for this pod" >&2
      exit 1
    elif [ "$MASTER" ]; then
      find_master
    else
      setup_defaults
    fi
    if [ "${AUTH:-}" ]; then
      echo "Setting auth values"
      ESCAPED_AUTH=$(echo "$AUTH" | sed -e 's/[\/&]/\\&/g');
      sed -i "s/replace-default-auth/${ESCAPED_AUTH}/" "$REDIS_CONF" "$SENTINEL_CONF"
    fi
    echo "Ready..."
3)3-configmap-probe.yaml
# ConfigMap with the liveness/readiness probe scripts mounted at /probes.
apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-probes
  namespace: redis-cluster
  labels:
    app: redis
data:
  check-quorum.sh: |
    #!/bin/sh
    # $1 is the port to check: 26379 -> sentinel quorum, 6379 -> slave count.
    set -eu
    MASTER_GROUP="mymaster"
    SENTINEL_PORT=26379
    REDIS_PORT=6379
    NUM_SLAVES=$(redis-cli -p "$SENTINEL_PORT" sentinel master mymaster | awk '/num-slaves/{getline; print}')
    # Fix: if the sentinel query fails NUM_SLAVES is empty and the numeric
    # test below would be a syntax error; treat empty as 0 (still a failure).
    NUM_SLAVES=${NUM_SLAVES:-0}
    MIN_SLAVES=1
    if [ "$1" = "$SENTINEL_PORT" ]; then
      if redis-cli -p "$SENTINEL_PORT" sentinel ckquorum "$MASTER_GROUP" | grep -q NOQUORUM ; then
        echo "ERROR: NOQUORUM. Sentinel quorum check failed, not enough sentinels found"
        exit 1
      fi
    elif [ "$1" = "$REDIS_PORT" ]; then
      if [ "$MIN_SLAVES" -gt "$NUM_SLAVES" ]; then
        echo "Could not find enough replicating slaves. Needed $MIN_SLAVES but found $NUM_SLAVES"
        exit 1
      fi
    fi
    sh /probes/readiness.sh "$1"
  readiness.sh: |
    #!/bin/sh
    # Succeeds only if the server on port $1 answers PING with PONG.
    set -eu
    CHECK_SERVER="$(redis-cli -p "$1" ping)"
    if [ "$CHECK_SERVER" != "PONG" ]; then
      echo "Server check failed with: $CHECK_SERVER"
      exit 1
    fi
4)4-svc-account-rolebinding.yaml
# ServiceAccount + minimal RBAC: the redis pods only need to read endpoints.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis
rules:
  - apiGroups:
      - ""
    resources:
      - endpoints
    verbs:
      - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis
subjects:
  - kind: ServiceAccount
    name: redis
    # Fix: a ServiceAccount subject requires an explicit namespace;
    # it was missing in the original binding.
    namespace: redis-cluster
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: redis
5)5-svc.yaml
# Headless service: gives each pod a stable DNS name (redis-N.redis-headless)
# and resolves not-ready pods so peers can bootstrap before probes pass.
apiVersion: v1
kind: Service
metadata:
  name: redis-headless
  namespace: redis-cluster
  labels:
    app: redis-ha
  annotations:
    service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
spec:
  publishNotReadyAddresses: true
  type: ClusterIP
  clusterIP: None
  ports:
    - name: server
      port: 6379
      protocol: TCP
      targetPort: redis
    - name: sentinel
      port: 26379
      protocol: TCP
      targetPort: sentinel
  selector:
    app: redis-ha
---
# Regular ClusterIP service used by clients and by init.sh to reach sentinel.
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis-ha
spec:
  type: ClusterIP
  ports:
    - name: server
      port: 6379
      protocol: TCP
      targetPort: redis
    - name: sentinel
      port: 26379
      protocol: TCP
      targetPort: sentinel
  selector:
    app: redis-ha
6)6-statefulset.yaml
# StatefulSet: 3 replicas, each pod runs redis-server + sentinel sidecar,
# configured at startup by the config-init initContainer.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis-ha
spec:
  selector:
    matchLabels:
      app: redis-ha
  serviceName: redis-headless
  replicas: 3
  podManagementPolicy: OrderedReady
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: redis-ha
    spec:
      affinity:
        podAntiAffinity:
          # Hard rule: never co-locate two redis pods on one node.
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: redis-ha
              topologyKey: kubernetes.io/hostname
          # Soft rule: prefer spreading across zones as well.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app: redis-ha
                # NOTE(review): this label is deprecated since K8s 1.17;
                # use topology.kubernetes.io/zone on newer clusters.
                topologyKey: failure-domain.beta.kubernetes.io/zone
      securityContext:
        fsGroup: 1000
        runAsNonRoot: true
        runAsUser: 1000
      serviceAccountName: redis
      initContainers:
        # Renders /data/conf/{redis,sentinel}.conf via the init.sh ConfigMap script.
        - name: config-init
          image: redis
          #image: redis:5.0.3-alpine # 此镜像也可用
          imagePullPolicy: IfNotPresent
          resources: {}
          command:
            - sh
          args:
            - /readonly-config/init.sh
          env:
            # Stable sentinel myid per ordinal, consumed by init.sh.
            - name: SENTINEL_ID_0
              value: 0c09a3866dba0f3b43ef2e383b5dc05980900fd8
            - name: SENTINEL_ID_1
              value: e6be0f70406122877338f7c814b17a7c7b648d82
            - name: SENTINEL_ID_2
              value: 31f8f52b34feaddcabdd6bf1827aeb02be44d2e3
          volumeMounts:
            - name: config
              mountPath: /readonly-config
              readOnly: true
            - name: data
              mountPath: /data
      containers:
        - name: redis
          image: redis:5.0.3-alpine
          imagePullPolicy: IfNotPresent
          command:
            - redis-server
          args:
            - /data/conf/redis.conf
          livenessProbe:
            exec:
              command: ["sh", "/probes/readiness.sh", "6379"]
            initialDelaySeconds: 15
            periodSeconds: 5
          readinessProbe:
            exec:
              command: ["sh", "/probes/readiness.sh", "6379"]
            initialDelaySeconds: 15
            periodSeconds: 5
          resources: {}
          ports:
            - name: redis
              containerPort: 6379
          volumeMounts:
            - mountPath: /data
              name: data
            - mountPath: /probes
              name: probes
        - name: sentinel
          image: redis:5.0.3-alpine
          imagePullPolicy: IfNotPresent
          command:
            - redis-sentinel
          args:
            - /data/conf/sentinel.conf
          livenessProbe:
            exec:
              command: ["sh", "/probes/readiness.sh", "26379"]
            initialDelaySeconds: 15
            periodSeconds: 5
          readinessProbe:
            exec:
              command: ["sh", "/probes/readiness.sh", "26379"]
            initialDelaySeconds: 15
            periodSeconds: 5
          resources: {}
          ports:
            - name: sentinel
              containerPort: 26379
          volumeMounts:
            - mountPath: /data
              name: data
            - mountPath: /probes
              name: probes
      volumes:
        - name: config
          configMap:
            name: redis-configmap
        - name: probes
          configMap:
            name: redis-probes
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes:
          - "ReadWriteMany"
        resources:
          requests:
            storage: "1Gi"
        storageClassName: nfs-client
7)run.sh
#!/bin/bash
# Apply every manifest in this directory (namespace, configmaps, rbac,
# services, statefulset). Numeric filename prefixes keep apply order sane.
set -euo pipefail
# Anchor to the script's own directory so it works from any CWD.
cd "$(dirname "$0")"
kubectl apply -f ./
4.开始部署
由于我是用的是
storageclass
动态存储,所以直接动态创建pvc
[root@k8s-master01 redis-cluster-3master]# sh run.sh
namespace/redis-cluster created
configmap/redis-configmap created
configmap/redis-probes created
serviceaccount/redis created
role.rbac.authorization.k8s.io/redis created
rolebinding.rbac.authorization.k8s.io/redis created
service/redis-headless created
service/redis created
statefulset.apps/redis created
5.检测部署
[root@k8s-master01 redis-cluster-3master]# kubectl get all -n redis-cluster
# Pod
NAME READY STATUS RESTARTS AGE
pod/redis-0 2/2 Running 0 33m
pod/redis-1 2/2 Running 0 33m
pod/redis-2 2/2 Running 0 32m
# Service
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/redis ClusterIP 192.168.61.113 <none> 6379/TCP,26379/TCP 71m
service/redis-headless ClusterIP None <none> 6379/TCP,26379/TCP 71m
# Statefulset.apps
NAME READY AGE
statefulset.apps/redis 3/3 33m
# PVC
[root@k8s-master01 redis-cluster-3master]# kubectl get pvc -n redis-cluster
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
data-redis-0 Bound pvc-7c2c4069-14a0-43bc-ac3c-25fa6f510587 1Gi RWX nfs-client 45m
data-redis-1 Bound pvc-5a2f71cb-7541-4ec7-b5b7-5cd25db9f12e 1Gi RWX nfs-client 45m
data-redis-2 Bound pvc-75ef88e7-933c-4171-a504-f35ade5dbda3 1Gi RWX nfs-client 44m
# PV
[root@k8s-master01 redis-cluster-3master]# kubectl get pv | grep redis-cluster
pvc-5a2f71cb-7541-4ec7-b5b7-5cd25db9f12e 1Gi RWX Retain Bound redis-cluster/data-redis-1 nfs-client 45m
pvc-75ef88e7-933c-4171-a504-f35ade5dbda3 1Gi RWX Retain Bound redis-cluster/data-redis-2 nfs-client 45m
pvc-7c2c4069-14a0-43bc-ac3c-25fa6f510587 1Gi RWX Retain Bound redis-cluster/data-redis-0 nfs-client 45m
# 持久化目录结构
[root@k8s-master01 redis-cluster-3master]# tree -L 2 /data/nfs/redis-cluster-data-redis-*
/data/nfs/redis-cluster-data-redis-0-pvc-7c2c4069-14a0-43bc-ac3c-25fa6f510587
├── conf
│ ├── redis.conf
│ └── sentinel.conf
└── dump.rdb
/data/nfs/redis-cluster-data-redis-1-pvc-5a2f71cb-7541-4ec7-b5b7-5cd25db9f12e
├── conf
│ ├── redis.conf
│ └── sentinel.conf
└── dump.rdb
/data/nfs/redis-cluster-data-redis-2-pvc-75ef88e7-933c-4171-a504-f35ade5dbda3
├── conf
│ ├── redis.conf
│ └── sentinel.conf
└── dump.rdb
3 directories, 9 files
6.测试高可用
Master
:可读可写
Slave
:只读
1)检测节点集群状态
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-0 -- sh -c redis-cli
127.0.0.1:6379> info replication
# Replication
role:master # 当前实例的角色master还是slave
connected_slaves:2 # slave的数量
min_slaves_good_slaves:2
slave0:ip=172.27.14.237,port=6379,state=online,offset=13743,lag=1 # slave机器的信息、状态
slave1:ip=172.17.125.29,port=6379,state=online,offset=14027,lag=0 # slave机器的信息、状态
master_replid:85c61cd91010f1ad7121bcec7aca749a532de278 # 主实例启动随机字符串
master_replid2:0000000000000000000000000000000000000000 # 主实例启动随机字符串
master_repl_offset:14027 # 主从同步偏移量,此值如果和上面的offset相同说明主从一致没延迟,与master_replid可被用来标识主实例复制流中的位置
second_repl_offset:-1 # 主从同步偏移量2,此值如果和上面的offset相同说明主从一致没延迟
repl_backlog_active:1 # 复制缓冲区是否开启
repl_backlog_size:1048576 # 复制积压缓冲区
repl_backlog_first_byte_offset:1 # 复制缓冲区里偏移量的大小
repl_backlog_histlen:14027 # 此值等于 master_repl_offset - repl_backlog_first_byte_offset,该值不会超过repl_backlog_size的大小
2)测试Master
插入数据
# Master节点插入数据
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-0 -- sh -c redis-cli
127.0.0.1:6379> set a ZhangSan
OK
127.0.0.1:6379> get a
"ZhangSan"
127.0.0.1:6379> exit
3)测试Slave
读取数据
# Slave1节点读取数据
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-1 -- sh -c redis-cli
127.0.0.1:6379> get a
"ZhangSan"
127.0.0.1:6379> set a Lisi
# Slave1节点读取数据
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-2 -- sh -c redis-cli
127.0.0.1:6379> get a
"ZhangSan"
4)测试Slave
插入数据
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-1 -- sh -c redis-cli
127.0.0.1:6379> set a Lisi
(error) READONLY You can't write against a read only replica. # Slave节点插入数据失败,权限为只读
5)测试Master
宕机
1> 删除Master_Pod
[root@k8s-master01 redis-cluster-3master]# kubectl delete pod -n redis-cluster redis-0
pod "redis-0" deleted
2> 查看Slave
是否被选举为Master
Slave2
节点已被选举为Master
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-1 -- sh -c redis-cli
127.0.0.1:6379> info replication
# Replication
role:slave # Slave1还是Slave
master_host:172.17.125.29
master_port:6379
master_link_status:up
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-2 -- sh -c redis-cli
127.0.0.1:6379> info replication
# Replication
role:master # Slave已被选举为Master
connected_slaves:1 # 当前可用节点为1,待宕机的主节点重启后再查看即为2
min_slaves_good_slaves:1
slave0:ip=172.27.14.237,port=6379,state=online,offset=429859,lag=1
master_replid:1401322b3c76bb6b8af1484037a19fbbe5405142
master_replid2:85c61cd91010f1ad7121bcec7aca749a532de278
master_repl_offset:429859
second_repl_offset:348304
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:5979
repl_backlog_histlen:423881
6)原Master
变为Slave
Pod
被删除后会自动启动,且启动成功后会自动变为Slave
节点原
Master
节点宕机过程中若有数据插入,待重启成功后会变为Slave
,且自动同步期间的数据,不会遗漏
# Master_Pod(redis-0)已自动启动
[root@k8s-master01 redis-cluster-3master]# kubectl get pod -n redis-cluster
NAME READY STATUS RESTARTS AGE
redis-0 2/2 Running 0 2m40s
redis-1 2/2 Running 0 37m
redis-2 2/2 Running 0 36m
# 查看是否变为Slave节点
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-0 -- sh -c redis-cli
Defaulted container "redis" out of: redis, sentinel, config-init (init)
127.0.0.1:6379> get a
"ZhangSan"
127.0.0.1:6379> set a Lisi
(error) READONLY You can't write against a read only replica.
127.0.0.1:6379> info replication
# Replication
role:slave # 已变为Slave节点
master_host:172.17.125.29
master_port:6379
master_link_status:up
7)获取集群节点
待
Master
变为Slave
后,进入新的Master
节点获取集群节点数结果为:保持原来的
1主2从
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-2 -- sh -c redis-cli
Defaulted container "redis" out of: redis, sentinel, config-init (init)
127.0.0.1:6379> info replication
# Replication
role:master
connected_slaves:2
min_slaves_good_slaves:2
slave0:ip=172.27.14.237,port=6379,state=online,offset=551082,lag=1
slave1:ip=172.25.244.248,port=6379,state=online,offset=551364,lag=1
master_replid:1401322b3c76bb6b8af1484037a19fbbe5405142
master_replid2:85c61cd91010f1ad7121bcec7aca749a532de278
master_repl_offset:551364
second_repl_offset:348304
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:5979
repl_backlog_histlen:545386