K8s: Deploying a Highly Available Redis Master-Slave Cluster with Sentinel Mode and Dynamic Data Persistence

1. Introduction

On K8s, we deploy a 3-node Redis replication cluster (one master, two slaves) in Sentinel mode. When the master goes down, one of the slaves is automatically elected as the new master, and once the failed master recovers it rejoins as a slave, which gives the cluster high availability.

Data persistence uses a StorageClass so that PVs are provisioned dynamically. If no dynamic provisioner is installed yet, install one first (e.g. an NFS dynamic provisioner).
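Before applying anything, it is worth confirming that the StorageClass referenced later (nfs-client in this article) actually exists; a quick sanity check might look like this (the grep pattern is only a guess at how your provisioner pod is named):

# The name must match storageClassName in 6-statefulset.yaml (nfs-client here)
kubectl get storageclass nfs-client

# Optionally make sure the NFS provisioner itself is running;
# adjust the grep to however your provisioner deployment is named
kubectl get pods -A | grep -i nfs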

2. File Structure

[root@k8s-master01 redis-cluster-3master]# tree
.
├── 1-ns.yaml
├── 2-configmap-redis.yaml
├── 3-configmap-probe.yaml
├── 4-svc-account-rolebinding.yaml
├── 5-svc.yaml
├── 6-statefulset.yaml
└── run.sh

3. Manifests

1)1-ns.yaml

apiVersion: v1
kind: Namespace
metadata:
  name: redis-cluster

2)2-configmap-redis.yaml

apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-configmap
  namespace: redis-cluster
  labels:
    app: redis
data:
  redis.conf: |
    dir "/data"
    maxmemory 0
    maxmemory-policy volatile-lru
    min-slaves-max-lag 5
    min-slaves-to-write 1
    rdbchecksum yes
    rdbcompression yes
    repl-diskless-sync yes
    save 900 1

  sentinel.conf: |
    dir "/data"
    sentinel down-after-milliseconds mymaster 10000
    sentinel failover-timeout mymaster 180000
    sentinel parallel-syncs mymaster 5

  init.sh: |
    HOSTNAME="$(hostname)"
    INDEX="${HOSTNAME##*-}"
    MASTER="$(redis-cli -h redis -p 26379 sentinel get-master-addr-by-name mymaster | grep -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')"
    MASTER_GROUP="mymaster"
    QUORUM="2"
    REDIS_CONF=/data/conf/redis.conf
    REDIS_PORT=6379
    SENTINEL_CONF=/data/conf/sentinel.conf
    SENTINEL_PORT=26379
    SERVICE=redis-headless
    set -eu

    sentinel_update() {
        echo "Updating sentinel config"
        eval MY_SENTINEL_ID="\${SENTINEL_ID_$INDEX}"
        sed -i "1s/^/sentinel myid $MY_SENTINEL_ID\\n/" "$SENTINEL_CONF"
        sed -i "2s/^/sentinel monitor $MASTER_GROUP $1 $REDIS_PORT $QUORUM \\n/" "$SENTINEL_CONF"
        echo "sentinel announce-ip $ANNOUNCE_IP" >> $SENTINEL_CONF
        echo "sentinel announce-port $SENTINEL_PORT" >> $SENTINEL_CONF
    }

    redis_update() {
        echo "Updating redis config"
        echo "slaveof $1 $REDIS_PORT" >> "$REDIS_CONF"
        echo "slave-announce-ip $ANNOUNCE_IP" >> $REDIS_CONF
        echo "slave-announce-port $REDIS_PORT" >> $REDIS_CONF
    }

    copy_config() {
        cp /readonly-config/redis.conf "$REDIS_CONF"
        cp /readonly-config/sentinel.conf "$SENTINEL_CONF"
    }

    setup_defaults() {
        echo "Setting up defaults"
        if [ "$INDEX" = "0" ]; then
            echo "Setting this pod as the default master"
            redis_update "$ANNOUNCE_IP"
            sentinel_update "$ANNOUNCE_IP"
            sed -i "s/^.*slaveof.*//" "$REDIS_CONF"
        else
            DEFAULT_MASTER="$(getent hosts "redis-0.$SERVICE" | awk '{ print $1 }')"
            if [ -z "$DEFAULT_MASTER" ]; then
                echo "Unable to resolve host"
                exit 1
            fi
            echo "Setting default slave config.."
            redis_update "$DEFAULT_MASTER"
            sentinel_update "$DEFAULT_MASTER"
        fi
    }

    find_master() {
        echo "Attempting to find master"
        if [ "$(redis-cli -h "$MASTER" ping)" != "PONG" ]; then
           echo "Can't ping master, attempting to force failover"
           if redis-cli -h "$SERVICE" -p "$SENTINEL_PORT" sentinel failover "$MASTER_GROUP" | grep -q 'NOGOODSLAVE' ; then 
               setup_defaults
               return 0
           fi
           sleep 10
           MASTER="$(redis-cli -h $SERVICE -p $SENTINEL_PORT sentinel get-master-addr-by-name $MASTER_GROUP | grep -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')"
           if [ "$MASTER" ]; then
               sentinel_update "$MASTER"
               redis_update "$MASTER"
           else
              echo "Could not failover, exiting..."
              exit 1
           fi
        else
            echo "Found reachable master, updating config"
            sentinel_update "$MASTER"
            redis_update "$MASTER"
        fi
    }

    mkdir -p /data/conf/

    echo "Initializing config.."
    copy_config

    ANNOUNCE_IP=$(getent hosts "redis-$INDEX.$SERVICE" | awk '{ print $1 }')
    if [ -z "$ANNOUNCE_IP" ]; then
        "Could not resolve the announce ip for this pod"
        exit 1
    elif [ "$MASTER" ]; then
        find_master
    else
        setup_defaults
    fi

    if [ "${AUTH:-}" ]; then
        echo "Setting auth values"
        ESCAPED_AUTH=$(echo "$AUTH" | sed -e 's/[\/&]/\\&/g');
        sed -i "s/replace-default-auth/${ESCAPED_AUTH}/" "$REDIS_CONF" "$SENTINEL_CONF"
    fi

    echo "Ready..."

3)3-configmap-probe.yaml

apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-probes
  namespace: redis-cluster
  labels:
    app: redis
data:
  check-quorum.sh: |
    #!/bin/sh
    set -eu
    MASTER_GROUP="mymaster"
    SENTINEL_PORT=26379
    REDIS_PORT=6379
    NUM_SLAVES=$(redis-cli -p "$SENTINEL_PORT" sentinel master mymaster | awk '/num-slaves/{getline; print}')
    MIN_SLAVES=1

    if [ "$1" = "$SENTINEL_PORT" ]; then
        if redis-cli -p "$SENTINEL_PORT" sentinel ckquorum "$MASTER_GROUP" | grep -q NOQUORUM ; then
            echo "ERROR: NOQUORUM. Sentinel quorum check failed, not enough sentinels found"
            exit 1
        fi
    elif [ "$1" = "$REDIS_PORT" ]; then
        if [ "$MIN_SLAVES" -gt "$NUM_SLAVES" ]; then
            echo "Could not find enough replicating slaves. Needed $MIN_SLAVES but found $NUM_SLAVES"
            exit 1
        fi
    fi
    sh /probes/readiness.sh "$1"

  readiness.sh: |
    #!/bin/sh
    set -eu
    CHECK_SERVER="$(redis-cli -p "$1" ping)"

    if [ "$CHECK_SERVER" != "PONG" ]; then
        echo "Server check failed with: $CHECK_SERVER"
        exit 1
    fi
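These scripts are mounted under /probes and used by the liveness/readiness probes in the StatefulSet below (the manifests only wire up readiness.sh; check-quorum.sh sits in the same ConfigMap if a stricter check is wanted). When a probe fails, the same scripts can be run by hand to debug:

# Run the readiness check manually against the redis container (port 6379)
kubectl exec -n redis-cluster redis-0 -c redis -- sh /probes/readiness.sh 6379

# And against the sentinel container (port 26379)
kubectl exec -n redis-cluster redis-0 -c sentinel -- sh /probes/readiness.sh 26379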

4)4-svc-account-rolebinding.yaml

apiVersion: v1
kind: ServiceAccount
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis

---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis
rules:
- apiGroups:
    - ""
  resources:
    - endpoints
  verbs:
    - get

---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis
subjects:
- kind: ServiceAccount
  name: redis
  namespace: redis-cluster
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: redis
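The Role only grants the redis ServiceAccount read access to Endpoints in this namespace. Whether the binding actually took effect can be verified with kubectl auth, for instance:

# Should print "yes" if the RoleBinding is in place
kubectl auth can-i get endpoints \
  --as=system:serviceaccount:redis-cluster:redis -n redis-cluster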

5)5-svc.yaml

apiVersion: v1
kind: Service
metadata:
  name: redis-headless
  namespace: redis-cluster
  labels:
    app: redis-ha
  annotations:
    service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
spec:
  publishNotReadyAddresses: true
  type: ClusterIP
  clusterIP: None
  ports:
  - name: server
    port: 6379
    protocol: TCP
    targetPort: redis
  - name: sentinel
    port: 26379
    protocol: TCP
    targetPort: sentinel
  selector:
    app: redis-ha

---
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis-ha
spec:
  type: ClusterIP
  ports:
  - name: server
    port: 6379
    protocol: TCP
    targetPort: redis
  - name: sentinel
    port: 26379
    protocol: TCP
    targetPort: sentinel
  selector:
    app: redis-ha
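Two Services are defined here: the headless redis-headless gives every pod a stable DNS name of the form redis-<n>.redis-headless.redis-cluster.svc.cluster.local, which init.sh relies on to resolve announce IPs, while the regular redis ClusterIP Service exposes port 6379 plus the Sentinels on 26379 to the rest of the cluster. A quick way to check the per-pod DNS records (using a throwaway busybox pod; the pod name and image are arbitrary choices here) is:

# Resolve one of the per-pod DNS names created by the headless Service
kubectl run dns-test -it --rm --restart=Never -n redis-cluster \
  --image=busybox:1.28 -- nslookup redis-0.redis-headless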

6)6-statefulset.yaml

apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis
  namespace: redis-cluster
  labels:
    app: redis-ha
spec:
  selector:
    matchLabels:
      app: redis-ha
  serviceName: redis-headless
  replicas: 3
  podManagementPolicy: OrderedReady
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: redis-ha
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: redis-ha
              topologyKey: kubernetes.io/hostname
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app:  redis-ha
                topologyKey: failure-domain.beta.kubernetes.io/zone

      securityContext:
        fsGroup: 1000
        runAsNonRoot: true
        runAsUser: 1000

      serviceAccountName: redis
      initContainers:
      - name: config-init
        image: redis
        #image: redis:5.0.3-alpine  # this image also works
        imagePullPolicy: IfNotPresent
        resources:
          {}

        command:
        - sh
        args:
        - /readonly-config/init.sh
        env:
        - name: SENTINEL_ID_0
          value: 0c09a3866dba0f3b43ef2e383b5dc05980900fd8

        - name: SENTINEL_ID_1
          value: e6be0f70406122877338f7c814b17a7c7b648d82

        - name: SENTINEL_ID_2
          value: 31f8f52b34feaddcabdd6bf1827aeb02be44d2e3

        volumeMounts:
        - name: config
          mountPath: /readonly-config
          readOnly: true
        - name: data
          mountPath: /data
      containers:
      - name: redis
        image: redis:5.0.3-alpine
        imagePullPolicy: IfNotPresent
        command:
        - redis-server
        args:
        - /data/conf/redis.conf
        livenessProbe:
          exec:
            command: [ "sh", "/probes/readiness.sh", "6379"]
          initialDelaySeconds: 15
          periodSeconds: 5
        readinessProbe:
          exec:
            command: ["sh", "/probes/readiness.sh", "6379"]
          initialDelaySeconds: 15
          periodSeconds: 5
        resources:
          {}

        ports:
        - name: redis
          containerPort: 6379
        volumeMounts:
        - mountPath: /data
          name: data
        - mountPath: /probes
          name: probes
      - name: sentinel
        image: redis:5.0.3-alpine
        imagePullPolicy: IfNotPresent
        command:
          - redis-sentinel
        args:
          - /data/conf/sentinel.conf
        livenessProbe:
          exec:
            command: [ "sh", "/probes/readiness.sh", "26379"]
          initialDelaySeconds: 15
          periodSeconds: 5
        readinessProbe:
          exec:
            command: ["sh", "/probes/readiness.sh", "26379"]
          initialDelaySeconds: 15
          periodSeconds: 5
        resources:
          {}

        ports:
          - name: sentinel
            containerPort: 26379
        volumeMounts:
        - mountPath: /data
          name: data
        - mountPath: /probes
          name: probes
      volumes:
      - name: config
        configMap:
          name: redis-configmap
      - name: probes
        configMap:
          name: redis-probes
  volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      accessModes:
        - "ReadWriteMany"
      resources:
        requests:
          storage: "1Gi"
      storageClassName: nfs-client
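Two details in this StatefulSet are worth calling out: the required podAntiAffinity on kubernetes.io/hostname forces the three pods onto different nodes (so losing a single node cannot take out the whole quorum), which also means at least three schedulable worker nodes are needed; and each replica gets its own PVC from the volumeClaimTemplates. The node spread is easy to confirm once the pods are running:

# Each redis pod should land on a different node; with fewer than three
# schedulable nodes, the remaining pods will stay Pending
kubectl get pods -n redis-cluster -o wide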

7)run.sh

#!/bin/bash
kubectl apply -f ./

4. Deployment

Since I am using StorageClass-based dynamic storage, the PVCs are provisioned automatically; just apply everything:

[root@k8s-master01 redis-cluster-3master]# sh run.sh 
namespace/redis-cluster created
configmap/redis-configmap created
configmap/redis-probes created
serviceaccount/redis created
role.rbac.authorization.k8s.io/redis created
rolebinding.rbac.authorization.k8s.io/redis created
service/redis-headless created
service/redis created
statefulset.apps/redis created
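Because podManagementPolicy is OrderedReady, the pods are created one at a time and each must pass its readiness probe before the next starts, so the rollout can take a couple of minutes; it can be followed with:

# Block until the StatefulSet has rolled out completely
kubectl rollout status statefulset/redis -n redis-cluster

# Or just watch the pods come up one by one
kubectl get pods -n redis-cluster -w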

5. Verifying the Deployment

[root@k8s-master01 redis-cluster-3master]# kubectl get all -n redis-cluster 
# Pod
NAME          READY   STATUS    RESTARTS   AGE
pod/redis-0   2/2     Running   0          33m
pod/redis-1   2/2     Running   0          33m
pod/redis-2   2/2     Running   0          32m

# Service
NAME                     TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)              AGE
service/redis            ClusterIP   192.168.61.113   <none>        6379/TCP,26379/TCP   71m
service/redis-headless   ClusterIP   None             <none>        6379/TCP,26379/TCP   71m

# Statefulset.apps
NAME                     READY   AGE
statefulset.apps/redis   3/3     33m

# PVC
[root@k8s-master01 redis-cluster-3master]# kubectl get pvc -n redis-cluster 
NAME           STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
data-redis-0   Bound    pvc-7c2c4069-14a0-43bc-ac3c-25fa6f510587   1Gi        RWX            nfs-client     45m
data-redis-1   Bound    pvc-5a2f71cb-7541-4ec7-b5b7-5cd25db9f12e   1Gi        RWX            nfs-client     45m
data-redis-2   Bound    pvc-75ef88e7-933c-4171-a504-f35ade5dbda3   1Gi        RWX            nfs-client     44m

# PV
[root@k8s-master01 redis-cluster-3master]# kubectl get pv | grep redis-cluster
pvc-5a2f71cb-7541-4ec7-b5b7-5cd25db9f12e   1Gi        RWX    Retain       Bound      redis-cluster/data-redis-1    nfs-client  45m
pvc-75ef88e7-933c-4171-a504-f35ade5dbda3   1Gi        RWX    Retain       Bound      redis-cluster/data-redis-2    nfs-client  45m
pvc-7c2c4069-14a0-43bc-ac3c-25fa6f510587   1Gi        RWX    Retain       Bound      redis-cluster/data-redis-0    nfs-client  45m

# Persistent directory layout (on the NFS server)
[root@k8s-master01 redis-cluster-3master]# tree -L 2 /data/nfs/redis-cluster-data-redis-*
/data/nfs/redis-cluster-data-redis-0-pvc-7c2c4069-14a0-43bc-ac3c-25fa6f510587
├── conf
│   ├── redis.conf
│   └── sentinel.conf
└── dump.rdb
/data/nfs/redis-cluster-data-redis-1-pvc-5a2f71cb-7541-4ec7-b5b7-5cd25db9f12e
├── conf
│   ├── redis.conf
│   └── sentinel.conf
└── dump.rdb
/data/nfs/redis-cluster-data-redis-2-pvc-75ef88e7-933c-4171-a504-f35ade5dbda3
├── conf
│   ├── redis.conf
│   └── sentinel.conf
└── dump.rdb
3 directories, 9 files
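Each pod ends up with its own directory on the NFS share, holding both the rendered configs and its RDB snapshot, so the data survives pod rescheduling. Note that save 900 1 only snapshots at most every 15 minutes (and only if at least one key changed); to confirm persistence is wired up without waiting, a snapshot can be forced, for example:

# Force an RDB snapshot on the master, then check it on the NFS share
kubectl exec -n redis-cluster redis-0 -c redis -- redis-cli bgsave
ls -l /data/nfs/redis-cluster-data-redis-0-pvc-*/dump.rdb    # run on the NFS server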

6. Testing High Availability

Master: read-write

Slave: read-only

1) Check the replication status

[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-0 -- sh -c redis-cli
127.0.0.1:6379> info replication
# Replication
role:master				    # role of this instance: master or slave
connected_slaves:2			# number of connected slaves
min_slaves_good_slaves:2
slave0:ip=172.27.14.237,port=6379,state=online,offset=13743,lag=1	# slave address and state
slave1:ip=172.17.125.29,port=6379,state=online,offset=14027,lag=0	# slave address and state
master_replid:85c61cd91010f1ad7121bcec7aca749a532de278	# replication ID generated when this master started
master_replid2:0000000000000000000000000000000000000000 # previous replication ID (all zeros when unused)
master_repl_offset:14027	 # master replication offset; if the slave offsets above match it, master and slaves are fully in sync with no lag; together with master_replid it identifies a position in the replication stream
second_repl_offset:-1		# offset up to which master_replid2 is still valid; -1 when unused
repl_backlog_active:1		# whether the replication backlog is active
repl_backlog_size:1048576	# size of the replication backlog buffer
repl_backlog_first_byte_offset:1 # replication offset of the first byte in the backlog
repl_backlog_histlen:14027	# amount of data in the backlog: master_repl_offset - repl_backlog_first_byte_offset + 1, never more than repl_backlog_size
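Sentinel keeps its own view of the topology, which is worth cross-checking against INFO replication; for example, the following shows the monitored master (including num-slaves and quorum) and the slaves Sentinel has discovered:

# Sentinel's view of the master and its discovered slaves
kubectl exec -n redis-cluster redis-0 -c sentinel -- \
  redis-cli -p 26379 sentinel master mymaster
kubectl exec -n redis-cluster redis-0 -c sentinel -- \
  redis-cli -p 26379 sentinel slaves mymaster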

2) Write data on the Master

# Insert data on the master node
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-0 -- sh -c redis-cli
127.0.0.1:6379> set a ZhangSan
OK
127.0.0.1:6379> get a
"ZhangSan"
127.0.0.1:6379> exit

3) Read data from the Slaves

# Read the data on slave 1 (redis-1)
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-1 -- sh -c redis-cli
127.0.0.1:6379> get a
"ZhangSan"

# Read the data on slave 2 (redis-2)
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-2 -- sh -c redis-cli
127.0.0.1:6379> get a
"ZhangSan"

4) Attempt a write on a Slave

[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-1 -- sh -c redis-cli
127.0.0.1:6379> set a Lisi
(error) READONLY You can't write against a read only replica.	# the write fails: slaves are read-only

5) Simulate a Master failure

1> Delete the master pod
[root@k8s-master01 redis-cluster-3master]# kubectl delete pod -n redis-cluster redis-0
pod "redis-0" deleted
2> Check whether a slave has been promoted to master

Slave 2 (redis-2) has been elected as the new master:

[root@k8s-master01 redis-cluster-3master]#  kubectl exec -it -n redis-cluster redis-1 -- sh -c redis-cli
127.0.0.1:6379> info replication
# Replication
role:slave		# redis-1 is still a slave
master_host:172.17.125.29
master_port:6379
master_link_status:up

[root@k8s-master01 redis-cluster-3master]#  kubectl exec -it -n redis-cluster redis-2 -- sh -c redis-cli
127.0.0.1:6379> info replication
# Replication
role:master		# redis-2 has been promoted to master
connected_slaves:1	# only 1 slave connected for now; it becomes 2 once the failed master rejoins
min_slaves_good_slaves:1
slave0:ip=172.27.14.237,port=6379,state=online,offset=429859,lag=1
master_replid:1401322b3c76bb6b8af1484037a19fbbe5405142
master_replid2:85c61cd91010f1ad7121bcec7aca749a532de278
master_repl_offset:429859
second_repl_offset:348304
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:5979
repl_backlog_histlen:423881
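The promotion itself is recorded by the Sentinels as a +switch-master event (old master address followed by the new one); a surviving pod's sentinel log can be checked for it, e.g.:

# Look for the failover decision in a surviving sentinel's log
kubectl logs -n redis-cluster redis-1 -c sentinel | grep switch-master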

6) The old Master rejoins as a Slave

After the pod is deleted, the StatefulSet recreates it automatically, and once it starts up it rejoins the replication group as a slave.

If data is written while the old master is down, it is synchronized automatically after the pod comes back as a slave, so nothing is lost.

# The old master pod (redis-0) has been recreated automatically
[root@k8s-master01 redis-cluster-3master]# kubectl get pod -n redis-cluster 
NAME      READY   STATUS    RESTARTS   AGE
redis-0   2/2     Running   0          2m40s
redis-1   2/2     Running   0          37m
redis-2   2/2     Running   0          36m

# Check whether it is now a slave
[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-0 -- sh -c redis-cli
Defaulted container "redis" out of: redis, sentinel, config-init (init)
127.0.0.1:6379> get a
"ZhangSan"
127.0.0.1:6379> set a Lisi
(error) READONLY You can't write against a read only replica.
127.0.0.1:6379> info replication
# Replication
role:slave		# now a slave of the new master
master_host:172.17.125.29
master_port:6379
master_link_status:up
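To double-check that replication is flowing again after the failover, write a fresh key on the new master (redis-2) and read it back from the rejoined slave (redis-0); the key name b and value WangWu below are arbitrary test data:

# Write on the new master ...
kubectl exec -n redis-cluster redis-2 -c redis -- redis-cli set b WangWu
# ... and read it back from the rejoined slave
kubectl exec -n redis-cluster redis-0 -c redis -- redis-cli get b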

7) Check the replication topology

After the old master has rejoined as a slave, run INFO replication on the new master to count the cluster members.

Result: the original topology of 1 master and 2 slaves is preserved.

[root@k8s-master01 redis-cluster-3master]# kubectl exec -it -n redis-cluster redis-2 -- sh -c redis-cli
Defaulted container "redis" out of: redis, sentinel, config-init (init)
127.0.0.1:6379> info replication
# Replication
role:master
connected_slaves:2
min_slaves_good_slaves:2
slave0:ip=172.27.14.237,port=6379,state=online,offset=551082,lag=1
slave1:ip=172.25.244.248,port=6379,state=online,offset=551364,lag=1
master_replid:1401322b3c76bb6b8af1484037a19fbbe5405142
master_replid2:85c61cd91010f1ad7121bcec7aca749a532de278
master_repl_offset:551364
second_repl_offset:348304
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:5979
repl_backlog_histlen:545386