22. ZooKeeper in Practice
Official reference for a StatefulSet (sts) deployment: "Running ZooKeeper, A Distributed System Coordinator" | Kubernetes documentation.
root@k8s-master1:~/k8s-data/dockerfile/web/magedu/zookeeper

Image build script:

TAG=$1
nerdctl build -t harbor.nbrhce.com/demo/zookeeper:${TAG} .
nerdctl push harbor.nbrhce.com/demo/zookeeper:${TAG}
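Assuming the three lines above are saved as a script named build-command.sh (the actual file name is not visible in the truncated prompt), the image referenced by the Deployments in 22.1 would be built and pushed like this:

bash build-command.sh v3.4.14   # hypothetical script name; builds and pushes harbor.nbrhce.com/demo/zookeeper:v3.4.14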
Dockerfile:
FROM harbor.nbrhce.com/baseimages/slim_java:8

ENV ZK_VERSION 3.4.14
# Override the apk repositories list with a local copy
ADD repositories /etc/apk/repositories

# ZooKeeper release tarball, its GPG signature, and the Apache KEYS file are provided locally
COPY zookeeper-3.4.14.tar.gz /tmp/zk.tgz
COPY zookeeper-3.4.14.tar.gz.asc /tmp/zk.tgz.asc
COPY KEYS /tmp/KEYS

RUN apk add --no-cache --virtual .build-deps \
      ca-certificates \
      gnupg \
      tar \
      wget && \
    #
    # Install the runtime dependency (bash is needed by the entrypoint and zkServer.sh)
    apk add --no-cache \
      bash && \
    #
    # Verify the GPG signature of the ZooKeeper tarball
    export GNUPGHOME="$(mktemp -d)" && \
    gpg -q --batch --import /tmp/KEYS && \
    gpg -q --batch --no-auto-key-retrieve --verify /tmp/zk.tgz.asc /tmp/zk.tgz && \
    #
    # Set up data, wal and log directories
    mkdir -p /zookeeper/data /zookeeper/wal /zookeeper/log && \
    #
    # Install ZooKeeper under /zookeeper
    tar -x -C /zookeeper --strip-components=1 --no-same-owner -f /tmp/zk.tgz && \
    #
    # Slim down: keep only what is needed at runtime
    cd /zookeeper && \
    cp dist-maven/zookeeper-${ZK_VERSION}.jar . && \
    rm -rf \
      *.txt \
      *.xml \
      bin/README.txt \
      bin/*.cmd \
      conf/* \
      contrib \
      dist-maven \
      docs \
      lib/*.txt \
      lib/cobertura \
      lib/jdiff \
      recipes \
      src \
      zookeeper-*.asc \
      zookeeper-*.md5 \
      zookeeper-*.sha1 && \
    #
    # Clean up build dependencies and temporary files
    apk del .build-deps && \
    rm -rf /tmp/* "$GNUPGHOME"

COPY conf /zookeeper/conf/
COPY bin/zkReady.sh /zookeeper/bin/
COPY entrypoint.sh /

ENV PATH=/zookeeper/bin:${PATH} \
    ZOO_LOG_DIR=/zookeeper/log \
    ZOO_LOG4J_PROP="INFO, CONSOLE, ROLLINGFILE" \
    JMXPORT=9010

ENTRYPOINT [ "/entrypoint.sh" ]

CMD [ "zkServer.sh", "start-foreground" ]

EXPOSE 2181 2888 3888 9010
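Before wiring the image into Kubernetes, it can be smoke-tested locally; a rough sketch (the container name zk-single is arbitrary, and this assumes the conf/zoo.cfg copied into the image defines a client port):

nerdctl run -d --name zk-single -e MYID=1 harbor.nbrhce.com/demo/zookeeper:v3.4.14
nerdctl exec zk-single zkServer.sh status   # with no SERVERS set this should report "Mode: standalone"
nerdctl rm -f zk-single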
root@k8s-master1:~/k8s-data/dockerfile/web/magedu/zookeeper

entrypoint.sh:

#!/bin/bash
# Write this node's id into the data directory; default to 1 if MYID is unset
echo ${MYID:-1} > /zookeeper/data/myid

# Append one server.N entry per comma-separated name in $SERVERS to zoo.cfg
if [ -n "$SERVERS" ]; then
    IFS=\, read -a servers <<<"$SERVERS"
    for i in "${!servers[@]}"; do
        printf "\nserver.%i=%s:2888:3888" "$((1 + $i))" "${servers[$i]}" >> /zookeeper/conf/zoo.cfg
    done
fi

# Hand control over to the CMD (zkServer.sh start-foreground by default)
cd /zookeeper
exec "$@"
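To see exactly what the SERVERS loop appends to zoo.cfg, the relevant lines can be replayed outside the container; a small sketch that writes to a temporary file instead of /zookeeper/conf/zoo.cfg:

#!/bin/bash
# Replay the entrypoint loop with the SERVERS value used by the Deployments below
SERVERS="zookeeper1,zookeeper2,zookeeper3"
out=$(mktemp)                      # stand-in for /zookeeper/conf/zoo.cfg
IFS=\, read -a servers <<<"$SERVERS"
for i in "${!servers[@]}"; do
    printf "\nserver.%i=%s:2888:3888" "$((1 + $i))" "${servers[$i]}" >> "$out"
done
cat "$out"
# Appended entries (after a leading blank line):
# server.1=zookeeper1:2888:3888
# server.2=zookeeper2:2888:3888
# server.3=zookeeper3:2888:3888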
22.1 YAML
apiVersion: v1
kind: Service
metadata:
  name: zookeeper
  namespace: zookeeper
spec:
  ports:
  - name: client
    port: 2181
  selector:
    app: zookeeper
---
apiVersion: v1
kind: Service
metadata:
  name: zookeeper1
  namespace: zookeeper
spec:
  type: NodePort
  ports:
  - name: client
    port: 2181
    nodePort: 32181
  - name: followers
    port: 2888
  - name: election
    port: 3888
  selector:
    app: zookeeper
    server-id: "1"
---
apiVersion: v1
kind: Service
metadata:
  name: zookeeper2
  namespace: zookeeper
spec:
  type: NodePort
  ports:
  - name: client
    port: 2181
    nodePort: 32182
  - name: followers
    port: 2888
  - name: election
    port: 3888
  selector:
    app: zookeeper
    server-id: "2"
---
apiVersion: v1
kind: Service
metadata:
  name: zookeeper3
  namespace: zookeeper
spec:
  type: NodePort
  ports:
  - name: client
    port: 2181
    nodePort: 32183
  - name: followers
    port: 2888
  - name: election
    port: 3888
  selector:
    app: zookeeper
    server-id: "3"
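The zookeeper Service is the ordinary client entry point and selects all three members, while zookeeper1/2/3 additionally match server-id, giving each member a stable DNS name for the 2888/3888 peer traffic referenced by the SERVERS variable in the Deployments below. A quick check once everything is applied:

# zookeeper1/2/3 should each expose 2181 on their fixed NodePort (32181-32183)
kubectl -n zookeeper get svc -o wide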
root@k8s-master1:~/k8s-data/yaml/magedu/zookeeper

kind: Deployment
apiVersion: apps/v1
metadata:
  name: zookeeper1
  namespace: zookeeper
spec:
  replicas: 1
  selector:
    matchLabels:
      app: zookeeper
      server-id: "1"
  template:
    metadata:
      labels:
        app: zookeeper
        server-id: "1"
    spec:
      volumes:
      - name: data
        emptyDir: {}
      - name: wal
        emptyDir:
          medium: Memory
      - name: zookeeper-datadir-pvc-1
        persistentVolumeClaim:
          claimName: zookeeper-datadir-pvc-1
      containers:
      - name: server
        image: harbor.nbrhce.com/demo/zookeeper:v3.4.14
        imagePullPolicy: Always
        env:
        - name: MYID
          value: "1"
        - name: SERVERS
          value: "zookeeper1,zookeeper2,zookeeper3"
        - name: JVMFLAGS
          value: "-Xmx2G"
        ports:
        - containerPort: 2181
        - containerPort: 2888
        - containerPort: 3888
        volumeMounts:
        - mountPath: "/zookeeper/data"
          name: zookeeper-datadir-pvc-1
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: zookeeper2
  namespace: zookeeper
spec:
  replicas: 1
  selector:
    matchLabels:
      app: zookeeper
      server-id: "2"
  template:
    metadata:
      labels:
        app: zookeeper
        server-id: "2"
    spec:
      volumes:
      - name: data
        emptyDir: {}
      - name: wal
        emptyDir:
          medium: Memory
      - name: zookeeper-datadir-pvc-2
        persistentVolumeClaim:
          claimName: zookeeper-datadir-pvc-2
      containers:
      - name: server
        image: harbor.nbrhce.com/demo/zookeeper:v3.4.14
        imagePullPolicy: Always
        env:
        - name: MYID
          value: "2"
        - name: SERVERS
          value: "zookeeper1,zookeeper2,zookeeper3"
        - name: JVMFLAGS
          value: "-Xmx2G"
        ports:
        - containerPort: 2181
        - containerPort: 2888
        - containerPort: 3888
        volumeMounts:
        - mountPath: "/zookeeper/data"
          name: zookeeper-datadir-pvc-2
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: zookeeper3
  namespace: zookeeper
spec:
  replicas: 1
  selector:
    matchLabels:
      app: zookeeper
      server-id: "3"
  template:
    metadata:
      labels:
        app: zookeeper
        server-id: "3"
    spec:
      volumes:
      - name: data
        emptyDir: {}
      - name: wal
        emptyDir:
          medium: Memory
      - name: zookeeper-datadir-pvc-3
        persistentVolumeClaim:
          claimName: zookeeper-datadir-pvc-3
      containers:
      - name: server
        image: harbor.nbrhce.com/demo/zookeeper:v3.4.14
        imagePullPolicy: Always
        env:
        - name: MYID
          value: "3"
        - name: SERVERS
          value: "zookeeper1,zookeeper2,zookeeper3"
        - name: JVMFLAGS
          value: "-Xmx2G"
        ports:
        - containerPort: 2181
        - containerPort: 2888
        - containerPort: 3888
        volumeMounts:
        - mountPath: "/zookeeper/data"
          name: zookeeper-datadir-pvc-3
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: zookeeper-datadir-pv-1
spec:
  capacity:
    storage: 20Gi
  accessModes:
  - ReadWriteOnce
  nfs:
    server: 10.0.0.109
    path: /data/k8sdata/zookeeper-datadir-1
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: zookeeper-datadir-pv-2
spec:
  capacity:
    storage: 20Gi
  accessModes:
  - ReadWriteOnce
  nfs:
    server: 10.0.0.109
    path: /data/k8sdata/zookeeper-datadir-2
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: zookeeper-datadir-pv-3
spec:
  capacity:
    storage: 20Gi
  accessModes:
  - ReadWriteOnce
  nfs:
    server: 10.0.0.109
    path: /data/k8sdata/zookeeper-datadir-3
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: zookeeper-datadir-pvc-1
  namespace: zookeeper
spec:
  accessModes:
  - ReadWriteOnce
  volumeName: zookeeper-datadir-pv-1
  resources:
    requests:
      storage: 10Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: zookeeper-datadir-pvc-2
  namespace: zookeeper
spec:
  accessModes:
  - ReadWriteOnce
  volumeName: zookeeper-datadir-pv-2
  resources:
    requests:
      storage: 10Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: zookeeper-datadir-pvc-3
  namespace: zookeeper
spec:
  accessModes:
  - ReadWriteOnce
  volumeName: zookeeper-datadir-pv-3
  resources:
    requests:
      storage: 10Gi
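With the manifests above, a typical rollout and health check looks roughly like this (a sketch: the manifest file names are placeholders for however the YAML above is split into files, and the export directories on the NFS server 10.0.0.109 are assumed to already be published via /etc/exports):

# On the NFS server (10.0.0.109): create the directories referenced by the PVs
mkdir -p /data/k8sdata/zookeeper-datadir-{1,2,3}

# On the Kubernetes side: namespace first, then storage, then the workload
kubectl create namespace zookeeper
kubectl apply -f zookeeper-persistentvolume.yaml   # placeholder name: PV/PVC manifests
kubectl apply -f zookeeper.yaml                    # placeholder name: Service/Deployment manifests

# Verify the ensemble formed: exactly one member should report "Mode: leader"
kubectl -n zookeeper get pods -o wide
for p in $(kubectl -n zookeeper get pods -l app=zookeeper -o name); do
    kubectl -n zookeeper exec "$p" -- zkServer.sh status
done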
22.2 ZooKeeper Election Mechanism
Leader election happens in two distinct situations: the first, mentioned above, is the re-election required after the Leader crashes; the second is the initial Leader election performed when ZooKeeper starts up. A zk server can be in one of the following states:

LOOKING: Leader-unknown state. A server in this state believes the cluster currently has no Leader and will initiate a Leader election.
FOLLOWING: follower state. The server's role is Follower and it knows who the Leader is.
LEADING: leader state. The server's role is Leader and it maintains heartbeats with the Followers.
OBSERVING: observer state. The server's role is Observer; the only difference from a Follower is that it neither takes part in elections nor votes on cluster write operations.
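Which state a member ends up in can be observed from outside the cluster; a minimal sketch using ZooKeeper's four-letter-word stat command against the NodePort Services from 22.1 (this assumes the build's four-letter-word whitelist still allows stat; otherwise run zkServer.sh status inside the pods). <node-ip> is a placeholder for any cluster node address:

for port in 32181 32182 32183; do
    echo stat | nc <node-ip> $port | grep Mode
done
# Expected: one member answers "Mode: leader" and the others "Mode: follower",
# corresponding to the LEADING and FOLLOWING states above.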
22.3 Initial Leader Election
Suppose the cluster has three machines; that means two of them (more than half) must agree. Assume servers 1 through 3 have myid 1, 2 and 3 respectively. The initial Leader election then proceeds as follows:
Server 1 starts and initiates an election. It first votes for itself; a vote is a (myid, ZXID) pair, and because this is initialization every ZXID is 0, so server1's vote is (1, 0), i.e. myid 1 and ZXID 0. Server 1 now holds a single vote, which is not more than half, so the election cannot complete and server 1 stays in the LOOKING state.
Server 2 starts and another election round begins. Server 2 also votes for itself first, (2, 0), and broadcasts the vote (server 1 did so as well, it simply had no other servers to reach at the time). When server 1 receives server 2's vote it compares it with its own: ZXID is compared first and the larger ZXID is preferred as Leader; if the ZXIDs are equal, the larger myid is preferred. Server 1 therefore finds server 2 the better Leader candidate, changes its own vote to (2, 0) and broadcasts it again; server 2 receives it, sees that it matches its own vote and changes nothing. At this point server 1 holds 0 votes and server 2 holds 2 votes, which is more than half, so server 2 is confirmed as Leader. Server 1 changes its state to FOLLOWING and server 2 changes its state to LEADING.
Server 3 starts and initiates an election. Servers 1 and 2 are no longer in the LOOKING state, so server 3 simply joins the cluster as a Follower (FOLLOWING).
If the Leader node crashes at runtime, the cluster enters crash-recovery mode and stops serving clients until a new Leader is elected. Recovery can be roughly divided into four phases: election, discovery, synchronization and broadcast (see section 4.5). The Leader election flow in this case is:
The Leader dies; the two remaining Followers change their state from FOLLOWING to LOOKING. Each server sends out a vote, initially for itself, where the vote is a (myid, ZXID) pair; note that the zxid is probably no longer 0 at this point.
Votes are collected from every server.
Votes are processed; the rule is: compare ZXID first, then compare myid (a small sketch of this comparison follows the example below).
Votes are tallied; as soon as more than half of the machines hold the same vote, the Leader is determined.
Server states change from LOOKING to FOLLOWING or LEADING.
The cluster then moves through the discovery, synchronization and broadcast phases in turn.
As an example, assume a three-server cluster whose Leader (server2) has crashed, leaving server1 and server3. server1 votes (1, 99) for itself and broadcasts the vote; server3 likewise first votes (3, 95) for itself and broadcasts it. server1 and server3 then receive each other's votes and, just as in the initial election, compare the received vote with their own (larger zxid wins; if equal, larger myid wins). server1 finds server3's vote no better than its own and keeps its vote unchanged; server3 finds server1's vote better, changes its vote to (1, 99) and broadcasts it. Finally server1 sees that its vote has passed the half-way mark and makes itself the Leader, and server3 accordingly becomes a Follower.
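The vote comparison used in both scenarios (larger ZXID wins, myid breaks ties) is easy to capture in a few lines; a small illustrative sketch, not ZooKeeper's actual implementation:

#!/bin/bash
# Return the preferred of two (myid, zxid) votes: higher zxid wins, higher myid breaks ties
prefer_vote() {
    local myid_a=$1 zxid_a=$2 myid_b=$3 zxid_b=$4
    if (( zxid_a > zxid_b )) || { (( zxid_a == zxid_b )) && (( myid_a > myid_b )); }; then
        echo "($myid_a, $zxid_a)"
    else
        echo "($myid_b, $zxid_b)"
    fi
}

prefer_vote 1 0  2 0    # initial election: equal ZXIDs, so myid 2 wins -> (2, 0)
prefer_vote 1 99 3 95   # crash recovery example: zxid 99 > 95, so server1 wins -> (1, 99)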