22. ZooKeeper in Practice
Official StatefulSet deployment reference: Running ZooKeeper, A Distributed System Coordinator | Kubernetes
#This is the JDK base image
root@k8s-master1:~/k8s-data/dockerfile/web/magedu/zookeeper# nerdctl pull elevy/slim_java:8
#Then push it to the Harbor registry
root@k8s-master1:~/k8s-data/dockerfile/web/magedu/zookeeper# nerdctl push harbor.nbrhce.com/baseimages/slim_java:8
#Build the image
root@k8s-master1:~/k8s-data/dockerfile/web/magedu/zookeeper# ./build-command.sh v3.4.14
#!/bin/bash
TAG=$1
#docker build -t harbor.magedu.net/magedu/zookeeper:${TAG} .
#sleep 1
#docker push harbor.magedu.net/magedu/zookeeper:${TAG}
nerdctl build -t harbor.nbrhce.com/demo/zookeeper:${TAG} .
nerdctl push harbor.nbrhce.com/demo/zookeeper:${TAG}
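As a quick sanity check after the build script runs, the new tag can be listed locally and pulled back from Harbor (a sketch; not part of the original script):
# Confirm the tag exists locally after the build
nerdctl images | grep zookeeper
# Pull it back from Harbor to confirm the push succeeded
nerdctl pull harbor.nbrhce.com/demo/zookeeper:v3.4.14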
- Writing the Dockerfile
root@k8s-master1:~/k8s-data/dockerfile/web/magedu/zookeeper# cat Dockerfile
#FROM harbor-linux38.local.com/linux38/slim_java:8
FROM harbor.nbrhce.com/baseimages/slim_java:8
ENV ZK_VERSION 3.4.14
ADD repositories /etc/apk/repositories
# Download Zookeeper
COPY zookeeper-3.4.14.tar.gz /tmp/zk.tgz
COPY zookeeper-3.4.14.tar.gz.asc /tmp/zk.tgz.asc
COPY KEYS /tmp/KEYS
RUN apk add --no-cache --virtual .build-deps \
      ca-certificates \
      gnupg \
      tar \
      wget && \
    #
    # Install dependencies
    apk add --no-cache \
      bash && \
    #
    #
    # Verify the signature
    export GNUPGHOME="$(mktemp -d)" && \
    gpg -q --batch --import /tmp/KEYS && \
    gpg -q --batch --no-auto-key-retrieve --verify /tmp/zk.tgz.asc /tmp/zk.tgz && \
    #
    # Set up directories
    #
    mkdir -p /zookeeper/data /zookeeper/wal /zookeeper/log && \
    #
    # Install
    tar -x -C /zookeeper --strip-components=1 --no-same-owner -f /tmp/zk.tgz && \
    #
    # Slim down
    cd /zookeeper && \
    cp dist-maven/zookeeper-${ZK_VERSION}.jar . && \
    rm -rf \
      *.txt \
      *.xml \
      bin/README.txt \
      bin/*.cmd \
      conf/* \
      contrib \
      dist-maven \
      docs \
      lib/*.txt \
      lib/cobertura \
      lib/jdiff \
      recipes \
      src \
      zookeeper-*.asc \
      zookeeper-*.md5 \
      zookeeper-*.sha1 && \
    #
    # Clean up
    apk del .build-deps && \
    rm -rf /tmp/* "$GNUPGHOME"
COPY conf /zookeeper/conf/
COPY bin/zkReady.sh /zookeeper/bin/
COPY entrypoint.sh /
ENV PATH=/zookeeper/bin:${PATH} \
    ZOO_LOG_DIR=/zookeeper/log \
    ZOO_LOG4J_PROP="INFO, CONSOLE, ROLLINGFILE" \
    JMXPORT=9010
ENTRYPOINT [ "/entrypoint.sh" ]
CMD [ "zkServer.sh", "start-foreground" ]
EXPOSE 2181 2888 3888 9010
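Before writing any Kubernetes manifests, the freshly built image can be smoke-tested locally; a minimal sketch, assuming the zoo.cfg shipped in conf/ uses clientPort 2181 and that nc is available on the host (the container name zk-test is just an example):
# Run a single standalone instance from the freshly built image
nerdctl run -d --name zk-test -e MYID=1 -p 2181:2181 harbor.nbrhce.com/demo/zookeeper:v3.4.14
# ZooKeeper should answer the four-letter-word command "ruok" with "imok"
echo ruok | nc 127.0.0.1 2181
# Clean up the test container
nerdctl rm -f zk-test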
#Container startup script (entrypoint)
root@k8s-master1:~/k8s-data/dockerfile/web/magedu/zookeeper# cat entrypoint.sh
#!/bin/bash
# Write this node's ID (default 1) into the myid file
echo ${MYID:-1} > /zookeeper/data/myid
# If SERVERS is set (comma-separated hostnames), append one
# server.N entry per ensemble member to zoo.cfg
if [ -n "$SERVERS" ]; then
    IFS=\, read -a servers <<<"$SERVERS"
    for i in "${!servers[@]}"; do
        printf "\nserver.%i=%s:2888:3888" "$((1 + $i))" "${servers[$i]}" >> /zookeeper/conf/zoo.cfg
    done
fi
cd /zookeeper
exec "$@"
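For example, with MYID=1 and SERVERS="zookeeper1,zookeeper2,zookeeper3" (the values used in the Deployments below), the script writes 1 into /zookeeper/data/myid and appends these three lines to /zookeeper/conf/zoo.cfg:
server.1=zookeeper1:2888:3888
server.2=zookeeper2:2888:3888
server.3=zookeeper3:2888:3888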
22.1 YAML
- Service
apiVersion: v1
kind: Service
metadata:
  name: zookeeper
  namespace: zookeeper
spec:
  ports:
    - name: client
      port: 2181
  selector:
    app: zookeeper
---
apiVersion: v1
kind: Service
metadata:
  name: zookeeper1
  namespace: zookeeper
spec:
  type: NodePort
  ports:
    - name: client
      port: 2181
      nodePort: 32181
    - name: followers
      port: 2888
    - name: election
      port: 3888
  selector:
    app: zookeeper
    server-id: "1"
---
apiVersion: v1
kind: Service
metadata:
  name: zookeeper2
  namespace: zookeeper
spec:
  type: NodePort
  ports:
    - name: client
      port: 2181
      nodePort: 32182
    - name: followers
      port: 2888
    - name: election
      port: 3888
  selector:
    app: zookeeper
    server-id: "2"
---
apiVersion: v1
kind: Service
metadata:
  name: zookeeper3
  namespace: zookeeper
spec:
  type: NodePort
  ports:
    - name: client
      port: 2181
      nodePort: 32183
    - name: followers
      port: 2888
    - name: election
      port: 3888
  selector:
    app: zookeeper
    server-id: "3"
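The zookeeper namespace must exist before these Services (and the PVCs further down) can be created; a minimal sketch, assuming the Service manifests above are saved as zookeeper-svc.yaml (the filename is just an example):
kubectl create namespace zookeeper
kubectl apply -f zookeeper-svc.yaml
# The shared client Service plus one NodePort Service per member should appear
kubectl get svc -n zookeeper -o wide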
- Deployment
root@k8s-master1:~/k8s-data/yaml/magedu/zookeeper# cat zookeeper.yaml
kind: Deployment
#apiVersion: extensions/v1beta1
apiVersion: apps/v1
metadata:
  name: zookeeper1
  namespace: zookeeper
spec:
  replicas: 1
  selector:
    matchLabels:
      app: zookeeper
      server-id: "1"
  template:
    metadata:
      labels:
        app: zookeeper
        server-id: "1"
    spec:
      volumes:
        - name: data
          emptyDir: {}
        - name: wal
          emptyDir:
            medium: Memory
        - name: zookeeper-datadir-pvc-1
          persistentVolumeClaim:
            claimName: zookeeper-datadir-pvc-1
      containers:
        - name: server
          image: harbor.nbrhce.com/demo/zookeeper:v3.4.14
          imagePullPolicy: Always
          env:
            - name: MYID
              value: "1"
            - name: SERVERS
              value: "zookeeper1,zookeeper2,zookeeper3"
            - name: JVMFLAGS
              value: "-Xmx2G"
          ports:
            - containerPort: 2181
            - containerPort: 2888
            - containerPort: 3888
          volumeMounts:
            - mountPath: "/zookeeper/data"
              name: zookeeper-datadir-pvc-1
---
kind: Deployment
#apiVersion: extensions/v1beta1
apiVersion: apps/v1
metadata:
  name: zookeeper2
  namespace: zookeeper
spec:
  replicas: 1
  selector:
    matchLabels:
      app: zookeeper
      server-id: "2"
  template:
    metadata:
      labels:
        app: zookeeper
        server-id: "2"
    spec:
      volumes:
        - name: data
          emptyDir: {}
        - name: wal
          emptyDir:
            medium: Memory
        - name: zookeeper-datadir-pvc-2
          persistentVolumeClaim:
            claimName: zookeeper-datadir-pvc-2
      containers:
        - name: server
          image: harbor.nbrhce.com/demo/zookeeper:v3.4.14
          imagePullPolicy: Always
          env:
            - name: MYID
              value: "2"
            - name: SERVERS
              value: "zookeeper1,zookeeper2,zookeeper3"
            - name: JVMFLAGS
              value: "-Xmx2G"
          ports:
            - containerPort: 2181
            - containerPort: 2888
            - containerPort: 3888
          volumeMounts:
            - mountPath: "/zookeeper/data"
              name: zookeeper-datadir-pvc-2
---
kind: Deployment
#apiVersion: extensions/v1beta1
apiVersion: apps/v1
metadata:
  name: zookeeper3
  namespace: zookeeper
spec:
  replicas: 1
  selector:
    matchLabels:
      app: zookeeper
      server-id: "3"
  template:
    metadata:
      labels:
        app: zookeeper
        server-id: "3"
    spec:
      volumes:
        - name: data
          emptyDir: {}
        - name: wal
          emptyDir:
            medium: Memory
        - name: zookeeper-datadir-pvc-3
          persistentVolumeClaim:
            claimName: zookeeper-datadir-pvc-3
      containers:
        - name: server
          image: harbor.nbrhce.com/demo/zookeeper:v3.4.14
          imagePullPolicy: Always
          env:
            - name: MYID
              value: "3"
            - name: SERVERS
              value: "zookeeper1,zookeeper2,zookeeper3"
            - name: JVMFLAGS
              value: "-Xmx2G"
          ports:
            - containerPort: 2181
            - containerPort: 2888
            - containerPort: 3888
          volumeMounts:
            - mountPath: "/zookeeper/data"
              name: zookeeper-datadir-pvc-3
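Once the PV/PVC objects defined below are in place, the Deployments can be applied and the ensemble verified; a rough sketch, assuming your kubectl supports exec against deploy/<name> and that nc is available (zkServer.sh status should report one leader and two followers):
kubectl apply -f zookeeper.yaml
kubectl get pods -n zookeeper -o wide
# Ask each member for its role
for d in zookeeper1 zookeeper2 zookeeper3; do
  kubectl -n zookeeper exec deploy/$d -- /zookeeper/bin/zkServer.sh status
done
# The client port is also reachable from outside the cluster via the NodePort Services (32181-32183)
echo stat | nc <node-ip> 32181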
- PV
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: zookeeper-datadir-pv-1
spec:
  capacity:
    storage: 20Gi
  accessModes:
    - ReadWriteOnce
  nfs:
    server: 10.0.0.109
    path: /data/k8sdata/zookeeper-datadir-1
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: zookeeper-datadir-pv-2
spec:
  capacity:
    storage: 20Gi
  accessModes:
    - ReadWriteOnce
  nfs:
    server: 10.0.0.109
    path: /data/k8sdata/zookeeper-datadir-2
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: zookeeper-datadir-pv-3
spec:
  capacity:
    storage: 20Gi
  accessModes:
    - ReadWriteOnce
  nfs:
    server: 10.0.0.109
    path: /data/k8sdata/zookeeper-datadir-3
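The three NFS export directories must exist on the NFS server (10.0.0.109 in this environment) before the PVs are usable; a minimal preparation sketch run on that server (the /etc/exports options are an assumption, adjust them to your environment):
# On the NFS server 10.0.0.109
mkdir -p /data/k8sdata/zookeeper-datadir-1 /data/k8sdata/zookeeper-datadir-2 /data/k8sdata/zookeeper-datadir-3
# Example export entry (assumption), then reload the export table
echo '/data/k8sdata *(rw,no_root_squash)' >> /etc/exports
exportfs -r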
- PVC
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: zookeeper-datadir-pvc-1
  namespace: zookeeper
spec:
  accessModes:
    - ReadWriteOnce
  volumeName: zookeeper-datadir-pv-1
  resources:
    requests:
      storage: 10Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: zookeeper-datadir-pvc-2
  namespace: zookeeper
spec:
  accessModes:
    - ReadWriteOnce
  volumeName: zookeeper-datadir-pv-2
  resources:
    requests:
      storage: 10Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: zookeeper-datadir-pvc-3
  namespace: zookeeper
spec:
  accessModes:
    - ReadWriteOnce
  volumeName: zookeeper-datadir-pv-3
  resources:
    requests:
      storage: 10Gi
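Apply the PV and PVC manifests and confirm that every claim is Bound before creating the Deployments; a minimal sketch, assuming the manifests above are saved as zookeeper-persistentvolume.yaml and zookeeper-persistentvolumeclaim.yaml (the filenames are examples):
kubectl apply -f zookeeper-persistentvolume.yaml
kubectl apply -f zookeeper-persistentvolumeclaim.yaml
kubectl get pv
kubectl get pvc -n zookeeper   # STATUS should be Bound for all three claims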
22.2 ZooKeeper Election Mechanism
- Server states during the different election phases
Leader election happens in two different situations: the first is when the current Leader goes down and a new Leader must be elected; the second is the initial Leader election performed when ZooKeeper starts up. A ZooKeeper server can be in one of the following states:
LOOKING: undecided. A server in this state believes the cluster currently has no Leader and will initiate a Leader election.
FOLLOWING: follower state. The server's role is Follower and it knows who the Leader is.
LEADING: leader state. The server's role is Leader and it maintains heartbeats with the Followers.
OBSERVING: observer state. The server's role is Observer; the only difference from a Follower is that it takes part neither in elections nor in the voting for cluster write operations.
22.3 Initial Leader Election
- Election when the service starts for the first time
Suppose the cluster has 3 machines, which means 2 of them (more than half) must agree on a Leader. Assume servers 1-3 have myid 1, 2 and 3 respectively; the initial Leader election then proceeds as follows:
Server 1 starts and initiates an election. It first votes for itself; a vote has the form (myid, ZXID), and because this is the initial startup every ZXID is 0, so server1's vote is (1, 0), i.e. myid 1 and ZXID 0. Server 1 now holds a single vote, which is not more than half, so the election cannot complete and server 1 stays in the LOOKING state.
Server 2 starts and another election round takes place. Server 2 also votes for itself first, (2, 0), and broadcasts its vote (server1 did the same, it simply had no other servers to send to at the time). When server1 receives server2's vote it compares it with its own: the ZXID is compared first and the larger ZXID wins; if the ZXIDs are equal, the larger myid wins. Server1 therefore decides that server2 is the better Leader candidate, changes its own vote to (2, 0) and broadcasts it; server2 receives it, sees that it matches its own vote and changes nothing. At this point server1 has 0 votes and server2 has 2 votes, which is more than half, so server2 is confirmed as the Leader. Server 1 switches to FOLLOWING and server 2 switches to LEADING.
Server 3 starts and initiates an election, but servers 1 and 2 are no longer in the LOOKING state, so server 3 simply joins the cluster as a Follower (FOLLOWING).
- Leader election at runtime
If the Leader node crashes at runtime the cluster enters crash-recovery mode; until a new Leader has been elected the ensemble stops serving clients. Recovery roughly consists of four phases: election, discovery, synchronization and broadcast (see Section 4.5). The Leader election itself works as follows:
The Leader dies, and the two remaining Followers change their state from FOLLOWING to LOOKING. Each server sends out a vote, initially for itself, with the content (myid, ZXID); note that the ZXID is probably no longer 0 at this point.
Each server collects the votes sent by the other servers.
The votes are processed: the ZXID is compared first, then the myid.
The votes are counted: as soon as more than half of the machines have received the same vote, the Leader is determined.
The servers change state from LOOKING to FOLLOWING or LEADING.
The cluster then goes through the discovery, synchronization and broadcast phases in turn.
As an example, suppose the cluster has three servers and the Leader (server2) dies, leaving only server1 and server3. Server1 votes for itself with (1, 99) and broadcasts the vote to the other servers; server3 also votes for itself first, with (3, 95), and broadcasts it as well. Server1 and server3 then receive each other's votes and, exactly as in the initial election, compare the received vote with their own (larger ZXID wins; if equal, larger myid wins). Server1 finds that server3's vote is no better than its own and keeps its vote; server3 finds that server1's vote is better, changes its vote to (1, 99) and broadcasts it. Server1 then sees that its vote is backed by more than half of the ensemble and makes itself the Leader, and server3 accordingly becomes a Follower.
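The comparison rule used in both elections (larger ZXID wins, myid breaks ties) can be sketched as a tiny shell function; this is only an illustration of the rule described above, not ZooKeeper's actual code:
# Toy illustration: given two votes (myid, zxid), print the myid of the winning vote
better_vote() {
  local myid1=$1 zxid1=$2 myid2=$3 zxid2=$4
  if [ "$zxid1" -gt "$zxid2" ]; then
    echo "$myid1"            # larger ZXID wins
  elif [ "$zxid2" -gt "$zxid1" ]; then
    echo "$myid2"
  elif [ "$myid1" -ge "$myid2" ]; then
    echo "$myid1"            # equal ZXID: larger myid wins
  else
    echo "$myid2"
  fi
}
better_vote 1 99 3 95   # prints 1 -> server1 wins, matching the runtime example above
better_vote 1 0 2 0     # prints 2 -> matches the initial election example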