每天一点基础K8S--K8S中的deployment(二)蓝绿发布、金丝雀发布
https://app.yinxiang.com/fx/e417266f-6609-40f6-a9cc-a34467ff1671
# 前面实验了deployment的基础语法、扩容缩容和滚动更新。借助deployment(配合service)还可以实现蓝绿发布和金丝雀发布
蓝绿发布
# 蓝绿发布就是现网业务运行在系统1上,计划升级的新版本部署在系统2上。
# 新版本的系统2部署出来以后将进行充分的测试、验证。通过之后,将业务切换到系统2,即完成了版本更新。
# 优点:
更新过程不用停机,在线完成,风险低;
回滚方便,效率高;
# 缺点:
两套一样的系统,占用资源,成本较高。
负载均衡、代理、路由等处理不当,可能导致业务没正常切换。
# Kubernetes本身是不直接支持蓝绿发布的,当前主要是通过新建一个deployment,再配合修改service的selector(标签选择器)完成。
# 版本1 yaml文件示例
apiVersion: apps/v1
kind: Deployment
metadata:
name: version-1.1.1
labels:
func: deployment-version-1.1.1
spec:
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 2
maxUnavailable: 1
replicas: 3
selector:
matchLabels:
version: version-1.1.1
template:
metadata:
name: nginx-test
labels:
version: version-1.1.1
spec:
tolerations:
- effect: "NoSchedule"
key: "can-run-pods"
operator: "Equal"
value: "master-node"
- effect: "NoSchedule"
key: "node-role.kubernetes.io/control-plane"
operator: "Equal"
value:
containers:
- name: nginx-test
image: nginx:stable-alpine-perl
imagePullPolicy: IfNotPresent
env:
- name: VERSION
value: 1.1.1
lifecycle:
postStart:
exec:
command: ["/bin/sh","-c","echo the version is $VERSION, this is a nginx server in container, and the ip is $(ip add show eth0 | grep -w inet | awk '{print $2}') > /usr/share/nginx/html/index.html" ]
startupProbe:
periodSeconds: 5
initialDelaySeconds: 20
timeoutSeconds: 10
httpGet:
scheme: HTTP
port: 80
path: /
livenessProbe:
periodSeconds: 5
initialDelaySeconds: 20
timeoutSeconds: 10
httpGet:
scheme: HTTP
port: 80
path: /
readinessProbe:
periodSeconds: 5
initialDelaySeconds: 20
timeoutSeconds: 10
httpGet:
scheme: HTTP
port: 80
path: /
# 当前业务运行版本为version1.1.1,测试版本系统为2.2.2
[root@master-worker-node-1 deployment]# kubectl apply -f version-1.yaml
deployment.apps/version-1.1.1 created
[root@master-worker-node-1 deployment]# kubectl apply -f version-2.yaml
deployment.apps/version-2.2.2 created
[root@master-worker-node-1 deployment]# kubectl get deployment -o wide
NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR
version-1.1.1 3/3 3 3 88s nginx-test nginx:stable-alpine-perl version=version-1.1.1
version-2.2.2 3/3 3 3 85s nginx-test nginx:stable-alpine-perl version=version-2.2.2
[root@master-worker-node-1 deployment]# kubectl get rs -o wide
NAME DESIRED CURRENT READY AGE CONTAINERS IMAGES SELECTOR
version-1.1.1-655886c79f 3 3 3 98s nginx-test nginx:stable-alpine-perl pod-template-hash=655886c79f,version=version-1.1.1
version-2.2.2-7fffc7cb49 3 3 3 94s nginx-test nginx:stable-alpine-perl pod-template-hash=7fffc7cb49,version=version-2.2.2
[root@master-worker-node-1 deployment]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
version-1.1.1-655886c79f-9282k 1/1 Running 0 2m16s 10.244.54.17 only-worker-node-4 <none> <none>
version-1.1.1-655886c79f-bfs2w 1/1 Running 0 2m16s 10.244.31.8 only-worker-node-3 <none> <none>
version-1.1.1-655886c79f-srzjj 1/1 Running 0 2m16s 10.244.132.229 master-worker-node-2 <none> <none>
version-2.2.2-7fffc7cb49-l9bdl 1/1 Running 0 2m12s 10.244.31.9 only-worker-node-3 <none> <none>
version-2.2.2-7fffc7cb49-nrgxj 1/1 Running 0 2m12s 10.244.54.18 only-worker-node-4 <none> <none>
version-2.2.2-7fffc7cb49-w8gpj 1/1 Running 0 2m12s 10.244.132.230 master-worker-node-2 <none> <none>
# 承载业务的service
apiVersion: v1
kind: Service
metadata:
name: version-service
spec:
type: ClusterIP
ports:
- port: 30080
targetPort: 80
protocol: "TCP"
selector:
version: version-1.1.1
[root@master-worker-node-1 deployment]# kubectl apply -f version-service.yaml
service/version-service created
[root@master-worker-node-1 deployment]# kubectl get svc -o wide | grep version
version-service ClusterIP 10.100.156.76 <none> 30080/TCP 93s version=version-1.1.1
[root@master-worker-node-1 deployment]# kubectl describe service version-service
Name: version-service
Namespace: default
Labels: <none>
Annotations: <none>
Selector: version=version-1.1.1
Type: ClusterIP
IP Family Policy: SingleStack
IP Families: IPv4
IP: 10.100.156.76
IPs: 10.100.156.76
Port: <unset> 30080/TCP
TargetPort: 80/TCP
Endpoints: 10.244.132.229:80,10.244.31.8:80,10.244.54.17:80
Session Affinity: None
Events: <none>
[root@master-worker-node-1 deployment]# kubectl get pods -o wide -l version=version-1.1.1
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
version-1.1.1-655886c79f-9282k 1/1 Running 0 13m 10.244.54.17 only-worker-node-4 <none> <none>
version-1.1.1-655886c79f-bfs2w 1/1 Running 0 13m 10.244.31.8 only-worker-node-3 <none> <none>
version-1.1.1-655886c79f-srzjj 1/1 Running 0 13m 10.244.132.229 master-worker-node-2 <none> <none>
# 没有更新之前,只有version-1.1.1对外提供服务
[root@master-worker-node-1 deployment]# ipvsadm -Ln | grep -A 3 30080
TCP 10.100.156.76:30080 rr
-> 10.244.31.8:80 Masq 1 0 0
-> 10.244.54.17:80 Masq 1 0 0
-> 10.244.132.229:80 Masq 1 0 0
[root@master-worker-node-1 deployment]# curl 10.100.156.76:30080
the version is 1.1.1, this is a nginx server in container, and the ip is 10.244.31.8/32
[root@master-worker-node-1 deployment]# curl 10.100.156.76:30080
the version is 1.1.1, this is a nginx server in container, and the ip is 10.244.132.229/32
[root@master-worker-node-1 deployment]# curl 10.100.156.76:30080
the version is 1.1.1, this is a nginx server in container, and the ip is 10.244.54.17/32
# 此时version2.2.2经过充分测试,可以上线了,那么通过修改service的selector即可完成业务切换。
[root@master-worker-node-1 deployment]# kubectl edit service version-service
service/version-service edited
[root@master-worker-node-1 deployment]# kubectl get service -o wide | grep version
version-service ClusterIP 10.100.156.76 <none> 30080/TCP 22m version=version-2.2.2
[root@master-worker-node-1 deployment]# kubectl describe service version-service
Name: version-service
Namespace: default
Labels: <none>
Annotations: <none>
Selector: version=version-2.2.2
Type: ClusterIP
IP Family Policy: SingleStack
IP Families: IPv4
IP: 10.100.156.76
IPs: 10.100.156.76
Port: <unset> 30080/TCP
TargetPort: 80/TCP
Endpoints: 10.244.132.230:80,10.244.31.9:80,10.244.54.18:80
Session Affinity: None
Events: <none>
[root@master-worker-node-1 deployment]# kubectl get pods -o wide -l version=version-2.2.2
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
version-2.2.2-7fffc7cb49-l9bdl 1/1 Running 0 34m 10.244.31.9 only-worker-node-3 <none> <none>
version-2.2.2-7fffc7cb49-nrgxj 1/1 Running 0 34m 10.244.54.18 only-worker-node-4 <none> <none>
version-2.2.2-7fffc7cb49-w8gpj 1/1 Running 0 34m 10.244.132.230 master-worker-node-2 <none> <none>
# 已完成业务切换
[root@master-worker-node-1 deployment]# curl 10.100.156.76:30080
the version is 2.2.2, this is a nginx server in container, and the ip is 10.244.54.18/32
[root@master-worker-node-1 deployment]# curl 10.100.156.76:30080
the version is 2.2.2, this is a nginx server in container, and the ip is 10.244.31.9/32
[root@master-worker-node-1 deployment]# curl 10.100.156.76:30080
the version is 2.2.2, this is a nginx server in container, and the ip is 10.244.132.230/32
# 回滚也同样是修改service的selector完成。
金丝雀(canary)发布、灰度发布
# 蓝绿发布是部署一套新版本的环境,待充分测试验证以后再发布,这样比较消耗资源,所以可以先更新一部分pod或者一定比例的pod,新版本的pod运行一段时间后,如果没有报错,那么就可以逐步扩大新版本的pod的数量,并逐步完成更新。
# 简单的灰度发布可以手动完成,但是对于复杂的灰度发布,需要配合监控平台。确定新版本没问题以后,再将其他pod升级到新版本。
# 优点
灵活、策略可自定义,例如可以先升级android用户,出现问题也不会影响全部用户;
# 缺点
没有覆盖全部用户,问题排查稍有难度;
# 创建一个deployment 和 service
apiVersion: apps/v1
kind: Deployment
metadata:
name: gray-release
labels:
func: gray-release
spec:
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 2
maxUnavailable: 1
replicas: 3
selector:
matchLabels:
version: gray-release
template:
metadata:
name: nginx-test
labels:
version: gray-release
spec:
tolerations:
- effect: "NoSchedule"
key: "can-run-pods"
operator: "Equal"
value: "master-node"
- effect: "NoSchedule"
key: "node-role.kubernetes.io/control-plane"
operator: "Equal"
value:
containers:
- name: nginx-test
image: nginx:stable-alpine-perl
imagePullPolicy: IfNotPresent
env:
- name: VERSION
value: 3.3.3
lifecycle:
postStart:
exec:
command: ["/bin/sh","-c","echo the version is $VERSION, this is a nginx server in container, and the ip is $(ip add show eth0 | grep -w inet | awk '{print $2}') > /usr/share/nginx/html/index.html" ]
startupProbe:
periodSeconds: 5
initialDelaySeconds: 20
timeoutSeconds: 10
httpGet:
scheme: HTTP
port: 80
path: /
livenessProbe:
periodSeconds: 5
initialDelaySeconds: 20
timeoutSeconds: 10
httpGet:
scheme: HTTP
port: 80
path: /
readinessProbe:
periodSeconds: 5
initialDelaySeconds: 20
timeoutSeconds: 10
httpGet:
scheme: HTTP
port: 80
path: /
---
apiVersion: v1
kind: Service
metadata:
name: gray-release-service
spec:
type: ClusterIP
ports:
- port: 30080
targetPort: 80
protocol: "TCP"
selector:
version: gray-release
# 创建deployment和service
[root@master-worker-node-1 deployment]# kubectl apply -f gray-release.yaml
deployment.apps/gray-release created
service/gray-release-service created
[root@master-worker-node-1 deployment]# kubectl get all -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod/gray-release-6d975d98bb-2zwgb 1/1 Running 0 3m52s 10.244.132.231 master-worker-node-2 <none> <none>
pod/gray-release-6d975d98bb-k8tcg 1/1 Running 0 3m52s 10.244.54.19 only-worker-node-4 <none> <none>
pod/gray-release-6d975d98bb-nwpt4 1/1 Running 0 3m52s 10.244.31.10 only-worker-node-3 <none> <none>
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
service/gray-release-service ClusterIP 10.96.105.29 <none> 30080/TCP 85s version=gray-release
service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 14d <none>
NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR
deployment.apps/gray-release 3/3 3 3 3m53s nginx-test nginx:stable-alpine-perl version=gray-release
NAME DESIRED CURRENT READY AGE CONTAINERS IMAGES SELECTOR
replicaset.apps/gray-release-6d975d98bb 3 3 3 3m53s nginx-test nginx:stable-alpine-perl pod-template-hash=6d975d98bb,version=gray-release
# 手工执行灰度测试过程
[root@master-worker-node-1 deployment]# kubectl set image deployment gray-release nginx-test=nginx:stable-alpine-perl-2 && kubectl rollout pause deployment gray-release
deployment.apps/gray-release image updated
deployment.apps/gray-release paused
# 上面这条命令的意思是将gray-release这个deployment中nginx-test容器的镜像修改为nginx:stable-alpine-perl-2,按道理说,此时会触发滚动更新,具体更新过程与deployment中设置的更新策略相对应。紧接着,执行kubectl rollout pause暂停更新。
# 此时可以看到5个pod(3个旧版本、2个新版本),滚动更新的其他任务就被暂停了,可以通过kubectl get pods -o wide -w 看到pending。
[root@master-worker-node-1 deployment]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
gray-release-5946d98d59-9gzch 1/1 Running 0 2m42s 10.244.54.23 only-worker-node-4 <none> <none>
gray-release-5946d98d59-v4xth 1/1 Running 0 2m42s 10.244.31.1 only-worker-node-3 <none> <none>
gray-release-6d975d98bb-4f686 1/1 Running 0 4m4s 10.244.54.22 only-worker-node-4 <none> <none>
gray-release-6d975d98bb-wgndv 1/1 Running 0 4m5s 10.244.31.2 only-worker-node-3 <none> <none>
gray-release-6d975d98bb-xvlj8 1/1 Running 0 4m3s 10.244.132.234 master-worker-node-2 <none> <none>
# 新出现的两个pod是通过gray-release-5946d98d59这个replicaset生成的,也就是新版本的pod。
# 所有的pod都能访问
[root@master-worker-node-1 deployment]# curl 10.96.105.29:30080
the version is 3.3.3, this is a nginx server in container, and the ip is 10.244.54.23/32
[root@master-worker-node-1 deployment]# curl 10.96.105.29:30080
the version is 3.3.3, this is a nginx server in container, and the ip is 10.244.31.1/32
[root@master-worker-node-1 deployment]# curl 10.96.105.29:30080
the version is 3.3.3, this is a nginx server in container, and the ip is 10.244.132.234/32
[root@master-worker-node-1 deployment]# curl 10.96.105.29:30080
the version is 3.3.3, this is a nginx server in container, and the ip is 10.244.54.22/32
[root@master-worker-node-1 deployment]# curl 10.96.105.29:30080
the version is 3.3.3, this is a nginx server in container, and the ip is 10.244.31.2/32
# 充分验证业务以后,新生成的两个pod各项都正常,那么就可以继续rollout动作
[root@master-worker-node-1 deployment]# kubectl rollout resume deployment gray-release
deployment.apps/gray-release resumed
# 此时滚动更新继续执行并完成,所有pod都已经由新的replicaset(gray-release-5946d98d59)生成,恢复到正常的3个副本。
[root@master-worker-node-1 deployment]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
gray-release-5946d98d59-9gzch 1/1 Running 0 7m32s 10.244.54.23 only-worker-node-4 <none> <none>
gray-release-5946d98d59-v4xth 1/1 Running 0 7m32s 10.244.31.1 only-worker-node-3 <none> <none>
gray-release-5946d98d59-z5p8z 1/1 Running 0 38s 10.244.132.235 master-worker-node-2 <none> <none>
# 此时如果发现新产生的两个金丝雀pod有问题,调度到这两个pod的业务无法访问,应该怎么回滚呢?测试金丝雀发布过程中有问题如何回滚
# 使用kubectl apply -f gray-release.yaml重新声明一下原来的配置(镜像会改回旧版本),并恢复deployment的状态;如果deployment仍处于paused状态,还需要执行kubectl rollout resume,回滚才会真正生效
[root@master-worker-node-1 deployment]# kubectl apply -f gray-release.yaml
deployment.apps/gray-release configured
service/gray-release-service unchanged
分类:
K8S
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· .NET10 - 预览版1新功能体验(一)
2020-12-11 九、docker的跨主机通讯之overlay(vxlan)