k8s - kubeadm部署多Master高可用k8s集群
系统初始化
## Name resolution for every cluster host (this replaces the whole /etc/hosts)
cat > /etc/hosts << 'EOF'
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
172.26.105.254 node01.master.k8s.test.czlun.com node01
172.26.105.253 node02.master.k8s.test.czlun.com node02
172.26.105.252 node03.master.k8s.test.czlun.com node03
172.26.105.255 proxy.master.k8s.test.czlun.com
EOF
## Set the hostname to the FQDN that /etc/hosts maps to eth0's IPv4 address
# First IPv4 address on eth0, without the /prefix
node_ip=$(ip -4 -o addr show dev eth0 | awk '{split($4, a, "/"); print a[1]; exit}')
# Exact match on the address column, so 10.0.0.2 can never match 10.0.0.25
node_fqdn=$(awk -v ip="$node_ip" '$1 == ip {print $2; exit}' /etc/hosts)
if [[ -n "$node_fqdn" ]]; then
  hostnamectl set-hostname "$node_fqdn"
else
  # Do not call hostnamectl with an empty argument when the lookup fails
  echo "no /etc/hosts entry for eth0 address '${node_ip}'" >&2
fi
## Switch SELinux to permissive mode on the running system ...
setenforce Permissive
## ... and make the same setting persistent in the boot-time configuration
sed -i -e 's|^SELINUX=enforcing$|SELINUX=permissive|' /etc/selinux/config
## Disable swap (kubeadm's preflight checks and the kubelet require swap to be off)
# Turn swap off for the running system
swapoff -a
# Comment out active swap entries so swap stays off after a reboot
sed -ri '/^[^#].*[[:space:]]swap[[:space:]]/s/^/#/' /etc/fstab
## Load the br_netfilter kernel module.
## This must happen BEFORE the sysctl step: the net.bridge.* keys only exist
## once the module is loaded.
## The module needs a recent enough kernel; update the kernel if loading fails:
## yum -y install kernel kernel-devel kernel-headers
if lsmod | grep -q '^br_netfilter'; then
  echo br_netfilter module is loaded
else
  echo br_netfilter module is loading
  if modprobe br_netfilter; then
    echo Succeed to load br_netfilter module
  else
    echo Failed to load br_netfilter module >&2
  fi
fi
# Have systemd-modules-load load the module again on every boot
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf
## Kernel parameters: enable packet forwarding and make bridged traffic
## traverse iptables, so that traffic is not routed around the iptables rules.
# '>' (not '>>') keeps the file free of duplicates when this step is re-run
cat > /etc/sysctl.d/k8s.conf << 'EOF'
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
可选:
- 安装ipvsadm 为kube-proxy提供ipvs支持。
yum -y install ipvsadm ipset
# Kernel modules kube-proxy needs in ipvs mode.
# NOTE(review): on kernels >= 4.19 nf_conntrack_ipv4 was merged into
# nf_conntrack; replace the last entry if the kernel has been upgraded.
ipvs_modules=(ip_vs ip_vs_rr ip_vs_wrr ip_vs_sh nf_conntrack_ipv4)
# Load the modules now
for mod in "${ipvs_modules[@]}"; do
  modprobe -- "$mod"
done
# Load them on every boot: systemd-modules-load reads one module name per line
# from /etc/modules-load.d/*.conf (no shebang or exec bit required)
printf '%s\n' "${ipvs_modules[@]}" > /etc/modules-load.d/ipvs.conf
安装k8s集群
通过kubeadm,采用堆叠方案(etcd与control-plane不分离)创建高可用kubernetes集群
kubernetes版本兼容性
对kube-1.16来说,经过验证兼容的docker版本为 1.13.1, 17.03, 17.06, 17.09, 18.06, 18.09
https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.16.md#unchanged
此次部署版本选型为
-
k8s 1.16.10
- 1.16.13 紧急升级内容:修复了从受感染节点到群集的权限升级
- 1.16.13 遇到bug:controller-manager 和 scheduler 无法顺利启动,导致 liveness 健康检查失败并不断重启,所以本次实施选择 1.16.10 版本。
1 actual_state_of_world.go:506] Failed to update statusUpdateNeeded field in actual state of world: Failed to set statusUpdateNeeded to needed true, because nodeName="node01.master.k8s.test.czlun.com" does not exist
-
docker 18.09.9
实施安装
下载aliyun docker-ce repo
# Fetch the docker-ce repo definition from the Aliyun mirror into yum's repo directory
curl -s -o /etc/yum.repos.d/docker-ce.repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
下载aliyun k8s repo
# Define the Kubernetes yum repository (Aliyun mirror); signature checks are disabled here
cat > /etc/yum.repos.d/kubernetes.repo << 'EOF'
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
安装
## Drop and rebuild the yum metadata cache
yum clean all && yum makecache
## Remove previously installed docker/kube packages.
## xargs -r skips rpm entirely when nothing matches, so a clean host does not
## fail with "rpm: no packages given for erase".
rpm -qa | grep -E '^(docker|kube)' | xargs -r rpm -e
## Install the pinned versions
yum install -y --nogpgcheck kubelet-1.16.10 kubeadm-1.16.10 kubectl-1.16.10 docker-ce-18.09.9 docker-ce-cli-18.09.9
## Enable both services at boot and start them immediately
systemctl enable --now kubelet docker
应用调优
修改docker配置
# Docker daemon settings written below:
#   - cgroup driver set to systemd
#   - Aliyun registry mirror
mkdir -p /etc/docker
cat > /etc/docker/daemon.json << 'EOF'
{
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
},
"storage-driver": "overlay2",
"storage-opts": [
"overlay2.override_kernel_check=true"
],
"registry-mirrors": ["https://ydjenbck.mirror.aliyuncs.com"]
}
EOF
# Restart so the daemon picks up the new configuration
systemctl restart docker
初始化集群
创建负载均衡节点
这里使用haproxy以tcp模式作为API Server负载均衡入口,监听端口为12567
# Install haproxy
yum install -y haproxy
# Back up the packaged configuration (-n: never overwrite an existing backup on re-run)
cp -n /etc/haproxy/haproxy.cfg{,.bak}
# Write the load-balancer configuration
cat << 'EOF' > /etc/haproxy/haproxy.cfg
global
    log 127.0.0.1 local2
    chroot /var/lib/haproxy
    pidfile /var/run/haproxy.pid
    maxconn 4000
    user haproxy
    group haproxy
    daemon
    stats socket /var/lib/haproxy/stats

defaults
    mode http
    log global
    option httplog
    option dontlognull
    option http-server-close
    option forwardfor except 127.0.0.0/8
    option redispatch
    retries 3
    timeout http-request 10s
    timeout queue 1m
    timeout connect 10s
    timeout client 1m
    timeout server 1m
    timeout http-keep-alive 10s
    timeout check 10s
    maxconn 3000

# Statistics page, served at /stats on port 12345.
# The address is given with 'bind': the 'listen <name> <addr>' form is
# rejected by haproxy >= 1.6.
listen stats
    bind 0.0.0.0:12345
    mode http
    log global
    maxconn 10
    stats enable
    stats hide-version
    stats refresh 5s
    stats show-node
    stats uri /stats

# TCP pass-through to the kube-apiservers (TLS is terminated by the apiservers)
frontend kube-api-https
    bind 0.0.0.0:12567
    mode tcp
    # The inherited 'option httplog' does not apply to a tcp frontend
    option tcplog
    default_backend kube-api-server

backend kube-api-server
    balance roundrobin
    mode tcp
    # Server names follow the host names defined in /etc/hosts
    server node01 172.26.105.254:6443 check
    server node02 172.26.105.253:6443 check
    server node03 172.26.105.252:6443 check
EOF
# Enable haproxy at boot and start it immediately
systemctl enable --now haproxy
初始化首个控制平面
- 初始化控制平面
# Bootstrap the first control-plane node.
#   --kubernetes-version      Kubernetes version to deploy
#   --control-plane-endpoint  control-plane address: the load-balancer node configured above
#   --upload-certs            upload the control-plane certificates
#   --pod-network-cidr        address range of the pod network
#   --image-repository        image registry used during initialisation
kubeadm init \
  --kubernetes-version=1.16.10 \
  --control-plane-endpoint=proxy.master.k8s.test.czlun.com:12567 \
  --upload-certs \
  --pod-network-cidr=192.168.0.0/16 \
  --image-repository=registry.aliyuncs.com/google_containers
执行完成后,应该会得到类似的结果并保存。
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of the control-plane node running the following command on each as root:
kubeadm join proxy.master.k8s.test.czlun.com:12567 --token achhjj.4cr1vm4z1k678fqm \
--discovery-token-ca-cert-hash sha256:0731d2cf02babef1d868b1109983c8cc8c9a0d2ed4dc832c0ddb74a71376cbd3 \
--control-plane --certificate-key e345ff76dede23fb9d0f714c99143d50c794a89bce7f1e215df2248e9fe04a7f
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join proxy.master.k8s.test.czlun.com:12567 --token achhjj.4cr1vm4z1k678fqm \
--discovery-token-ca-cert-hash sha256:0731d2cf02babef1d868b1109983c8cc8c9a0d2ed4dc832c0ddb74a71376cbd3
- 根据提示执行以下命令开始使用集群
# Install the admin kubeconfig for the current user.
# Expansions are quoted so a home directory containing spaces still works.
mkdir -p "$HOME/.kube"
# -i asks before overwriting an existing config
sudo cp -i /etc/kubernetes/admin.conf "$HOME/.kube/config"
# The copy is made as root: hand ownership back to the invoking user
sudo chown "$(id -u):$(id -g)" "$HOME/.kube/config"
-
为集群部署pod网络,此处选用calico。
使用Kubernetes API数据存储库(不超过50个节点)安装Calico
-
下载Kubernetes API数据存储区的Calico清单文件。
# -f: fail on HTTP errors instead of saving the error page as calico.yaml
# -L: follow redirects issued by the documentation site
# NOTE(review): this URL is unversioned; confirm the manifest it serves supports Kubernetes 1.16
curl -fLO https://docs.projectcalico.org/manifests/calico.yaml
-
在清单中取消对CALICO_IPV4POOL_CIDR变量的注释,并将其设置为与所选Pod CIDR相同的值。
-
应用清单文件
# Apply the downloaded (and edited) Calico manifest to the cluster
kubectl apply -f calico.yaml
-
查看calico组件的启动状况与节点状况
# Watch the kube-system pods (including calico) start up; press Ctrl-C to stop watching
kubectl get pod -n kube-system -w
# Then check the node status
kubectl get node
-
注意:
-
主控制平面的证书被加密并上传到 kubeadm-certs Secret 中。
-
要重新生成引导令牌(--token)并得到集群所需的完整 'kubeadm join' 命令,请在已加入集群的控制平面节点上使用以下命令:
# Create a new bootstrap token and print the complete 'kubeadm join' command that uses it
kubeadm token create --print-join-command
-
要重新上传证书并生成新的解密密钥,请在已加入集群的控制平面节点上使用以下命令:
# Re-upload the control-plane certificates and generate a new decryption key
kubeadm init phase upload-certs --upload-certs
-
您还可以在 init 期间指定自定义的 --certificate-key,以后可以由 join 使用。要生成这样的密钥,可以使用以下命令:
kubeadm alpha certs certificate-key
可选:
- 修改kube-proxy mode为ipvs。所有k8s节点均需要开启ipvs支持,参考上文“系统初始化”一节中的可选步骤(安装ipvsadm并加载ip_vs内核模块)。
# Change the mode field of the kube-proxy configuration to "ipvs"
kubectl edit configmap kube-proxy -n kube-system
...
mode: "ipvs"
...
# kube-proxy only reads its configuration at startup: delete the running pods
# so the DaemonSet recreates them with the new mode
kubectl -n kube-system delete pod -l k8s-app=kube-proxy
其余控制平面加入集群
对于剩下的所有控制平面节点,都需要执行以下步骤以控制平面的角色在集群中注册。
- 执行首个控制平面初始化的信息输出中提供的join 命令。
# Join this node as an additional control plane.
# The token, CA certificate hash and certificate key are the values printed by
# the first control plane's 'kubeadm init'; use the ones from your own output.
kubeadm join proxy.master.k8s.test.czlun.com:12567 --token achhjj.4cr1vm4z1k678fqm \
--discovery-token-ca-cert-hash sha256:0731d2cf02babef1d868b1109983c8cc8c9a0d2ed4dc832c0ddb74a71376cbd3 \
--control-plane --certificate-key e345ff76dede23fb9d0f714c99143d50c794a89bce7f1e215df2248e9fe04a7f
- --control-plane 选项表示注册为控制平面角色,而不是worker节点。
- --certificate-key 选项使用指定的密钥解密从控制平面下载的证书。
执行完成后,应该会得到类似的结果并保存。
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
- 根据提示执行以下命令开始使用集群
# Install the admin kubeconfig for the current user on this node.
# Expansions are quoted so a home directory containing spaces still works.
mkdir -p "$HOME/.kube"
# -i asks before overwriting an existing config
sudo cp -i /etc/kubernetes/admin.conf "$HOME/.kube/config"
# The copy is made as root: hand ownership back to the invoking user
sudo chown "$(id -u):$(id -g)" "$HOME/.kube/config"
- 执行以下命令查看该节点加入集群状况
# List the cluster nodes to check how this node joined the cluster
kubectl get nodes
集群部署完成效果。
[root@node01 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
node01.master.k8s.test.czlun.com Ready master 2d20h v1.16.10
node02.master.k8s.test.czlun.com Ready master 2d19h v1.16.10
node03.master.k8s.test.czlun.com Ready master 2d19h v1.16.10
[root@node01 ~]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-59d85c5c84-7g4h6 1/1 Running 4 2d20h
calico-node-9nbk7 1/1 Running 1 2d19h
calico-node-krffv 1/1 Running 2 2d20h
calico-node-ph89w 1/1 Running 1 2d19h
coredns-58cc8c89f4-d7zch 1/1 Running 1 2d20h
coredns-58cc8c89f4-zg865 1/1 Running 1 2d20h
etcd-node01.master.k8s.test.czlun.com 1/1 Running 1 2d20h
etcd-node02.master.k8s.test.czlun.com 1/1 Running 1 2d19h
etcd-node03.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-apiserver-node01.master.k8s.test.czlun.com 1/1 Running 1 2d20h
kube-apiserver-node02.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-apiserver-node03.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-controller-manager-node01.master.k8s.test.czlun.com 1/1 Running 2 2d20h
kube-controller-manager-node02.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-controller-manager-node03.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-proxy-5v2v6 1/1 Running 1 2d20h
kube-proxy-mdmjv 1/1 Running 1 2d19h
kube-proxy-vt6kh 1/1 Running 1 2d19h
kube-scheduler-node01.master.k8s.test.czlun.com 1/1 Running 3 2d20h
kube-scheduler-node02.master.k8s.test.czlun.com 1/1 Running 1 2d19h
kube-scheduler-node03.master.k8s.test.czlun.com 1/1 Running 1 2d19h