k8s - kubeadm部署多Master高可用k8s集群

系统初始化

## Make every cluster host (and the API load-balancer name) resolvable by name.
## NOTE: this replaces the whole of /etc/hosts with the entries below.
tee /etc/hosts > /dev/null << 'EOF'
::1     localhost       localhost.localdomain   localhost6      localhost6.localdomain6
127.0.0.1       localhost       localhost.localdomain   localhost4      localhost4.localdomain4

172.26.105.254 node01.master.k8s.test.czlun.com node01
172.26.105.253 node02.master.k8s.test.czlun.com node02
172.26.105.252 node03.master.k8s.test.czlun.com node03
172.26.105.255 proxy.master.k8s.test.czlun.com
EOF

## Set the hostname to the FQDN that /etc/hosts maps to eth0's IPv4 address.
# Take the first IPv4 address on eth0 (field 4 of `ip -o` output is "ADDR/PREFIX").
node_ip=$(ip -4 -o addr show dev eth0 | awk '{split($4, a, "/"); print a[1]; exit}')
# Exact match on the first column: avoids regex '.' wildcards and prefix hits
# (e.g. 172.26.105.25 matching 172.26.105.254) and yields at most one name.
node_fqdn=$(awk -v ip="$node_ip" '$1 == ip {print $2; exit}' /etc/hosts)
if [[ -n "$node_fqdn" ]]; then
    hostnamectl set-hostname "$node_fqdn"
else
    # Leave the hostname untouched rather than calling hostnamectl with no name.
    echo "No /etc/hosts entry found for '${node_ip:-<no IPv4 on eth0>}'; hostname not changed" >&2
fi

## Switch SELinux from enforcing to permissive
# Persist across reboots: rewrite an "enforcing" setting in the config file.
sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
# Apply to the running system immediately.
setenforce 0

## Disable swap (kubeadm's preflight checks and the kubelet expect swap to be off)
# Turn swap off right now for the running system.
swapoff -a
# Keep it off after reboot: comment out active swap entries in /etc/fstab
# (a backup of the original is kept as /etc/fstab.bak).
sed -i.bak '/^[^#].*[[:space:]]swap[[:space:]]/s/^/#/' /etc/fstab

## Load the br_netfilter kernel module FIRST: the net.bridge.* sysctl keys set
## below only exist once this module is loaded, so applying them earlier fails.
## The module depends on the kernel version; the kernel can be updated with:
## yum -y install kernel kernel-devel kernel-headers
if lsmod | grep -q br_netfilter; then
    echo br_netfilter module is loaded
else
    echo br_netfilter module is loading
    if modprobe br_netfilter && lsmod | grep -q br_netfilter; then
        echo Succeed to load br_netfilter module
    else
        echo Failed to load br_netfilter module >&2
    fi
fi
# Have systemd load the module on every boot, so the sysctl keys also apply after a restart.
echo br_netfilter > /etc/modules-load.d/k8s.conf

## Kernel parameters: enable packet forwarding and make bridged traffic pass
## through iptables, so it is not bypassed and routed incorrectly.
# '>' (not '>>') keeps the file free of duplicate entries when this is re-run.
cat << EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system

可选:

  • 安装ipvsadm 为kube-proxy提供ipvs支持。
yum -y install ipvsadm ipset

# Effective immediately (until the next reboot)
for mod in ip_vs ip_vs_rr ip_vs_wrr ip_vs_sh nf_conntrack_ipv4; do
    modprobe -- "$mod"
done

# Effective permanently: legacy module script, run at boot only if it is executable
cat > /etc/sysconfig/modules/ipvs.modules << 'EOF'
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF
# Without the executable bit the script is skipped at boot.
chmod 755 /etc/sysconfig/modules/ipvs.modules

安装k8s集群

通过kubeadm,采用堆叠方案(etcd与control-plane不分离)创建高可用kubernetes集群

kubernetes版本兼容性

对kube-1.16来说,经过验证兼容的docker版本为 1.13.1, 17.03, 17.06, 17.09, 18.06, 18.09
https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.16.md#unchanged

此次部署版本选型为

  • k8s 1.16.10

    • 1.16.13 紧急升级内容:修复了从受感染节点到群集的权限升级
    • 1.16.13 遇到bug:controller-manager 和 scheduler 无法顺利启动,导致liveness健康检查失败并不断重启,所以本次实施改用 1.16.10 版本。相关报错如下:
    1 actual_state_of_world.go:506] Failed to update statusUpdateNeeded field in actual state of world: Failed to set statusUpdateNeeded to needed true, because nodeName="node01.master.k8s.test.czlun.com" does not exist
    
  • docker 18.09.9

实施安装

下载aliyun docker-ce repo

# -f: fail on an HTTP error instead of saving the server's error page as the repo file
# -sS: no progress meter, but still print errors; -L: follow mirror redirects
curl -fsSL https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo -o /etc/yum.repos.d/docker-ce.repo

下载aliyun k8s repo

# Define the Aliyun mirror of the Kubernetes el7 yum repository.
# Package and repo-metadata GPG checks are both switched off in this definition.
tee /etc/yum.repos.d/kubernetes.repo > /dev/null << 'EOF'
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

安装

## Clear and rebuild the yum repo cache
yum clean all && yum makecache

## Remove previously installed docker* / kube* packages, if any.
## xargs -r skips rpm entirely when nothing matches (a bare `rpm -e` is an error).
rpm -qa | grep -E "^docker|^kube" | xargs -r rpm -e

### Install the pinned versions
yum install -y --nogpgcheck kubelet-1.16.10 kubeadm-1.16.10 kubectl-1.16.10 docker-ce-18.09.9 docker-ce-cli-18.09.9

### Enable both services at boot and start them now
systemctl enable --now kubelet docker

应用调优

修改docker配置

# Docker daemon settings written below:
#   - cgroup driver set to systemd
#   - Aliyun registry mirror
[[ -d /etc/docker ]] || mkdir -p /etc/docker
tee /etc/docker/daemon.json > /dev/null << 'EOF'
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2",
  "storage-opts": [
    "overlay2.override_kernel_check=true"
  ],
  "registry-mirrors": ["https://ydjenbck.mirror.aliyuncs.com"]
}
EOF
# Restart so the daemon picks up the new configuration.
systemctl restart docker

初始化集群

创建负载均衡节点

这里使用haproxy以tcp模式作为API Server负载均衡入口,监听端口为12567

# Install haproxy
yum install -y haproxy

# Back up the packaged config; -n keeps the first backup intact when this is re-run
cp -n /etc/haproxy/haproxy.cfg{,.bak}

# Configure haproxy:
#   - stats page served over HTTP on port 12345 at /stats
#   - TCP pass-through on port 12567, round-robin to the three API servers on 6443
# The stats listener uses an explicit `bind` line: the address-on-the-`listen`-line
# form is legacy syntax that newer HAProxy releases refuse to parse.
# Backend server names follow the node01..node03 numbering used in /etc/hosts.
cat << 'EOF' > /etc/haproxy/haproxy.cfg
global
    log         127.0.0.1 local2
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4000
    user        haproxy
    group       haproxy
    daemon
    stats socket /var/lib/haproxy/stats
defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000
listen stats
    bind 0.0.0.0:12345
    mode http
    log global
    maxconn 10
    stats enable
    stats hide-version
    stats refresh 5s
    stats show-node
    stats uri /stats
frontend kube-api-https
    bind 0.0.0.0:12567
    mode tcp
    default_backend kube-api-server
backend kube-api-server
    balance roundrobin
    mode tcp
    server kube-node1 172.26.105.254:6443 check
    server kube-node2 172.26.105.253:6443 check
    server kube-node3 172.26.105.252:6443 check
EOF

# Enable haproxy at boot and start it immediately
systemctl enable --now haproxy

初始化首个控制平面

  1. 初始化控制平面
# Flags used:
#   --kubernetes-version      Kubernetes version to deploy
#   --image-repository        image registry used during initialization
#   --control-plane-endpoint  control-plane address: the load-balancer node configured above
#   --pod-network-cidr        network range for pods
#   --upload-certs            upload the certificates
kubeadm init \
  --kubernetes-version=1.16.10 \
  --image-repository=registry.aliyuncs.com/google_containers \
  --control-plane-endpoint=proxy.master.k8s.test.czlun.com:12567 \
  --pod-network-cidr=192.168.0.0/16 \
  --upload-certs

执行完成后,应该会得到类似下面的输出,请将其保存备用(其中包含后续节点加入集群所需的 join 命令)。

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of the control-plane node running the following command on each as root:

  kubeadm join proxy.master.k8s.test.czlun.com:12567 --token achhjj.4cr1vm4z1k678fqm \
    --discovery-token-ca-cert-hash sha256:0731d2cf02babef1d868b1109983c8cc8c9a0d2ed4dc832c0ddb74a71376cbd3 \
    --control-plane --certificate-key e345ff76dede23fb9d0f714c99143d50c794a89bce7f1e215df2248e9fe04a7f

Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use 
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join proxy.master.k8s.test.czlun.com:12567 --token achhjj.4cr1vm4z1k678fqm \
    --discovery-token-ca-cert-hash sha256:0731d2cf02babef1d868b1109983c8cc8c9a0d2ed4dc832c0ddb74a71376cbd3 

  2. 根据提示执行以下命令开始使用集群
  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config
  3. 为集群部署pod网络,此处选用calico。

    使用Kubernetes API数据存储库(不超过50个节点)安装Calico

    1. 下载Kubernetes API数据存储区的Calico清单文件。

      curl https://docs.projectcalico.org/manifests/calico.yaml -O
      
    2. 在清单中取消对CALICO_IPV4POOL_CIDR变量的注释,并将其设置为与所选Pod CIDR相同的值。

    3. 应用清单文件

      kubectl apply -f calico.yaml
      
    4. 查看calico组件的启动状况与节点状况

      kubectl get pod -n kube-system -w
      kubectl get node
      

注意:

  • 主控制平面的证书被加密并上传到 kubeadm-certs 密钥中。

  • 要重新生成引导令牌(--token)并生成加入集群所需的完整 'kubeadm join' 命令,请在已加入集群的控制平面节点上使用以下命令:

    kubeadm token create --print-join-command
    
  • 要重新上传证书并生成新的解密密钥,请在已加入集群节点的控制平面上使用以下命令:

    kubeadm init phase upload-certs --upload-certs
    
  • 您还可以在 init 期间指定自定义的 --certificate-key,以后可以由 join 使用。 要生成这样的密钥,可以使用以下命令:

    kubeadm alpha certs certificate-key
    

可选:

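  • 修改kube-proxy mode为ipvs。所有k8s节点均需要先开启ipvs支持(参考上文“系统初始化”一节中的可选步骤)。修改configmap后,需要重建kube-proxy的pod才会生效,例如:kubectl -n kube-system delete pod -l k8s-app=kube-proxy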
# 将mode修改为ipvs
kubectl edit configmap kube-proxy -n kube-system
...
    mode: "ipvs"
...

其余控制平面加入集群

对于剩下的所有控制平面节点,都需要执行以下步骤以控制平面的角色在集群中注册。

  1. 执行首个控制平面初始化的信息输出中提供的join 命令。
  kubeadm join proxy.master.k8s.test.czlun.com:12567 --token achhjj.4cr1vm4z1k678fqm \
    --discovery-token-ca-cert-hash sha256:0731d2cf02babef1d868b1109983c8cc8c9a0d2ed4dc832c0ddb74a71376cbd3 \
    --control-plane --certificate-key e345ff76dede23fb9d0f714c99143d50c794a89bce7f1e215df2248e9fe04a7f
  • --control-plane 选项表示注册为控制平面角色,而不是worker节点。
  • --certificate-key 使用指定的密钥解密从控制平面下载的证书。

执行完成后,应该会得到类似下面的输出,可保存备查。

This node has joined the cluster and a new control plane instance was created:

* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.

To start administering your cluster from this node, you need to run the following as a regular user:

	mkdir -p $HOME/.kube
	sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
	sudo chown $(id -u):$(id -g) $HOME/.kube/config

Run 'kubectl get nodes' to see this node join the cluster.

  2. 根据提示执行以下命令开始使用集群
	mkdir -p $HOME/.kube
	sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
	sudo chown $(id -u):$(id -g) $HOME/.kube/config
  3. 执行以下命令查看该节点加入集群状况
kubectl get nodes

集群部署完成效果。

[root@node01 ~]# kubectl get node
NAME                           STATUS   ROLES    AGE     VERSION
node01.master.k8s.test.czlun.com   Ready    master   2d20h   v1.16.10
node02.master.k8s.test.czlun.com   Ready    master   2d19h   v1.16.10
node03.master.k8s.test.czlun.com   Ready    master   2d19h   v1.16.10
[root@node01 ~]# kubectl get pod -n kube-system
NAME                                                       READY   STATUS    RESTARTS   AGE
calico-kube-controllers-59d85c5c84-7g4h6                   1/1     Running   4          2d20h
calico-node-9nbk7                                          1/1     Running   1          2d19h
calico-node-krffv                                          1/1     Running   2          2d20h
calico-node-ph89w                                          1/1     Running   1          2d19h
coredns-58cc8c89f4-d7zch                                   1/1     Running   1          2d20h
coredns-58cc8c89f4-zg865                                   1/1     Running   1          2d20h
etcd-node01.master.k8s.test.czlun.com                      1/1     Running   1          2d20h
etcd-node02.master.k8s.test.czlun.com                      1/1     Running   1          2d19h
etcd-node03.master.k8s.test.czlun.com                      1/1     Running   1          2d19h
kube-apiserver-node01.master.k8s.test.czlun.com            1/1     Running   1          2d20h
kube-apiserver-node02.master.k8s.test.czlun.com            1/1     Running   1          2d19h
kube-apiserver-node03.master.k8s.test.czlun.com            1/1     Running   1          2d19h
kube-controller-manager-node01.master.k8s.test.czlun.com   1/1     Running   2          2d20h
kube-controller-manager-node02.master.k8s.test.czlun.com   1/1     Running   1          2d19h
kube-controller-manager-node03.master.k8s.test.czlun.com   1/1     Running   1          2d19h
kube-proxy-5v2v6                                           1/1     Running   1          2d20h
kube-proxy-mdmjv                                           1/1     Running   1          2d19h
kube-proxy-vt6kh                                           1/1     Running   1          2d19h
kube-scheduler-node01.master.k8s.test.czlun.com            1/1     Running   3          2d20h
kube-scheduler-node02.master.k8s.test.czlun.com            1/1     Running   1          2d19h
kube-scheduler-node03.master.k8s.test.czlun.com            1/1     Running   1          2d19h
posted @ 2020-08-10 22:16  Czlun  阅读(1021)  评论(3编辑  收藏  举报
Copyright @2021 Czlun
Powered by .NET 5.0 on Kubernetes