Kubernetes 1.20.1 High-Availability Environment Setup
Node overview
Node | IP |
---|---|
master | 192.168.86.128 |
node01 | 192.168.86.129 |
node02 | 192.168.86.130 |
Disable the firewall (master, node01, node02)
# Permanently disable the firewall; to stop it temporarily only: systemctl stop firewalld
systemctl disable --now firewalld
systemctl disable --now dnsmasq
systemctl disable --now NetworkManager
Disable SELinux (master, node01, node02)
# Disable SELinux
setenforce 0
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/sysconfig/selinux
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/selinux/config
Disable swap (master, node01, node02)
# Disable swap
swapoff -a && sysctl -w vm.swappiness=0
sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab
Configure hosts (master, node01, node02)
# Configure hosts
cat >> /etc/hosts << EOF
192.168.86.128 master
192.168.86.129 node01
192.168.86.130 node02
EOF
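To confirm that name resolution works on every node, a quick check (a minimal sketch using the three hostnames above):
for i in master node01 node02; do ping -c 1 $i; done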
Configure yum repositories (master, node01, node02)
# Configure yum repositories
curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo-gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
sed -i -e '/mirrors.cloud.aliyuncs.com/d' -e '/mirrors.aliyuncs.com/d' /etc/yum.repos.d/CentOS-Base.repo
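With the repositories in place, rebuilding the metadata cache is a quick way to confirm they all resolve (a quick check):
yum clean all && yum makecache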
Essential tools (master, node01, node02)
yum -y install wget jq psmisc vim net-tools telnet yum-utils device-mapper-persistent-data lvm2 git
Install ntpdate (master, node01, node02)
# Install ntpdate
rpm -ivh http://mirrors.wlnmp.com/centos/wlnmp-release-centos.noarch.rpm
yum install -y ntpdate
Synchronize time (master, node01, node02)
# Synchronize time
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
echo 'Asia/Shanghai' >/etc/timezone
ntpdate time2.aliyun.com
crontab -e
# Add the following line, then save and exit with :wq
*/5 * * * * ntpdate time2.aliyun.com
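To avoid the interactive editor, the same entry can be installed directly (a sketch; note this overwrites the current user's existing crontab):
echo "*/5 * * * * /usr/sbin/ntpdate time2.aliyun.com" | crontab -
# Confirm the entry
crontab -l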
Set resource limits (master, node01, node02)
# Takes effect in the current session only
ulimit -SHn 65535
# Permanent setting
vim /etc/security/limits.conf
# Scroll to the end of the file and append (note: a soft limit may not exceed its hard limit):
* soft nofile 655360
* hard nofile 655360
* soft nproc 655350
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
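The new limits apply to fresh sessions only; after logging in again, verify them (a quick check):
ulimit -n
ulimit -u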
Passwordless SSH (master)
# Passwordless SSH configuration
ssh-keygen -t rsa
for i in master node01 node02;do ssh-copy-id -i .ssh/id_rsa.pub $i;done
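Passwordless login can be verified in one pass; each command should print the remote hostname without prompting for a password:
for i in node01 node02; do ssh $i hostname; done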
Kernel upgrade (master, node01, node02)
# Update the system first (the kernel itself is upgraded separately below)
yum update -y --exclude=kernel*
# Be sure to reboot after the update
reboot
Download the new kernel (master)
cd /opt && mkdir -p kubernetes && cd kubernetes
wget http://193.49.22.109/elrepo/kernel/el7/x86_64/RPMS/kernel-ml-devel-4.19.12-1.el7.elrepo.x86_64.rpm
wget http://193.49.22.109/elrepo/kernel/el7/x86_64/RPMS/kernel-ml-4.19.12-1.el7.elrepo.x86_64.rpm
## Copy to the worker nodes (create the target directory there first)
for i in node01 node02;do ssh $i mkdir -p /opt/kubernetes; scp kernel-ml-4.19.12-1.el7.elrepo.x86_64.rpm kernel-ml-devel-4.19.12-1.el7.elrepo.x86_64.rpm $i:/opt/kubernetes ;done
Install the kernel (master, node01, node02)
cd /opt/kubernetes && yum localinstall -y kernel-ml*
grub2-set-default 0 && grub2-mkconfig -o /etc/grub2.cfg
grubby --args="user_namespace.enable=1" --update-kernel="$(grubby --default-kernel)"
## Check the default kernel
grubby --default-kernel
## Reboot
reboot
# Verify the running kernel
uname -a
Install ipvsadm (master, node01, node02)
yum install -y ipvsadm ipset sysstat conntrack libseccomp
Configure IPVS (master, node01, node02)
vim /etc/modules-load.d/ipvs.conf
# On kernels 4.19+ use nf_conntrack; on 4.18 and earlier use nf_conntrack_ipv4 instead. Add:
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
## Enable the module-load service
systemctl enable --now systemd-modules-load.service
## Verify the modules are loaded
lsmod | grep -e ip_vs -e nf_conntrack
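If some modules are missing from the lsmod output (systemd-modules-load may already have been active before the file existed, in which case enable --now does not re-run it), load them by hand (a minimal sketch):
for mod in $(grep -v '^#' /etc/modules-load.d/ipvs.conf); do modprobe $mod; done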
Enable kernel parameters required by the k8s cluster (master, node01, node02)
cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward=1
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
fs.may_detach_mounts=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time=600
net.ipv4.tcp_keepalive_probes=3
net.ipv4.tcp_keepalive_intvl=15
net.ipv4.tcp_max_tw_buckets=36000
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_max_orphans=327680
net.ipv4.tcp_orphan_retries=3
net.ipv4.tcp_syncookies=1
net.ipv4.tcp_max_syn_backlog=16384
net.ipv4.ip_conntrack_max=65536
net.ipv4.tcp_timestamps=0
net.core.somaxconn=16384
EOF
## Apply
sysctl --system
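Note: the net.bridge.* keys above only exist while the br_netfilter module is loaded; if sysctl --system complains about them, load the module and make it persistent (a minimal sketch following the modules-load.d convention used earlier):
modprobe br_netfilter
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf
sysctl --system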
## Reboot
reboot
Install docker (master, node01, node02)
yum -y install gcc gcc-c++ make
yum remove docker docker-client docker-client-latest docker-common docker-latest docker-latest-logrotate docker-logrotate docker-engine
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum makecache fast
# Use the stable 19.03.x release
yum -y install docker-ce-19.03.*
mkdir /etc/docker
cat > /etc/docker/daemon.json <<EOF
{
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
## Start
systemctl daemon-reload && systemctl enable --now docker
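docker should now report the systemd cgroup driver, matching the kubelet setting used below (a quick check):
docker info | grep -i 'cgroup driver'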
Install k8s (master, node01, node02)
# List the available k8s versions (run on master)
yum list kubeadm.x86_64 --showduplicates | sort -r
# Install k8s
yum -y install kubeadm-1.20.1
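Installing kubeadm pulls in matching kubelet and kubectl packages as dependencies; confirm all three landed at 1.20.x (a quick check):
kubeadm version -o short
kubelet --version
kubectl version --client --short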
Configure the pause image (master, node01, node02)
# Configure the pause image
cat > /etc/sysconfig/kubelet<<EOF
KUBELET_EXTRA_ARGS="--cgroup-driver=systemd --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause-amd64:3.2"
EOF
### Enable kubelet (it will keep restarting until kubeadm init runs; that is expected)
systemctl daemon-reload && systemctl enable --now kubelet
Install the HA components (master)
# If there are multiple masters, install on each of them; worker nodes do not need this
yum install -y keepalived haproxy
Configure haproxy.cfg (master)
vim /etc/haproxy/haproxy.cfg
# Empty the file first, then add the following
global
    maxconn 2000
    ulimit-n 16384
    log 127.0.0.1 local0 err
    stats timeout 30s
defaults
    log global
    mode http
    option httplog
    timeout connect 5000
    timeout client 50000
    timeout server 50000
    timeout http-request 15s
    timeout http-keep-alive 15s
frontend monitor-in
    bind *:33305
    mode http
    option httplog
    monitor-uri /monitor
frontend master
    bind 0.0.0.0:16443
    bind 127.0.0.1:16443
    mode tcp
    option tcplog
    tcp-request inspect-delay 5s
    default_backend master
backend master
    mode tcp
    option tcp-check
    balance roundrobin
    default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
    server master 192.168.86.128:6443 check
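haproxy can validate the configuration before you start the service (a quick check; -c only parses the file):
haproxy -c -f /etc/haproxy/haproxy.cfg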
Configure keepalived (master)
vim /etc/keepalived/keepalived.conf
# Empty the file first, then add the following
! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 4
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {
    state MASTER
    interface ens33
    mcast_src_ip 192.168.86.128
    virtual_router_id 51
    priority 101
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH
    }
    virtual_ipaddress {
        192.168.86.128
    }
    track_script {
        chk_apiserver
    }
}
Configure the keepalived health check (master)
vim /etc/keepalived/check_apiserver.sh
#!/bin/bash
err=0
for k in $(seq 1 3)
do
    check_code=$(pgrep haproxy)
    if [[ $check_code == "" ]]; then
        err=$(expr $err + 1)
        sleep 1
        continue
    else
        err=0
        break
    fi
done
if [[ $err != "0" ]]; then
    echo "systemctl stop keepalived"
    /usr/bin/systemctl stop keepalived
    exit 1
else
    exit 0
fi
# Make the script executable
chmod +x /etc/keepalived/check_apiserver.sh
Start keepalived (master)
systemctl daemon-reload
systemctl enable --now haproxy
systemctl enable --now keepalived
# Check the listening ports
netstat -lntp
# Test
telnet 192.168.86.128 16443
## Output like the following indicates success
Trying 192.168.86.128...
Connected to 192.168.86.128.
Escape character is '^]'.
Connection closed by foreign host
Cluster initialization (master)
cd /opt/kubernetes
vim kubeadm-config.yaml
# Add the following
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: 7t2weq.bjbawausm0jaxury
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.86.128
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: master
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  certSANs:
  - master
  - "192.168.86.128"
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: 192.168.86.128:16443
controllerManager: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.20.1
networking:
  dnsDomain: cluster.local
  podSubnet: 172.16.0.0/12
  serviceSubnet: 10.96.0.0/12
scheduler: {}
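Optionally, pre-pull the control-plane images so the init step below does not stall on downloads (a sketch; run from /opt/kubernetes on the master):
kubeadm config images pull --config kubeadm-config.yaml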
Enable on boot (master, node01, node02)
systemctl enable --now kubelet
Initialize the control plane (master)
kubeadm init --config kubeadm-config.yaml --upload-certs
....
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of the control-plane node running the following command on each as root:
# If there are additional master nodes, run this on each of them
kubeadm join 192.168.86.128:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:ca23deeb8788e527c4a6453614058a5baef65fae20cc0380782581968692f765 \
--control-plane --certificate-key 103a7f90c6bcc90ddf7875fc982d3466c1d074deb03e765b45669d449de27e15
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
# Run this on each worker node to join it to the cluster
kubeadm join 192.168.86.128:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:ca23deeb8788e527c4a6453614058a5baef65fae20cc0380782581968692f765
# If initialization fails at any point, run the following and then initialize again
kubeadm reset
Add KUBECONFIG to .bashrc (master)
cat <<EOF >> /root/.bashrc
export KUBECONFIG=/etc/kubernetes/admin.conf
EOF
source /root/.bashrc
Check system components (master)
kubectl get pods -n kube-system -o wide
kubectl get pod --all-namespaces
(Reference) Rejoining nodes after the token has expired (master)
# Generate a fresh join command once the original token has expired
kubeadm token create --print-join-command
# Generate a new certificate key
kubeadm init phase upload-certs --upload-certs
# Then rejoin with the new values
kubeadm join 192.168.86.128:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash <new hash> \
--control-plane --certificate-key <new key>
Check the nodes (master)
kubectl get node
NAME     STATUS     ROLES                  AGE   VERSION
master   NotReady   control-plane,master   58s   v1.20.1
node01   NotReady   <none>                 39s   v1.20.1
node02   NotReady   <none>                 36s   v1.20.1
kubectl get svc
Fix the NotReady status (master)
cd /opt
git clone https://github.com/dotbalo/k8s-ha-install.git
# or
git clone https://gitee.com/dukuan/k8s-ha-install.git
[root@master k8s-ha-install]# pwd
/opt/k8s-ha-install
[root@master k8s-ha-install]# git checkout manual-installation-v1.20.x && cd calico/
Branch manual-installation-v1.20.x set up to track remote branch manual-installation-v1.20.x from origin.
Switched to a new branch 'manual-installation-v1.20.x'
[root@master calico]# pwd
/opt/k8s-ha-install/calico
# Edit calico-etcd.yaml
sed -i 's#etcd_endpoints: "http://<ETCD_IP>:<ETCD_PORT>"#etcd_endpoints: "https://192.168.86.128:2379"#g' calico-etcd.yaml
# Run the following from the calico directory
ETCD_CA=`cat /etc/kubernetes/pki/etcd/ca.crt | base64 | tr -d '\n'`
ETCD_CERT=`cat /etc/kubernetes/pki/etcd/server.crt | base64 | tr -d '\n'`
ETCD_KEY=`cat /etc/kubernetes/pki/etcd/server.key | base64 | tr -d '\n'`
sed -i "s@# etcd-key: null@etcd-key: ${ETCD_KEY}@g; s@# etcd-cert: null@etcd-cert: ${ETCD_CERT}@g; s@# etcd-ca: null@etcd-ca: ${ETCD_CA}@g" calico-etcd.yaml
sed -i 's#etcd_ca: ""#etcd_ca: "/calico-secrets/etcd-ca"#g; s#etcd_cert: ""#etcd_cert: "/calico-secrets/etcd-cert"#g; s#etcd_key: "" #etcd_key: "/calico-secrets/etcd-key" #g' calico-etcd.yaml
# Look up the pod network CIDR
POD_SUBNET=`cat /etc/kubernetes/manifests/kube-controller-manager.yaml | grep cluster-cidr= | awk -F= '{print $NF}'`
echo $POD_SUBNET
sed -i 's@# - name: CALICO_IPV4POOL_CIDR@- name: CALICO_IPV4POOL_CIDR@g; s@#   value: "192.168.0.0/16"@  value: "'"${POD_SUBNET}"'"@g' calico-etcd.yaml
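Before applying, it is worth confirming the substitutions landed (a quick check; the CIDR printed should match the podSubnet configured earlier):
grep etcd_endpoints calico-etcd.yaml
grep -A1 CALICO_IPV4POOL_CIDR calico-etcd.yaml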
# Install
kubectl apply -f calico-etcd.yaml
# Copy to the worker nodes
scp /etc/kubernetes/pki/front-proxy-ca.crt node01:/etc/kubernetes/pki/front-proxy-ca.crt
scp /etc/kubernetes/pki/front-proxy-ca.crt node02:/etc/kubernetes/pki/front-proxy-ca.crt
# Check system components
kubectl get po -n kube-system
Check that all three nodes can reach the cluster service (master, node01, node02)
## Check connectivity to the kubernetes service on each node
kubectl get svc
telnet 10.96.0.1 443
Test (master)
# Create an nginx deployment (this pulls the nginx image)
[root@master /]# kubectl create deployment nginx --image=nginx
# Check the pod status
[root@master /]# kubectl get pod
# Expose the port externally
[root@master /]# kubectl expose deployment nginx --port=80 --type=NodePort
# Check the pod and service
[root@master /]# kubectl get pod,svc
NAME READY STATUS RESTARTS AGE
pod/nginx-f89759699-vc8rn 1/1 Running 0 6m22s
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 30m
service/nginx NodePort 10.105.74.129 <none> 80:30101/TCP 4m17s
# Access in a browser
http://192.168.86.128:30101/
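The same check from the command line (assuming the NodePort 30101 shown above; yours may differ):
curl -I http://192.168.86.128:30101/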
Resource metrics monitoring (master)
[root@master metrics-server-0.4.x-kubeadm]# pwd
/opt/k8s-ha-install/metrics-server-0.4.x-kubeadm
[root@master metrics-server-0.4.x-kubeadm]# kubectl create -f comp.yaml
[root@master metrics-server-0.4.x-kubeadm]# kubectl get po -n kube-system
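Once the metrics-server pod is Running, node and pod metrics should be available within a minute or two (a quick check):
kubectl top node
kubectl top pod -n kube-system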
Dashboard (master)
# English version
[root@master dashboard]# pwd
/opt/k8s-ha-install/dashboard
[root@master dashboard]# kubectl create -f .
[root@master dashboard]# kubectl edit svc kubernetes-dashboard -n kubernetes-dashboard
# Change
type: NodePort
# Check the service and its assigned NodePort
[root@master dashboard]# kubectl get svc kubernetes-dashboard -n kubernetes-dashboard
# Access in a browser (use the NodePort reported above; 32101 in this example)
https://192.168.86.128:32101
### Create the admin account manifest
cd /opt/kubernetes
vim admin.conf
apiVersion: v1
kind: ServiceAccount
metadata:
  name: dashboard-admin
  namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: dashboard-admin
subjects:
- kind: ServiceAccount
  name: dashboard-admin
  namespace: kube-system
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io
# Save and exit with :wq
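Apply the manifest so the ServiceAccount and ClusterRoleBinding are actually created:
kubectl apply -f admin.conf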
# Get the login token
kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep dashboard-admin | awk '{print $1}')
# For a Chinese-language UI, kuboard is a good option: https://www.kuboard.cn/ — its official docs walk through the installation step by step, and it is very simple
Change the kube-proxy mode (master)
# The default mode is iptables, which performs poorly at scale; switch it to ipvs
kubectl edit cm kube-proxy -n kube-system
# Change
mode: "ipvs"
# Roll the kube-proxy daemonset so the pods restart with the new mode
kubectl patch daemonset kube-proxy -p "{\"spec\":{\"template\":{\"metadata\":{\"annotations\":{\"date\":\"`date +'%s'`\"}}}}}" -n kube-system
# Then check on master, node01, and node02 that the mode is now ipvs
curl localhost:10249/proxyMode
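Once the kube-proxy pods have restarted, the IPVS rule table should be populated (a quick check; ipvsadm was installed earlier):
ipvsadm -ln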
# If problems come up later, the config file you will most often touch is /var/lib/kubelet/config.yaml
Allow scheduling on the master (master)
# Review the pods on each node; removing the master taint below lets workloads use the master's resources too
kubectl get po -A -owide
## Check the taint
kubectl describe node -l node-role.kubernetes.io/master= | grep Taints
## Remove the taint
kubectl taint node -l node-role.kubernetes.io/master node-role.kubernetes.io/master:NoSchedule-