Deploying a Highly Available Kubernetes 1.24 Cluster with kubeadm on Ubuntu 22.04 LTS (cri-dockerd)


I. HA Cluster Planning

1. Host layout

Hostname         IP address
master01         192.168.1.31
master02         192.168.1.32
master03         192.168.1.33
node01           192.168.1.34
master-lb (VIP)  192.168.1.35

(The /etc/hosts file below maps 192.168.1.35 to the short name "vip".)

2. Cluster settings

Item          Value
OS version    Ubuntu 22.04 LTS (jammy)
Docker        20.10.24~3-0
cri-dockerd   v0.3.2 (deb 0.3.2.3-0)
Pod CIDR      172.20.0.0/16
Service CIDR  10.196.0.0/16

Note: the host network, the Kubernetes Service CIDR, and the Pod CIDR must not overlap.

II. Environment Preparation

1. Hosts resolution

cat >> /etc/hosts <<'EOF'
 192.168.1.31 master01
 192.168.1.32 master02
 192.168.1.33 master03
 192.168.1.34 node01
 192.168.1.35 vip
EOF
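
A quick check that the new entries resolve, using the standard getent lookup:

for h in master01 master02 master03 node01 vip; do getent hosts $h; done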

2. Configure apt sources

cp /etc/apt/sources.list{,.bak}
cat > /etc/apt/sources.list <<EOF
# Source (deb-src) entries are commented out by default to speed up apt update; uncomment if needed
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-updates main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-updates main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-backports main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-backports main restricted universe multiverse

deb http://security.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse
# deb-src http://security.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse

# Pre-release repository; enabling it is not recommended
# deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-proposed main restricted universe multiverse
# # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-proposed main restricted universe multiverse
EOF

apt-get update    # refresh the package index


Configure the Docker repository

# Install prerequisites
sudo apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release
# Add the GPG key for the Aliyun Docker mirror
curl -fsSL https://mirrors.aliyun.com/docker-ce/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
# Add the Aliyun mirror repository
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://mirrors.aliyun.com/docker-ce/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt update


Configure the Kubernetes repository

# Add the Kubernetes GPG key (create the keyring directory first in case it does not exist)
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg

# Add the Kubernetes apt repository, using the Aliyun mirror
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main' | sudo tee /etc/apt/sources.list.d/kubernetes.list

# Refresh the apt index
sudo apt update
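
A quick check that the repository is usable before moving on:

apt-cache policy kubeadm | head -n 5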

3. Distribute SSH keys, the hosts file, and apt sources

Note: replace the password in the script below with your own root password before running it.
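
The script relies on sshpass for the first password-based key copy; install it if it is missing:

apt-get install -y sshpass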

#!/bin/bash

RED="\E[1;31m"
GREEN="\E[1;32m"
END="\E[0m"

# host list
hosts=(
  "192.168.1.31"
  "192.168.1.32"
  "192.168.1.33"
  "192.168.1.34"
)
# new hostname list
new_names=(
  "master01"
  "master02"
  "master03"
  "node01"
)

echo "#####################################################1.分发key#####################################################"
copy_ssh_key() {
  echo '------------------------'
  echo '1. Generate the SSH key pair'
  echo '------------------------'
  if [ ! -f /root/.ssh/id_rsa ]; then
    ssh-keygen -t rsa -f /root/.ssh/id_rsa -N ''
  else
    echo -e $GREEN "SSH key already exists. Skipping ssh-keygen." $END
  fi

  for ip in "${hosts[@]}"
  do
    sshpass -p "123456" ssh-copy-id -i ~/.ssh/id_rsa.pub -o StrictHostKeyChecking=no "root@${ip}"
    echo -e $GREEN "${ip}: 密钥分配成功" $END
  done
}

copy_ssh_key

echo "#####################################################3.修改主机名称#####################################################"
change_hostname() {
  for i in "${!hosts[@]}"
  do
    ip="${hosts[$i]}"
    new_name="${new_names[$i]}"
    ssh -o StrictHostKeyChecking=no "root@${ip}" "hostnamectl set-hostname ${new_name}"
    echo -e $GREEN "${ip} 主机名已修改为: ${new_name}" $END
  done
}

# change_hostname    # uncomment to run this step

echo "#####################################################4.分发hosts解析#####################################################"
hosts_config() {
  for ip in "${hosts[@]}"
  do
    scp -o StrictHostKeyChecking=no /etc/hosts "root@${ip}:/etc/"
    scp -o StrictHostKeyChecking=no /etc/apt/sources.list "root@${ip}:/etc/apt/"
    scp -o StrictHostKeyChecking=no /etc/apt/sources.list.d/* "root@${ip}:/etc/apt/sources.list.d/"
  done
}


# hosts_config    # uncomment to run this step

4. Disable the firewall and NetworkManager on all nodes (on Ubuntu Server, NetworkManager may not be installed; disabling ufw is the part that matters)

systemctl disable --now NetworkManager ufw 

5. Disable swap on all nodes

swapoff -a && sysctl -w vm.swappiness=0
sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab
free -h

6. Time synchronization on all nodes

timedatectl set-timezone Asia/Shanghai
apt-get install ntpdate -y &> /dev/null
echo "*/10 * * * * root /usr/sbin/ntpdate ntp1.aliyun.com >/dev/null 2>&1" > /etc/cron.d/ntp_sync

7. Configure resource limits on all nodes

ulimit -SHn 65535
cat >> /etc/security/limits.conf <<EOF
* soft nofile 65536
* hard nofile 131072
* soft nproc 65535
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
EOF

8. Kernel parameters on all nodes

cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
fs.may_detach_mounts = 1
net.ipv4.conf.all.route_localnet = 1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384
EOF

# The net.bridge.* settings require the br_netfilter module; load it (and overlay, commonly
# needed by container runtimes) now and persist both across reboots before applying
cat <<EOF > /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
modprobe overlay
modprobe br_netfilter
sysctl --system
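
Spot-check a few of the applied values:

sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables net.core.somaxconn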

9. Install common tools on all nodes

sudo apt-get install wget jq psmisc vim net-tools telnet lvm2 git -y

10. Configure IPVS on all nodes

1. Install the IPVS packages
sudo apt-get install -y ipvsadm ipset sysstat conntrack libseccomp2


2. Configure the IPVS kernel modules on all nodes
On kernels 4.19 and later, nf_conntrack_ipv4 was merged into nf_conntrack; kernels below 4.19 still use nf_conntrack_ipv4.

# Configuration for kernels below 4.19 (CentOS-style /etc/sysconfig path; not needed on Ubuntu 22.04, which ships a 5.15 kernel)
cat <<EOF > /etc/sysconfig/modules/ipvs.modules
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF


# Configuration for kernels 4.19 and later (this is the one that applies to Ubuntu 22.04)
cat <<EOF > /etc/modules-load.d/ipvs.conf 
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
EOF
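
If you would rather not reboot, the same modules can be loaded immediately by re-running the systemd loader that reads /etc/modules-load.d (the reboot below achieves the same result):

systemctl restart systemd-modules-load.service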

3. Reboot the operating system
reboot 

4. Verify the loaded modules
lsmod | grep --color=auto -e ip_vs -e nf_conntrack


III. Installing the HA Components

1. Install HAProxy and Keepalived on all master nodes

apt-get install keepalived haproxy -y

2. HAProxy configuration (identical on all three master nodes)

Note: adjust the master node addresses in the backend section to your environment.

mkdir -p /etc/haproxy/
cat >/etc/haproxy/haproxy.cfg<<EOF
global
  maxconn  2000
  ulimit-n  16384
  log  127.0.0.1 local0 err
  stats timeout 30s

defaults
  log global
  mode  http
  option  httplog
  timeout connect 5000
  timeout client  50000
  timeout server  50000
  timeout http-request 15s
  timeout http-keep-alive 15s

frontend monitor-in
  bind *:33305
  mode http
  option httplog
  monitor-uri /monitor

frontend k8s-master
  bind 0.0.0.0:16443
  bind 127.0.0.1:16443
  mode tcp
  option tcplog
  tcp-request inspect-delay 5s
  default_backend k8s-master

backend k8s-master
  mode tcp
  option tcplog
  option tcp-check
  balance roundrobin
  default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
  server master01 192.168.1.31:6443  check
  server master02 192.168.1.32:6443  check
  server master03 192.168.1.33:6443  check
EOF
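
Before starting the service it may be worth validating the file with haproxy's built-in check mode:

haproxy -c -f /etc/haproxy/haproxy.cfg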

3. Keepalived configuration

3.1 master01

cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {        # instance name; the backup nodes must use the same name
    state MASTER            # this node is MASTER; the backup nodes must be BACKUP
    interface eth0          # VRRP interface; adjust to your NIC name and keep it identical on all nodes
    mcast_src_ip 192.168.1.31
    virtual_router_id 51    # router ID 51; must match on all nodes and be unique on the network segment
    priority 100            # priority 100; the backup nodes must use a lower value
    advert_int 1            # advertisement interval: 1 second
    authentication {
        auth_type PASS      # PASS authentication; must match on all nodes
        auth_pass 1111      # password 1111; must match on all nodes
    }
    virtual_ipaddress {
        192.168.1.35 dev eth0 label eth0:3   # the VIP
    }
    track_script {
        chk_apiserver       # the health-check script defined above
    }
}
EOF

3.2 master02

cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {        # instance name; must match the MASTER's instance name
    state BACKUP            # this node is a BACKUP
    interface eth0          # VRRP interface; adjust to your NIC name and keep it identical on all nodes
    mcast_src_ip 192.168.1.32
    virtual_router_id 51    # router ID 51; must match on all nodes and be unique on the network segment
    priority 50             # priority 50; lower than the MASTER's 100
    advert_int 1            # advertisement interval: 1 second
    authentication {
        auth_type PASS      # PASS authentication; must match on all nodes
        auth_pass 1111      # password 1111; must match on all nodes
    }
    virtual_ipaddress {
        192.168.1.35 dev eth0 label eth0:3   # the VIP
    }
    track_script {
        chk_apiserver       # the health-check script defined above
    }
}
EOF

3.3 master03

cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {        # instance name; must match the MASTER's instance name
    state BACKUP            # this node is a BACKUP
    interface eth0          # VRRP interface; adjust to your NIC name and keep it identical on all nodes
    mcast_src_ip 192.168.1.33
    virtual_router_id 51    # router ID 51; must match on all nodes and be unique on the network segment
    priority 50             # priority 50; lower than the MASTER's 100
    advert_int 1            # advertisement interval: 1 second
    authentication {
        auth_type PASS      # PASS authentication; must match on all nodes
        auth_pass 1111      # password 1111; must match on all nodes
    }
    virtual_ipaddress {
        192.168.1.35 dev eth0 label eth0:3   # the VIP
    }
    track_script {
        chk_apiserver       # the health-check script defined above
    }
}
EOF

4. Health-check script on all master nodes

cat > /etc/keepalived/check_apiserver.sh <<'EOF'    # quoted 'EOF' so the script is written literally, without shell expansion
#!/bin/bash

err=0
for k in $(seq 1 3)
do
    check_code=$(pgrep haproxy)
    if [[ $check_code == "" ]]; then
        err=$(expr $err + 1)
        sleep 1
        continue
    else
        err=0
        break
    fi
done

if [[ $err != "0" ]]; then
    echo "systemctl stop keepalived"
    /usr/bin/systemctl stop keepalived
    exit 1
else
    exit 0
fi
EOF

Make the script executable:

chmod +x /etc/keepalived/check_apiserver.sh

5. Start HAProxy and Keepalived on all master nodes

systemctl enable keepalived && systemctl start keepalived && systemctl status keepalived
systemctl enable haproxy && systemctl start haproxy && systemctl status haproxy
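
With haproxy up, the health-check script should exit 0:

/etc/keepalived/check_apiserver.sh; echo $?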

6. Test the VIP and HAProxy

[root@master01 ~]# ping 192.168.1.35
PING 192.168.1.35 (192.168.1.35) 56(84) bytes of data.
64 bytes from 192.168.1.35: icmp_seq=1 ttl=64 time=0.033 ms
64 bytes from 192.168.1.35: icmp_seq=2 ttl=64 time=0.051 ms
^C
--- 192.168.1.35 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 999ms
rtt min/avg/max/mdev = 0.033/0.042/0.051/0.009 ms
[root@master01 ~]# telnet  192.168.1.35 16443
Trying 192.168.1.35...
Connected to 192.168.1.35
Escape character is '^]'.
Connection closed by foreign host.


  • If port 16443 is unreachable, check whether haproxy started correctly.
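
To see which master currently holds the VIP (eth0 as configured above):

ip addr show eth0 | grep 192.168.1.35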

IV. Installing Docker, cri-dockerd, and the Kubernetes Components

1. Install Docker on all nodes

1.1 Install the latest version (skip this and use 1.2 if you want the version pinned by this guide)

sudo apt-get install -y docker-ce docker-ce-cli

1.2 Install a pinned Docker version

# List the available 20.10.24 builds
root@master01:~# apt-cache madison docker-ce-cli | grep 20.10.24~3-0
docker-ce-cli | 5:20.10.24~3-0~ubuntu-jammy | https://mirrors.aliyun.com/docker-ce/linux/ubuntu jammy/stable amd64 Packages
root@master01:~# 
root@master01:~# apt-cache madison docker-ce | grep 20.10.24~3-0
 docker-ce | 5:20.10.24~3-0~ubuntu-jammy | https://mirrors.aliyun.com/docker-ce/linux/ubuntu jammy/stable amd64 Packages

 
# Install the pinned version
sudo apt-get install -y docker-ce=5:20.10.24~3-0~ubuntu-jammy docker-ce-cli=5:20.10.24~3-0~ubuntu-jammy

1.3 Configure the Docker daemon

Note: the insecure registry and registry mirror below are examples; replace them with your own.

mkdir -pv /etc/docker && cat <<EOF | sudo tee /etc/docker/daemon.json
{
  "insecure-registries": ["harbor.jiajia.com"],
  "registry-mirrors": ["https://docker.chenby.cn"],
  "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF

1.4 Start Docker

systemctl daemon-reload && systemctl enable --now docker && systemctl status docker
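
Confirm the systemd cgroup driver took effect:

docker info | grep -i 'cgroup driver'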

2. Install cri-dockerd on all nodes

2.1 Check the system version

lsb_release -a

2.2 Download the matching release

wget https://gh.monlor.com/https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.2/cri-dockerd_0.3.2.3-0.ubuntu-jammy_amd64.deb

2.3 Install on all nodes

sudo dpkg -i cri-dockerd_0.3.2.3-0.ubuntu-jammy_amd64.deb

If dependency problems occur during installation, fix them with:

sudo apt-get install -f

2.4 Pin the pause image for cri-dockerd on all nodes

Add the following flag to ExecStart: --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.7

[root@master01 ansible]# vim  /usr/lib/systemd/system/cri-docker.service 
[Unit]
Description=CRI Interface for Docker Application Container Engine
Documentation=https://docs.mirantis.com
After=network-online.target firewalld.service docker.service
Wants=network-online.target
Requires=cri-docker.socket

[Service]
Type=notify
ExecStart=/usr/bin/cri-dockerd --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.7 --container-runtime-endpoint fd://
ExecReload=/bin/kill -s HUP $MAINPID
TimeoutSec=0
RestartSec=2
Restart=always

# Note that StartLimit* options were moved from "Service" to "Unit" in systemd 229.
# Both the old, and new location are accepted by systemd 229 and up, so using the old location
# to make them work for either version of systemd.
StartLimitBurst=3

# Note that StartLimitInterval was renamed to StartLimitIntervalSec in systemd 230.
# Both the old, and new name are accepted by systemd 230 and up, so using the old name to make
# this option work for either version of systemd.
StartLimitInterval=60s

# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity

# Comment TasksMax if your systemd version does not support it.
# Only systemd 226 and above support this option.
TasksMax=infinity
Delegate=yes
KillMode=process

[Install]
WantedBy=multi-user.target


2.5 Start cri-dockerd


systemctl daemon-reload
systemctl enable --now cri-docker.service
systemctl enable --now cri-docker.socket
systemctl status cri-docker.service && systemctl status cri-docker.socket

3. Install the Kubernetes components

3.1 Check the available versions

root@master01:~# apt-cache madison kubeadm | grep 1.24.17
   kubeadm | 1.24.17-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
root@master01:~# apt-cache madison kubelet | grep 1.24.17
   kubelet | 1.24.17-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages

3.2 Install the pinned versions

sudo apt-get install -y kubeadm=1.24.17-00 kubelet=1.24.17-00 kubectl=1.24.17-00
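
Optionally hold the packages so routine apt upgrades cannot move them:

sudo apt-mark hold kubeadm kubelet kubectl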

3.3 Enable kubelet

systemctl daemon-reload && systemctl enable --now kubelet && systemctl status kubelet

Until the node is initialized or joined, kubelet restarts in a loop; that is expected at this stage.

V. Cluster Initialization

1. Create the kubeadm config file

Save the following as /root/kubeadm-config.yaml (it intentionally uses the older v1beta2 schema; the next step migrates it to the current one):

apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: 7t2weq.bjbawausm0jaxury
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.1.31
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/cri-dockerd.sock
  name: master01
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  certSANs:
  - 192.168.1.35
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: 192.168.1.35:16443
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.24.17 # change this to match the output of kubeadm version
networking:
  dnsDomain: cluster.local
  podSubnet: 172.20.0.0/16 # must match the Pod CIDR planned in section I
  serviceSubnet: 10.196.0.0/16
scheduler: {}

2. Migrate the config to the current schema

kubeadm config migrate --old-config kubeadm-config.yaml --new-config new.yaml

Copy new.yaml to the other master nodes:

for i in master02 master03; do scp new.yaml $i:/root/; done

3. Pull the images on all nodes

kubeadm config images pull --config /root/new.yaml 

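A quick way to confirm the images landed locally:

docker images | grep registry.cn-hangzhou.aliyuncs.com/google_containers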

4. Initialize the first master

Initialization generates the certificates and configuration files under /etc/kubernetes; the other master nodes then join master01.

kubeadm init --config /root/new.yaml  --upload-certs

After a successful init, kubeadm prints a token and certificate key that the other nodes use to join; record them (the join commands below come from this output).

4.1 Join master02

Note that the CRI socket must be passed explicitly:
--cri-socket=unix:///run/cri-dockerd.sock

  kubeadm join 192.168.1.35:16443 --token 7t2weq.bjbawausm0jaxury \
	--discovery-token-ca-cert-hash sha256:b399d593b919df41d254c3a1f707b9331c847561c331e1b600759dab45c608c1 \
	--control-plane --certificate-key e44c6f2d1482342e76835276167e11617acc765c032e5384b10352681358db1f \
  --cri-socket=unix:///run/cri-dockerd.sock


4.2 Join master03

  kubeadm join 192.168.1.35:16443 --token 7t2weq.bjbawausm0jaxury \
	--discovery-token-ca-cert-hash sha256:b399d593b919df41d254c3a1f707b9331c847561c331e1b600759dab45c608c1 \
	--control-plane --certificate-key e44c6f2d1482342e76835276167e11617acc765c032e5384b10352681358db1f \
  --cri-socket=unix:///run/cri-dockerd.sock

4.3 Join node01

kubeadm join 192.168.1.35:16443 --token 7t2weq.bjbawausm0jaxury \
	--discovery-token-ca-cert-hash sha256:b399d593b919df41d254c3a1f707b9331c847561c331e1b600759dab45c608c1 \
  --cri-socket=unix:///run/cri-dockerd.sock


4.4 Configure kubectl on the master nodes

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

4.5 Check cluster status

All nodes report NotReady until the CNI plugin (Calico, section VI) is installed; that is expected here.

[root@master01 ~]# kubectl get no
NAME       STATUS     ROLES           AGE     VERSION
master01   NotReady   control-plane   13m     v1.24.17
master02   NotReady   control-plane   7m57s   v1.24.17
master03   NotReady   control-plane   9m22s   v1.24.17
node01     NotReady   <none>          2m40s   v1.24.17

5. Recovering from a failed init

If initialization fails, reset the node and initialize again (do not run this unless init actually failed):

kubeadm reset -f  --cri-socket=unix:///run/cri-dockerd.sock ; ipvsadm --clear  ; rm -rf ~/.kube

6. Handling token expiry

After the token expires, generate a new join command:

kubeadm token create --print-join-command

A control-plane join additionally needs a fresh --certificate-key:

kubeadm init phase upload-certs  --upload-certs
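
Putting the two together, a new control-plane join command looks like this sketch (the token, hash, and key placeholders come from the output of the two commands above):

kubeadm join 192.168.1.35:16443 --token <new-token> \
    --discovery-token-ca-cert-hash sha256:<ca-cert-hash> \
    --control-plane --certificate-key <new-certificate-key> \
    --cri-socket=unix:///run/cri-dockerd.sock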

VI. Installing Calico

1. Fetch the manifests

# clone the manifest repository
cd /root/ ; git clone https://gitee.com/dukuan/k8s-ha-install.git

2. Switch to the matching branch

cd /root/k8s-ha-install && git checkout manual-installation-v1.24.x && cd calico/

3. Set the Pod CIDR

POD_SUBNET=`cat /etc/kubernetes/manifests/kube-controller-manager.yaml | grep cluster-cidr= | awk -F= '{print $NF}'`
sed -i "s#POD_CIDR#${POD_SUBNET}#g" calico.yaml
kubectl apply -f calico.yaml

4. Verify

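The Calico Pods should reach Running, after which the nodes turn Ready:

kubectl get pod -n kube-system | grep calico
kubectl get node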

VII. Deploying Metrics Server

Recent Kubernetes versions collect resource usage through metrics-server, which reports CPU, memory, disk, and network usage for nodes and Pods.

1. Copy the front-proxy CA certificate to the node

scp /etc/kubernetes/pki/front-proxy-ca.crt node01:/etc/kubernetes/pki/front-proxy-ca.crt

2. Install metrics-server

Run the following on master01 only:

cd /root/k8s-ha-install/kubeadm-metrics-server
kubectl  create -f comp.yaml 

3. Check status

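The metrics-server Pod should reach Running:

kubectl -n kube-system get pod | grep metrics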

4. Verify metrics

kubectl  top no
kubectl  top  pods -n kube-system


VIII. Switching kube-proxy to IPVS

1. Check the current kube-proxy mode

(1) Inspect the kube-proxy ConfigMap: mode is empty, so no proxy mode is set explicitly.
[root@master01 dashboard]# kubectl -n kube-system describe cm kube-proxy  |  grep mode
mode: ""

(2) Check the logs of any kube-proxy Pod; with mode unset, kube-proxy defaults to the iptables proxier.
[root@master01 dashboard]# kubectl -n kube-system logs -f kube-proxy-dtvlb

2. Change the kube-proxy mode to ipvs

(1) Edit the ConfigMap and set mode: "ipvs"; be sure to save when exiting.
kubectl -n kube-system edit cm kube-proxy

(2) Verify the change:
kubectl -n kube-system describe cm kube-proxy | grep mode

3. Roll the kube-proxy DaemonSet

kubectl patch daemonset kube-proxy -p "{\"spec\":{\"template\":{\"metadata\":{\"annotations\":{\"date\":\"`date +'%s'`\"}}}}}" -n kube-system
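
Wait for the rollout to complete:

kubectl -n kube-system rollout status ds/kube-proxy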

4. Verify

[root@master01 kubeadm-metrics-server]#  curl 127.0.0.1:10249/proxyMode
ipvs

IX. Common Cluster Operations

1. kubectl shell completion

echo "source <(kubectl completion bash)" >> ~/.bashrc && source ~/.bashrc

2. Remove the master taint (so control-plane nodes can schedule workloads)

kubectl  taint node  -l node-role.kubernetes.io/control-plane node-role.kubernetes.io/master:NoSchedule-
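
Confirm the taint is gone:

kubectl describe node -l node-role.kubernetes.io/control-plane | grep Taints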

X. Cluster Availability Checks

1. All nodes must be Ready

kubectl get node

2. All Pods must be healthy

kubectl get pod -A

3. The cluster CIDRs must not conflict

kubectl get svc
kubectl get pod -A -owide

4. Resources can be created normally

kubectl create deploy cluster-test --image=registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools -- sleep 3600

5. Pods must resolve Services (in the same namespace and across namespaces)

# exec into the Pod created above (substitute the NAME from kubectl get pod)
kubectl exec -it cluster-test-84dfc9c68b-lbkhd -- bash

# resolve both names; they should map to the .1 and .10 Service IPs respectively
nslookup kubernetes
nslookup kube-dns.kube-system


6. Every node must reach the kubernetes Service on port 443 and the kube-dns Service on port 53

curl https://10.196.0.1:443
curl 10.196.0.10:53

With the Service CIDR used in this guide (10.196.0.0/16), the kubernetes Service is 10.196.0.1 and kube-dns is 10.196.0.10. On every node, curl should report a certificate error for port 443 and an empty reply from port 53; both responses still prove the ports are reachable.


7. Pods must communicate with each other (same namespace and across namespaces)

[root@master01 metrics-server]# kubectl get pod -owide
NAME                           READY   STATUS    RESTARTS   AGE     IP               NODE     NOMINATED NODE   READINESS GATES
cluster-test-8b47d69f5-rgllt   1/1     Running   0          4m31s   172.16.196.129   node01   <none>           <none>
[root@master01 metrics-server]# kubectl -n kube-system  get pod -owide


8. Pods must communicate with each other (on the same node and across nodes)

for node in master02 master03 node01;  do  ssh $node ping -c 2 172.16.241.65 && echo node: $node; done


XI. Installing etcdctl

1. Download (on master01)

wget https://gh.monlor.com/https://github.com/etcd-io/etcd/releases/download/v3.4.30/etcd-v3.4.30-linux-amd64.tar.gz

2. Install

tar -zxf etcd-v3.4.30-linux-amd64.tar.gz
mv etcd-v3.4.30-linux-amd64/etcdctl /usr/local/bin
chmod +x /usr/local/bin/etcdctl

3. Check the health of the etcd cluster

3.1 Configure environment variables

vi ~/.bashrc
export ETCDCTL_API=3
export ETCDCTL_CACERT=/etc/kubernetes/pki/etcd/ca.crt
export ETCDCTL_CERT=/etc/kubernetes/pki/etcd/peer.crt
export ETCDCTL_KEY=/etc/kubernetes/pki/etcd/peer.key
export ETCDCTL_ENDPOINTS=192.168.1.31:2379,192.168.1.32:2379,192.168.1.33:2379

source ~/.bashrc
etcdctl --write-out=table endpoint health

The same check can be run as a single command:

ETCDCTL_API=3 etcdctl --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/peer.crt --key=/etc/kubernetes/pki/etcd/peer.key --write-out=table --endpoints=192.168.1.31:2379,192.168.1.32:2379,192.168.1.33:2379 endpoint health

4. List the etcd cluster members

etcdctl --write-out=table member list


5. Find the etcd cluster leader

etcdctl endpoint status --write-out=table


XII. Extending Cluster Certificate Lifetimes

The Kubernetes CA certificate is valid for 10 years, but the component certificates last only 1 year. To keep them valid there are three mainstream approaches:
1. Upgrade the cluster: every upgrade renews the certificates for another year (the 1-year validity is meant to push users to upgrade at least once a year).
2. Renew them with kubeadm certs renew (extends them by one year only).
3. Rebuild kubeadm from source with a custom validity period (the approach used below).

1. Download the source

wget  https://gh.api.99988866.xyz/https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.24.17.tar.gz

2. Unpack

mkdir -p k8s-src
tar -xf v1.24.17.tar.gz  -C k8s-src/
cd k8s-src/
mv kubernetes-1.24.17 kubernetes

3. Change the certificate validity

3.1 CA certificate validity

cd /root/k8s-src/kubernetes/staging/src/k8s.io/client-go/util/cert
vi cert.go

// The CA expiry in this file is set as: NotAfter: now.Add(duration365d * 10).UTC()
// The default is 10 years; change the 10 to 100.
// In vi, type /NotAfter and press Enter to jump to it.


3.2 Component certificate validity

cd /root/k8s-src/kubernetes/cmd/kubeadm/app/constants
vim constants.go

Edit the CertificateValidity constant (time.Hour * 24 * 365 by default, i.e. one year) and multiply it by 100.
Type /CertificateValidity and press Enter to jump to it.


4. Build from source

4.1 Install build dependencies

sudo apt install gcc make rsync jq -y

4.2 Check the required Go version

[root@master01 kubernetes]# cat /root/k8s-src/kubernetes/build/build-image/cross/VERSION 
v1.24.0-go1.20.7-bullseye.0

4.3 Install the Go toolchain

wget https://go.dev/dl/go1.20.7.linux-amd64.tar.gz
tar -xf go1.20.7.linux-amd64.tar.gz  -C /usr/local/

Add the environment variables:
vim ~/.bashrc
...
export GOROOT=/usr/local/go
export GOPATH=/usr/local/gopath
export GOPROXY=https://goproxy.cn,direct
export PATH=$PATH:$GOROOT/bin:$GOPATH/bin

Apply them:
source ~/.bashrc

Verify:
go env
go version


4.4 Build kubeadm

make all WHAT=cmd/kubeadm GOFLAGS=-v

The compiled binary is placed at:
[root@master01 kubernetes]# ls /root/k8s-src/kubernetes/_output/local/bin/linux/amd64/kubeadm
/root/k8s-src/kubernetes/_output/local/bin/linux/amd64/kubeadm

4.5 Replace the kubeadm binary

mv /usr/bin/kubeadm /usr/bin/kubeadm_backup
cp _output/local/bin/linux/amd64/kubeadm /usr/bin/kubeadm
chmod +x /usr/bin/kubeadm

Verify:

kubeadm version


4.6 Replace kubeadm on the other masters

The kubeadm binary has already been rebuilt from the patched source, so simply copy it to the other master nodes, then back up the certificates there and renew them as below.

scp /usr/bin/kubeadm master02:/usr/bin/
scp /usr/bin/kubeadm master03:/usr/bin/

5. Renew all certificates

5.1 Back up the certificates

Note: run this on every master node.

cp -r /etc/kubernetes/pki  /etc/kubernetes/pki_backup

5.2 Check the current certificate expirations

# Early versions (1.19 and before):
kubeadm alpha certs check-expiration
# 1.19 and later:
kubeadm certs check-expiration


5.3 Renew the certificates

Note: run this on every master node.

kubeadm  certs renew all


5.4 Restart the control-plane components

The renewal takes effect after kube-apiserver, kube-controller-manager, kube-scheduler, and etcd restart.
Note: run this on every master node.

mv /etc/kubernetes/manifests/* /tmp/
# after roughly 30 seconds the kube-apiserver, kube-controller-manager, kube-scheduler, and etcd containers stop; then move the manifests back:
mv /tmp/kube-* /etc/kubernetes/manifests/
mv /tmp/etcd.yaml /etc/kubernetes/manifests/

Check the certificate validity again (on every master node):

kubeadm certs check-expiration


6. Refresh the admin kubeconfig

Note: run this on every master node.

cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config

This article draws on Du Kuan's course 《云原生Kubernetes全栈架构师》 (videos and documentation); thanks for the excellent material!
