Deploying a Highly Available Kubernetes 1.24 Cluster on Ubuntu 22.04 LTS with kubeadm and cri-dockerd
I. HA Cluster Planning
1. Host Configuration
Hostname | IP Address |
---|---|
master01 | 192.168.1.31 |
master02 | 192.168.1.32 |
master03 | 192.168.1.33 |
node01 | 192.168.1.34 |
master-lb | 192.168.1.35 |
2. Cluster Configuration
Item | Value |
---|---|
OS version | Ubuntu 22.04 LTS |
Docker version | 20.10.24~3-0 |
cri-dockerd version | v0.3.2 (deb package 0.3.2.3-0) |
Pod CIDR | 172.20.0.0/16 (the init config below actually uses 172.20.0.0/12; see the note there) |
Service CIDR | 10.196.0.0/16 |
Note: the host network, the Service CIDR, and the Pod CIDR must not overlap.
II. Environment Preparation
1. /etc/hosts entries
cat >> /etc/hosts <<'EOF'
192.168.1.31 master01
192.168.1.32 master02
192.168.1.33 master03
192.168.1.34 node01
192.168.1.35 vip
EOF
2. Configure the apt sources
cp /etc/apt/sources.list{,.bak}
cat > /etc/apt/sources.list <<EOF
# Source mirrors are commented out by default to speed up apt update; uncomment them if needed
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-updates main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-updates main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-backports main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-backports main restricted universe multiverse
deb http://security.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse
# deb-src http://security.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse
# Pre-release repository; enabling it is not recommended
# deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-proposed main restricted universe multiverse
# # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-proposed main restricted universe multiverse
EOF
apt-get update   # refresh the package index
Configure the Docker repository
Install the prerequisite packages:
sudo apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release gpg
# Add the GPG key of the Aliyun Docker mirror
curl -fsSL https://mirrors.aliyun.com/docker-ce/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
# Add the Aliyun mirror repository
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://mirrors.aliyun.com/docker-ce/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt update
Configure the Kubernetes repository
# Add the Kubernetes key (make sure the keyrings directory exists first)
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
# Add the Kubernetes apt repository, using the Aliyun mirror
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main' | sudo tee /etc/apt/sources.list.d/kubernetes.list
# Refresh the apt index
sudo apt update
3. Distribute the SSH key, hosts file, and apt sources
Note: replace the password in the script with your own before running it
#!/bin/bash
RED="\E[1;31m"
GREEN="\E[1;32m"
END="\E[0m"
# 主机列表
hosts=(
"192.168.1.31"
"192.168.1.32"
"192.168.1.33"
"192.168.1.34"
)
# 新的主机名称列表
new_names=(
"master01"
"master02"
"master03"
"node01"
)
echo "#####################################################1.分发key#####################################################"
copy_ssh_key() {
echo '------------------------'
echo '1.创建ssh-keygen'
echo '------------------------'
if [ ! -f /root/.ssh/id_rsa ]; then
ssh-keygen -t rsa -f /root/.ssh/id_rsa -N ''
else
echo -e $GREEN "SSH key already exists. Skipping ssh-keygen." $END
fi
for ip in "${hosts[@]}"
do
sshpass -p "123456" ssh-copy-id -i ~/.ssh/id_rsa.pub -o StrictHostKeyChecking=no "root@${ip}"
echo -e $GREEN "${ip}: SSH key distributed successfully" $END
done
}
copy_ssh_key
echo "#####################################################3.修改主机名称#####################################################"
change_hostname() {
for i in "${!hosts[@]}"
do
ip="${hosts[$i]}"
new_name="${new_names[$i]}"
ssh -o StrictHostKeyChecking=no "root@${ip}" "hostnamectl set-hostname ${new_name}"
echo -e $GREEN "${ip} hostname changed to: ${new_name}" $END
done
}
# change_hostname
echo "#####################################################4.分发hosts解析#####################################################"
hosts_config() {
for ip in "${hosts[@]}"
do
scp -o StrictHostKeyChecking=no /etc/hosts "root@${ip}:/etc/"
scp -o StrictHostKeyChecking=no /etc/apt/sources.list "root@${ip}:/etc/apt/"
scp -o StrictHostKeyChecking=no /etc/apt/sources.list.d/*.list "root@${ip}:/etc/apt/sources.list.d/"
done
}
# hosts_config
4. Disable the firewall and NetworkManager on all nodes
systemctl disable --now NetworkManager ufw
5. Disable swap on all nodes
swapoff -a && sysctl -w vm.swappiness=0
sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab
free -h
6. Synchronize time on all nodes
timedatectl set-timezone Asia/Shanghai
apt-get install ntpdate -y &> /dev/null
echo "*/10 * * * * root /usr/sbin/ntpdate ntp1.aliyun.com >/dev/null 2>&1" > /etc/cron.d/ntp_sync
7. Configure resource limits on all nodes
ulimit -SHn 65535
cat >> /etc/security/limits.conf <<EOF
* soft nofile 65536
* hard nofile 131072
* soft nproc 65535
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
EOF
8. Configure kernel parameters on all nodes
cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
fs.may_detach_mounts = 1
net.ipv4.conf.all.route_localnet = 1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl =15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.ip_conntrack_max = 65536
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384
EOF
sysctl --system
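One caveat: the net.bridge.* keys above only exist once the br_netfilter kernel module is loaded. If sysctl --system reports them as unknown keys, loading the module first (a common companion step, added here as a suggestion) resolves it:
modprobe br_netfilter
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf
sysctl --system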
9. Install common tools on all nodes
sudo apt-get install wget jq psmisc vim net-tools telnet lvm2 git -y
10. Configure IPVS on all nodes
1. Install the IPVS packages
sudo apt-get install -y ipvsadm ipset sysstat conntrack libseccomp2
2. Load the IPVS kernel modules on all nodes
On kernels 4.19 and later, nf_conntrack_ipv4 was replaced by nf_conntrack; kernels below 4.19 still use nf_conntrack_ipv4. Ubuntu 22.04 ships a 5.15 kernel, so the modules-load.d configuration below is the one that applies here.
# Configuration for kernels below 4.19 (RHEL-style path, shown for reference)
cat <<EOF > /etc/sysconfig/modules/ipvs.modules
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
EOF
# Configuration for kernels 4.19 and above
cat <<EOF > /etc/modules-load.d/ipvs.conf
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
EOF
3. Reboot the system
reboot
4. Verify that the modules are loaded
lsmod | grep --color=auto -e ip_vs -e nf_conntrack
III. Installing the High-Availability Components
1. Install HAProxy and Keepalived on all master nodes
apt-get install keepalived haproxy -y
2. HAProxy configuration (identical on all three master nodes)
** Remember to update the master node addresses **
mkdir -p /etc/haproxy/
cat >/etc/haproxy/haproxy.cfg<<EOF
global
maxconn 2000
ulimit-n 16384
log 127.0.0.1 local0 err
stats timeout 30s
defaults
log global
mode http
option httplog
timeout connect 5000
timeout client 50000
timeout server 50000
timeout http-request 15s
timeout http-keep-alive 15s
frontend monitor-in
bind *:33305
mode http
option httplog
monitor-uri /monitor
frontend k8s-master
bind 0.0.0.0:16443
bind 127.0.0.1:16443
mode tcp
option tcplog
tcp-request inspect-delay 5s
default_backend k8s-master
backend k8s-master
mode tcp
option tcplog
option tcp-check
balance roundrobin
default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
server master01 192.168.1.31:6443 check
server master02 192.168.1.32:6443 check
server master03 192.168.1.33:6443 check
EOF
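Optionally validate the configuration before starting the service (-c runs haproxy in check mode against the file given with -f):
haproxy -c -f /etc/haproxy/haproxy.cfg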
3. Keepalived configuration
3.1 master01 configuration
cat > /etc/keepalived/keepalived.conf<<EOF
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
script_user root
enable_script_security
}
vrrp_script chk_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 5
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 { # instance name VI_1; the backup nodes must use the same name
state MASTER # MASTER here; the backup nodes must be BACKUP
interface eth0 # NIC used for VRRP traffic; must match on all nodes
mcast_src_ip 192.168.1.31
virtual_router_id 51 # virtual router ID 51; must be identical across the instance and unique within keepalived.conf
priority 100 # priority 100; the backup nodes must use a lower value
advert_int 1 # advertisement interval: 1 second
authentication {
auth_type PASS # PASS authentication; must match on all nodes
auth_pass 1111 # password 1111; must match on all nodes
}
virtual_ipaddress {
192.168.1.35 dev eth0 label eth0:3 # virtual IP (VIP)
}
track_script {
chk_apiserver # health-check script defined above
}
}
EOF
3.2 master02 configuration
cat > /etc/keepalived/keepalived.conf<<EOF
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
script_user root
enable_script_security
}
vrrp_script chk_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 5
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 { # same instance name as on master01
state BACKUP # BACKUP here; master01 is the MASTER
interface eth0 # must match the node's interface
mcast_src_ip 192.168.1.32
virtual_router_id 51 # must match master01
priority 50 # lower than master01's 100
advert_int 1 # advertisement interval: 1 second
authentication {
auth_type PASS # must match master01
auth_pass 1111 # must match master01
}
virtual_ipaddress {
192.168.1.35 dev eth0 label eth0:3 # virtual IP (VIP)
}
track_script {
chk_apiserver # health-check script defined above
}
}
EOF
3.3 master03 configuration
cat > /etc/keepalived/keepalived.conf<<EOF
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
script_user root
enable_script_security
}
vrrp_script chk_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 5
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 { # same instance name as on master01
state BACKUP # BACKUP here; master01 is the MASTER
interface eth0 # must match the node's interface
mcast_src_ip 192.168.1.33
virtual_router_id 51 # must match master01
priority 50 # lower than master01's 100
advert_int 1 # advertisement interval: 1 second
authentication {
auth_type PASS # must match master01
auth_pass 1111 # must match master01
}
virtual_ipaddress {
192.168.1.35 dev eth0 label eth0:3 # virtual IP (VIP)
}
track_script {
chk_apiserver # health-check script defined above
}
}
EOF
4. Configure the health-check script on the master nodes (the heredoc delimiter is quoted so the script's variables are not expanded while writing the file)
cat > /etc/keepalived/check_apiserver.sh <<'EOF'
#!/bin/bash
err=0
for k in $(seq 1 3)
do
check_code=$(pgrep haproxy)
if [[ $check_code == "" ]]; then
err=$(expr $err + 1)
sleep 1
continue
else
err=0
break
fi
done
if [[ $err != "0" ]]; then
echo "systemctl stop keepalived"
/usr/bin/systemctl stop keepalived
exit 1
else
exit 0
fi
EOF
Make the script executable:
chmod +x /etc/keepalived/check_apiserver.sh
5. Start haproxy and keepalived on all master nodes
systemctl enable keepalived && systemctl start keepalived && systemctl status keepalived
systemctl enable haproxy && systemctl start haproxy && systemctl status haproxy
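Once both services are up, the VIP should be bound on the MASTER node (master01 here), and the health-check script should exit 0; a quick sanity check:
ip addr show eth0 | grep 192.168.1.35
bash /etc/keepalived/check_apiserver.sh; echo $?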
6. Test the VIP and HA
[root@master01 ~]# ping 192.168.1.35
PING 192.168.1.35 (192.168.1.35) 56(84) bytes of data.
64 bytes from 192.168.1.35: icmp_seq=1 ttl=64 time=0.033 ms
64 bytes from 192.168.1.35: icmp_seq=2 ttl=64 time=0.051 ms
^C
--- 192.168.1.35 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 999ms
rtt min/avg/max/mdev = 0.033/0.042/0.051/0.009 ms
[root@master01 ~]# telnet 192.168.1.35 16443
Trying 192.168.1.35...
Connected to 192.168.1.35
Escape character is '^]'.
Connection closed by foreign host.
- If port 16443 is unreachable, check whether haproxy started correctly
IV. Installing the K8s Components, Docker, and cri-dockerd
1. Install Docker on all nodes
1.1 Install the latest Docker version
sudo apt install docker-ce docker-ce-cli
1.2 Install a specific Docker version
1. Check the available builds of the target version
root@master01:~# apt-cache madison docker-ce-cli | grep 20.10.24~3-0
docker-ce-cli | 5:20.10.24~3-0~ubuntu-jammy | https://mirrors.aliyun.com/docker-ce/linux/ubuntu jammy/stable amd64 Packages
root@master01:~#
root@master01:~# apt-cache madison docker-ce | grep 20.10.24~3-0
docker-ce | 5:20.10.24~3-0~ubuntu-jammy | https://mirrors.aliyun.com/docker-ce/linux/ubuntu jammy/stable amd64 Packages
2. Install the specified Docker version
sudo apt-get install -y docker-ce=5:20.10.24~3-0~ubuntu-jammy docker-ce-cli=5:20.10.24~3-0~ubuntu-jammy
1.3 Configure the Docker daemon (registry mirror and systemd cgroup driver)
mkdir -pv /etc/docker && cat <<EOF | sudo tee /etc/docker/daemon.json
{
"insecure-registries": ["harbor.jiajia.com"],
"registry-mirrors": ["https://docker.chenby.cn"],
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
1.4 Start Docker
systemctl daemon-reload && systemctl enable --now docker && systemctl status docker
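Since daemon.json sets the cgroup driver to systemd, it is worth confirming Docker picked it up (the output should include Cgroup Driver: systemd):
docker info | grep -i 'cgroup driver'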
2. Install cri-dockerd on all nodes
2.1 Pick the version for your system
First check the Ubuntu codename; the .deb below is built for jammy.
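The codename can be confirmed with (prints jammy on Ubuntu 22.04):
lsb_release -cs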
2.2 Download the matching version
wget https://gh.monlor.com/https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.2/cri-dockerd_0.3.2.3-0.ubuntu-jammy_amd64.deb
2.3 Install on all nodes
sudo dpkg -i cri-dockerd_0.3.2.3-0.ubuntu-jammy_amd64.deb
If dependency errors appear during installation, fix them with:
sudo apt-get install -f
2.4 Point cri-dockerd at a reachable pause image on all nodes
The key addition is: --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.7
[root@master01 ansible]# vim /usr/lib/systemd/system/cri-docker.service
[Unit]
Description=CRI Interface for Docker Application Container Engine
Documentation=https://docs.mirantis.com
After=network-online.target firewalld.service docker.service
Wants=network-online.target
Requires=cri-docker.socket
[Service]
Type=notify
ExecStart=/usr/bin/cri-dockerd --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.7 --container-runtime-endpoint fd://
ExecReload=/bin/kill -s HUP $MAINPID
TimeoutSec=0
RestartSec=2
Restart=always
# Note that StartLimit* options were moved from "Service" to "Unit" in systemd 229.
# Both the old, and new location are accepted by systemd 229 and up, so using the old location
# to make them work for either version of systemd.
StartLimitBurst=3
# Note that StartLimitInterval was renamed to StartLimitIntervalSec in systemd 230.
# Both the old, and new name are accepted by systemd 230 and up, so using the old name to make
# this option work for either version of systemd.
StartLimitInterval=60s
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
# Comment TasksMax if your systemd version does not support it.
# Only systemd 226 and above support this option.
TasksMax=infinity
Delegate=yes
KillMode=process
[Install]
WantedBy=multi-user.target
2.5 Start cri-dockerd
systemctl daemon-reload
systemctl enable --now cri-docker.service
systemctl enable --now cri-docker.socket
systemctl status cri-docker.service && systemctl status cri-docker.socket
3. Install the Kubernetes components
3.1 Check the available versions
root@master01:~# apt-cache madison kubeadm | grep 1.24.17
kubeadm | 1.24.17-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
root@master01:~# apt-cache madison kubelet | grep 1.24.17
kubelet | 1.24.17-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
3.2 Install the specified version
sudo apt-get install -y kubeadm=1.24.17-00 kubelet=1.24.17-00 kubectl=1.24.17-00
3.3 Enable kubelet (it will restart in a loop until the node is initialized by kubeadm; that is expected)
systemctl daemon-reload && systemctl enable --now kubelet && systemctl status kubelet
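Optional sanity check: the kubeadm package pulls in cri-tools, so crictl can be pointed at the cri-dockerd socket to confirm the CRI endpoint responds (the /etc/crictl.yaml path and keys follow the standard cri-tools convention):
cat > /etc/crictl.yaml <<EOF
runtime-endpoint: unix:///var/run/cri-dockerd.sock
image-endpoint: unix:///var/run/cri-dockerd.sock
EOF
crictl info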
V. Cluster Initialization
1. Create the initialization file
Save the following on master01 as kubeadm-config.yaml (the migrate step in subsection 2 reads this filename):
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: 7t2weq.bjbawausm0jaxury
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 192.168.1.31
bindPort: 6443
nodeRegistration:
criSocket: unix:///var/run/cri-dockerd.sock
name: master01
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/master
---
apiServer:
certSANs:
- 192.168.1.35
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: 192.168.1.35:16443
controllerManager: {}
dns:
type: CoreDNS
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.24.17 # change this version to match the output of kubeadm version
networking:
dnsDomain: cluster.local
podSubnet: 172.20.0.0/12 # parses as the network 172.16.0.0/12; note this is wider than the /16 shown in the planning table
serviceSubnet: 10.196.0.0/16
scheduler: {}
2. Migrate the kubeadm config to the current schema
kubeadm config migrate --old-config kubeadm-config.yaml --new-config new.yaml
Copy new.yaml to the other nodes (node01 is included because every node pulls images from it in the next step):
for i in master02 master03 node01; do scp new.yaml $i:/root/; done
3. Pull the images on all nodes
kubeadm config images pull --config /root/new.yaml
4. Initialize the first master node
Initialization generates the certificates and config files under /etc/kubernetes; afterwards the other master nodes simply join master01.
kubeadm init --config /root/new.yaml --upload-certs
On success, the output includes the token (and certificate key) that other nodes need to join, so record those values.
4.1 Join master02 to the cluster
Note that the CRI socket must be specified explicitly:
--cri-socket=unix:///run/cri-dockerd.sock
kubeadm join 192.168.1.35:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:b399d593b919df41d254c3a1f707b9331c847561c331e1b600759dab45c608c1 \
--control-plane --certificate-key e44c6f2d1482342e76835276167e11617acc765c032e5384b10352681358db1f \
--cri-socket=unix:///run/cri-dockerd.sock
4.2 Join master03 to the cluster
kubeadm join 192.168.1.35:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:b399d593b919df41d254c3a1f707b9331c847561c331e1b600759dab45c608c1 \
--control-plane --certificate-key e44c6f2d1482342e76835276167e11617acc765c032e5384b10352681358db1f \
--cri-socket=unix:///run/cri-dockerd.sock
4.3 Join node01 to the cluster
kubeadm join 192.168.1.35:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:b399d593b919df41d254c3a1f707b9331c847561c331e1b600759dab45c608c1 \
--cri-socket=unix:///run/cri-dockerd.sock
4.4 Configure the kubeconfig on the master nodes
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
4.5 Check the cluster status
All nodes show NotReady at this point because no CNI plugin is installed yet; they switch to Ready once Calico is deployed in section VI.
[root@master01 ~]# kubectl get no
NAME STATUS ROLES AGE VERSION
master01 NotReady control-plane 13m v1.24.17
master02 NotReady control-plane 7m57s v1.24.17
master03 NotReady control-plane 9m22s v1.24.17
node01 NotReady <none> 2m40s v1.24.17
5. Troubleshooting a failed initialization
If initialization fails, reset and initialize again with the command below (do not run it if initialization succeeded):
kubeadm reset -f --cri-socket=unix:///run/cri-dockerd.sock ; ipvsadm --clear ; rm -rf ~/.kube
6. Handling token expiry
After the token expires, generate a new one:
kubeadm token create --print-join-command
Master nodes additionally need a new --certificate-key:
kubeadm init phase upload-certs --upload-certs
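The two outputs combine into a control-plane join command of the same shape as in 4.1; the values below are placeholders to be filled from the actual output:
kubeadm join 192.168.1.35:16443 --token <new-token> --discovery-token-ca-cert-hash sha256:<hash> --control-plane --certificate-key <new-certificate-key> --cri-socket=unix:///run/cri-dockerd.sock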
VI. Installing the Calico CNI
1. Fetch the deployment files
# clone the repository containing the manifests
cd /root/ ; git clone https://gitee.com/dukuan/k8s-ha-install.git
2. Switch to the branch for 1.24
cd /root/k8s-ha-install && git checkout manual-installation-v1.24.x && cd calico/
3. Set the Pod CIDR
POD_SUBNET=`cat /etc/kubernetes/manifests/kube-controller-manager.yaml | grep cluster-cidr= | awk -F= '{print $NF}'`
sed -i "s#POD_CIDR#${POD_SUBNET}#g" calico.yaml
kubectl apply -f calico.yaml
4. Verify
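A quick check (sketch): once the Calico Pods are Running, the nodes should flip from NotReady to Ready:
kubectl get pod -n kube-system | grep calico
kubectl get node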
VII. Deploying Metrics Server
In recent Kubernetes versions, system resource metrics are collected through metrics-server, which reports node and Pod CPU, memory, disk, and network usage.
1. Copy the certificate to the worker node
scp /etc/kubernetes/pki/front-proxy-ca.crt node01:/etc/kubernetes/pki/front-proxy-ca.crt
2. Install metrics-server
Run all of the following on master01:
cd /root/k8s-ha-install/kubeadm-metrics-server
kubectl create -f comp.yaml
3. Check the status
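One way to watch it come up, assuming comp.yaml keeps the upstream default label k8s-app=metrics-server:
kubectl -n kube-system get pods -l k8s-app=metrics-server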
4. Verify metrics collection
kubectl top no
kubectl top pods -n kube-system
VIII. Switching kube-proxy to IPVS Mode
1. Check the current kube-proxy mode
(1) Inspect the kube-proxy ConfigMap: mode is empty, so no proxy mode is set explicitly.
[root@master01 dashboard]# kubectl -n kube-system describe cm kube-proxy | grep mode
mode: ""
(2) Checking the logs of any kube-proxy Pod confirms that with mode unset, kube-proxy falls back to the iptables proxier.
[root@master01 dashboard]# kubectl -n kube-system logs -f kube-proxy-dtvlb
2. Change the kube-proxy mode to ipvs
(1) Edit the ConfigMap and change mode to ipvs; be sure to save and exit:
kubectl -n kube-system edit cm kube-proxy
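For reference, the single line to change in the ConfigMap data is:
mode: "ipvs"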
(2) Verify the change took effect
kubectl -n kube-system describe cm kube-proxy | grep mode
3. Roll the change out to the kube-proxy Pods
kubectl patch daemonset kube-proxy -p "{\"spec\":{\"template\":{\"metadata\":{\"annotations\":{\"date\":\"`date +'%s'`\"}}}}}" -n kube-system
4. Verify
[root@master01 kubeadm-metrics-server]# curl 127.0.0.1:10249/proxyMode
ipvs
IX. Common Cluster Operations
1. kubectl shell completion
echo "source <(kubectl completion bash)" >> ~/.bashrc && source ~/.bashrc
2. Remove the scheduling taint from the master nodes
kubectl taint node -l node-role.kubernetes.io/control-plane node-role.kubernetes.io/master:NoSchedule-
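To confirm the taint was removed (checking master01 as an example):
kubectl describe node master01 | grep -i taints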
X. Verifying Cluster Availability
1. All nodes must be healthy
kubectl get node
2. All Pods must be healthy
kubectl get pod -A
3. The cluster CIDRs must show no conflicts
kubectl get svc
kubectl get pod -A -owide
4. Resources can be created normally
kubectl create deploy cluster-test --image=registry.cn-beijing.aliyuncs.com/dotbalo/debug-tools -- sleep 3600
5. Pods must be able to resolve Services (same namespace and across namespaces)
# exec into the pod using the NAME from the output above
kubectl exec -it cluster-test-84dfc9c68b-lbkhd -- bash
# resolve both names; they should map to the .1 and .10 addresses of the Service CIDR (10.196.0.1 and 10.196.0.10 in this cluster)
nslookup kubernetes
nslookup kube-dns.kube-system
6. Every node must be able to reach the kubernetes Service on port 443 and the kube-dns Service on port 53 (addresses per this cluster's 10.196.0.0/16 Service CIDR)
curl https://10.196.0.1:443
curl 10.196.0.10:53
On every node, port 443 should answer with a TLS/certificate error and port 53 with curl: (52) Empty reply from server; either response proves the Service is reachable.
7. Pod-to-Pod communication must work (same namespace and across namespaces)
[root@master01 metrics-server]# kubectl get pod -owide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
cluster-test-8b47d69f5-rgllt 1/1 Running 0 4m31s 172.16.196.129 node01 <none> <none>
[root@master01 metrics-server]# kubectl -n kube-system get pod -owide
8. Pod-to-Pod communication must work (same node and across nodes)
for node in master02 master03 node01; do ssh $node ping -c 2 172.16.241.65 && echo hostname: $node; done
XI. Installing etcdctl
1. Download
On master01:
wget https://gh.monlor.com/https://github.com/etcd-io/etcd/releases/download/v3.4.30/etcd-v3.4.30-linux-amd64.tar.gz
2. Install
tar -zxf etcd-v3.4.30-linux-amd64.tar.gz
mv etcd-v3.4.30-linux-amd64/etcdctl /usr/local/bin
chmod +x /usr/local/bin/etcdctl
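Quick check that the binary works:
etcdctl version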
3. Check the health of the etcd HA cluster
3.1 Configure environment variables
vi ~/.bashrc
export ETCDCTL_API=3
export ETCDCTL_CACERT=/etc/kubernetes/pki/etcd/ca.crt
export ETCDCTL_CERT=/etc/kubernetes/pki/etcd/peer.crt
export ETCDCTL_KEY=/etc/kubernetes/pki/etcd/peer.key
export ETCDCTL_ENDPOINTS=192.168.1.31:2379,192.168.1.32:2379,192.168.1.33:2379
source ~/.bashrc
etcdctl --write-out=table endpoint health
Alternatively, pass everything on the command line:
ETCDCTL_API=3 etcdctl --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/peer.crt --key=/etc/kubernetes/pki/etcd/peer.key --write-out=table --endpoints=192.168.1.31:2379,192.168.1.32:2379,192.168.1.33:2379 endpoint health
4. List the etcd cluster members
etcdctl member list --write-out=table
5. Check which etcd member is the leader
etcdctl endpoint status --write-out=table
XII. Changing the Cluster Certificate Lifetime
The Kubernetes CA certificate is valid for 10 years, but the component certificates are valid for only 1 year, so they must be renewed to keep the cluster working. There are three mainstream approaches:
1. Version upgrades: every upgrade renews the certificates for another year; the official 1-year validity is intended to push users to upgrade at least once a year;
2. Renewal via the kubeadm command (this extends them by only one year at a time);
3. Recompiling kubeadm from source, which allows a custom certificate validity.
1. Download the source
wget https://gh.api.99988866.xyz/https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.24.17.tar.gz
2. Extract
mkdir -p k8s-src && tar -xf v1.24.17.tar.gz -C k8s-src/
cd k8s-src/
mv kubernetes-1.24.17 kubernetes
3. Change the certificate validity
3.1 Change the CA certificate validity
cd /root/k8s-src/kubernetes/staging/src/k8s.io/client-go/util/cert
vi cert.go
// find the line: NotAfter: now.Add(duration365d * 10).UTC()
// the default validity is 10 years; change it to 100:
// NotAfter: now.Add(duration365d * 100).UTC()
// (in vi, type /NotAfter and press Enter to jump to it)
3.2 Change the cluster certificate validity
cd /root/k8s-src/kubernetes/cmd/kubeadm/app/constants
vim constants.go
Find the constant definition CertificateValidity and multiply it by 100, i.e. change CertificateValidity = time.Hour * 24 * 365 to CertificateValidity = time.Hour * 24 * 365 * 100
(in vim, type /CertificateValidity and press Enter to jump to it)
4. Build from source
4.1 Install the build dependencies
sudo apt install gcc make rsync jq -y
4.2 Check the required Go version
[root@master01 kubernetes]# cat /root/k8s-src/kubernetes/build/build-image/cross/VERSION
v1.24.0-go1.20.7-bullseye.0
4.3 Install the Go toolchain
wget https://go.dev/dl/go1.20.7.linux-amd64.tar.gz
tar -xf go1.20.7.linux-amd64.tar.gz -C /usr/local/
Add the environment variables:
vim ~/.bashrc
...
export GOROOT=/usr/local/go
export GOPATH=/usr/local/gopath
export GOPROXY=https://goproxy.cn,direct
export PATH=$PATH:$GOROOT/bin:$GOPATH/bin
Apply it:
source ~/.bashrc
Verify:
go env
go version
4.4 Build kubeadm
make all WHAT=cmd/kubeadm GOFLAGS=-v
The compiled binary is placed at:
[root@master01 kubernetes]# ls /root/k8s-src/kubernetes/_output/local/bin/linux/amd64/kubeadm
/root/k8s-src/kubernetes/_output/local/bin/linux/amd64/kubeadm
4.5 Replace the kubeadm binary
mv /usr/bin/kubeadm /usr/bin/kubeadm_backup
cp _output/local/bin/linux/amd64/kubeadm /usr/bin/kubeadm
chmod +x /usr/bin/kubeadm
Verify:
kubeadm version
4.6 Replace kubeadm on the other master nodes
The kubeadm binary has already been rebuilt from the patched source, so it only needs to be copied to the other master nodes; then back up and renew the certificates there as described below:
scp /usr/bin/kubeadm master02:/usr/bin/
scp /usr/bin/kubeadm master03:/usr/bin/
5. Renew all certificates
5.1 Back up the certificates
Note: run on all master nodes
cp -r /etc/kubernetes/pki /etc/kubernetes/pki_backup
5.2 Check the current certificate expiry
# Early versions (1.19 and before):
kubeadm alpha certs check-expiration
# 1.19 and later:
kubeadm certs check-expiration
5.3 Renew the certificates
Note: run on all master nodes
kubeadm certs renew all
5.4 Restart the Kubernetes components
Renewal takes effect after restarting kube-apiserver, kube-controller-manager, kube-scheduler, and etcd.
Note: run on all master nodes
mv /etc/kubernetes/manifests/* /tmp/
// After about 30 seconds the kube-apiserver, kube-controller-manager, kube-scheduler, and etcd containers will have stopped; then move the manifests back:
mv /tmp/kube-* /etc/kubernetes/manifests/
mv /tmp/etcd.yaml /etc/kubernetes/manifests/
Check the certificate expiry again:
Note: run on all master nodes
kubeadm certs check-expiration
6. Update the kubeconfig
Note: run on all master nodes
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
This article draws on Du Kuan's "Cloud-Native Kubernetes Full-Stack Architect" course (videos and documentation); thanks for the high-quality material!
Originally published on cnblogs by &UnstopPable; please credit the original link when reposting: https://www.cnblogs.com/Unstoppable9527/p/18320631