Deploying Kubernetes v1.26.0 on the containerd Runtime



1. Preparation

OS        CPU (cores)   RAM (GB)   IP               NIC   Hostname
CentOS 7  2             2          192.168.84.128   NAT   master
CentOS 7  2             2          192.168.84.129   NAT   node

Environment configuration (all nodes)

  1. Set the hostname
# on the master node
hostnamectl set-hostname master
bash
# on the node
hostnamectl set-hostname node
bash
  2. Configure the hosts mapping
cat >> /etc/hosts << EOF
192.168.84.128 master
192.168.84.129 node
EOF
  3. Disable the firewall

    systemctl stop firewalld
    systemctl disable firewalld
    
  4. Disable SELinux

setenforce 0
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
  5. Disable swap
swapoff -a
sed -i 's/.*swap.*/#&/' /etc/fstab
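A quick sanity check (not part of the original steps): confirm swap is now off.

free -h | grep -i swap    # the Swap line should show 0B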
  6. Forward IPv4 and let iptables see bridged traffic
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter
lsmod | grep br_netfilter   # verify the br_netfilter module is loaded

# Set the required sysctl parameters; they persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF

# Apply the sysctl parameters without rebooting
sudo sysctl --system
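Optionally, confirm the parameters took effect by querying them directly:

sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward
# each key should print "= 1"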
  7. Configure time synchronization
yum install ntpdate -y
ntpdate ntp1.aliyun.com
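ntpdate performs a one-shot sync. If you want it to repeat, one option is a cron entry (a sketch, not part of the original steps):

# re-sync against the Aliyun NTP server every hour
(crontab -l 2>/dev/null; echo "0 * * * * /usr/sbin/ntpdate ntp1.aliyun.com") | crontab -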

Install containerd (all nodes)

  1. Download containerd

    Download link

    I downloaded 1.6.19 here (the version in the tarball name below).


  2. Upload it to all nodes

  3. Install

tar Cxzvf /usr/local containerd-1.6.19-linux-amd64.tar.gz

# Run containerd via systemd
vi /etc/systemd/system/containerd.service
# Copyright The containerd Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target local-fs.target

[Service]
#uncomment to enable the experimental sbservice (sandboxed) version of containerd/cri integration
#Environment="ENABLE_CRI_SANDBOXES=sandboxed"
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/containerd

Type=notify
Delegate=yes
KillMode=process
Restart=always
RestartSec=5
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
LimitNOFILE=infinity
# Comment TasksMax if your systemd version does not supports it.
# Only systemd 226 and above support this version.
TasksMax=infinity
OOMScoreAdjust=-999

[Install]
WantedBy=multi-user.target
# Reload systemd and start containerd
systemctl daemon-reload
systemctl enable --now containerd
# Verify
ctr version
# Generate the default config file
mkdir /etc/containerd
containerd config default > /etc/containerd/config.toml
systemctl restart containerd

Install runc (all nodes)

  1. Download runc

Download link

  2. Install
install -m 755 runc.amd64 /usr/local/sbin/runc
# Verify
runc -v

Install the CNI plugins

  1. Download and upload them to the servers

Download link

  2. Install
mkdir -p /opt/cni/bin
tar Cxzvf /opt/cni/bin cni-plugins-linux-amd64-v1.2.0.tgz
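Optionally, confirm the plugins are in place:

ls /opt/cni/bin    # expect binaries such as bridge, host-local, loopback, portmap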

Configure a registry mirror (all nodes)

# Reference: https://github.com/containerd/containerd/blob/main/docs/cri/config.md#registry-configuration
# Add config_path = "/etc/containerd/certs.d"
sed -i 's/config_path\ =.*/config_path = \"\/etc\/containerd\/certs.d\"/g' /etc/containerd/config.toml
mkdir /etc/containerd/certs.d/docker.io -p

cat > /etc/containerd/certs.d/docker.io/hosts.toml << EOF
server = "https://docker.io"
[host."https://vh3bm52y.mirror.aliyuncs.com"]
  capabilities = ["pull", "resolve"]
EOF
  
systemctl daemon-reload && systemctl restart containerd
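To check that the mirror is actually consulted, one option is a test pull with ctr pointed at the hosts directory (assuming containerd 1.6+, where ctr images pull supports --hosts-dir):

ctr images pull --hosts-dir /etc/containerd/certs.d docker.io/library/hello-world:latest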

cgroup driver (all nodes)

# Change SystemdCgroup = false to SystemdCgroup = true
sed -i 's/SystemdCgroup\ =\ false/SystemdCgroup\ =\ true/g' /etc/containerd/config.toml
# Change sandbox_image = "k8s.gcr.io/pause:3.6" to sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.9"
sed -i 's/sandbox_image\ =.*/sandbox_image\ =\ "registry.aliyuncs.com\/google_containers\/pause:3.9"/g' /etc/containerd/config.toml
grep -E 'SystemdCgroup|sandbox_image' /etc/containerd/config.toml   # verify both edits

systemctl daemon-reload 
systemctl restart containerd

Install crictl (all nodes)

  1. Download

Download link

I chose version v1.26.0 here.

  2. Install

    1. Upload it to the servers

    2. Extract, install, and configure:

tar -vzxf crictl-v1.26.0-linux-amd64.tar.gz
mv crictl /usr/local/bin/

cat > /etc/crictl.yaml << EOF
runtime-endpoint: unix:///var/run/containerd/containerd.sock
image-endpoint: unix:///var/run/containerd/containerd.sock
timeout: 10
debug: true
EOF

systemctl restart containerd
      
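With the endpoints set, crictl should now talk to containerd. A couple of quick checks (optional):

crictl info      # dump runtime status and CRI config as JSON
crictl images    # list images known to containerd's CRI plugin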

Install kubelet, kubeadm, kubectl (all nodes)

cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
yum install  --nogpgcheck kubelet kubeadm kubectl -y
systemctl enable kubelet
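Note: this installs the newest 1.26.x available in the repo, which is why later output in this post shows v1.26.2 rather than v1.26.0. To pin the exact version instead (a sketch):

yum install --nogpgcheck kubelet-1.26.0 kubeadm-1.26.0 kubectl-1.26.0 -y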

Configure IPVS (all nodes)

# Reference: https://cloud.tencent.com/developer/article/1717552#:~:text=%E5%9C%A8%E6%89%80%E6%9C%89%E8%8A%82%E7%82%B9%E4%B8%8A%E5%AE%89%E8%A3%85ipset%E8%BD%AF%E4%BB%B6%E5%8C%85%20yum%20install%20ipset%20-y,%E4%B8%BA%E4%BA%86%E6%96%B9%E4%BE%BF%E6%9F%A5%E7%9C%8Bipvs%E8%A7%84%E5%88%99%E6%88%91%E4%BB%AC%E8%A6%81%E5%AE%89%E8%A3%85ipvsadm%20%28%E5%8F%AF%E9%80%89%29%20yum%20install%20ipvsadm%20-y

# Install ipset and ipvsadm
yum install ipset ipvsadm -y
# IPVS is part of the mainline kernel; for kube-proxy to use ipvs mode, the following kernel modules must be loaded
cat > /etc/sysconfig/modules/ipvs.modules << EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF
# Run the script to load the modules
/bin/bash /etc/sysconfig/modules/ipvs.modules
# Check whether the modules loaded successfully
lsmod | grep -e ip_vs -e nf_conntrack_ipv4

# Configure kubelet
cat >>  /etc/sysconfig/kubelet << EOF
# KUBELET_CGROUP_ARGS="--cgroup-driver=systemd"
KUBE_PROXY_MODE="ipvs"
EOF
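Once the cluster is up and kube-proxy is running in ipvs mode, you can inspect the programmed rules with ipvsadm (an optional later check):

ipvsadm -Ln    # list IPVS virtual servers and their real servers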

Initialize the master node

  1. Generate the default config file
kubeadm config print init-defaults > kubeadm.yaml
  2. Edit the kubeadm.yaml file
vim kubeadm.yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.84.128  # change to the host (master) IP
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  name: master   # change to the hostname
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers # use the Aliyun mirror
kind: ClusterConfiguration
kubernetesVersion: 1.26.0  # set this to match your kubeadm version
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16   ## set the pod subnet
scheduler: {}

### Appended: set the kubelet cgroup driver to systemd
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd
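Note: the KUBE_PROXY_MODE line written to /etc/sysconfig/kubelet earlier is not read by the kube-proxy DaemonSet that kubeadm deploys; with kubeadm, the supported way to select ipvs is to append a KubeProxyConfiguration document to kubeadm.yaml (a sketch):

---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs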
  3. Pull the images
kubeadm config images pull --image-repository=registry.aliyuncs.com/google_containers --kubernetes-version=v1.26.2
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-apiserver:v1.26.2
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-controller-manager:v1.26.2
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-scheduler:v1.26.2
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-proxy:v1.26.2
[config/images] Pulled registry.aliyuncs.com/google_containers/pause:3.9
[config/images] Pulled registry.aliyuncs.com/google_containers/etcd:3.5.6-0
[config/images] Pulled registry.aliyuncs.com/google_containers/coredns:v1.9.3

  4. Initialize
kubeadm init --config kubeadm.yaml

After a short wait, output like the following indicates the master node has been initialized:

[init] Using Kubernetes version: v1.26.2
[preflight] Running pre-flight checks
	[WARNING Service-Kubelet]: kubelet service is not enabled, please run 'systemctl enable kubelet.service'
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local master] and IPs [10.96.0.1 192.168.84.128]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost master] and IPs [192.168.84.128 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost master] and IPs [192.168.84.128 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 7.001191 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
[bootstrap-token] Using token: abcdef.0123456789abcdef
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.84.128:6443 --token abcdef.0123456789abcdef \
	--discovery-token-ca-cert-hash sha256:13dbb19283360eed18d55d1ee758e183ad79f123dc2598afaa76eac0dc0d9890
  5. Set up the kubeconfig
[root@master ~]# mkdir -p $HOME/.kube
[root@master ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
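Optionally, enable kubectl bash completion to make the following commands easier to type (not part of the original steps):

yum install bash-completion -y
echo 'source <(kubectl completion bash)' >> ~/.bashrc
source ~/.bashrc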

Join the node to the cluster

[root@node ~]# kubeadm join 192.168.84.128:6443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:7d52da1b42af69666db3483b30a389ab143a1a199b500843741dfd5f180bcb3f

Output like the following indicates the node joined the cluster successfully:

[preflight] Running pre-flight checks
	[WARNING Service-Kubelet]: kubelet service is not enabled, please run 'systemctl enable kubelet.service'
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
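If you join a node later and the bootstrap token (24h TTL by default) has expired, generate a fresh join command on the master:

kubeadm token create --print-join-command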

Verify (master node)

[root@master ~]#  kubectl get nodes

Expected output:

NAME     STATUS     ROLES           AGE     VERSION
master   NotReady   control-plane   3m25s   v1.26.0
node     NotReady   <none>          118s    v1.26.0
[root@master ~]# kubectl get pod -A
NAMESPACE     NAME                             READY   STATUS    RESTARTS   AGE
kube-system   coredns-5bbd96d687-7d6dl         0/1     Pending   0          2m47s
kube-system   coredns-5bbd96d687-frz87         0/1     Pending   0          2m47s
kube-system   etcd-master                      1/1     Running   0          3m
kube-system   kube-apiserver-master            1/1     Running   0          3m
kube-system   kube-controller-manager-master   1/1     Running   0          3m
kube-system   kube-proxy-7tnw2                 1/1     Running   0          2m47s
kube-system   kube-proxy-lmh4z                 1/1     Running   0          74s
kube-system   kube-scheduler-master            1/1     Running   0          3m1s

Note: both nodes (one master, one node) are NotReady at this point, and the coredns pods are Pending. This is expected: no network plugin has been installed yet. One needs to be installed; I chose Calico.

Install a network plugin (master node)

  1. Download the manifest for the network plugin of your choice. I chose Calico here.

curl https://raw.githubusercontent.com/projectcalico/calico/v3.25.0/manifests/calico.yaml -O

  2. Apply it
kubectl apply -f calico.yaml

Problem: pulling the Calico images failed, with errors like the following:

Dec 02 21:58:49 node2 containerd[7285]: time="2022-12-02T21:58:49.627179374+08:00" level=error msg="Failed to handle backOff event &ImageCreate{Name:docker.io/calico/cni:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],} for docker.io/calico/cni:v3.24.5" error="update image store for \"docker.io/calico/cni:v3.24.5\": get image info from containerd: get image diffIDs: unexpected media type text/html for sha256:0238205317023105e6589e4001e2a3d81b84c71740b0d9563f6157ddb32c4ea4: not found"
Dec 02 21:58:50 node2 containerd[7285]: time="2022-12-02T21:58:50.627858264+08:00" level=info msg="ImageCreate event &ImageCreate{Name:docker.io/calico/kube-controllers:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],}"
Dec 02 21:58:50 node2 containerd[7285]: time="2022-12-02T21:58:50.627954441+08:00" level=error msg="Failed to handle backOff event &ImageCreate{Name:docker.io/calico/kube-controllers:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],} for docker.io/calico/kube-controllers:v3.24.5" error="update image store for \"docker.io/calico/kube-controllers:v3.24.5\": get image info from containerd: get image diffIDs: unexpected media type text/html for sha256:0238205317023105e6589e4001e2a3d81b84c71740b0d9563f6157ddb32c4ea4: not found"
Dec 02 21:59:01 node2 containerd[7285]: time="2022-12-02T21:59:01.446288924+08:00" level=info msg="Events for \"docker.io/calico/cni:v3.24.5\" is in backoff, enqueue event &ImageDelete{Name:docker.io/calico/cni:v3.24.5,XXX_unrecognized:[],}"
Dec 02 21:59:10 node2 containerd[7285]: time="2022-12-02T21:59:10.628919112+08:00" level=info msg="ImageCreate event &ImageCreate{Name:docker.io/calico/node:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],}"
Dec 02 21:59:10 node2 containerd[7285]: time="2022-12-02T21:59:10.629086106+08:00" level=error msg="Failed to handle backOff event &ImageCreate{Name:docker.io/calico/node:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],} for docker.io/calico/node:v3.24.5" error="update image store for \"docker.io/calico/node:v3.24.5\": get image info from containerd: get image diffIDs: unexpected media type text/html for sha256:0238205317023105e6589e4001e2a3d81b84c71740b0d9563f6157ddb32c4ea4: not found"
Dec 02 21:59:39 node2 containerd[7285]: time="2022-12-02T21:59:39.956573672+08:00" level=error msg="(*service).Write failed" error="rpc error: code = Canceled desc = context canceled" expected="sha256:0797c5cde8c8c54e2b4c3035fc178e8e062d3636855b72b4cfbb37e7a4fb133d" ref="layer-sha256:0797c5cde8c8c54e2b4c3035fc178e8e062d3636855b72b4cfbb37e7a4fb133d" total=86081905

Solution reference: https://blog.csdn.net/MssGuo/article/details/128149704

Remove the containerd mirror configuration

vim /etc/containerd/certs.d/docker.io/hosts.toml
server = "https://docker.io"
[host."https://xxxmyelo.mirror.aliyuncs.com"]		# your own Aliyun mirror address
  capabilities = ["pull", "resolve"]
root@node2:~# systemctl stop containerd.service && systemctl start containerd.service

Still failing after that.

Following a tip from other users, I instead removed the containerd registry mirror path configured earlier:

[plugins."io.containerd.grpc.v1.cri".registry]
     config_path = ""			# reset to an empty path

Restart containerd

systemctl restart containerd

After a while, check the pod status again:

watch kubectl get pods -A

All pods are now running successfully:

kube-system   calico-kube-controllers-57b57c56f-mc5bc   1/1     Running   0          3m45s
kube-system   calico-node-jrggj                         1/1     Running   0          3m45s
kube-system   calico-node-wq6q2                         1/1     Running   0          3m45s
kube-system   coredns-5bbd96d687-7d6dl                  1/1     Running   0          8m30s
kube-system   coredns-5bbd96d687-frz87                  1/1     Running   0          8m30s
kube-system   etcd-master                               1/1     Running   0          8m43s
kube-system   kube-apiserver-master                     1/1     Running   0          8m43s
kube-system   kube-controller-manager-master            1/1     Running   0          8m43s
kube-system   kube-proxy-7tnw2                          1/1     Running   0          8m30s
kube-system   kube-proxy-lmh4z                          1/1     Running   0          6m57s
kube-system   kube-scheduler-master                     1/1     Running   0          8m44s

The coredns and calico pods are all in Running state.

Check the node status

[root@master ~]# kubectl get nodes
NAME     STATUS   ROLES           AGE   VERSION
master   Ready    control-plane   12m   v1.26.2
node1    Ready    <none>          10m   v1.26.2

Both nodes are now Ready.

At this point, Kubernetes v1.26.2 has been installed successfully.

Test the cluster

[root@master ~]# kubectl create deployment httpd --image=httpd
[root@master ~]# kubectl expose deployment httpd --port=80 --type=NodePort
[root@master ~]# kubectl get pod,svc
NAME                        READY   STATUS    RESTARTS   AGE
pod/httpd-975f8444c-t6z2c   1/1     Running   0          75s

NAME                 TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)        AGE
service/httpd        NodePort    10.106.224.79   <none>        80:31409/TCP   63s
service/kubernetes   ClusterIP   10.96.0.1       <none>        443/TCP        14m
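You can also check from the command line before opening a browser (the NodePort here comes from the output above):

curl http://192.168.84.128:31409
# expect: <html><body><h1>It works!</h1></body></html>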

Visit http://192.168.84.128:31409 in a browser.

The browser shows:

It works!
