1. Prepare the Lab Environment
k8s cluster role | IP | Hostname | Installed components | Spec
Control plane | 192.168.10.60 | master1 | apiserver, controller-manager, scheduler, kubelet, etcd, kube-proxy, container runtime, calico, keepalived, nginx | 2 CPU / 4 GB
Control plane | 192.168.10.61 | master2 | apiserver, controller-manager, scheduler, kubelet, etcd, kube-proxy, container runtime, calico, keepalived, nginx | 2 CPU / 4 GB
Control plane | 192.168.10.62 | master3 | apiserver, controller-manager, scheduler, kubelet, etcd, kube-proxy, container runtime, calico, keepalived, nginx | 2 CPU / 4 GB
Worker node | 192.168.10.63 | node1 | kube-proxy, calico, coredns, container runtime, kubelet | 1 CPU / 2 GB
1.1 Basic environment configuration, see:
https://www.cnblogs.com/yangmeichong/p/16888720.html
1.2 Install containerd
# Install containerd on all 4 machines
yum install containerd.io -y
mkdir -p /etc/containerd
containerd config default > /etc/containerd/config.toml

# Edit config.toml:
#   SystemdCgroup = true        # change false to true
#   change sandbox_image = "k8s.gcr.io/pause:3.6" to sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.7"

# Enable containerd at boot and start it
systemctl enable containerd --now

cat > /etc/crictl.yaml <<EOF
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
timeout: 10
debug: false
EOF
systemctl restart containerd

# Configure a registry mirror
vim /etc/containerd/config.toml
# Find config_path = "" and change it to:
#   config_path = "/etc/containerd/certs.d"
mkdir -p /etc/containerd/certs.d/docker.io/
cat > /etc/containerd/certs.d/docker.io/hosts.toml <<EOF
[host."https://vh3bm52y.mirror.aliyuncs.com",host."https://registry.docker-cn.com"]
  capabilities = ["pull"]
EOF
systemctl restart containerd
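A quick optional sanity check that containerd picked up the cgroup driver, the sandbox image and the crictl endpoint configured above (a sketch; the field names come from containerd's CRI config dump, so adjust the grep pattern if your containerd version prints them differently):

crictl info | grep -E 'SystemdCgroup|sandboxImage'
systemctl is-enabled containerd && systemctl is-active containerd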
# Install docker, used only for building images from Dockerfiles
yum install docker-ce -y
cat > /etc/docker/daemon.json <<EOF
{
  "registry-mirrors": ["https://pft7f97f.mirror.aliyuncs.com","https://registry.docker-cn.com","https://docker.mirrors.ustc.edu.cn","https://dockerhub.azk8s.cn","http://hub-mirror.c.163.com"]
}
EOF
systemctl enable docker --now
systemctl restart docker
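Optionally, confirm that docker loaded the mirror list from daemon.json (a minimal check; the "Registry Mirrors" field is part of the standard docker info output):

docker info | grep -A 3 'Registry Mirrors'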
2. Install the VIP (keepalived + nginx)
# 1. Install the k8s packages on all masters and nodes
yum install kubelet-1.25.6 kubeadm-1.25.6 kubectl-1.25.6 -y
systemctl enable kubelet
# 2. Make the kube-apiserver highly available with keepalived + nginx
# Install nginx + keepalived on master1 and master2
yum install epel-release -y
yum install nginx keepalived nginx-mod-stream -y
# Edit nginx.conf as follows
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log;
pid /run/nginx.pid;

# Load dynamic modules. See /usr/share/doc/nginx/README.dynamic.
include /usr/share/nginx/modules/*.conf;

events {
    worker_connections 1024;
}

stream {
    log_format main '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
    access_log /var/log/nginx/k8s-access.log main;

    upstream k8s-apiserver {
        server 192.168.10.60:6443;   # Master1 APISERVER IP:PORT
        server 192.168.10.61:6443;   # Master2 APISERVER IP:PORT
        server 192.168.10.62:6443;   # Master3 APISERVER IP:PORT
    }

    server {
        listen 16443;
        proxy_pass k8s-apiserver;
    }
}

http {
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';
    access_log /var/log/nginx/access.log main;

    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;

    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    server {
        listen 80 default_server;
        server_name _;
        location / {
        }
    }
}

# nginx installed from yum fails the config test with: unknown directive "stream" (the stream module is not built in).
# Fix: yum install nginx-mod-stream -y   (for a source build, configure with --with-stream)
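Before starting nginx it is worth validating the file, which also confirms that the stream module actually loads:

nginx -t
# expected output:
# nginx: the configuration file /etc/nginx/nginx.conf syntax is ok
# nginx: configuration file /etc/nginx/nginx.conf test is successful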
# 3. keepalived configuration -- the master and backup differ only where noted
[root@master1 keepalived]# cat keepalived.conf
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server 127.0.0.1
   smtp_connect_timeout 30
   router_id NGINX_MASTER
}

vrrp_script check_nginx {
    script "/etc/keepalived/check_nginx.sh"   # script that checks whether nginx is alive
}

vrrp_instance VI_1 {
    state MASTER              # role: MASTER here, BACKUP on master2
    interface ens33           # change to the actual NIC name
    virtual_router_id 51      # VRRP router ID; must be identical on the master and backup of one instance (and unique per instance on the network)
    priority 100              # priority; set 90 on the backup
    advert_int 1              # VRRP advertisement interval, default 1 second
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.10.64/24      # the VIP
    }
    track_script {
        check_nginx
    }
}

keepalived.conf on master2:
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server 127.0.0.1
   smtp_connect_timeout 30
   router_id NGINX_BACKUP
}

vrrp_script check_nginx {
    script "/etc/keepalived/check_nginx.sh"
}

vrrp_instance VI_1 {
    state BACKUP
    interface ens33
    virtual_router_id 51
    priority 90
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.10.64/24
    }
    track_script {
        check_nginx
    }
}
[root@master1 keepalived]# cat check_nginx.sh
#!/bin/bash
# 1. Check whether nginx is alive
counter=$(ps -ef | grep nginx | grep sbin | egrep -cv "grep|$$")
if [ $counter -eq 0 ]; then
    # 2. If not, try to start nginx
    service nginx start
    sleep 2
    # 3. Check nginx again after 2 seconds
    counter=$(ps -ef | grep nginx | grep sbin | egrep -cv "grep|$$")
    # 4. If nginx still is not running, stop keepalived so the VIP fails over
    if [ $counter -eq 0 ]; then
        service keepalived stop
    fi
fi

chmod +x check_nginx.sh

# Start nginx and keepalived
systemctl start nginx && systemctl start keepalived
ip addr   # check that the VIP is present
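A quick way to verify the VIP and the failover behaviour before initializing the cluster (a sketch; it assumes the ens33 interface and the 192.168.10.64 VIP configured above):

# On master1: the VIP should be bound and the nginx stream proxy should be listening on 16443
ip addr show ens33 | grep 192.168.10.64
ss -lntp | grep 16443

# Simulate a failure: stop keepalived on master1; the VIP should move to master2
systemctl stop keepalived
ip addr show ens33 | grep 192.168.10.64   # now empty on master1
# On master2 the VIP should appear:
ip addr show ens33 | grep 192.168.10.64

# Bring master1 back; it reclaims the VIP because it has the higher priority
systemctl start keepalived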
3. Install and Initialize the k8s Cluster
# Set the container runtime endpoint on all servers
crictl config runtime-endpoint /run/containerd/containerd.sock

# k8s initialization
# Generate the default kubeadm init configuration
[root@master1 ~]# kubeadm config print init-defaults > kubeadm.yaml

# Edit kubeadm.yaml; the added or commented-out settings are shown below
[root@master1 ~]# cat kubeadm.yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
#localAPIEndpoint:
#  advertiseAddress: 1.2.3.4
#  bindPort: 6443
nodeRegistration:
  criSocket: unix:///run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
#  name: node
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: 1.25.6
controlPlaneEndpoint: 192.168.10.64:16443
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16
scheduler: {}
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
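Optionally, the control-plane images can be pulled ahead of time with the same configuration file (the preflight output below also hints at this), which makes the actual init faster:

kubeadm config images list --config=kubeadm.yaml
kubeadm config images pull --config=kubeadm.yaml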
# Single-node (all-in-one) variant, for reference:
kubeadm init --kubernetes-version=1.25.8 --apiserver-advertise-address=192.168.10.30 \
  --image-repository registry.aliyuncs.com/google_containers \
  --pod-network-cidr=10.244.0.0/16 --service-cidr=10.96.0.0/12 \
  --cri-socket unix:///run/containerd/containerd.sock \
  --ignore-preflight-errors=SystemVerification
# (the cri-socket above is the containerd socket, consistent with the runtime used throughout this guide)
# Run the initialization
[root@master1 ~]# kubeadm init --config=kubeadm.yaml --ignore-preflight-errors=SystemVerification
# After the initialization completes, on master1:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Full output of the initialization:
[root@master1 ~]# kubeadm init --config=kubeadm.yaml --ignore-preflight-errors=SystemVerification
[init] Using Kubernetes version: v1.25.6
[preflight] Running pre-flight checks
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local master1] and IPs [10.96.0.1 192.168.10.60 192.168.10.64]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost master1] and IPs [192.168.10.60 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost master1] and IPs [192.168.10.60 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
W0215 16:52:00.055951 36309 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "admin.conf" kubeconfig file
W0215 16:52:00.210212 36309 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "kubelet.conf" kubeconfig file
W0215 16:52:00.382619 36309 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
W0215 16:52:00.519039 36309 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 32.570055 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node master1 as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node master1 as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
[bootstrap-token] Using token: abcdef.0123456789abcdef
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
W0215 16:52:37.813039 36309 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at: https://kubernetes.io/docs/concepts/cluster-administration/addons/ You can now join any number of control-plane nodes by copying certificate authorities and service account keys on each node and then running the following as root: kubeadm join 192.168.10.64:16443 --token abcdef.0123456789abcdef \ --discovery-token-ca-cert-hash sha256:667a06283277004f1c96aed81c3eb7229c79028e49815194bd3462a75e30a808 \ --control-plane Then you can join any number of worker nodes by running the following on each as root: kubeadm join 192.168.10.64:16443 --token abcdef.0123456789abcdef \ --discovery-token-ca-cert-hash sha256:667a06283277004f1c96aed81c3eb7229c79028e49815194bd3462a75e30a808 # 其它3台机器上,可以等master1初始化好后将镜像打包导入到机器上,如果不用导入的,也可以继续执行初始化操作 [root@master1 ~]# crictl images ls IMAGE TAG IMAGE ID SIZE registry.aliyuncs.com/google_containers/pause 3.7 221177c6082a8 311kB registry.cn-hangzhou.aliyuncs.com/google_containers/coredns v1.9.3 5185b96f0becf 14.8MB registry.cn-hangzhou.aliyuncs.com/google_containers/etcd 3.5.6-0 fce326961ae2d 103MB registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver v1.25.6 0137c32dad849 34.3MB registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager v1.25.6 3f1feef5d13e2 31.3MB registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy v1.25.6 8ef37ea6581b5 20.3MB registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler v1.25.6 1dfd746c32714 15.8MB registry.cn-hangzhou.aliyuncs.com/google_containers/pause 3.8 4873874c08efc 311kB # 导出 [root@master1 ~]# ctr -n=k8s.io images export k8s-1.25.6.tar.gz registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:v1.9.3 registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.6-0 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.25.6 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.25.6 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.25.6 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.25.6 registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.8 # 传输到其余机器上导入,指定命名空间 [root@master2 ~]# ctr -n=k8s.io images import k8s-1.25.6.tar.gz unpacking registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:v1.9.3 (sha256:8e352a029d304ca7431c6507b56800636c321cb52289686a581ab70aaa8a2e2a)...done unpacking registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.6-0 (sha256:dd75ec974b0a2a6f6bb47001ba09207976e625db898d1b16735528c009cb171c)...done unpacking registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.25.6 (sha256:efa9ae4b9922b3a2c0d296d59651c7cfd528816a641e8c3e7417de72d5f383b4)...done unpacking registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.25.6 (sha256:080f668ca39f876cefbb4d62d94ad6ea1475a33ba7784bfcfd65da58e33c9ffc)...done unpacking registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.25.6 (sha256:7461d0b2b2c9fe5b50c15acf9739cd6624e1ec1b07ae3877780035e0f436ad63)...done unpacking registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.25.6 (sha256:f41301881252779d21dde86aac5a45e9acfe560643b5a28cef1286eabb187e26)...done unpacking registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.8 (sha256:9001185023633d17a2f98ff69b6ff2615b8ea02a825adffa40422f51dfdcde9d)...done [root@master2 ~]# crictl images ls IMAGE TAG IMAGE ID SIZE registry.cn-hangzhou.aliyuncs.com/google_containers/coredns v1.9.3 5185b96f0becf 14.8MB 
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd 3.5.6-0 fce326961ae2d 103MB registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver v1.25.6 0137c32dad849 34.3MB registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager v1.25.6 3f1feef5d13e2 31.3MB registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy v1.25.6 8ef37ea6581b5 20.3MB registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler v1.25.6 1dfd746c32714 15.8MB registry.cn-hangzhou.aliyuncs.com/google_containers/pause 3.8 4873874c08efc 311kB
[root@master1 ~]# kubectl get nodes
NAME      STATUS     ROLES           AGE   VERSION
master1   NotReady   control-plane   35m   v1.25.6

[root@master1 ~]# kubectl get pod -n kube-system -o wide
NAME                              READY   STATUS    RESTARTS   AGE   IP              NODE      NOMINATED NODE   READINESS GATES
coredns-7f8cbcb969-hmmbd          0/1     Pending   0          35m   <none>          <none>    <none>           <none>
coredns-7f8cbcb969-sgjl8          0/1     Pending   0          35m   <none>          <none>    <none>           <none>
etcd-master1                      1/1     Running   0          35m   192.168.10.60   master1   <none>           <none>
kube-apiserver-master1            1/1     Running   0          35m   192.168.10.60   master1   <none>           <none>
kube-controller-manager-master1   1/1     Running   0          35m   192.168.10.60   master1   <none>           <none>
kube-proxy-drndv                  1/1     Running   0          35m   192.168.10.60   master1   <none>           <none>
kube-scheduler-master1            1/1     Running   0          35m   192.168.10.60   master1   <none>           <none>

# master1 shows NotReady and the coredns pods stay Pending because no CNI network plugin is installed yet; this resolves itself once calico is deployed in section 6.
4. Scale Out the k8s Cluster: Add Master Nodes
# Add master2 to the k8s cluster
# 1. On master2, create the certificate directories
mkdir -p /etc/kubernetes/pki/etcd && mkdir -p ~/.kube/

# 2. Copy the certificates from master1 to master2
[root@master1 etcd]# cd /etc/kubernetes/pki/
[root@master1 pki]# scp ca.* master2:/etc/kubernetes/pki/
[root@master1 pki]# scp sa.* master2:/etc/kubernetes/pki/
[root@master1 pki]# scp front-proxy-ca.* master2:/etc/kubernetes/pki/
[root@master1 pki]# cd /etc/kubernetes/pki/etcd
[root@master1 etcd]# scp ca.* master2:/etc/kubernetes/pki/etcd/

# 3. Generate a join command on master1
[root@master1 etcd]# kubeadm token create --print-join-command
kubeadm join 192.168.10.64:16443 --token wzvs1r.51qtj3qragui806k --discovery-token-ca-cert-hash sha256:667a06283277004f1c96aed81c3eb7229c79028e49815194bd3462a75e30a808

# 4. On master2, run the printed join command with --control-plane appended (full output below)
[root@master2 ~]# kubeadm join 192.168.10.64:16443 --token wzvs1r.51qtj3qragui806k --discovery-token-ca-cert-hash sha256:667a06283277004f1c96aed81c3eb7229c79028e49815194bd3462a75e30a808 --control-plane --ignore-preflight-errors=SystemVerification
[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[preflight] Running pre-flight checks before initializing the new control plane instance
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost master2] and IPs [192.168.10.61 127.0.0.1 ::1]
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost master2] and IPs [192.168.10.61 127.0.0.1 ::1]
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local master2] and IPs [10.96.0.1 192.168.10.61 192.168.10.64]
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[certs] Using the existing "sa" key
[kubeconfig] Generating kubeconfig files
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
W0215 17:42:08.867248 58488 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "admin.conf" kubeconfig file
W0215 17:42:08.941079 58488 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
W0215 17:42:09.285528 58488 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[check-etcd] Checking that the etcd cluster is healthy
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
[etcd] Announced new etcd member joining to the existing etcd cluster
[etcd] Creating static Pod manifest for "etcd"
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
[kubelet-check] Initial timeout of 40s passed.
The 'update-status' phase is deprecated and will be removed in a future release. Currently it performs no operation
[mark-control-plane] Marking the node master2 as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node master2 as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]

This node has joined the cluster and a new control plane instance was created:

* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.

To start administering your cluster from this node, you need to run the following as a regular user:

        mkdir -p $HOME/.kube
        sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
        sudo chown $(id -u):$(id -g) $HOME/.kube/config

Run 'kubectl get nodes' to see this node join the cluster.
[root@master1 etcd]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 NotReady control-plane 52m v1.25.6
master2 NotReady control-plane 2m13s v1.25.6
Repeat the same steps on master3 to join it to the cluster; a condensed sketch follows below.
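The master3 sequence mirrors the master2 steps above (a sketch; re-run kubeadm token create --print-join-command on master1 if the token has expired):

# on master3
mkdir -p /etc/kubernetes/pki/etcd && mkdir -p ~/.kube/
# on master1
scp /etc/kubernetes/pki/ca.* master3:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.* master3:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.* master3:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/etcd/ca.* master3:/etc/kubernetes/pki/etcd/
# on master3: join as a control-plane node, then set up kubectl
kubeadm join 192.168.10.64:16443 --token wzvs1r.51qtj3qragui806k \
    --discovery-token-ca-cert-hash sha256:667a06283277004f1c96aed81c3eb7229c79028e49815194bd3462a75e30a808 \
    --control-plane --ignore-preflight-errors=SystemVerification
mkdir -p $HOME/.kube && cp -i /etc/kubernetes/admin.conf $HOME/.kube/config && chown $(id -u):$(id -g) $HOME/.kube/config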
[root@master3 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 NotReady control-plane 58m v1.25.6
master2 NotReady control-plane 8m17s v1.25.6
master3 NotReady control-plane 26s v1.25.6
5. Add Worker Nodes to the Cluster
# Join node1 to the cluster
[root@node1 ~]# kubeadm join 192.168.10.64:16443 --token wzvs1r.51qtj3qragui806k --discovery-token-ca-cert-hash sha256:667a06283277004f1c96aed81c3eb7229c79028e49815194bd3462a75e30a808 --ignore-preflight-errors=SystemVerification
[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.

[root@node1 ~]# crictl images ls
I0215 17:54:28.217679 16991 util_unix.go:103] "Using this endpoint is deprecated, please consider using full URL format" endpoint="/run/containerd/containerd.sock" URL="unix:///run/containerd/containerd.sock"
IMAGE                                           TAG   IMAGE ID        SIZE
registry.aliyuncs.com/google_containers/pause   3.7   221177c6082a8   311kB

# Check that the node has joined
[root@master1 ~]# kubectl get nodes
NAME      STATUS     ROLES           AGE     VERSION
master1   NotReady   control-plane   62m     v1.25.6
master2   NotReady   control-plane   12m     v1.25.6
master3   NotReady   control-plane   4m27s   v1.25.6
node1     NotReady   <none>          27s     v1.25.6

# <none> means node1 is a worker node; a label can be added so a role is displayed
[root@master1 ~]# kubectl label nodes node1 node-role.kubernetes.io/work=work
node/node1 labeled
[root@master1 ~]# kubectl get nodes
NAME      STATUS     ROLES           AGE     VERSION
master1   NotReady   control-plane   65m     v1.25.6
master2   NotReady   control-plane   15m     v1.25.6
master3   NotReady   control-plane   7m37s   v1.25.6
node1     NotReady   work            3m37s   v1.25.6
[root@master1 ~]# kubectl get pods -n kube-system -o wide
NAME                              READY   STATUS    RESTARTS      AGE     IP              NODE      NOMINATED NODE   READINESS GATES
coredns-7f8cbcb969-hmmbd          0/1     Pending   0             64m     <none>          <none>    <none>           <none>
coredns-7f8cbcb969-sgjl8          0/1     Pending   0             64m     <none>          <none>    <none>           <none>
etcd-master1                      1/1     Running   0             64m     192.168.10.60   master1   <none>           <none>
etcd-master2                      1/1     Running   0             14m     192.168.10.61   master2   <none>           <none>
etcd-master3                      1/1     Running   0             6m23s   192.168.10.62   master3   <none>           <none>
kube-apiserver-master1            1/1     Running   0             64m     192.168.10.60   master1   <none>           <none>
kube-apiserver-master2            1/1     Running   1 (14m ago)   14m     192.168.10.61   master2   <none>           <none>
kube-apiserver-master3            1/1     Running   0             6m21s   192.168.10.62   master3   <none>           <none>
kube-controller-manager-master1   1/1     Running   1 (14m ago)   64m     192.168.10.60   master1   <none>           <none>
kube-controller-manager-master2   1/1     Running   0             13m     192.168.10.61   master2   <none>           <none>
kube-controller-manager-master3   1/1     Running   0             6m24s   192.168.10.62   master3   <none>           <none>
kube-proxy-9sv27                  1/1     Running   0             14m     192.168.10.61   master2   <none>           <none>
kube-proxy-drndv                  1/1     Running   0             64m     192.168.10.60   master1   <none>           <none>
kube-proxy-t7d5q                  1/1     Running   0             2m34s   192.168.10.63   node1     <none>           <none>
kube-proxy-wvg2b                  1/1     Running   0             6m34s   192.168.10.62   master3   <none>           <none>
kube-scheduler-master1            1/1     Running   1 (14m ago)   64m     192.168.10.60   master1   <none>           <none>
kube-scheduler-master2            1/1     Running   0             14m     192.168.10.61   master2   <none>           <none>
kube-scheduler-master3            1/1     Running   0             6m23s   192.168.10.62   master3   <none>           <none>
6. Install the k8s Network Plugin: calico
6.1 Install calico
Kubernetes versions supported by calico: https://docs.tigera.io/calico/3.24/getting-started/kubernetes/requirements
Download calico ("Install Calico with etcd datastore"): https://projectcalico.docs.tigera.io/archive/v3.24/getting-started/kubernetes/self-managed-onprem/onpremises
# etcd datastore variant
curl https://raw.githubusercontent.com/projectcalico/calico/v3.24.5/manifests/calico-etcd.yaml -o calico.yaml
# Kubernetes API datastore variant, for clusters with fewer than 50 nodes
curl https://raw.githubusercontent.com/projectcalico/calico/v3.24.5/manifests/calico.yaml -O
# The calico images can be pulled ahead of time (optional)
# The tags must match the images referenced in the downloaded calico.yaml (v3.24.5 here)
ctr -n=k8s.io images pull docker.io/calico/cni:v3.24.5
ctr -n=k8s.io images pull docker.io/calico/kube-controllers:v3.24.5
ctr -n=k8s.io images pull docker.io/calico/node:v3.24.5
# Run this on master1 only
[root@master1 ~]# kubectl apply -f calico.yaml
poddisruptionbudget.policy/calico-kube-controllers created
serviceaccount/calico-kube-controllers created
serviceaccount/calico-node created
secret/calico-etcd-secrets created
configmap/calico-config created
clusterrole.rbac.authorization.k8s.io/calico-kube-controllers created
clusterrole.rbac.authorization.k8s.io/calico-node created
clusterrolebinding.rbac.authorization.k8s.io/calico-kube-controllers created
clusterrolebinding.rbac.authorization.k8s.io/calico-node created
daemonset.apps/calico-node created
deployment.apps/calico-kube-controllers created
[root@master1 ~]# kubectl get node
NAME      STATUS   ROLES           AGE   VERSION
master1   Ready    control-plane   42h   v1.25.6
master2   Ready    control-plane   41h   v1.25.6
master3   Ready    control-plane   41h   v1.25.6
node1     Ready    work            41h   v1.25.6
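To check the calico rollout itself, list the pods by label (k8s-app=calico-node is the label used in the manifest, and calico-kube-controllers is the deployment created by the apply above):

kubectl get pods -n kube-system -l k8s-app=calico-node -o wide
kubectl get deployment calico-kube-controllers -n kube-system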
6.2 calico architecture
The main components of the Calico network model:
1. Felix: an agent process running on every host. It handles network interface management and monitoring, routing, ARP management, ACL management and synchronization, and status reporting, keeping cross-host container networking working.
2. etcd: a distributed key-value store, effectively the database of the cluster, holding the IP addresses and other metadata of the Calico network model. It keeps the network metadata consistent so the Calico network state stays accurate.
3. BGP Client (BIRD): Calico deploys one BGP client, i.e. one BIRD instance, on every host. It distributes the routes that Felix writes into the kernel to the rest of the Calico network, ensuring that workloads can reach each other.
4. BGP Route Reflector: in large networks, a full BGP mesh between all nodes does not scale, because every pair of nodes needs a session (on the order of N^2 connections). To avoid this, a BGP Route Reflector can be used: each node peers only with the reflector(s), which re-advertise the routes to the other nodes.
6.3 Notes on the calico.yaml configuration
# 1. Notes on the calico-node DaemonSet
...
      containers:
        # Runs calico-node container on each Kubernetes node.
        # This container programs network policy and routes on each
        # host.
        - name: calico-node
          image: docker.io/calico/node:v3.24.5
          ...
          env:
            # Use Kubernetes API as the backing datastore.
            - name: DATASTORE_TYPE
              value: "kubernetes"
            # Cluster type to identify the deployment type
            - name: CLUSTER_TYPE
              value: "k8s,bgp"
            # Auto-detect the BGP IP address.
            - name: IP
              value: "autodetect"
            # Pod network CIDR
            - name: CALICO_IPV4POOL_CIDR
              value: "10.244.0.0/16"
            # Enable IPIP
            - name: CALICO_IPV4POOL_IPIP
              value: "Always"
The main calico-node parameters:
CALICO_IPV4POOL_IPIP: whether to enable IPIP mode. When IPIP is enabled, Calico creates a virtual tunnel device named tunl0 on every node. An IP pool can run in one of two modes, BGP or IPIP: for IPIP mode set CALICO_IPV4POOL_IPIP="Always"; to disable IPIP set CALICO_IPV4POOL_IPIP="Off", in which case BGP mode is used.
IP_AUTODETECTION_METHOD: how the node IP address is detected. By default the IP of the first network interface is used. On nodes with multiple NICs a regular expression can select the right interface, e.g. "interface=eth.*" picks the IP of the first interface whose name starts with eth.
# Add this at the same level (indentation) as the IPIP setting
            - name: IP_AUTODETECTION_METHOD
              value: "interface=ens33"    # the NIC name should be the same on every node
Extension: IPIP mode vs. BGP mode in calico
1) IPIP
IPIP wraps one IP packet inside another, i.e. an IP-in-IP tunnel at layer 3. It effectively acts as a bridge built on top of IP: a normal bridge works at the MAC layer and does not need IP at all, whereas ipip builds a tunnel between the routers at both ends and connects two otherwise unreachable networks point-to-point.
After calico is deployed in ipip mode, each node has a tunl0 device used for the ipip tunnel encapsulation; this is an overlay network. If a node is taken offline and the calico containers are stopped, the device remains; it can be removed with rmmod ipip.
2) BGP
BGP mode uses the physical host directly as a virtual router (vRouter) and does not create an extra tunnel.
The Border Gateway Protocol (BGP) is a core decentralized routing protocol of the Internet. It achieves reachability between autonomous systems (AS) by maintaining IP routing (prefix) tables and is a path-vector protocol. BGP does not use the metrics of traditional interior gateway protocols (IGP); instead it makes routing decisions based on paths, network policies, or rule sets. In hosting terms, a "BGP data center" merges multiple carrier uplinks (China Telecom, China Unicom, China Mobile, etc.) so that a single IP is reachable over all of them.
The advantage of a BGP data center: the server needs only one IP address, and the best path is chosen by the backbone routers based on hop count and other metrics, without consuming any resources on the server itself.
The official calico.yaml template enables ip-ip by default. It creates a tunl0 device on every node, and container traffic is encapsulated with an extra IP header before being forwarded. The switch is the CALICO_IPV4POOL_IPIP environment variable of calico-node in calico.yaml: the default "Always" enables ipip, while "Off" disables it.
            - name: CLUSTER_TYPE
              value: "k8s,bgp"
            # Auto-detect the BGP IP address.
            - name: IP
              value: "autodetect"
            # Enable IPIP
            - name: CALICO_IPV4POOL_IPIP
              value: "Always"
Summary:
Calico's BGP communication runs over TCP, so routes can be exchanged as long as the nodes are reachable at layer 3: in a layer-3-reachable environment bird generates routes to its neighbours. However, like flannel's host-gateway mode, those routes are only usable when the nodes are also reachable at layer 2. So if BGP mode is configured and routes are generated, but pods on different nodes still cannot reach each other, check whether the nodes actually share a layer-2 network.
To handle cross-node communication when the nodes are not on the same layer-2 network, calico has its own solution: IPIP mode.
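A quick way to see which mode is actually in effect on a node (a sketch; it assumes the default CALICO_IPV4POOL_IPIP="Always" described above):

ip addr show tunl0        # the tunl0 device exists when IPIP is enabled
ip route | grep bird      # pod CIDR routes programmed by bird, the BGP client
ip route | grep tunl0     # with IPIP enabled, cross-node pod routes point at tunl0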
7. etcd High Availability
# On master1, master2 and master3, edit the etcd static pod manifest
vim /etc/kubernetes/manifests/etcd.yaml
# Change
#   - --initial-cluster=master1=https://192.168.10.60:2380
# to
#   - --initial-cluster=master1=https://192.168.10.60:2380,master2=https://192.168.10.61:2380,master3=https://192.168.10.62:2380
# After the change, restart kubelet
systemctl restart kubelet

# Test whether the etcd cluster is working
[root@master1 ~]# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.4-0 etcdctl --cert /etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert /etc/kubernetes/pki/etcd/ca.crt member list
# Output like the following means the cluster is configured correctly
3b62ba5c75e23e07, started, master3, https://192.168.10.62:2380, https://192.168.10.62:2379, false
a4c4e764bd4f4d68, started, master2, https://192.168.10.61:2380, https://192.168.10.61:2379, false
d1541dc455b5128b, started, master1, https://192.168.10.60:2380, https://192.168.10.60:2379, false

# Check the endpoint health
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.4-0 etcdctl --cert /etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert /etc/kubernetes/pki/etcd/ca.crt --endpoints=https://192.168.10.60:2379,https://192.168.10.61:2379,https://192.168.10.62:2379 endpoint health --cluster
# Output:
https://192.168.10.61:2379 is healthy: successfully committed proposal: took = 12.523256ms
https://192.168.10.60:2379 is healthy: successfully committed proposal: took = 114.3073ms
https://192.168.10.62:2379 is healthy: successfully committed proposal: took = 11.444148ms

# The same check, printed as a table
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.4-0 etcdctl -w table --cert /etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert /etc/kubernetes/pki/etcd/ca.crt --endpoints=https://192.168.10.60:2379,https://192.168.10.61:2379,https://192.168.10.62:2379 endpoint health --cluster
+----------------------------+--------+-------------+-------+
|          ENDPOINT          | HEALTH |    TOOK     | ERROR |
+----------------------------+--------+-------------+-------+
| https://192.168.10.60:2379 |  true  | 14.313417ms |       |
| https://192.168.10.62:2379 |  true  | 15.410857ms |       |
| https://192.168.10.61:2379 |  true  | 18.734055ms |       |
+----------------------------+--------+-------------+-------+
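Because the cluster runtime is containerd, the same checks can also be run without docker by exec-ing into one of the etcd static pods (a sketch; etcd-master1 is the pod name kubeadm generates, as shown in the kube-system listings above, and etcdctl is shipped inside the kubeadm etcd image):

kubectl -n kube-system exec etcd-master1 -- etcdctl \
    --cert /etc/kubernetes/pki/etcd/peer.crt \
    --key /etc/kubernetes/pki/etcd/peer.key \
    --cacert /etc/kubernetes/pki/etcd/ca.crt \
    --endpoints=https://192.168.10.60:2379,https://192.168.10.61:2379,https://192.168.10.62:2379 \
    endpoint status -w table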
8. Test That the Cluster Network Works
# 1. Download the image archive and import it (into the k8s.io namespace so the kubelet can see it)
ctr -n=k8s.io images import busybox-1-28.tar.gz

# 2. Run the image
kubectl run busybox --image=docker.io/library/busybox:1.28 --image-pull-policy=IfNotPresent --restart=Never --rm -it -- sh
/ # ping www.baidu.com          # test outbound connectivity
/ # nslookup kubernetes.default.svc.cluster.local
Server:    10.96.0.10

# 10.96.0.10 is the clusterIP of coreDNS, so coreDNS is configured correctly; internal Service names are resolved through coreDNS.
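For a fuller smoke test, a throwaway deployment plus a NodePort service exercises cross-node pod networking and the ipvs rules programmed by kube-proxy (a sketch; the my-nginx name and the curl target node are illustrative):

kubectl create deployment my-nginx --image=nginx --replicas=2
kubectl expose deployment my-nginx --port=80 --type=NodePort
kubectl get pods -l app=my-nginx -o wide     # the pods should get 10.244.x.x addresses
kubectl get svc my-nginx                     # note the assigned NodePort, e.g. 80:3xxxx/TCP
curl http://192.168.10.63:<NodePort>         # replace <NodePort> with the port shown above
kubectl delete svc,deployment my-nginx       # clean up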