day02 Assignment

 

1. Node planning

(node planning diagram)

2. Deployment process

2.1 System initialization

# cat /etc/security/limits.conf
root               soft   core           unlimited
root               hard   core           unlimited
root               soft   nproc           1000000
root               hard   nproc           1000000
root               soft   nofile          1000000
root               hard   nofile          1000000
root               soft   memlock         32000
root               hard   memlock         32000
root               soft   msgqueue        8192000
root               hard   msgqueue        8192000
# cat /etc/sysctl.conf
net.ipv4.ip_forward=1
vm.max_map_count=262144
kernel.pid_max=4194303
fs.file-max=1000000
net.ipv4.tcp_max_tw_buckets=6000
net.netfilter.nf_conntrack_max=2097152
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-iptables=1
vm.swappiness=0
# swapoff -a
# cat /etc/fstab
Comment out the swap entry:
#/swap.img none swap sw 0 0
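The same change can be scripted; a minimal sketch, assuming the swap entry looks like the /swap.img line above:
# sed -ri 's/^([^#].*\sswap\s.*)$/#\1/' /etc/fstab   # comment out any active swap entries
# free -m                                            # the Swap line should read 0 after swapoff -a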
Enable the kernel modules:
# cat /etc/modules-load.d/modules.conf
# /etc/modules: kernel modules to load at boot time.
#
# This file contains the names of kernel modules that should be loaded
# at boot time, one per line. Lines beginning with "#" are ignored.
ip_vs
ip_vs_lc
ip_vs_lblc
ip_vs_lblcr
ip_vs_rr
ip_vs_wrr
ip_vs_sh
ip_vs_dh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
ip_tables
ip_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
xt_set
br_netfilter
nf_conntrack
overlay
# reboot
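After the reboot it is worth confirming that the modules were loaded and the sysctl values took effect; a quick check:
# lsmod | grep -E 'ip_vs|br_netfilter|nf_conntrack|overlay'
# sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables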

2.2 Deploying ha01 and ha02

# apt install keepalived haproxy -y
root@ha01:~# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived

global_defs {
  notification_email {
    acassen
  }
  notification_email_from Alexandre.Cassen@firewall.loc
  smtp_server 192.168.200.1
  smtp_connect_timeout 30
  router_id LVS_DEVEL
}

vrrp_instance VI_1 {
  state MASTER
  interface eth0
  virtual_router_id 51
  priority 100
  advert_int 1
  unicast_src_ip 10.0.6.7
  unicast_peer {
        10.0.6.8
  }
  authentication {
      auth_type PASS
      auth_pass 1111
  }
  virtual_ipaddress {
       10.0.6.188 dev eth0 label eth0:1
       10.0.6.189 dev eth0 label eth0:2
       10.0.6.190 dev eth0 label eth0:3
       10.0.6.191 dev eth0 label eth0:4
  }
}
root@ha02:~# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived

global_defs {
  notification_email {
    acassen
  }
  notification_email_from Alexandre.Cassen@firewall.loc
  smtp_server 192.168.200.1
  smtp_connect_timeout 30
  router_id LVS_DEVEL
}

vrrp_instance VI_1 {
  state BACKUP
  interface eth0
  virtual_router_id 51
  priority 80
  advert_int 1
  unicast_src_ip 10.0.6.8
  unicast_peer {
        10.0.6.7
  }
  authentication {
      auth_type PASS
      auth_pass 1111
  }
  virtual_ipaddress {
       10.0.6.188 dev eth0 label eth0:1
       10.0.6.189 dev eth0 label eth0:2
       10.0.6.190 dev eth0 label eth0:3
       10.0.6.191 dev eth0 label eth0:4
  }
}
# systemctl enable keepalived && systemctl start keepalived
# cat /etc/haproxy/haproxy.cfg
global
    log /dev/log local0
    log /dev/log local1 notice
    chroot /var/lib/haproxy
    stats socket /run/haproxy/admin.sock mode 660 level admin expose-fd listeners
    stats timeout 30s
    user haproxy
    group haproxy
    daemon

    # Default SSL material locations
    ca-base /etc/ssl/certs
    crt-base /etc/ssl/private

    # See: https://ssl-config.mozilla.org/#server=haproxy&server-version=2.0.3&config=intermediate
    ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
    ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
    ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets

defaults
    log global
    mode http
    option httplog
    option dontlognull
    timeout connect 5000
    timeout client  50000
    timeout server  50000
    errorfile 400 /etc/haproxy/errors/400.http
    errorfile 403 /etc/haproxy/errors/403.http
    errorfile 408 /etc/haproxy/errors/408.http
    errorfile 500 /etc/haproxy/errors/500.http
    errorfile 502 /etc/haproxy/errors/502.http
    errorfile 503 /etc/haproxy/errors/503.http
    errorfile 504 /etc/haproxy/errors/504.http

listen k8s_api_server_6443
    bind 10.0.6.188:6443
    mode tcp
    server 10.0.6.1 10.0.6.1:6443 check inter 2000 fall 3 rise 5
    server 10.0.6.2 10.0.6.2:6443 check inter 2000 fall 3 rise 5
    server 10.0.6.3 10.0.6.3:6443 check inter 2000 fall 3 rise 5
# sysctl -w net.ipv4.ip_nonlocal_bind=1
# echo 'net.ipv4.ip_nonlocal_bind = 1' >> /etc/sysctl.conf && sysctl -p
# systemctl enable haproxy && systemctl restart haproxy.service
Test VIP failover and confirm that port 6443 is listening on the VIP; a sketch follows.
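One way to exercise the failover (adjust the interface and VIP to your environment):
root@ha01:~# ip addr show eth0 | grep 10.0.6.188      # VIP currently held by ha01 (MASTER)
root@ha01:~# systemctl stop keepalived                # simulate a failure on ha01
root@ha02:~# ip addr show eth0 | grep 10.0.6.188      # VIP should now appear on ha02
root@ha01:~# systemctl start keepalived               # VIP fails back to the higher-priority node
root@ha01:~# ss -tnlp | grep 6443                     # haproxy listening on VIP:6443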

2.3 Installing Harbor

Upload the Harbor offline installer package.

Upload the TLS certificate for the domain Harbor will use.

# tar -xf harbor-offline-installer-v2.7.2.tgz
# cp harbor.yml.tmpl harbor.yml
# mkdir /data   # ideally backed by shared storage such as NAS, to avoid data loss
# install docker and docker-compose
# ./install.sh --with-trivy --with-chartmuseum
Wait for the installation to finish.
Add a hosts entry for the Harbor domain on the Windows client.

Run docker ps to check that the Harbor containers are healthy.

Configure the systemd service unit:
# docker-compose down
root@harbor:~/harbor# cat /lib/systemd/system/harbor.service
[Unit]
Description=Harbor
After=docker.service systemd-networkd.service systemd-resolved.service
Requires=docker.service
Documentation=http://github.com/vmware/harbor
  
[Service]
Type=simple
Restart=on-failure
RestartSec=5
ExecStart=/usr/bin/docker-compose -f /root/harbor/docker-compose.yml up
ExecStop=/usr/bin/docker-compose -f /root/harbor/docker-compose.yml down
  
[Install]
WantedBy=multi-user.target
# systemctl daemon-reload && systemctl enable harbor && systemctl start harbor
Test image push and pull.
Add the Harbor domain to /etc/hosts on the client.
# docker login harbor.20180622.xyz
# docker push harbor.20180622.xyz/baseimages/nginx:1.18
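The push above assumes the image has already been tagged for the registry; for completeness, a sketch of the full round trip:
# docker pull nginx:1.18
# docker tag nginx:1.18 harbor.20180622.xyz/baseimages/nginx:1.18
# docker push harbor.20180622.xyz/baseimages/nginx:1.18
# docker pull harbor.20180622.xyz/baseimages/nginx:1.18   # pull it back to verify downloads work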

2.4 Deploying k8s with kubeasz

2.4.1 Configure passwordless SSH login
# ssh-keygen -t rsa-sha2-512 -b 4096
# cat ssh_no_passwd.sh
#!/bin/bash
RED='echo -e \033[01;31m'
GREEN='echo -e \033[01;32m'
END='\033[0m'
# IP address list
# master, node, etcd
IP="
10.0.6.1
10.0.6.2
10.0.6.3
10.0.6.4
10.0.6.5
10.0.6.6
10.0.6.11
10.0.6.12
10.0.6.13
"
for node in $IP;do
  sshpass -v -p 1 ssh-copy-id -o StrictHostKeyChecking=no root@$node
  if [ $? -eq 0 ];then
    $GREEN "$node key copied successfully" $END
  else
    $RED "$node key copy failed" $END
  fi
done
2.4.2 Deployment steps
root@deploy:~# apt install ansible git -y
root@deploy:~# export release=3.5.2
root@deploy:~# wget https://github.com/easzlab/kubeasz/releases/download/${release}/ezdown
root@deploy:~# chmod +x ./ezdown
root@deploy:~# ./ezdown -D

Create a python symlink on every node

# cat ssh_ln_python3.sh
#!/bin/bash
RED='echo -e \033[01;31m'
GREEN='echo -e \033[01;32m'
END='\033[0m'
# IP address list
# master, node, etcd
IP="
10.0.6.1
10.0.6.2
10.0.6.3
10.0.6.4
10.0.6.5
10.0.6.6
10.0.6.11
10.0.6.12
10.0.6.13
"
for node in $IP;do
  ssh $node "ln -sv /usr/bin/python3 /usr/bin/python"
  if [ $? -eq 0 ];then
    $GREEN "$node symlink created" $END
  else
    $RED "$node symlink failed" $END
  fi
done

Modify the configuration files

root@deploy:/etc/kubeasz# cat clusters/k8s-01/hosts
# 'etcd' cluster should have odd member(s) (1,3,5,...)
[etcd]
10.0.6.4
10.0.6.5
10.0.6.6

# master node(s), set unique 'k8s_nodename' for each node
# CAUTION: 'k8s_nodename' must consist of lower case alphanumeric characters, '-' or '.',
# and must start and end with an alphanumeric character
[kube_master]
10.0.6.1 k8s_nodename='10.0.6.1'
10.0.6.2 k8s_nodename='10.0.6.2'
#10.0.6.3 k8s_nodename='10.0.6.3'

# work node(s), set unique 'k8s_nodename' for each node
# CAUTION: 'k8s_nodename' must consist of lower case alphanumeric characters, '-' or '.',
# and must start and end with an alphanumeric character
[kube_node]
10.0.6.11 k8s_nodename='10.0.6.11'
10.0.6.12 k8s_nodename='10.0.6.12'
#10.0.6.13 k8s_nodename='10.0.6.13'

# [optional] harbor server, a private docker registry
# 'NEW_INSTALL': 'true' to install a harbor server; 'false' to integrate with existed one
[harbor]
#10.0.6.8 NEW_INSTALL=false

# [optional] loadbalance for accessing k8s from outside
[ex_lb]
#10.0.6.6 LB_ROLE=backup EX_APISERVER_VIP=10.0.6.250 EX_APISERVER_PORT=8443
#10.0.6.7 LB_ROLE=master EX_APISERVER_VIP=10.0.6.250 EX_APISERVER_PORT=8443

# [optional] ntp server for the cluster
[chrony]
#10.0.6.1

[all:vars]
# --------- Main Variables ---------------
# Secure port for apiservers
SECURE_PORT="6443"

# Cluster container-runtime supported: docker, containerd
# if k8s version >= 1.24, docker is not supported
CONTAINER_RUNTIME="containerd"

# Network plugins supported: calico, flannel, kube-router, cilium, kube-ovn
CLUSTER_NETWORK="calico"

# Service proxy mode of kube-proxy: 'iptables' or 'ipvs'
PROXY_MODE="ipvs"

# K8S Service CIDR, not overlap with node(host) networking
SERVICE_CIDR="10.100.0.0/16"

# Cluster CIDR (Pod CIDR), not overlap with node(host) networking
CLUSTER_CIDR="10.200.0.0/16"

# NodePort Range
NODE_PORT_RANGE="30000-62767"

# Cluster DNS Domain
CLUSTER_DNS_DOMAIN="cluster.local"

# -------- Additional Variables (don't change the default value right now) ---
# Binaries Directory
bin_dir="/usr/local/bin"

# Deploy Directory (kubeasz workspace)
base_dir="/etc/kubeasz"

# Directory for a specific cluster
cluster_dir="{{ base_dir }}/clusters/k8s-01"

# CA and other components cert/key Directory
ca_dir="/etc/kubernetes/ssl"

# Default 'k8s_nodename' is empty
k8s_nodename=''
root@deploy:/etc/kubeasz# cat clusters/k8s-01/config.yml 
############################
# prepare
############################
# optionally install system packages from an offline source (offline|online)
INSTALL_SOURCE: "online"

# optional OS security hardening, see github.com/dev-sec/ansible-collection-hardening
OS_HARDEN: false


############################
# role:deploy
############################
# default: ca will expire in 100 years
# default: certs issued by the ca will expire in 50 years
CA_EXPIRY: "876000h"
CERT_EXPIRY: "438000h"

# force to recreate CA and other certs, not suggested to set 'true'
CHANGE_CA: false

# kubeconfig settings
CLUSTER_NAME: "cluster1"
CONTEXT_NAME: "context-{{ CLUSTER_NAME }}"

# k8s version
K8S_VER: "1.26.1"

# set unique 'k8s_nodename' for each node, if not set(default:'') ip add will be used
# CAUTION: 'k8s_nodename' must consist of lower case alphanumeric characters, '-' or '.',
# and must start and end with an alphanumeric character (e.g. 'example.com'),
# regex used for validation is '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*'
K8S_NODENAME: "{%- if k8s_nodename != '' -%} \
                  {{ k8s_nodename|replace('_', '-')|lower }} \
              {%- else -%} \
                  {{ inventory_hostname }} \
              {%- endif -%}"

############################
# role:etcd
############################
# using a separate wal directory avoids disk I/O contention and improves performance
ETCD_DATA_DIR: "/var/lib/etcd"
ETCD_WAL_DIR: ""


############################
# role:runtime [containerd,docker]
############################
# ------------------------------------------- containerd
# [.] enable registry mirrors
ENABLE_MIRROR_REGISTRY: true

# [containerd] base (pause/sandbox) container image
SANDBOX_IMAGE: "harbor.20180622.xyz/baseimages/pause:3.9"

# [containerd] container persistent storage directory
CONTAINERD_STORAGE_DIR: "/var/lib/containerd"

# ------------------------------------------- docker
# [docker] container storage directory
DOCKER_STORAGE_DIR: "/var/lib/docker"

# [docker] enable the remote RESTful API
ENABLE_REMOTE_API: false

# [docker] trusted insecure (HTTP) registries
INSECURE_REG: '["http://easzlab.io.local:5000"]'


############################
# role:kube-master
############################
# certificate SANs for the k8s master nodes; extra IPs and domains (e.g. a public IP and domain) can be added
MASTER_CERT_HOSTS:
 - "10.0.6.188"
 - "kubeapi.20180622.xyz"
 #- "www.test.com"

# pod subnet mask length on each node (determines how many pod IPs a node can allocate)
# if flannel runs with --kube-subnet-mgr, it reads this value to assign a pod subnet to each node
# https://github.com/coreos/flannel/issues/847
NODE_CIDR_LEN: 24


############################
# role:kube-node
############################
# kubelet root directory
KUBELET_ROOT_DIR: "/var/lib/kubelet"

# maximum number of pods per node
MAX_PODS: 200

# resources reserved for kube components (kubelet, kube-proxy, dockerd, etc.)
# see templates/kubelet-config.yaml.j2 for the values
KUBE_RESERVED_ENABLED: "no"

# upstream k8s advises against enabling system-reserved casually, unless long-term monitoring shows the system's real usage;
# the reservation should be increased as the system runs longer, see templates/kubelet-config.yaml.j2 for the values
# the defaults assume a minimal-install 4c/8g VM; increase the reservation on high-spec physical machines
# during cluster installation the apiserver etc. briefly consume a lot of resources, so reserve at least 1g of memory
SYS_RESERVED_ENABLED: "no"


############################
# role:network [flannel,calico,cilium,kube-ovn,kube-router]
############################
# ------------------------------------------- flannel
# [flannel] flannel backend: "host-gw", "vxlan", etc.
FLANNEL_BACKEND: "vxlan"
DIRECT_ROUTING: false

# [flannel]
flannel_ver: "v0.19.2"

# ------------------------------------------- calico
# [calico] IPIP tunnel mode, one of [Always, CrossSubnet, Never]; across subnets use Always or CrossSubnet (on public clouds Always is simplest, otherwise the provider's network settings must be adjusted, see the provider's docs)
# CrossSubnet is a hybrid tunnel + BGP routing mode that can improve network performance; within a single subnet Never is sufficient.
CALICO_IPV4POOL_IPIP: "Always"

# [calico] host IP used by calico-node; BGP peering is established over this address, set manually or auto-detected
IP_AUTODETECTION_METHOD: "can-reach={{ groups['kube_master'][0] }}"

# [calico] calico network backend: brid, vxlan, none
CALICO_NETWORKING_BACKEND: "brid"

# [calico] whether calico uses route reflectors
# recommended once the cluster grows beyond about 50 nodes
CALICO_RR_ENABLED: false

# CALICO_RR_NODES sets the route reflector nodes; defaults to the cluster master nodes when unset
# CALICO_RR_NODES: ["192.168.1.1", "192.168.1.2"]
CALICO_RR_NODES: []

# [calico] supported calico versions for upgrade: ["3.19", "3.23"]
calico_ver: "v3.24.5"

# [calico] calico major version
calico_ver_main: "{{ calico_ver.split('.')[0] }}.{{ calico_ver.split('.')[1] }}"

# ------------------------------------------- cilium
# [cilium] image version
cilium_ver: "1.12.4"
cilium_connectivity_check: true
cilium_hubble_enabled: false
cilium_hubble_ui_enabled: false

# ------------------------------------------- kube-ovn
# [kube-ovn] node for the OVN DB and OVN control plane, defaults to the first master node
OVN_DB_NODE: "{{ groups['kube_master'][0] }}"

# [kube-ovn] offline image tarball version
kube_ovn_ver: "v1.5.3"

# ------------------------------------------- kube-router
# [kube-router] public clouds impose restrictions and generally need ipinip always on; in your own environment this can be set to "subnet"
OVERLAY_TYPE: "full"

# [kube-router] NetworkPolicy support switch
FIREWALL_ENABLE: true

# [kube-router] kube-router image version
kube_router_ver: "v0.3.1"
busybox_ver: "1.28.4"


############################
# role:cluster-addon
############################
# install coredns automatically
dns_install: "no"
corednsVer: "1.9.3"
ENABLE_LOCAL_DNS_CACHE: false
dnsNodeCacheVer: "1.22.13"
# local dns cache address
LOCAL_DNS_CACHE: "169.254.20.10"

# install metrics server automatically
metricsserver_install: "no"
metricsVer: "v0.5.2"

# install dashboard automatically
dashboard_install: "no"
dashboardVer: "v2.7.0"
dashboardMetricsScraperVer: "v1.0.8"

# install prometheus automatically
prom_install: "no"
prom_namespace: "monitor"
prom_chart_ver: "39.11.0"

# install nfs-provisioner automatically
nfs_provisioner_install: "no"
nfs_provisioner_namespace: "kube-system"
nfs_provisioner_ver: "v4.0.2"
nfs_storage_class: "managed-nfs-storage"
nfs_server: "192.168.1.10"
nfs_path: "/data/nfs"

# install network-check automatically
network_check_enabled: false
network_check_schedule: "*/5 * * * *"

############################
# role:harbor
############################
# harbor version, full version string
HARBOR_VER: "v2.6.3"
HARBOR_DOMAIN: "harbor.easzlab.io.local"
HARBOR_PATH: /var/data
HARBOR_TLS_PORT: 8443
HARBOR_REGISTRY: "{{ HARBOR_DOMAIN }}:{{ HARBOR_TLS_PORT }}"

# if set 'false', you need to put certs named harbor.pem and harbor-key.pem in directory 'down'
HARBOR_SELF_SIGNED_CERT: true

# install extra component
HARBOR_WITH_NOTARY: false
HARBOR_WITH_TRIVY: false
HARBOR_WITH_CHARTMUSEUM: true
2.4.2.1 Preparation
root@deploy:/etc/kubeasz# ./ezctl setup k8s-01 01
2.4.2.2 Initializing etcd
root@deploy:/etc/kubeasz# ./ezctl setup k8s-01 02
Verify the etcd service status:
root@etcd01:~# export NODE_IPS="10.0.6.4 10.0.6.5 10.0.6.6"

root@etcd01:~# for ip in ${NODE_IPS}; do
 ETCDCTL_API=3 /usr/local/bin/etcdctl \
 --endpoints=https://${ip}:2379 \
 --cacert=/etc/kubernetes/ssl/ca.pem \
 --cert=/etc/kubernetes/ssl/etcd.pem \
 --key=/etc/kubernetes/ssl/etcd-key.pem \
endpoint health; done
 
https://10.0.6.4:2379 is healthy: successfully committed proposal: took = 9.879125ms
https://10.0.6.5:2379 is healthy: successfully committed proposal: took = 21.645162ms
https://10.0.6.6:2379 is healthy: successfully committed proposal: took = 7.334946ms
2.4.2.3 Deploying the container runtime
Upload the nerdctl tarball to /root.
root@deploy:~# tar -xf nerdctl-1.3.0-linux-amd64.tar.gz -C /etc/kubeasz/bin/containerd-bin/
Back up the yml configuration files before modifying them.

root@deploy:/etc/kubeasz# cat roles/containerd/templates/nerdctl.toml.j2
namespace = "k8s.io"
debug = false
debug_full = false
insecure_registry = true

root@deploy:/etc/kubeasz# cat roles/containerd/tasks/main.yml
- name: Check whether containerd is already installed
  shell: 'systemctl is-active containerd || echo "NoFound"'
  register: containerd_svc

- name: Distribute the hosts entry for harbor
  shell: "echo '10.0.6.9 harbor.20180622.xyz' >> /etc/hosts"

- block:
    - name: Prepare the containerd directories
      file: name={{ item }} state=directory
      with_items:
        - "{{ bin_dir }}"
        - "/etc/containerd"
        - "/etc/nerdctl/"   # nerdctl configuration directory

    - name: Load the overlay kernel module
      modprobe: name=overlay state=present

    - name: Distribute the containerd binaries
      copy: src={{ base_dir }}/bin/containerd-bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755
      with_items:
        - containerd
        - containerd-shim
        - containerd-shim-runc-v1
        - containerd-shim-runc-v2
        - crictl
        - ctr
        - runc
        - nerdctl   # distribute nerdctl and its helper scripts as well
        - containerd-rootless-setuptool.sh
        - containerd-rootless.sh
      tags: upgrade

    - name: Add crictl bash completion
      lineinfile:
        dest: ~/.bashrc
        state: present
        regexp: 'crictl completion'
        line: 'source <(crictl completion bash) # generated by kubeasz'

    - name: Create the containerd configuration file
      template: src=config.toml.j2 dest=/etc/containerd/config.toml
      tags: upgrade

    - name: Create the nerdctl configuration file
      template: src=nerdctl.toml.j2 dest=/etc/nerdctl/nerdctl.toml
      tags: upgrade

    - name: Create the systemd unit file
      template: src=containerd.service.j2 dest=/etc/systemd/system/containerd.service
      tags: upgrade

    - name: Create the crictl configuration
      template: src=crictl.yaml.j2 dest=/etc/crictl.yaml

    - name: Enable the containerd service at boot
      shell: systemctl enable containerd
      ignore_errors: true

    - name: Start the containerd service
      shell: systemctl daemon-reload && systemctl restart containerd
      tags: upgrade

    - name: Wait until the containerd service is running
      shell: "systemctl is-active containerd.service"
      register: containerd_status
      until: '"active" in containerd_status.stdout'
      retries: 8
      delay: 2
      tags: upgrade
  when: "'NoFound' in containerd_svc.stdout"
 

The containerd configuration file:

root@deploy:/etc/kubeasz# cat roles/containerd/templates/config.toml.j2
disabled_plugins = []
imports = []
oom_score = 0
plugin_dir = ""
required_plugins = []
root = "{{ CONTAINERD_STORAGE_DIR }}"
state = "/run/containerd"
temp = ""
version = 2

[cgroup]
path = ""

[debug]
address = ""
format = ""
gid = 0
level = ""
uid = 0

[grpc]
address = "/run/containerd/containerd.sock"
gid = 0
max_recv_message_size = 16777216
max_send_message_size = 16777216
tcp_address = ""
tcp_tls_ca = ""
tcp_tls_cert = ""
tcp_tls_key = ""
uid = 0

[metrics]
address = ""
grpc_histogram = false

[plugins]

 [plugins."io.containerd.gc.v1.scheduler"]
  deletion_threshold = 0
  mutation_threshold = 100
  pause_threshold = 0.02
  schedule_delay = "0s"
  startup_delay = "100ms"

 [plugins."io.containerd.grpc.v1.cri"]
  device_ownership_from_security_context = false
  disable_apparmor = false
  disable_cgroup = false
  disable_hugetlb_controller = true
  disable_proc_mount = false
  disable_tcp_service = true
  enable_selinux = false
  enable_tls_streaming = false
  enable_unprivileged_icmp = false
  enable_unprivileged_ports = false
  ignore_image_defined_volumes = false
  max_concurrent_downloads = 3
  max_container_log_line_size = 16384
  netns_mounts_under_state_dir = false
  restrict_oom_score_adj = false
  sandbox_image = "{{ SANDBOX_IMAGE }}"
  selinux_category_range = 1024
  stats_collect_period = 10
  stream_idle_timeout = "4h0m0s"
  stream_server_address = "127.0.0.1"
  stream_server_port = "0"
  systemd_cgroup = false
  tolerate_missing_hugetlb_controller = true
  unset_seccomp_profile = ""

   [plugins."io.containerd.grpc.v1.cri".cni]
    bin_dir = "{{ bin_dir }}"
    conf_dir = "/etc/cni/net.d"
    conf_template = "/etc/cni/net.d/10-default.conf"
    max_conf_num = 1

   [plugins."io.containerd.grpc.v1.cri".containerd]
    default_runtime_name = "runc"
    disable_snapshot_annotations = true
    discard_unpacked_layers = false
    ignore_rdt_not_enabled_errors = false
    no_pivot = false
    snapshotter = "overlayfs"

     [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
      base_runtime_spec = ""
      container_annotations = []
      pod_annotations = []
      privileged_without_host_devices = false
      runtime_engine = ""
      runtime_root = ""
      runtime_type = ""

       [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]

     [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]

       [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
        base_runtime_spec = ""
        container_annotations = []
        pod_annotations = []
        privileged_without_host_devices = false
        runtime_engine = ""
        runtime_root = ""
        runtime_type = "io.containerd.runc.v2"

         [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
          BinaryName = ""
          CriuImagePath = ""
          CriuPath = ""
          CriuWorkPath = ""
          IoGid = 0
          IoUid = 0
          NoNewKeyring = false
          NoPivotRoot = false
          Root = ""
          ShimCgroup = ""
          SystemdCgroup = true

     [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
      base_runtime_spec = ""
      container_annotations = []
      pod_annotations = []
      privileged_without_host_devices = false
      runtime_engine = ""
      runtime_root = ""
      runtime_type = ""

       [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime.options]

   [plugins."io.containerd.grpc.v1.cri".image_decryption]
    key_model = "node"

   [plugins."io.containerd.grpc.v1.cri".registry]

     [plugins."io.containerd.grpc.v1.cri".registry.auths]

     [plugins."io.containerd.grpc.v1.cri".registry.configs]
       [plugins."io.containerd.grpc.v1.cri".registry.configs."easzlab.io.local:5000".tls]
        insecure_skip_verify = true

       [plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.20180622.xyz".tls]
        insecure_skip_verify = true
       [plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.20180622.xyz".auth]
        username = "admin"
        password = "123456"

       [plugins."io.containerd.grpc.v1.cri".registry.configs."{{ HARBOR_REGISTRY }}".tls]
        insecure_skip_verify = true

     [plugins."io.containerd.grpc.v1.cri".registry.headers]

     [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
       [plugins."io.containerd.grpc.v1.cri".registry.mirrors."easzlab.io.local:5000"]
        endpoint = ["http://easzlab.io.local:5000"]

       [plugins."io.containerd.grpc.v1.cri".registry.mirrors."harbor.20180622.xyz"]
        endpoint = ["https://harbor.20180622.xyz"]

       [plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{ HARBOR_REGISTRY }}"]
        endpoint = ["https://{{ HARBOR_REGISTRY }}"]
{% if ENABLE_MIRROR_REGISTRY %}
       [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
        endpoint = ["https://docker.nju.edu.cn/", "https://kuamavit.mirror.aliyuncs.com"]
       [plugins."io.containerd.grpc.v1.cri".registry.mirrors."gcr.io"]
        endpoint = ["https://gcr.nju.edu.cn"]
       [plugins."io.containerd.grpc.v1.cri".registry.mirrors."k8s.gcr.io"]
        endpoint = ["https://gcr.nju.edu.cn/google-containers/"]
       [plugins."io.containerd.grpc.v1.cri".registry.mirrors."quay.io"]
        endpoint = ["https://quay.nju.edu.cn"]
       [plugins."io.containerd.grpc.v1.cri".registry.mirrors."ghcr.io"]
        endpoint = ["https://ghcr.nju.edu.cn"]
       [plugins."io.containerd.grpc.v1.cri".registry.mirrors."nvcr.io"]
        endpoint = ["https://ngc.nju.edu.cn"]
{% endif %}

   [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
    tls_cert_file = ""
    tls_key_file = ""

 [plugins."io.containerd.internal.v1.opt"]
  path = "/opt/containerd"

 [plugins."io.containerd.internal.v1.restart"]
  interval = "10s"

 [plugins."io.containerd.metadata.v1.bolt"]
  content_sharing_policy = "shared"

 [plugins."io.containerd.monitor.v1.cgroups"]
  no_prometheus = false

 [plugins."io.containerd.runtime.v1.linux"]
  no_shim = false
  runtime = "runc"
  runtime_root = ""
  shim = "containerd-shim"
  shim_debug = false

 [plugins."io.containerd.runtime.v2.task"]
  platforms = ["linux/amd64"]

 [plugins."io.containerd.service.v1.diff-service"]
  default = ["walking"]

 [plugins."io.containerd.snapshotter.v1.aufs"]
  root_path = ""

 [plugins."io.containerd.snapshotter.v1.btrfs"]
  root_path = ""

 [plugins."io.containerd.snapshotter.v1.devmapper"]
  async_remove = false
  base_image_size = ""
  pool_name = ""
  root_path = ""

 [plugins."io.containerd.snapshotter.v1.native"]
  root_path = ""

 [plugins."io.containerd.snapshotter.v1.overlayfs"]
  root_path = ""

 [plugins."io.containerd.snapshotter.v1.zfs"]
  root_path = ""

[proxy_plugins]

[stream_processors]

 [stream_processors."io.containerd.ocicrypt.decoder.v1.tar"]
  accepts = ["application/vnd.oci.image.layer.v1.tar+encrypted"]
  args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
  env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
  path = "ctd-decoder"
  returns = "application/vnd.oci.image.layer.v1.tar"

 [stream_processors."io.containerd.ocicrypt.decoder.v1.tar.gzip"]
  accepts = ["application/vnd.oci.image.layer.v1.tar+gzip+encrypted"]
  args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
  env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
  path = "ctd-decoder"
  returns = "application/vnd.oci.image.layer.v1.tar+gzip"

[timeouts]
 "io.containerd.timeout.shim.cleanup" = "5s"
 "io.containerd.timeout.shim.load" = "5s"
 "io.containerd.timeout.shim.shutdown" = "3s"
 "io.containerd.timeout.task.state" = "2s"

[ttrpc]
address = ""
gid = 0
uid = 0
root@deploy:/etc/kubeasz# ./ezctl setup k8s-01 03
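After step 03, a quick sanity check of the runtime on any node (a sketch; node01 is a placeholder hostname, and the pull assumes the pause image in harbor is reachable anonymously or credentials are configured):
root@node01:~# systemctl is-active containerd
root@node01:~# nerdctl pull harbor.20180622.xyz/baseimages/pause:3.9   # also verifies access to harbor
root@node01:~# nerdctl images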
2.4.2.4 Deploying the masters
root@deploy:/etc/kubeasz# ./ezctl setup k8s-01 04
2.4.2.5 Deploying the nodes
root@deploy:/etc/kubeasz# ./ezctl setup k8s-01 05
2.4.2.6 Deploying calico
Note the following variable in the calico yml template:
- name: CALICO_IPV4POOL_CIDR
  value: "{{ CLUSTER_CIDR }}"

root@deploy:/etc/kubeasz/bin# docker tag easzlab.io.local:5000/calico/cni:v3.24.5 harbor.20180622.xyz/baseimages/calico-cni:v3.24.5
docker tag easzlab.io.local:5000/calico/node:v3.24.5 harbor.20180622.xyz/baseimages/calico-node:v3.24.5
docker tag easzlab.io.local:5000/calico/kube-controllers:v3.24.5 harbor.20180622.xyz/baseimages/calico-kube-controllers:v3.24.5

docker push harbor.20180622.xyz/baseimages/calico-cni:v3.24.5
docker push harbor.20180622.xyz/baseimages/calico-node:v3.24.5
docker push harbor.20180622.xyz/baseimages/calico-kube-controllers:v3.24.5

root@deploy:/etc/kubeasz# grep -i image roles/calico/templates/calico-v3.24.yaml.j2
        image: harbor.20180622.xyz/baseimages/calico-cni:v3.24.5
        imagePullPolicy: IfNotPresent
        image: harbor.20180622.xyz/baseimages/calico-node:v3.24.5
        imagePullPolicy: IfNotPresent
        image: harbor.20180622.xyz/baseimages/calico-node:v3.24.5
        imagePullPolicy: IfNotPresent
        image: harbor.20180622.xyz/baseimages/calico-kube-controllers:v3.24.5
        imagePullPolicy: IfNotPresent

root@deploy:/etc/kubeasz# ./ezctl setup k8s-01 06

root@master01:~# calicoctl node status
Calico process is running.

IPv4 BGP status
+--------------+-------------------+-------+----------+-------------+
| PEER ADDRESS |     PEER TYPE     | STATE |  SINCE   |    INFO     |
+--------------+-------------------+-------+----------+-------------+
| 10.0.6.2     | node-to-node mesh | up    | 04:15:07 | Established |
| 10.0.6.11    | node-to-node mesh | up    | 04:15:07 | Established |
| 10.0.6.12    | node-to-node mesh | up    | 04:15:10 | Established |
+--------------+-------------------+-------+----------+-------------+

IPv6 BGP status
No IPv6 peers found.

Copy the kubeconfig file to the master nodes so kubectl can be run there.

root@deploy:/etc/kubeasz# scp /root/.kube/config 10.0.6.1:/root/.kube

Change the server address in the kubeconfig to the load balancer (VIP) address for high availability; a sketch follows.
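A minimal sketch of that change, assuming the generated kubeconfig points at the first master and using the 10.0.6.188 VIP from the haproxy listen section:
root@master01:~# sed -i 's#https://10.0.6.1:6443#https://10.0.6.188:6443#' /root/.kube/config
root@master01:~# kubectl cluster-info   # should now report the control plane at the VIP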

 

Test pod networking:

root@master01:~# kubectl run net-test --image=centos:7.9.2009 sleep 10000
root@master01:~# kubectl exec -it net-test -- bash
2.4.2.7 Adding a worker node
root@deploy:/etc/kubeasz# ./ezctl add-node k8s-01 10.0.6.13
2.4.2.8 Adding a master node
When adding a master node, the first run failed while updating the package sources, so re-running it reported that the master node already exists.
The node's IP has to be removed from the [kube_master] section of the cluster hosts file before retrying.
root@deploy:/etc/kubeasz# ./ezctl add-master k8s-01 10.0.6.3
root@deploy:/etc/kubeasz# kubectl get node -A
NAME        STATUS                     ROLES    AGE     VERSION
10.0.6.1    Ready,SchedulingDisabled   master   4h13m   v1.26.1
10.0.6.11   Ready                      node     4h7m    v1.26.1
10.0.6.12   Ready                      node     4h7m    v1.26.1
10.0.6.13   Ready                      node     27m     v1.26.1
10.0.6.2    Ready,SchedulingDisabled   master   4h13m   v1.26.1
10.0.6.3    Ready,SchedulingDisabled   master   115s    v1.26.1

root@master03:~# calicoctl node status
Calico process is running.

IPv4 BGP status
+--------------+-------------------+-------+----------+-------------+
| PEER ADDRESS |     PEER TYPE     | STATE |  SINCE   |    INFO     |
+--------------+-------------------+-------+----------+-------------+
| 10.0.6.1     | node-to-node mesh | up    | 07:47:55 | Established |
| 10.0.6.2     | node-to-node mesh | up    | 07:48:16 | Established |
| 10.0.6.11    | node-to-node mesh | up    | 07:47:43 | Established |
| 10.0.6.12    | node-to-node mesh | up    | 07:47:34 | Established |
| 10.0.6.13    | node-to-node mesh | up    | 07:48:04 | Established |
+--------------+-------------------+-------+----------+-------------+

IPv6 BGP status
No IPv6 peers found.

2.5 Upgrading the k8s version

Upload the upgrade packages:

kubernetes-client-linux-amd64.tar.gz kubernetes-node-linux-amd64.tar.gz kubernetes-server-linux-amd64.tar.gz kubernetes.tar.gz

root@deploy:/etc/kubeasz# tar -xf /root/kubernetes-client-linux-amd64.tar.gz -C /usr/local/bin/
root@deploy:/etc/kubeasz# tar -xf /root/kubernetes-node-linux-amd64.tar.gz -C /usr/local/bin/
root@deploy:/etc/kubeasz# tar -xf /root/kubernetes-server-linux-amd64.tar.gz -C /usr/local/bin/
root@deploy:/etc/kubeasz# tar -xf /root/kubernetes.tar.gz -C /usr/local/bin/
root@deploy:/usr/local/bin/kubernetes# cd /usr/local/bin/kubernetes/server/bin/
root@deploy:/usr/local/bin/kubernetes/server/bin# \cp kube-apiserver kube-controller-manager kube-scheduler kubelet kube-proxy kubectl /etc/kubeasz/bin/
Verify the version:
root@deploy:/usr/local/bin/kubernetes/server/bin# /etc/kubeasz/bin/kube-apiserver --version
Kubernetes v1.26.4

Run the batch upgrade:
root@deploy:/etc/kubeasz# ./ezctl upgrade k8s-01

root@deploy:/etc/kubeasz# kubectl get node -A
NAME        STATUS                     ROLES    AGE     VERSION
10.0.6.1    Ready,SchedulingDisabled   master   4h30m   v1.26.4
10.0.6.11   Ready                      node     4h24m   v1.26.4
10.0.6.12   Ready                      node     4h24m   v1.26.4
10.0.6.13   Ready                      node     44m     v1.26.4
10.0.6.2    Ready,SchedulingDisabled   master   4h30m   v1.26.4
10.0.6.3    Ready,SchedulingDisabled   master   19m     v1.26.4
The VERSION column above shows each node's kubelet version.

Mark a node as unschedulable:

kubectl cordon <node-name>

Make it schedulable again:

kubectl uncordon <node-name>

Evict the pods running on a node:

kubectl drain
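A typical invocation during a node upgrade (a sketch; 10.0.6.11 stands in for the node being worked on):
root@master01:~# kubectl cordon 10.0.6.11
root@master01:~# kubectl drain 10.0.6.11 --ignore-daemonsets --delete-emptydir-data
# ... upgrade the node ...
root@master01:~# kubectl uncordon 10.0.6.11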

2.6 Deploying CoreDNS

root@master01:~/20230416-cases/1.coredns# cat coredns-v1.9.4.yaml
# __MACHINE_GENERATED_WARNING__

apiVersion: v1
kind: ServiceAccount
metadata:
name: coredns
namespace: kube-system
labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
  kubernetes.io/bootstrapping: rbac-defaults
  addonmanager.kubernetes.io/mode: Reconcile
name: system:coredns
rules:
- apiGroups:
- ""
resources:
- endpoints
- services
- pods
- namespaces
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- apiGroups:
- discovery.k8s.io
resources:
- endpointslices
verbs:
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
annotations:
  rbac.authorization.kubernetes.io/autoupdate: "true"
labels:
  kubernetes.io/bootstrapping: rbac-defaults
  addonmanager.kubernetes.io/mode: EnsureExists
name: system:coredns
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:coredns
subjects:
- kind: ServiceAccount
name: coredns
namespace: kube-system
---
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
labels:
    addonmanager.kubernetes.io/mode: EnsureExists
data:
Corefile: |
  .:53 {
      errors
      health {
          lameduck 5s
      }
      ready
      kubernetes cluster.local in-addr.arpa ip6.arpa {
          pods insecure
          fallthrough in-addr.arpa ip6.arpa
          ttl 30
      }
      prometheus :9153
      #forward . /etc/resolv.conf {
      forward . 223.6.6.6 {
          max_concurrent 1000
      }
      cache 600
      loop
      reload
      loadbalance
  }
      myserver.online {
        forward . 172.16.16.16:53
      }

---
apiVersion: apps/v1
kind: Deployment
metadata:
name: coredns
namespace: kube-system
labels:
  k8s-app: kube-dns
  kubernetes.io/cluster-service: "true"
  addonmanager.kubernetes.io/mode: Reconcile
  kubernetes.io/name: "CoreDNS"
spec:
 # replicas: not specified here:
 # 1. In order to make Addon Manager do not reconcile this replicas parameter.
 # 2. Default is 1.
 # 3. Will be tuned in real time if DNS horizontal auto-scaling is turned on.
replicas: 2
strategy:
  type: RollingUpdate
  rollingUpdate:
    maxUnavailable: 1
selector:
  matchLabels:
    k8s-app: kube-dns
template:
  metadata:
    labels:
      k8s-app: kube-dns
  spec:
    securityContext:
      seccompProfile:
        type: RuntimeDefault
    priorityClassName: system-cluster-critical
    serviceAccountName: coredns
    affinity:
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          podAffinityTerm:
            labelSelector:
              matchExpressions:
                - key: k8s-app
                  operator: In
                  values: ["kube-dns"]
            topologyKey: kubernetes.io/hostname
    tolerations:
      - key: "CriticalAddonsOnly"
        operator: "Exists"
    nodeSelector:
      kubernetes.io/os: linux
    containers:
    - name: coredns
      image: harbor.20180622.xyz/baseimages/coredns:1.9.4
      imagePullPolicy: IfNotPresent
      resources:
        limits:
          memory: 256Mi
          cpu: 200m
        requests:
          cpu: 100m
          memory: 70Mi
      args: [ "-conf", "/etc/coredns/Corefile" ]
      volumeMounts:
      - name: config-volume
        mountPath: /etc/coredns
        readOnly: true
      ports:
      - containerPort: 53
        name: dns
        protocol: UDP
      - containerPort: 53
        name: dns-tcp
        protocol: TCP
      - containerPort: 9153
        name: metrics
        protocol: TCP
      livenessProbe:
        httpGet:
          path: /health
          port: 8080
          scheme: HTTP
        initialDelaySeconds: 60
        timeoutSeconds: 5
        successThreshold: 1
        failureThreshold: 5
      readinessProbe:
        httpGet:
          path: /ready
          port: 8181
          scheme: HTTP
      securityContext:
        allowPrivilegeEscalation: false
        capabilities:
          add:
          - NET_BIND_SERVICE
          drop:
          - all
        readOnlyRootFilesystem: true
    dnsPolicy: Default
    volumes:
      - name: config-volume
        configMap:
          name: coredns
          items:
          - key: Corefile
            path: Corefile
---
apiVersion: v1
kind: Service
metadata:
name: kube-dns
namespace: kube-system
annotations:
  prometheus.io/port: "9153"
  prometheus.io/scrape: "true"
labels:
  k8s-app: kube-dns
  kubernetes.io/cluster-service: "true"
  addonmanager.kubernetes.io/mode: Reconcile
  kubernetes.io/name: "CoreDNS"
spec:
selector:
  k8s-app: kube-dns
clusterIP: 10.100.0.2
ports:
- name: dns
  port: 53
  protocol: UDP
- name: dns-tcp
  port: 53
  protocol: TCP
- name: metrics
  port: 9153
  protocol: TCP

Test:

root@master01:~/20230416-cases/1.coredns# kubectl exec -it net-test -- bash 
[root@net-test /]# ping 223.5.5.5
PING 223.5.5.5 (223.5.5.5) 56(84) bytes of data.
64 bytes from 223.5.5.5: icmp_seq=1 ttl=127 time=83.3 ms
^C
--- 223.5.5.5 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 83.352/83.352/83.352/0.000 ms
[root@net-test /]# ping www.baidu.com
PING www.a.shifen.com (180.101.50.188) 56(84) bytes of data.
64 bytes from 180.101.50.188 (180.101.50.188): icmp_seq=1 ttl=127 time=59.2 ms
64 bytes from 180.101.50.188 (180.101.50.188): icmp_seq=2 ttl=127 time=56.9 ms
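The pings above only prove outbound connectivity; to confirm CoreDNS resolves cluster names, a quick check from the same pod (the centos image may lack nslookup, so ping is used just for name resolution):
[root@net-test /]# cat /etc/resolv.conf    # nameserver should be the kube-dns clusterIP 10.100.0.2
[root@net-test /]# ping -c1 kubernetes.default.svc.cluster.local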

Deploy the dashboard

 

 

Generate a kubeconfig file (or token) for logging in to the dashboard; a sketch follows.
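A minimal sketch of one way to do this (the ServiceAccount name and namespace are placeholders, not taken from the original notes; cluster-admin is used only for convenience in a lab):
root@master01:~# kubectl create serviceaccount dashboard-admin -n kube-system
root@master01:~# kubectl create clusterrolebinding dashboard-admin --clusterrole=cluster-admin --serviceaccount=kube-system:dashboard-admin
root@master01:~# kubectl -n kube-system create token dashboard-admin   # paste the token into a copy of ~/.kube/config as the user's token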

Installing Kuboard

root@harbor:~# apt install nfs-server -y

 

root@harbor:~# cat /etc/exports
# /etc/exports: the access control list for filesystems which may be exported
# to NFS clients. See exports(5).
#
# Example for NFSv2 and NFSv3:
# /srv/homes hostname1(rw,sync,no_subtree_check) hostname2(ro,sync,no_subtree_check)
#
# Example for NFSv4:
# /srv/nfs4 gss/krb5i(rw,sync,fsid=0,crossmnt,no_subtree_check)
# /srv/nfs4/homes gss/krb5i(rw,sync,no_subtree_check)
/data/k8sdata/kuboard *(rw,no_root_squash,no_subtree_check)

root@harbor:~# mkdir -p /data/k8sdata/kuboard
root@harbor:~# exportfs -av
exporting *:/data/k8sdata/kuboard
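Before applying the manifest, the export can be checked from a k8s node (a sketch; 10.0.6.9 is assumed to be the harbor/NFS host, matching the hosts entry added earlier):
root@node01:~# apt install nfs-common -y   # needed on nodes that will mount the export
root@node01:~# showmount -e 10.0.6.9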

Modify the NFS server address in the yml file and save it.

root@master01:~/20230416-cases/3.kuboard# kubectl apply -f kuboard-all-in-one.yaml
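A quick check that Kuboard came up and which NodePort it exposes (a sketch; the namespace depends on the manifest, so filter by name):
root@master01:~# kubectl get pods -A | grep kuboard
root@master01:~# kubectl get svc -A | grep kuboard   # note the NodePort for browser access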

 

 

 


Installing KubeSphere

 

 

 
 

 
