Ansible-K8S Binary Installation

1. K8s Cluster Environment Planning

1.1 Single-master environment

1.2 Multi-master environment

1.3 Server inventory

Type              Server IP    Notes
Ansible (2)                    K8s deployment servers; can share hosts with other roles
K8S Master (3)                 K8s control plane, made highly available through a single VIP
Harbor (2)                     Highly available image registry servers
Etcd (3)                       Servers that store the K8s cluster data
Haproxy (2)                    Highly available etcd proxy servers
Node (2-N)                     Servers that actually run the containers; at least two for HA

1.4 Server preparation

The servers can be virtual machines or physical machines in a private cloud, or VMs in a public cloud. In a company-hosted IDC environment, the harbor and node roles can go directly on physical machines, while the master, etcd and load-balancer nodes can be virtual machines.

Hostname                  IP address      Notes
k8s-master1               192.168.1.70    Highly available master nodes
k8s-master2               192.168.1.71
k8s-ha1 (deploy-harbor)   192.168.1.72    Load balancer / HA, VIP 192.168.1.100
k8s-etcd1                 192.168.1.73    Highly available etcd cluster
k8s-etcd2                 192.168.1.74
k8s-etcd3                 192.168.1.75
k8s-node1                 192.168.1.76    K8s worker node

1.5 System configuration

VIP: 192.168.1.69:6443   # configured on the load balancer
OS: Ubuntu 20.04
K8s version: 1.24.x
Calico: 3.4.4

1.6 Base environment preparation (Ubuntu)

  • Configure apt sources
cat > /etc/apt/sources.list <<'EOF'
# the main and restricted components of focal are checked first
deb http://mirrors.aliyun.com/ubuntu/ focal main restricted
deb http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted
deb http://mirrors.aliyun.com/ubuntu/ focal universe
deb http://mirrors.aliyun.com/ubuntu/ focal-updates universe
deb http://mirrors.aliyun.com/ubuntu/ focal multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-updates multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu focal-security main restricted
deb http://mirrors.aliyun.com/ubuntu focal-security universe
deb http://mirrors.aliyun.com/ubuntu focal-security multiverse
EOF
apt-get update
  • Set the hostname
hostnamectl set-hostname xxxx
  • Configure the IP address
vim /etc/netplan/00-installer-config.yaml
  • System parameter tuning
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter

# Set the required sysctl parameters; they persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF

# Apply the sysctl parameters without rebooting
sudo sysctl --system
  • Disable swap
swapoff -a
sed -ri 's/.*swap.*/#&/' /etc/fstab
  • Disable the firewall (setenforce only applies to RHEL-family systems; Ubuntu does not ship SELinux, so disabling ufw is what matters here)
setenforce 0
ufw disable
  • Time synchronization
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
date -R
  • Host name mappings
cat > /etc/hosts <<'EOF'
192.168.1.70 k8s-master1
192.168.1.71 k8s-master2
192.168.1.72 k8s-ha1 harbor.nbrhce.com
192.168.1.73 k8s-etcd1
192.168.1.74 k8s-etcd2
192.168.1.75 k8s-etcd3
192.168.1.76 k8s-node1
EOF
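
Before moving on, a quick hedged sanity check (not part of the original notes) that the settings above took effect; run it on each node:

sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward   # both values should be 1
swapon --show                                                   # no output means swap is off
getent hosts k8s-master1 harbor.nbrhce.com                      # the /etc/hosts entries resolve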

2. Load Balancer Deployment

2.1 Deploy keepalived

root@k8s-ha1:~#  apt install keepalived -y

# Locate the sample configuration shipped with the package
root@k8s-ha1:~#  find / -name "keepalived*"
# This is the sample file to adapt
/usr/share/doc/keepalived/samples/keepalived.conf.vrrp

# Copy it as the starting template
root@k8s-ha1:~#  cp /usr/share/doc/keepalived/samples/keepalived.conf.vrrp /etc/keepalived/keepalived.conf
  • Configuration files
# Master node
root@k8s-ha1:~# vim /etc/keepalived/keepalived.conf
vrrp_instance VI_1 {
    state MASTER
    interface eth0
    virtual_router_id 51
    priority 100
    advert_int 3

    authentication {
        auth_type PASS
        auth_pass 123abc
}
    virtual_ipaddress {
        192.168.1.72 dev eth0 label eth0:1
    }
}

# Backup node (note: in a normal setup the backup advertises the same VIP as the master; the differing addresses below are kept as captured from this lab)
root@localhost:~# cat /etc/keepalived/keepalived.conf
vrrp_instance VI_1 {
    state BACKUP
    interface eth0
    virtual_router_id 51
    priority 50
    advert_int 3

    authentication {
        auth_type PASS
        auth_pass 123abc
}
    virtual_ipaddress {
        192.168.1.73 dev eth0 label eth0:1
    }
}


# Start and enable the service
systemctl start keepalived.service 
systemctl enable keepalived.service 

# Check that the VIP has appeared
root@k8s-ha1:~# ifconfig
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.1.73  netmask 255.255.255.0  broadcast 192.168.1.255
        inet6 fe80::20c:29ff:feab:bfb7  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:ab:bf:b7  txqueuelen 1000  (Ethernet)
        RX packets 492885  bytes 250991107 (250.9 MB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 387918  bytes 23556907 (23.5 MB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

eth0:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.1.72  netmask 255.255.255.255  broadcast 0.0.0.0
        ether 00:0c:29:ab:bf:b7  txqueuelen 1000  (Ethernet)

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 182  bytes 15942 (15.9 KB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 182  bytes 15942 (15.9 KB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

2.2 Deploy haproxy

root@k8s-ha1:~# apt install -y haproxy
  • Configuration file
# The master and backup carry the same haproxy configuration; when ha1 goes down the VIP moves to ha2, but there is still a gap (see the note after the port check below)
root@k8s-ha1:~# vim /etc/haproxy/haproxy.cfg
... (omitted)
listen k8s_api_nodes_6443
#Bind the VIP address and port here
        bind 192.168.1.100:6443
        mode tcp
        # backend address pool
        server 192.168.1.70 192.168.1.70:6443 check inter 2000 fall 3 rise 5
        server 192.168.1.71 192.168.1.71:6443 check inter 2000 fall 3 rise 5

# Start
systemctl restart haproxy.service
systemctl enable haproxy.service 

# Check: port 6443 is now being listened on

root@k8s-ha1:~# ss -lntup
Netid    State     Recv-Q    Send-Q       Local Address:Port       Peer Address:Port   Process                                       
udp      UNCONN    0         0            127.0.0.53%lo:53              0.0.0.0:*       users:(("systemd-resolve",pid=790,fd=12))    
tcp      LISTEN    0         262124        192.168.1.69:6443            0.0.0.0:*       users:(("haproxy",pid=6349,fd=7))            
tcp      LISTEN    0         4096         127.0.0.53%lo:53              0.0.0.0:*       users:(("systemd-resolve",pid=790,fd=13))    
tcp      LISTEN    0         128                0.0.0.0:22              0.0.0.0:*       users:(("sshd",pid=854,fd=3))                
tcp      LISTEN    0         128              127.0.0.1:6010            0.0.0.0:*       users:(("sshd",pid=1151,fd=11))              
tcp      LISTEN    0         128                   [::]:22                 [::]:*       users:(("sshd",pid=854,fd=4))                
tcp      LISTEN    0         128                  [::1]:6010               [::]:*       users:(("sshd",pid=1151,fd=10)) 

# Gap: a check script for haproxy is still missing here; whenever haproxy itself dies, keepalived should stop advertising the VIP so it fails over
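
One way to close that gap (a sketch only, not from the original notes; the script path, interval and weight are assumptions) is a keepalived vrrp_script that fails when haproxy is down, lowering this node's priority so the VIP moves to the backup:

cat > /etc/keepalived/check_haproxy.sh <<'EOF'
#!/bin/bash
# exit non-zero when haproxy is not running
systemctl is-active --quiet haproxy || exit 1
EOF
chmod +x /etc/keepalived/check_haproxy.sh

# then reference it in /etc/keepalived/keepalived.conf on both nodes:
# vrrp_script chk_haproxy {
#     script "/etc/keepalived/check_haproxy.sh"
#     interval 2
#     weight -60
# }
# vrrp_instance VI_1 {
#     ...
#     track_script {
#         chk_haproxy
#     }
# }
systemctl restart keepalived.service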

3. Harbor Deployment

  • Because my machine resources are limited, the harbor service and ansible share one host

The kubernetes master and node hosts use containerd, which can be installed ahead of time or by the deployment tool. Docker is installed on the harbor node, since harbor will be deployed there.

3.1 Install Docker on the Harbor host

# I install docker with a prepared script here; alternatively install a recent docker release yourself (this lab uses 19.03)
root@k8s-harbor1:~# tar xf docker-list.tar.gz
root@k8s-harbor1:~# bash docker-install.sh
root@k8s-harbor1:~# docker version
Client: Docker Engine - Community
 Version:           19.03.15
 API version:       1.40
 Go version:        go1.13.15
 Git commit:        99e3ed8
 Built:             Sat Jan 30 03:11:43 2021
 OS/Arch:           linux/amd64
 Experimental:      false

Server: Docker Engine - Community
 Engine:
  Version:          19.03.15
  API version:      1.40 (minimum version 1.12)
  Go version:       go1.13.15
  Git commit:       99e3ed8
  Built:            Sat Jan 30 03:18:13 2021
  OS/Arch:          linux/amd64
  Experimental:     false
 containerd:
  Version:          v1.3.9
  GitCommit:        ea765aba0d05254012b0b9e595e995c09186427f
 runc:
  Version:          1.0.0-rc10
  GitCommit:        dc9208a3303feef5b3839f4323d9beb36df0a9dd
 docker-init:
  Version:          0.18.0
  GitCommit:        fec3683

3.2 Configure HTTPS for Harbor with a self-signed certificate

Business images are uploaded to the Harbor server and distributed from there instead of being pulled from the public internet, which improves distribution speed and data security.

root@k8s-harbor1:~# mkdir /apps/certs -p 

# Generate the CA private key
root@k8s-harbor1:/apps/certs# openssl genrsa -out ca.key 4096

# Use the CA private key above to generate the CA certificate ca.crt
root@k8s-harbor1:/apps/certs# openssl req -x509 -new -nodes -sha512 -days 3650 \
            -subj "/C=CN/ST=Beijing/L=Beijing/O=example/OU=Personal/CN=harbor.nbrhce.com" \
            -key ca.key \
            -out ca.crt
# Generate the private key for the server certificate
root@k8s-harbor1:/apps/certs#  openssl genrsa -out harbor.nbrhce.com.key 4096

# Generate the certificate signing request (CSR)
root@k8s-harbor1:/apps/certs# openssl req -sha512 -new \
            -subj "/C=CN/ST=Beijing/L=Beijing/O=example/OU=Personal/CN=harbor.nbrhce.com" \
            -key harbor.nbrhce.com.key \
            -out harbor.nbrhce.com.csr

# Create an x509 v3 extension file
root@k8s-harbor1:/apps/certs# cat > v3.ext << 'EOF'
            authorityKeyIdentifier=keyid,issuer
            basicConstraints=CA:FALSE
            keyUsage = digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment
            extendedKeyUsage = serverAuth
            subjectAltName = @alt_names

# DNS.1 must match the real domain; the other two entries can be anything
            [alt_names]
            DNS.1=harbor.nbrhce.com
            DNS.2=harbor.nbrhce.net
            DNS.3=harbor.nbrhce.local
EOF

# Use the extension file to issue the certificate for your Harbor host (harbor.nbrhce.com.crt)
root@k8s-harbor1:/apps/certs# openssl x509 -req -sha512 -days 3650 \
            -extfile v3.ext \
            -CA ca.crt -CAkey  ca.key -CAcreateserial \
            -in harbor.nbrhce.com.csr \
            -out harbor.nbrhce.com.crt
            
# Convert the server certificate into .cert format for docker clients (the input is the .crt issued above)
root@k8s-harbor1:/apps/certs# openssl x509 -inform PEM -in harbor.nbrhce.com.crt -out harbor.nbrhce.com.cert
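
A quick, hedged verification (not in the original notes) that the issued certificate chains to the CA and carries the expected SANs; the -ext option needs OpenSSL 1.1.1+, which Ubuntu 20.04 ships:

openssl verify -CAfile ca.crt harbor.nbrhce.com.crt
openssl x509 -in harbor.nbrhce.com.crt -noout -subject -ext subjectAltName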

3.3 Configure harbor certificate trust

# Run the Harbor installer (harbor.yml must already point at the certificates generated above; see the sketch below)
./install.sh  --with-trivy --with-chartmuseum
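
A hedged sketch of the harbor.yml fields that have to match those certificates (the /apps/harbor install path and the values shown are assumptions based on this lab, not taken from the original notes):

# vim /apps/harbor/harbor.yml
#   hostname: harbor.nbrhce.com
#   https:
#     port: 443
#     certificate: /apps/certs/harbor.nbrhce.com.crt
#     private_key: /apps/certs/harbor.nbrhce.com.key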

# Configure certificate trust: a directory named after the registry domain must be created under this path, otherwise docker on the nodes will not recognise the domain
root@k8s-harbor1:/apps/certs#  mkdir /etc/docker/certs.d/harbor.nbrhce.com -p

# Put the generated certificates into that directory; I copied everything over, though copying just the cert is enough
root@k8s-ha-deploy:/apps/certs# cp harbor.nbrhce.com.crt /etc/docker/certs.d/harbor.nbrhce.com/
root@k8s-ha-deploy:/apps/certs# cp ca.crt /etc/docker/certs.d/harbor.nbrhce.com/
root@k8s-ha-deploy:/apps/certs# cp ca.key /etc/docker/certs.d/harbor.nbrhce.com/

# In the docker unit file, add the registry domain and the harbor host IP with port 5000 as insecure registries on the ExecStart line; port 5000 is the registry proxy inside the harbor containers, so it is not visible on the host

root@k8s-ha-deploy:/apps/certs# vim /lib/systemd/system/docker.service
ExecStart=/usr/bin/dockerd -H fd://  --insecure-registry=192.168.1.72:5000 --insecure-registry=harbor.nbrhce.com  --containerd=/run/containerd/containerd.sock

# Do not forget to restart docker after the change; restarting docker also brings harbor down, so start harbor again afterwards
systemctl daemon-reload
systemctl restart docker
docker-compose restart   # or: docker-compose up -d

# Test: pull an image and push it to the private registry. Log in first with a harbor user; create the user in harbor beforehand and grant it admin (or project) rights so it can push images
root@k8s-ha-deploy:/apps/harbor# docker login harbor.nbrhce.com
Username:
password:

root@k8s-ha-deploy:/apps/harbor# docker pull alpine
root@k8s-ha-deploy:/apps/harbor# docker tag alpine:latest harbor.nbrhce.com/baseimages/alpine:latest
root@k8s-ha-deploy:/apps/harbor# docker push harbor.nbrhce.com/baseimages/alpine:latest
  • The lock icon in the browser address bar means the HTTPS configuration succeeded

  • Test pushing an image to harbor (create the target project in advance)

4. Ansible Deployment

When old servers are brought under Ansible automation, their Python is correspondingly old, and newer Ansible releases cannot call the modules needed to manage them.

  • Base environment preparation
root@k8s-deploy:~# apt install ansible sshpass -y
root@k8s-deploy:~# ssh-keygen 
# Distribute the SSH public key to all nodes in one go
root@k8s-deploy:~# vim key.sh

#!/bin/bash

IP="
192.168.1.70
192.168.1.71
192.168.1.73
192.168.1.75
192.168.1.76
192.168.1.77
192.168.1.78
192.168.1.79
192.168.1.80
"
for node in ${IP};do
 sshpass -p 123456 ssh-copy-id ${node} -o StrictHostKeyChecking=no
 echo "${node} key copied"
 ssh ${node} ln -sv /usr/bin/python3 /usr/bin/python
 echo "${node} /usr/bin/python3 symlink created"
 done
 
# After distributing the keys, verify that key login works and the python symlink exists on every node, for example:
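
A minimal hedged check (the IP list is abbreviated; extend it to all of your nodes):

for node in 192.168.1.70 192.168.1.71 192.168.1.73; do
  ssh -o BatchMode=yes ${node} "hostname && python --version"
done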

5. Download the kubeasz Project and Components

  • A docker environment exists on the deploy node because the tool first pulls the images with docker, copies them into the kubeasz container, and then deletes the downloaded images
  • hub.docker.io kubeasz-k8s-bin

5.1 Download the project

root@k8s-deploy:~# apt install git -y
root@k8s-deploy:~# export release=3.3.1
root@k8s-deploy:~# wget https://github.com/easzlab/kubeasz/releases/download/${release}/ezdown
root@k8s-deploy:~# vim ezdown #customize the component versions to download - nothing changed this time
root@k8s-deploy:~# chmod a+x ./ezdown
root@k8s-deploy:~# ./ezdown -D
root@k8s-deploy:~# ll /etc/kubeasz/
total 136
drwxrwxr-x 12 root root  4096 Nov 17 13:22 ./
drwxr-xr-x 95 root root  4096 Nov 17 13:22 ../
-rw-rw-r--  1 root root 20304 Jul  3 20:37 ansible.cfg
drwxr-xr-x  3 root root  4096 Nov 17 13:22 bin/
drwxrwxr-x  8 root root  4096 Jul  3 20:51 docs/
drwxr-xr-x  2 root root  4096 Nov 17 13:30 down/
drwxrwxr-x  2 root root  4096 Jul  3 20:51 example/
-rwxrwxr-x  1 root root 25012 Jul  3 20:37 ezctl*
-rwxrwxr-x  1 root root 25266 Jul  3 20:37 ezdown*
drwxrwxr-x  3 root root  4096 Jul  3 20:51 .github/
-rw-rw-r--  1 root root   301 Jul  3 20:37 .gitignore
drwxrwxr-x 10 root root  4096 Jul  3 20:51 manifests/
drwxrwxr-x  2 root root  4096 Jul  3 20:51 pics/
drwxrwxr-x  2 root root  4096 Jul  3 20:51 playbooks/
-rw-rw-r--  1 root root  5058 Jul  3 20:37 README.md
drwxrwxr-x 22 root root  4096 Jul  3 20:51 roles/
drwxrwxr-x  2 root root  4096 Jul  3 20:51 tools/

5.2 Create the cluster

# Create a k8s cluster definition; this simply creates a directory containing hosts and config.yml
root@k8s-deploy:/etc/kubeasz# ./ezctl new k8s-cluster1
2022-11-17 13:38:04 DEBUG generate custom cluster files in /etc/kubeasz/clusters/k8s-cluster1
2022-11-17 13:38:04 DEBUG set versions
2022-11-17 13:38:04 DEBUG cluster k8s-cluster1: files successfully created.
2022-11-17 13:38:04 INFO next steps 1: to config '/etc/kubeasz/clusters/k8s-cluster1/hosts'
2022-11-17 13:38:04 INFO next steps 2: to config '/etc/kubeasz/clusters/k8s-cluster1/config.yml'

5.3 Edit the hosts file

# In this file, fill in the IP addresses of your nodes

root@k8s-ha1:/etc/kubeasz/clusters/k8s-cluster1# cat hosts
# 'etcd' cluster should have odd member(s) (1,3,5,...)
[etcd]
192.168.1.76
192.168.1.77
192.168.1.78

# master node(s)
[kube_master]
192.168.1.70
192.168.1.71

# work node(s)
[kube_node]
192.168.1.79

# [optional] harbor server, a private docker registry
# 'NEW_INSTALL': 'true' to install a harbor server; 'false' to integrate with existed one
[harbor]
#192.168.1.8 NEW_INSTALL=false

# [optional] loadbalance for accessing k8s from outside
[ex_lb]
#192.168.1.6 LB_ROLE=backup EX_APISERVER_VIP=192.168.1.250 EX_APISERVER_PORT=8443
#192.168.1.7 LB_ROLE=master EX_APISERVER_VIP=192.168.1.250 EX_APISERVER_PORT=8443

# [optional] ntp server for the cluster
[chrony]
#192.168.1.1

[all:vars]
# --------- Main Variables ---------------
# Secure port for apiservers
SECURE_PORT="6443"

# Cluster container-runtime supported: docker, containerd
# if k8s version >= 1.24, docker is not supported
CONTAINER_RUNTIME="containerd"

# Network plugins supported: calico, flannel, kube-router, cilium, kube-ovn
CLUSTER_NETWORK="calico"

# Service proxy mode of kube-proxy: 'iptables' or 'ipvs'
PROXY_MODE="ipvs"

#================================= changed here: made the subnet a bit larger =============================#

# K8S Service CIDR, not overlap with node(host) networking
SERVICE_CIDR="10.100.0.0/16"
#================================= changed here: made the subnet a bit larger =============================#
# Cluster CIDR (Pod CIDR), not overlap with node(host) networking
CLUSTER_CIDR="10.200.0.0/16"

# NodePort Range
NODE_PORT_RANGE="30000-32767"

# Cluster DNS Domain
CLUSTER_DNS_DOMAIN="cluster.local"

# -------- Additional Variables (don't change the default value right now) ---
# Binaries Directory
#================================= changed here: where the binaries are stored ===============================#
bin_dir="/usr/local/bin"

# Deploy Directory (kubeasz workspace)
base_dir="/etc/kubeasz"

# Directory for a specific cluster
cluster_dir="{{ base_dir }}/clusters/k8s-cluster1"

# CA and other components cert/key Directory
ca_dir="/etc/kubernetes/ssl"
root@k8s-ha1:/etc/kubeasz/clusters/k8s-cluster1# 

5.4 config.yml

# Change the image used for the sandbox (pause) container
# The default is the first image below; tag it and push it to the private registry for faster pulls, then update config.yml accordingly. If this image cannot be pulled, pod initialization cannot succeed (the re-tag commands are sketched below)
easzlab.io.local:5000/easzlab/pause:3.7
harbor.nbrhce.com/baseimages/pause:3.7
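
A hedged example of the re-tag (assumes the easzlab pause image is already local after ./ezdown -D and that a baseimages project exists in Harbor):

docker tag easzlab.io.local:5000/easzlab/pause:3.7 harbor.nbrhce.com/baseimages/pause:3.7
docker push harbor.nbrhce.com/baseimages/pause:3.7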

root@k8s-ha1:/etc/kubeasz/clusters/k8s-cluster1# cat config.yml
############################
# prepare
############################
# 可选离线安装系统软件包 (offline|online)
INSTALL_SOURCE: "online"

# 可选进行系统安全加固 github.com/dev-sec/ansible-collection-hardening
OS_HARDEN: false


############################
# role:deploy
############################
# default: ca will expire in 100 years
# default: certs issued by the ca will expire in 50 years
CA_EXPIRY: "876000h"
CERT_EXPIRY: "438000h"

# kubeconfig 配置参数
CLUSTER_NAME: "cluster1"
CONTEXT_NAME: "context-{{ CLUSTER_NAME }}"

# k8s version
K8S_VER: "1.24.2"

############################
# role:etcd
############################
# 设置不同的wal目录,可以避免磁盘io竞争,提高性能
ETCD_DATA_DIR: "/var/lib/etcd"
ETCD_WAL_DIR: ""


############################
# role:runtime [containerd,docker]
############################
# ------------------------------------------- containerd
# [.]启用容器仓库镜像
ENABLE_MIRROR_REGISTRY: false

# [containerd]基础容器镜像
#================================= changed here: sandbox image ======================================#
SANDBOX_IMAGE: "harbor.nbrhce.com/baseimages/pause:3.7"

# [containerd]容器持久化存储目录
CONTAINERD_STORAGE_DIR: "/var/lib/containerd"

# ------------------------------------------- docker
# [docker]容器存储目录
DOCKER_STORAGE_DIR: "/var/lib/docker"

# [docker]开启Restful API
ENABLE_REMOTE_API: false

# [docker]信任的HTTP仓库
#============================== added my own harbor registry address here ============================#
INSECURE_REG: '["http://easzlab.io.local:5000","harbor.nbrhce.com"]'


############################
# role:kube-master
############################
# k8s 集群 master 节点证书配置,可以添加多个ip和域名(比如增加公网ip和域名)
#================================= changed here: IP set to the VIP address, the domain can be anything =================#
MASTER_CERT_HOSTS:
  - "192.168.1.72"
  - "api.myserver.com"
  #- "www.test.com"

# node 节点上 pod 网段掩码长度(决定每个节点最多能分配的pod ip地址)
# 如果flannel 使用 --kube-subnet-mgr 参数,那么它将读取该设置为每个节点分配pod网段
# https://github.com/coreos/flannel/issues/847
NODE_CIDR_LEN: 23


############################
# role:kube-node
############################
# Kubelet 根目录
KUBELET_ROOT_DIR: "/var/lib/kubelet"

# node节点最大pod 数
#================================= changed here: raised to 500 ======================================#
MAX_PODS: 500

# 配置为kube组件(kubelet,kube-proxy,dockerd等)预留的资源量
# 数值设置详见templates/kubelet-config.yaml.j2
KUBE_RESERVED_ENABLED: "no"

# k8s 官方不建议草率开启 system-reserved, 除非你基于长期监控,了解系统的资源占用状况;
# 并且随着系统运行时间,需要适当增加资源预留,数值设置详见templates/kubelet-config.yaml.j2
# 系统预留设置基于 4c/8g 虚机,最小化安装系统服务,如果使用高性能物理机可以适当增加预留
# 另外,集群安装时候apiserver等资源占用会短时较大,建议至少预留1g内存
SYS_RESERVED_ENABLED: "no"


############################
# role:network [flannel,calico,cilium,kube-ovn,kube-router]
############################
# ------------------------------------------- flannel
# [flannel]设置flannel 后端"host-gw","vxlan"等
FLANNEL_BACKEND: "vxlan"
DIRECT_ROUTING: false

# [flannel] flanneld_image: "quay.io/coreos/flannel:v0.10.0-amd64"
flannelVer: "v0.15.1"
flanneld_image: "easzlab.io.local:5000/easzlab/flannel:{{ flannelVer }}"

# ------------------------------------------- calico
# [calico]设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 docs/setup/calico.md
CALICO_IPV4POOL_IPIP: "Always"

# [calico]设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手工指定也可以自动发现
IP_AUTODETECTION_METHOD: "can-reach={{ groups['kube_master'][0] }}"

# [calico]设置calico 网络 backend: brid, vxlan, none
CALICO_NETWORKING_BACKEND: "brid"

# [calico]设置calico 是否使用route reflectors
# 如果集群规模超过50个节点,建议启用该特性
CALICO_RR_ENABLED: false

# CALICO_RR_NODES 配置route reflectors的节点,如果未设置默认使用集群master节点 
# CALICO_RR_NODES: ["192.168.1.1", "192.168.1.2"]
CALICO_RR_NODES: []

# [calico]更新支持calico 版本: [v3.3.x] [v3.4.x] [v3.8.x] [v3.15.x]
calico_ver: "v3.19.4"

# [calico]calico 主版本
calico_ver_main: "{{ calico_ver.split('.')[0] }}.{{ calico_ver.split('.')[1] }}"

# ------------------------------------------- cilium
# [cilium]镜像版本
cilium_ver: "1.11.6"
cilium_connectivity_check: true
cilium_hubble_enabled: false
cilium_hubble_ui_enabled: false

# ------------------------------------------- kube-ovn
# [kube-ovn]选择 OVN DB and OVN Control Plane 节点,默认为第一个master节点
OVN_DB_NODE: "{{ groups['kube_master'][0] }}"

# [kube-ovn]离线镜像tar包
kube_ovn_ver: "v1.5.3"

# ------------------------------------------- kube-router
# [kube-router]公有云上存在限制,一般需要始终开启 ipinip;自有环境可以设置为 "subnet"
OVERLAY_TYPE: "full"

# [kube-router]NetworkPolicy 支持开关
FIREWALL_ENABLE: true

# [kube-router]kube-router 镜像版本
kube_router_ver: "v0.3.1"
busybox_ver: "1.28.4"


############################
# role:cluster-addon
############################
# coredns 自动安装
#================================= changed here: set to no =======================================#
dns_install: "no"
corednsVer: "1.9.3"
ENABLE_LOCAL_DNS_CACHE: false
dnsNodeCacheVer: "1.21.1"
# 设置 local dns cache 地址
LOCAL_DNS_CACHE: "169.254.20.10"

# metric server 自动安装
#================================= changed here: set to no =======================================#
metricsserver_install: "no"
metricsVer: "v0.5.2"

# dashboard 自动安装
#================================= changed here: set to no =======================================#
dashboard_install: "no"
dashboardVer: "v2.5.1"
dashboardMetricsScraperVer: "v1.0.8"

# prometheus 自动安装
#================================= changed here: set to no =======================================#
prom_install: "no"
prom_namespace: "monitor"
prom_chart_ver: "35.5.1"

# nfs-provisioner 自动安装
#================================= changed here: set to no =======================================#
nfs_provisioner_install: "no"
nfs_provisioner_namespace: "kube-system"
nfs_provisioner_ver: "v4.0.2"
nfs_storage_class: "managed-nfs-storage"
nfs_server: "192.168.1.10"
nfs_path: "/data/nfs"

# network-check 自动安装
network_check_enabled: false 
network_check_schedule: "*/5 * * * *"

############################
# role:harbor
############################
# harbor version,完整版本号
HARBOR_VER: "v2.1.3"
HARBOR_DOMAIN: "harbor.easzlab.io.local"
HARBOR_TLS_PORT: 8443

# if set 'false', you need to put certs named harbor.pem and harbor-key.pem in directory 'down'
HARBOR_SELF_SIGNED_CERT: true

# install extra component
HARBOR_WITH_NOTARY: false
HARBOR_WITH_TRIVY: false
HARBOR_WITH_CLAIR: false
HARBOR_WITH_CHARTMUSEUM: true
root@k8s-ha1:/etc/kubeasz/clusters/k8s-cluster1# 

5.5 Edit the ansible 01.prepare.yml playbook

root@deploy-harbor:/etc/kubeasz# vim playbooks/01.prepare.yml
#In the hosts list, keep only these three groups
# [optional] to synchronize system time of nodes with 'chrony' 
- hosts:
  - kube_master
  - kube_node
  - etcd
  roles:
  - { role: os-harden, when: "OS_HARDEN|bool" }
  - { role: chrony, when: "groups['chrony']|length > 0" }

# to create CA, kubeconfig, kube-proxy.kubeconfig etc.
- hosts: localhost
  roles:
  - deploy

# prepare tasks for all nodes
- hosts:
  - kube_master
  - kube_node
  - etcd
  roles:
  - prepare

6. Deploy the K8S Cluster

Use the ansible playbooks to initialize the environment and deploy the highly available k8s cluster.

  • Environment initialization
#Help output
root@k8s-ha1:/etc/kubeasz# ./ezctl --help
Usage: ezctl COMMAND [args]
-------------------------------------------------------------------------------------
Cluster setups:
    list		             to list all of the managed clusters
    checkout    <cluster>            to switch default kubeconfig of the cluster
    new         <cluster>            to start a new k8s deploy with name 'cluster'
    setup       <cluster>  <step>    to setup a cluster, also supporting a step-by-step way
    start       <cluster>            to start all of the k8s services stopped by 'ezctl stop'
    stop        <cluster>            to stop all of the k8s services temporarily
    upgrade     <cluster>            to upgrade the k8s cluster
    destroy     <cluster>            to destroy the k8s cluster
    backup      <cluster>            to backup the cluster state (etcd snapshot)
    restore     <cluster>            to restore the cluster state from backups
    start-aio		             to quickly setup an all-in-one cluster with 'default' settings

Cluster ops:
    add-etcd    <cluster>  <ip>      to add a etcd-node to the etcd cluster
    add-master  <cluster>  <ip>      to add a master node to the k8s cluster
    add-node    <cluster>  <ip>      to add a work node to the k8s cluster
    del-etcd    <cluster>  <ip>      to delete a etcd-node from the etcd cluster
    del-master  <cluster>  <ip>      to delete a master node from the k8s cluster
    del-node    <cluster>  <ip>      to delete a work node from the k8s cluster

Extra operation:
    kcfg-adm    <cluster>  <args>    to manage client kubeconfig of the k8s cluster

Use "ezctl help <command>" for more information about a given command.

#Reading the ezctl script shows what each numbered step does
root@deploy-harbor:/etc/kubeasz# vim ezctl
 cat <<EOF
available steps:
    01  prepare            to prepare CA/certs & kubeconfig & other system settings 
    02  etcd               to setup the etcd cluster
    03  container-runtime  to setup the container runtime(docker or containerd)
    04  kube-master        to setup the master nodes
    05  kube-node          to setup the worker nodes
    06  network            to setup the network plugin
    07  cluster-addon      to setup other useful plugins
    90  all                to run 01~07 all at once
    10  ex-lb              to install external loadbalance for accessing k8s from outside
    11  harbor             to install a new harbor server or to integrate with an existed one

6.1 Create the cluster base settings

root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 01

  • Problems encountered
If apt is stuck on a lock held by another process, run the following:

sudo rm /var/lib/apt/lists/lock
sudo rm /var/cache/apt/archives/lock 
sudo rm /var/lib/dpkg/lock* 
sudo dpkg --configure -a
sudo apt update

6.2 Deploy the etcd cluster

# Every node that pulls images needs this, so that containerd can pull from the harbor registry
# It is added through ansible: put it into the template before running the play, and if it does not show up on a node, add it there by hand

root@k8s-ha1:/etc/kubeasz# vim roles/containerd/templates/config.toml.j2 

# This configures the private harbor registry so the nodes can pull from it
# around line 147
    [plugins."io.containerd.grpc.v1.cri".registry.mirrors."harbor.nbrhce.com"]
      endpoint = ["https://harbor.nbrhce.com"]
    [plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.nbrhce.com".tls]
      insecure_skip_verify = true
    # this user is a harbor-internal account
    [plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.nbrhce.com".auth]
      username = "quyi"
      password = "Harbor12345"

#If the block was not generated on a node, add it manually
root@k8s-node1:~# vim /etc/containerd/config.toml

#Restart containerd
systemctl daemon-reload 
systemctl restart containerd

#Test that an image can be pulled; this one was pushed to harbor earlier, push one yourself if it does not exist
ln -s /opt/kube/bin/crictl /usr/bin
crictl  pull harbor.nbrhce.com/demo/alpine:v1


root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 02

#After the step completes, check etcd health
export NODE_IPS="192.168.1.73 192.168.1.74 192.168.1.75"
for ip in ${NODE_IPS}; do ETCDCTL_API=3 /opt/kube/bin/etcdctl --endpoints=https://${ip}:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/kubernetes/ssl/etcd.pem --key=/etc/kubernetes/ssl/etcd-key.pem endpoint health;
done

#Expected result
https://192.168.1.76:2379 is healthy: successfully committed proposal: took = 8.57508ms
https://192.168.1.77:2379 is healthy: successfully committed proposal: took = 10.019689ms
https://192.168.1.78:2379 is healthy: successfully committed proposal: took = 8.723699ms
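
Optionally (not part of the original notes), endpoint status also shows the leader and DB size per member:

for ip in ${NODE_IPS}; do ETCDCTL_API=3 /opt/kube/bin/etcdctl --endpoints=https://${ip}:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/kubernetes/ssl/etcd.pem --key=/etc/kubernetes/ssl/etcd-key.pem endpoint status --write-out=table;
done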

root@k8s-master1:~# crictl pull harbor.nbrhce.com/baseimages/pause:3.7
Image is up to date for sha256:221177c6082a88ea4f6240ab2450d540955ac6f4d5454f0e15751b653ebda165

6.3 Deploy the container runtime

#If the sandbox image cannot be pulled, initialization will not succeed (default: easzlab.io.local:5000/easzlab/pause:3.7, changed here to the harbor copy)
root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 03

6.4 Deploy the master nodes

root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 04

Once the steps above have succeeded, check the nodes from the deploy machine:

root@k8s-ha1:/etc/kubeasz# kubectl get nodes
NAME           STATUS                     ROLES    AGE    VERSION
192.168.1.70   Ready,SchedulingDisabled   master   102s   v1.24.2
192.168.1.71   Ready,SchedulingDisabled   master   102s   v1.24.2

6.5 Deploy the worker nodes

root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 05
root@deploy-harbor:/etc/kubeasz# kubectl get nodes
NAME           STATUS                     ROLES    AGE     VERSION
192.168.1.70   Ready,SchedulingDisabled   master   8m55s   v1.24.2
192.168.1.74   Ready                      node     50s     v1.24.2

6.6 Deploy calico

#The calico images need to be re-tagged and pushed to the private registry
#Template file location
root@k8s-ha1:/etc/kubeasz# vim roles/calico/templates/calico-v3.19.yaml.j2
docker tag easzlab.io.local:5000/calico/node:v3.19.4 harbor.nbrhce.com/baseimages/calico/node:v3.19.4
docker push harbor.nbrhce.com/baseimages/calico/node:v3.19.4

docker tag calico/pod2daemon-flexvol:v3.19.4 harbor.nbrhce.com/baseimages/calico-pod2daemon-flexvol:v3.19.4 
docker push harbor.nbrhce.com/baseimages/calico-pod2daemon-flexvol:v3.19.4

docker tag calico/cni:v3.19.4 harbor.nbrhce.com/baseimages/calico/cni:v3.19.4
docker push harbor.nbrhce.com/baseimages/calico/cni:v3.19.4

docker tag calico/kube-controllers:v3.19.4 harbor.nbrhce.com/baseimages/calico/kube-controllers:v3.19.4
docker push harbor.nbrhce.com/baseimages/calico/kube-controllers:v3.19.4
  • Replace the image references in the template
#This template can also be used later to change address pools and other settings
root@k8s-ha1:/etc/kubeasz# vim roles/calico/templates/calico-v3.19.yaml.j2 
  • After the images above have been replaced, run step 06
root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 06

#Success
root@deploy-harbor:/etc/kubeasz# kubectl get nodes
NAME           STATUS                     ROLES    AGE   VERSION
192.168.1.70   Ready,SchedulingDisabled   master   21h   v1.24.2
192.168.1.74   Ready                      node     21h   v1.24.2
root@deploy-harbor:/etc/kubeasz# kubectl get pods -A
NAMESPACE     NAME                                       READY   STATUS    RESTARTS        AGE
kube-system   calico-kube-controllers-68555f5f97-fks8p   1/1     Running   1 (99m ago)     21h
kube-system   calico-node-gdc8m                          1/1     Running   40 (9m8s ago)   21h
kube-system   calico-node-h5drr                          1/1     Running   36 (78s ago)    21h
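
As an optional extra check (not in the original notes), calicoctl can confirm BGP peering if kubeasz placed it in the bin_dir on the nodes:

calicoctl node status    # run on a node; all peers should show Established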

7. Run a Test Container

# create the namespace first if it does not exist yet: kubectl create ns myserver
root@deploy-harbor:/etc/kubeasz# kubectl run net-test1 --image=centos:7.9.2009 sleep 10000000 -n myserver

root@deploy-harbor:/etc/kubeasz# kubectl get pods -n myserver -o wide
NAME        READY   STATUS    RESTARTS   AGE   IP             NODE           NOMINATED NODE   READINESS GATES
net-test1   1/1     Running   0          66s   10.200.36.65   192.168.1.74   <none>           <none>

root@deploy-harbor:/etc/kubeasz# kubectl exec -it net-test1 bash -n myserver
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.

#Name resolution fails because CoreDNS is not installed yet
[root@net-test1 /]# ping baidu.com
ping: baidu.com: Name or service not known
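
Connectivity by IP still works at this point; a hedged check (223.5.5.5 is just an example public address, and this assumes outbound NAT, the calico default in kubeasz):

[root@net-test1 /]# ping -c 2 223.5.5.5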

8. Add a Master Node (scale out / in)

#First look at the config file generated on the node; it is regenerated dynamically

root@k8s-node1:~# vim /etc/kube-lb/conf/kube-lb.conf 
user root;
worker_processes 1;

error_log  /etc/kube-lb/logs/error.log warn;

events {
    worker_connections  3000;
}

stream {
    upstream backend {
#api-server addresses
        server 192.168.1.70:6443    max_fails=2 fail_timeout=3s;
    }

    server {
        listen 127.0.0.1:6443;
        proxy_connect_timeout 1s;
        proxy_pass backend;
    }
}

#Add a master node
#When adding a master, the harbor auth info must already be in the containerd template, otherwise image pulls on the new node will fail
root@deploy-harbor:/etc/kubeasz# ./ezctl add-master k8s-cluster1 192.168.1.77

#The add operation also writes this IP into the cluster's ansible hosts file

#Removing a master is del-master
root@deploy-harbor:/etc/kubeasz# ./ezctl del-master k8s-cluster1 192.168.1.77

9. Add a Node (scale out / in)

root@deploy-harbor:/etc/kubeasz# ./ezctl add-node k8s-cluster1 192.168.1.77
#Removing a node is del-node
root@deploy-harbor:/etc/kubeasz# ./ezctl del-node k8s-cluster1 192.168.1.77

10. K8S Upgrade (minor version)

10.1 Official download location


10.2 Download the binary packages

#Download the packages on the deploy server (the package link was not recorded here)

10.3 Master upgrade

#On each node, comment out the api-server entry of the master being upgraded so business traffic is not affected; with three api-servers, commenting out one at a time is enough. Do this on every node.
root@k8s-node1:~# vim /etc/kube-lb/conf/kube-lb.conf 
user root;
worker_processes 1;

error_log  /etc/kube-lb/logs/error.log warn;

events {
    worker_connections  3000;
}

stream {
    upstream backend {
#api-server addresses
#        server 192.168.1.70:6443    max_fails=2 fail_timeout=3s;
    }

    server {
        listen 127.0.0.1:6443;
        proxy_connect_timeout 1s;
        proxy_pass backend;
    }
}

#Then stop the services on the master being upgraded
systemctl stop kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet

#Copy the new binaries to the corresponding master node, into /usr/local/bin (a hedged example follows)
kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet
/usr/local/bin
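For example (a sketch only; the source directory and the target IP are assumptions, not from the original notes):
scp kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet 192.168.1.70:/usr/local/bin/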
#Verify the binary
/usr/local/bin/kube-apiserver --version

#Check whether related config files, such as kube-proxy's, need changes
#Also watch the component API versions during the upgrade; if they differ, rebuild a test cluster and rehearse the upgrade there first
vim /var/lib/kube-proxy/kube-proxy-config.yaml
#"API version" here means, for example:
apiVersion: kubeproxy.config.k8s.io/v1alpha1

#Start the services on the master again
systemctl start kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet

#Check
kubectl get nodes 

#Reload kube-lb on the nodes
systemctl reload kube-lb.service

#Then comment out the other api-servers one at a time and re-enable the upgraded one
The steps above cover the master upgrade.

10.4 Node upgrade

Always drain the pods first, then stop the services, then replace the binaries.

#Drain the workload pods off the node first

kubectl drain 192.168.x.x --ignore-daemonsets --force

#Check that the drain has finished

kubectl get pods -A -o wide

#Stop the node services
systemctl stop kubelet kube-proxy

#On the deploy node, copy the new kubelet and kube-proxy to the node's /usr/local/bin
kubelet kube-proxy  /usr/local/bin

systemctl reload kube-lb.service

#Re-enable the api-server entries in kube-lb.conf
vim /etc/kube-lb/conf/kube-lb.conf 
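
Once the new binaries are in place, the node services need to be started again and the node returned to scheduling (a hedged sketch; the IP is a placeholder):

systemctl start kubelet kube-proxy
kubectl uncordon 192.168.x.x     # allow pods to be scheduled on the node again
kubectl get nodes                # the node should now report the new version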

#Also copy all the new k8s binaries into the kubeasz bin directory, so future deployments use the 1.24.3 binaries
cp ./* /etc/kubeasz/bin/

10.5 Upgrade containerd, runc, and the client tools

#containerd download link
wget https://github.com/containerd/containerd/releases/download/v1.6.10/containerd-1.6.10-linux-amd64.tar.gz

#Copy the new containerd binaries into the kubeasz containerd-bin directory
cp ./* /etc/kubeasz/bin/containerd-bin/
#runc download link
wget https://github.com/opencontainers/runc/releases/download/v1.1.4/runc.amd64

#Download the client tools crictl and ctr

#Copy all the prepared binaries to each node; the destination must be the path referenced in containerd.service
scp ./* 192.168.*:/usr/local/bin/
#runc cannot be overwritten while it is in use on the node, so disable the services and reboot first
systemctl disable kubelet kube-proxy
systemctl disable containerd.service
reboot
#then copy the binaries again
scp ./* 192.168.*:/usr/local/bin/
systemctl enable --now kubelet kube-proxy containerd.service
#Restart kubelet on the node
systemctl restart kubelet

#Upgrade containerd on the master nodes the same way
scp ./* 192.168.*:/usr/local/bin/
systemctl disable kubelet kube-proxy
systemctl disable containerd.service
reboot
systemctl enable --now kubelet kube-proxy containerd.service