Ansible-K8S Binary Installation
1. K8s Cluster Deployment Planning
1.1 Single-master environment
1.2 Multi-master environment
1.3 Server inventory
Type | Role / description | Notes |
---|---|---|
Ansible (2) | K8s deployment servers; can be shared with other roles | |
K8S Master (3) | K8s control plane nodes, made highly available through a single VIP | |
Harbor (2) | Highly available image registry servers | |
Etcd (3) | Servers holding the K8s cluster data | |
Haproxy (2) | Highly available proxy servers (apiserver load balancing) | |
Node (2-N) | Servers that actually run containers; at least two for high availability |
1.4 Server preparation
The servers can be VMs or physical machines in a private cloud, or VMs in a public cloud. In a colocated IDC environment, the harbor and node roles can run directly on physical machines, while the master, etcd, and load-balancer roles can be VMs.
Hostname | IP address | Notes |
---|---|---|
k8s-master1 | 192.168.1.70 | HA master node |
k8s-master2 | 192.168.1.71 | |
k8s-ha1(deploy-harbor) | 192.168.1.72 | HA load balancer, VIP 192.168.1.100 |
k8s-etcd1 | 192.168.1.73 | HA etcd cluster |
k8s-etcd2 | 192.168.1.74 | |
k8s-etcd3 | 192.168.1.75 | |
k8s-node1 | 192.168.1.76 | k8s worker node |
1.5 System configuration
VIP: 192.168.1.69:6443    # configured on the load balancer
OS: Ubuntu 20.04
K8s version: 1.24.x
Calico: v3.19.4
1.6 Base environment preparation (Ubuntu)
- Configure APT sources
cat > /etc/apt/sources.list <<'EOF'
deb http://mirrors.aliyun.com/ubuntu/ focal main restricted  # focal main and restricted components take priority
deb http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted
deb http://mirrors.aliyun.com/ubuntu/ focal universe
deb http://mirrors.aliyun.com/ubuntu/ focal-updates universe
deb http://mirrors.aliyun.com/ubuntu/ focal multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-updates multiverse
deb http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu focal-security main restricted
deb http://mirrors.aliyun.com/ubuntu focal-security universe
deb http://mirrors.aliyun.com/ubuntu focal-security multiverse
EOF
apt-get update
- Set the hostname
hostnamectl set-hostname xxxx
- IP address configuration
vim /etc/netplan/00-installer-config.yaml
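A minimal static-address sketch for 00-installer-config.yaml; the interface name, address, gateway, and DNS below are assumptions to adjust per host (this example uses k8s-master1's address):
network:
  version: 2
  ethernets:
    eth0:                          # assumed interface name
      dhcp4: no
      addresses: [192.168.1.70/24]
      gateway4: 192.168.1.2        # assumed gateway
      nameservers:
        addresses: [223.5.5.5]
# apply the configuration
netplan apply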
- Kernel parameter tuning
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
# Set the required sysctl parameters; they persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
# Apply the sysctl parameters without rebooting
sudo sysctl --system
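A quick optional check that the modules are loaded and the parameters took effect:
lsmod | grep -E 'overlay|br_netfilter'
sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward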
- Disable swap
swapoff -a
sed -ri 's/.*swap.*/#&/' /etc/fstab
- Disable SELinux and the firewall
setenforce 0    # only relevant on SELinux systems; Ubuntu uses AppArmor, so an error here can be ignored
ufw disable
- Time zone and time synchronization
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
date -R
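Copying the zoneinfo file only sets the time zone; to keep the clocks actually synchronized an NTP client such as chrony can be added (a minimal sketch, not part of the original steps):
apt install -y chrony
systemctl enable --now chrony
chronyc sources -v    # confirm the upstream time sources are reachable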
- Host name mappings
cat > /etc/hosts <<'EOF'
192.168.1.70 k8s-master1
192.168.1.71 k8s-master2
192.168.1.72 k8s-ha1 harbor.nbrhce.com
192.168.1.73 k8s-etcd1
192.168.1.74 k8s-etcd2
192.168.1.75 k8s-etcd3
192.168.1.76 k8s-node1
EOF
2. Load balancer deployment
2.1 Deploy keepalived
root@k8s-ha1:~# apt install keepalived -y
# Find the sample configuration
root@k8s-ha1:~# find / -name keepalived*
# This sample file can be used as the starting point
/usr/share/doc/keepalived/samples/keepalived.conf.vrrp
# Copy the sample as the working config
root@k8s-ha1:~# cp /usr/share/doc/keepalived/samples/keepalived.conf.vrrp /etc/keepalived/keepalived.conf
- Configuration
# Master node
root@k8s-ha1:~# vim /etc/keepalived/keepalived.conf
vrrp_instance VI_1 {
state MASTER
interface eth0
virtual_router_id 51
priority 100
advert_int 3
authentication {
auth_type PASS
auth_pass 123abc
}
virtual_ipaddress {
192.168.1.72 dev eth0 label eth0:1
}
}
# Backup node
root@localhost:~# cat /etc/keepalived/keepalived.conf
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 51
priority 50
advert_int 3
authentication {
auth_type PASS
auth_pass 123abc
}
virtual_ipaddress {
192.168.1.72 dev eth0 label eth0:1    # must be the same VIP as on the master
}
}
# Start and enable the service
systemctl start keepalived.service
systemctl enable keepalived.service
# Check that the VIP has appeared
root@k8s-ha1:~# ifconfig
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.1.73 netmask 255.255.255.0 broadcast 192.168.1.255
inet6 fe80::20c:29ff:feab:bfb7 prefixlen 64 scopeid 0x20<link>
ether 00:0c:29:ab:bf:b7 txqueuelen 1000 (Ethernet)
RX packets 492885 bytes 250991107 (250.9 MB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 387918 bytes 23556907 (23.5 MB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
eth0:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.1.72 netmask 255.255.255.255 broadcast 0.0.0.0
ether 00:0c:29:ab:bf:b7 txqueuelen 1000 (Ethernet)
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
loop txqueuelen 1000 (Local Loopback)
RX packets 182 bytes 15942 (15.9 KB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 182 bytes 15942 (15.9 KB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
2.2 Deploy haproxy
root@k8s-ha1:~# apt install -y haproxy
- Configuration
# Master and backup use the same configuration; if ha1 goes down the VIP fails over to ha2, but the setup still has a flaw (see the haproxy health-check note at the end of this section)
root@k8s-ha1:~# vim /etc/haproxy/haproxy.cfg
...(omitted)
listen k8s_api_nodes_6443
# Bind to the VIP address and port
bind 192.168.1.100:6443
mode tcp
# Backend server pool
server 192.168.1.70 192.168.1.70:6443 check inter 2000 fall 3 rise 5
server 192.168.1.71 192.168.1.71:6443 check inter 2000 fall 3 rise 5
# Start
systemctl restart haproxy.service
systemctl enable haproxy.service
# Verify
Port 6443 is now listening:
root@k8s-ha1:~# ss -lntup
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
udp UNCONN 0 0 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=790,fd=12))
tcp LISTEN 0 262124 192.168.1.69:6443 0.0.0.0:* users:(("haproxy",pid=6349,fd=7))
tcp LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=790,fd=13))
tcp LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=854,fd=3))
tcp LISTEN 0 128 127.0.0.1:6010 0.0.0.0:* users:(("sshd",pid=1151,fd=11))
tcp LISTEN 0 128 [::]:22 [::]:* users:(("sshd",pid=854,fd=4))
tcp LISTEN 0 128 [::1]:6010 [::]:* users:(("sshd",pid=1151,fd=10))
# One remaining shortcoming: there is no script yet to monitor haproxy, so that keepalived stops holding the VIP as soon as haproxy goes down (a sketch follows below)
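A minimal sketch of such a health check, assuming keepalived and haproxy run on the same hosts; the script path, interval, and fall count are example values, not part of the original setup:
# /etc/keepalived/check_haproxy.sh  (make it executable with chmod +x)
#!/bin/bash
killall -0 haproxy 2>/dev/null || exit 1
# then reference it from /etc/keepalived/keepalived.conf on both nodes
vrrp_script chk_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    interval 2
    fall 2
}
vrrp_instance VI_1 {
    ...
    track_script {
        chk_haproxy
    }
}
With this in place, keepalived stops advertising MASTER as soon as the check fails, and the VIP moves to the backup node.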
3. Harbor deployment
- Because my machine resources are limited, the Harbor service and Ansible share one host
The kubernetes master and node hosts use containerd, which can be installed in advance or by the deployment tool; install Docker on the harbor node, since Harbor will be deployed there later.
3.1 Docker installation (for Harbor)
# I use an install script here; alternatively install Docker yourself (a 19.x or newer release)
root@k8s-harbor1:~# tar xf docker-list.tar.gz
root@k8s-harbor1:~# bash docker-install.sh
root@k8s-harbor1:~# docker version
Client: Docker Engine - Community
Version: 19.03.15
API version: 1.40
Go version: go1.13.15
Git commit: 99e3ed8
Built: Sat Jan 30 03:11:43 2021
OS/Arch: linux/amd64
Experimental: false
Server: Docker Engine - Community
Engine:
Version: 19.03.15
API version: 1.40 (minimum version 1.12)
Go version: go1.13.15
Git commit: 99e3ed8
Built: Sat Jan 30 03:18:13 2021
OS/Arch: linux/amd64
Experimental: false
containerd:
Version: v1.3.9
GitCommit: ea765aba0d05254012b0b9e595e995c09186427f
runc:
Version: 1.0.0-rc10
GitCommit: dc9208a3303feef5b3839f4323d9beb36df0a9dd
docker-init:
Version: 0.18.0
GitCommit: fec3683
3.2 Configure Harbor with a self-signed HTTPS certificate
Business images will all be uploaded to the Harbor server and distributed from there, instead of pulling public images over the Internet, which improves distribution speed and data security.
root@k8s-harbor1:~# mkdir /apps/certs -p
# Generate the CA private key
root@k8s-harbor1:/apps/certs# openssl genrsa -out ca.key 4096
# Generate the CA certificate (ca.crt) from the CA private key above
root@k8s-harbor1:/apps/certs# openssl req -x509 -new -nodes -sha512 -days 3650 \
-subj "/C=CN/ST=Beijing/L=Beijing/O=example/OU=Personal/CN=harbor.nbrhce.com" \
-key ca.key \
-out ca.crt
# Generate the private key for the Harbor server certificate
root@k8s-harbor1:/apps/certs# openssl genrsa -out harbor.nbrhce.com.key 4096
# Generate a certificate signing request (CSR), e.g. yourdomain.com.csr
root@k8s-harbor1:/apps/certs# openssl req -sha512 -new \
-subj "/C=CN/ST=Beijing/L=Beijing/O=example/OU=Personal/CN=harbor.nbrhce.com" \
-key harbor.nbrhce.com.key \
-out harbor.nbrhce.com.csr
# Create an x509 v3 extension file
root@k8s-harbor1:/apps/certs# cat > v3.ext << 'EOF'
authorityKeyIdentifier=keyid,issuer
basicConstraints=CA:FALSE
keyUsage = digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment
extendedKeyUsage = serverAuth
subjectAltName = @alt_names
# Only the entry matching your domain is required; the other two are arbitrary
[alt_names]
DNS.1=harbor.nbrhce.com
DNS.2=harbor.nbrhce.net
DNS.3=harbor.nbrhce.local
EOF
# Use the extension file to issue the certificate for the Harbor host (harbor.nbrhce.com.crt); substitute your own domain as needed
root@k8s-harbor1:/apps/certs# openssl x509 -req -sha512 -days 3650 \
-extfile v3.ext \
-CA ca.crt -CAkey ca.key -CAcreateserial \
-in harbor.nbrhce.com.csr \
-out harbor.nbrhce.com.crt
# Convert the server certificate to .cert for use by the Docker client
root@k8s-harbor1:/apps/certs# openssl x509 -inform PEM -in harbor.nbrhce.com.crt -out harbor.nbrhce.com.cert
3.3 Configure Harbor and certificate trust
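Before running install.sh, harbor.yml (copied from harbor.yml.tmpl in the unpacked Harbor installer directory) has to point at the domain and the certificates generated above; a minimal sketch, assuming the certificates stay under /apps/certs:
hostname: harbor.nbrhce.com
https:
  port: 443
  certificate: /apps/certs/harbor.nbrhce.com.crt
  private_key: /apps/certs/harbor.nbrhce.com.key
harbor_admin_password: Harbor12345    # change this outside a lab environment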
./install.sh --with-trivy --with-chartmuseum
# Configure certificate trust: a directory matching your domain must be created under this path, otherwise the nodes and containers will not trust the registry domain
root@k8s-harbor1:/apps/certs# mkdir /etc/docker/certs.d/harbor.nbrhce.com -p
# Put the generated certificates in this directory; I copy everything over, although copying only the .crt/.cert is enough
root@k8s-ha-deploy:/apps/certs# cp harbor.nbrhce.com.crt /etc/docker/certs.d/harbor.nbrhce.com/
root@k8s-ha-deploy:/apps/certs# cp ca.crt /etc/docker/certs.d/harbor.nbrhce.com/
root@k8s-ha-deploy:/apps/certs# cp ca.key /etc/docker/certs.d/harbor.nbrhce.com/
# In the ExecStart line of this unit file, add the registry domain and the Harbor host IP with port 5000 as insecure registries; port 5000 is the registry proxy inside the containers and is not visible on the host
root@k8s-ha-deploy:/apps/certs# vim /lib/systemd/system/docker.service
ExecStart=/usr/bin/dockerd -H fd:// --insecure-registry=192.168.1.72:5000 --insecure-registry=harbor.nbrhce.com --containerd=/run/containerd/containerd.sock
Don't forget to restart after the change; once Docker restarts you will see Harbor go down as well, so it has to be started again
systemctl daemon-reload
systemctl restart docker
docker-compose restart    # or, from /apps/harbor: docker-compose up -d
# Test: pull an image and push it to the private registry. Log in first; this is a Harbor-internal user, so create a user in Harbor beforehand and grant it admin (or project) rights so it can push images
root@k8s-ha-deploy:/apps/harbor# docker login harbor.nbrhce.com
Username:
password:
root@k8s-ha-deploy:/apps/harbor# docker pull alpine
root@k8s-ha-deploy:/apps/harbor# docker tag alpine:latest harbor.nbrhce.com/baseimages/alpine:latest
root@k8s-ha-deploy:/apps/harbor# docker push harbor.nbrhce.com/baseimages/alpine:latest
- The padlock icon in the browser means the HTTPS configuration succeeded
- Test pushing an image to Harbor (create the target project in advance)
4. Ansible deployment
On older server OS releases the bundled Python is also old, and newer Ansible versions then cannot invoke the modules needed to manage those hosts; this is why the key-distribution script below also creates a python3 symlink.
- Base environment preparation
root@k8s-deploy:~# apt install ansible sshpass -y
root@k8s-deploy:~# ssh-keygen
# Distribute the SSH key to all hosts in one go
root@k8s-deploy:~# vim key.sh
#!/bin/bash
IP="
192.168.1.70
192.168.1.71
192.168.1.73
192.168.1.75
192.168.1.76
192.168.1.77
192.168.1.78
192.168.1.79
192.168.1.80
"
for node in ${IP};do
sshpass -p 123456 ssh-copy-id ${node} -o StrictHostKeyChecking=no
echo "${node} 密钥copy完成"
ssh ${node} ln -sv /usr/bin/python3 /usr/bin/python
echo "${node} /usr/bin/python3 软连接创建完成"
done
# After distributing the keys, verify that the python symlink was created on each host (see the check below)
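A quick way to confirm both SSH access and the Python setup at once, assuming a simple inventory such as the default /etc/ansible/hosts listing the nodes above (the group name is an example):
# /etc/ansible/hosts (example)
[k8s]
192.168.1.70
192.168.1.71
192.168.1.76
# the ping module needs a working Python on the targets, so it verifies both in one shot
ansible -i /etc/ansible/hosts k8s -m ping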
5. Download the kubeasz project and components
- A Docker environment shows up on the deploy host because the download tool first pulls the images with Docker and copies them into a local registry container before removing the downloaded images
- hub.docker.io kubeasz-k8s-bin
5.1 Download the project
root@k8s-deploy:~# apt install git -y
root@k8s-deploy:~# export release=3.3.1
root@k8s-deploy:~# wget https://github.com/easzlab/kubeasz/releases/download/${release}/ezdown
root@k8s-deploy:~# vim ezdown    # customize the component versions to download; no changes needed this time
root@k8s-deploy:~# chmod a+x ./ezdown
root@k8s-deploy:~# ./ezdown -D
root@k8s-deploy:~# ll /etc/kubeasz/
total 136
drwxrwxr-x 12 root root 4096 Nov 17 13:22 ./
drwxr-xr-x 95 root root 4096 Nov 17 13:22 ../
-rw-rw-r-- 1 root root 20304 Jul 3 20:37 ansible.cfg
drwxr-xr-x 3 root root 4096 Nov 17 13:22 bin/
drwxrwxr-x 8 root root 4096 Jul 3 20:51 docs/
drwxr-xr-x 2 root root 4096 Nov 17 13:30 down/
drwxrwxr-x 2 root root 4096 Jul 3 20:51 example/
-rwxrwxr-x 1 root root 25012 Jul 3 20:37 ezctl*
-rwxrwxr-x 1 root root 25266 Jul 3 20:37 ezdown*
drwxrwxr-x 3 root root 4096 Jul 3 20:51 .github/
-rw-rw-r-- 1 root root 301 Jul 3 20:37 .gitignore
drwxrwxr-x 10 root root 4096 Jul 3 20:51 manifests/
drwxrwxr-x 2 root root 4096 Jul 3 20:51 pics/
drwxrwxr-x 2 root root 4096 Jul 3 20:51 playbooks/
-rw-rw-r-- 1 root root 5058 Jul 3 20:37 README.md
drwxrwxr-x 22 root root 4096 Jul 3 20:51 roles/
drwxrwxr-x 2 root root 4096 Jul 3 20:51 tools/
5.2 Create a cluster definition
# Create a k8s cluster definition; this just creates a directory containing hosts and config.yml
root@k8s-deploy:/etc/kubeasz# ./ezctl new k8s-cluster1
2022-11-17 13:38:04 DEBUG generate custom cluster files in /etc/kubeasz/clusters/k8s-cluster1
2022-11-17 13:38:04 DEBUG set versions
2022-11-17 13:38:04 DEBUG cluster k8s-cluster1: files successfully created.
2022-11-17 13:38:04 INFO next steps 1: to config '/etc/kubeasz/clusters/k8s-cluster1/hosts'
2022-11-17 13:38:04 INFO next steps 2: to config '/etc/kubeasz/clusters/k8s-cluster1/config.yml'
5.3 Edit the hosts file
# This file simply lists the IP addresses of your nodes
root@k8s-ha1:/etc/kubeasz/clusters/k8s-cluster1# cat hosts
# 'etcd' cluster should have odd member(s) (1,3,5,...)
[etcd]
192.168.1.76
192.168.1.77
192.168.1.78
# master node(s)
[kube_master]
192.168.1.70
192.168.1.71
# work node(s)
[kube_node]
192.168.1.79
# [optional] harbor server, a private docker registry
# 'NEW_INSTALL': 'true' to install a harbor server; 'false' to integrate with existed one
[harbor]
#192.168.1.8 NEW_INSTALL=false
# [optional] loadbalance for accessing k8s from outside
[ex_lb]
#192.168.1.6 LB_ROLE=backup EX_APISERVER_VIP=192.168.1.250 EX_APISERVER_PORT=8443
#192.168.1.7 LB_ROLE=master EX_APISERVER_VIP=192.168.1.250 EX_APISERVER_PORT=8443
# [optional] ntp server for the cluster
[chrony]
#192.168.1.1
[all:vars]
# --------- Main Variables ---------------
# Secure port for apiservers
SECURE_PORT="6443"
# Cluster container-runtime supported: docker, containerd
# if k8s version >= 1.24, docker is not supported
CONTAINER_RUNTIME="containerd"
# Network plugins supported: calico, flannel, kube-router, cilium, kube-ovn
CLUSTER_NETWORK="calico"
# Service proxy mode of kube-proxy: 'iptables' or 'ipvs'
PROXY_MODE="ipvs"
#================== Changed here: enlarged the service network ==================#
# K8S Service CIDR, not overlap with node(host) networking
SERVICE_CIDR="10.100.0.0/16"
#================== Changed here: enlarged the pod network ==================#
# Cluster CIDR (Pod CIDR), not overlap with node(host) networking
CLUSTER_CIDR="10.200.0.0/16"
# NodePort Range
NODE_PORT_RANGE="30000-32767"
# Cluster DNS Domain
CLUSTER_DNS_DOMAIN="cluster.local"
# -------- Additional Variables (don't change the default value right now) ---
# Binaries Directory
#================== Changed here: binary installation directory ==================#
bin_dir="/usr/local/bin"
# Deploy Directory (kubeasz workspace)
base_dir="/etc/kubeasz"
# Directory for a specific cluster
cluster_dir="{{ base_dir }}/clusters/k8s-cluster1"
# CA and other components cert/key Directory
ca_dir="/etc/kubernetes/ssl"
root@k8s-ha1:/etc/kubeasz/clusters/k8s-cluster1#
5.4 Edit config.yml
# Change the pod sandbox (pause) container image
# The original value is the first line below; to speed up pulls, tag the image and push it to the private registry, then update the file accordingly. This is the pod-init (sandbox) image: if it cannot be pulled, pods can never be initialized.
easzlab.io.local:5000/easzlab/pause:3.7
harbor.nbrhce.com/baseimages/pause:3.7
root@k8s-ha1:/etc/kubeasz/clusters/k8s-cluster1# cat config.yml
############################
# prepare
############################
# optionally install system packages from an offline source (offline|online)
INSTALL_SOURCE: "online"
# optionally apply OS security hardening, see github.com/dev-sec/ansible-collection-hardening
OS_HARDEN: false
############################
# role:deploy
############################
# default: ca will expire in 100 years
# default: certs issued by the ca will expire in 50 years
CA_EXPIRY: "876000h"
CERT_EXPIRY: "438000h"
# kubeconfig parameters
CLUSTER_NAME: "cluster1"
CONTEXT_NAME: "context-{{ CLUSTER_NAME }}"
# k8s version
K8S_VER: "1.24.2"
############################
# role:etcd
############################
# a separate wal directory avoids disk IO contention and improves performance
ETCD_DATA_DIR: "/var/lib/etcd"
ETCD_WAL_DIR: ""
############################
# role:runtime [containerd,docker]
############################
# ------------------------------------------- containerd
# [.] enable registry mirrors
ENABLE_MIRROR_REGISTRY: false
# [containerd] base (pause/sandbox) container image
#================== Changed here: image pulled from my Harbor ==================#
SANDBOX_IMAGE: "harbor.nbrhce.com/baseimages/pause:3.7"
# [containerd] container persistent storage directory
CONTAINERD_STORAGE_DIR: "/var/lib/containerd"
# ------------------------------------------- docker
# [docker] container storage directory
DOCKER_STORAGE_DIR: "/var/lib/docker"
# [docker] enable the remote RESTful API
ENABLE_REMOTE_API: false
# [docker] trusted insecure (HTTP) registries
#================== Changed here: added my own Harbor registry ==================#
INSECURE_REG: '["http://easzlab.io.local:5000","harbor.nbrhce.com"]'
############################
# role:kube-master
############################
# k8s master certificate SANs; multiple IPs and domains can be added (e.g. a public IP and domain)
#================== Changed here: IP set to the VIP, the domain is arbitrary ==================#
MASTER_CERT_HOSTS:
- "192.168.1.72"
- "api.myserver.com"
#- "www.test.com"
# pod subnet mask length on each node (determines the maximum pod IPs per node)
# if flannel runs with --kube-subnet-mgr, it reads this value to assign each node's pod subnet
# https://github.com/coreos/flannel/issues/847
NODE_CIDR_LEN: 23
############################
# role:kube-node
############################
# kubelet root directory
KUBELET_ROOT_DIR: "/var/lib/kubelet"
# maximum pods per node
#================== Changed here: raised to 500 ==================#
MAX_PODS: 500
# resources reserved for kube components (kubelet, kube-proxy, dockerd, etc.)
# see templates/kubelet-config.yaml.j2 for the actual values
KUBE_RESERVED_ENABLED: "no"
# upstream k8s advises against enabling system-reserved casually, unless long-term monitoring shows the system's real resource usage;
# the reservation also needs to grow over time; see templates/kubelet-config.yaml.j2 for the values
# the system reservation assumes a 4c/8g VM with a minimal set of system services; increase it on high-end physical machines
# also, apiserver and friends briefly use a lot of resources during cluster installation, so reserve at least 1 GB of memory
SYS_RESERVED_ENABLED: "no"
############################
# role:network [flannel,calico,cilium,kube-ovn,kube-router]
############################
# ------------------------------------------- flannel
# [flannel] backend: "host-gw", "vxlan", etc.
FLANNEL_BACKEND: "vxlan"
DIRECT_ROUTING: false
# [flannel] flanneld_image: "quay.io/coreos/flannel:v0.10.0-amd64"
flannelVer: "v0.15.1"
flanneld_image: "easzlab.io.local:5000/easzlab/flannel:{{ flannelVer }}"
# ------------------------------------------- calico
# [calico] setting CALICO_IPV4POOL_IPIP: "off" improves network performance; see docs/setup/calico.md for the prerequisites
CALICO_IPV4POOL_IPIP: "Always"
# [calico] host IP used by calico-node; BGP peering is established over this address, set manually or auto-detected
IP_AUTODETECTION_METHOD: "can-reach={{ groups['kube_master'][0] }}"
# [calico] network backend: brid, vxlan, none
CALICO_NETWORKING_BACKEND: "brid"
# [calico] whether to use route reflectors
# recommended once the cluster grows beyond roughly 50 nodes
CALICO_RR_ENABLED: false
# CALICO_RR_NODES sets the route reflector nodes; if unset, the cluster master nodes are used by default
# CALICO_RR_NODES: ["192.168.1.1", "192.168.1.2"]
CALICO_RR_NODES: []
# [calico] supported calico versions: [v3.3.x] [v3.4.x] [v3.8.x] [v3.15.x]
calico_ver: "v3.19.4"
# [calico] calico major.minor version
calico_ver_main: "{{ calico_ver.split('.')[0] }}.{{ calico_ver.split('.')[1] }}"
# ------------------------------------------- cilium
# [cilium] image version
cilium_ver: "1.11.6"
cilium_connectivity_check: true
cilium_hubble_enabled: false
cilium_hubble_ui_enabled: false
# ------------------------------------------- kube-ovn
# [kube-ovn] node for the OVN DB and OVN control plane, defaults to the first master node
OVN_DB_NODE: "{{ groups['kube_master'][0] }}"
# [kube-ovn] offline image tarball version
kube_ovn_ver: "v1.5.3"
# ------------------------------------------- kube-router
# [kube-router] public clouds impose restrictions and usually need ipinip always on; self-hosted environments can use "subnet"
OVERLAY_TYPE: "full"
# [kube-router] NetworkPolicy support switch
FIREWALL_ENABLE: true
# [kube-router] kube-router image version
kube_router_ver: "v0.3.1"
busybox_ver: "1.28.4"
############################
# role:cluster-addon
############################
# coredns auto-install
#================== Changed here: set to no ==================#
dns_install: "no"
corednsVer: "1.9.3"
ENABLE_LOCAL_DNS_CACHE: false
dnsNodeCacheVer: "1.21.1"
# local dns cache address
LOCAL_DNS_CACHE: "169.254.20.10"
# metrics-server auto-install
#================== Changed here: set to no ==================#
metricsserver_install: "no"
metricsVer: "v0.5.2"
# dashboard auto-install
#================== Changed here: set to no ==================#
dashboard_install: "no"
dashboardVer: "v2.5.1"
dashboardMetricsScraperVer: "v1.0.8"
# prometheus auto-install
#================== Changed here: set to no ==================#
prom_install: "no"
prom_namespace: "monitor"
prom_chart_ver: "35.5.1"
# nfs-provisioner auto-install
#================== Changed here: set to no ==================#
nfs_provisioner_install: "no"
nfs_provisioner_namespace: "kube-system"
nfs_provisioner_ver: "v4.0.2"
nfs_storage_class: "managed-nfs-storage"
nfs_server: "192.168.1.10"
nfs_path: "/data/nfs"
# network-check auto-install
network_check_enabled: false
network_check_schedule: "*/5 * * * *"
############################
# role:harbor
############################
# harbor version (full version string)
HARBOR_VER: "v2.1.3"
HARBOR_DOMAIN: "harbor.easzlab.io.local"
HARBOR_TLS_PORT: 8443
# if set 'false', you need to put certs named harbor.pem and harbor-key.pem in directory 'down'
HARBOR_SELF_SIGNED_CERT: true
# install extra component
HARBOR_WITH_NOTARY: false
HARBOR_WITH_TRIVY: false
HARBOR_WITH_CLAIR: false
HARBOR_WITH_CHARTMUSEUM: true
root@k8s-ha1:/etc/kubeasz/clusters/k8s-cluster1#
5.5 Edit the ansible playbook 01.prepare.yml
root@deploy-harbor:/etc/kubeasz# vim playbooks/01.prepare.yml
# In the first play, keep only these three host groups
# [optional] to synchronize system time of nodes with 'chrony'
- hosts:
- kube_master
- kube_node
- etcd
roles:
- { role: os-harden, when: "OS_HARDEN|bool" }
- { role: chrony, when: "groups['chrony']|length > 0" }
# to create CA, kubeconfig, kube-proxy.kubeconfig etc.
- hosts: localhost
roles:
- deploy
# prepare tasks for all nodes
- hosts:
- kube_master
- kube_node
- etcd
roles:
- prepare
6. Deploy the K8S cluster
Use the ansible playbooks to initialize the environment and deploy the highly available k8s cluster.
- Environment initialization
# Help
root@k8s-ha1:/etc/kubeasz# ./ezctl --help
Usage: ezctl COMMAND [args]
-------------------------------------------------------------------------------------
Cluster setups:
list to list all of the managed clusters
checkout <cluster> to switch default kubeconfig of the cluster
new <cluster> to start a new k8s deploy with name 'cluster'
setup <cluster> <step> to setup a cluster, also supporting a step-by-step way
start <cluster> to start all of the k8s services stopped by 'ezctl stop'
stop <cluster> to stop all of the k8s services temporarily
upgrade <cluster> to upgrade the k8s cluster
destroy <cluster> to destroy the k8s cluster
backup <cluster> to backup the cluster state (etcd snapshot)
restore <cluster> to restore the cluster state from backups
start-aio to quickly setup an all-in-one cluster with 'default' settings
Cluster ops:
add-etcd <cluster> <ip> to add a etcd-node to the etcd cluster
add-master <cluster> <ip> to add a master node to the k8s cluster
add-node <cluster> <ip> to add a work node to the k8s cluster
del-etcd <cluster> <ip> to delete a etcd-node from the etcd cluster
del-master <cluster> <ip> to delete a master node from the k8s cluster
del-node <cluster> <ip> to delete a work node from the k8s cluster
Extra operation:
kcfg-adm <cluster> <args> to manage client kubeconfig of the k8s cluster
Use "ezctl help <command>" for more information about a given command.
# Reading the script shows what each numbered step does
root@deploy-harbor:/etc/kubeasz# vim ezctl
cat <<EOF
available steps:
01 prepare to prepare CA/certs & kubeconfig & other system settings
02 etcd to setup the etcd cluster
03 container-runtime to setup the container runtime(docker or containerd)
04 kube-master to setup the master nodes
05 kube-node to setup the worker nodes
06 network to setup the network plugin
07 cluster-addon to setup other useful plugins
90 all to run 01~07 all at once
10 ex-lb to install external loadbalance for accessing k8s from outside
11 harbor to install a new harbor server or to integrate with an existed one
6.1 Step 01: cluster base setup
root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 01
- Problems encountered
If apt gets stuck on a lock held by another process, run the following:
sudo rm /var/lib/apt/lists/lock
sudo rm /var/cache/apt/archives/lock
sudo rm /var/lib/dpkg/lock*
sudo dpkg --configure -a
sudo apt update
6.2 Step 02: deploy the etcd cluster
# Every node that needs to pull images requires this, so containerd can pull from Harbor
# This is added to the ansible role; put it in the template before running, and if it is missing on a node afterwards, add it by hand
root@k8s-ha1:/etc/kubeasz# vim roles/containerd/templates/config.toml.j2
# This configures our own Harbor registry so containers can be pulled from it
# Around line 147 of the template
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."harbor.nbrhce.com"]
endpoint = ["https://harbor.nbrhce.com"]
[plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.nbrhce.com".tls]
insecure_skip_verify = true
# This user is a Harbor-internal user
[plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.nbrhce.com".auth]
username = "quyi"
password = "Harbor12345"
# If the generated config does not contain this, add it manually
root@k8s-node1:~# vim /etc/containerd/config.toml
# Restart
systemctl daemon-reload
systemctl restart containerd
# Test that an image can be pulled; this one was pushed to Harbor earlier, upload one yourself if it is not there
ln -s /opt/kube/bin/crictl /usr/bin
crictl pull harbor.nbrhce.com/demo/alpine:v1
root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 02
# Test after the run
# etcd health check
export NODE_IPS="192.168.1.76 192.168.1.77 192.168.1.78"
for ip in ${NODE_IPS}; do ETCDCTL_API=3 /opt/kube/bin/etcdctl --endpoints=https://${ip}:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/kubernetes/ssl/etcd.pem --key=/etc/kubernetes/ssl/etcd-key.pem endpoint health;
done
# Result
https://192.168.1.76:2379 is healthy: successfully committed proposal: took = 8.57508ms
https://192.168.1.77:2379 is healthy: successfully committed proposal: took = 10.019689ms
https://192.168.1.78:2379 is healthy: successfully committed proposal: took = 8.723699ms
root@k8s-master1:~# crictl pull harbor.nbrhce.com/baseimages/pause:3.7
Image is up to date for sha256:221177c6082a88ea4f6240ab2450d540955ac6f4d5454f0e15751b653ebda165
6.3 Step 03: deploy the container runtime
# If this image cannot be pulled, initialization will not succeed: easzlab.io.local:5000/easzlab/pause:3.7
root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 03
6.4 Step 04: deploy the master nodes
root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 04
After the steps above succeed, check the nodes from the deploy host:
root@k8s-ha1:/etc/kubeasz# kubectl get nodes
NAME STATUS ROLES AGE VERSION
192.168.1.70 Ready,SchedulingDisabled master 102s v1.24.2
192.168.1.71 Ready,SchedulingDisabled master 102s v1.24.2
6.5 Step 05: deploy the worker nodes
root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 05
root@deploy-harbor:/etc/kubeasz# kubectl get nodes
NAME STATUS ROLES AGE VERSION
192.168.1.70 Ready,SchedulingDisabled master 8m55s v1.24.2
192.168.1.74 Ready node 50s v1.24.2
6.6 Step 06: deploy calico
# The calico images need to be changed
# Template location
root@k8s-ha1:/etc/kubeasz# vim roles/calico/templates/calico-v3.19.yaml.j2
docker tag easzlab.io.local:5000/calico/node:v3.19.4 harbor.nbrhce.com/baseimages/calico/node:v3.19.4
docker push harbor.nbrhce.com/baseimages/calico/node:v3.19.4
docker tag calico/pod2daemon-flexvol:v3.19.4 harbor.nbrhce.com/baseimages/calico-pod2daemon-flexvol:v3.19.4
docker push harbor.nbrhce.com/baseimages/calico-pod2daemon-flexvol:v3.19.4
docker tag calico/cni:v3.19.4 harbor.nbrhce.com/baseimages/calico/cni:v3.19.4
docker push harbor.nbrhce.com/baseimages/calico/cni:v3.19.4
docker tag calico/kube-controllers:v3.19.4 harbor.nbrhce.com/baseimages/calico/kube-controllers:v3.19.4
docker push harbor.nbrhce.com/baseimages/calico/kube-controllers:v3.19.4
- Replace the image references inside the template (see the sketch below)
# This template can also be edited later to change the pod address range and other settings
root@k8s-ha1:/etc/kubeasz# vim roles/calico/templates/calico-v3.19.yaml.j2
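One way to do the replacement is a sed pass over the template, assuming the image references in it carry the easzlab.io.local:5000 prefix shown in the tag commands above (the target repository layout is my own, so verify the result with grep):
cd /etc/kubeasz/roles/calico/templates
sed -i 's#easzlab.io.local:5000/calico/#harbor.nbrhce.com/baseimages/calico/#g' calico-v3.19.yaml.j2
grep 'image:' calico-v3.19.yaml.j2    # every image should now point at the private registry
Note that pod2daemon-flexvol was pushed above under a slightly different name (baseimages/calico-pod2daemon-flexvol), so either retag it to match or adjust that one line by hand.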
- Once the images above are replaced, run step 06
root@k8s-ha1:/etc/kubeasz# ./ezctl setup k8s-cluster1 06
# Success
root@deploy-harbor:/etc/kubeasz# kubectl get nodes
NAME STATUS ROLES AGE VERSION
192.168.1.70 Ready,SchedulingDisabled master 21h v1.24.2
192.168.1.74 Ready node 21h v1.24.2
root@deploy-harbor:/etc/kubeasz# kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-68555f5f97-fks8p 1/1 Running 1 (99m ago) 21h
kube-system calico-node-gdc8m 1/1 Running 40 (9m8s ago) 21h
kube-system calico-node-h5drr 1/1 Running 36 (78s ago) 21h
7. Run a test container
root@deploy-harbor:/etc/kubeasz# kubectl create namespace myserver    # the namespace must exist first
root@deploy-harbor:/etc/kubeasz# kubectl run net-test1 --image=centos:7.9.2009 sleep 10000000 -n myserver
root@deploy-harbor:/etc/kubeasz# kubectl get pods -n myserver -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
net-test1 1/1 Running 0 66s 10.200.36.65 192.168.1.74 <none> <none>
root@deploy-harbor:/etc/kubeasz# kubectl exec -it net-test1 bash -n myserver
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
# DNS resolution fails because coredns is not installed
[root@net-test1 /]# ping baidu.com
ping: baidu.com: Name or service not known
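Since coredns was deliberately left uninstalled, the name-lookup failure above is expected; the CNI itself can still be verified with a pod-to-pod ping by IP (a sketch, the second pod and its IP are illustrative):
root@deploy-harbor:/etc/kubeasz# kubectl run net-test2 --image=centos:7.9.2009 sleep 10000000 -n myserver
root@deploy-harbor:/etc/kubeasz# kubectl get pods -n myserver -o wide    # note net-test2's pod IP
root@deploy-harbor:/etc/kubeasz# kubectl exec -it net-test1 -n myserver -- ping -c 2 <net-test2-pod-ip>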
8. Adding a master node (scale out / scale in)
# First look at the config file generated on the node; it is regenerated dynamically by kubeasz
root@k8s-node1:~# vim /etc/kube-lb/conf/kube-lb.conf
user root;
worker_processes 1;
error_log /etc/kube-lb/logs/error.log warn;
events {
worker_connections 3000;
}
stream {
upstream backend {
# apiserver addresses
server 192.168.1.70:6443 max_fails=2 fail_timeout=3s;
}
server {
listen 127.0.0.1:6443;
proxy_connect_timeout 1s;
proxy_pass backend;
}
}
# Add a master node
# When adding this master, the Harbor credentials must already be in the containerd template, otherwise image pulls will fail
root@deploy-harbor:/etc/kubeasz# ./ezctl add-master k8s-cluster1 192.168.1.77
# The add operation also writes this IP into the ansible hosts file
# Deleting uses del
root@deploy-harbor:/etc/kubeasz# ./ezctl del-master k8s-cluster1 192.168.1.77
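After an add-master run, /etc/kube-lb/conf/kube-lb.conf on every node should be regenerated with an extra upstream entry for the new master, roughly as below (a sketch for the 192.168.1.77 example above), and kubectl get nodes should now list it as a master:
upstream backend {
    server 192.168.1.70:6443    max_fails=2 fail_timeout=3s;
    server 192.168.1.77:6443    max_fails=2 fail_timeout=3s;
}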
9. Adding a worker node (scale out / scale in)
root@deploy-harbor:/etc/kubeasz# ./ezctl add-node k8s-cluster1 192.168.1.77
# Deleting uses del
root@deploy-harbor:/etc/kubeasz# ./ezctl del-node k8s-cluster1 192.168.1.77
10. K8S upgrade (minor version)
10.1 Official download location
10.2 Download the binary packages
# Download the packages on the deploy server (I have not located the package yet)
10.3 Master upgrade
# On every node, comment out in kube-lb.conf the apiserver that is being upgraded so workloads are not affected; with three apiservers it is enough to comment out one at a time. This must be done on every node.
root@k8s-node1:~# vim /etc/kube-lb/conf/kube-lb.conf
user root;
worker_processes 1;
error_log /etc/kube-lb/logs/error.log warn;
events {
worker_connections 3000;
}
stream {
upstream backend {
# apiserver addresses
# server 192.168.1.70:6443 max_fails=2 fail_timeout=3s;
}
server {
listen 127.0.0.1:6443;
proxy_connect_timeout 1s;
proxy_pass backend;
}
}
# Then stop the services on the master being upgraded
systemctl stop kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet
# Copy the new binaries to the corresponding master node, into /usr/local/bin
kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet
/usr/local/bin
# Verify the version
/usr/local/bin/kube-apiserver --version
# Check whether related config files need changes, e.g. for kube-proxy
# Also watch the API versions used in those files during the upgrade; if they differ, rehearse the upgrade on a separate cluster first
vim /var/lib/kube-proxy/kube-proxy-config.yaml
The API version refers to:
apiVersion: kubeproxy.config.k8s.io/v1alpha1
# Start the master services again
systemctl start kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet
# Check
kubectl get nodes
# Reload kube-lb on the nodes
systemctl reload kube-lb.service
# Then comment out the other two apiservers in turn and re-enable the freshly upgraded one
That completes the master upgrade.
10.4 Node upgrade
Be sure to drain the pods first, then stop the services, and only then replace the binaries.
# First evict all business pods from the node
kubectl drain 192.168.x.x --ignore-daemonsets --force
# Check that the drain has finished
kubectl get pods -A -o wide
# Stop the node services
systemctl stop kubelet kube-proxy
# From the deploy node, copy the new kubelet and kube-proxy binaries to the node
scp kubelet kube-proxy 192.168.x.x:/usr/local/bin/
systemctl reload kube-lb.service
# Re-enable the apiserver entries in kube-lb.conf
vim /etc/kube-lb/conf/kube-lb.conf
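The write-up stops here, but to finish the node upgrade the services have to be started again and the node returned to scheduling; a short sketch:
systemctl start kubelet kube-proxy
kubectl uncordon 192.168.x.x    # allow pods to be scheduled onto the node again
kubectl get nodes               # should show the new version, Ready, and no SchedulingDisabled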
# Also copy all the new k8s binaries into the kubeasz bin directory so future deployments use the 1.24.3 binaries
cp ./* /etc/kubeasz/bin/
10.5 Upgrading containerd, runc, and the client tools
# containerd download
wget https://github.com/containerd/containerd/releases/download/v1.6.10/containerd-1.6.10-linux-amd64.tar.gz
# Copy the new containerd binaries into the kubeasz containerd-bin directory
cp ./* /etc/kubeasz/bin/containerd-bin/
# runc download
wget https://github.com/opencontainers/runc/releases/download/v1.1.4/runc.amd64
# Also download the client tools crictl and ctr
# Copy all the prepared binaries to the path referenced in containerd.service
scp ./* 192.168.*:/usr/local/bin/
You will find runc cannot be copied because it is still in use on the node, so:
systemctl disable kubelet kube-proxy
systemctl disable containerd.service
reboot
Then copy the binaries again:
scp ./* 192.168.*:/usr/local/bin/
systemctl enable --now kubelet kube-proxy containerd.service
# Restart kubelet on the node
systemctl restart kubelet
# Then upgrade containerd on the master nodes in the same way
scp ./* 192.168.*:/usr/local/bin/
systemctl disable kubelet kube-proxy
systemctl disable containerd.service
reboot
systemctl enable --now kubelet kube-proxy containerd.service
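A quick post-upgrade check on each host; these are standard version commands, and the output should match the versions downloaded above:
containerd --version
runc --version
crictl version
kubelet --version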