kubeadm安装多master节点k8s高可用集群
百度网盘链接:https://pan.baidu.com/s/15t_TSH5RRpCFXV-93JHpNw?pwd=8od3 提取码:8od3
9 kubeadm安装多master节点k8s高可用集群
9.1 初始化实验环境
K8S集群角色 |
IP地址 |
主机名 |
控制节点 |
192.168.40.180 |
master1 |
控制节点 |
192.168.40.181 |
master2 |
工作节点 |
192.168.40.182 |
node1 |
Vip |
192.168.40.199 |
|
备注:初始化是以master1为例,master2及node1参照master1配置即可。
9.1.1 修改机器ip
# vim /etc/sysconfig/network-scripts/ifcfg-eth0
NAME=eth0
DEVICE=eth0
ONBOOT=yes
TYPE=Ethernet
BOOTPROTO=static
IPADDR=192.168.40.180
NETMASK=255.255.255.0
GATEWAY=192.168.40.253
DEFROUTE=yes
9.1.2 关闭selinux
# sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
9.1.3 配置主机名称
# hostnamectl set-hostname master1 && bash
9.1.4 配置/etc/hosts文件
# vim /etc/hosts //修改每台机器的hosts文件,增加如下三行:
192.168.40.180 master1
192.168.40.181 master2
192.168.40.182 node1
9.1.5 配置主机间互通
# ssh-keygen #一路回车,不输入密码。
把本地生成的公钥文件拷贝到远程主机。
# ssh-copy-id master1
# ssh-copy-id master2
# ssh-copy-id node1
9.1.6 关闭交换分区swap
注释swap挂载,给swap这行开头加一下注释。
# vim /etc/fstab
#/dev/mapper/centos-swap swap swap defaults 0 0
9.1.7 修改机器内核参数
# modprobe br_netfilter
# echo "modprobe br_netfilter" >> /etc/profile
# cat > /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
# sysctl -p /etc/sysctl.d/k8s.conf
重启后模块失效,下面是开机自动加载模块的脚本,在/etc/新建rc.sysinit 文件
# vim /etc/rc.sysinit
#!/bin/bash
for file in /etc/sysconfig/modules/*.modules; do
[ -x $file ] && $file
done
在/etc/sysconfig/modules/目录下新建文件
# vim /etc/sysconfig/modules/br_netfilter.modules
modprobe br_netfilter
增加权限
# chmod 755 /etc/sysconfig/modules/br_netfilter.modules
9.1.8 关闭防火墙
# systemctl stop firewalld ; systemctl disable firewalld
9.1.9 配置yum源
#备份基础repo源
# mkdir /root/repo.bak
# cd /etc/yum.repos.d/
# mv * /root/repo.bak/
#下载阿里云的repo源
把CentOS-Base.repo和epel.repo文件上传到master1主机的/etc/yum.repos.d/目录下
配置国内阿里云docker的repo源
# yum install yum-utils -y
# yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
9.1.10 配置时间同步
#安装ntpdate命令
# yum install ntpdate -y
#跟网络时间做同步
# ntpdate cn.pool.ntp.org
#把时间同步做成计划任务
# crontab -e
0 */1 * * * /usr/sbin/ntpdate cn.pool.ntp.org
#重启crond服务
#service crond restart
9.1.11 安装基础软件包
# yum install -y yum-utils openssh-clients device-mapper-persistent-data lvm2 wget net-tools nfs-utils lrzsz gcc gcc-c++ make cmake libxml2-devel openssl-devel curl curl-devel unzip sudo ntp libaio-devel vim ncurses-devel autoconf automake zlib-devel python-devel epel-release openssh-server socat ipvsadm conntrack ntpdate telnet ipvsadm
9.2 部署docker服务
备注:部署docker服务是以master1为例,master2及node1参照master1配置即可。
9.2.1 安装docker-ce
# yum -y install docker-ce-20.10.6 docker-ce-cli-20.10.6 containerd.io
# systemctl start docker && systemctl enable docker && systemctl status docker
9.2.2 配置docker镜像加速器和驱动
# vim /etc/docker/daemon.json
{
"registry-mirrors":["https://registry.docker-cn.com","https://docker.mirrors.ustc.edu.cn","https://dockerhub.azk8s.cn","http://hub-mirror.c.163.com","https://glrgy8jw.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"]
}
备注:修改docker文件驱动为systemd,默认为cgroupfs,kubelet默认使用systemd,两者必须一致。
# systemctl daemon-reload
# systemctl restart docker
9.3 部署keepalived+nginx
9.3.1 安装nginx主备
# yum -y install nginx
9.3.2 配置nginx
备注:nginx主备配置相同
# vim /etc/nginx/nginx.conf
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log;
pid /run/nginx.pid;
include /usr/share/nginx/modules/*.conf;
events {
worker_connections 1024;
}
# 四层负载均衡,为两台Master apiserver组件提供负载均衡
stream {
log_format main '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
access_log /var/log/nginx/k8s-access.log main;
upstream k8s-apiserver {
server 192.168.40.180:6443 weight=5 max_fails=3 fail_timeout=30s;
server 192.168.40.181:6443 weight=5 max_fails=3 fail_timeout=30s;
}
server {
listen 16443; #由于nginx与master节点复用,这个监听端口不能是6443,否则会冲突。
proxy_pass k8s-apiserver;
}
}
http {
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;
include /etc/nginx/mime.types;
default_type application/octet-stream;
server {
listen 80 default_server;
server_name _;
location / {
}
}
}
9.3.3 安装keepalived主备
# yum -y install keepalived
9.3.4 配置keepalived主备
配置主keepalived
[root@master1 ~]# vim /etc/keepalived/keepalived.conf
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 127.0.0.1
smtp_connect_timeout 30
router_id NGINX_MASTER
}
vrrp_script check_nginx {
script "/etc/keepalived/check_nginx.sh"
}
vrrp_instance VI_1 {
state MASTER
interface eth0 #修改为实际网卡名
virtual_router_id 51 #VRRP 路由 ID实例,每个实例是唯一的
priority 100 #优先级,备服务器设置 90
advert_int 1 #指定VRRP 心跳包通告间隔时间,默认1秒
authentication {
auth_type PASS
auth_pass 1111
}
# 虚拟IP
virtual_ipaddress {
192.168.40.199/24
}
track_script {
check_nginx
}
}
#vrrp_script:指定检查nginx工作状态脚本(根据nginx状态判断是否故障转移)
#virtual_ipaddress:虚拟IP(VIP)
配置备keepalived
[root@master2 ~]# vim /etc/keepalived/keepalived.conf
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 127.0.0.1
smtp_connect_timeout 30
router_id NGINX_BACKUP
}
vrrp_script check_nginx {
script "/etc/keepalived/check_nginx.sh"
}
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 51 # VRRP 路由 ID实例,每个实例是唯一的
priority 90
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.40.199/24
}
track_script {
check_nginx
}
}
9.3.5 配置keepalived监控脚本
备注:主备节点脚本相同
# vim /etc/keepalived/check_nginx.sh
#1 判断Nginx是否存活
#2 如果不存活则尝试启动Nginx
#3 等待2秒后再次获取一次Nginx状态
#4 再次进行判断,如Nginx还不存活则停止Keepalived,让地址进行漂移
#!/bin/bash
counter=$(ps -ef |grep nginx | grep sbin | egrep -cv "grep|$$" )
if [ $counter -eq 0 ]; then
service nginx start
sleep 2
counter=$(ps -ef |grep nginx | grep sbin | egrep -cv "grep|$$" )
if [ $counter -eq 0 ]; then
service keepalived stop
fi
fi
# chmod +x /etc/keepalived/check_nginx.sh
#注:keepalived根据脚本返回状态码(0为工作正常,非0不正常)判断是否故障转移。
9.3.6 启动服务
备注:主备节点都执行
# systemctl daemon-reload
# yum -y install nginx-mod-stream
# systemctl start nginx
# systemctl start keepalived
# systemctl enable nginx keepalived
# systemctl status keepalived
9.4 部署k8s集群
9.4.1 配置安装k8s组件需要的阿里云的repo源
# vim /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
9.4.2 安装k8s初始化工具
# yum install -y kubelet-1.20.6 kubeadm-1.20.6 kubectl-1.20.6
# systemctl enable kubelet
注:每个软件包的作用
Kubeadm:kubeadm是一个工具,用来初始化k8s集群的
kubelet:安装在集群所有节点上,用于启动Pod的
kubectl:通过kubectl可以部署和管理应用,查看各种资源,创建 删除和更新各种组件
9.4.3 kubeadm初始化k8s集群
#把初始化k8s集群需要的离线镜像包上传到master1 master2 node1机器上,手动解压。(这一步可以不做)
# docker load -i k8simage-1-20-6.tar.gz
9.4.4 创建kubeadm-config.yaml文件
# kubeadm config print init-defaults > kubeadm.yaml
根据我们自己的需求修改配置,比如修改 imageRepository 的值,kube-proxy 的模式为 ipvs,初始化节点的时候需要指定cgroupDriver为systemd
# vim kubeadm.yaml
……
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: abcdef.0123456789abcdef
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 192.168.40.180 #控制节点的ip
bindPort: 6443
nodeRegistration:
criSocket: /var/run/dockershim.sock #这一行可以注释掉
name: master1 #控制节点主机名
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/master
---
apiServer:
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns:
type: CoreDNS
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers #指定从阿里云仓库拉取镜像
kind: ClusterConfiguration
kubernetesVersion: v1.20.6
networking:
dnsDomain: cluster.local
serviceSubnet: 10.96.0.0/12
podSubnet: 10.244.0.0/16 #指定pod网段,需要新增加这个
scheduler: {}
#追加如下几行
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
9.4.5 基于kubeadm.yaml文件初始化k8s
[root@master1~]# kubeadm init --config=kubeadm.yaml --ignore-preflight-errors=SystemVerification
9.4.6 配置kubectl的配置文件
#配置kubectl的配置文件config,相当于对kubectl进行授权,这样kubectl命令可以使用这个证书对k8s集群进行管理
[root@master1 ~]# mkdir -p $HOME/.kube
[root@master1 ~]# cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master1 ~]# chown $(id -u):$(id -g) $HOME/.kube/config
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 NotReady control-plane,master 60s v1.20.6
此时集群状态还是NotReady状态,因为没有安装网络插件。
9.4.7 扩容k8s集群-添加master节点
把master1节点的证书拷贝到master2上,在master2创建证书存放目录:
[root@master2 ~]# mkdir -p /etc/kubernetes/pki/etcd && mkdir -p ~/.kube/
把master1节点的证书拷贝到master2上:
[root@master1 ~]# cd /etc/kubernetes/pki/ && scp ca.crt ca.key sa.key sa.pub front-proxy-ca.crt front-proxy-ca.key master2:/etc/kubernetes/pki/
[root@master1 ~]# cd /etc/kubernetes/pki/etcd/ && scp ca.crt ca.key master2:/etc/kubernetes/pki/etcd/
证书拷贝之后在master2上执行如下命令,命令是在master1上生成,命令时效24h,大家复制自己生成的命令,再加上对应参数,这样就可以把master2加入到集群,成为控制节点。
[root@master1 ~]# kubeadm token create --print-join-command
显示如下:
kubeadm join 192.168.40.199:16443 --token zwzcks.u4jd8lj56wpckcwv \
--discovery-token-ca-cert-hash sha256:1ba1b274090feecfef58eddc2a6f45590299c1d0624618f1f429b18a064cb728
在master2上执行:
[root@master2 ~]#kubeadm join 192.168.40.199:16443 --token zwzcks.u4jd8lj56wpckcwv \
--discovery-token-ca-cert-hash sha256:1ba1b274090feecfef58eddc2a6f45590299c1d0624618f1f429b18a064cb728 \
--control-plane --ignore-preflight-errors=SystemVerification
在master1上查看集群状况:
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 NotReady control-plane,master 49m v1.20.6
master2 NotReady <none> 39s v1.20.6
9.4.8 扩容k8s集群-添加node节点
在master1上查看加入节点的命令:
[root@master1 ~]# kubeadm token create --print-join-command
显示如下:
kubeadm join 192.168.40.199:16443 --token y23a82.hurmcpzedblv34q8 --discovery-token-ca-cert-hash sha256:1ba1b274090feecfef58eddc2a6f45590299c1d0624618f1f429b18a064cb728
把node1加入k8s集群:
[root@node1~]# kubeadm token create --print-join-command
kubeadm join 192.168.40.199:16443 --token y23a82.hurmcpzedblv34q8 --discovery-token-ca-cert-hash sha256:1ba1b274090feecfef58eddc2a6f45590299c1d0624618f1f429b18a064cb728 --ignore-preflight-errors=SystemVerification
在master1上查看集群节点状况:
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 NotReady control-plane,master 53m v1.20.6
master2 NotReady control-plane,master 5m13s v1.20.6
node1 NotReady <none> 59s v1.20.6
注意:上面状态都是NotReady状态,说明没有安装网络插件
可以看到node1的ROLES角色为空,<none>就表示这个节点是工作节点。
可以把node1的ROLES变成worker,按照如下方法:
[root@master1 ~]# kubectl label node node1 node-role.kubernetes.io/worker=worker
[root@master1 ~]# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-7f89b7bc75-lh28j 0/1 Pending 0 18h
coredns-7f89b7bc75-p7nhj 0/1 Pending 0 18h
etcd-master1 1/1 Running 0 18h
etcd-master2 1/1 Running 0 15m
kube-apiserver-master1 1/1 Running 0 18h
kube-apiserver-master2 1/1 Running 0 15m
kube-controller-manager-master1 1/1 Running 1 18h
kube-controller-manager-master2 1/1 Running 0 15m
kube-proxy-n26mf 1/1 Running 0 4m33s
kube-proxy-sddbv 1/1 Running 0 18h
kube-proxy-sgqm2 1/1 Running 0 15m
kube-scheduler-master1 1/1 Running 1 18h
kube-scheduler-master2 1/1 Running 0 15m
9.4.9 安装kubernetes网络组件-Calico
上传calico.yaml到master1上,使用yaml文件安装calico 网络插件。
[root@master1 ~]# kubectl apply -f calico.yaml
注:在线下载配置文件地址是: https://docs.projectcalico.org/manifests/calico.yaml
[root@master1 ~]# kubectl get pod -n kube-system
再次查看集群状态。
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 Ready control-plane,master 58m v1.20.6
master2 Ready control-plane,master 15m v1.20.6
node1 Ready worker 5m46s v1.20.6
[root@master1 ~]# kubectl get pods -n kube-system
9.4.10测试在k8s创建pod是否可以正常访问网络
#把busybox-1-28.tar.gz上传到master1节点,手动解压。
[root@master1 ~]# docker load -i busybox-1-28.tar.gz
[root@master1 ~]# kubectl run busybox --image busybox:1.28 --image-pull-policy=IfNotPresent --restart=Never --rm -it -- sh
/ # ping www.baidu.com
PING www.baidu.com (39.156.66.18): 56 data bytes
64 bytes from 39.156.66.18: seq=0 ttl=127 time=39.3 ms
通过上面可以看到能访问网络,说明calico网络插件已经被正常安装了。
9.4.11 测试coredns是否正常
[root@master1 ~]# kubectl run busybox --image busybox:1.28 --restart=Never --rm -it -- sh
/ # nslookup kubernetes.default.svc.cluster.local
Server: 10.96.0.10
Address 1: 10.96.0.10 kube-dns.kube-system.svc.cluster.local
Name: kubernetes.default.svc.cluster.local
Address 1: 10.96.0.1 kubernetes.default.svc.cluster.local
10.96.0.10 就是我们coreDNS的clusterIP,说明coreDNS配置好了。解析内部Service的名称,是通过coreDNS去解析的。
#注意:busybox要用指定的1.28版本,不能用最新版本,最新版本,nslookup会解析不到dns和ip
9.4.12 kubeadm初始化k8s证书过期解决方案
查看证书有效时间:
# openssl x509 -in /etc/kubernetes/pki/ca.crt -noout -text |grep Not
# openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text |grep Not
延长证书过期时间
1.把update-kubeadm-cert.sh文件上传到master1节点
2.给update-kubeadm-cert.sh证书授权可执行权限
[root@master1~]# chmod +x update-kubeadm-cert.sh
3.执行下面命令,修改证书过期时间,把时间延长到10年
[root@master1~]# ./update-kubeadm-cert.sh all
4.在master1节点查询pod是否正常,能查询出数据说明证书签发完成
[root@master1~]# kubectl get pods -n kube-system
5.再次查看证书有效期,可以看到会延长到10年
# openssl x509 -in /etc/kubernetes/pki/ca.crt -noout -text |grep Not
# openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text |grep Not
9.4.13 配置其他节点可执行kubectl命令
备注:以master2节点为例
在需要执行kubectl命令的节点上创建.kube目录
[root@master2~]# mkdir /root/.kube
从master1上拷贝config文件到master2
[root@master1~]# scp /root/.kube/config master2:/root/.kube/