Kubernetes + Kube-OVN High-Availability Cluster Setup
1. Prepare the environment
| Role | IP |
|---|---|
| master1, node1 | 10.167.47.12 |
| master2, node2 | 10.167.47.24 |
| master3, node3 | 10.167.47.25 |
| VIP (virtual IP) | 10.167.47.86 |
```bash
# Add hosts entries on the master nodes
cat >> /etc/hosts << EOF
10.167.47.12 master1
10.167.47.24 master2
10.167.47.25 master3
EOF

# Disable the firewall
systemctl stop firewalld && systemctl disable firewalld

# Disable SELinux
sed -i 's/enforcing/disabled/' /etc/selinux/config   # permanent
setenforce 0                                         # temporary

# Disable swap
swapoff -a                              # temporary
sed -ri 's/.*swap.*/#&/' /etc/fstab     # permanent

# Set each hostname according to the plan above
hostnamectl set-hostname <hostname>

# Time synchronization (switch to the Aliyun repo first, then install ntpdate)
cp /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
yum clean all && yum makecache
yum install ntpdate -y && timedatectl set-timezone Asia/Shanghai && ntpdate time2.aliyun.com

# Add a crontab entry
crontab -e
0 5 * * * /usr/sbin/ntpdate time2.aliyun.com

# Also sync at boot via /etc/rc.local
vi /etc/rc.local
ntpdate time2.aliyun.com

# ulimit -a shows all current limits; ulimit -n shows the max number of open files.
# A fresh Linux install defaults to 1024, which easily triggers "error: too many open files"
# on a busy server, so raise it.
# ulimit -n 65535 takes effect immediately but is lost after a reboot
# (ulimit -SHn 65535 is equivalent; -S sets the soft limit, -H the hard limit).

# Temporary setting, lost after reboot
ulimit -SHn 65535

# Permanent setting
vi /etc/security/limits.conf
# Append the following at the end
* soft nofile 65536
* hard nofile 65536
* soft nproc 65536
* hard nproc 65536
* soft memlock unlimited
* hard memlock unlimited

# Tune kernel parameters
cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward=1
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
fs.may_detach_mounts=1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time=600
net.ipv4.tcp_keepalive_probes=3
net.ipv4.tcp_keepalive_intvl=15
net.ipv4.tcp_max_tw_buckets=36000
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_max_orphans=327680
net.ipv4.tcp_orphan_retries=3
net.ipv4.tcp_syncookies=1
net.ipv4.tcp_max_syn_backlog=16384
net.ipv4.ip_conntrack_max=65536
net.ipv4.tcp_timestamps=0
net.core.somaxconn=16384
EOF
sysctl --system   # apply

# After a reboot, verify that the required modules are loaded
lsmod | grep --color=auto -e ip_vs -e nf_conntrack

# Reboot
reboot
```
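Before moving on, it can help to confirm the preparation steps actually took effect on every node. The following is a minimal sketch, assuming the hostnames from the table above resolve and passwordless SSH is available between the masters:

```bash
# Minimal sanity check; the node list is taken from the planning table above.
for host in master1 master2 master3; do
  echo "== $host =="
  ssh "$host" '
    swapon --show | grep -q . && echo "swap: STILL ON" || echo "swap: off"
    getenforce
    sysctl -n net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables
    ulimit -n
  '
done
```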
2. Deploy keepalived on all master nodes
1. Install the related packages and keepalived
```bash
# Install and enable IPVS
yum install ipvsadm ipset sysstat conntrack libseccomp -y

# Load the modules now (temporary)
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4

# Load them automatically at boot (permanent)
cat <<EOF > /etc/modules-load.d/ipvs.conf
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
EOF

# Install haproxy
yum install -y haproxy
cat > /etc/haproxy/haproxy.cfg << EOF
global
    log         127.0.0.1 local0
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4000
    user        haproxy
    group       haproxy
    daemon
    stats socket /var/lib/haproxy/stats

defaults
    mode                 tcp
    log                  global
    option               tcplog
    option               dontlognull
    option               redispatch
    retries              3
    timeout queue        1m
    timeout connect      10s
    timeout client       1m
    timeout server       1m
    timeout check        10s
    maxconn              3000

# Listener name
listen k8s_master
    # Port exposed on the virtual IP
    bind 0.0.0.0:16443
    mode tcp
    option tcplog
    balance roundrobin
    # The load-balanced control-plane nodes
    server master1 10.167.47.12:6443 check inter 10000 fall 2 rise 2 weight 1
    server master2 10.167.47.24:6443 check inter 10000 fall 2 rise 2 weight 1
    server master3 10.167.47.25:6443 check inter 10000 fall 2 rise 2 weight 1
EOF

# Enable at boot
systemctl enable haproxy
# Start haproxy
systemctl start haproxy
# Check status
systemctl status haproxy

# Create the health-check script (keepalived is installed below; create its config
# directory first, and quote the heredoc so the backticks are not expanded here)
mkdir -p /etc/keepalived
cat > /etc/keepalived/check_haproxy.sh << 'EOF'
#!/bin/bash
if [ `ps -C haproxy --no-header | wc -l` == 0 ]; then
    systemctl start haproxy
    sleep 3
    if [ `ps -C haproxy --no-header | wc -l` == 0 ]; then
        systemctl stop keepalived
    fi
fi
EOF
chmod +x /etc/keepalived/check_haproxy.sh

# Install keepalived
yum install -y conntrack-tools libseccomp libtool-ltdl && yum install -y keepalived

# master1 configuration
cat > /etc/keepalived/keepalived.conf << EOF
global_defs {
    router_id master1
}
vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    interval 3000
}
vrrp_instance VI_1 {
    state MASTER
    # change to the actual NIC name on this host
    interface eth0
    virtual_router_id 80
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 111111
    }
    virtual_ipaddress {
        10.167.47.86
    }
    track_script {
        check_haproxy
    }
}
EOF

# master2 configuration
cat > /etc/keepalived/keepalived.conf << EOF
global_defs {
    router_id master2
}
vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    interval 3000
}
vrrp_instance VI_1 {
    state BACKUP
    # change to the actual NIC name on this host
    interface eth0
    virtual_router_id 80
    priority 90
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 111111
    }
    virtual_ipaddress {
        10.167.47.86
    }
    track_script {
        check_haproxy
    }
}
EOF

# master3 configuration
cat > /etc/keepalived/keepalived.conf << EOF
global_defs {
    router_id master3
}
vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    interval 3000
}
vrrp_instance VI_1 {
    state BACKUP
    # change to the actual NIC name on this host
    interface eth0
    virtual_router_id 80
    priority 80
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 111111
    }
    virtual_ipaddress {
        10.167.47.86
    }
    track_script {
        check_haproxy
    }
}
EOF

# Start keepalived
systemctl start keepalived.service
# Enable at boot
systemctl enable keepalived.service
# Check status
systemctl status keepalived.service
ip a s eth0
```
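Before initializing Kubernetes it is worth confirming that the VIP and the HAProxy frontend behave as expected. A minimal sketch, assuming the `eth0` interface name and the `16443` port from the configuration above:

```bash
# Check which node currently holds the VIP (run on each master)
ip -4 addr show eth0 | grep -q 10.167.47.86 && echo "this node holds the VIP"

# The API server is not running yet, so only verify that HAProxy is listening
# and that a TCP connection to the VIP on port 16443 can be opened.
ss -lntp | grep 16443
timeout 3 bash -c 'cat < /dev/null > /dev/tcp/10.167.47.86/16443' \
  && echo "VIP:16443 reachable" || echo "VIP:16443 not reachable"
```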
3. Install Docker
```bash
# Add the Docker yum repository
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# Alternatively: https://files.cnblogs.com/files/chuanghongmeng/docker-ce.zip?t=1669080259

# Install
yum install docker-ce-20.10.3 -y
mkdir -p /data/docker
mkdir -p /etc/docker/

# Note: newer kubelet versions recommend systemd, so switch Docker's CgroupDriver to systemd.
# If /etc/docker does not exist, starting Docker creates it automatically.
cat > /etc/docker/daemon.json <<EOF
{
  "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF

# Note: choose Docker's data path based on the server's disk layout, e.g. /data
vi /usr/lib/systemd/system/docker.service
ExecStart=/usr/bin/dockerd --graph=/data/docker

# Reload the unit files and restart
systemctl daemon-reload
systemctl restart docker
systemctl enable docker.service
```
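A quick check that the cgroup driver and data directory changes took effect (a sketch; the format fields shown are standard `docker info` fields, but output may differ slightly between Docker versions):

```bash
# Confirm the cgroup driver is systemd and the data root moved to /data/docker
docker info --format 'cgroup driver: {{.CgroupDriver}}'
docker info --format 'data root:     {{.DockerRootDir}}'
# Run a throwaway container to make sure the daemon is healthy
docker run --rm hello-world
```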
4. Install the Kubernetes components (all nodes)
```bash
# Switch to the Aliyun Kubernetes yum repository
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

# Switch the base repo to Aliyun
sed -i -e '/mirrors.cloud.aliyuncs.com/d' -e '/mirrors.aliyuncs.com/d' /etc/yum.repos.d/CentOS-Base.repo

# List the installable versions
yum list kubeadm.x86_64 --showduplicates | sort -r

# Remove any old versions
yum remove -y kubelet kubeadm kubectl

# Install
yum install kubeadm kubelet kubectl -y

# Enable and start at boot
systemctl enable kubelet && systemctl start kubelet
```
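Since the kubeadm configuration below pins `kubernetesVersion` to 1.28.2, it can help to pin the packages to the same version instead of installing whatever is newest in the repository. This is only a sketch; the `-0` release suffix is an assumption, so confirm the exact package version first:

```bash
# Optional: pin kubeadm/kubelet/kubectl to the version used in kubeadm-config.yaml.
# Check the available versions first; the "-0" suffix below is an assumption.
yum list kubeadm.x86_64 --showduplicates | grep 1.28.2
yum install -y kubeadm-1.28.2-0 kubelet-1.28.2-0 kubectl-1.28.2-0
systemctl enable kubelet
```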
5. Kubernetes initialization (master1)
```bash
mkdir -p /usr/local/kubernetes/manifests
cd /usr/local/kubernetes/manifests/

# containerd ships with "cri" in the disabled_plugins list; remove it, save the file
# and restart containerd. Run this on all nodes, otherwise kubelet cannot start.
rm /etc/containerd/config.toml
containerd config default > /etc/containerd/config.toml
# Make sure "cri" is no longer listed under disabled_plugins

crictl config runtime-endpoint /run/containerd/containerd.sock
# Edit /etc/crictl.yaml so it looks like the following; newer crictl versions also
# require image-endpoint, which must match runtime-endpoint
cat > /etc/crictl.yaml << EOF
runtime-endpoint: "unix:///run/containerd/containerd.sock"
image-endpoint: "unix:///run/containerd/containerd.sock"
timeout: 10
debug: false
pull-image-on-create: false
disable-pull-on-run: false
EOF

systemctl restart containerd

# Reset any previous kubeadm state before initializing
kubeadm reset
# kubeadm init --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.28.2 --service-cidr=10.11.0.0/16 --pod-network-cidr=10.10.0.0/16

# Create the init configuration file
vi kubeadm-config.yaml
```

```yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 10.167.47.12
  bindPort: 6443
nodeRegistration:
  criSocket: /run/containerd/containerd.sock
  # this node's hostname
  name: master1
---
apiServer:
  # high-availability settings
  extraArgs:
    authorization-mode: "Node,RBAC"
  # list the hostname and IP of every kube-apiserver node, plus the VIP
  certSANs:
  - master1
  - master2
  - master3
  - 10.167.47.12
  - 10.167.47.24
  - 10.167.47.25
  - 10.167.47.86
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
# use the Aliyun image mirror
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: 1.28.2
# virtual IP and port
controlPlaneEndpoint: "10.167.47.86:16443"
networking:
  dnsDomain: cluster.local
  podSubnet: 10.10.0.0/16
  serviceSubnet: 10.11.0.0/16
scheduler: {}
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
```

```bash
# Initialize the cluster.
# Run the next two commands on all nodes first, otherwise initialization fails
# because the pause image cannot be pulled:
ctr -n k8s.io images pull -k registry.aliyuncs.com/google_containers/pause:3.6
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/pause:3.6 registry.k8s.io/pause:3.6

kubeadm init --config=kubeadm-config.yaml

# Follow the printed instructions to configure kubectl:
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
kubectl get nodes
kubectl get pods -n kube-system

# Join the other master nodes.
# Copy the certificates to them first, otherwise joining fails.
# On master2 and master3:
mkdir -p /etc/kubernetes/pki
mkdir -p /etc/kubernetes/pki/etcd/
# On master1:
scp -rp /etc/kubernetes/pki/ca.* master2:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/sa.* master2:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/front-proxy-ca.* master2:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/etcd/ca.* master2:/etc/kubernetes/pki/etcd
scp -rp /etc/kubernetes/admin.conf master2:/etc/kubernetes/

scp -rp /etc/kubernetes/pki/ca.* master3:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/sa.* master3:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/front-proxy-ca.* master3:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/etcd/ca.* master3:/etc/kubernetes/pki/etcd
scp -rp /etc/kubernetes/admin.conf master3:/etc/kubernetes/
```
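The step above copies the certificates but does not show the actual join command for master2 and master3. A hedged sketch follows: the token and CA hash are placeholders, so use the values printed by `kubeadm init`, or regenerate them on master1 with `kubeadm token create --print-join-command`.

```bash
# On master2 and master3: join as additional control-plane nodes through the VIP.
# <token> and <hash> are placeholders taken from the `kubeadm init` output.
kubeadm join 10.167.47.86:16443 \
  --token <token> \
  --discovery-token-ca-cert-hash sha256:<hash> \
  --control-plane

# Back on master1, verify that all three control-plane nodes registered
kubectl get nodes -o wide
```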
6. Install Kube-OVN (on master1)
```bash
# Configure containerd registry mirrors first, otherwise the OVN images cannot be pulled.
# Run on all master nodes.
mkdir -p /etc/containerd/certs.d

# Edit config.toml: locate the existing [plugins."io.containerd.grpc.v1.cri".registry]
# section and point config_path at the certs.d directory (do not append a second,
# duplicate section at the end of the file):
vi /etc/containerd/config.toml
[plugins."io.containerd.grpc.v1.cri".registry]
  config_path = "/etc/containerd/certs.d"

# In each hosts.toml below, replace <mirror-url> with the mirror endpoint you want
# to use; the mirror addresses are environment-specific and are not recorded here.

# Docker Hub mirror (we also accelerate Docker Hub pulls, so create a docker.io directory)
mkdir -p /etc/containerd/certs.d/docker.io
cat > /etc/containerd/certs.d/docker.io/hosts.toml << 'EOF'
server = "https://docker.io"

# Add one [host."..."] block per mirror endpoint
[host."<mirror-url>"]
  capabilities = ["pull", "resolve"]
  skip_verify = true
EOF

# registry.k8s.io mirror
mkdir -p /etc/containerd/certs.d/registry.k8s.io
tee /etc/containerd/certs.d/registry.k8s.io/hosts.toml << 'EOF'
server = "https://registry.k8s.io"

[host."<mirror-url>"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

# docker.elastic.co mirror
mkdir -p /etc/containerd/certs.d/docker.elastic.co
tee /etc/containerd/certs.d/docker.elastic.co/hosts.toml << 'EOF'
server = "https://docker.elastic.co"

[host."<mirror-url>"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

# gcr.io mirror
mkdir -p /etc/containerd/certs.d/gcr.io
tee /etc/containerd/certs.d/gcr.io/hosts.toml << 'EOF'
server = "https://gcr.io"

[host."<mirror-url>"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

# ghcr.io mirror
mkdir -p /etc/containerd/certs.d/ghcr.io
tee /etc/containerd/certs.d/ghcr.io/hosts.toml << 'EOF'
server = "https://ghcr.io"

[host."<mirror-url>"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

# k8s.gcr.io mirror
mkdir -p /etc/containerd/certs.d/k8s.gcr.io
tee /etc/containerd/certs.d/k8s.gcr.io/hosts.toml << 'EOF'
server = "https://k8s.gcr.io"

[host."<mirror-url>"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

# mcr.m.daocloud.io mirror for mcr.microsoft.com
mkdir -p /etc/containerd/certs.d/mcr.microsoft.com
tee /etc/containerd/certs.d/mcr.microsoft.com/hosts.toml << 'EOF'
server = "https://mcr.microsoft.com"

[host."https://mcr.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

# nvcr.io mirror
mkdir -p /etc/containerd/certs.d/nvcr.io
tee /etc/containerd/certs.d/nvcr.io/hosts.toml << 'EOF'
server = "https://nvcr.io"

[host."<mirror-url>"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

# quay.io mirror
mkdir -p /etc/containerd/certs.d/quay.io
tee /etc/containerd/certs.d/quay.io/hosts.toml << 'EOF'
server = "https://quay.io"

[host."<mirror-url>"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

# registry.jujucharms.com mirror
mkdir -p /etc/containerd/certs.d/registry.jujucharms.com
tee /etc/containerd/certs.d/registry.jujucharms.com/hosts.toml << 'EOF'
server = "https://registry.jujucharms.com"

[host."<mirror-url>"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

# rocks.canonical.com mirror
mkdir -p /etc/containerd/certs.d/rocks.canonical.com
tee /etc/containerd/certs.d/rocks.canonical.com/hosts.toml << 'EOF'
server = "https://rocks.canonical.com"

[host."<mirror-url>"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

systemctl restart containerd

# Verify
ctr i pull --hosts-dir=/etc/containerd/certs.d registry.k8s.io/sig-storage/csi-provisioner:v3.5.0
ctr --debug=true i pull --hosts-dir=/etc/containerd/certs.d registry.k8s.io/sig-storage/csi-provisioner:v3.5.0

# Pull with crictl
crictl --debug=true pull docker.io/library/ubuntu:20.04
crictl images

# Download the automated installation script (may require a proxy to reach GitHub)
wget https://raw.githubusercontent.com/kubeovn/kube-ovn/release-1.10/dist/images/install.sh

# Adjust the parameters in install.sh
REGISTRY="kubeovn"                               # image registry
VERSION="v1.10.10"                               # image version/tag
POD_CIDR="10.10.0.0/16"                          # default subnet CIDR; must not overlap with the SVC/NODE/JOIN CIDRs
SVC_CIDR="10.11.0.0/16"                          # must match the apiserver's service-cluster-ip-range
JOIN_CIDR="100.12.0.0/16"                        # CIDR for pod-to-host traffic; must not overlap with the SVC/NODE/POD CIDRs
LABEL="node-role.kubernetes.io/control-plane"    # label of the nodes that run the OVN DB
IFACE=""                                         # host NIC used for the container network; empty means the NIC holding the Kubernetes node IP
TUNNEL_TYPE="geneve"                             # tunnel encapsulation: geneve, vxlan or stt (stt needs a custom-built OVS kernel module)

# Run the installation
bash install.sh
```
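Once the script finishes, a quick health check is useful before continuing. This is a sketch; the label selectors follow the component names the Kube-OVN manifests normally use:

```bash
# Kube-OVN components run in kube-system; wait for them to become Ready
kubectl -n kube-system get pods -l app=kube-ovn-controller -o wide
kubectl -n kube-system get pods -l app=kube-ovn-cni -o wide
kubectl -n kube-system get pods | grep -E 'ovn-central|ovs-ovn'

# The default and join subnets should appear once the controller is healthy
kubectl get subnets
kubectl get nodes -o wide   # nodes should now report Ready
```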
7. Let the master nodes also act as worker nodes
By default, Kubernetes control-plane (master) nodes do not run workloads, because control-plane nodes carry a `NoSchedule` taint. Removing that taint solves the problem, as shown below (master1 is used as the example; apply the same operation to the other control-plane nodes):
```bash
kubectl taint node master1 node-role.kubernetes.io/control-plane:NoSchedule-
# node/master1 untainted

# Check the result
kubectl describe node master1 | grep Taint

# Or remove the taint from all three nodes with one loop
for node in $(kubectl get nodes --selector='node-role.kubernetes.io/control-plane' | awk 'NR>1 {print $1}'); do
    kubectl taint node $node node-role.kubernetes.io/control-plane-
done
```
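To confirm that scheduling really works on the untainted nodes, a throwaway pod can be pinned to one of them. This is only a sketch; the pod name and the `nginx:alpine` image are arbitrary choices:

```bash
# Hypothetical test pod pinned to master1; delete it afterwards
kubectl run sched-test --image=nginx:alpine --restart=Never \
  --overrides='{"spec":{"nodeName":"master1"}}'
kubectl get pod sched-test -o wide   # should show NODE master1 and eventually Running
kubectl delete pod sched-test
```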
Note: the control-plane (master) nodes are given the worker-node role here only for testing; in general this is not recommended. The control plane runs the critical components that manage the entire cluster, including scheduling workloads and monitoring the state of nodes and containers. Letting control-plane nodes also carry workloads has downsides such as resource contention, added latency, and reduced stability, and finally it also carries security risks.
8. Install Kuboard (master1)
1. Install Helm
```bash
wget https://get.helm.sh/helm-v3.12.3-linux-amd64.tar.gz
tar -zxvf helm-v3.12.3-linux-amd64.tar.gz
cd linux-amd64
cp helm /usr/local/bin/
helm version
```
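Helm is not used again in this walkthrough, but a quick smoke test confirms the binary can talk to the new cluster. A sketch; the bitnami repository and chart name are just illustrative examples:

```bash
# Hypothetical example repository; any chart repository works
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo update
helm search repo bitnami/nginx | head -n 3
helm list -A    # talks to the cluster via ~/.kube/config
```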
2. Install Kuboard
Since the Kubernetes Dashboard only allows access from localhost, install Kuboard instead.
```bash
# This manifest uses Huawei Cloud's image registry instead of Docker Hub to distribute the Kuboard images
wget https://addons.kuboard.cn/kuboard/kuboard-v3-swr.yaml

# Edit the configuration inside the manifest as follows:
#   KUBOARD_SERVER_NODE_PORT: '30080'  ==>  KUBOARD_ENDPOINT: 'http://your-node-ip-address:30080'
kubectl apply -f kuboard-v3-swr.yaml

# Browse to http://<masterIP>:30080
# Username: admin
# Password: Kuboard123
```
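A short final check that Kuboard came up and the NodePort answers. This is a sketch and assumes the manifest's default `kuboard` namespace; any node IP (or the VIP, as below) should reach a NodePort service:

```bash
# Kuboard v3 installs into the "kuboard" namespace by default
kubectl -n kuboard get pods -o wide
kubectl -n kuboard get svc

# Reachability test through the VIP (any node IP also works for a NodePort)
curl -sI http://10.167.47.86:30080 | head -n 1
```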