离线方式安装高可用RKE2 (版本: v1.22.13+rke2r1)记录

说明:

  1. 我的环境比较特殊,机房上不了外网, 但是有内网YUM源, 如果各位没有内网YUM可以给yum添加代理
  2. 部分资源(安装包,api审计yaml文件,chart包)部署时也是用的内网资源,这些我同步自己的aliyun server上了,但是像rke2的镜像包体积比较大,我的阿里云虚拟机带宽太小没法快速下载,只能靠各位上github下载,rke2-images.linux-amd64.tar.zst,rke2.linux-amd64.tar.gz,sha256sum-amd64.txt下载地址见 https://github.com/rancher/rke2/releases
  3. 因为上不了外网,我给containerd加了代理配置(/etc/sysconfig/rke2-server 和 /etc/sysconfig/rke2-agent),如果各位的环境直接能访问外网,可以不用添加这个配置
  4. 想要了解RKE2可以到官网(https://docs.rke2.io 或中文站 https://docs.rancher.cn/docs/rke2/_index/)看最新资料
  5. 用官网给出的离线安装(https://docs.rke2.io/install/airgap/)方式可能有坑, 节点拉起时安装网络插件必须使用到网络,没有网络时节点会一直处于 NotReady 状态
  6. 在线安装方式的文档可以看: https://fanq.ren/2022/09/03/k8s/rke2搭建k8s
  7. 如果发现有啥错误,欢迎评论区打扰

初始化节点

配置主机名信息,添加本地hosts解析

# Set the hostname. Run only the ONE line that matches the node you are on,
# not all eight.
hostnamectl set-hostname rke2-master-default-loadblance
hostnamectl set-hostname rke2-master-default-nodepool-1
hostnamectl set-hostname rke2-master-default-nodepool-2
hostnamectl set-hostname rke2-master-default-nodepool-3
hostnamectl set-hostname rke2-node-default-nodepool-1
hostnamectl set-hostname rke2-node-default-nodepool-2
hostnamectl set-hostname rke2-node-default-nodepool-3
hostnamectl set-hostname rke2-node-default-nodepool-4

修改所有节点hosts

# Append static name resolution for every cluster node (run on all nodes).
# Re-running this appends duplicate entries; run it once per node.
cat >> /etc/hosts << EOF
172.17.0.50  rke2-master-default-loadblance
172.17.0.51  rke2-master-default-nodepool-1
172.17.0.52  rke2-master-default-nodepool-2
172.17.0.53  rke2-master-default-nodepool-3
172.17.0.54  rke2-node-default-nodepool-1
172.17.0.55  rke2-node-default-nodepool-2
172.17.0.56  rke2-node-default-nodepool-3
172.17.0.57  rke2-node-default-nodepool-4
EOF

重新生成机器ID并重启

# Regenerate a unique machine ID (cloned VMs otherwise share the same one,
# which confuses systemd/journald and some cluster tooling).
# FIX: the original used `cp -rf /dev/null /etc/machine-id` — with GNU cp,
# -r copies special files *as* special files, turning /etc/machine-id into
# a character device node. Truncate the file instead.
true > /etc/machine-id
systemd-machine-id-setup
reboot

所有节点安装需要的包

# Stop and disable services that conflict with Kubernetes (firewall, local
# DNS, legacy NTP, MTA), flush all iptables rules, disable swap and SELinux,
# then install the base packages and enable chrony for time sync.
for svc in firewalld dnsmasq ntpd postfix; do
  systemctl stop "$svc"
  systemctl disable "$svc"
done
iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat && iptables -P FORWARD ACCEPT
swapoff -a
sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
setenforce 0
sed -i '/SELINUX/s/enforcing/disabled/' /etc/selinux/config
yum -y install epel-release
yum -y install chrony curl wget vim sysstat net-tools openssl openssh lsof socat nfs-utils conntrack ipvsadm ipset iptables sysstat libseccomp
systemctl disable rpcbind
systemctl enable chronyd

调整limit

# Raise process and open-file limits for all users; kubelet/containerd open
# many files and spawn many threads. Re-running appends duplicate entries.
cat >> /etc/security/limits.conf << EOF
* soft nproc  65535
* hard nproc  65535
* soft nofile 65535
* hard nofile 65535
EOF

调整时间同步服务

# Replace chrony config: sync from Aliyun + NTSC public NTP servers.
# Note this OVERWRITES /etc/chrony.conf (no merge with the distro default).
cat >  /etc/chrony.conf << EOF
server ntp.aliyun.com iburst
server ntp.ntsc.ac.cn iburst
driftfile /var/lib/chrony/drift
makestep 1.0 3
rtcsync
logdir /var/log/chrony

EOF

调整NetworkManager

# Tell NetworkManager to leave CNI-managed interfaces (calico/flannel)
# alone, otherwise it may fight with the CNI over their state.
# FIX: the original redirected into the directory /etc/NetworkManager/conf.d
# itself, which fails ("Is a directory"); the drop-in must be a *.conf file
# inside that directory. Applied on the reboot that follows the kernel
# upgrade below (or run `systemctl reload NetworkManager` manually).
cat > /etc/NetworkManager/conf.d/rke2-canal.conf << EOF
[keyfile]
unmanaged-devices=interface-name:cali*;interface-name:flannel*
EOF

升级内核

# Upgrade to the ELRepo long-term 5.4 kernel (mirrored on Aliyun), make the
# newly installed kernel the default boot entry, and reboot into it.
for rpm in kernel-lt-5.4.134-1.el7.elrepo.x86_64.rpm kernel-lt-devel-5.4.134-1.el7.elrepo.x86_64.rpm; do
  curl -LO "https://mirrors.aliyun.com/elrepo/kernel/el7/x86_64/RPMS/${rpm}"
done
yum install kernel-lt-* -y
# Entry 0 is the most recently installed kernel.
grub2-set-default 0
grub2-mkconfig -o /etc/grub2.cfg
# Print the default kernel so you can eyeball it before rebooting.
grubby --default-kernel
reboot

导入ipvs模块

# Declare the IPVS and netfilter kernel modules to load at boot; kube-proxy
# is configured with proxy-mode=ipvs later, which needs the ip_vs* modules.
cat > /etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
br_netfilter
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
overlay
EOF

# systemd-modules-load reads /etc/modules-load.d/*.conf; --now loads the
# modules immediately instead of waiting for the next reboot.
systemctl enable --now systemd-modules-load.service

修改内核并重启所有节点

# Kernel tuning for Kubernetes: enable IP forwarding and bridged traffic
# through iptables, disable IPv6, raise conntrack/file limits, and avoid
# swapping. Applied by the reboot below (or `sysctl --system` to apply now).
cat >  /etc/sysctl.d/kubernetes.conf <<EOF
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_intvl = 30
net.ipv4.tcp_keepalive_probes = 10
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
net.ipv6.conf.lo.disable_ipv6 = 1
net.ipv4.neigh.default.gc_stale_time = 120
net.ipv4.conf.all.rp_filter = 0
net.ipv4.conf.default.rp_filter = 0
net.ipv4.conf.default.arp_announce = 2
net.ipv4.conf.lo.arp_announce = 2
net.ipv4.conf.all.arp_announce = 2
net.ipv4.ip_forward = 1
net.ipv4.tcp_max_tw_buckets = 5000
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 1024
net.ipv4.tcp_synack_retries = 2
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables = 1
net.netfilter.nf_conntrack_max = 2310720
fs.inotify.max_user_watches=89100
fs.file-max = 52706963
fs.nr_open = 52706963
vm.swappiness = 0
vm.overcommit_memory=1
kernel.panic=10
EOF

# Reboot so the sysctl values (and the br_netfilter-dependent keys) apply.
reboot

Haproxy节点配置

# The dedicated load-balancer node (172.17.0.50) only needs haproxy.
yum install haproxy -y

写入以下配置到/etc/haproxy/haproxy.cfg

# HAProxy: TCP load balancer in front of the three masters for both the
# rke2 supervisor port (9345, node registration) and the Kubernetes API
# server (6443).
global
    log         127.0.0.1 local2
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     6000
    user        haproxy
    group       haproxy
    daemon
    stats socket /var/lib/haproxy/stats

defaults
    mode                    tcp
    log                     global
    option                  tcplog
    option                  dontlognull
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000

# Status web UI on :9000/status. Credentials are admin:admin — change them
# before exposing this anywhere non-trivial.
listen stats
    bind 0.0.0.0:9000
    mode  http
    stats uri /status
    stats refresh 30s
    stats realm "Haproxy Manager"
    stats auth admin:admin
    stats hide-version
    stats admin if TRUE


frontend  rke2-server
   bind *:9345
   mode tcp
   default_backend      rke2-server

# FIX: the original repeated the 'check' keyword twice on every server
# line; it is kept once, directly before its inter/rise/fall parameters.
backend   rke2-server
    balance     roundrobin
    mode        tcp
    server      rke2-master-default-nodepool-1 172.17.0.51:9345 weight 1 maxconn 1000 check inter 2000 rise 2 fall 3
    server      rke2-master-default-nodepool-2 172.17.0.52:9345 weight 1 maxconn 1000 check inter 2000 rise 2 fall 3
    server      rke2-master-default-nodepool-3 172.17.0.53:9345 weight 1 maxconn 1000 check inter 2000 rise 2 fall 3


frontend  rke2-apiserver
   bind *:6443
   mode tcp
   default_backend      rke2-apiserver

backend   rke2-apiserver
    balance     roundrobin
    mode        tcp
    server      rke2-master-default-nodepool-1 172.17.0.51:6443 weight 1 maxconn 1000 check inter 2000 rise 2 fall 3
    server      rke2-master-default-nodepool-2 172.17.0.52:6443 weight 1 maxconn 1000 check inter 2000 rise 2 fall 3
    server      rke2-master-default-nodepool-3 172.17.0.53:6443 weight 1 maxconn 1000 check inter 2000 rise 2 fall 3

启动haproxy

# Start haproxy now and enable it on boot.
systemctl enable --now haproxy.service

RKE2-Server初始化

在所有Master节点上执行资源下载以及安装

# Pre-create the etcd system user/group and the snapshot directory; the
# config.yaml below enables profile "cis-1.6" and points etcd snapshots at
# /var/lib/etcd-snapshots, so ownership must be etcd:etcd beforehand.
groupadd -r etcd
useradd -r -g etcd -s /bin/false -d /var/lib/etcd etcd
mkdir -p /var/lib/etcd-snapshots
chown -Rf etcd:etcd /var/lib/etcd-snapshots
mkdir -p /etc/rke2/config
# Audit policy file referenced by audit-policy-file in config.yaml below.
curl -L https://storage.corpintra.plus/rke2/audit-policy.yaml -o /etc/rke2/config/audit-policy.yaml

# Proxy environment for containerd image pulls (air-gapped environment);
# skip this file entirely if the node has direct internet access.
cat > /etc/sysconfig/rke2-server <<EOF
CONTAINERD_HTTP_PROXY=http://192.168.1.10:3128
CONTAINERD_HTTPS_PROXY=http://192.168.1.10:3128
CONTAINERD_NO_PROXY=127.0.0.0/8,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,.svc,.cluster.local
EOF

# Download the rke2 release artifacts and install in airgap mode.
# FIX: mkdir -p makes this re-runnable; the original also downloaded the
# checksum file but never verified the artifacts against it.
mkdir -p /root/rke2-artifacts
cd /root/rke2-artifacts/
curl -LO  https://github.com/rancher/rke2/releases/download/v1.22.13%2Brke2r1/rke2-images.linux-amd64.tar.zst
curl -LO  https://github.com/rancher/rke2/releases/download/v1.22.13%2Brke2r1/rke2.linux-amd64.tar.gz
curl -LO  https://github.com/rancher/rke2/releases/download/v1.22.13%2Brke2r1/sha256sum-amd64.txt
# Verify the two downloaded artifacts. Filter the checksum file first:
# CentOS 7 coreutils (8.22) lacks `sha256sum --ignore-missing`.
grep -E 'rke2-images\.linux-amd64\.tar\.zst|rke2\.linux-amd64\.tar\.gz' sha256sum-amd64.txt | sha256sum -c -
curl -sfL https://rancher-mirror.oss-cn-beijing.aliyuncs.com/rke2/install.sh | INSTALL_RKE2_TYPE=server INSTALL_RKE2_MIRROR=cn INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts INSTALL_RKE2_VERSION=v1.22.13+rke2r1 sh -

配置第一台Master节点

常用的参数见: https://docs.rke2.io/install/install_options/server_config/
网络选择: https://docs.rke2.io/install/network_options/
如果你想禁用一些RKE2自带的Chart: https://docs.rke2.io/advanced/#disabling-server-charts

# config.yaml for the FIRST control-plane node. token/server stay commented
# out on the first node (it bootstraps the cluster); after all three masters
# are up, fill them in and uncomment, per the inline notes in the file.
mkdir -p /etc/rancher/rke2 /var/lib/etcd-snapshots
cat >  /etc/rancher/rke2/config.yaml <<EOF
write-kubeconfig-mode: "0644"
#token: <token for server node> ##等rke2-server成功起来后查看/var/lib/rancher/rke2/server/token文件获得,这儿不影响后面节点的拉起,等master拉起后把token加上,取消注释就可以了
#server: https://172.17.0.50:9345 #等三台都起来后把这个配置取消注释
tls-san:
  - "127.0.0.1"
  - "172.17.0.50"
  - "172.17.0.51"
  - "172.17.0.52"
  - "172.17.0.53"
  - "rke2-master-default-loadblance"
  - "rke2-master-default-nodepool-1"
  - "rke2-master-default-nodepool-2"
  - "rke2-master-default-nodepool-3"
cni: "calico"
profile: "cis-1.6"
cluster-cidr: 10.244.0.0/16
service-cidr: 10.96.0.0/16
disable-cloud-controller: true
kube-proxy-arg:
- "proxy-mode=ipvs"
kubelet-arg:
- "max-pods=110"
audit-policy-file: "/etc/rke2/config/audit-policy.yaml"
etcd-snapshot-schedule-cron: "0 */4 * * *"
etcd-snapshot-retention: "84"
etcd-snapshot-dir: "/var/lib/etcd-snapshots"
disable: 
- "rke2-ingress-nginx"
#- "rke2-metrics-server"
EOF

启动第一台节点(需要耐心等一等,很慢,如果遇到启动失败的,重启一次,一般重启一次能解决 )

# First start loads the airgap image tarball — this can take several minutes.
systemctl enable --now rke2-server.service

配置第二台Master节点

# config.yaml for the SECOND control-plane node: same settings as the first,
# plus the join token (from master 1's /var/lib/rancher/rke2/server/token)
# and the server URL pointing at the haproxy VIP.
mkdir -p /etc/rancher/rke2
cat >  /etc/rancher/rke2/config.yaml <<EOF
write-kubeconfig-mode: "0644"
token: <token for server node> ##填写第一个master server节点的token,通过在第一个master节点查看/var/lib/rancher/rke2/server/token文件获得;
server: https://172.17.0.50:9345
tls-san:
  - "127.0.0.1"
  - "172.17.0.50"
  - "172.17.0.51"
  - "172.17.0.52"
  - "172.17.0.53"
  - "rke2-master-default-loadblance"
  - "rke2-master-default-nodepool-1"
  - "rke2-master-default-nodepool-2"
  - "rke2-master-default-nodepool-3"
cni: "calico"
profile: "cis-1.6"
cluster-cidr: 10.244.0.0/16
service-cidr: 10.96.0.0/16
disable-cloud-controller: true
kube-proxy-arg:
- "proxy-mode=ipvs"
kubelet-arg:
- "max-pods=110"
audit-policy-file: "/etc/rke2/config/audit-policy.yaml"
etcd-snapshot-schedule-cron: "0 */4 * * *"
etcd-snapshot-retention: "84"
etcd-snapshot-dir: "/var/lib/etcd-snapshots"
disable: 
- "rke2-ingress-nginx"
#- "rke2-metrics-server"
EOF

启动第二台节点, 比较慢,耐心等等

# Joins the existing cluster through the load balancer; expect a slow start.
systemctl enable --now rke2-server.service

配置第三台Master节点

# config.yaml for the THIRD control-plane node — identical to the second.
mkdir -p /etc/rancher/rke2
cat >  /etc/rancher/rke2/config.yaml <<EOF
write-kubeconfig-mode: "0644"
token: <token for server node> ##填写第一个master server节点的token,通过在第一个master节点查看/var/lib/rancher/rke2/server/token文件获得;
server: https://172.17.0.50:9345
tls-san:
  - "127.0.0.1"
  - "172.17.0.50"
  - "172.17.0.51"
  - "172.17.0.52"
  - "172.17.0.53"
  - "rke2-master-default-loadblance"
  - "rke2-master-default-nodepool-1"
  - "rke2-master-default-nodepool-2"
  - "rke2-master-default-nodepool-3"
cni: "calico"
profile: "cis-1.6"
cluster-cidr: 10.244.0.0/16
service-cidr: 10.96.0.0/16
disable-cloud-controller: true
kube-proxy-arg:
- "proxy-mode=ipvs"
kubelet-arg:
- "max-pods=110"
audit-policy-file: "/etc/rke2/config/audit-policy.yaml"
etcd-snapshot-schedule-cron: "0 */4 * * *"
etcd-snapshot-retention: "84"
etcd-snapshot-dir: "/var/lib/etcd-snapshots"
disable: 
- "rke2-ingress-nginx"
#- "rke2-metrics-server"
EOF

启动第三台节点, 比较慢,耐心等等

# Joins the existing cluster through the load balancer; expect a slow start.
systemctl enable --now rke2-server.service

等所有Master节点起来后执行下面命令检查

# Verify all three masters registered and show Ready.
/var/lib/rancher/rke2/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml get nodes -o wide

RKE2-Agent节点安装

挨台安装Agent节点

# Proxy environment for containerd on agent nodes (air-gapped environment);
# skip this file entirely if the node has direct internet access.
cat > /etc/sysconfig/rke2-agent <<EOF
CONTAINERD_HTTP_PROXY=http://192.168.1.10:3128
CONTAINERD_HTTPS_PROXY=http://192.168.1.10:3128
CONTAINERD_NO_PROXY=127.0.0.0/8,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,.svc,.cluster.local
EOF


# Download the rke2 release artifacts and install in airgap mode (agent).
# FIX: mkdir -p makes this re-runnable; the original also downloaded the
# checksum file but never verified the artifacts against it.
mkdir -p /root/rke2-artifacts
cd /root/rke2-artifacts/
curl -LO  https://github.com/rancher/rke2/releases/download/v1.22.13%2Brke2r1/rke2-images.linux-amd64.tar.zst
curl -LO  https://github.com/rancher/rke2/releases/download/v1.22.13%2Brke2r1/rke2.linux-amd64.tar.gz
curl -LO  https://github.com/rancher/rke2/releases/download/v1.22.13%2Brke2r1/sha256sum-amd64.txt
# Verify the two downloaded artifacts. Filter the checksum file first:
# CentOS 7 coreutils (8.22) lacks `sha256sum --ignore-missing`.
grep -E 'rke2-images\.linux-amd64\.tar\.zst|rke2\.linux-amd64\.tar\.gz' sha256sum-amd64.txt | sha256sum -c -
curl -sfL https://rancher-mirror.oss-cn-beijing.aliyuncs.com/rke2/install.sh | INSTALL_RKE2_TYPE=agent INSTALL_RKE2_MIRROR=cn INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts INSTALL_RKE2_VERSION=v1.22.13+rke2r1 sh -

# Agent config: server URL (haproxy VIP) plus the join token from master 1.
# NOTE(review): cluster-cidr and service-cidr are server-side options; they
# may be ignored or rejected on agents — confirm against the rke2 agent
# config reference before relying on them here.
mkdir -p /etc/rancher/rke2
cat >  /etc/rancher/rke2/config.yaml <<EOF
server: https://172.17.0.50:9345
token: <token for server node> ##填写第一个master server节点的token,通过在第一个master节点查看/var/lib/rancher/rke2/server/token文件获得;
cluster-cidr: 10.244.0.0/16
service-cidr: 10.96.0.0/16
kube-proxy-arg:
- "proxy-mode=ipvs"
kubelet-arg:
- "max-pods=110"
EOF


# Start the agent now and enable it on boot.
systemctl enable rke2-agent.service --now

等所有Node节点起来后执行下面命令检查

# Verify the worker nodes registered and show Ready (run on a master).
/var/lib/rancher/rke2/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml get nodes -o wide

RKE2默认使用containerd作为Runtime,如果想要查看节点上运行的容器,可以使用下面的命令:

# Point crictl at rke2's bundled containerd config, then list containers.
export CRI_CONFIG_FILE=/var/lib/rancher/rke2/agent/etc/crictl.yaml && /var/lib/rancher/rke2/bin/crictl ps

安装ingress-nginx-controller

随便找个master节点

# Install ingress-nginx from a mirrored chart. Images come from an Aliyun
# mirror registry; the controller runs as a 3-replica Deployment exposed via
# NodePort 32080 (http) / 32443 (https). The bundled rke2-ingress-nginx
# chart was disabled in config.yaml, so this replaces it.
export KUBECONFIG=/etc/rancher/rke2/rke2.yaml
curl -LO https://storage.corpintra.plus/kubernetes/charts/ingress-nginx-4.2.5.tgz
helm upgrade ingress-nginx --namespace ingress-nginx --create-namespace --debug --wait --install --atomic \
   --set controller.kind="Deployment" \
   --set controller.replicaCount="3" \
   --set controller.minAvailable="1" \
   --set controller.image.registry="registry.cn-hangzhou.aliyuncs.com" \
   --set controller.image.image="kubelibrary/ingress-nginx-controller" \
   --set controller.image.tag="v1.3.1" \
   --set controller.image.digest="sha256:d3642f55a6a7a102a9a579b3382fe73869c73890de4c94f28e36ba5e07925944" \
   --set controller.ingressClassResource.name="nginx" \
   --set controller.ingressClassResource.enable="true" \
   --set controller.ingressClassResource.default="false" \
   --set controller.service.enabled="true" \
   --set controller.service.type="NodePort" \
   --set controller.service.enableHttps="false" \
   --set controller.service.nodePorts.http="32080" \
   --set controller.service.nodePorts.https="32443" \
   --set controller.admissionWebhooks.enabled="true" \
   --set controller.admissionWebhooks.patch.image.registry="registry.cn-hangzhou.aliyuncs.com" \
   --set controller.admissionWebhooks.patch.image.image="kubelibrary/kube-webhook-certgen" \
   --set controller.admissionWebhooks.patch.image.tag="v1.3.0" \
   --set controller.admissionWebhooks.patch.image.digest="sha256:fe821886866f174069dbb1e3af741662efb44952e39d66488d1fb811673440b7" \
   --set defaultBackend.enabled="true" \
   --set defaultBackend.name="defaultbackend" \
   --set defaultBackend.image.registry="registry.cn-hangzhou.aliyuncs.com" \
   --set defaultBackend.image.image="kubelibrary/defaultbackend-amd64" \
   --set defaultBackend.image.tag="1.5" \
   --set defaultBackend.replicaCount="1" \
   --set defaultBackend.minAvailable="1" \
   --set rbac.create="true" \
   --set serviceAccount.create="true" \
   --set podSecurityPolicy.enabled="true" \
   ./ingress-nginx-4.2.5.tgz

安装存储插件(可选)

# Optional: SMB CSI driver from a mirrored chart, all images pulled from
# the Aliyun mirror registry.
export KUBECONFIG=/etc/rancher/rke2/rke2.yaml
curl -LO https://storage.corpintra.plus/kubernetes/charts/csi-driver-smb-v1.9.0.tgz
helm upgrade csi-driver-smb \
   --namespace kube-system \
   --create-namespace \
   --debug \
   --wait \
   --install \
   --atomic \
   --set image.baseRepo="registry.cn-hangzhou.aliyuncs.com/kubelibrary" \
   --set image.smb.repository="registry.cn-hangzhou.aliyuncs.com/kubelibrary/smbplugin" \
   --set image.smb.tag="v1.9.0" \
   --set image.csiProvisioner.repository="registry.cn-hangzhou.aliyuncs.com/kubelibrary/csi-provisioner" \
   --set image.csiProvisioner.tag="v3.2.0" \
   --set image.livenessProbe.repository="registry.cn-hangzhou.aliyuncs.com/kubelibrary/livenessprobe" \
   --set image.livenessProbe.tag="v2.7.0" \
   --set image.nodeDriverRegistrar.repository="registry.cn-hangzhou.aliyuncs.com/kubelibrary/csi-node-driver-registrar" \
   --set image.nodeDriverRegistrar.tag="v2.5.1" \
   --set controller.replicas=2 \
   ./csi-driver-smb-v1.9.0.tgz
   
   
# Optional: NFS CSI driver from a mirrored chart, all images pulled from
# the Aliyun mirror registry (uses the KUBECONFIG exported above).
curl -LO https://storage.corpintra.plus/kubernetes/charts/csi-driver-nfs-v4.1.0.tgz   
helm upgrade csi-driver-nfs \
   --namespace kube-system \
   --create-namespace \
   --debug \
   --wait \
   --install \
   --atomic \
   --set image.nfs.repository="registry.cn-hangzhou.aliyuncs.com/kubelibrary/nfsplugin" \
   --set image.nfs.tag="v4.1.0" \
   --set image.csiProvisioner.repository="registry.cn-hangzhou.aliyuncs.com/kubelibrary/csi-provisioner" \
   --set image.csiProvisioner.tag="v3.2.0" \
   --set image.livenessProbe.repository="registry.cn-hangzhou.aliyuncs.com/kubelibrary/livenessprobe" \
   --set image.livenessProbe.tag="v2.7.0" \
   --set image.nodeDriverRegistrar.repository="registry.cn-hangzhou.aliyuncs.com/kubelibrary/csi-node-driver-registrar" \
   --set image.nodeDriverRegistrar.tag="v2.5.1" \
   --set controller.replicas=2 \
   ./csi-driver-nfs-v4.1.0.tgz
posted @   银河系派件员  阅读(2402)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 使用C#创建一个MCP客户端
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列1:轻松3步本地部署deepseek,普通电脑可用
· 按钮权限的设计及实现
点击右上角即可分享
微信分享提示