K8s 1.28 Production High-Availability Cluster Deployment (Rocky Linux 8.7 + Kubernetes + Dashboard + Helm + Prometheus + Grafana + Alertmanager)
Multi-master K8s production cluster deployment
Environment
K8s cluster role | Hostname | IP address | Notes |
---|---|---|---|
Control node | master01 | 192.168.71.201 | |
Control node | master02 | 192.168.71.202 | |
Control node | master03 | 192.168.71.203 | |
Worker node | worker01 | 192.168.71.204 | |
Worker node | worker02 | 192.168.71.205 | |
Operating system: Rocky Linux 8.7 (Green Obsidian)
System initialization
Run the following on every node.
# Configure passwordless SSH between nodes
ssh-keygen # press Enter through all prompts
for i in 201 202 203 204 205;do ssh-copy-id root@192.168.71.$i; done
# Disable the firewall
systemctl stop firewalld
systemctl disable firewalld
# Disable SELinux
# Permanently
sed -i 's/enforcing/disabled/' /etc/selinux/config
# Temporarily (current session)
setenforce 0
# Disable swap
# Temporarily (current session)
swapoff -a
# Permanently
sed -ri 's/.*swap.*/#&/' /etc/fstab
# Set the hostname according to the plan above
hostnamectl set-hostname <hostname> && bash
# Add the cluster hosts entries
cat >> /etc/hosts << EOF
192.168.71.201 master01
192.168.71.202 master02
192.168.71.203 master03
192.168.71.204 worker01
192.168.71.205 worker02
EOF
# Pass bridged IPv4 traffic to the iptables chains
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward=1
EOF
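Note: the net.bridge.* keys are only available once the br_netfilter kernel module is loaded; if sysctl --system below warns that these keys do not exist, load the module and persist it across reboots first:
# Load br_netfilter now and on every boot
modprobe br_netfilter
cat > /etc/modules-load.d/k8s.conf << EOF
br_netfilter
EOF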
# Apply the sysctl settings
sysctl --system
# Enable the IPVS kernel modules
cat > /etc/sysconfig/modules/ipvs.modules << 'EOF'
#!/bin/bash
ipvs_modules="ip_vs ip_vs_lc ip_vs_wlc ip_vs_rr ip_vs_wrr ip_vs_lblc ip_vs_lblcr ip_vs_dh ip_vs_sh ip_vs_nq ip_vs_sed ip_vs_ftp nf_conntrack"
for kernel_module in ${ipvs_modules}; do
  /sbin/modinfo -F filename ${kernel_module} > /dev/null 2>&1
  if [ $? -eq 0 ]; then
    /sbin/modprobe ${kernel_module}
  fi
done
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep ip_vs
# Time synchronization
curl -fsSL "https://sh.wlnmp.com/wlnmp.sh" | bash
yum install wntp wget -y
ntpdate time.windows.com
# Turn the time sync into a cron job
crontab -e
*/5 * * * * /usr/sbin/ntpdate time-a.nist.gov s2m.time.edu.cn ntp.sjtu.edu.cn &> /dev/null
####### Install Docker
# Install base packages
yum install wget jq psmisc vim net-tools telnet yum-utils device-mapper-persistent-data lvm2 git lrzsz ipvsadm -y
# Add the Docker yum repository
wget https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo -O /etc/yum.repos.d/docker-ce.repo
# Install Docker
yum -y install docker-ce
# Enable and start Docker
systemctl enable docker && systemctl start docker
Configure a registry mirror (image download accelerator)
cat > /etc/docker/daemon.json << EOF
{
"registry-mirrors": ["https://b9pmyelo.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
# Restart Docker
systemctl daemon-reload
systemctl restart docker
docker info
Install cri-dockerd
# Download the cri-dockerd package
wget https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.1/cri-dockerd-0.3.1-3.el7.x86_64.rpm
# Install cri-dockerd
rpm -ivh cri-dockerd-0.3.1-3.el7.x86_64.rpm
# Change the pause image to a domestic mirror, otherwise kubelet cannot pull it and will fail to start
vi /usr/lib/systemd/system/cri-docker.service
ExecStart=/usr/bin/cri-dockerd --container-runtime-endpoint fd:// --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.7
# Start cri-dockerd
systemctl daemon-reload
systemctl enable cri-docker && systemctl start cri-docker
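As an optional sanity check, confirm both runtimes are up before continuing:
systemctl is-active docker cri-docker
# both lines should print "active"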
Add the Kubernetes yum repository
cat > /etc/yum.repos.d/kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
Install kubeadm, kubelet and kubectl
yum install -y kubelet-1.28.0 kubeadm-1.28.0 kubectl-1.28.0
systemctl enable kubelet
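Optionally verify that the installed tool versions match the intended 1.28.0 release:
kubeadm version -o short
kubectl version --client
kubelet --version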
Highly available kube-apiserver via keepalived + nginx
yum install nginx keepalived nginx-mod-stream haproxy -y  # haproxy is also configured and started below
[root@master01 ~]# cat > /etc/nginx/nginx.conf << 'EOF'
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log;
pid /run/nginx.pid;
include /usr/share/nginx/modules/*.conf;
events {
    worker_connections 1024;
}
# Layer-4 load balancing for the apiserver on the three master nodes
stream {
    log_format main '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
    access_log /var/log/nginx/k8s-access.log main;
    upstream k8s-apiserver {
        server 192.168.71.201:6443; # Master1 APISERVER IP:PORT
        server 192.168.71.202:6443; # Master2 APISERVER IP:PORT
        server 192.168.71.203:6443; # Master3 APISERVER IP:PORT
    }
    server {
        listen 16443; # nginx runs on the master nodes, so this port must not be 6443 or it would conflict with the apiserver
        proxy_pass k8s-apiserver;
    }
}
http {
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';
    access_log /var/log/nginx/access.log main;
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;
    include /etc/nginx/mime.types;
    default_type application/octet-stream;
    server {
        listen 80 default_server;
        server_name _;
        location / {
        }
    }
}
EOF
[root@master01 ~]# cat > /etc/keepalived/keepalived.conf <<EOF
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server 127.0.0.1
   smtp_connect_timeout 30
   router_id NGINX_MASTER
}
vrrp_script check_nginx {
    script "/etc/keepalived/check_nginx.sh"
}
vrrp_instance VI_1 {
    state MASTER
    interface ens192        # change to your actual NIC name
    virtual_router_id 51    # VRRP router ID; must be unique per instance
    priority 100            # priority; set to 90 on the backup servers
    advert_int 1            # VRRP heartbeat advertisement interval, 1 second by default
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    # Virtual IP
    virtual_ipaddress {
        192.168.71.200/24
    }
    track_script {
        check_nginx
    }
}
EOF
Create the nginx health-check script referenced in the keepalived configuration above:
cat > /etc/keepalived/check_nginx.sh << 'EOF'
#!/bin/bash
# 1. Check whether nginx is running
counter=`ps -C nginx --no-header | wc -l`
if [ $counter -eq 0 ]; then
    # 2. If it is not, try to start it
    service nginx start
    sleep 2
    # 3. Wait 2 seconds and check the nginx status again
    counter=`ps -C nginx --no-header | wc -l`
    # 4. If nginx is still not running, stop keepalived so the VIP fails over
    if [ $counter -eq 0 ]; then
        service keepalived stop
    fi
fi
EOF
chmod +x /etc/keepalived/check_nginx.sh
HAProxy configuration (an additional TCP load balancer for the apiserver, listening on port 9443):
cat > /etc/haproxy/haproxy.cfg <<EOF
global
    log         127.0.0.1 local2
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4096
    user        haproxy
    group       haproxy
    daemon
    stats socket /var/lib/haproxy/stats
defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000
frontend kube-apiserver
    mode tcp
    bind *:9443
    option tcplog
    default_backend kube-apiserver
listen stats
    mode http
    bind *:8888
    stats auth admin:password         # credentials for http://192.168.71.200:8888/stats
    stats refresh 5s
    stats realm HAProxy\ Statistics   # prompt text shown in the stats login dialog
    stats uri /stats
    log 127.0.0.1 local3 err
backend kube-apiserver
    mode tcp
    balance roundrobin
    server master01 192.168.71.201:6443 check
    server master02 192.168.71.202:6443 check
    server master03 192.168.71.203:6443 check
EOF
Start and enable the services
systemctl start nginx haproxy keepalived; systemctl enable nginx haproxy keepalived
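Before initializing the cluster it is worth confirming, on the keepalived MASTER node, that the VIP is bound and the load-balancer ports are listening (a minimal check, assuming the NIC is ens192 as configured above):
ip addr show ens192 | grep 192.168.71.200
ss -lntp | grep -E '16443|9443'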
Initialize Kubernetes on the first control node
#### Note: change the address on the --apiserver-advertise-address line below to this master node's own IP
kubeadm init \
--apiserver-advertise-address=192.168.71.201 \
--apiserver-bind-port=6443 \
--control-plane-endpoint=192.168.71.200:16443 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.28.0 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--cri-socket=unix:///var/run/cri-dockerd.sock \
--ignore-preflight-errors=all --dry-run
# If the dry run reports no errors, remove the --dry-run flag and run the real initialization:
kubeadm init \
--apiserver-advertise-address=192.168.71.201 \
--apiserver-bind-port=6443 \
--control-plane-endpoint=192.168.71.200:16443 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.28.0 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--cri-socket=unix:///var/run/cri-dockerd.sock \
--ignore-preflight-errors=all
[init] Using Kubernetes version: v1.28.0
[preflight] Running pre-flight checks
[WARNING FileExisting-tc]: tc not found in system path
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local master01] and IPs [10.96.0.1 192.168.71.201 192.168.71.200]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost master01] and IPs [192.168.71.201 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost master01] and IPs [192.168.71.201 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
W0111 23:58:03.751664 40634 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "admin.conf" kubeconfig file
W0111 23:58:04.239047 40634 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "kubelet.conf" kubeconfig file
W0111 23:58:04.672888 40634 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
W0111 23:58:04.888464 40634 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[kubelet-check] Initial timeout of 40s passed.
[apiclient] All control plane components are healthy after 42.515566 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node master01 as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node master01 as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
[bootstrap-token] Using token: aw7ue3.l1nuv6ekilrz3prc
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
W0111 23:58:53.353845 40634 endpoint.go:57] [endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 192.168.71.200:16443 --token aw7ue3.l1nuv6ekilrz3prc \
--discovery-token-ca-cert-hash sha256:2a392a654dd95fc11062c71f153e17cd0453e1aecb4b82ce9315f893530fc0ed \
--control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.71.200:16443 --token aw7ue3.l1nuv6ekilrz3prc \
--discovery-token-ca-cert-hash sha256:2a392a654dd95fc11062c71f153e17cd0453e1aecb4b82ce9315f893530fc0ed
# On master02/master03, create the certificate directory
mkdir /etc/kubernetes/pki/etcd -p
# Copy the certificates from master01 to master02/master03
scp -rp /etc/kubernetes/pki/ca.* 192.168.71.202:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/ca.* 192.168.71.203:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/sa.* 192.168.71.203:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/sa.* 192.168.71.202:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/front-proxy-ca.* 192.168.71.202:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/front-proxy-ca.* 192.168.71.203:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/etcd/ca.* 192.168.71.203:/etc/kubernetes/pki/etcd/
scp -rp /etc/kubernetes/pki/etcd/ca.* 192.168.71.202:/etc/kubernetes/pki/etcd/
scp -rp /etc/kubernetes/admin.conf 192.168.71.202:/etc/kubernetes/
scp -rp /etc/kubernetes/admin.conf 192.168.71.203:/etc/kubernetes/
# Run on master02/master03
kubeadm join 192.168.71.200:16443 --token aw7ue3.l1nuv6ekilrz3prc --discovery-token-ca-cert-hash sha256:2a392a654dd95fc11062c71f153e17cd0453e1aecb4b82ce9315f893530fc0ed --control-plane --cri-socket=unix:///var/run/cri-dockerd.sock
--apiserver-advertise-address: the address this control-plane node advertises to the rest of the cluster
--image-repository: the default registry k8s.gcr.io is unreachable from mainland China, so the Aliyun mirror registry is specified instead
--kubernetes-version: the K8s version, matching the packages installed above
--service-cidr: the cluster-internal virtual (Service) network, the unified entry point for accessing Pods
--pod-network-cidr: the Pod network; it must match the CIDR set in the CNI manifest deployed below
--cri-socket: the cri-dockerd socket; with containerd, use unix:///run/containerd/containerd.sock instead
After initialization completes, a join command is printed; keep it, the worker nodes will need it. Then copy the kubeconfig that kubectl uses to its default path:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Joining worker nodes
Run the kubeadm join command, manually appending --cri-socket=unix:///var/run/cri-dockerd.sock:
kubeadm join 192.168.71.200:16443 --token aw7ue3.l1nuv6ekilrz3prc \
--discovery-token-ca-cert-hash sha256:2a392a654dd95fc11062c71f153e17cd0453e1aecb4b82ce9315f893530fc0ed --cri-socket=unix:///var/run/cri-dockerd.sock
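The bootstrap token above is only valid for 24 hours. If it has expired by the time a node joins, generate a fresh join command on master01:
kubeadm token create --print-join-command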
# Check the node status with kubectl get nodes; since the network plugin is not deployed yet, the nodes are still NotReady.
[root@master01 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
master01 NotReady control-plane 23m v1.28.0
master02 NotReady control-plane 13m v1.28.0
master03 NotReady control-plane 10m v1.28.0
worker01 NotReady <none> 12m v1.28.0
Deploying the container network
Calico is a pure layer-3 data center networking solution and is currently the mainstream network choice for Kubernetes.
After downloading the manifest, edit the Pod network CIDR it defines (CALICO_IPV4POOL_CIDR) to match the --pod-network-cidr passed to kubeadm init above, then run the deployment command; once all the Calico Pods are Running, the nodes become Ready. See the example edit after the download command below.
wget https://docs.projectcalico.org/v3.25/manifests/calico.yaml --no-check-certificate
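For reference, the CALICO_IPV4POOL_CIDR environment variable is commented out by default in calico.yaml; uncomment it and set it to the Pod CIDR used above (its exact position in the file varies between Calico versions):
vi calico.yaml
...
- name: CALICO_IPV4POOL_CIDR
  value: "10.244.0.0/16"
...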
kubectl apply -f calico.yaml
kubectl get pods -n kube-system
[root@master01 ~]# kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-658d97c59c-97zcn 1/1 Running 0 9m28s
kube-system calico-node-2zb2s 1/1 Running 0 9m30s
kube-system calico-node-82p62 1/1 Running 0 9m31s
kube-system calico-node-9sht8 1/1 Running 0 9m31s
kube-system calico-node-h78jw 1/1 Running 3 (3m27s ago) 9m31s
kube-system coredns-66f779496c-gj725 1/1 Running 0 32m
kube-system coredns-66f779496c-z4fkt 1/1 Running 0 32m
kube-system etcd-master01 1/1 Running 0 32m
kube-system etcd-master02 1/1 Running 0 23m
kube-system etcd-master03 1/1 Running 0 19m
kube-system kube-apiserver-master01 1/1 Running 2 (2m43s ago) 32m
kube-system kube-apiserver-master02 1/1 Running 0 22m
kube-system kube-apiserver-master03 1/1 Running 1 (6m24s ago) 19m
kube-system kube-controller-manager-master01 1/1 Running 2 (6m33s ago) 32m
kube-system kube-controller-manager-master02 1/1 Running 3 (4m1s ago) 22m
kube-system kube-controller-manager-master03 1/1 Running 2 (3m32s ago) 19m
kube-system kube-proxy-2tddg 1/1 Running 0 19m
kube-system kube-proxy-r4njn 1/1 Running 0 32m
kube-system kube-proxy-rpz72 1/1 Running 0 23m
kube-system kube-proxy-vjcj8 1/1 Running 0 21m
kube-system kube-scheduler-master01 1/1 Running 2 (7m34s ago) 32m
kube-system kube-scheduler-master02 1/1 Running 2 (3m56s ago) 22m
kube-system kube-scheduler-master03 1/1 Running 3 (3m27s ago) 19m
[root@master01 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
master01 Ready control-plane 33m v1.28.0
master02 Ready control-plane 23m v1.28.0
master03 Ready control-plane 20m v1.28.0
worker01 Ready <none> 22m v1.28.0
Deploying the Dashboard
The Dashboard is the official web UI for basic management of K8s resources; download its manifest from the address below.
wget https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml --no-check-certificate
By default the Dashboard is only reachable from inside the cluster; change its Service to type NodePort to expose it externally, as follows:
vi recommended.yaml
...
kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  ports:
    - port: 443
      targetPort: 8443
      nodePort: 30001
  selector:
    k8s-app: kubernetes-dashboard
  type: NodePort
[root@master01 ~]# kubectl apply -f recommended.yaml
namespace/kubernetes-dashboard created
serviceaccount/kubernetes-dashboard created
service/kubernetes-dashboard created
secret/kubernetes-dashboard-certs created
secret/kubernetes-dashboard-csrf created
secret/kubernetes-dashboard-key-holder created
configmap/kubernetes-dashboard-settings created
role.rbac.authorization.k8s.io/kubernetes-dashboard created
clusterrole.rbac.authorization.k8s.io/kubernetes-dashboard created
rolebinding.rbac.authorization.k8s.io/kubernetes-dashboard created
clusterrolebinding.rbac.authorization.k8s.io/kubernetes-dashboard created
deployment.apps/kubernetes-dashboard created
service/dashboard-metrics-scraper created
deployment.apps/dashboard-metrics-scraper created
[root@master01 ~]# kubectl get pods -n kubernetes-dashboard
NAME READY STATUS RESTARTS AGE
dashboard-metrics-scraper-5657497c4c-59qw8 1/1 Running 0 3m26s
kubernetes-dashboard-78f87ddfc-47hhr 1/1 Running 0 3m29s
Create a service account and bind it to the default cluster-admin cluster role:
# Create the user
kubectl create serviceaccount dashboard-admin -n kubernetes-dashboard
# Grant permissions
kubectl create clusterrolebinding dashboard-admin --clusterrole=cluster-admin --serviceaccount=kubernetes-dashboard:dashboard-admin
# Get the user's token
kubectl create token dashboard-admin -n kubernetes-dashboard
Access URL: https://NodeIP:30001; log in to the Dashboard with the token printed above.
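To confirm which NodePort was actually assigned to the Dashboard Service:
kubectl get svc -n kubernetes-dashboard kubernetes-dashboard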
Deploying Helm for application management
wget https://get.helm.sh/helm-v3.6.0-linux-amd64.tar.gz
tar xf helm-v3.6.0-linux-amd64.tar.gz
mv linux-amd64/helm /usr/local/bin/
source <(helm completion bash)
echo "source <(helm completion bash)" >> ~/.bash_profile
helm completion bash > /usr/share/bash-completion/completions/helm
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo list
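A quick smoke test against the newly added repository (the chart name is just an example; any Bitnami chart works):
helm repo update
helm search repo bitnami/nginx
# helm install my-nginx bitnami/nginx   # optional test release; remove it afterwards with: helm uninstall my-nginx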
Deploying Prometheus + Grafana + Alertmanager
# git clone https://github.com/coreos/kube-prometheus
# cd kube-prometheus/manifests/
# Install the CRDs and the prometheus-operator
[root@master01 manifests]# kubectl apply -f setup/
[root@master01 manifests]# kubectl get pod -n monitoring
Install prometheus, alertmanager, grafana, kube-state-metrics, node-exporter and the remaining resources. This takes a while; if some image pulls fail, the images can be imported onto the nodes manually.
[root@master01 manifests]# kubectl apply -f .
[root@master01 ~]# kubectl get pods -n monitoring
NAME READY STATUS RESTARTS AGE
alertmanager-main-0 2/2 Running 0 2d22h
alertmanager-main-1 2/2 Running 0 2d22h
alertmanager-main-2 2/2 Running 0 2d22h
blackbox-exporter-76b5c44577-4p775 3/3 Running 0 2d22h
grafana-599d66f74-sb479 1/1 Running 0 2d22h
kube-state-metrics-cff77f89d-zxnlm 3/3 Running 0 2d22h
node-exporter-2xbmv 2/2 Running 0 2d22h
node-exporter-45t2k 2/2 Running 0 2d22h
node-exporter-9cpl2 2/2 Running 0 2d22h
node-exporter-crvfv 2/2 Running 0 2d22h
prometheus-adapter-74894c5547-6dh24 1/1 Running 0 2d22h
prometheus-adapter-74894c5547-x2s58 1/1 Running 0 2d22h
prometheus-operator-57757d758c-grqkk 2/2 Running 422 (3m25s ago) 2d22h
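By default the Grafana, Prometheus and Alertmanager Services are ClusterIP only. One simple way to reach Grafana from outside the cluster (a sketch; an Ingress is the cleaner long-term option, and some kube-prometheus releases also ship NetworkPolicies that may need adjusting) is to switch its Service to NodePort:
kubectl -n monitoring patch svc grafana -p '{"spec":{"type":"NodePort"}}'
kubectl -n monitoring get svc grafana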
Label the worker nodes
[root@master01 ~]# kubectl label node worker01 node-role.kubernetes.io/worker=worker
node/worker01 labeled
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready control-plane 4d13h v1.28.0
master02 Ready control-plane 4d13h v1.28.0
master03 Ready control-plane 4d13h v1.28.0
worker01 Ready worker 4d13h v1.28.0
Extending certificate validity
Check the current certificate validity:
[root@master01 ~]# openssl x509 -in /etc/kubernetes/pki/ca.crt -noout -text |grep Not
Not Before: Jan 11 15:52:59 2024 GMT
Not After : Jan 8 15:57:59 2034 GMT
As shown above, the CA certificate is valid for 10 years, from 2024 to 2034.
[root@master01 ~]# openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text |grep Not
Not Before: Jan 11 15:52:59 2024 GMT
Not After : Jan 10 15:58:00 2025 GMT
As shown above, the apiserver certificate is only valid for 1 year, from 2024 to 2025.
Renew the certificates to extend their validity
1. Upload the update-kubeadm-cert.sh script to master01, master02 and master03.
2. Run the following commands on each of these nodes.
1) Make update-kubeadm-cert.sh executable
[root@master01 ~]# chmod +x update-kubeadm-cert.sh
[root@master02 ~]# chmod +x update-kubeadm-cert.sh
[root@master03 ~]# chmod +x update-kubeadm-cert.sh
2) Run the script to re-issue the certificates with their validity extended to 10 years
[root@master01 ~]# ./update-kubeadm-cert.sh all
[root@master02 ~]# ./update-kubeadm-cert.sh all
[root@master03 ~]# ./update-kubeadm-cert.sh all
3) On master01, check that Pods can still be listed; if data is returned, the certificates were re-issued successfully
[root@master01 ~]# kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-658d97c59c-97zcn 1/1 Running 0 4d12h
kube-system calico-node-2zb2s 1/1 Running 0 4d12h
kube-system calico-node-82p62 1/1 Running 0 4d12h
kube-system calico-node-9sht8 1/1 Running 0 4d12h
kube-system calico-node-h78jw 1/1 Running 3 (4d12h ago) 4d12h
kube-system coredns-66f779496c-gj725 1/1 Running 0 4d13h
kube-system coredns-66f779496c-z4fkt 1/1 Running 0 4d13h
kube-system etcd-master01 1/1 Running 0 4d13h
kube-system etcd-master02 1/1 Running 0 4d12h
kube-system etcd-master03 1/1 Running 0 4d12h
kube-system kube-apiserver-master01 1/1 Running 2 (4d12h ago) 4d13h
kube-system kube-apiserver-master02 1/1 Running 0 4d12h
kube-system kube-apiserver-master03 1/1 Running 1 (4d12h ago) 4d12h
kube-system kube-controller-manager-master01 1/1 Running 4 (37h ago) 4d13h
kube-system kube-controller-manager-master02 1/1 Running 4 (2d15h ago) 4d12h
kube-system kube-controller-manager-master03 1/1 Running 4 (3d18h ago) 4d12h
kube-system kube-proxy-2tddg 1/1 Running 0 4d12h
kube-system kube-proxy-r4njn 1/1 Running 0 4d13h
kube-system kube-proxy-rpz72 1/1 Running 0 4d12h
kube-system kube-proxy-vjcj8 1/1 Running 0 4d12h
kube-system kube-scheduler-master01 1/1 Running 3 4d13h
kube-system kube-scheduler-master02 1/1 Running 4 (2d15h ago) 4d12h
kube-system kube-scheduler-master03 1/1 Running 4 (101m ago) 4d12h
kubernetes-dashboard dashboard-metrics-scraper-5657497c4c-59qw8 1/1 Running 0 4d12h
kubernetes-dashboard kubernetes-dashboard-78f87ddfc-47hhr 1/1 Running 0 4d12h
monitoring alertmanager-main-0 2/2 Running 0 3d18h
monitoring alertmanager-main-1 2/2 Running 0 3d18h
monitoring alertmanager-main-2 2/2 Running 0 3d18h
monitoring blackbox-exporter-76b5c44577-4p775 3/3 Running 0 3d18h
monitoring grafana-599d66f74-sb479 1/1 Running 0 3d18h
monitoring kube-state-metrics-cff77f89d-zxnlm 3/3 Running 0 3d18h
monitoring node-exporter-2xbmv 2/2 Running 0 3d18h
monitoring node-exporter-45t2k 2/2 Running 0 3d18h
monitoring node-exporter-9cpl2 2/2 Running 0 3d18h
monitoring node-exporter-crvfv 2/2 Running 0 3d18h
monitoring prometheus-adapter-74894c5547-6dh24 1/1 Running 0 3d18h
monitoring prometheus-adapter-74894c5547-x2s58 1/1 Running 0 3d18h
monitoring prometheus-operator-57757d758c-grqkk 2/2 Running 542 (8m45s ago) 3d18h
Seeing the Pod list confirms the certificates were re-issued correctly.
Verify that the certificate validity has been extended to 10 years:
[root@master01 ~]# openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text |grep Not
Not Before: Jan 16 05:00:07 2024 GMT
Not After : Jan 13 05:00:07 2034 GMT
As shown above, the apiserver certificate is now valid for 10 years, from 2024 to 2034.
[root@master01 ~]# openssl x509 -in /etc/kubernetes/pki/apiserver-etcd-client.crt -noout -text |grep Not
Not Before: Jan 16 05:00:01 2024 GMT
Not After : Jan 13 05:00:01 2034 GMT
As shown above, the etcd client certificate is now valid for 10 years, from 2024 to 2034.
[root@master01 ~]# openssl x509 -in /etc/kubernetes/pki/front-proxy-ca.crt -noout -text |grep Not
Not Before: Jan 11 15:53:01 2024 GMT
Not After : Jan 8 15:58:01 2034 GMT
As shown above, the front-proxy CA certificate is valid for 10 years, from 2024 to 2034.
Deploying ingress-nginx
[root@master01 ~]# kubectl apply -f ingress-nginx-v1.9.5.yaml
[root@master01 ~]# kubectl get pods -n ingress-nginx
NAME READY STATUS RESTARTS AGE
ingress-nginx-admission-create-s4b4m 0/1 Completed 0 86s
ingress-nginx-admission-patch-w5mhj 0/1 Completed 0 86s
ingress-nginx-controller-6cdb7f4b5f-48qfx 1/1 Running 0 86s
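To see how the controller is exposed (NodePort or LoadBalancer, depending on the manifest used):
kubectl get svc -n ingress-nginx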
Removing the taint from the master nodes
# Check the taint
kubectl describe node master01 |grep Taints
Taints: node-role.kubernetes.io/control-plane:NoSchedule
# Remove the taint
kubectl taint nodes --all node-role.kubernetes.io/control-plane-
Author: 一毛
All articles on this blog are for learning, research and exchange purposes only; non-commercial reposting is welcome.
Whatever troubles you run into, don't make things harder on yourself; however badly today went, there is no need to be sad. Remember: the harder you work, the luckier you get.