k8s搭建
k8s搭建
配置系统
关闭交换空间
sudo swapoff -a
# 避免开机启动交换空间
sudo vim /etc/fstab
# 注释掉有swap的行
关闭防火墙
# 查看防火墙状态
sudo systemctl status ufw
# 如果防火墙为active,关闭
sudo systemctl stop ufw
sudo systemctl disable ufw
# 再次检查防火墙状态
sudo systemctl status ufw
配置网络桥接
iptables 检查桥接流量
确保 br_netfilter 模块被加载。这一操作可以通过运行 lsmod | grep br_netfilter 来完成。若要显式加载该模块,可执行 sudo modprobe br_netfilter。
为了让你的 Linux 节点上的 iptables 能够正确地查看桥接流量,你需要确保在你的 sysctl 配置中将 net.bridge.bridge-nf-call-iptables 设置为 1:
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sudo sysctl --system
设置正向代理
由于内部服务器无法连接外网,设置正向代理
sudo vim /etc/hosts
# 在hosts文件中添加内容
10.130.47.27 translation.googleapis.com
10.130.47.27 packages.cloud.google.com
10.130.47.27 apt.kubernetes.io
10.130.47.27 k8s.gcr.io
10.130.47.27 storage.googleapis.com
安装kubeadm,kubelet,kubectl
https://kubernetes.io/zh/docs/setup/production-environment/tools/kubeadm/install-kubeadm/
基于Debian的发行版
# 更新 apt 包索引并安装使用 Kubernetes apt 仓库所需要的包:
sudo apt update
sudo apt install -y apt-transport-https ca-certificates curl
# 下载 Google Cloud 公开签名密钥:
sudo curl -fsSLo /usr/share/keyrings/kubernetes-archive-keyring.gpg https://packages.cloud.google.com/apt/doc/apt-key.gpg
# 添加 Kubernetes apt 仓库:
echo "deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main" | sudo tee /etc/apt/sources.list.d/kubernetes.list
# 查看版本
apt search kubelet
apt search kubeadm
apt search kubectl
# 安装对应版本的k8s,这里是1.19.4-00
# 安装kubelet
sudo apt install -y kubelet=1.19.4-00
# 此时可能会提示缺少依赖
# The following packages have unmet dependencies:
# kubelet : Depends: kubernetes-cni (>= 0.8.7)
# 安装kubernetes-cni
apt search kubernetes-cni
sudo apt install -y kubernetes-cni=0.8.7-00
# 再安装kubelet
sudo apt install -y kubelet=1.19.4-00
# 安装kubectl
sudo apt install -y kubectl=1.19.4-00
# 安装kubeadm
sudo apt install -y kubeadm=1.19.4-00
# 此时可能会提示缺少依赖
# The following packages have unmet dependencies:
# kubeadm : Depends: cri-tools (>= 1.13.0) but it is not installable
# 安装cri-tools
apt search cri-tools
sudo apt install cri-tools=1.13.0-01
# 再安装kubeadm
sudo apt install -y kubeadm=1.19.4-00
# 锁定版本
sudo apt-mark hold kubelet kubeadm kubectl
# 设置 kubelet 自启动,并启动 kubelet
sudo systemctl enable kubelet && sudo systemctl start kubelet
设置kubectl自动补全(可选)
详见 https://kubernetes.io/zh/docs/tasks/tools/install-kubectl-linux/ 的最后部分
启动主节点
主节点启动配置文件
kubeadm.yml
导出默认的配置文件
sudo kubeadm config print init-defaults --component-configs KubeletConfiguration
修改配置文件
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: abcdef.0123456789abcdef
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
# 主节点ip
advertiseAddress: 192.168.1.5
bindPort: 6443
nodeRegistration:
criSocket: /var/run/dockershim.sock
# 主节点name
name: master
taints: null
---
apiServer:
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns:
type: CoreDNS
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: k8s.gcr.io
kind: ClusterConfiguration
# 修改版本号,必须与已安装的 kubeadm/kubelet 版本一致
kubernetesVersion: 1.19.4
networking:
# 新增该配置 用于后续 Calico网络插件
podSubnet: 192.168.0.0/16
dnsDomain: cluster.local
serviceSubnet: 10.96.0.0/12
scheduler: {}
---
apiVersion: kubelet.config.k8s.io/v1beta1
authentication:
anonymous:
enabled: false
webhook:
cacheTTL: 0s
enabled: true
x509:
clientCAFile: /etc/kubernetes/pki/ca.crt
authorization:
mode: Webhook
webhook:
cacheAuthorizedTTL: 0s
cacheUnauthorizedTTL: 0s
cgroupDriver: cgroupfs
clusterDNS:
- 10.96.0.10
clusterDomain: cluster.local
cpuManagerReconcilePeriod: 0s
evictionPressureTransitionPeriod: 0s
fileCheckFrequency: 0s
# 改ip为0.0.0.0
healthzBindAddress: 0.0.0.0
healthzPort: 10248
httpCheckFrequency: 0s
imageMinimumGCAge: 0s
kind: KubeletConfiguration
logging: {}
nodeStatusReportFrequency: 0s
nodeStatusUpdateFrequency: 0s
rotateCertificates: true
runtimeRequestTimeout: 0s
shutdownGracePeriod: 0s
shutdownGracePeriodCriticalPods: 0s
staticPodPath: /etc/kubernetes/manifests
streamingConnectionIdleTimeout: 0s
syncFrequency: 0s
volumeStatsAggPeriod: 0s
# 上传修改后的配置文件到服务器
scp kubeadm.yml wangshikai@s184:/home/wangshikai/.
# 切换到kubeadm.yml所在的路径下
# 查看所需镜像
sudo kubeadm config images list --config kubeadm.yml
# 拉取镜像
sudo kubeadm config images pull --config kubeadm.yml
初始化主节点
# 在kubeadm.yml所在路径下执行
# --config=kubeadm.yml 指定配置文件
# --upload-certs 将控制平面证书上传到 kubeadm-certs Secret(旧版本中该参数名为 --experimental-upload-certs)
# tee kubeadm-init.log 将日志保存到文件
sudo kubeadm init --config=kubeadm.yml --upload-certs | tee kubeadm-init.log
# 记录下此时输出的 kubeadm join 命令(包含 token 和 CA 证书哈希),用于后续将子节点加入集群
kubeadm join 10.31.4.184:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:38722f42f8b3fbc8f29043020e41d4b55b913dd89abf15147c7db70a0d9de876
# 配置kubectl
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
# 修改文件所有者(非 ROOT 用户才需要执行)
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# 验证是否成功
kubectl get nodes
# 能够打印出节点信息即表示成功
NAME STATUS ROLES AGE VERSION
master NotReady master 100s v1.19.4
重置安装
如果出现问题,重置kubeadm
#重置
sudo kubeadm reset
# 将$HOME/.kube文件移除
sudo rm -rf $HOME/.kube
#然后重新执行初始化(记得先把 kubeadm-init.log 文件删掉)
sudo rm -rf kubeadm-init.log
kubeadm init --config=kubeadm.yml --upload-certs | tee kubeadm-init.log
weave网络插件
下面选一个安装,推荐第二个
kubectl apply -f "https://cloud.weave.works/k8s/net?k8s-version=$(kubectl version | base64 | tr -d '\n')"
kubectl apply -f "https://cloud.weave.works/k8s/net?k8s-version=1.19.4"
检查组件运行状态
kubectl get cs
# 如果出现
NAME STATUS MESSAGE ERROR
scheduler Unhealthy Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
controller-manager Unhealthy Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused
etcd-0 Healthy {"health":"true"}
# 原因是 /etc/kubernetes/manifests 下的 kube-controller-manager.yaml 和 kube-scheduler.yaml 中默认设置了 --port=0,在文件中将该行注释掉即可
vim /etc/kubernetes/manifests/kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
labels:
component: kube-scheduler
tier: control-plane
name: kube-scheduler
namespace: kube-system
spec:
containers:
- command:
- kube-scheduler
- --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
- --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
- --bind-address=127.0.0.1
- --kubeconfig=/etc/kubernetes/scheduler.conf
- --leader-elect=true
# 注释掉这个
# - --port=0
kube-controller-manager.yaml 文件同理。
scheduler 为调度服务,主要作用是将 POD 调度到 Node
controller-manager 为自动化修复服务,主要作用是 Node 宕机后自动修复 Node 回到正常的工作状态
etcd-0 则是熟悉的服务注册与发现
rook持久化插件
安装版本1.6.3
git clone --single-branch --branch v1.6.3 https://github.com/rook/rook.git
cd rook/cluster/examples/kubernetes/ceph
kubectl create -f crds.yaml -f common.yaml -f operator.yaml
kubectl create -f cluster.yaml
关于pod的操作
-
删除失败的 pod(将 ${NAMESPACE} 替换为要删除的 pod 所在的 namespace)
kubectl -n ${NAMESPACE} delete pods --field-selector=status.phase=Failed
-
查看所有pods
kubectl get pods -A
kubectl delete pod calico-node-9qfmc -n calico-system
-
查看匹配的pod
kubectl get 指令的作用,就是从 Kubernetes 里面获取(GET)指定的 API 对象。可以看到,在这里我还加上了一个 -l 参数,即获取所有匹配 app: nginx 标签的 Pod
kubectl get pods -l app=nginx
-
查看pod细节
使用 kubectl describe 命令,查看一个 API 对象的细节
kubectl describe pod nginx-deployment-5d59d67564-gw66n
kubectl -n calico-system delete pods --field-selector=status.phase=Failed
# 查看已安装版本
dpkg -s xxx
# 查看所有版本
apt list version xxx -a