k8s集群部署cadvisor+node-exporter+prometheus+grafana监控系统
一、通过daemonset 安装cadvisor
下载cadvisor并上传到harbor
docker tag gcr.io/cadvisor/cadvisor:v0.45.0 harbor.magedu.net/baseimages/cadvisor:v0.45.0
docker push harbor.magedu.net/baseimages/cadvisor:v0.45.0
root@easzlab-deploy:~/20220911# kubectl create ns monitor namespace/monitor created root@easzlab-deploy:~/20220911#
cat case1-daemonset-deploy-cadvisor.yaml
apiVersion: apps/v1 kind: DaemonSet metadata: name: cadvisor namespace: monitor spec: selector: matchLabels: app: cAdvisor template: metadata: labels: app: cAdvisor spec: tolerations: #污点容忍,忽略master的NoSchedule - effect: NoSchedule key: node-role.kubernetes.io/master hostNetwork: true restartPolicy: Always # 重启策略 containers: - name: cadvisor image: harbor.magedu.net/baseimages/cadvisor:v0.45.0 imagePullPolicy: IfNotPresent # 镜像策略 ports: - containerPort: 8080 volumeMounts: - name: root mountPath: /rootfs - name: run mountPath: /var/run - name: sys mountPath: /sys - name: docker mountPath: /var/lib/containerd volumes: - name: root hostPath: path: / - name: run hostPath: path: /var/run - name: sys hostPath: path: /sys - name: docker hostPath: path: /var/lib/containerd
验证cadvisor
二、部署node-exporter
cat case2-daemonset-deploy-node-exporter.yaml
apiVersion: apps/v1 kind: DaemonSet metadata: name: node-exporter namespace: monitor labels: k8s-app: node-exporter spec: selector: matchLabels: k8s-app: node-exporter template: metadata: labels: k8s-app: node-exporter spec: tolerations: - effect: NoSchedule key: node-role.kubernetes.io/master containers: - image: prom/node-exporter:v1.3.1 imagePullPolicy: IfNotPresent name: prometheus-node-exporter ports: - containerPort: 9100 hostPort: 9100 protocol: TCP name: metrics volumeMounts: - mountPath: /host/proc name: proc - mountPath: /host/sys name: sys - mountPath: /host name: rootfs args: - --path.procfs=/host/proc - --path.sysfs=/host/sys - --path.rootfs=/host volumes: - name: proc hostPath: path: /proc - name: sys hostPath: path: /sys - name: rootfs hostPath: path: / hostNetwork: true hostPID: true --- apiVersion: v1 kind: Service metadata: annotations: prometheus.io/scrape: "true" labels: k8s-app: node-exporter name: node-exporter namespace: monitor spec: type: NodePort ports: - name: http port: 9100 nodePort: 39100 protocol: TCP selector: k8s-app: node-exporter
验证node_exporter数据
三、deployment部署Prometheus server
cat case3-1-prometheus-cfg.yaml
--- kind: ConfigMap apiVersion: v1 metadata: labels: app: prometheus name: prometheus-config namespace: monitor data: prometheus.yml: | global: scrape_interval: 15s scrape_timeout: 10s evaluation_interval: 1m scrape_configs: - job_name: 'kubernetes-node' kubernetes_sd_configs: - role: node relabel_configs: - source_labels: [__address__] regex: '(.*):10250' replacement: '${1}:9100' target_label: __address__ action: replace - action: labelmap regex: __meta_kubernetes_node_label_(.+) - job_name: 'kubernetes-node-cadvisor' kubernetes_sd_configs: - role: node scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor - job_name: 'kubernetes-apiserver' kubernetes_sd_configs: - role: endpoints scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: default;kubernetes;https - job_name: 'kubernetes-service-endpoints' kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] action: keep regex: true - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] action: replace target_label: __scheme__ regex: (https?) - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] action: replace target_label: __metrics_path__ regex: (.+) - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] action: replace target_label: __address__ regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 - action: labelmap regex: __meta_kubernetes_service_label_(.+) - source_labels: [__meta_kubernetes_namespace] action: replace target_label: kubernetes_namespace - source_labels: [__meta_kubernetes_service_name] action: replace target_label: kubernetes_service_name
指定172.16.88.156 node-01 节点为Prometheus 数据存放路径
mkdir -p /data/prometheusdata
root@easzlab-k8s-master-03:~# chmod 777 /data/prometheusdata
root@easzlab-k8s-master-03:~# chown 65534.65534 /data/prometheusdata -R #否则Prometheus pod会报错
cat case3-2-prometheus-deployment.yaml
--- apiVersion: apps/v1 kind: Deployment metadata: name: prometheus-server namespace: monitor labels: app: prometheus spec: replicas: 1 selector: matchLabels: app: prometheus component: server #matchExpressions: #- {key: app, operator: In, values: [prometheus]} #- {key: component, operator: In, values: [server]} template: metadata: labels: app: prometheus component: server annotations: prometheus.io/scrape: 'false' spec: nodeName: 172.16.88.156 serviceAccountName: monitor containers: - name: prometheus image: prom/prometheus:v2.31.2 imagePullPolicy: IfNotPresent command: - prometheus - --config.file=/etc/prometheus/prometheus.yml - --storage.tsdb.path=/prometheus - --storage.tsdb.retention=720h ports: - containerPort: 9090 protocol: TCP volumeMounts: - mountPath: /etc/prometheus/prometheus.yml name: prometheus-config subPath: prometheus.yml - mountPath: /prometheus/ name: prometheus-storage-volume volumes: - name: prometheus-config configMap: name: prometheus-config items: - key: prometheus.yml path: prometheus.yml mode: 0644 - name: prometheus-storage-volume hostPath: path: /data/prometheusdata type: Directory
root@easzlab-deploy:~/20220911# kubectl create serviceaccount monitor -n monitor serviceaccount/monitor created root@easzlab-deploy:~/20220911# kubectl create clusterrolebinding monitor-clusterrolebinding -n monitor --clusterrole=cluster-admin --serviceaccount=monitor:monitor clusterrolebinding.rbac.authorization.k8s.io/monitor-clusterrolebinding created root@easzlab-deploy:~/20220911# root@easzlab-deploy:~/20220911# kubectl apply -f case3-2-prometheus-deployment.yaml deployment.apps/prometheus-server created root@easzlab-deploy:~/20220911#
创建Prometheus service
cat case3-3-prometheus-svc.yaml
--- apiVersion: v1 kind: Service metadata: name: prometheus namespace: monitor labels: app: prometheus spec: type: NodePort ports: - port: 9090 targetPort: 9090 nodePort: 30090 protocol: TCP selector: app: prometheus component: server
cat case4-prom-rbac.yaml
apiVersion: v1 kind: ServiceAccount metadata: name: prometheus namespace: monitor --- apiVersion: v1 kind: Secret type: kubernetes.io/service-account-token metadata: name: monitor-token namespace: monitor annotations: kubernetes.io/service-account.name: "prometheus" --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: prometheus rules: - apiGroups: - "" resources: - nodes - services - endpoints - pods - nodes/proxy verbs: - get - list - watch - apiGroups: - "extensions" resources: - ingresses verbs: - get - list - watch - apiGroups: - "" resources: - configmaps - nodes/metrics verbs: - get - nonResourceURLs: - /metrics verbs: - get --- #apiVersion: rbac.authorization.k8s.io/v1beta1 apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: prometheus roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: prometheus subjects: - kind: ServiceAccount name: prometheus namespace: monitor
四、安装kube-state-metrics
cat case5-kube-state-metrics-deploy.yaml
apiVersion: apps/v1 kind: Deployment metadata: name: kube-state-metrics namespace: kube-system spec: replicas: 1 selector: matchLabels: app: kube-state-metrics template: metadata: labels: app: kube-state-metrics spec: serviceAccountName: kube-state-metrics containers: - name: kube-state-metrics image: registry.cn-hangzhou.aliyuncs.com/zhangshijie/kube-state-metrics:v2.6.0 ports: - containerPort: 8080 --- --- apiVersion: v1 kind: ServiceAccount metadata: name: kube-state-metrics namespace: kube-system --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: kube-state-metrics rules: - apiGroups: [""] resources: ["nodes", "pods", "services", "resourcequotas", "replicationcontrollers", "limitranges", "persistentvolumeclaims", "persistentvolumes", "namespaces", "endpoints"] verbs: ["list", "watch"] - apiGroups: ["extensions"] resources: ["daemonsets", "deployments", "replicasets"] verbs: ["list", "watch"] - apiGroups: ["apps"] resources: ["statefulsets"] verbs: ["list", "watch"] - apiGroups: ["batch"] resources: ["cronjobs", "jobs"] verbs: ["list", "watch"] - apiGroups: ["autoscaling"] resources: ["horizontalpodautoscalers"] verbs: ["list", "watch"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: kube-state-metrics roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: kube-state-metrics subjects: - kind: ServiceAccount name: kube-state-metrics namespace: kube-system --- apiVersion: v1 kind: Service metadata: annotations: prometheus.io/scrape: 'true' name: kube-state-metrics namespace: kube-system labels: app: kube-state-metrics spec: type: NodePort ports: - name: kube-state-metrics port: 8080 targetPort: 8080 nodePort: 31666 protocol: TCP selector: app: kube-state-metrics
五、安装grafana
cat grafana-enterprise.yaml
apiVersion: apps/v1 kind: Deployment metadata: name: grafana-enterprise namespace: monitor spec: replicas: 1 selector: matchLabels: app: grafana-enterprise template: metadata: labels: app: grafana-enterprise spec: containers: - image: grafana/grafana imagePullPolicy: Always #command: # - "tail" # - "-f" # - "/dev/null" securityContext: allowPrivilegeEscalation: false runAsUser: 0 name: grafana ports: - containerPort: 3000 protocol: TCP volumeMounts: - mountPath: "/var/lib/grafana" name: data resources: requests: cpu: 100m memory: 100Mi limits: cpu: 500m memory: 2500Mi volumes: - name: data emptyDir: {} --- apiVersion: v1 kind: Service metadata: name: grafana namespace: monitor spec: type: NodePort ports: - port: 80 targetPort: 3000 nodePort: 31000 selector: app: grafana-enterprise
通过http://ip+port 访问grafana admin admin
六、添加数据源
七、导入模板id
导入cadvisor
此时pod名称显示为pod uuid,需要对以下地方进行修改
点击update
点击Save dashboard保存
页面也需要修改