前言
相关文档如下:
《01-n9e-v5 部署-server》
《01-n9e-v5部署-agent》
《02-容器监控-cadvisor+n9e》
《03-k8s集群监控(上)》
《03-k8s集群监控(下)》
《04-监控redis集群-prometheuse+n9e》
1. 部署cadvisor
说明:
k8s每个节点部署cadvisor服务
因为我是用docker-compose启动cadvisor的,因此需要提前在每个节点上安装好docker-compose。
- yaml文件
创建/opt/cadvisor目录,在目录下创建docker-compose.yml文件如下:
| cadvisor: |
| image: harbocto.xxx.com.cn/public/cadvisor:v0.24.1 |
| container_name: monitoring_cadvisor |
| restart: unless-stopped |
| volumes: |
| - /:/rootfs:ro |
| - /var/run:/var/run:rw |
| - /sys:/sys:ro |
| - /var/lib/docker/:/var/lib/docker:ro |
| ports: |
| - "4194:8080" |
| [root@DoM01 cadvisor]# docker-compose up -d |
| [root@DoM01 cadvisor]# docker-compose ps |
| Name Command State Ports |
| ------------------------------------------------------------------------------------- |
| monitoring_cadvisor /usr/bin/cadvisor -logtostderr Up 0.0.0.0:4194->8080/tcp |
2. 部署 kube-state-metrics
2.1 创建集群角色(ClusterRole)
| apiVersion: rbac.authorization.k8s.io/v1 |
| kind: ClusterRole |
| metadata: |
| labels: |
| app.kubernetes.io/name: kube-state-metrics |
| app.kubernetes.io/version: v1.9.7 |
| name: kube-state-metrics |
| rules: |
| - apiGroups: |
| - "" |
| resources: |
| - configmaps |
| - secrets |
| - nodes |
| - pods |
| - services |
| - resourcequotas |
| - replicationcontrollers |
| - limitranges |
| - persistentvolumeclaims |
| - persistentvolumes |
| - namespaces |
| - endpoints |
| verbs: |
| - list |
| - watch |
| - apiGroups: |
| - extensions |
| resources: |
| - daemonsets |
| - deployments |
| - replicasets |
| - ingresses |
| verbs: |
| - list |
| - watch |
| - apiGroups: |
| - apps |
| resources: |
| - statefulsets |
| - daemonsets |
| - deployments |
| - replicasets |
| verbs: |
| - list |
| - watch |
| - apiGroups: |
| - batch |
| resources: |
| - cronjobs |
| - jobs |
| verbs: |
| - list |
| - watch |
| - apiGroups: |
| - autoscaling |
| resources: |
| - horizontalpodautoscalers |
| verbs: |
| - list |
| - watch |
| - apiGroups: |
| - authentication.k8s.io |
| resources: |
| - tokenreviews |
| verbs: |
| - create |
| - apiGroups: |
| - authorization.k8s.io |
| resources: |
| - subjectaccessreviews |
| verbs: |
| - create |
| - apiGroups: |
| - policy |
| resources: |
| - poddisruptionbudgets |
| verbs: |
| - list |
| - watch |
| - apiGroups: |
| - certificates.k8s.io |
| resources: |
| - certificatesigningrequests |
| verbs: |
| - list |
| - watch |
| - apiGroups: |
| - storage.k8s.io |
| resources: |
| - storageclasses |
| - volumeattachments |
| verbs: |
| - list |
| - watch |
| - apiGroups: |
| - admissionregistration.k8s.io |
| resources: |
| - mutatingwebhookconfigurations |
| - validatingwebhookconfigurations |
| verbs: |
| - list |
| - watch |
| - apiGroups: |
| - networking.k8s.io |
| resources: |
| - networkpolicies |
| verbs: |
| - list |
| - watch |
| kubectl create -f cluster-role.yaml |
| [root@DoM01 kube-state-metrics]# kubectl get ClusterRole kube-state-metrics |
| NAME AGE |
| kube-state-metrics 11m |
2.2 创建服务账户
- 创建 service-account.yaml文件
| apiVersion: v1 |
| kind: ServiceAccount |
| metadata: |
| labels: |
| app.kubernetes.io/name: kube-state-metrics |
| app.kubernetes.io/version: v1.9.7 |
| name: kube-state-metrics |
| namespace: monitor |
| kubectl create -f service-account.yaml |
| [root@DoM01 kube-state-metrics]# kubectl get ServiceAccount kube-state-metrics -n monitor |
| NAME SECRETS AGE |
| kube-state-metrics 1 13m |
2.3 绑定集群角色(ClusterRoleBinding)
- 创建cluster-role-binding.yaml文件
| apiVersion: rbac.authorization.k8s.io/v1 |
| kind: ClusterRoleBinding |
| metadata: |
| labels: |
| app.kubernetes.io/name: kube-state-metrics |
| app.kubernetes.io/version: v1.9.7 |
| name: kube-state-metrics |
| roleRef: |
| apiGroup: rbac.authorization.k8s.io |
| kind: ClusterRole |
| name: kube-state-metrics |
| subjects: |
| - kind: ServiceAccount |
| name: kube-state-metrics |
| namespace: monitor |
| kubectl create -f cluster-role-binding.yaml |
| [root@DoM01 kube-state-metrics]# kubectl describe ClusterRoleBinding kube-state-metrics -n monitor |
| Name: kube-state-metrics |
| Labels: app.kubernetes.io/name=kube-state-metrics |
| app.kubernetes.io/version=v1.9.7 |
| Annotations: <none> |
| Role: |
| Kind: ClusterRole |
| Name: kube-state-metrics |
| Subjects: |
| Kind Name Namespace |
| ---- ---- --------- |
| ServiceAccount kube-state-metrics monitor |
2.4 deployment.yml
包含deployment、service
| apiVersion: apps/v1 |
| kind: Deployment |
| metadata: |
| labels: |
| app.kubernetes.io/name: kube-state-metrics |
| app.kubernetes.io/version: v1.9.7 |
| name: kube-state-metrics |
| namespace: monitor |
| spec: |
| replicas: 1 |
| selector: |
| matchLabels: |
| app.kubernetes.io/name: kube-state-metrics |
| template: |
| metadata: |
| labels: |
| app.kubernetes.io/name: kube-state-metrics |
| app.kubernetes.io/version: v1.9.7 |
| spec: |
| containers: |
| - image: quay.mirrors.ustc.edu.cn/coreos/kube-state-metrics:v1.9.7 |
| livenessProbe: |
| httpGet: |
| path: /healthz |
| port: 8080 |
| initialDelaySeconds: 5 |
| timeoutSeconds: 5 |
| name: kube-state-metrics |
| ports: |
| - containerPort: 8080 |
| name: http-metrics |
| - containerPort: 8081 |
| name: telemetry |
| readinessProbe: |
| httpGet: |
| path: / |
| port: 8081 |
| initialDelaySeconds: 5 |
| timeoutSeconds: 5 |
| nodeSelector: |
| beta.kubernetes.io/os: linux |
| serviceAccountName: kube-state-metrics |
| --- |
| apiVersion: v1 |
| kind: Service |
| metadata: |
| |
| |
| labels: |
| app.kubernetes.io/name: kube-state-metrics |
| app.kubernetes.io/version: v1.9.7 |
| name: kube-state-metrics |
| namespace: monitor |
| spec: |
| clusterIP: None |
| ports: |
| - name: http-metrics |
| port: 8080 |
| targetPort: http-metrics |
| - name: telemetry |
| port: 8081 |
| targetPort: telemetry |
| selector: |
| app.kubernetes.io/name: kube-state-metrics |
| kubectl create -f deployment.yml |
| [root@DoM01 kube-state-metrics]# kubectl get all -n monitor |
| NAME READY STATUS RESTARTS AGE |
| pod/kube-state-metrics-d49db84b9-6gfwp 1/1 Running 0 14m |
| |
| |
| NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE |
| service/kube-state-metrics ClusterIP None <none> 8080/TCP,8081/TCP 14m |
| |
| |
| NAME READY UP-TO-DATE AVAILABLE AGE |
| deployment.apps/kube-state-metrics 1/1 1 1 14m |
| |
| NAME DESIRED CURRENT READY AGE |
| replicaset.apps/kube-state-metrics-d49db84b9 1 1 1 14m |
| |
3. 部署prometheus
3.1 创建RBAC权限
- 创建 prometheus.rbac.yaml 文件
为prometheus提供RBAC权限,包括 ClusterRole、ServiceAccount、ClusterRoleBinding。
之前创建kube-state-metrics时是分开写的,既然已经熟悉了,我们这里写在一起。
| apiVersion: rbac.authorization.k8s.io/v1 |
| kind: ClusterRole |
| metadata: |
| name: prometheus |
| rules: |
| - apiGroups: [""] |
| resources: |
| - nodes |
| - nodes/proxy |
| - services |
| - endpoints |
| - pods |
| verbs: ["get", "list", "watch"] |
| - apiGroups: |
| - extensions |
| resources: |
| - ingresses |
| verbs: ["get", "list", "watch"] |
| - nonResourceURLs: ["/metrics"] |
| verbs: ["get"] |
| --- |
| apiVersion: v1 |
| kind: ServiceAccount |
| metadata: |
| name: prometheus |
| namespace: monitor |
| --- |
| apiVersion: rbac.authorization.k8s.io/v1 |
| kind: ClusterRoleBinding |
| metadata: |
| name: prometheus |
| roleRef: |
| apiGroup: rbac.authorization.k8s.io |
| kind: ClusterRole |
| name: prometheus |
| subjects: |
| - kind: ServiceAccount |
| name: prometheus |
| namespace: monitor |
| kubectl create -f prometheus.rbac.yaml |
| [root@DoM01 promethues]# kubectl describe ClusterRole prometheus |
| Name: prometheus |
| Labels: <none> |
| Annotations: <none> |
| PolicyRule: |
| Resources Non-Resource URLs Resource Names Verbs |
| --------- ----------------- -------------- ----- |
| endpoints [] [] [get list watch] |
| nodes/proxy [] [] [get list watch] |
| nodes [] [] [get list watch] |
| pods [] [] [get list watch] |
| services [] [] [get list watch] |
| ingresses.extensions [] [] [get list watch] |
| [/metrics] [] [get] |
| |
| [root@DoM01 promethues]# kubectl describe ServiceAccount prometheus -n monitor |
| Name: prometheus |
| Namespace: monitor |
| Labels: <none> |
| Annotations: <none> |
| Image pull secrets: <none> |
| Mountable secrets: prometheus-token-t7msz |
| Tokens: prometheus-token-t7msz |
| Events: <none> |
| [root@DoM01 promethues]# kubectl describe ClusterRoleBinding prometheus -n monitor |
| Name: prometheus |
| Labels: <none> |
| Annotations: <none> |
| Role: |
| Kind: ClusterRole |
| Name: prometheus |
| Subjects: |
| Kind Name Namespace |
| ---- ---- --------- |
| ServiceAccount prometheus monitor |
| |
3.2 创建prometheus配置文件
- 创建 prometheus.config.yaml文件
创建ConfigMap,为prometheus提供配置文件
| apiVersion: v1 |
| kind: ConfigMap |
| metadata: |
| name: prometheus-config |
| namespace: monitor |
| data: |
| prometheus.yml: | |
| global: |
| scrape_interval: 15s |
| evaluation_interval: 15s |
| scrape_configs: |
| |
| - job_name: 'kubernetes-apiservers' |
| kubernetes_sd_configs: |
| - role: endpoints |
| scheme: https |
| tls_config: |
| ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt |
| bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token |
| relabel_configs: |
| - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] |
| action: keep |
| regex: default;kubernetes;https |
| |
| - job_name: 'kubernetes-nodes' |
| kubernetes_sd_configs: |
| - role: node |
| scheme: https |
| tls_config: |
| ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt |
| bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token |
| relabel_configs: |
| - action: labelmap |
| regex: __meta_kubernetes_node_label_(.+) |
| - target_label: __address__ |
| replacement: kubernetes.default.svc:443 |
| - source_labels: [__meta_kubernetes_node_name] |
| regex: (.+) |
| target_label: __metrics_path__ |
| replacement: /api/v1/nodes/${1}/proxy/metrics |
| |
| - job_name: 'kubernetes-cadvisor' |
| kubernetes_sd_configs: |
| - role: node |
| scheme: https |
| tls_config: |
| ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt |
| bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token |
| relabel_configs: |
| - action: labelmap |
| regex: __meta_kubernetes_node_label_(.+) |
| - target_label: __address__ |
| replacement: kubernetes.default.svc:443 |
| - source_labels: [__meta_kubernetes_node_name] |
| regex: (.+) |
| target_label: __metrics_path__ |
| replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor |
| |
| - job_name: 'kubernetes-service-endpoints' |
| kubernetes_sd_configs: |
| - role: endpoints |
| relabel_configs: |
| - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] |
| action: keep |
| regex: true |
| - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] |
| action: replace |
| target_label: __scheme__ |
| regex: (https?) |
| - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] |
| action: replace |
| target_label: __metrics_path__ |
| regex: (.+) |
| - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] |
| action: replace |
| target_label: __address__ |
| regex: ([^:]+)(?::\d+)?;(\d+) |
| replacement: $1:$2 |
| - action: labelmap |
| regex: __meta_kubernetes_service_label_(.+) |
| - source_labels: [__meta_kubernetes_namespace] |
| action: replace |
| target_label: kubernetes_namespace |
| - source_labels: [__meta_kubernetes_service_name] |
| action: replace |
| target_label: kubernetes_name |
| |
| - job_name: 'kubernetes-services' |
| kubernetes_sd_configs: |
| - role: service |
| metrics_path: /probe |
| params: |
| module: [http_2xx] |
| relabel_configs: |
| - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] |
| action: keep |
| regex: true |
| - source_labels: [__address__] |
| target_label: __param_target |
| - target_label: __address__ |
| replacement: blackbox-exporter.example.com:9115 |
| - source_labels: [__param_target] |
| target_label: instance |
| - action: labelmap |
| regex: __meta_kubernetes_service_label_(.+) |
| - source_labels: [__meta_kubernetes_namespace] |
| target_label: kubernetes_namespace |
| - source_labels: [__meta_kubernetes_service_name] |
| target_label: kubernetes_name |
| |
| - job_name: 'kubernetes-ingresses' |
| kubernetes_sd_configs: |
| - role: ingress |
| relabel_configs: |
| - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe] |
| action: keep |
| regex: true |
| - source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path] |
| regex: (.+);(.+);(.+) |
| replacement: ${1}://${2}${3} |
| target_label: __param_target |
| - target_label: __address__ |
| replacement: blackbox-exporter.example.com:9115 |
| - source_labels: [__param_target] |
| target_label: instance |
| - action: labelmap |
| regex: __meta_kubernetes_ingress_label_(.+) |
| - source_labels: [__meta_kubernetes_namespace] |
| target_label: kubernetes_namespace |
| - source_labels: [__meta_kubernetes_ingress_name] |
| target_label: kubernetes_name |
| |
| - job_name: 'kubernetes-pods' |
| kubernetes_sd_configs: |
| - role: pod |
| relabel_configs: |
| - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] |
| action: keep |
| regex: true |
| - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] |
| action: replace |
| target_label: __metrics_path__ |
| regex: (.+) |
| - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] |
| action: replace |
| regex: ([^:]+)(?::\d+)?;(\d+) |
| replacement: $1:$2 |
| target_label: __address__ |
| - action: labelmap |
| regex: __meta_kubernetes_pod_label_(.+) |
| - source_labels: [__meta_kubernetes_namespace] |
| action: replace |
| target_label: kubernetes_namespace |
| - source_labels: [__meta_kubernetes_pod_name] |
| action: replace |
| target_label: kubernetes_pod_name |
| kubectl create -f prometheus.config.yaml |
| [root@DoM01 promethues]# kubectl get configmaps -n monitor |
| NAME DATA AGE |
| prometheus-config 1 3h7m |
3.3 deployment和service
- 创建 prometheus.deployment.yaml 文件
用于部署deployment和service。
| apiVersion: apps/v1beta2 |
| kind: Deployment |
| metadata: |
| labels: |
| name: prometheus-deployment |
| name: prometheus |
| namespace: monitor |
| spec: |
| replicas: 1 |
| selector: |
| matchLabels: |
| app: prometheus |
| template: |
| metadata: |
| labels: |
| app: prometheus |
| spec: |
| containers: |
| - image: prom/prometheus:v2.2.1 |
| name: prometheus |
| command: |
| - "/bin/prometheus" |
| args: |
| - "--config.file=/etc/prometheus/prometheus.yml" |
| - "--storage.tsdb.path=/prometheus" |
| - "--storage.tsdb.retention=24h" |
| ports: |
| - containerPort: 9090 |
| protocol: TCP |
| volumeMounts: |
| - mountPath: "/prometheus" |
| name: data |
| - mountPath: "/etc/prometheus" |
| name: config-volume |
| - name: host-time |
| mountPath: /etc/localtime |
| readOnly: true |
| resources: |
| requests: |
| cpu: 100m |
| memory: 100Mi |
| limits: |
| cpu: 500m |
| memory: 2500Mi |
| serviceAccountName: prometheus |
| imagePullSecrets: |
| - name: login-harbor |
| volumes: |
| - name: host-time |
| hostPath: |
| path: /etc/localtime |
| - name: data |
| emptyDir: {} |
| - name: config-volume |
| configMap: |
| name: prometheus-config |
| |
| --- |
| kind: Service |
| apiVersion: v1 |
| metadata: |
| labels: |
| app: prometheus |
| name: prometheus |
| namespace: monitor |
| spec: |
| type: NodePort |
| ports: |
| - port: 9090 |
| targetPort: 9090 |
| nodePort: 30003 |
| selector: |
| app: prometheus |
| kubectl create -f prometheus.deployment.yaml |
| [root@DoM01 promethues] |
| |
| pod/prometheus-57fb8576bf-knzvs 1/1 Running 0 22m |
| |
| |
| service/prometheus NodePort 10.1.189.71 <none> 9090:30003/TCP 3h5m |
| |
| |
| deployment.apps/prometheus 1/1 1 1 3h5m |
| |
| replicaset.apps/prometheus-57fb8576bf 1 1 1 3h4m |
| |
3.4 查看结果

接下来我们接入n9e,见文档 《03-k8s集群监控(下)》

【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek “源神”启动!「GitHub 热点速览」
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· 我与微信审核的“相爱相杀”看个人小程序副业
· C# 集成 DeepSeek 模型实现 AI 私有化(本地部署与 API 调用教程)
· DeepSeek R1 简明指南:架构、训练、本地部署及硬件要求