Prometheus K8S部署
Prometheus K8S部署
部署方式:https://github.com/kubernetes/kubernetes/tree/master/cluster/addons/prometheus
源码目录:kubernetes/cluster/addons/prometheus
服务发现:https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config
部署条件
1、K8S中部署内部DNS服务
2、已有可使用的动态PV
配置文件
下列是已经修改好的配置文件,可根据条件自行微调
- # 访问api授权
- prometheus-rbac.yaml
-
apiVersion: v1 # 创建 ServiceAccount 授予权限 kind: ServiceAccount metadata: name: prometheus namespace: kube-system labels: kubernetes.io/cluster-service: "true" addonmanager.kubernetes.io/mode: Reconcile --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRole metadata: name: prometheus labels: kubernetes.io/cluster-service: "true" addonmanager.kubernetes.io/mode: Reconcile rules: - apiGroups: - "" # 授予的权限 resources: - nodes - nodes/metrics - services - endpoints - pods verbs: - get - list - watch - apiGroups: - "" resources: - configmaps verbs: - get - nonResourceURLs: - "/metrics" verbs: - get --- # 角色绑定 apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: name: prometheus labels: kubernetes.io/cluster-service: "true" addonmanager.kubernetes.io/mode: Reconcile roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: prometheus subjects: - kind: ServiceAccount name: prometheus namespace: kube-system
- # 管理prometheus配置文件
- prometheus-configmap.yaml
-
# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/ apiVersion: v1 kind: ConfigMap metadata: name: prometheus-config namespace: kube-system labels: kubernetes.io/cluster-service: "true" addonmanager.kubernetes.io/mode: EnsureExists data: # 存放prometheus配置文件 prometheus.yml: | # 配置采集目标 scrape_configs: - job_name: prometheus static_configs: - targets: # 采集自身 - localhost:9090 # 采集:Apiserver 生存指标 # 创建的job name 名称为 kubernetes-apiservers - job_name: kubernetes-apiservers # 基于k8s的服务发现 kubernetes_sd_configs: - role: endpoints # 使用通信标记标签 relabel_configs: # 保留正则匹配标签 - action: keep # 已经包含 regex: default;kubernetes;https source_labels: - __meta_kubernetes_namespace - __meta_kubernetes_service_name - __meta_kubernetes_endpoint_port_name # 使用方法为https、默认http scheme: https tls_config: # promethus访问Apiserver使用认证 ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt # 跳过https认证 insecure_skip_verify: true # promethus访问Apiserver使用认证 bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token # 采集:Kubelet 生存指标 - job_name: kubernetes-nodes-kubelet kubernetes_sd_configs: # 发现集群中所有的Node - role: node relabel_configs: # 通过regex获取关键信息 - action: labelmap regex: __meta_kubernetes_node_label_(.+) scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token # 采集:nodes-cadvisor 信息 - job_name: kubernetes-nodes-cadvisor kubernetes_sd_configs: - role: node relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) # 重命名标签 - target_label: __metrics_path__ replacement: /metrics/cadvisor scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token # 采集:service-endpoints 信息 - job_name: kubernetes-service-endpoints # 选定指标 kubernetes_sd_configs: - role: endpoints relabel_configs: - action: keep regex: true # 指定源标签 source_labels: - __meta_kubernetes_service_annotation_prometheus_io_scrape - action: replace regex: (https?) source_labels: - __meta_kubernetes_service_annotation_prometheus_io_scheme # 重命名标签采集 target_label: __scheme__ - action: replace regex: (.+) source_labels: - __meta_kubernetes_service_annotation_prometheus_io_path target_label: __metrics_path__ - action: replace regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 source_labels: - __address__ - __meta_kubernetes_service_annotation_prometheus_io_port target_label: __address__ - action: labelmap regex: __meta_kubernetes_service_label_(.+) - action: replace source_labels: - __meta_kubernetes_namespace target_label: kubernetes_namespace - action: replace source_labels: - __meta_kubernetes_service_name target_label: kubernetes_name # 采集:kubernetes-services 服务指标 - job_name: kubernetes-services kubernetes_sd_configs: - role: service # 黑盒探测,探测IP与端口是否可用 metrics_path: /probe params: module: - http_2xx relabel_configs: - action: keep regex: true source_labels: - __meta_kubernetes_service_annotation_prometheus_io_probe - source_labels: - __address__ target_label: __param_target # 使用 blackbox进行黑盒探测 - replacement: blackbox target_label: __address__ - source_labels: - __param_target target_label: instance - action: labelmap regex: __meta_kubernetes_service_label_(.+) - source_labels: - __meta_kubernetes_namespace target_label: kubernetes_namespace - source_labels: - __meta_kubernetes_service_name target_label: kubernetes_name # 采集: kubernetes-pods 信息 - job_name: kubernetes-pods kubernetes_sd_configs: - role: pod relabel_configs: - action: keep regex: true source_labels: # 只保留采集的信息 - __meta_kubernetes_pod_annotation_prometheus_io_scrape - action: replace regex: (.+) source_labels: - __meta_kubernetes_pod_annotation_prometheus_io_path target_label: __metrics_path__ - action: replace regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 source_labels: # 采集地址 - __address__ # 采集端口 - __meta_kubernetes_pod_annotation_prometheus_io_port target_label: __address__ - action: labelmap regex: __meta_kubernetes_pod_label_(.+) - action: replace source_labels: - __meta_kubernetes_namespace target_label: kubernetes_namespace - action: replace source_labels: - __meta_kubernetes_pod_name target_label: kubernetes_pod_name alerting: # 告警配置文件 alertmanagers: - kubernetes_sd_configs: # 采用动态获取 - role: pod tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - source_labels: [__meta_kubernetes_namespace] regex: kube-system action: keep - source_labels: [__meta_kubernetes_pod_label_k8s_app] regex: alertmanager action: keep - source_labels: [__meta_kubernetes_pod_container_port_number] regex: action: drop
- # 将prometheus暴露访问
- prometheus-service.yaml
-
apiVersion: apps/v1 kind: StatefulSet metadata: name: prometheus # 部署命名空间 namespace: kube-system labels: k8s-app: prometheus kubernetes.io/cluster-service: "true" addonmanager.kubernetes.io/mode: Reconcile version: v2.2.1 spec: serviceName: "prometheus" replicas: 1 podManagementPolicy: "Parallel" updateStrategy: type: "RollingUpdate" selector: matchLabels: k8s-app: prometheus template: metadata: labels: k8s-app: prometheus annotations: scheduler.alpha.kubernetes.io/critical-pod: '' spec: priorityClassName: system-cluster-critical serviceAccountName: prometheus # 初始化容器 initContainers: - name: "init-chown-data" image: "busybox:latest" imagePullPolicy: "IfNotPresent" command: ["chown", "-R", "65534:65534", "/data"] volumeMounts: - name: prometheus-data mountPath: /data subPath: "" containers: - name: prometheus-server-configmap-reload image: "jimmidyson/configmap-reload:v0.1" imagePullPolicy: "IfNotPresent" args: - --volume-dir=/etc/config - --webhook-url=http://localhost:9090/-/reload volumeMounts: - name: config-volume mountPath: /etc/config readOnly: true resources: limits: cpu: 10m memory: 10Mi requests: cpu: 10m memory: 10Mi - name: prometheus-server # 主要使用镜像 image: "prom/prometheus:v2.2.1" imagePullPolicy: "IfNotPresent" args: - --config.file=/etc/config/prometheus.yml - --storage.tsdb.path=/data - --web.console.libraries=/etc/prometheus/console_libraries - --web.console.templates=/etc/prometheus/consoles - --web.enable-lifecycle ports: - containerPort: 9090 readinessProbe: # 健康检查 httpGet: path: /-/ready port: 9090 initialDelaySeconds: 30 timeoutSeconds: 30 livenessProbe: httpGet: path: /-/healthy port: 9090 initialDelaySeconds: 30 timeoutSeconds: 30 # based on 10 running nodes with 30 pods each resources: limits: cpu: 200m memory: 1000Mi requests: cpu: 200m memory: 1000Mi # 数据卷 volumeMounts: - name: config-volume mountPath: /etc/config - name: prometheus-data mountPath: /data subPath: "" terminationGracePeriodSeconds: 300 volumes: - name: config-volume configMap: name: prometheus-config volumeClaimTemplates: - metadata: name: prometheus-data spec: # 使用动态PV、修改为已创建的PV动态存储 storageClassName: managed-nfs-storage accessModes: - ReadWriteOnce resources: requests: storage: "16Gi"
- # 通过有状态的形式将prometheus部署
- prometheus-statefulset.yaml
-
kind: Service apiVersion: v1 metadata: name: prometheus # 指定命名空间 namespace: kube-system labels: kubernetes.io/name: "Prometheus" kubernetes.io/cluster-service: "true" addonmanager.kubernetes.io/mode: Reconcile spec: # 添加外部访问 type: NodePort # 指定内部访问协议 ports: - name: http port: 9090 protocol: TCP targetPort: 9090 selector: k8s-app: prometheus
部署
1、下载github包:https://github.com/kubernetes/kubernetes/
2、复制文件到指定目录
mkdir ~/prometheus
cp ~/kubernetes/cluster/addons/prometheus/* ~/prometheus/
3、进入到目录
cd ~/prometheus/
4、k8s通过配置文件创建运行容器
kubectl apply -f prometheus-rbac.yaml kubectl apply -f prometheus-configmap.yaml kubectl apply -f prometheus-statefulset.yaml kubectl apply -f prometheus-service.yaml
5、查看创建资源
kubectl get pod,svc -n kube-system NAME READY STATUS RESTARTS AGE pod/coredns-64479cf49b-lsqqn 1/1 Running 0 75m pod/prometheus-0 2/2 Running 0 2m12s NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE service/kube-dns ClusterIP 10.0.0.2 <none> 53/UDP,53/TCP,9153/TCP 75m service/prometheus NodePort 10.0.0.170 <none> 9090:42575/TCP 8s
6、测试通过端口开启端口访问监控端