20220306作业 prometheus1
1.二进制安装prometheus
mkdir /apps/
ln -sf prometheus-2.33.4.linux-amd64 prometheus
检查配置文件
# FIX: the `prometheus` binary has no `check config` subcommand;
# configuration files are validated with the bundled `promtool`.
cd prometheus; ./promtool check config prometheus.yml
创建service文件
vim /etc/systemd/system/prometheus.service
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target

[Service]
Restart=on-failure
WorkingDirectory=/apps/prometheus/
ExecStart=/apps/prometheus/prometheus --config.file=/apps/prometheus/prometheus.yml

# FIX: header was misspelled "[Instal1]" (digit 1 instead of letter l);
# systemd ignores unknown sections, so "systemctl enable" would fail.
[Install]
WantedBy=multi-user.target
启动prometheus并设置开机自启动
systemctl start prometheus ;systemctl enable prometheus
2.二进制安装node_export
mkdir /apps
ln -sf node_exporter-1.3.1.linux-amd64 node_exporter
创建service文件
vim /etc/systemd/system/node-exporter.service
[Unit]
Description=Prometheus Node Exporter
After=network.target

[Service]
ExecStart=/apps/node_exporter/node_exporter

# FIX: header was misspelled "[Instal1]" (digit 1 instead of letter l);
# systemd ignores unknown sections, so "systemctl enable" would fail.
[Install]
WantedBy=multi-user.target
启动node-export 并设置开机自启动
# FIX: the heading says "start and enable", but the command was
# `systemctl status`, which only queries state without starting the unit.
systemctl start node-exporter ; systemctl enable node-exporter
3.添加node节点收集数据
vim /apps/prometheus/prometheus.yml
# Scrape job for the binary-installed node_exporter (append under scrape_configs:).
- job_name: "prometheus-node"
  static_configs:
    - targets: ["192.168.1.242:9100"]
4.安装grafana
wget https://dl.grafana.com/enterprise/release/grafana-enterprise-8.4.3-1.x86_64.rpm
sudo yum install grafana-enterprise-8.4.3-1.x86_64.rpm
启动grafana并设置开机自启动
systemctl restart grafana-server;systemctl enable grafana-server
5.daemon部署node-export
先创建命名空间
kubectl create namespace monitoring
根据yaml创建node-export
# DaemonSet: run one node-exporter pod per node (host namespaces so host
# metrics are visible), plus a NodePort Service annotated for Prometheus scraping.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
  labels:
    k8s-app: node-exporter
spec:
  selector:
    matchLabels:
      k8s-app: node-exporter
  template:
    metadata:
      labels:
        k8s-app: node-exporter
    spec:
      tolerations:
        # tolerate the master taint so exporters also run on master nodes
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
      containers:
        - image: prom/node-exporter:v1.3.1
          imagePullPolicy: IfNotPresent
          name: prometheus-node-exporter
          ports:
            - containerPort: 9100
              hostPort: 9100
              protocol: TCP
              name: metrics
          volumeMounts:
            # host filesystems mounted read targets for node metrics
            - mountPath: /host/proc
              name: proc
            - mountPath: /host/sys
              name: sys
            - mountPath: /host
              name: rootfs
          args:
            - --path.procfs=/host/proc
            - --path.sysfs=/host/sys
            - --path.rootfs=/host
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
        - name: rootfs
          hostPath:
            path: /
      hostNetwork: true
      hostPID: true
---
apiVersion: v1
kind: Service
metadata:
  annotations:
    # picked up by the 'kubernetes-service-endpoints' scrape job
    prometheus.io/scrape: "true"
  labels:
    k8s-app: node-exporter
  name: node-exporter
  namespace: monitoring
spec:
  type: NodePort
  ports:
    - name: http
      port: 9100
      nodePort: 39100
      protocol: TCP
  selector:
    k8s-app: node-exporter
6.deployment 部署prometheus server
创建configmap:
# ConfigMap holding prometheus.yml for the in-cluster Prometheus server:
# scrapes node-exporters, kubelet cadvisor (via the apiserver proxy),
# annotated service endpoints, and the apiserver itself.
---
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 10s
      evaluation_interval: 1m
    scrape_configs:
      - job_name: 'kubernetes-node'
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          # rewrite the kubelet port (10250) to the node-exporter port (9100)
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:9100'
            target_label: __address__
            action: replace
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
      - job_name: 'kubernetes-node-cadvisor'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          # scrape cadvisor metrics through the apiserver node proxy
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          # only keep services annotated prometheus.io/scrape: "true"
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          # honour a custom prometheus.io/port annotation
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name
      - job_name: 'kubernetes-apiserver'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          # only the default/kubernetes service's https endpoint
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https
创建serviceaccount:
kubectl create serviceaccount monitor -n monitoring
账号授权:
kubectl create clusterrolebinding monitor-clusterrolebinding -n monitoring --clusterrole=cluster-admin --serviceaccount=monitoring:monitor
部署prometheus server
# Deployment of the Prometheus server: config mounted from the
# prometheus-config ConfigMap, TSDB persisted on NFS.
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitoring
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
    # matchExpressions:
    # - {key: app, operator: In, values: [prometheus]}
    # - {key: component, operator: In, values: [server]}
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        # do not let Prometheus scrape itself via endpoint discovery
        prometheus.io/scrape: 'false'
    spec:
      # nodeName: 192.168.1.73
      serviceAccountName: monitor
      containers:
        - name: prometheus
          image: prom/prometheus:v2.31.2
          imagePullPolicy: IfNotPresent
          command:
            - prometheus
            - --config.file=/etc/prometheus/prometheus.yml
            - --storage.tsdb.path=/prometheus
            - --storage.tsdb.retention=720h
          ports:
            - containerPort: 9090
              protocol: TCP
          volumeMounts:
            - mountPath: /etc/prometheus/prometheus.yml
              name: prometheus-config
              subPath: prometheus.yml
            - mountPath: /prometheus/
              name: prometheus-storage-volume
      volumes:
        - name: prometheus-config
          configMap:
            name: prometheus-config
            items:
              - key: prometheus.yml
                path: prometheus.yml
                mode: 0644
        - name: prometheus-storage-volume
          nfs:
            server: 192.168.1.76
            path: /data/prometheus
7.daemon部署cadvisor
docker load -i cadvisor-v0.39.2.tar.gz
docker tag gcr.io/cadvisor/cadvisor:v0.39.2 192.168.1.138/baseimages/cadvisor:v0.39.2
docker push 192.168.1.138/baseimages/cadvisor:v0.39.2
kubectl apply -f case1-daemonset-deploy-cadvisor.yaml
# DaemonSet: run cAdvisor on every node (host network) to expose
# per-container metrics on port 8080.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cadvisor
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: cAdvisor
  template:
    metadata:
      labels:
        app: cAdvisor
    spec:
      tolerations:
        # tolerate the master NoSchedule taint so cAdvisor also runs on masters
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
      hostNetwork: true
      restartPolicy: Always  # restart policy
      containers:
        - name: cadvisor
          image: 192.168.1.138/baseimages/cadvisor:v0.39.2
          imagePullPolicy: IfNotPresent  # image pull policy
          ports:
            - containerPort: 8080
          volumeMounts:
            # host paths cAdvisor inspects for container stats
            - name: root
              mountPath: /rootfs
            - name: run
              mountPath: /var/run
            - name: sys
              mountPath: /sys
            - name: docker
              mountPath: /var/lib/docker
      volumes:
        - name: root
          hostPath:
            path: /
        - name: run
          hostPath:
            path: /var/run
        - name: sys
          hostPath:
            path: /sys
        - name: docker
          hostPath:
            path: /var/lib/docker
手动增加cAdvisor监控
# Static scrape job for the cAdvisor pods.
# FIX: "targets:[" was missing the space after the colon, which is a
# YAML syntax error (plain scalar, not a mapping with a list value).
- job_name: "prometheus-pod"
  static_configs:
    - targets: ["192.168.1.34:8080", "192.168.1.36:8080", "192.168.1.37:8080", "192.168.1.72:8080", "192.168.1.73:8080", "192.168.1.74:8080"]
8.PromQL语句
8.1 数据分类
瞬时向量,瞬时数据(instant vector):是一组时间序列,每个时间序列包含单个数据样本
范围向量,范围数据(range vector):是指在任何一个时间范围内,抓取的所有度量指标数据,比如最近一天的网卡流量
标量、纯量数据(scalar):是一个浮点数类型的数据值,使用node_load1获取到的是一个瞬时向量,但是可使用内置函数scalar()将瞬时向量转换为标量,例如:scalar(sum(node_load1))
字符串(string):字符串类型的数据,目前使用较少
8.2 数据类型
counter:
计数器,counter类型代表一个累积的指标数据,在没有被重置的前提下只增不减,比如磁盘io总数,nginx的请求总数,网卡流经的报文总数等
Gauge:
仪表盘,Gauge类型代表一个可以任意变化的指标数据,值可以随时增高或减少,如带宽速率,cpu负载、内存利用率,nginx活动连接数等
Histogram:
累积直方图,Histogram会在一段时间范围内对数据进行采样(通常是请求持续时间或响应大小等),假如每分钟产生一个当前的活跃连接数,那么一天就会产生1440个数据,查看数据的每间隔的绘图跨度为2小时,2点的柱状图包括0-2小时内的数据,4点包括0-4小时的数据
Summary:摘要,也是一组数据,统计的不是区间的个数而是统计分位数,从0-1,表示的是0%-100%,如下统计的是0、0.25、0.5、0.75、1的数据量分别是多少
8.3 指标数据
node_memory_MemFree_bytes #查询node节点的总剩余内存
node_memory_MemFree_bytes{instance="192.168.1.34:9100"} #指定节点查询
8.4 匹配器
= : 选择与提供的字符串完全相同的标签,精确匹配
node_memory_MemFree_bytes{instance="192.168.1.34:9100"}
!= :选择与提供的字符串不相同的标签,取反
node_memory_MemFree_bytes{instance!="192.168.1.34:9100"}
=~ : 选择正则表达式与提供的字符串相匹配的标签
node_memory_MemFree_bytes{instance=~"192.168.1.*:9100"}
!~ : 选择正则表达式与提供的字符串不匹配的标签
node_memory_MemFree_bytes{instance!~"192.168.1.*:9100"}
8.5 时间范围
s - 秒
m - 分钟
h - 小时
d - 天
w - 周
y - 年
8.6 运算符
+
-
*
/
%
^等
8.7 聚合运算
max() 最大值;min() 最小值 ; avg() 平均值
sum() 求和 count() 统计返回值总数 count_values() 对value个数进行统计 #count_values("node_os_version",node_os_version)
abs() 返回指标数据的绝对值 absent() 如果监控指标有数据返回空,没有返回1,用于对监控项设置告警
stddev() 标准差 stdvar() 求方差
topk() 样本值排名最大的N个数据 #topk(5,promhttp_metric_handler_requests_total) bottomk() 样本值排名最小的N个数据
rate() 专门用来搭配counter数据类型使用,功能是取counter数据类型在这个时间段中平均每秒增量平均数 irate() 专门用来搭配counter数据类型使用,功能是取counter数据类型在这个时间段中平均每秒的峰值
by 在计算结果中,只保留by指定的标签值;without() 与by相反
9.prometheus 的服务发现机制
常用的服务发现:https://prometheus.io/docs/prometheus/latest/configuration/configuration/
9.1 kubernetes_sd_config
k8s内部prometheus配置服务发现见 6
k8s外部prometheus配置服务发现
创建用户,获取到证书
自动发现配置如下:
# Scrape jobs for a Prometheus running OUTSIDE the cluster: each
# kubernetes_sd_configs entry carries the apiserver address and token.
- job_name: 'kubernetes-apiservers-monitor'
  kubernetes_sd_configs:
    - role: endpoints
      api_server: https://192.168.1.198:6443
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /apps/prometheus/k8s.token
  scheme: https
  tls_config:
    insecure_skip_verify: true
  bearer_token_file: /apps/prometheus/k8s.token
  relabel_configs:
    - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
      action: keep
      regex: default;kubernetes;https
    - target_label: __address__
      replacement: 192.168.1.198:6443
- job_name: 'kubernetes-nodes-monitor'
  scheme: http
  tls_config:
    insecure_skip_verify: true
  bearer_token_file: /apps/prometheus/k8s.token
  kubernetes_sd_configs:
    - role: node
      api_server: https://192.168.1.198:6443
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /apps/prometheus/k8s.token
  relabel_configs:
    # rewrite kubelet port 10250 to node-exporter port 9100
    - source_labels: [__address__]
      regex: '(.*):10250'
      replacement: '${1}:9100'
      target_label: __address__
      action: replace
    - source_labels: [__meta_kubernetes_node_label_failure_domain_beta_kubernetes_io_region]
      regex: '(.*)'
      replacement: '${1}'
      action: replace
      target_label: LOC
    - source_labels: [__meta_kubernetes_node_label_failure_domain_beta_kubernetes_io_region]
      regex: '(.*)'
      replacement: 'NODE'
      action: replace
      target_label: Type
    - source_labels: [__meta_kubernetes_node_label_failure_domain_beta_kubernetes_io_region]
      regex: '(.*)'
      replacement: 'K3S-test'
      action: replace
      target_label: Env
    - action: labelmap
      regex: __meta_kubernetes_node_label_(.+)
- job_name: 'kubernetes-pods-monitor'
  kubernetes_sd_configs:
    - role: pod
      api_server: https://192.168.1.198:6443
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /apps/prometheus/k8s.token
  relabel_configs:
    # only keep pods annotated prometheus.io/scrape: "true"
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
      action: keep
      regex: true
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
      action: replace
      target_label: __metrics_path__
      regex: (.+)
    - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
      action: replace
      regex: ([^:]+)(?::\d+)?;(\d+)
      replacement: $1:$2
      target_label: __address__
    - action: labelmap
      regex: __meta_kubernetes_pod_label_(.+)
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_pod_name]
      action: replace
      target_label: kubernetes_pod_name
    - source_labels: [__meta_kubernetes_pod_label_pod_template_hash]
      regex: '(.*)'
      replacement: 'K8S-test'
      action: replace
      target_label: Env
9.2 consul_sd_config
nohup ./consul agent -server -bootstrap -bind=192.168.1.191 -client=192.168.1.191 -data-dir=/data/consul -ui -node=192.168.1.191 &
nohup ./consul agent -bind=192.168.1.192 -client=192.168.1.192 -data-dir=/data/consul -node=192.168.1.192 -join=192.168.1.191 &
nohup ./consul agent -bind=192.168.1.193 -client=192.168.1.193 -data-dir=/data/consul -node=192.168.1.193 -join=192.168.1.191 &
二进制部署prometheus中consul配置:
点击查看代码
# Consul service discovery: query all three consul servers, drop the
# built-in "consul" service itself.
- job_name: consul
  honor_labels: true
  metrics_path: /metrics
  scheme: http
  consul_sd_configs:
    - server: 192.168.1.191:8500
      services: []  # service names to discover; empty means all services
    - server: 192.168.1.192:8500
      services: []
    - server: 192.168.1.193:8500
      services: []
  relabel_configs:
    - source_labels: ['__meta_consul_tags']
      target_label: 'product'
    - source_labels: ['__meta_consul_dc']
      target_label: 'idc'
    - source_labels: ['__meta_consul_service']
      regex: "consul"
      action: drop
测试写入数据:
curl -X PUT -d '{"id": "node-exporter72","name": "node-exporter72","address": "192.168.1.72","port":9100,"tags": ["node-exporter"],"checks": [{"http": "http://192.168.1.72:9100/","interval": "5s"}]}' http://192.168.1.191:8500/v1/agent/service/register
curl -X PUT -d '{"id": "node-exporter73","name": "node-exporter73","address": "192.168.1.73","port":9100,"tags": ["node-exporter"],"checks": [{"http": "http://192.168.1.73:9100/","interval": "5s"}]}' http://192.168.1.191:8500/v1/agent/service/register
curl -X PUT -d '{"id": "node-exporter74","name": "node-exporter74","address": "192.168.1.74","port":9100,"tags": ["node-exporter"],"checks": [{"http": "http://192.168.1.74:9100/","interval": "5s"}]}' http://192.168.1.191:8500/v1/agent/service/register
curl -X PUT -d '{"id": "cadvisor72","name": "cadvisor72","address": "192.168.1.72","port":8080,"tags": ["cadvisor"],"checks": [{"http": "http://192.168.1.72:8080/","interval": "5s"}]}' http://192.168.1.191:8500/v1/agent/service/register
删除数据:
curl --request PUT http://192.168.1.191:8500/v1/agent/service/deregister/node-exporter72
9.3 dns_sd_config
# DNS service discovery: resolve A records and scrape each result on port 9100.
- job_name: 'webapp'
  dns_sd_configs:
    - names: ['www.92.com']
      type: A
      port: 9100
9.4 file_sd_config
sd_my_server.json
[
{
"targets": ["192.168.1.72:9100","192.168.1.73:9100","192.168.1.74:9100"]
}
]
prometheus.yml
# File-based service discovery; targets are re-read from the JSON file.
- job_name: 'file_ds'  # discovery job name; define one per business need
  file_sd_configs:
    - files:
        - /apps/prometheus/file_sd/sd_my_server.json  # target file path
      refresh_interval: 10s  # re-read interval, default 5m
9.5 static_config
# Plain static_configs example.
- job_name: "prometheus-pod"
  static_configs:
    - targets: ["192.168.1.198:8080", "192.168.1.36:8080", "192.168.1.37:8080", "192.168.1.72:8080", "192.168.1.73:8080", "192.168.1.74:8080"]
本文作者:没有猫的猫奴
本文链接:https://www.cnblogs.com/cat1/p/15982478.html
版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek “源神”启动!「GitHub 热点速览」
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· 我与微信审核的“相爱相杀”看个人小程序副业
· C# 集成 DeepSeek 模型实现 AI 私有化(本地部署与 API 调用教程)
· DeepSeek R1 简明指南:架构、训练、本地部署及硬件要求