Alertmanager是Prometheus的一个报警通知组件,需要结合Prometheus使用。Prometheus将监测到的异常事件发送给Alertmanager,Alertmanager发送异常事件的通知(邮件、webhook等)。
1.prometheus.yml添加alertmanagers信息
[root@localhost prometheus]# cat prometheus.yml
# Prometheus server configuration: scrape targets, the Alertmanager endpoint,
# and the alerting rule file to load.
global:
  scrape_interval: 15s       # how often targets are scraped
  evaluation_interval: 15s   # how often alerting rules are evaluated

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
      # Second target group of the same job, tagged with an extra "group" label.
      - targets: ['192.168.70.189:9100']
        labels:
          group: 'client-node-exporter'

  - job_name: 'mysql'
    static_configs:
      # NOTE(review): 3306 is MySQL's default client port, which normally does
      # not serve Prometheus metrics; presumably this should point at a MySQL
      # exporter (mysqld_exporter defaults to 9104) -- confirm.
      - targets: ['192.168.70.189:3306']

  - job_name: 'httpd'
    static_configs:
      - targets: ['192.168.70.189:9117']

  - job_name: 'pushgateway'
    static_configs:
      - targets: ['192.168.70.189:9091']

# Alerts that fire are sent to this Alertmanager instance.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["192.168.70.189:9093"]

# Alerting rules are read from this file.
rule_files:
  - "rules.yml"
2.报警规则rules.yml
[root@localhost server]# cat /apps/prometheus/server/rules.yml
# Alerting rules loaded by Prometheus (referenced from rule_files in
# prometheus.yml). Two rule groups, one alert each.
groups:
  - name: cqh
    rules:
      # Fires when the dead_lift metric has stayed above 150 for 1 minute.
      - alert: cqh测试
        expr: dead_lift > 150
        for: 1m
        labels:
          status: warning
        annotations:
          summary: "{{$labels.instance}}:硬拉超标!lightweight baby!!!"
          description: "{{$labels.instance}}:硬拉超标!lightweight baby!!!"

  - name: 内存报警规则
    rules:
      # Memory usage in percent, computed as (1 - MemAvailable / MemTotal) * 100;
      # fires when it has stayed above 30 for 1 minute.
      - alert: 内存使用率告警
        expr: (1 - (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes))) * 100 > 30
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "服务器可用内存不足。"
          description: "内存使用率已超过30%(当前值:{{ $value }}%)"
修改规则后需要重新启动prometheus,或让其重新加载配置(例如向Prometheus进程发送SIGHUP信号),规则才会生效。
3.alertmanager.yml
[root@localhost alertmanager]# cat alertmanager.yml
# Alertmanager configuration: every alert is routed to a single webhook
# receiver.
global:
  resolve_timeout: 5m

route:
  # Alerts sharing the same alertname and instance are batched into one group.
  group_by: ['alertname','instance']
  group_wait: 15s        # wait before sending the first notification for a new group
  group_interval: 15s    # wait before notifying about new alerts added to a group
  repeat_interval: 5m    # wait before re-sending a notification that was already sent
  receiver: webhook      # default receiver
  routes:
    # Child route without matchers; it also delivers to the webhook receiver.
    - receiver: webhook
      group_wait: 15s

receivers:
  - name: webhook
    webhook_configs:
      - send_resolved: true   # also notify when an alert resolves
        url: 'http://192.168.70.189:8060/dingtalk/webhook1/send'
启动alertmanager:
# Start Alertmanager as a detached container named "alertmanager", publishing
# port 9093 and bind-mounting the host's alertmanager.yml and /etc/localtime.
# Each backslash must be the LAST character on its line: a backslash followed
# by a space escapes that space instead of continuing the command, which turns
# the next option into a bogus positional word.
docker run -d -p 9093:9093 \
  --name alertmanager \
  -v /apps/prometheus/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml \
  -v /etc/localtime:/etc/localtime \
  prom/alertmanager
4.钉钉(DingTalk)插件配置(webhook方式)
[root@localhost dingtalk]# cat /apps/prometheus/dingtalk/config.yml
# prometheus-webhook-dingtalk configuration. Each key under "targets" is a
# named DingTalk robot; the Alertmanager webhook URL in this document
# (/dingtalk/webhook1/send) carries the target name "webhook1".
#
# WARNING: access_token and secret values are credentials for the robot.
# Do not publish or commit real values; rotate any that have been exposed.
targets:
  mytest:
    url: https://oapi.dingtalk.com/robot/send?access_token=973a1b54687f926de9c8afbd10118f58ece0f3d694959682dcfd5d431cb5b1c1
    secret: SECf65f91b6fae1a047482de5bd7fa3cb2a513166b2933c4dfc11cf4755c03150cf
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=8979024890bb57491b434b7e5a3970276335516a9b67df469ffa0c5603a92afd
    secret: SEC63bae31c9f116c4a08bed9f0e90dc9317743e207025c978c658b7d528b1a6e19
  webhook2:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
  webhook_legacy:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
    # Message rendered from the "legacy.title" / "legacy.content" templates.
    message:
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'
  webhook_mention_all:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
    mention:
      all: true
  webhook_mention_users:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
    mention:
      mobiles: ['156xxxx8827', '189xxxx8325']
  mytest1:
    url: https://oapi.dingtalk.com/robot/send?access_token=2f02ee1b8b82f340271d8c3c384964b714fbbd06e31f787c6b46d8c47e8dfe46
    secret: SECbf91ed2d679100e1ac8d5bab9025a077f9e0a5220a47f4cf037ec20611c41e87
启动钉钉报警通知:
# Launch the prometheus-webhook-dingtalk bridge as a detached container named
# "dingtalk" with an "always" restart policy, publishing host port 8060 and
# bind-mounting the host config file into the container.
docker run \
  --detach \
  --restart=always \
  --name=dingtalk \
  --publish 8060:8060 \
  --volume /apps/prometheus/dingtalk/config.yml:/etc/prometheus-webhook-dingtalk/config.yml \
  timonwong/prometheus-webhook-dingtalk
webhook1的url和secret的获取方法请参考文档:https://www.cnblogs.com/aroin/p/14756719.html
url: https://oapi.dingtalk.com/robot/send?access_token=8979024890bb57491b434b7e5a3970276335516a9b67df469ffa0c5603a92afd
secret: SEC63bae31c9f116c4a08bed9f0e90dc9317743e207025c978c658b7d528b1a6e19