Alertmanager是Prometheus的一个报警通知组件,需要结合Prometheus使用。Prometheus将监测到的异常事件发送给Alertmanager,Alertmanager发送异常事件的通知(邮件、webhook等)。

1.prometheus.yml添加alertmanagers信息

[root@localhost prometheus]# cat prometheus.yml
# Prometheus server configuration: scrape jobs, the Alertmanager instance that
# alerts are sent to, and the rule file that defines the alerts.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
    - targets: ['localhost:9090']
    # Second target of the same job, tagged with an extra "group" label.
    - targets: ['192.168.70.189:9100']
      labels:
        group: 'client-node-exporter'
  - job_name: 'mysql'
    static_configs:
    # Prometheus scrapes metrics over HTTP, so the target has to be an exporter
    # endpoint, not the MySQL server port 3306 (which speaks the MySQL protocol
    # and would always show as DOWN). 9104 is the default listen port of
    # mysqld_exporter; adjust it if the exporter listens on another port.
    - targets: ['192.168.70.189:9104']
  - job_name: 'httpd'
    static_configs:
    - targets: ['192.168.70.189:9117']
  - job_name: 'pushgateway'
    static_configs:
    - targets: ['192.168.70.189:9091']
# Alertmanager endpoint that receives firing/resolved alerts.
alerting:
  alertmanagers:
  - static_configs:
    - targets: ["192.168.70.189:9093"]
# Alerting rules; the relative path is resolved against this file's directory.
rule_files:
  - "rules.yml"
View Code

 

2.报警规则rules.yml

[root@localhost server]# cat /apps/prometheus/server/rules.yml 
# Alerting rules referenced from prometheus.yml via rule_files ("rules.yml").
# Both groups use the same indentation so the file reads uniformly.
groups:
  # Test rule: fires when dead_lift has been above 150 for one minute.
  - name: cqh
    rules:
    - alert: cqh测试
      expr: dead_lift > 150
      for: 1m
      labels:
        status: warning
      annotations:
        summary: "{{$labels.instance}}:硬拉超标!lightweight baby!!!"
        description: "{{$labels.instance}}:硬拉超标!lightweight baby!!!"

  # Memory rule: fires when used memory has exceeded 30% for one minute.
  - name: 内存报警规则
    rules:
    - alert: 内存使用率告警
      # Used percentage = (1 - available / total) * 100.
      expr: (1 - (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes))) * 100 > 30
      for: 1m
      labels:
        severity: warning
      annotations:
        summary: "服务器可用内存不足。"
        description: "内存使用率已超过30%(当前值:{{ $value }}%)"
View Code

修改规则后,需要重启prometheus或让其重新加载配置(例如向prometheus进程发送SIGHUP信号),新规则才会生效。

3.alertmanager.yml

[root@localhost alertmanager]# cat alertmanager.yml
# Alertmanager configuration: every alert is routed to the single "webhook"
# receiver, which posts to the DingTalk bridge's webhook1 endpoint on port 8060.
global:
  resolve_timeout: 5m

route:
  receiver: webhook
  group_by:
    - 'alertname'
    - 'instance'
  group_wait: 15s
  group_interval: 15s
  repeat_interval: 5m
  routes:
    # Child route without matchers; it targets the same receiver as the root.
    - receiver: webhook
      group_wait: 15s

receivers:
  - name: webhook
    webhook_configs:
      - url: 'http://192.168.70.189:8060/dingtalk/webhook1/send'
        # Also notify when an alert is resolved.
        send_resolved: true
View Code

启动alertmanager:

# Start Alertmanager detached, publish port 9093, and mount the host's
# alertmanager.yml and timezone file into the container.
# Every option is on its own line ending in a bare backslash: a backslash that
# is followed by a space in the middle of a line escapes that space (producing
# arguments such as " --name") instead of continuing the command.
docker run -d \
  -p 9093:9093 \
  --name alertmanager \
  -v /apps/prometheus/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml \
  -v /etc/localtime:/etc/localtime \
  prom/alertmanager

4.钉钉(DingTalk)插件配置(webhook方式)

[root@localhost dingtalk]# cat /apps/prometheus/dingtalk/config.yml 
# prometheus-webhook-dingtalk targets. Each key under "targets" is a target
# name; alertmanager.yml above posts to the "webhook1" target
# (/dingtalk/webhook1/send).
# NOTE(review): the access tokens and secrets below look like real credentials;
# consider rotating them and publishing placeholders instead.
targets:
  mytest:
    url: https://oapi.dingtalk.com/robot/send?access_token=973a1b54687f926de9c8afbd10118f58ece0f3d694959682dcfd5d431cb5b1c1
    secret: SECf65f91b6fae1a047482de5bd7fa3cb2a513166b2933c4dfc11cf4755c03150cf
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=8979024890bb57491b434b7e5a3970276335516a9b67df469ffa0c5603a92afd
    secret: SEC63bae31c9f116c4a08bed9f0e90dc9317743e207025c978c658b7d528b1a6e19
  webhook2:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
  # Target that overrides the message title/text with the "legacy" templates.
  webhook_legacy:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
    message:
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'
  # Target that mentions everyone in the group.
  webhook_mention_all:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
    mention:
      all: true
  # Target that mentions the listed mobile numbers only.
  webhook_mention_users:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
    mention:
      mobiles:
        - '156xxxx8827'
        - '189xxxx8325'
  mytest1:
    url: https://oapi.dingtalk.com/robot/send?access_token=2f02ee1b8b82f340271d8c3c384964b714fbbd06e31f787c6b46d8c47e8dfe46
    secret: SECbf91ed2d679100e1ac8d5bab9025a077f9e0a5220a47f4cf037ec20611c41e87
View Code

启动钉钉报警通知:

# Launch the DingTalk webhook bridge detached, restart it automatically,
# publish port 8060, and mount the host config.yml into the container.
docker run \
  --detach \
  --restart always \
  --name dingtalk \
  --publish 8060:8060 \
  --volume /apps/prometheus/dingtalk/config.yml:/etc/prometheus-webhook-dingtalk/config.yml \
  timonwong/prometheus-webhook-dingtalk
 

 

 

 

webhook1的url和secret的获取方法,请参考钉钉机器人配置文档:https://www.cnblogs.com/aroin/p/14756719.html
url: https://oapi.dingtalk.com/robot/send?access_token=8979024890bb57491b434b7e5a3970276335516a9b67df469ffa0c5603a92afd
secret: SEC63bae31c9f116c4a08bed9f0e90dc9317743e207025c978c658b7d528b1a6e19