Prometheus企业微信告警

自己注册一个企业微信,进入管理控制台。

在应用管理中点击创建应用

创建机器人

发送消息测试

发送消息测试,我这里可以正常收到消息

找到企业ID

找到机器人的AgentId

点击查看,把Secret保存下

查看下部门ID

添加微信告警配置

[root@harbor harbor]# vim /apps/alertmanager/alertmanager.yml

  # WeChat Work receiver; the IDs and secret below come from the admin console steps above.
  - name: "wechat"
    wechat_configs:
    - corp_id: wwbf878f48d2348b76 # enterprise (corp) ID
      #to_user: '@all'            # alternative target: all members
      to_party: 2                 # department ID
      agent_id: 1000002           # AgentId of the application (bot)
      api_secret: V7WnlfwUiSrUzRKTUCh6sgvKVd_UTZ_pBnGGuwUxLFQ # Secret of the application
      send_resolved: true

把发送告警的服务器IP添加到该应用的“企业可信IP”中,设置完约1分钟后生效。

重启

systemctl restart alertmanager.service

去企业微信查看告警。

 

告警分类发送,根据标签匹配分类发送告警

severity:critical级别的走邮件

service: magedu-pods级别的消息发给钉钉

剩余的都发给企业微信

[root@harbor apps]# cat prometheus/rules/yzy_rules.yml 
groups:
  - name: alertmanager_pod.rules
    rules:
    # Container CPU alert: per-`name` sum of the 5m CPU usage rate, multiplied by 100.
    # NOTE(review): the expression fires above 1 while the description text says
    # 10% — confirm which threshold is intended.
    - alert: Pod_all_cpu_usage
      expr: (sum by(name)(rate(container_cpu_usage_seconds_total{image!=""}[5m]))*100) > 1
      for: 2m
      labels:
        # severity is commented out; only the service label is set on this alert
        #severity: critical
        service: magedu-pods
      annotations:
        description: 容器 {{ $labels.name }} CPU 资源利用率大于10% , (current value is {{ $value }})
        summary: Dev CPU 负载告警

    # Container memory alert: fires when the per-`name` average memory usage
    # stays above 2 GiB (2147483648 bytes) for 2 minutes.
    - alert: Pod_all_memory_usage
      # container_memory_usage_bytes is a gauge, so it is compared directly;
      # rate()/irate() are only meaningful for counters. The earlier expression
      # applied irate() to node_memory_MemFree_bytes, a node-level free-memory
      # gauge, which does not correspond to the per-container text below.
      expr: sort_desc(avg by(name)(container_memory_usage_bytes{name!=""})) > 2147483648 # more than 2 GiB
      for: 2m
      labels:
        severity: critical
      annotations:
        description: 容器 {{ $labels.name }} Memory资源利用率大于 2G,(current value is {{ $value }})
        summary: Dev Memory 负载告警
    
    # Container network receive alert: per-`name` sum of the 1m irate of
    # container_network_receive_bytes_total.
    # NOTE(review): the expression fires above 1 while the description text
    # says 50M — confirm which threshold is intended.
    - alert: Pod_all_network_receive_usage
      expr: sum by (name) (irate(container_network_receive_bytes_total{container_name="POD"}[1m])) > 1
      for: 2m
      labels:
        severity: critical
      annotations:
        # The closing parenthesis after the value was missing in the message text.
        description: 容器 {{ $labels.name }} network_receive 资源利用率大于 50M , (current value is {{ $value }})

    # Node memory alert: compares node_memory_MemFree_bytes against 4 GiB.
    - alert: node内存可用大小
      expr: node_memory_MemFree_bytes < 4*1024*1024*1024 # deliberately written wrong (author's note)
      for: 2m
      labels:
        severity: info
      annotations:
        description: node节点的可用内存小于4G
[root@harbor apps]# cat alertmanager/alertmanager.yml 
# Global settings: resolve timeout plus the SMTP account used for outgoing mail.
# NOTE(review): smtp_auth_password is stored here in plain text — keep the real
# credential out of published or version-controlled copies.
global:
  resolve_timeout: 1m
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '760478xxx@qq.com'
  smtp_auth_username: '760478xxx@qq.com'
  smtp_auth_password: 'sxcpymhdrkenbegd'
  smtp_hello: '@qq.com'
  smtp_require_tls: false

# Routing tree: alerts that match no sub-route fall through to the default
# receiver 'wechat'.
route:
  group_by: ['alertname']
  group_wait: 1s
  group_interval: 5s
  repeat_interval: 10s
  receiver: 'wechat'
  # Sub-routes, selected by alert labels.
  routes:
  - receiver: web.hook # alerts with severity=critical are sent by email
    group_wait: 10s
    match_re:
      severity: critical
  - receiver: dingding.alertname # alerts with service=magedu-pods are sent to DingTalk
    group_wait: 10s
    match_re:
      service: magedu-pods
# Notification targets referenced by name from the route section.
receivers:
  # WeChat Work application (the default receiver).
  - name: 'wechat'
    wechat_configs:
    - corp_id: 'wwbf878f48d2348b76'
      to_party: '2'
      agent_id: '1000002'
      api_secret: 'V7WnlfwUiSrUzRKTUCh6sgvKVd_UTZ_pBnGGuwUxLFQ'
      send_resolved: true

  # DingTalk, delivered through a webhook.
  - name: 'dingding.alertname'
    webhook_configs:
    - url: 'http://10.211.55.26:8060/dingtalk/alertname/send' # address and port of the dingtalk service
      send_resolved: true

  # Email recipient.
  - name: 'web.hook'
    email_configs:
      - to: '1500120xxxx@163.com'

# Inhibition: a source alert with severity 'critical' mutes target alerts with
# severity 'warning' when the labels listed under `equal` carry the same values.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

设置微信告警模板

[root@harbor alertmanager]# cat message_template.templ 
{{/*
  wechat.default.message: body of the WeChat notification. Renders one section
  per alert in .Alerts with its status, labels, annotations and start time.
  The start time uses the Go reference layout "2006-01-02 15:04:05"; the colon
  between hour and minute is required, otherwise the time prints as e.g. 1345:07.
  NOTE(review): the "value" annotation is read below but must be defined in the
  alerting rule's annotations, otherwise that field renders empty.
*/}}
{{ define "wechat.default.message" }}
{{ range $i, $alert := .Alerts }}
===alertmanager 微信监控报警===
告警状态: {{ .Status }}
告警级别: {{ $alert.Labels.severity }}
告警类型: {{ $alert.Labels.alertname }}
告警应用: {{ $alert.Annotations.summary }}
故障主机: {{ $alert.Labels.instance }}
告警主题: {{ $alert.Annotations.summary }}
触发阀值: {{ $alert.Annotations.value}}
告警详情: {{ $alert.Annotations.description }}
触发时间: {{ $alert.StartsAt.Format "2006-01-02 15:04:05" }}
===========end=========
{{ end }}
{{ end }}

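 配置alertmanager.yml文件:用 templates 加载该模板文件(例如 templates: ['/apps/alertmanager/message_template.templ']),并在 wechat_configs 中加上 message: '{{ template "wechat.default.message" . }}' 引用模板,配置完后重启alertmanager。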

 

posted @ 2022-08-24 13:45  Maniana  阅读(187)  评论(0)  编辑  收藏  举报