Prometheus 中的 Alertmanager 配置文件详解
Alertmanager 学习链接:
路由中标签详解: https://yunlzheng.gitbook.io/prometheus-book/parti-prometheus-ji-chu/alert/alert-manager-route
Alertmanager 的配置文件示例: https://blog.csdn.net/qq_22227087/article/details/96483009
Alertmanager 主配置文件中关键字(方法)详解: https://zhuanlan.zhihu.com/p/74932366
###########################################################
Prometheus 配置文件和 Alertmanager 配置文件详解
Prometheus 的主配置文件通过 rule_files 引用 rule.yml;rule.yml 中的 rules 只负责定义报警的触发条件(下面有 rule.yml 配置文件的样例)。报警触发以后,Prometheus 会将消息转发到 Alertmanager,Alertmanager 再根据其主配置文件中的路由(route)选择转发给哪个接收者(下面有 alertmanager.yml 配置文件的样例)。
#################################################################
Prometheus 主配置文件样例
# my global config
global:
  # Scrape targets every 15 seconds (the default is every 1 minute).
  scrape_interval: 15s
  # Evaluate alerting rules every 15 seconds (the default is every 1 minute).
  evaluation_interval: 15s
  # Per-scrape timeout; set to 15s here instead of the 10s global default.
  scrape_timeout: 15s

# Alertmanager configuration: tells Prometheus where to send fired alerts.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - localhost:9093

# Load rules once and periodically evaluate them according to the
# global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - /data/prometheus/alertmanager-0.17.0/netdata-rule.yml
  - /data/prometheus/alertmanager-0.17.0/mysql-rule.yml
  - /data/prometheus/alertmanager-0.17.0/bigdata-rule.yml
  - /data/prometheus/alertmanager-0.17.0/db-rule.yml
  - /data/prometheus/alertmanager-0.17.0/ops-rule.yml
  - /data/prometheus/alertmanager-0.17.0/game-backend-rule.yml
  - /data/prometheus/alertmanager-0.17.0/game-status-rule.yml
  - /data/prometheus/alertmanager-0.17.0/url-rule.yml
  - /data/prometheus/alertmanager-0.17.0/idc-rule.yml
############################################################
# rule.yml sample configuration
# Each rule only defines the condition under which an alert fires.
groups:
  - name: test-rule
    rules:
      - alert: "tcp连接报警"
        expr: netdata_ipv4_tcpsock_active_connections_average{dimension="connections",family="tcp"} > 1
        for: 1s
        labels:
          severity: warning
        annotations:
          summary: "服务名:{{$labels.alertname}}"
          description: "业务500报警: {{ $value }}"
          value: "{{ $value }}"
  - name: test-rule2
    rules:
      - alert: "内存报警"
        expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 1
        for: 1s
        labels:
          severity: test
        annotations:
          summary: "服务名:{{$labels.alertname}}"
          description: "业务500报警: {{ $value }}"
          value: "{{ $value }}"

##########################################################
# alertmanager.yml sample configuration
# Routes incoming alerts to a receiver and defines how each receiver is notified.
global:
  resolve_timeout: 5m
  # smtp_smarthost: SMTP server (host:port) used for sending e-mail
  smtp_smarthost: 'smtp.gmail.com:465'
  # smtp_from: mailbox the alert notifications are sent from
  smtp_from: 'solomon02040@gmail.com'
  # smtp_auth_username: mailbox user name
  smtp_auth_username: 'solomon02040@gmail.com'
  # smtp_auth_password: authorization password
  # NOTE(review): plaintext credential in a sample file; use a placeholder
  # or an externally injected secret before publishing or committing.
  smtp_auth_password: '8899//zz'
  # smtp_require_tls: whether TLS is enabled
  smtp_require_tls: false

templates:
  - '/data/alertmanager/template/*.tmpl'

route:
  receiver: solomontest  # receiver name; can be chosen freely
  group_by: ['alertname']
  group_wait: 10s  # group alert wait time
  group_interval: 10s  # group alert interval
  repeat_interval: 60s  # repeat alert interval
  routes:
    - receiver: solomontest
      group_wait: 10s
      # match_re matches label values by regular expression
      # (a plain `match` would be the better choice here).
      match_re:
        env: test  # env is a label on the alert, test is the label value

receivers:
  - name: solomontest
    email_configs:
      - to: 'solomon02040@gmail.com'
        html: '{{ template "xx.html" . }}'  # template used for the mail body
        headers: { Subject: "[WARN] 报警邮件" }
  - name: 'test'
    email_configs:
      - to: 'xxx@xx.xx'
        html: '{{ template "xx.html" . }}'
        # Header values are rendered as Go templates, so the subject must be
        # literal text; bare words inside {{ }} would be parsed as a function call.
        headers: { Subject: "第二路由匹配测试" }
########################################################