prometheus,alertmanager 报警配置详解
###
1、prometheus配置文件
vim prometheus.yml
###
# Prometheus server configuration (prometheus.yml).
global:
  # Default interval between scrapes of every target.
  scrape_interval: 15s
  external_labels:
    monitor: 'codelab-monitor'

scrape_configs:
  # Single target with its `instance` label overridden to "proxy".
  - job_name: test
    static_configs:
      - targets: ['10.13.82.244:8000']
        labels:
          instance: proxy
  # Two targets on port 9100.
  - job_name: node
    static_configs:
      - targets: ['10.13.82.244:9100', '10.13.82.196:9100']

# Alertmanager instance(s) that fired alerts are sent to.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['localhost:9093']

# Alerting rule files to load.
rule_files:
  - rule.yml
2、告警规则配置文件
vim rule.yml
###
# Alerting rules: two groups running the same memory-usage expression with
# different thresholds and severities.
# NOTE(review): the expressions use node_memory_MemAvailable /
# node_memory_MemTotal; newer node_exporter releases expose these with a
# `_bytes` suffix -- confirm against the exporter version in use.
# NOTE(review): `$labels` carries the labels of the expression result, so
# `$labels.alertname` may render empty in the summaries -- confirm, and
# consider a label that is present (e.g. `$labels.instance`).
groups:
  - name: test-rule
    rules:
      - alert: "内存报警"
        # Fires when used memory exceeds 10%.
        expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 10
        for: 1s
        labels:
          severity: warning
        annotations:
          summary: "服务名:{{$labels.alertname}}"
          description: "业务500报警: {{ $value }}"
          value: "{{ $value }}"

  - name: test-rule2
    rules:
      - alert: "内存报警"
        # Fires when used memory exceeds 40%.
        expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 40
        for: 1s
        labels:
          severity: test
        annotations:
          summary: "服务名:{{$labels.alertname}}"
          description: "业务500报警: {{ $value }}"
          value: "{{ $value }}"
3、alertmanager配置文件
vim alertmanager.yml
###
# Alertmanager configuration: SMTP settings, routing tree and e-mail receivers.
global:
  smtp_smarthost: 'xxx'
  smtp_from: 'xxx'
  smtp_auth_username: 'xxx'
  smtp_auth_password: 'xxx'
  smtp_require_tls: false

# Notification template files to load.
templates:
  - '/alertmanager/template/*.tmpl'

route:
  # Alerts that match none of the `routes` below are sent to this default
  # receiver. The name is arbitrary but must match a `name` under `receivers`.
  receiver: 'default-receiver'
  # After the first alert of a group arrives, wait this long for further
  # alerts so that they can be sent together.
  group_wait: 1s
  # Wait before sending a notification about new alerts for a group that has
  # already been notified. Usually 5m or more.
  group_interval: 1s
  # Wait before re-sending a notification that was already sent.
  # Usually 3h or more.
  repeat_interval: 1s
  group_by: [cluster, alertname]
  routes:
    # Alerts whose `severity` label matches "test" go to the `test` receiver.
    # NOTE(review): newer Alertmanager releases prefer `matchers` over
    # `match_re` -- confirm against the deployed version.
    - receiver: test
      group_wait: 1s
      match_re:
        severity: test

receivers:
  - name: 'default-receiver'
    email_configs:
      - to: 'xx@xx.xx'
        html: '{{ template "xx.html" . }}'
        headers:
          # Alert e-mail subject.
          Subject: " {{ .CommonAnnotations.summary }}"
  - name: 'test'
    email_configs:
      - to: 'xx@xx.xx'
        html: '{{ template "xx.html" . }}'
        headers:
          # Alert e-mail subject: a fixed literal. It must not be wrapped in
          # template delimiters, which would make the template engine treat
          # the text as a call to an undefined function.
          Subject: " 第二路由匹配测试"
4、告警模板配置
vim test.tmpl
###
{{ define "xx.html" }}
{{/* Renders an HTML table with one row per entry in .Alerts. */}}
<table border="5">
  <tr>
    <td>报警项</td>
    <td>磁盘</td>
    <td>报警阀值</td>
    <td>开始时间</td>
  </tr>
  {{ range .Alerts }}
  <tr>
    <td>{{ .Labels.alertname }}</td>
    <td>{{ .Labels.instance }}</td>
    <td>{{ .Annotations.value }}</td>
    <td>{{ .StartsAt }}</td>
  </tr>
  {{ end }}
</table>
{{ end }}
###