Prometheus 与 Alertmanager 报警配置详解
vim prometheus.yml
# Prometheus server configuration: two static scrape jobs, one Alertmanager
# endpoint, and a single alerting-rule file.
global:
  # Default scrape interval for every job below.
  scrape_interval: 15s
  external_labels:
    monitor: 'codelab-monitor'

scrape_configs:
  # Single target whose `instance` label is overridden with a fixed name.
  - job_name: test
    static_configs:
      - targets:
          - '10.13.82.244:8000'
        labels:
          instance: proxy

  # Two targets on port 9100 (presumably node_exporter — confirm); the
  # memory metrics used in rule.yml are expected to come from this job.
  - job_name: node
    static_configs:
      - targets:
          - '10.13.82.244:9100'
          - '10.13.82.196:9100'

# Where firing alerts are sent.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'localhost:9093'

# Alerting rules are loaded from this file.
rule_files:
  - rule.yml
vim rule.yml
# Alerting rules: two groups that raise the same memory-usage alert at
# different thresholds and with different `severity` labels. The severity
# label is what alertmanager.yml matches on when routing.
#
# NOTE(review): the metric names have no `_bytes` suffix; newer node_exporter
# releases are believed to expose `node_memory_MemAvailable_bytes` /
# `node_memory_MemTotal_bytes` — confirm against the exporter version in use.
# NOTE(review): the description text reads "业务500报警" although the rule is
# about memory; looks copy-pasted — confirm the intended wording.
groups:
  - name: test-rule
    rules:
      - alert: '内存报警'
        # Used-memory percentage; fires when above 10.
        expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 10
        for: 1s
        labels:
          severity: warning
        annotations:
          summary: '服务名:{{$labels.alertname}}'
          description: '业务500报警: {{ $value }}'
          # Read back by the e-mail template via the "value" annotation.
          value: '{{ $value }}'

  - name: test-rule2
    rules:
      - alert: '内存报警'
        # Same expression with a higher threshold (40) and severity `test`.
        expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 40
        for: 1s
        labels:
          severity: test
        annotations:
          summary: '服务名:{{$labels.alertname}}'
          description: '业务500报警: {{ $value }}'
          value: '{{ $value }}'
vim alertmanager.yml
# Alertmanager configuration: SMTP settings, notification templates, the
# routing tree, and two e-mail receivers.
global:
  # Placeholder SMTP settings — replace 'xxx' with real values.
  smtp_smarthost: 'xxx'
  smtp_from: 'xxx'
  smtp_auth_username: 'xxx'
  smtp_auth_password: 'xxx'
  smtp_require_tls: false

# Template files that define "xx.html", used by the receivers below.
templates:
  - '/alertmanager/template/*.tmpl'

route:
  # Receiver used when no child route matches.
  receiver: 'default-receiver'
  group_wait: 1s  # group alert wait time
  group_interval: 1s  # group alert interval
  repeat_interval: 1s  # repeated alert interval
  group_by: [cluster, alertname]
  routes:
    # Alerts whose `severity` label matches "test" go to the `test` receiver.
    - receiver: test
      group_wait: 1s
      match_re:
        severity: test

receivers:
  - name: 'default-receiver'
    email_configs:
      - to: 'xx@xx.xx'
        html: '{{ template "xx.html" . }}'
        headers:
          Subject: ' {{ .CommonAnnotations.summary }}'

  - name: 'test'
    email_configs:
      - to: 'xxx@xx.xx'
        html: '{{ template "xx.html" . }}'
        headers:
          # Plain literal subject. It was previously wrapped in {{ }}, which
          # the template engine parses as a call to an undefined function,
          # so the header could not be rendered.
          Subject: '第二路由匹配测试'
vim test.tmpl
{{ define "xx.html" }}
{{/* HTML e-mail body: a header row followed by one table row per alert in .Alerts. */}}
<table border="5">
  <tr>
    <td>报警项</td>
    <td>磁盘</td>
    <td>报警阀值</td>
    <td>开始时间</td>
  </tr>
  {{/* Inside the loop the dot is the current alert. */}}
  {{ range .Alerts }}
  <tr>
    <td>{{ index .Labels "alertname" }}</td>
    <td>{{ index .Labels "instance" }}</td>
    <td>{{ index .Annotations "value" }}</td>
    <td>{{ .StartsAt }}</td>
  </tr>
  {{ end }}
</table>
{{ end }}
启动(分别执行以下两条命令):
./alertmanager --log.level=debug
./prometheus --log.level=debug