Prometheus、Alertmanager 报警配置详解

vim prometheus.yml

# Prometheus server configuration: two static scrape jobs, one Alertmanager
# endpoint, and one alerting-rule file.
global:
  # Default interval between scrapes for the jobs below.
  scrape_interval: 15s
  # Labels attached when this server talks to external systems
  # (for example alerts sent to Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'

# Alerting rules are loaded from this file.
rule_files:
  - rule.yml

# Alertmanager instance that receives the alerts fired by the rules.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'localhost:9093'

scrape_configs:
  - job_name: test
    static_configs:
      - targets:
          - '10.13.82.244:8000'
        labels:
          # Fixed instance label for this target instead of the address-based default.
          instance: proxy
  - job_name: node
    static_configs:
      - targets:
          - '10.13.82.244:9100'
          - '10.13.82.196:9100'

vim rule.yml

# Alerting rules used to exercise Alertmanager routing: two groups raise the
# same memory-usage alert at different thresholds with different severity labels.
# NOTE(review): node_exporter >= 0.16 exposes these metrics with a `_bytes`
# suffix (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) —
# confirm the metric names against the exporter version in use.
groups:
- name: test-rule
  rules:
  - alert: "内存报警"
    # Percentage of memory in use; fires when it exceeds 10.
    expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 10
    for: 1s
    labels:
      severity: warning
    annotations:
      # $labels only holds the labels of the expression result; alertname is
      # not among them and would render as an empty string, so the job label
      # is used as the service name instead.
      summary: "服务名:{{ $labels.job }}"
      description: "内存使用率报警: {{ $value }}"
      # Current value of the expression, exposed for notification templates.
      value: "{{ $value }}"
- name: test-rule2
  rules:
  - alert: "内存报警"
    # Same expression with a higher threshold (40) and a different severity.
    expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 40
    for: 1s
    labels:
      severity: test
    annotations:
      # See the note on the first rule: alertname is not available in $labels.
      summary: "服务名:{{ $labels.job }}"
      description: "内存使用率报警: {{ $value }}"
      value: "{{ $value }}"

vim alertmanager.yml

# Alertmanager configuration: SMTP settings, notification templates, a routing
# tree with one child route, and two e-mail receivers.
global:
  # SMTP placeholders; replace with real values.
  smtp_smarthost: 'xxx'
  smtp_from: 'xxx'
  smtp_auth_username: 'xxx'
  smtp_auth_password: 'xxx'
  smtp_require_tls: false

# Files that define the named templates referenced by the receivers below.
templates:
  - '/alertmanager/template/*.tmpl'

route:
  # Receiver used when no child route matches.
  receiver: 'default-receiver'
  group_wait: 1s  # wait time for a group of alerts
  group_interval: 1s  # interval between notifications for a group
  repeat_interval: 1s  # interval between repeated notifications
  group_by: [cluster, alertname]
  routes:
  # Alerts whose severity label matches the regex "test" go to the test receiver.
  - receiver: test
    group_wait: 1s
    match_re:
      severity: test

receivers:
- name: 'default-receiver'
  email_configs:
  - to: 'xx@xx.xx'
    html: '{{ template "xx.html" . }}'
    headers:
      Subject: " {{ .CommonAnnotations.summary }}"
- name: 'test'
  email_configs:
  - to: 'xxx@xx.xx'
    html: '{{ template "xx.html" . }}'
    headers:
      # Plain literal subject. Wrapping bare text in {{ }} makes the template
      # engine treat it as a call to an undefined function, and rendering fails.
      Subject: '第二路由匹配测试'

vim test.tmpl

{{/* HTML e-mail body: a table with one row per alert in the notification. */}}
{{ define "xx.html" }}
<table border="5">
    <tr><td>报警项</td>
        <td>实例</td>
        <td>当前值</td>
        <td>开始时间</td>
    </tr>
    {{ range $alert := .Alerts }}
        <tr><td>{{ index $alert.Labels "alertname" }}</td>
            <td>{{ index $alert.Labels "instance" }}</td>
            <td>{{ index $alert.Annotations "value" }}</td>
            <td>{{ $alert.StartsAt }}</td>
        </tr>
    {{ end }}
</table>
{{ end }}

 

启动:`./alertmanager --log.level=debug` 和 `./prometheus --log.level=debug`(两条命令分别执行)

posted @ 2018-05-22 14:37  小秒  阅读(28733)  评论(0)  编辑  收藏  举报