Prometheus实战之配置汇总
导航:这里主要列出 Prometheus 较为系统的学习过程,最好按照章节顺序查看。由于写作该文档经历了不同时期,文中出现的云环境有时并不统一,但只需学习具体的使用方法即可;在最后的篇章,有一个完整的腾讯云实战案例。 8.kube-state-metrics 和 metrics-server 13.Grafana简单用法 参考: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config https://www.bookstack.cn/read/prometheus_practice/introduction-README.md
因为记录此文档经历了几次云环境的更换,且在各篇章讲解的过程中配置列出不够详细,所以这里做一个汇总,更容易查阅。(代码中敏感信息已经隐藏)
1.主prometheus配置
# Main Prometheus server configuration (prometheus.yml).
# It scrapes the monitoring hosts themselves and pulls selected series from
# two downstream Prometheus servers through their /federate endpoints.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Alerting rule files loaded by this server.
rule_files:
  - /opt/prometheus/rule/*.rules

# Remote storage in InfluxDB, kept here disabled.
#remote_write:
#  - url: "http://influxdb:8086/api/v1/prom/write?u=root&p=huawei&db=prometheus"
#    basic_auth:
#      username: root
#      password: huawei
#remote_read:
#  - url: "http://influxdb:8086/api/v1/prom/read?u=root&p=huawei&db=prometheus"
#    basic_auth:
#      username: root
#      password: huawei

# Both Alertmanager instances receive every alert.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["10.10.1.10:9093", "10.10.1.5:9093"]

scrape_configs:
  # Exporters on port 9100 of the monitoring hosts themselves.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['prometheus01:9100', 'prometheus02:9100', 'influxdb:9100']
        labels:
          project: prometheus
    relabel_configs:
      # Copy the host part of "host:port" into a `hostname` label.
      - source_labels: [__address__]
        regex: '([a-zA-Z0-9-.]+):(\d{1,4})'
        target_label: hostname
        replacement: '${1}'

  # Federation from the cn-lcm-prod Kubernetes Prometheus.
  - job_name: "cn-lcm-prod"
    scrape_interval: 20s
    scrape_timeout: 20s
    # Keep the job/instance labels exposed by the federated server.
    honor_labels: true
    metrics_path: '/federate'
    params:
      'match[]':
        - '{job="prometheus"}'
        - '{__name__=~"job:.*"}'
        - '{job="cn-lcm-prod-kubernetes-kubelet"}'
        - '{job="cn-lcm-prod-kubernetes-cadvisor"}'
        - '{job="cn-lcm-prod-kubernetes-pods"}'
        - '{job="cn-lcm-prod-kubernetes-apiservers"}'
        - '{job="cn-lcm-prod-kubernetes-services"}'
        - '{job="cn-lcm-prod-kubernetes-service-endpoints"}'
    static_configs:
      - targets:
          - 'x.x.x.x:9090'
        labels:
          project: cn-lcm-prod-k8s

  # Federation from the cn-gc Prometheus.
  # NOTE(review): a 2s interval/timeout is very aggressive for /federate;
  # confirm it is intentional.
  - job_name: "cn-gc"
    scrape_interval: 2s
    scrape_timeout: 2s
    honor_labels: true
    metrics_path: '/federate'
    params:
      'match[]':
        - '{job="prometheus"}'
        - '{__name__=~"job:.*"}'
        - '{job="cn-gc-consul-node"}'
        - '{job="cn-gc-consul-process"}'
        - '{job="cn-gc-nginx01"}'
        - '{job="cn-gc-nginx02"}'
    static_configs:
      - targets:
          - 'x.x.x.x:9090'
        labels:
          project: cn-gc
2.主prometheus告警模版样例
cat cn-lcm-prod-kubernetes.rules
# Pod-level alerting rules for the cn-lcm-prod Kubernetes cluster.
# Each alert exists at severity "warning" and, for CPU/memory, again at
# severity "high" under the same alert name.
groups:
  - name: cn-lcm-prod
    rules:
      # CPU usage as a percentage of the container CPU quota.
      - alert: PodCpu
        expr: >-
          (sum(rate(container_cpu_usage_seconds_total{job=~'cn-lcm-prod.*',container_name!=''}[3m]))
          by(job,cluster,namespace,container_name,pod_name))
          /(sum(container_spec_cpu_quota)
          by(job,cluster,namespace,container_name,pod_name) /100000)*100 > 70
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD PodCpu CPU使用率已达到告警阈值"
          description: "{{ $labels.pod_name }} 告警阈值为70%, 当前值为: {{ $value }}"

      # RSS as a percentage of the memory limit; `!= +inf` drops the
      # series whose division yields +Inf.
      # NOTE(review): the "high" PodMem rule below uses the same 90
      # threshold; confirm whether this warning level should be lower.
      - alert: PodMem
        expr: >-
          sum(container_memory_rss{image!="",job=~'cn-lcm-prod.*'})
          by(job,cluster,namespace,container_name,pod_name)
          / sum(container_spec_memory_limit_bytes{image!="",job=~'cn-lcm-prod.*'})
          by(job,cluster,namespace,container_name,pod_name)
          * 100 != +inf > 90
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD PodMem 内存使用率已达到告警阈值"
          description: "{{ $labels.pod_name }} 告警阈值为90%, 当前值为: {{ $value }}"

      # Inbound traffic in MiB/s. The description states 30MB to match
      # the threshold in the expression.
      - alert: PodNetworkIn
        expr: >-
          sum by (namespace,job,pod_name)
          (irate(container_network_receive_bytes_total{image!="",job=~'cn-lcm-prod.*'}[1m]))
          / 1024 /1024 > 30
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD PodNetworkIn NETWORK入带宽使用率已达到告警阈值"
          description: "{{ $labels.pod_name }} 告警阈值为30MB, 当前值为: {{ $value }} MB"

      # Outbound traffic in MiB/s. The description states 30MB to match
      # the threshold in the expression.
      - alert: PodNetworkOut
        expr: >-
          sum by (namespace,job,pod_name)
          (irate(container_network_transmit_bytes_total{image!="",job=~'cn-lcm-prod.*'}[1m]))
          / 1024 /1024 > 30
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD PodNetworkOut NETWORK出带宽使用率已达到告警阈值"
          description: "{{ $labels.pod_name }} 告警阈值为30MB, 当前值为: {{ $value }} MB"

      # Fires when the container restart counter changed in the last 3m.
      - alert: PodRestart
        expr: >-
          changes(kube_pod_container_status_restarts_total{job=~"cn-lcm-prod.*"}[3m]) > 0
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD PodRestart POD重启"
          description: "{{ $labels.pod }} 有重启记录"

      # ------------------------------ high ------------------------------
      - alert: PodCpu
        expr: >-
          (sum(rate(container_cpu_usage_seconds_total{job=~'cn-lcm-prod.*',container_name!=""}[3m]))
          by(job,cluster,namespace,container_name,pod_name))
          /(sum(container_spec_cpu_quota)
          by(job,cluster,namespace,container_name,pod_name) /100000)*100 > 90
        for: 1m
        labels:
          severity: high
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD PodCpu CPU使用率已达到告警阈值"
          description: "{{ $labels.pod_name }} 告警阈值为90%, 当前值为: {{ $value }}"

      - alert: PodMem
        expr: >-
          sum(container_memory_rss{image!="",job=~'cn-lcm-prod.*'})
          by(job,cluster,namespace,container_name,pod_name)
          / sum(container_spec_memory_limit_bytes{image!="",job=~'cn-lcm-prod.*'})
          by(job,cluster,namespace,container_name,pod_name)
          * 100 != +inf > 90
        for: 1m
        labels:
          severity: high
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD PodMem 内存使用率已达到告警阈值"
          description: "{{ $labels.pod_name }} 告警阈值为90%, 当前值为: {{ $value }}"

cat cn-lcm-prod-node.rules
# Node-level alerting rules for the cn-lcm-prod hosts.
# Warning-level rules come first, followed by the "high" variants.
groups:
  - name: cn-lcm-prod
    rules:
      # CPU busy percentage: 1 - idle/total over the last minute.
      # The denominator carries the same job filter as the numerator so
      # that only cn-lcm-prod series are evaluated.
      - alert: NodeCpu
        expr: >-
          (1-((sum(increase(node_cpu_seconds_total{mode="idle",job=~'cn-lcm-prod.*'}[1m]))
          by (instance,job,hostname))
          / (sum(increase(node_cpu_seconds_total{job=~'cn-lcm-prod.*'}[1m]))
          by (instance,job,hostname)))) * 100 > 70
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeCpu 使用率已达到告警阈值"
          description: "{{ $labels.hostname }} 告警阈值为70%, 当前值为: {{ $value }}"

      # Memory used percentage: 1 - (buffers + cached + free) / total.
      - alert: NodeMem
        expr: >-
          (1-((node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes)
          / node_memory_MemTotal_bytes {job=~'cn-lcm-prod.*'})) * 100 > 80
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeMem 使用率已达到告警阈值"
          description: "{{ $labels.instance }} 告警阈值为80%, 当前值为: {{ $value }}"

      - alert: NodeNetworkIn
        expr: >-
          irate(node_network_receive_bytes_total{job=~"cn-lcm-prod.*",device='eth0'}[5m])
          /1024 /1024 > 10
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeNetworkIn 网卡 ETH0 使用率已达到告警阈值"
          description: "{{ $labels.instance }} 告警阈值为10MB, 当前值为: {{ $value }} MB"

      - alert: NodeNetworkOut
        expr: >-
          irate(node_network_transmit_bytes_total{job=~"cn-lcm-prod.*",device='eth0'}[5m])
          /1024 /1024 > 5
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeNetworkOut 网卡 ETH0 使用率已达到告警阈值"
          description: "{{ $labels.instance }} 告警阈值为5MB, 当前值为: {{ $value }} MB"

      # Filesystem used percentage: (size - free) / size per device.
      - alert: NodeDiskUsed
        expr: >-
          (sum(node_filesystem_size_bytes{job=~'cn-lcm-prod.*'}) by(instance,device,hostname)
          - sum(node_filesystem_free_bytes{job=~'cn-lcm-prod.*'}) by(instance,device,hostname))
          / sum(node_filesystem_size_bytes{job=~'cn-lcm-prod.*'}) by(instance,device,hostname)
          * 100 > 70
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeDiskUsed 磁盘使用率已达到告警阈值"
          description: "{{ $labels.instance }} 告警阈值为70%, 当前值为: {{ $value }}"

      - alert: NodeDiskIopsRead
        expr: >-
          irate(node_disk_reads_completed_total{job=~"cn-lcm-prod.*"}[1m]) > 500
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeDiskIopsRead 磁盘读取IOPS已达到告警阈值"
          description: "{{ $labels.instance }} 告警阈值为500, 当前值为: {{ $value }}"

      - alert: NodeDiskIopsWrite
        expr: >-
          irate(node_disk_writes_completed_total{job=~"cn-lcm-prod.*"}[1m]) > 500
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeDiskIopsWrite 磁盘写入IOPS已达到告警阈值"
          description: "{{ $labels.instance }} 告警阈值为500, 当前值为: {{ $value }}"

      - alert: NodeDiskReadTime
        expr: >-
          irate(node_disk_read_time_seconds_total{job=~"cn-lcm-prod.*"}[1m]) > 2
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeDiskReadTime 磁盘读取时间已达到告警阈值"
          description: "{{ $labels.instance }} 告警阈值为2s, 当前值为: {{ $value }} s"

      - alert: NodeDiskWriteTime
        expr: >-
          irate(node_disk_write_time_seconds_total{job=~"cn-lcm-prod.*"}[1m]) > 2
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeDiskWriteTime 磁盘写入时间已达到告警阈值"
          description: "{{ $labels.instance }} 告警阈值为2s, 当前值为: {{ $value }} s"

      # The description states 4000 to match the threshold in the expression.
      - alert: NodeTcpCurrEstab
        expr: >-
          node_netstat_Tcp_CurrEstab{job=~"cn-lcm-prod.*"} > 4000
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeTcpCurrEstab TCP当前连接已达到告警阈值"
          description: "{{ $labels.instance }} 告警阈值为4000, 当前值为: {{ $value }}"

      # Selects by job like every other rule in this file. The previous
      # selector hostname=~"$node" is a Grafana dashboard variable that is
      # never substituted in a rule file, so it matched no series.
      - alert: NodeSystemLoad
        expr: >-
          node_load5{job=~"cn-lcm-prod.*"} > 10
        for: 1m
        labels:
          severity: warning
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeSystemLoad Node 5分钟内负载已达到告警阈值"
          description: "{{ $labels.instance }} 5分钟内负载告警阈值为10, 当前值为: {{ $value }}"

      ################### high #####################
      - alert: NodeCpu
        expr: >-
          (1-((sum(increase(node_cpu_seconds_total{mode="idle",job=~'cn-lcm-prod.*'}[1m]))
          by (instance,job,hostname))
          / (sum(increase(node_cpu_seconds_total{job=~'cn-lcm-prod.*'}[1m]))
          by (instance,job,hostname)))) * 100 > 90
        for: 1m
        labels:
          severity: high
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeCpu 使用率已达到告警阈值(严重)"
          description: "{{ $labels.hostname }} 告警阈值为90%, 当前值为: {{ $value }}"

      - alert: NodeMem
        expr: >-
          (1-((node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes)
          / node_memory_MemTotal_bytes {job=~'cn-lcm-prod.*'})) * 100 > 90
        for: 1m
        labels:
          severity: high
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeMem 使用率已达到告警阈值(严重)"
          description: "{{ $labels.instance }} 告警阈值为90%, 当前值为: {{ $value }}"

      - alert: NodeDiskUsed
        expr: >-
          (sum(node_filesystem_size_bytes{job=~'cn-lcm-prod.*'}) by(instance,device,hostname)
          - sum(node_filesystem_free_bytes{job=~'cn-lcm-prod.*'}) by(instance,device,hostname))
          / sum(node_filesystem_size_bytes{job=~'cn-lcm-prod.*'}) by(instance,device,hostname)
          * 100 > 90
        for: 1m
        labels:
          severity: high
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeDiskUsed 磁盘使用率已达到告警阈值(严重)"
          description: "{{ $labels.instance }} 告警阈值为90%, 当前值为: {{ $value }}"

      - alert: NodeDiskIopsRead
        expr: >-
          irate(node_disk_reads_completed_total{job=~"cn-lcm-prod.*"}[1m]) > 2000
        for: 1m
        labels:
          severity: high
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeDiskIopsRead 磁盘读取IOPS已达到告警阈值(严重)"
          description: "{{ $labels.instance }} 告警阈值为2000, 当前值为: {{ $value }}"

      - alert: NodeDiskIopsWrite
        expr: >-
          irate(node_disk_writes_completed_total{job=~"cn-lcm-prod.*"}[1m]) > 2000
        for: 1m
        labels:
          severity: high
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeDiskIopsWrite 磁盘写入IOPS已达到告警阈值(严重)"
          description: "{{ $labels.instance }} 告警阈值为2000, 当前值为: {{ $value }}"

      - alert: NodeTcpCurrEstab
        expr: >-
          node_netstat_Tcp_CurrEstab{job=~"cn-lcm-prod.*"} > 6000
        for: 1m
        labels:
          severity: high
          project: cn-lcm-prod
        annotations:
          summary: "CN-LCM-PROD NodeTcpCurrEstab TCP当前连接已达到告警阈值(严重)"
          description: "{{ $labels.instance }} 告警阈值为6000, 当前值为: {{ $value }}"
3.备prometheus配置以及告警模版样例
和主配置完全相同,一点都不需要更改。
4.主备alertmanager配置
alertmanager主备配置都一样,只是启动的时候命令不相同。
# Alertmanager configuration (alertmanager.yml).
# Routes alerts by their `project` label to per-project receivers and
# suppresses "warning" alerts while the matching "high" alert is firing.
global:
  # How long Alertmanager waits without receiving an alert again before it
  # marks that alert as resolved. This affects when the "resolved"
  # notification arrives; tune it to your own scenario.
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.qiye.163.com:465'
  smtp_from: 'monitor@em.denachina.com'
  smtp_auth_username: 'monitor@em.denachina.com'
  # Credentials are redacted in this document; supply the real value at
  # deployment time and keep it out of published files.
  smtp_auth_password: '<REDACTED>'
  smtp_require_tls: false
  #wechat_api_url: 'https://qyapi.weixin.qq.com/cgi-bin/'
  #wechat_api_secret: '<REDACTED>'
  #wechat_api_corp_id: '<REDACTED>'

templates:
  - "/opt/alertmanager/templates/*.tmpl"

route:
  receiver: 'default-receiver'
  # Label keys used for grouping: alerts sharing the same values for these
  # labels are sent together in one notification.
  group_by: ['alertname', 'project', 'severity']
  # Interval between two notifications for the same group.
  group_interval: 1m
  # Batching window: alerts arriving within this time are merged into one
  # notification.
  group_wait: 20s
  # How long to wait before re-sending a notification that was already sent.
  repeat_interval: 120m
  routes:
    - receiver: cn-lcm-prod
      group_wait: 20s
      match_re:
        project: cn-lcm-prod
    - receiver: cn-gc
      group_wait: 20s
      match_re:
        project: cn-gc

receivers:
  - name: 'default-receiver'
    email_configs:
      - to: "zexiong.li@xx.com,zhaohe.ji@xx.com,xin.a.yao@xx.com,lizexiong@huawei.com"
        send_resolved: true
        #html: '{{ template "email.html" . }}'
        #headers: { Subject: " {{ .CommonLabels.alertname }} {{ .CommonAnnotations.summary }}" }
        headers:
          Subject: '{{ .CommonAnnotations.summary }}'

  - name: 'cn-lcm-prod'
    email_configs:
      # Single-quoted so the double quotes inside the template expression
      # do not terminate the YAML string.
      - to: '{{ template "lizexiongmail" }}'
        send_resolved: true
        headers:
          Subject: '{{ .CommonAnnotations.summary }}'
    webhook_configs:
      - url: "http://10.10.1.16:5000"
        send_resolved: true

  - name: 'cn-gc'
    email_configs:
      - to: '{{ template "lizexiongmail" }}'
        send_resolved: true
        headers:
          Subject: '{{ .CommonAnnotations.summary }}'
    webhook_configs:
      - url: "http://10.10.1.16:5000"
        send_resolved: true
    #wechat_configs:
    #  - corp_id: '<REDACTED>'
    #    to_party: '1'
    #    send_resolved: true
    #    agent_id: '1000002'
    #    to_user: '@all'
    #    api_secret: '<REDACTED>'
    #    message: '{{ template "wechat.default.message" . }}'

inhibit_rules:
  # source_match: while an alert matching these labels is firing, the
  # target alerts below are inhibited.
  - source_match:
      # Severity of the alert that triggers the inhibition.
      severity: 'high'
    # target_match: the alerts that get inhibited.
    target_match:
      # Severity of the alerts to suppress.
      severity: 'warning'
    # The rule applies only when source and target alerts have equal
    # values for all of these labels.
    equal: ['alertname', 'instance']
5.Alertmanager模版示例
可以看到alertmanager用了lizexiongmail模版变量,这里看看模版变量怎么写的。
cat email.tmpl
{{/* Recipient address used by the `to:` field of the email receivers.
     Written without surrounding whitespace so the rendered value is
     exactly the address. */}}
{{ define "lizexiongmail" }}zexiong.li@dena.com{{ end }}

{{/* HTML body: one table row per alert in the notification.
     NOTE(review): the third column reads a label named "value"; confirm
     the alert rules actually attach such a label, otherwise the cell
     renders empty. */}}
{{ define "email.html" }}
<table border="5">
  <tr>
    <td>报警项</td>
    <td>磁盘</td>
    <td>报警阀值</td>
    <td>开始时间</td>
  </tr>
  {{ range $i, $alert := .Alerts }}
  <tr>
    <td>{{ index $alert.Labels "alertname" }}</td>
    <td>{{ index $alert.Labels "instance" }}</td>
    <td>{{ index $alert.Labels "value" }}</td>
    <td>{{ $alert.StartsAt }}</td>
  </tr>
  {{ end }}
</table>
{{ end }}
还有官方自带的模版
cat default.tmpl {{ define "__alertmanager" }}Alertmanager{{ end }} {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver | urlquery }}{{ end }} {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }} {{ define "__description" }}{{ end }} {{ define "__text_alert_list" }}{{ range . }}Labels: {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }} {{ end }}Annotations: {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }} {{ end }}Source: {{ .GeneratorURL }} {{ end }}{{ end }} {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }} {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }} {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }} {{ define "slack.default.callbackid" }}{{ end }} {{ define "slack.default.pretext" }}{{ end }} {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }} {{ define "slack.default.iconemoji" }}{{ end }} {{ define "slack.default.iconurl" }}{{ end }} {{ define "slack.default.text" }}{{ end }} {{ define "slack.default.footer" }}{{ end }} {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }} {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }} {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }} {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }} {{ define "opsgenie.default.message" }}{{ template "__subject" . 
}}{{ end }} {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 -}} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{- end }} {{ if gt (len .Alerts.Resolved) 0 -}} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{- end }} {{- end }} {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }} {{ define "wechat.default.message" }}{{ template "__subject" . }} {{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 -}} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{- end }} {{ if gt (len .Alerts.Resolved) 0 -}} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{- end }} AlertmanagerUrl: {{ template "__alertmanagerURL" . }} {{- end }} {{ define "wechat.default.to_user" }}{{ end }} {{ define "wechat.default.to_party" }}{{ end }} {{ define "wechat.default.to_tag" }}{{ end }} {{ define "wechat.default.agent_id" }}{{ end }} {{ define "victorops.default.state_message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 -}} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{- end }} {{ if gt (len .Alerts.Resolved) 0 -}} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{- end }} {{- end }} {{ define "victorops.default.entity_display_name" }}{{ template "__subject" . }}{{ end }} {{ define "victorops.default.monitoring_tool" }}{{ template "__alertmanager" . }}{{ end }} {{ define "email.default.subject" }}{{ template "__subject" . 
}}{{ end }} {{ define "email.default.html" }} <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <!-- Style and HTML derived from https://github.com/mailgun/transactional-email-templates The MIT License (MIT) Copyright (c) 2014 Mailgun Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--> <html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <head style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <meta name="viewport" content="width=device-width" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> <title style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">{{ template "__subject" . }}</title> </head> <body itemscope="" itemtype="http://schema.org/EmailMessage" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; -webkit-font-smoothing: antialiased; -webkit-text-size-adjust: none; height: 100%; line-height: 1.6em; width: 100% !important; background-color: #f6f6f6; margin: 0; padding: 0;" bgcolor="#f6f6f6"> <table style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; background-color: #f6f6f6; margin: 0;" bgcolor="#f6f6f6"> <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td> <td width="600" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; display: block !important; max-width: 600px !important; clear: both !important; width: 100% !important; margin: 0 auto; padding: 0;" 
valign="top"> <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; max-width: 600px; display: block; margin: 0 auto; padding: 0;"> <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; border-radius: 3px; background-color: #fff; margin: 0; border: 1px solid #e9e9e9;" bgcolor="#fff"> <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 16px; vertical-align: top; color: #fff; font-weight: 500; text-align: center; border-radius: 3px 3px 0 0; background-color: #E6522C; margin: 0; padding: 20px;" align="center" bgcolor="#E6522C" valign="top"> {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }} {{ .Name }}={{ .Value }} {{ end }} </td> </tr> <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 10px;" valign="top"> <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <a href="{{ template "__alertmanagerURL" . 
}}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #FFF; text-decoration: none; line-height: 2em; font-weight: bold; text-align: center; cursor: pointer; display: inline-block; border-radius: 5px; text-transform: capitalize; background-color: #348eda; margin: 0; border-color: #348eda; border-style: solid; border-width: 10px 20px;">View in {{ template "__alertmanager" . }}</a> </td> </tr> {{ if gt (len .Alerts.Firing) 0 }} <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Firing | len }}] Firing</strong> </td> </tr> {{ end }} {{ range .Alerts.Firing }} <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 
'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> </td> </tr> {{ end }} {{ if gt (len .Alerts.Resolved) 0 }} {{ if gt (len .Alerts.Firing) 0 }} <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> <hr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> </td> </tr> {{ end }} <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Resolved | len }}] Resolved</strong> </td> </tr> {{ end }} {{ range .Alerts.Resolved }} <tr 
style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> </td> </tr> {{ end }} </table> </td> </tr> </table> <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; clear: both; color: #999; margin: 0; padding: 20px;"> <table width="100%" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <tr 
style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; vertical-align: top; text-align: center; color: #999; margin: 0; padding: 0 0 20px;" align="center" valign="top"><a href="{{ .ExternalURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; color: #999; text-decoration: underline; margin: 0;">Sent by {{ template "__alertmanager" . }}</a></td> </tr> </table> </div></div> </td> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td> </tr> </table> </body> </html> {{ end }} {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }} {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 }} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{ end }} {{ if gt (len .Alerts.Resolved) 0 }} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{ end }} {{ end }} {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }} {{ define "sns.default.subject" }}{{ template "__subject" . 
}}{{ end }} {{ define "sns.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 }} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{ end }} {{ if gt (len .Alerts.Resolved) 0 }} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{ end }} {{ end }} [root@prometheus01 templates]# cat default.tmpl ^C [root@prometheus01 templates]# vim default.tmpl [root@prometheus01 templates]# cat default.tmpl | wc -l 231 [root@prometheus01 templates]# vim default.tmpl ^C [root@prometheus01 templates]# cat default.tmpl {{ define "__alertmanager" }}Alertmanager{{ end }} {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver | urlquery }}{{ end }} {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }} {{ define "__description" }}{{ end }} {{ define "__text_alert_list" }}{{ range . }}Labels: {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }} {{ end }}Annotations: {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }} {{ end }}Source: {{ .GeneratorURL }} {{ end }}{{ end }} {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }} {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }} {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }} {{ define "slack.default.callbackid" }}{{ end }} {{ define "slack.default.pretext" }}{{ end }} {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . 
}}{{ end }} {{ define "slack.default.iconemoji" }}{{ end }} {{ define "slack.default.iconurl" }}{{ end }} {{ define "slack.default.text" }}{{ end }} {{ define "slack.default.footer" }}{{ end }} {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }} {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }} {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }} {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }} {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }} {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 -}} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{- end }} {{ if gt (len .Alerts.Resolved) 0 -}} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{- end }} {{- end }} {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }} {{ define "wechat.default.message" }}{{ template "__subject" . }} {{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 -}} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{- end }} {{ if gt (len .Alerts.Resolved) 0 -}} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{- end }} AlertmanagerUrl: {{ template "__alertmanagerURL" . 
}} {{- end }} {{ define "wechat.default.to_user" }}{{ end }} {{ define "wechat.default.to_party" }}{{ end }} {{ define "wechat.default.to_tag" }}{{ end }} {{ define "wechat.default.agent_id" }}{{ end }} {{ define "victorops.default.state_message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 -}} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{- end }} {{ if gt (len .Alerts.Resolved) 0 -}} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{- end }} {{- end }} {{ define "victorops.default.entity_display_name" }}{{ template "__subject" . }}{{ end }} {{ define "victorops.default.monitoring_tool" }}{{ template "__alertmanager" . }}{{ end }} {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }} {{ define "email.default.html" }} <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <!-- Style and HTML derived from https://github.com/mailgun/transactional-email-templates The MIT License (MIT) Copyright (c) 2014 Mailgun Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. --> <html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <head style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <meta name="viewport" content="width=device-width" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> <title style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">{{ template "__subject" . 
}}</title> </head> <body itemscope="" itemtype="http://schema.org/EmailMessage" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; -webkit-font-smoothing: antialiased; -webkit-text-size-adjust: none; height: 100%; line-height: 1.6em; width: 100% !important; background-color: #f6f6f6; margin: 0; padding: 0;" bgcolor="#f6f6f6"> <table style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; background-color: #f6f6f6; margin: 0;" bgcolor="#f6f6f6"> <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td> <td width="600" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; display: block !important; max-width: 600px !important; clear: both !important; width: 100% !important; margin: 0 auto; padding: 0;" valign="top"> <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; max-width: 600px; display: block; margin: 0 auto; padding: 0;"> <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; border-radius: 3px; background-color: #fff; margin: 0; border: 1px solid #e9e9e9;" bgcolor="#fff"> <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 16px; vertical-align: top; color: #fff; font-weight: 500; text-align: center; border-radius: 3px 3px 0 0; background-color: #E6522C; margin: 0; padding: 20px;" 
align="center" bgcolor="#E6522C" valign="top"> {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }} {{ .Name }}={{ .Value }} {{ end }} </td> </tr> <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 10px;" valign="top"> <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <a href="{{ template "__alertmanagerURL" . }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #FFF; text-decoration: none; line-height: 2em; font-weight: bold; text-align: center; cursor: pointer; display: inline-block; border-radius: 5px; text-transform: capitalize; background-color: #348eda; margin: 0; border-color: #348eda; border-style: solid; border-width: 10px 20px;">View in {{ template "__alertmanager" . 
}}</a> </td> </tr> {{ if gt (len .Alerts.Firing) 0 }} <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Firing | len }}] Firing</strong> </td> </tr> {{ end }} {{ range .Alerts.Firing }} <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; 
font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> </td> </tr> {{ end }} {{ if gt (len .Alerts.Resolved) 0 }} {{ if gt (len .Alerts.Firing) 0 }} <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> <hr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> </td> </tr> {{ end }} <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Resolved | len }}] Resolved</strong> </td> </tr> {{ end }} {{ range .Alerts.Resolved }} <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 
0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> </td> </tr> {{ end }} </table> </td> </tr> </table> <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; clear: both; color: #999; margin: 0; padding: 20px;"> <table width="100%" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; vertical-align: top; text-align: center; color: #999; margin: 0; padding: 0 0 20px;" align="center" valign="top"><a href="{{ .ExternalURL }}" style="font-family: 'Helvetica Neue', Helvetica, 
Arial, sans-serif; box-sizing: border-box; font-size: 12px; color: #999; text-decoration: underline; margin: 0;">Sent by {{ template "__alertmanager" . }}</a></td> </tr> </table> </div></div> </td> <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td> </tr> </table> </body> </html> {{ end }} {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }} {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 }} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{ end }} {{ if gt (len .Alerts.Resolved) 0 }} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{ end }} {{ end }} {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }} {{ define "sns.default.subject" }}{{ template "__subject" . }}{{ end }} {{ define "sns.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} {{ if gt (len .Alerts.Firing) 0 }} Alerts Firing: {{ template "__text_alert_list" .Alerts.Firing }} {{ end }} {{ if gt (len .Alerts.Resolved) 0 }} Alerts Resolved: {{ template "__text_alert_list" .Alerts.Resolved }} {{ end }} {{ end }}
6.子联邦prometheus配置
下面两节,在上一章也讲解过,为了完整,这里还是重复一遍。
因为子联邦节点是监控k8s集群的,为了方便,肯定是部署在k8s集群里,下面看看deploy的配置文件。
这里不讲解数据采集相关工具组件的授权以及安装,默认它们已经安装完成;有需要的读者请查看kube-state-metrics和metrics-server章节。
---
# NodePort Service that exposes the in-cluster Prometheus: port 9090 is
# published on every node as port 30946 and routed to pods labelled
# `app: prometheus`.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    name: prometheus
spec:
  type: NodePort
  selector:
    app: prometheus
  ports:
    - name: prometheus
      protocol: TCP
      port: 9090
      targetPort: 9090
      nodePort: 30946
---
# Single-replica Prometheus Deployment. Its configuration is read from the
# `prometheus-config` ConfigMap mounted at /etc/prometheus; no persistent
# volume is attached.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    name: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      # Service account the pod runs as (must exist in this namespace).
      serviceAccountName: prometheus
      containers:
        - name: prometheus
          image: prom/prometheus:v2.3.0
          env:
            # NOTE(review): presumably bumped to force a new rollout — confirm.
            - name: ver
              value: "15"
          command:
            - "/bin/prometheus"
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--log.level=debug"
          ports:
            - containerPort: 9090
              protocol: TCP
          volumeMounts:
            - name: prometheus-config
              mountPath: "/etc/prometheus"
      volumes:
        - name: prometheus-config
          configMap:
            name: prometheus-config
当然也不能少了RBAC的授权。
---
# Read-only, cluster-wide permissions for the Prometheus service account.
# Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API was removed in
# Kubernetes 1.22, while v1 is accepted by every cluster that also accepts
# an apps/v1 Deployment.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources:
      # Node resources needed by node-role discovery and by scraping through
      # the API server proxy; without this grant Prometheus reports 403 errors.
      - nodes
      - nodes/metrics
      - nodes/proxy
      - services
      - endpoints
      - pods
      - namespaces
    verbs: ["get", "list", "watch"]
  - apiGroups:
      # Legacy group, kept for older clusters.
      - extensions
      # Group that serves Ingress objects on current clusters.
      - networking.k8s.io
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics", "/api/*"]
    verbs: ["get"]
---
# Identity the Prometheus pod runs as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitoring
---
# Binds the ClusterRole above to the service account in `monitoring`.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: monitoring
上面的deploy挂载了一个卷,也就是下面要列出的配置文件。这个配置文件里没有告警条目,也没有持久化存储,因为子联邦节点不需要它们,只要总prometheus来向它收集数据就可以了。它也是一个prometheus,只是做的工作比较少罢了。
这个prometheus包含了以下采集任务:
- kubernetes-kubelet
- kubernetes-cadvisor
- kubernetes-pods
- kubernetes-apiservers
- kubernetes-services
- kubernetes-ingresses
- kubernetes-service-endpoints
基本涵盖了k8s的大部分关键指标,所以k8s集群内的联邦prometheus角色,注意配置好以上这几个采集任务就可以。
配置文件如下,其中的项目标识(cn-lcm-prod)可能与你的环境不太一样,改成自己对应的项目即可;注意job名称要与主prometheus联邦配置中match[]里的job名称保持一致。
---
# Prometheus configuration for the in-cluster (federated child) Prometheus.
# It only scrapes; it defines no alerting rules and no remote storage.
# Every job name carries the `cn-lcm-prod-` project prefix; the job names
# must match the `match[]` selectors used by the Prometheus that federates
# from this instance.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s

    scrape_configs:
      # Prometheus scraping itself.
      - job_name: 'prometheus'
        static_configs:
          - targets: ['localhost:9090']

      # Kubelet metrics of every node, fetched through the API server proxy.
      - job_name: 'cn-lcm-prod-kubernetes-kubelet'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics

      # cAdvisor metrics of every node, fetched through the API server proxy.
      # The job name uses the same `cn-lcm-prod-` prefix as the other jobs so
      # that a federation selector on that prefix also picks up this job.
      - job_name: 'cn-lcm-prod-kubernetes-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)

      # Pods that opt in with the prometheus.io/scrape annotation.
      - job_name: 'cn-lcm-prod-kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          # Replace the port of the pod address with the annotated port.
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name

      # The API server, selected as the default/kubernetes/https endpoint.
      - job_name: 'cn-lcm-prod-kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https
          - target_label: __address__
            replacement: kubernetes.default.svc:443

      # HTTP probes of services annotated with prometheus.io/probe, sent
      # through the blackbox exporter.
      - job_name: 'cn-lcm-prod-kubernetes-services'
        metrics_path: /probe
        params:
          module: [http_2xx]
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox-exporter.monitoring.svc.cluster.local:9115
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name

      # HTTP probes of ingresses annotated with prometheus.io/probe, sent
      # through the blackbox exporter.
      - job_name: 'cn-lcm-prod-kubernetes-ingresses'
        metrics_path: /probe
        params:
          module: [http_2xx]
        kubernetes_sd_configs:
          - role: ingress
        relabel_configs:
          - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
            action: keep
            regex: true
          # Build the probe URL as <scheme>://<address><path>.
          - source_labels: [__meta_kubernetes_ingress_scheme, __address__, __meta_kubernetes_ingress_path]
            regex: (.+);(.+);(.+)
            replacement: ${1}://${2}${3}
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox-exporter.monitoring.svc.cluster.local:9115
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_ingress_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_ingress_name]
            target_label: kubernetes_name

      # Endpoints of services that opt in with the prometheus.io/scrape
      # annotation.
      - job_name: 'cn-lcm-prod-kubernetes-service-endpoints'
        # This job deliberately overrides the global 15s interval: with 15s
        # the pulled data showed intermittent gaps, so it scrapes every 10s.
        scrape_interval: 10s
        scrape_timeout: 10s
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          # Replace the port of the endpoint address with the annotated port.
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name
7.宿主机联邦及consul自动发现
其实对主prometheus来说,联邦节点是在k8s集群内还是集群外,它并不关注;但是运维人员需要关注,因为k8s集群内和宿主机上的采集方式和自动发现方式都不一样。
下面的宿主机联邦节点同时配置了监控进程以及监控端口的采集任务,具体的配置解释,请翻阅前面相关章节的文档。
Consul的使用这里不再做介绍.这里仅贴出配置。
Consul的配置过于简单,这里就不展示了,仅贴出prometheus的配置。
# Prometheus configuration for the host-level (federated child) Prometheus.
# Targets are discovered from the local Consul agent or listed statically;
# blackbox-style jobs rewrite __address__ to the exporter that runs the probe.
global:
  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.

scrape_configs:
  # Consul-registered services carrying a `cn-gc` tag, scraped at their
  # registered address.
  - job_name: 'cn-gc-consul-node'
    consul_sd_configs:
      - server: '127.0.0.1:8500'
        # To list services by hand instead:
        # services: ['cn-gc-game02']
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*cn-gc.*
        action: keep
      - source_labels: [__meta_consul_service_id]
        target_label: "hostname"

  # Same Consul services, but scraped on port 9256 of each host.
  - job_name: 'cn-gc-consul-process'
    consul_sd_configs:
      - server: '127.0.0.1:8500'
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*cn-gc.*
        action: keep
      - source_labels: [__meta_consul_service_id]
        target_label: "hostname"
      # Keep the IPv4 address, swap the port for 9256. Relabel regexes are
      # fully anchored, so the pattern must cover every octet range
      # (including 100-199 in the last octet) and ports up to 5 digits;
      # otherwise __address__ is silently left unchanged.
      - source_labels: [__address__]
        regex: '((?:(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])):(\d{1,5})'
        target_label: __address__
        replacement: ${1}:9256

  # HTTP probe of the first nginx host via the exporter at 10.10.3.4:9115.
  - job_name: 'cn-gc-nginx01'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets: ['10.10.3.4:80/admin#/login']
    # Consul-based alternative, currently disabled:
    # consul_sd_configs:
    #   - server: '127.0.0.1:8500'
    #     services: []
    relabel_configs:
      # - source_labels: [__meta_consul_tags]
      #   regex: .*cn-gc-port.*
      #   action: keep
      - source_labels: [__address__]
        target_label: __param_target
      - target_label: __address__
        replacement: 10.10.3.4:9115

  # HTTP probe of the second nginx host via the exporter at 10.10.3.10:9115.
  - job_name: 'cn-gc-nginx02'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets: ['10.10.3.10:80/admin#/login']
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - target_label: __address__
        replacement: 10.10.3.10:9115

  # TCP connect probe, run by the exporter on port 9115 of each discovered host.
  - job_name: 'port'
    metrics_path: /probe
    params:
      module: [tcp_connect]
    # Static alternative, currently disabled:
    # static_configs:
    #   - targets: ['10.1.1.9:12020',]
    consul_sd_configs:
      - server: '127.0.0.1:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*cn-gc.*
        action: keep
      - source_labels: [__meta_consul_service_id]
        target_label: "hostname"
      # Disabled alternative that probes the discovered address itself:
      # - source_labels: [__address__]
      #   target_label: __param_target
      # NOTE(review): the probe target is the fixed address 127.0.0.1:11000
      # for every discovered host — confirm this is intentional.
      - source_labels: [__address__]
        # regex: '((?:(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])):(\d{1,5})'
        target_label: __param_target
        replacement: 127.0.0.1:11000
      # Send the probe request to port 9115 on the discovered host. The last
      # octet alternation includes 1\d\d so that addresses ending in 100-199
      # match the (fully anchored) regex too.
      - source_labels: [__address__]
        regex: '((?:(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])):(\d{1,5})'
        target_label: __address__
        replacement: ${1}:9115