docker-compose部署prometheus+grafana+alertmanager

执行 cat docker-compose.yml，文件内容如下：

# Compose file format version.
version: '2'
# Containers that make up this deployment.
services:
  # Prometheus server.
  prometheus:
    # Image pulled from Docker Hub.
    image: prom/prometheus:latest
    # Name of the container.
    container_name: Myprometheus
    # Hostname inside the container.
    hostname: prometheus
    # Restart the container automatically.
    restart: always
    # Host-to-container port mapping.
    ports:
      - '9090:9090'
    volumes:
      # Host config directory, mounted at /config in the container.
      - './prometheus/config:/config'
      # Host directory that persists the time-series data.
      - './prometheus/data/prometheus:/prometheus/data'
    # Overriding `command` replaces the image's default arguments, so every
    # flag that is needed has to be listed here.
    command:
      # Configuration file inside the container.
      - '--config.file=/config/prometheus.yml'
      # Write the TSDB to the mounted data directory explicitly instead of
      # relying on the default path relative to the working directory.
      - '--storage.tsdb.path=/prometheus/data'
      # Enable hot reload of the configuration.
      - '--web.enable-lifecycle'

  # Alerting component (Alertmanager).
  alertmanager:
    image: prom/alertmanager:latest
    container_name: Myalertmanager
    hostname: alertmanager
    restart: always
    # Host-to-container port mapping.
    ports:
      - '9093:9093'
    volumes:
      # Host config directory, mounted at /config in the container.
      - './prometheus/config:/config'
      # Host directory that persists Alertmanager state.
      - './prometheus/data/alertmanager:/alertmanager/data'
    # Overriding `command` replaces the image's default arguments, so every
    # flag that is needed has to be listed here.
    command:
      # Configuration file inside the container.
      - '--config.file=/config/alertmanager.yml'
      # Store state in the mounted data directory explicitly instead of
      # relying on the default path relative to the working directory.
      - '--storage.path=/alertmanager/data'

  # Grafana: visualisation dashboards for the monitoring data.
  grafana:
    container_name: Mygrafana
    hostname: grafana
    image: docker.xxxxx.com/library/grafana:v20191225
    # Host-to-container port mapping.
    ports:
      - '3000:3000'
    restart: always
    volumes:
      # grafana.ini supplied from the host; used to configure Grafana's
      # mail server.
      - './grafana/config/grafana.ini:/etc/grafana/grafana.ini'
      # Host directory mounted at /var/lib/grafana.
      - './grafana/data/grafana:/var/lib/grafana'

prometheus.yml 配置文件如下：

# Global settings.
global:
  # Scrape targets every 15 seconds (the default is every 1 minute).
  scrape_interval: 15s
  # Evaluate rules every 15 seconds (the default is every 1 minute).
  evaluation_interval: 15s
  # scrape_timeout is left at the global default (10s).

# Alertmanager configuration: where Prometheus sends the alerts it fires.
alerting:
  alertmanagers:
    - static_configs:
        # The Alertmanager container, addressed by its Compose service name
        # and the port that service listens on (9093).
        - targets:
            - 'alertmanager:9093'

# Rule files are loaded once and then evaluated periodically according to the
# global 'evaluation_interval'. No rule file is enabled here; uncomment or add
# entries below to load some.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape jobs. Each job_name is attached as the label `job=<job_name>` to
# every time series scraped by that job. The `instance` and `platform` labels
# below are set statically per target.
scrape_configs:
  # Presumably the Prometheus server itself (port 9090) - confirm.
  # NOTE(review): 'promethus' looks like a typo for 'prometheus'; it is kept
  # as-is because renaming it changes the `job` label on the scraped series.
  - job_name: 'promethus'
    static_configs:
      - targets:
          - '192.168.1.100:9090'
        labels:
          instance: 'Monitor-Service-01'
          platform: 'master'

  - job_name: 'system'
    static_configs:
      - targets:
          - '192.168.1.101:9100'
        labels:
          instance: 'Monitor Service-01'
          platform: 'worker'

  # Four targets, each with its own instance label, sharing one platform label.
  - job_name: 'Server'
    static_configs:
      - targets:
          - '192.168.2.100:9100'
        labels:
          instance: 'Service-web01'
          platform: 'esc01'
      - targets:
          - '192.168.2.101:9101'
        labels:
          instance: 'Service-db01'
          platform: 'esc01'
      - targets:
          - '192.168.2.102:9102'
        labels:
          instance: 'Service-db02'
          platform: 'esc01'
      - targets:
          - '192.168.2.103:9103'
        labels:
          instance: 'Service-dbpool'
          platform: 'esc01'

alertmanager.yml 配置信息如下：

# Global Alertmanager settings.
global:
  # resolve_timeout is set to one minute.
  resolve_timeout: 1m
  # The smarthost and SMTP sender used for mail notifications.
  # All SMTP settings below are commented out, so none is set by this file.
  #smtp_smarthost: ''
  #smtp_from: ''
  #smtp_auth_username: ''
  #smtp_auth_password: ''

# Root route. It has no child routes, so every alert goes to the receiver
# named below.
route:
  # Once a notification has been sent successfully, wait this long before
  # sending it again.
  repeat_interval: 1m
  receiver: 'default-receiver'

  # Optional grouping settings, all disabled in this file:
  #
  # Labels by which incoming alerts are grouped. For example, several alerts
  # arriving with cluster=A and alertname=LatencyHigh would be batched into a
  # single group.
  # group_by: ['alertname']
  #
  # When an incoming alert creates a new group, wait at least 'group_wait'
  # before sending the initial notification, so that alerts for the same group
  # that start firing shortly after one another are batched into it.
  # group_wait: 5s
  #
  # After the first notification was sent, wait 'group_interval' before
  # sending a batch of new alerts that started firing for that group.
  # group_interval: 30s
# Receivers that routes refer to by name.
receivers:
  # Only a name is defined; this receiver has no notification integration
  # configured in this file.
  - name: 'default-receiver'

  

posted @ 2020-01-07 18:00  也曾少年  阅读(2086)  评论(0编辑  收藏  举报