docker-compose部署prometheus+grafana+alertmanager
cat docker-compose.yml如下
# Docker Compose stack running Prometheus, Alertmanager and Grafana.
version: '2'

services:
  # Prometheus server
  prometheus:
    # Image from Docker Hub
    image: prom/prometheus:latest
    # Container name
    container_name: Myprometheus
    # Hostname inside the container
    hostname: prometheus
    # Restart the container automatically
    restart: always
    # Host-to-container port mapping
    ports:
      - '9090:9090'
    # Mount the host config directory at /config inside the container
    volumes:
      - './prometheus/config:/config'
      - './prometheus/data/prometheus:/prometheus/data'
    # Configuration file to use inside the container
    command:
      - '--config.file=/config/prometheus.yml'
      # Enable hot reload of the configuration
      - '--web.enable-lifecycle'

  # Alerting module
  alertmanager:
    image: prom/alertmanager:latest
    container_name: Myalertmanager
    hostname: alertmanager
    restart: always
    ports:
      - '9093:9093'
    volumes:
      - './prometheus/config:/config'
      - './prometheus/data/alertmanager:/alertmanager/data'
    command:
      - '--config.file=/config/alertmanager.yml'

  # Monitoring dashboards
  grafana:
    image: docker.xxxxx.com/library/grafana:v20191225
    container_name: Mygrafana
    hostname: grafana
    restart: always
    ports:
      - '3000:3000'
    volumes:
      # Grafana configuration file (mail server settings)
      - './grafana/config/grafana.ini:/etc/grafana/grafana.ini'
      - './grafana/data/grafana:/var/lib/grafana'
prometheus.yml配置文件如下
# Prometheus configuration: global intervals, Alertmanager endpoint and
# the static scrape targets.

# my global config
global:
  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  scrape_interval: 15s
  # Evaluate rules every 15 seconds. The default is every 1 minute.
  evaluation_interval: 15s
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            # Host name and port of the Alertmanager container defined in
            # docker-compose.yml (hostname: alertmanager, port 9093).
            - 'alertmanager:9093'

# Load rules once and periodically evaluate them according to the global
# 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape configuration: Prometheus itself plus the exporter endpoints.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries
  # scraped from this config.
  - job_name: 'promethus'
    static_configs:
      - targets: ['192.168.1.100:9090']
        labels:
          instance: 'Monitor-Service-01'
          platform: 'master'

  - job_name: 'system'
    static_configs:
      - targets: ['192.168.1.101:9100']
        labels:
          instance: 'Monitor Service-01'
          platform: 'worker'

  - job_name: 'Server'
    static_configs:
      - targets: ['192.168.2.100:9100']
        labels:
          instance: 'Service-web01'
          platform: 'esc01'
      - targets: ['192.168.2.101:9101']
        labels:
          instance: 'Service-db01'
          platform: 'esc01'
      - targets: ['192.168.2.102:9102']
        labels:
          instance: 'Service-db02'
          platform: 'esc01'
      - targets: ['192.168.2.103:9103']
        labels:
          instance: 'Service-dbpool'
          platform: 'esc01'
alertmanager.yml配置信息如下
# Alertmanager configuration: global settings, the root route and its
# receiver.
global:
  resolve_timeout: 1m
  # The smarthost and SMTP sender used for mail notifications.
  # smtp_smarthost: ''
  # smtp_from: ''
  # smtp_auth_username: ''
  # smtp_auth_password: ''

route:
  receiver: 'default-receiver'
  # The labels by which incoming alerts are grouped together. For example,
  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
  # be batched into a single group.
  # group_by: ['alertname']
  # When a new group of alerts is created by an incoming alert, wait at
  # least 'group_wait' to send the initial notification.
  # This way ensures that you get multiple alerts for the same group that
  # start firing shortly after another are batched together on the first
  # notification.
  # group_wait: 5s
  # When the first notification was sent, wait 'group_interval' to send a
  # batch of new alerts that started firing for that group.
  # group_interval: 30s
  # If an alert has successfully been sent, wait 'repeat_interval' to
  # resend them.
  repeat_interval: 1m

receivers:
  # NOTE(review): this receiver declares only a name and no notification
  # integration, so routed alerts are not delivered anywhere — confirm
  # whether an email/webhook config is meant to be added here.
  - name: 'default-receiver'