me小怪兽

导航

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

统计

blackbox_exporter安装及监控

本文主要介绍如何使用blackbox_exporter收集被监控主机的网站状态、端口等信息,借助 Prometheus 最终以仪表盘的形式显示在 Grafana 中。
blackbox_exporter是Prometheus 官方提供的 exporter 之一,可以提供 http、dns、tcp、icmp 的监控数据采集。
2.blackbox_exporter 应用场景
HTTP 测试
定义 Request Header 信息
判断 Http status / Http Response Header / Http Body 内容
TCP 测试
业务组件端口状态监听
应用层协议定义与监听
ICMP 测试
主机探活机制
POST 测试
接口联通性
SSL 证书过期时间
3. 安装blackbox_exporter
3.1 各个版本的blackbox_exporter如下:
# wget https://github.com/prometheus/blackbox_exporter/releases/download/v0.15.1/blackbox_exporter-0.15.1.linux-amd64.tar.gz
# tar -xvf blackbox_exporter-0.15.1.linux-amd64.tar.gz
# mv blackbox_exporter-0.15.1.linux-amd64 /usr/local/blackbox_exporter
3.3 创建systemd服务
vim /lib/systemd/system/blackbox_exporter.service

# systemd unit that runs blackbox_exporter as a service.
[Unit]
Description=blackbox_exporter
# Order this unit after network.target.
After=network.target
[Service]
# Launch the exporter binary with the blackbox.yml located in the install directory.
ExecStart=/usr/local/blackbox_exporter/blackbox_exporter --config.file=/usr/local/blackbox_exporter/blackbox.yml
# Restart the process automatically when it exits with a failure.
Restart=on-failure
[Install]
# Enabling the unit attaches it to multi-user.target.
WantedBy=multi-user.target

# systemctl daemon-reload
# systemctl start blackbox_exporter  && systemctl enable blackbox_exporter

3.5 验证是否启动成功 默认监听端口为9115
# systemctl status blackbox_exporter
# netstat -lnpt|grep 9115


在prometheus.yml中加入blackbox_exporter

[root@prometheus prometheus]# cat prometheus.yml

复制代码
# my global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
       - 172.16.1.12:20016 # address and port of the Alertmanager node

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - "rules/*.yml"  # enable the monitoring rule templates (apply to all hosts, customize as needed); create the directory first: mkdir prometheus/rules

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['0.0.0.0:9090']

  # Every `- job_name` entry must stay at this indent: an entry at column 0
  # closes the scrape_configs list and the file no longer parses.
  # MySQL exporter endpoints, one target group per database host.
  - job_name: 'MySQL'
    static_configs:
      - targets: ['172.16.1.3:9104']
      - targets: ['172.16.1.4:9104']
  - job_name: 'Redis'
    static_configs:
      - targets: ['172.16.1.12:9121']
      - targets: ['172.16.1.13:9121']
  - job_name: 'pika'
    static_configs:
      - targets: ['172.16.1.15:9121']
  - job_name: 'elasticsearch'
    static_configs:
      - targets: ['172.16.1.12:9114']
  - job_name: 'PostgreSQL'
    static_configs:
      - targets: ['172.16.1.12:9187']
      - targets: ['172.16.1.3:9187']
  - job_name: 'Node'
    static_configs:
      - targets: ['172.16.1.2:20015','172.16.1.3:20015','172.16.1.4:20015','172.16.1.5:20015','172.16.1.6:20015','172.16.1.7:20015','172.16.1.8:20015','172.16.1.9:20015','172.16.1.10:20015','172.16.1.12:20015']
  - job_name: 'Nginx'
    static_configs:
      - targets: ['172.16.1.12:9113']

  # TCP port probes: targets are read from the file_sd file and probed
  # through the exporter's /probe endpoint with the tcp_connect module.
  - job_name: "Port_status"
    metrics_path: /probe
    params:
      module: [tcp_connect]
    file_sd_configs:
    - files:
      - "/usr/local/prometheus/file_sd/port.yml"
    relabel_configs:
      # Pass the discovered address to the exporter as the ?target= parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the probed address as the instance label.
      - source_labels: [__param_target]
        target_label: instance
      # Send the actual scrape to the blackbox_exporter itself (port 9115).
      - target_label: __address__
        replacement: 172.16.1.12:9115


  # HTTP probes: each URL below is checked through the exporter's /probe
  # endpoint with the http_2xx module.
  - job_name: http-status
    metrics_path: /probe
    params: 
      module: [http_2xx]
    static_configs:
    - targets:
      - https://mar.abk.com
      - https://ip.abk.com
      labels:   # custom labels attached to the targets
        group: web
    relabel_configs:
      # Pass the listed URL to the exporter as the ?target= parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the probed URL as the instance label.
      - source_labels: [__param_target]
        target_label: instance
      # Send the actual scrape to the blackbox_exporter itself (port 9115).
      - target_label: __address__
        replacement: 172.16.1.12:9115


  - job_name: 'blackbox_check_hosts'
    metrics_path: /probe
    params:
      module: [icmp]
    static_configs:
    - targets:
      - 172.16.1.2
      - 172.16.1.3
      - 172.16.1.4
      - 172.16.1.5
      - 172.16.1.6
      - 172.16.1.7
      - 172.16.1.8
      - 172.16.1.9
      - 172.16.1.10
      labels:
        group: icmp
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 172.16.1.12:9115

  - job_name: 'pushgateway'
    honor_labels: true
    static_configs:
      - targets: ['172.16.1.12:20018']
        labels:
          instance: pushgateway
复制代码

[root@prometheus prometheus]# cat rules/blackbox.yml

复制代码
groups:

- name: BlackboxExporter

  rules:

#    - alert: Blackbox探测失败
#      expr: 'probe_success == 0'
#      for: 0m
#      labels:
#        severity: 严重
#      annotations:
#        summary: Blackbox 探测失败 (instance {{ $labels.instance }})
#        description: "探测失败\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    - alert: Blackbox配置重新加载失败
      expr: 'blackbox_exporter_config_last_reload_successful != 1'
      for: 0m
      labels:
        severity: 警告
      annotations:
        summary: Blackbox 配置重新加载失败 (instance {{ $labels.instance }})
        description: "Blackbox配置重新加载失败\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    - alert: Blackbox探测时间慢
      expr: 'avg_over_time(probe_duration_seconds[1m]) > 5'
      for: 1m
      labels:
        severity: 警告
      annotations:
        summary: Blackbox 探测时间慢 (instance {{ $labels.instance }})
        description: "Blackbox探测花了5秒钟以上的时间完成\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    - alert: Blackbox探测HTTP失败
      expr: 'probe_http_status_code <= 199 OR probe_http_status_code >= 400'
      for: 0m
      labels:
        severity: 严重
      annotations:
        summary: Blackbox 探测HTTP失败 (instance {{ $labels.instance }})
        description: "HTTP状态代码不是200-399\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    # Warning when a certificate expires in at least 3 but fewer than 30 days.
    # The expression takes the latest probe_ssl_earliest_cert_expiry sample
    # from the past 10 minutes, subtracts the current time, converts the
    # remaining seconds to days (rounded to the nearest 0.1), and keeps only
    # series whose value falls in the [3, 30) range.
    - alert: Blackbox SSL证书于30天内过期
      expr: '3 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 30'
      for: 0m
      labels:
        severity: 警告
      annotations:
        summary: Blackbox SSL证书于30天内过期 (instance {{ $labels.instance }})
        description: "SSL证书将在30天内过期\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    - alert: Blackbox SSL证书于3天内过期
      expr: '0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3'
      for: 0m
      labels:
        severity: 严重
      annotations:
        summary: Blackbox SSL证书于3天内过期 (instance {{ $labels.instance }})
        description: "SSL证书将在3天内过期\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    - alert: BlackboxSSL证书已过期
      expr: 'round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 0'
      for: 0m
      labels:
        severity: 严重
      annotations:
        summary: Blackbox SSL证书已过期 (instance {{ $labels.instance }})
        description: "SSL证书已经过期\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    - alert: Blackbox探测慢速HTTP
      expr: 'avg_over_time(probe_http_duration_seconds[1m]) > 5'
      for: 1m
      labels:
        severity: 警告
      annotations:
        summary: Blackbox 探测慢速HTTP (instance {{ $labels.instance }})
        description: "HTTP请求花费了超过5s\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    - alert: Blackbox探测慢速ping
      expr: 'avg_over_time(probe_icmp_duration_seconds[1m]) > 5'
      for: 1m
      labels:
        severity: 警告
      annotations:
        summary: Blackbox 探测慢速ping (instance {{ $labels.instance }})
        description: "Blackbox ping耗时超过5s\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

复制代码
复制代码
[root@prometheus prometheus]#  cat prometheus/file_sd/port.yml 
#- targets:
#  - 172.16.1.2:20015
#  - 172.16.1.3:20015
#  - 172.16.1.4:20015
#  - 172.16.1.5:20015
#  - 172.16.1.6:20015
#  - 172.16.1.7:20015
#  - 172.16.1.8:20015
#  - 172.16.1.9:20015
#  - 172.16.1.10:20015
#  labels:
##    group: 自定义
##    tag: 自定义
#    group: 生产平台
#    tag: node_exporter
- targets:
  - 172.16.1.2:22
  - 172.16.1.3:22
  - 172.16.1.4:22
  - 172.16.1.5:22
  - 172.16.1.6:22
  - 172.16.1.7:22
  - 172.16.1.8:22
  - 172.16.1.9:22
  - 172.16.1.10:22
  labels:
    group: 生产平台 
    tag: sshd
- targets:
  - 172.16.1.3:3306
  labels:
    group: 生产平台
    tag: mysql_master
- targets:
  - 172.16.1.4:3306
  labels:
    group: 生产平台
    tag: mysql_slave
- targets:
  - 172.16.1.8:6379
  labels:
    group: 生产平台
    tag: redis
- targets:
  - 172.16.1.2:15432
  labels:
    group: 生产平台
    tag: pgsql
- targets:
  - 172.16.1.3:9201
  labels:
    group: 生产平台
    tag: es_1
- targets:
  - 172.16.1.4:9201
  labels:
    group: 生产平台
    tag: es_2
- targets:
  - 172.16.1.5:9201
  labels:
    group: 生产平台
    tag: es_3
- targets:
  - 172.16.1.2:8888
  labels:
    group: 生产平台
    tag: oss
# NOTE(review): every other address in this file is on 172.16.1.x; confirm
# that 172.17.1.12 is intended and not a typo for 172.16.1.12.
- targets:
  - 172.17.1.12:443
  labels:
    group: 生产平台
    tag: nginx
复制代码

访问blackbox

 

访问prometheus

 访问grafana

导入模版编号16292

 

posted on   me小怪兽  阅读(2001)  评论(0编辑  收藏  举报

相关博文:
阅读排行:
· 分享4款.NET开源、免费、实用的商城系统
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
点击右上角即可分享
微信分享提示