自动化监控file_sd_configs + consul_sd_configs

- 基于文件的服务发现


新增prometheus配置项
- job_name: 'file_sd'
  file_sd_configs:
  - files: ['/opt/monitor/prometheus/sd_config/*.yml']
  
修改配置后，向 Prometheus 进程发送 SIGHUP 信号使其重新加载配置（52720 为当时 Prometheus 进程的 PID，请替换为实际的 PID）：
[root@slave-2 prometheus]# kill -HUP 52720

[root@slave-2 sd_config]# vi node_exporter.yml
- targets: ['192.168.2.64:9100']

image

- 基于consul的服务发现


Consul是一个分布式的服务发现和键/值存储系统。

image

- 部署consul
[root@slave-4 ~]# docker run --name consul -d -p 8500:8500 consul

http://192.168.2.63:8500/
image

- 新增prometheus配置项
- job_name: 'consul_sd'
  consul_sd_configs:
  - server: 192.168.2.63:8500
    services: ['Linux']
- 向consul注册
[root@slave-4 ~]# curl -X PUT -d '{"id": "Linux-1","name": "Linux","address": "192.168.2.64","port": 9100,"tags": ["service"],"checks": [{"http": "http://192.168.2.64:9100","interval": "5s"}]}' http://192.168.2.63:8500/v1/agent/service/register
- 查看效果

image

- 自动监控100台服务器
1. 准备采集器资源
[root@slave-2 ansible]# ll node_exporter-1.3.1.linux-amd64.tar.gz
-rw-r--r--. 1 root root 9033415 2月  28 02:59 node_exporter-1.3.1.linux-amd64.tar.gz
[root@slave-2 ansible]# cat node_exporter.service
# systemd unit that runs node_exporter from /usr/local/node_exporter.
[Service]
# --web.config points at the web configuration file (config.yml), which
# carries the basic_auth_users entry protecting the metrics endpoint.
ExecStart=/usr/local/node_exporter/node_exporter --web.config=/usr/local/node_exporter/config.yml
# "systemctl reload" sends SIGHUP to the main process.
# NOTE(review): confirm node_exporter actually reloads on SIGHUP.
ExecReload=/bin/kill -HUP $MAINPID
# On stop, signal only the main process rather than the whole control group.
KillMode=process
# Restart automatically only after an unclean exit.
Restart=on-failure
[Install]
# "systemctl enable" hooks the service into the multi-user target.
WantedBy=multi-user.target
[root@slave-2 ansible]# cat config.yml
basic_auth_users:
  prometheus: $2y$12$9e6WXMrIA.lgfKfgTuo8eO9l30MAO7y.LsUXb17Nrp/q2Y5M5sq2K
2. 准备注册器脚本
[root@slave-2 ansible]# cat consul-register.sh
#!/bin/bash
#
# Register one service instance with the Consul agent so that it can be
# discovered through Prometheus consul_sd_configs.
#
# Usage: consul-register.sh <service_name> <instance_id> <ip> <port>
#   service_name  Consul service name; also used as the instance's only tag
#   instance_id   id of this instance in Consul
#   ip            address of the instance
#   port          port of the instance (emitted as a bare JSON number)
#
# Environment:
#   CONSUL_ADDR   host:port of the Consul agent (default 192.168.2.63:8500)
#
# Exit status: 0 on success, 2 on bad arguments, curl's status when the
# request fails (including HTTP 4xx/5xx answers from Consul).
set -euo pipefail

if [[ $# -ne 4 ]]; then
  printf 'Usage: %s <service_name> <instance_id> <ip> <port>\n' "${0##*/}" >&2
  exit 2
fi

service_name=$1
instance_id=$2
ip=$3
port=$4
readonly consul_addr="${CONSUL_ADDR:-192.168.2.63:8500}"

# The port goes into the JSON body unquoted, so anything non-numeric would
# produce an invalid document.
if [[ ! "$port" =~ ^[0-9]+$ ]]; then
  printf 'error: port must be numeric, got: %s\n' "$port" >&2
  exit 2
fi

# Values are passed as printf arguments (never as the format string).
# NOTE: they are not JSON-escaped; callers must not pass quotes/backslashes.
payload=$(printf '{"id": "%s","name": "%s","address": "%s","port": %s,"tags": ["%s"],"checks": [{"http": "http://%s:%s","interval": "5s"}]}' \
  "$instance_id" "$service_name" "$ip" "$port" "$service_name" "$ip" "$port")

# --fail makes an HTTP error from Consul a non-zero exit, so the calling
# Ansible task is marked failed instead of silently succeeding.
curl --fail --silent --show-error -X PUT -d "$payload" \
  "http://${consul_addr}/v1/agent/service/register"
3. 准备hosts文件
[root@slave-2 ansible]# cat hosts
[webservers]
192.168.2.60 name=web1

[dbservers]
192.168.2.61 name=db1
4. 准备playbook并执行
[root@slave-2 ansible]# cat playbook.yaml
# Install node_exporter on the target hosts, start it under systemd, and
# register each host in Consul under its inventory group name.
# Both inventory groups are targeted so that every group that has a matching
# Prometheus job (webservers, dbservers) actually gets deployed and registered.
- hosts: webservers:dbservers
  gather_facts: no
  vars:
    # Port node_exporter is registered with in Consul.
    exporter_port: 9100
  tasks:
  # Upload and unpack the release tarball into /usr/local.
  - name: 推送二进制文件
    unarchive: src=node_exporter-1.3.1.linux-amd64.tar.gz dest=/usr/local
  # Rename the versioned directory once; skipped when node_exporter/ exists.
  - name: 重命名
    shell: |
         cd /usr/local
         if [ ! -d node_exporter ];then
            mv node_exporter-1.3.1.linux-amd64 node_exporter
         fi
  # Web config (basic auth users) referenced by the unit's --web.config flag.
  - name: 推送配置文件
    copy: src=config.yml dest=/usr/local/node_exporter
  - name: 拷贝systemd文件
    copy: src=node_exporter.service dest=/usr/lib/systemd/system
  # daemon_reload makes systemd pick up the unit file copied above.
  - name: 启动服务
    systemd: name=node_exporter state=started enabled=yes daemon_reload=yes
  - name: 推送注册脚本
    copy: src=consul-register.sh dest=/usr/local/bin/
  - name: 注册当前节点
    # Arguments: service name, instance name, IP, port.
    # group_names[0] is the host's first inventory group; "name" is the
    # per-host variable set in the inventory file.
    shell: /bin/bash /usr/local/bin/consul-register.sh {{ group_names[0] }} {{ name }} {{ inventory_hostname }} {{ exporter_port }}

[root@slave-2 ansible]# ansible-playbook -i hosts playbook.yaml -uroot -k
SSH password: 
5. 查看consul注册情况

image

6. 新增prometheus配置项
# One scrape job per Consul service name. Targets are discovered from the
# Consul agent and scraped with HTTP basic auth.
# Keys inside each job must share the same indentation as job_name,
# otherwise the file is not valid YAML.
- job_name: 'webservers'
  basic_auth:
    username: prometheus
    # Quoted so YAML reads it as a string rather than an integer.
    password: '123456'
  consul_sd_configs:
  - server: 192.168.2.63:8500
    services: ['webservers']
- job_name: 'dbservers'
  basic_auth:
    username: prometheus
    password: '123456'
  consul_sd_configs:
  - server: 192.168.2.63:8500
    services: ['dbservers']
7. 查看监控效果

image

image

posted @ 2022-02-28 01:06  曾某某scau  阅读(580)  评论(0)  编辑  收藏  举报