Prometheus

安装

tar -xf prometheus-2.37.1.linux-amd64.tar.gz -C /usr/local/src/
ln -sv /usr/local/src/prometheus-2.37.1.linux-amd64/ /usr/local/prometheus
cat > /etc/systemd/system/prometheus.service <<EOF
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Restart=on-failure
WorkingDirectory=/usr/local/prometheus/
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --web.enable-lifecycle
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload && systemctl restart prometheus && systemctl enable prometheus

验证prometheus web界面:http://192.168.88.20:9090/

收集服务器和客户端的node-exporter指标数据

客户端:

[root@test-20 src]# tar -xf node_exporter-1.4.0.linux-amd64.tar.gz 
[root@test-20 src]# ln /usr/local/src/node_exporter-1.4.0.linux-amd64 -sv /usr/local/node_exporter
创建node-exporter service启动文件:
cat > /etc/systemd/system/node-exporter.service <<EOF
[Unit]
Description=Prometheus Node Exporter
After=network.target
[Service]
ExecStart=/usr/local/node_exporter/node_exporter
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload && systemctl restart node-exporter && systemctl enable node-exporter.service

服务器:

[root@test-20 src]# tar -xf node_exporter-1.4.0.linux-amd64.tar.gz 
[root@test-20 src]# ln /usr/local/src/node_exporter-1.4.0.linux-amd64 -sv /usr/local/node_exporter
cat > /etc/systemd/system/node-exporter.service <<EOF
[Unit]
Description=Prometheus Node Exporter
After=network.target
[Service]
ExecStart=/usr/local/node_exporter/node_exporter
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload && systemctl restart node-exporter && systemctl enable node-exporter.service
[root@test-20 src]# vim /usr/local/prometheus/prometheus.yml 
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["192.168.88.20:9090"]
  - job_name: "node-exporter"
    static_configs:
      - targets: ["192.168.88.20:9100","192.168.88.10:9100"]

动态(热)加载配置:

curl -X POST http://192.168.88.20:9090/-/reload

Grafana部署

安装Grafana Server:
yum localinstall -y grafana-9.2.0-1.x86_64.rpm && systemctl restart grafana-server.service && systemctl enable grafana-server.service
验证grafana web界面:http://192.168.88.20:3000
初始账号密码:admin : admin
添加prometheus数据源:

复制ID

 

 

file_sd_configs

[root@test-20 ~]# vim  /usr/local/prometheus/prometheus.yml
  - job_name: "node-exporter"
    file_sd_configs:
      - files:
        - /usr/local/prometheus/file_sd/*.yml
        refresh_interval: 5s
[root@test-20 ~]# vim /usr/local/prometheus/file_sd/target.yml
- targets: ["192.168.88.20:9100","192.168.88.10:9100"]

配置 Prometheus 通过 Consul 实现服务发现

在三台机器分别安装consul 

unzip consul_1.13.2_linux_amd64.zip
cp consul /usr/local/bin/
mkdir /data/consul/ -p

启动consul

服务器:

nohup consul agent -server -bootstrap -bind=192.168.88.20 -client=192.168.88.20 -data-dir=/data/consul -ui -node=192.168.88.20 &

客户端:

nohup consul agent -server -bind=192.168.88.10 -client=192.168.88.10 -data-dir=/data/consul -ui -node=192.168.88.10 -join=192.168.88.20 &
nohup consul agent -server -bind=192.168.88.30 -client=192.168.88.30 -data-dir=/data/consul -ui -node=192.168.88.30 -join=192.168.88.20 &
prometheus 配置文件:
[root@test-20 ~]# vim /usr/local/prometheus/prometheus.yml
scrape_configs:
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["192.168.88.20:9090"]
#  - job_name: "node-exporter"
#    file_sd_configs:
#      - files:
#        - /usr/local/prometheus/file_sd/*.yml
#        refresh_interval: 5s
  - job_name: "consul"
    consul_sd_configs:
      - server: 192.168.88.20:8500
        services: []
      - server: 192.168.88.10:8500
        services: []
      - server: 192.168.88.30:8500
        services: []
    relabel_configs:
      - source_labels: ['__meta_consul_tags']
        target_label: 'product'
      - source_labels: ['__meta_consul_dc']
        target_label: 'idc'
      - source_labels: ['__meta_consul_service']
        regex: "consul"
        action: drop
consul 服务注册与删除:
注册
curl -X PUT -d '{"id": "test-20","name": "test-20","address": "192.168.88.20","port":9100,"tags": ["test-20"],"checks": [{"http": "http://192.168.88.20:9100/","interval": "5s"}]}' http://192.168.88.20:8500/v1/agent/service/register
curl -X PUT -d '{"id": "test-10","name": "test-10","address": "192.168.88.10","port":9100,"tags": ["test-10"],"checks": [{"http": "http://192.168.88.10:9100/","interval": "5s"}]}' http://192.168.88.20:8500/v1/agent/service/register
curl -X PUT -d '{"id": "test-30","name": "test-30","address": "192.168.88.30","port":9100,"tags": ["test-30"],"checks": [{"http": "http://192.168.88.30:9100/","interval": "5s"}]}' http://192.168.88.20:8500/v1/agent/service/register

删除

curl --request PUT http://192.168.88.20:8500/v1/agent/service/deregister/test-30

查看结果

 

Alertmanager

部署Alertmanager
tar -xf alertmanager-0.24.0.linux-amd64.tar.gz
ln -sv /usr/local/src/alertmanager-0.24.0.linux-amd64 /usr/local/alertmanager
cat > /etc/systemd/system/alertmanager.service <<EOF
[Unit]
Description=Prometheus alertmanager
After=network.target
[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml
[Install]
WantedBy=multi-user.target
EOF
Alertmanager基础配置
[root@test-20 src]# vim /usr/local/alertmanager/alertmanager.yml 
global:
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '2228360744@qq.com'
  smtp_auth_username: '2228360744@qq.com'
  smtp_auth_password: 'ybqtwtrdjnrfeaha'
  smtp_hello: '@qq.com'
  smtp_require_tls: false
route:
  receiver: email-receiver
receivers:
  - name: 'email-receiver'
    email_configs:
    - to: '2228360744@qq.com'
[root@test-20 src]# vim /usr/local/prometheus/prometheus.yml
alerting: 
  alertmanagers:
    - static_configs:
        - targets:
          - "192.168.88.20:9093"
systemctl daemon-reload && systemctl restart alertmanager && systemctl enable alertmanager
systemctl restart prometheus.service

配置Prometheus加载告警规则文件

mkdir /usr/local/prometheus/rules

若某个Instance的up指标的值转为0持续超过1分钟后,将触发告警

[root@test-20 src]# vim /usr/local/prometheus/rules/rule.yml
groups:
- name: AllInstances
  rules:
  - alert: InstanceDown
    # Condition for alerting
    expr: up == 0
    for: 1m
    # Annotation - additional informational labels to store more information
    annotations:
      title: 'Instance down'
      description: 'Instance has been down for more than 1 minute.'
    # Labels - additional labels to be attached to the alert
    labels:
      severity: 'critical'
[root@test-20 src]# vim /usr/local/prometheus/prometheus.yml
rule_files:
  - "rules/*.yml"
  # - "second_rules.yml" 
systemctl restart prometheus.service

测试:

[root@test-30 ~]# systemctl stop node-exporter.service

 

Mysql exporter 

准备好mysql数据库

GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO  'exporter'@'%' identified by '123456';
flush privileges;
[root@test-30 src]# vim /etc/my.cnf
[client]
host=127.0.0.1
user=exporter
password=123456
/etc/init.d/mysqld restart

安装mysqld_exporter

tar -xf mysqld_exporter-0.14.0.linux-amd64.tar.gz
ln -sv /usr/local/src/mysqld_exporter-0.14.0.linux-amd64 /usr/local/mysqld_exporter
cat > /usr/lib/systemd/system/mysqld_exporter.service <<EOF

[Unit]
Description=mysqld_exporter
Documentation=https://prometheus.io/
After=network.target
[Service]
Type=simple
ExecStart=/usr/local/mysqld_exporter/mysqld_exporter --config.my-cnf=/etc/my.cnf
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload && systemctl restart mysqld_exporter.service && systemctl enable mysqld_exporter.service

配置服务器:

[root@test-20 src]# vim /usr/local/prometheus/prometheus.yml
  - job_name: "mysqld_exporter"
    static_configs:
      - targets: ["192.168.88.30:9104"]
systemctl restart prometheus.service

结果:

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

posted @ 2022-10-17 19:43  kivtx  阅读(65)  评论(0)    收藏  举报