Prometheus

1. 安装

tar -xf prometheus-2.37.1.linux-amd64.tar.gz -C /usr/local/src/
ln -sv /usr/local/src/prometheus-2.37.1.linux-amd64/ /usr/local/prometheus
cat > /etc/systemd/system/prometheus.service <<EOF
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target

[Service]
Restart=on-failure
WorkingDirectory=/usr/local/prometheus/
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --web.enable-lifecycle

[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload && systemctl restart prometheus && systemctl enable prometheus

验证prometheus web界面:http://192.168.88.20:9090/

2. 收集服务器和客户端的node-exporter指标数据

客户端:

[root@test-20 src]# tar -xf node_exporter-1.4.0.linux-amd64.tar.gz
[root@test-20 src]# ln /usr/local/src/node_exporter-1.4.0.linux-amd64 -sv /usr/local/node_exporter
创建node-exporter service启动文件:
cat > /etc/systemd/system/node-exporter.service <<EOF
[Unit]
Description=Prometheus Node Exporter
After=network.target

[Service]
ExecStart=/usr/local/node_exporter/node_exporter

[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload && systemctl restart node-exporter && systemctl enable node-exporter.service

服务器:

[root@test-20 src]# tar -xf node_exporter-1.4.0.linux-amd64.tar.gz
[root@test-20 src]# ln /usr/local/src/node_exporter-1.4.0.linux-amd64 -sv /usr/local/node_exporter
cat > /etc/systemd/system/node-exporter.service <<EOF
[Unit]
Description=Prometheus Node Exporter
After=network.target

[Service]
ExecStart=/usr/local/node_exporter/node_exporter

[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload && systemctl restart node-exporter && systemctl enable node-exporter.service
[root@test-20 src]# vim /usr/local/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["192.168.88.20:9090"]

  - job_name: "node-exporter"
    static_configs:
      - targets: ["192.168.88.20:9100","192.168.88.10:9100"]

3. 动态(热)加载配置:

curl -X POST http://192.168.88.20:9090/-/reload

4. Grafana部署

安装Grafana Server:
yum localinstall -y grafana-9.2.0-1.x86_64.rpm && systemctl restart grafana-server.service && systemctl enable grafana-server.service
验证grafana web界面:http://192.168.88.20:3000
初始账号密码:admin : admin
添加prometheus数据源:

复制ID

 

 

5. file_sd_configs

[root@test-20 ~]# vim /usr/local/prometheus/prometheus.yml
  - job_name: "node-exporter"
    file_sd_configs:
      - files:
          - /usr/local/prometheus/file_sd/*.yml
        refresh_interval: 5s
[root@test-20 ~]# vim /usr/local/prometheus/file_sd/target.yml
- targets: ["192.168.88.20:9100","192.168.88.10:9100"]

6. 配置 prometheus 到 consul 发现服务

在三台机器分别安装consul 

unzip consul_1.13.2_linux_amd64.zip
cp consul /usr/local/bin/
mkdir /data/consul/ -p

启动consul

服务器:

nohup consul agent -server -bootstrap -bind=192.168.88.20 -client=192.168.88.20 -data-dir=/data/consul -ui -node=192.168.88.20 &

客户端:

nohup consul agent -server -bind=192.168.88.10 -client=192.168.88.10 -data-dir=/data/consul -ui -node=192.168.88.10 -join=192.168.88.20 &
nohup consul agent -server -bind=192.168.88.30 -client=192.168.88.30 -data-dir=/data/consul -ui -node=192.168.88.30 -join=192.168.88.20 &
prometheus 配置文件:
[root@test-20 ~]# vim /usr/local/prometheus/prometheus.yml
scrape_configs:
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["192.168.88.20:9090"]

  # - job_name: "node-exporter"
  #   file_sd_configs:
  #     - files:
  #         - /usr/local/prometheus/file_sd/*.yml
  #       refresh_interval: 5s

  - job_name: "consul"
    consul_sd_configs:
      - server: 192.168.88.20:8500
        services: []
      - server: 192.168.88.10:8500
        services: []
      - server: 192.168.88.30:8500
        services: []
    relabel_configs:
      - source_labels: ['__meta_consul_tags']
        target_label: 'product'
      - source_labels: ['__meta_consul_dc']
        target_label: 'idc'
      - source_labels: ['__meta_consul_service']
        regex: "consul"
        action: drop
consul 服务注册与删除:
注册
curl -X PUT -d '{"id": "test-20","name": "test-20","address": "192.168.88.20","port":9100,"tags": ["test-20"],"checks": [{"http": "http://192.168.88.20:9100/","interval": "5s"}]}' http://192.168.88.20:8500/v1/agent/service/register
curl -X PUT -d '{"id": "test-10","name": "test-10","address": "192.168.88.10","port":9100,"tags": ["test-10"],"checks": [{"http": "http://192.168.88.10:9100/","interval": "5s"}]}' http://192.168.88.20:8500/v1/agent/service/register
curl -X PUT -d '{"id": "test-30","name": "test-30","address": "192.168.88.30","port":9100,"tags": ["test-30"],"checks": [{"http": "http://192.168.88.30:9100/","interval": "5s"}]}' http://192.168.88.20:8500/v1/agent/service/register

删除

curl --request PUT http://192.168.88.20:8500/v1/agent/service/deregister/test-30

查看结果

 

7. Alertmanager

部署Alertmanager
tar -xf alertmanager-0.24.0.linux-amd64.tar.gz
ln -sv /usr/local/src/alertmanager-0.24.0.linux-amd64 /usr/local/alertmanager
cat > /etc/systemd/system/alertmanager.service <<EOF
[Unit]
Description=Prometheus alertmanager
After=network.target

[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml

[Install]
WantedBy=multi-user.target
EOF
Alertmanager基础配置
[root@test-20 src]# vim /usr/local/alertmanager/alertmanager.yml
global:
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '2228360744@qq.com'
  smtp_auth_username: '2228360744@qq.com'
  smtp_auth_password: 'ybqtwtrdjnrfeaha'
  smtp_hello: '@qq.com'
  smtp_require_tls: false
route:
  receiver: email-receiver
receivers:
  - name: 'email-receiver'
    email_configs:
      - to: '2228360744@qq.com'
[root@test-20 src]# vim /usr/local/prometheus/prometheus.yml
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "192.168.88.20:9093"
systemctl daemon-reload && systemctl restart alertmanager && systemctl enable alertmanager
systemctl restart prometheus.service

8. 配置Prometheus加载告警规则文件

mkdir /usr/local/prometheus/rules

若某个Instance的up指标的值转为0持续超过1分钟后,将触发告警

[root@test-20 src]# vim /usr/local/prometheus/rules/rule.yml
groups:
  - name: AllInstances
    rules:
      - alert: InstanceDown
        # Condition for alerting
        expr: up == 0
        for: 1m
        # Annotation - additional informational labels to store more information
        annotations:
          title: 'Instance down'
          description: 'Instance has been down for more than 1 minute.'
        # Labels - additional labels to be attached to the alert
        labels:
          severity: 'critical'
[root@test-20 src]# vim /usr/local/prometheus/prometheus.yml
rule_files:
  - "rules/*.yml"
  # - "second_rules.yml"
systemctl restart prometheus.service

测试:

[root@test-30 ~]# systemctl stop node-exporter.service

 

9. MySQL exporter

准备好mysql数据库

GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'%' identified by '123456';
flush privileges;
[root@test-30 src]# vim /etc/my.cnf
[client]
host=127.0.0.1
user=exporter
password=123456
/etc/init.d/mysqld restart

安装mysqld_exporter

tar -xf mysqld_exporter-0.14.0.linux-amd64.tar.gz
ln -sv /usr/local/src/mysqld_exporter-0.14.0.linux-amd64 /usr/local/mysqld_exporter
cat > /usr/lib/systemd/system/mysqld_exporter.service <<EOF
[Unit]
Description=mysqld_exporter
Documentation=https://prometheus.io/
After=network.target

[Service]
Type=simple
ExecStart=/usr/local/mysqld_exporter/mysqld_exporter --config.my-cnf=/etc/my.cnf
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload && systemctl restart mysqld_exporter.service && systemctl enable mysqld_exporter.service

配置服务器:

[root@test-20 src]# vim /usr/local/prometheus/prometheus.yml
  - job_name: "mysqld_exporter"
    static_configs:
      - targets: ["192.168.88.30:9104"]
systemctl restart prometheus.service

结果:

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 


__EOF__

本文作者panda-wei
本文链接https://www.cnblogs.com/panda-wei/p/16800370.html
关于博主:评论和私信会在第一时间回复。或者直接私信我。
版权声明:本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!
声援博主:如果您觉得文章对您有帮助,可以点击文章右下角推荐一下。您的鼓励是博主的最大动力!
posted @   kivtx  阅读(40)  评论(0编辑  收藏  举报
(评论功能已被禁用)
相关博文:
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异
· 三行代码完成国际化适配,妙~啊~
点击右上角即可分享
微信分享提示