Linux服务器系统监控
1、Docker安装Prometheus
1.1、安装docker和docker-compose环境
https://www.cnblogs.com/hg-super-man/p/10908220.html
1.2、安装
拷贝配置
https://gitee.com/xiaohai008/docker-prometheus.git
1.3、启动服务
docker-compose up -d
2、二进制安装
https://prometheus.io/download/
2.1、安装Prometheus
# 下载prometheus二进制压缩包
wget https://github.com/prometheus/prometheus/releases/download/v2.45.1/prometheus-2.45.1.linux-amd64.tar.gz
# 解压
tar -zxvf prometheus-2.45.1.linux-amd64.tar.gz
# 移动到/opt
mkdir /opt/prometheus -p
mv prometheus-2.45.1.linux-amd64 /opt/prometheus/prometheus
# 创建一个专门的prometheus用户
useradd -M -s /usr/sbin/nologin prometheus
# 更改prometheus用户的文件夹权限
chown -R prometheus:prometheus /opt/prometheus
# 创建systemd 服务
cat > /etc/systemd/system/prometheus.service << "EOF"
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network-online.target
[Service]
Type=simple
User=prometheus
Group=prometheus
Restart=on-failure
ExecStart=/opt/prometheus/prometheus/prometheus \
--config.file=/opt/prometheus/prometheus/prometheus.yml \
--storage.tsdb.path=/opt/prometheus/prometheus/data \
--storage.tsdb.retention.time=60d \
--web.enable-lifecycle
[Install]
WantedBy=multi-user.target
EOF
# 启动Prometheus
systemctl daemon-reload
systemctl start prometheus.service
# 加入到开机自启动
systemctl enable prometheus.service
# 检查
systemctl status prometheus.service
# 查看Prometheus的日志以进行故障排除
journalctl -u prometheus.service -f
2.1.1、 访问地址
prometheus http://ip:9090
监控指标 http://ip:9090/metrics
2.2、安装alertmanager
# 下载alertmanager二进制压缩包
wget https://github.com/prometheus/alertmanager/releases/download/v0.26.0/alertmanager-0.26.0.linux-amd64.tar.gz
# 解压
tar -zxvf alertmanager-0.26.0.linux-amd64.tar.gz
# 移动到/opt
mv alertmanager-0.26.0.linux-amd64 /opt/prometheus/alertmanager
# 更改prometheus用户的文件夹权限
chown -R prometheus:prometheus /opt/prometheus/alertmanager
# 创建systemd 服务
cat > /etc/systemd/system/alertmanager.service << "EOF"
[Unit]
Description=Alert Manager
Wants=network-online.target
After=network-online.target
[Service]
Type=simple
User=prometheus
Group=prometheus
ExecStart=/opt/prometheus/alertmanager/alertmanager \
--config.file=/opt/prometheus/alertmanager/alertmanager.yml \
--storage.path=/opt/prometheus/alertmanager/data
Restart=always
[Install]
WantedBy=multi-user.target
EOF
# 启动alertmanager
systemctl daemon-reload
systemctl start alertmanager.service
# 检查
systemctl status alertmanager.service
# 加入到开机自启动
systemctl enable alertmanager.service
2.2.1、 访问地址
alertmanager http://ip:9093
2.2.2、修改prometheus配置
增加alertmanager配置
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - localhost:9093
# 告警配置
rule_files:
  - "alert.yml"
2.2.3、增加触发器配置文件
cat > /opt/prometheus/prometheus/alert.yml <<"EOF"
groups:
  - name: Prometheus alert
    rules:
      # 对任何实例超过30s无法联系的情况发出警报
      - alert: 服务告警
        expr: up == 0
        for: 30s
        labels:
          severity: critical
        annotations:
          instance: "{{ $labels.instance }}"
          description: "{{ $labels.job }} 服务已关闭"
EOF
2.2.4、检查配置
cd /opt/prometheus/prometheus
./promtool check config prometheus.yml
2.2.5、重启prometheus
systemctl restart prometheus
2.3、安装grafana
https://grafana.com/grafana/download
# 下载grafana二进制压缩包
wget https://dl.grafana.com/enterprise/release/grafana-enterprise-10.1.5.linux-amd64.tar.gz
# 解压
tar -zxvf grafana-enterprise-10.1.5.linux-amd64.tar.gz
# 移动到/opt
mv grafana-enterprise-10.1.5 /opt/prometheus/grafana
# 更改prometheus用户的文件夹权限
chown -R prometheus:prometheus /opt/prometheus/grafana
# 创建systemd 服务
cat > /etc/systemd/system/grafana-server.service << "EOF"
[Unit]
Description=Grafana Server
Documentation=http://docs.grafana.org
[Service]
Type=simple
User=prometheus
Group=prometheus
Restart=on-failure
ExecStart=/opt/prometheus/grafana/bin/grafana-server \
--config=/opt/prometheus/grafana/conf/defaults.ini \
--homepath=/opt/prometheus/grafana
[Install]
WantedBy=multi-user.target
EOF
# 启动grafana
systemctl daemon-reload
systemctl start grafana-server
# 检查
systemctl status grafana-server.service
# 加入到开机自启动
systemctl enable grafana-server.service
2.3.1、 访问地址
grafana http://ip:3000
2.4、安装node_exporter
单独给任何机器安装监控服务也是使用下面操作
https://prometheus.io/download/
# 下载node_exporter二进制压缩包
wget https://github.com/prometheus/node_exporter/releases/download/v1.6.1/node_exporter-1.6.1.linux-amd64.tar.gz
# 解压
tar -zxvf node_exporter-1.6.1.linux-amd64.tar.gz
# 移动到/opt
mv node_exporter-1.6.1.linux-amd64 /opt/prometheus/node_exporter
# 更改prometheus用户的文件夹权限
chown -R prometheus:prometheus /opt/prometheus/node_exporter
# 创建systemd 服务
cat > /etc/systemd/system/node_exporter.service << "EOF"
[Unit]
Description=node_exporter
Documentation=https://prometheus.io
After=network.target
[Service]
Type=simple
User=prometheus
Group=prometheus
Restart=on-failure
ExecStart=/opt/prometheus/node_exporter/node_exporter
[Install]
WantedBy=multi-user.target
EOF
# 启动node_exporter
systemctl daemon-reload
systemctl start node_exporter
# 检查
systemctl status node_exporter
# 加入到开机自启动
systemctl enable node_exporter
2.4.1、 访问地址
监控指标 http://ip:9100/metrics
2.4.2、修改prometheus配置
在scrape_configs这行下面添加如下配置
vi /opt/prometheus/prometheus/prometheus.yml
  # node_exporter配置
  - job_name: "node_exporter"
    scrape_interval: 15s
    static_configs:
      - targets: ["localhost:9100"]
        labels:
          instance: Prometheus服务器
2.4.3、重载prometheus
curl -X POST http://localhost:9090/-/reload
2.5、从Grafana.com导入仪表盘
https://grafana.com/grafana/dashboards/
3、通过git安装
# 生成服务
git clone https://gitee.com/xiaohai008/prometheus.git /opt/prometheus
cd /opt/prometheus
mv *.service /etc/systemd/system/
# 创建一个专门的prometheus用户
useradd -M -s /usr/sbin/nologin prometheus
# 更改prometheus用户的文件夹权限
chown -R prometheus:prometheus /opt/prometheus
# 启动
systemctl daemon-reload
systemctl start prometheus
systemctl start grafana-server
systemctl start node_exporter
systemctl start alertmanager
# 设置开机自启
systemctl enable prometheus
systemctl enable grafana-server
systemctl enable node_exporter
systemctl enable alertmanager
# 检查
systemctl status prometheus
systemctl status grafana-server
systemctl status node_exporter
systemctl status alertmanager