1. 准备环境

本次操作基于 CentOS 7,以下命令均需以 root 用户执行。

# Disable SELinux to avoid compatibility problems.
# setenforce only changes the running system and returns an error when SELinux
# is already disabled, so that error is deliberately ignored.
setenforce 0 2>/dev/null || true
# Persist the setting across reboots, whichever mode the config currently names
# (enforcing or permissive). The pattern does not touch the SELINUXTYPE= line.
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config

# Turn off the firewall: stop the running firewalld service, then remove it
# from the boot sequence (the output of the disable call is discarded).
systemctl stop firewalld
systemctl disable firewalld &>/dev/null

# Configure DNS: overwrite /etc/resolv.conf with three public resolvers,
# one "nameserver <ip>" line per address.
printf 'nameserver %s\n' 114.114.114.114 8.8.8.8 223.5.5.5 > /etc/resolv.conf

# Replace the yum base and EPEL repositories with mirror definitions.
# -p keeps the step repeatable when the backup directory already exists.
mkdir -p /etc/yum.repos.d/old
# Back up only the *.repo files so the glob does not also match the backup
# directory itself; the error on a rerun with nothing left to move is ignored.
mv /etc/yum.repos.d/*.repo /etc/yum.repos.d/old/ 2>/dev/null
# -f makes curl exit non-zero on an HTTP error instead of saving the server's
# error page as a repo file; -sS hides the progress meter but still prints errors.
curl -fsSL -o /etc/yum.repos.d/CentOS-Base.repo https://repo.huaweicloud.com/repository/conf/CentOS-7-reg.repo
curl -fsSL -o /etc/yum.repos.d/epel.repo https://mirrors.aliyun.com/repo/epel-7.repo

# Install Docker CE from the Aliyun mirror of the docker-ce repository.
yum clean all &>/dev/null
yum makecache &>/dev/null
# yum-utils provides yum-config-manager, which the next command needs.
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# -y keeps this install non-interactive, like the one above.
yum install -y docker-ce docker-ce-cli containerd.io

# Start the daemon now and on every boot, so containers can be restarted
# automatically after the host reboots.
systemctl enable docker
systemctl start docker

# Print the Docker version to confirm the installation succeeded.
docker --version

# Configure Docker registry mirrors.
# The file is overwritten (">") rather than appended to: a second JSON object
# appended to an existing daemon.json would make the file unparseable.
# NOTE: any settings already present in daemon.json are replaced by this step.
mkdir -p /etc/docker
cat > /etc/docker/daemon.json << 'EOF'
{
  "registry-mirrors": [
    "https://docker.registry.cyou",
    "https://docker-cf.registry.cyou",
    "https://dockercf.jsdelivr.fyi",
    "https://docker.jsdelivr.fyi",
    "https://dockertest.jsdelivr.fyi",
    "https://mirror.aliyuncs.com",
    "https://dockerproxy.com",
    "https://mirror.baidubce.com",
    "https://docker.m.daocloud.io",
    "https://docker.nju.edu.cn",
    "https://docker.mirrors.sjtug.sjtu.edu.cn",
    "https://docker.mirrors.ustc.edu.cn",
    "https://mirror.iscas.ac.cn",
    "https://docker.rainbond.cc"
  ]
}
EOF
systemctl daemon-reload
systemctl restart docker

# Verify the change: the configured mirrors are listed in the output.
docker info

# Install the standalone docker-compose binary from the latest GitHub release.
# The OS part of the asset name is lower-cased (docker-compose-linux-x86_64);
# -f makes curl fail on an HTTP error instead of saving the error page as the
# binary.
sudo curl -fL "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m)" -o /usr/local/bin/docker-compose
# The downloaded file must be made executable before it can be run.
sudo chmod +x /usr/local/bin/docker-compose
# Confirm that docker-compose was installed successfully.
docker-compose --version

# Pull the images used by this guide.
# pryorda/vmware_exporter is the image for monitoring VMware vCenter (vSphere).
for image in prom/prometheus grafana/grafana prom/node-exporter pryorda/vmware_exporter; do
  docker pull "$image"
done
docker images   # list the local images

# To reuse the node-exporter image on the monitored nodes, export it here:
#   docker save -o node_export.tgz prom/node-exporter
# and import it on each monitored node:
#   docker load -i node_export.tgz
# 


2. 创建工作目录

创建一个目录来存放相关配置文件:

# -p keeps the step repeatable when the directory already exists;
# cd runs only if the directory could be created.
mkdir -p /opt/prometheus-grafana && cd /opt/prometheus-grafana

3. 配置 Prometheus

3.1 创建 Prometheus 配置文件

新建 prometheus.yml

# Open the file in an editor and paste the configuration shown below.
vim prometheus.yml

内容如下:

# Global defaults applied to every scrape job unless a job overrides them.
global:
  scrape_interval: 1m        # time between scrapes
  scrape_timeout: 10s        # time allowed for a single scrape
  evaluation_interval: 1m    # time between rule evaluations

runtime:
  gogc: 80                   # Go garbage-collector target percentage (GOGC)

scrape_configs:
  # Five targets on 10.100.100.200 (ports 9100-9104); each one carries a shared
  # group label and its own human-readable instance label.
  - job_name: '集群200'
    honor_timestamps: true
    scrape_interval: 1m
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    enable_compression: true
    follow_redirects: true
    enable_http2: true
    static_configs:
      - targets: ['10.100.100.200:9100']
        labels:
          group: '200集群'
          instance: '200集群-control-node'
      - targets: ['10.100.100.200:9101']
        labels:
          group: '200集群'
          instance: '200集群-compute-node1'
      - targets: ['10.100.100.200:9102']
        labels:
          group: '200集群'
          instance: '200集群-compute-node2'
      - targets: ['10.100.100.200:9103']
        labels:
          group: '200集群'
          instance: '200集群-compute-node3'
      - targets: ['10.100.100.200:9104']
        labels:
          group: '200集群'
          instance: '200集群-compute-node4'

  # The listed address (10.99.99.55) is copied into the "target" URL parameter
  # and the instance label; the scrape itself is sent to 10.99.99.172:9272
  # (presumably the vmware_exporter instance - confirm for your environment).
  - job_name: 'vmware_vcenter'
    scrape_interval: 1m
    scrape_timeout: 10s
    metrics_path: '/metrics'
    static_configs:
      - targets: ['10.99.99.55']
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 10.99.99.172:9272

  # Dynamic relabeling example (enable if needed)
  # - job_name: 'dynamic-targets'
  #   kubernetes_sd_configs:
  #   - role: endpoints
  #   relabel_configs:
  #   - source_labels: [__meta_kubernetes_service_name]
  #     regex: 'metrics-service'
  #     action: keep


3.2 创建存储配置

创建一个目录用来保存 Prometheus 数据:

# The Prometheus container runs as the unprivileged user nobody (UID 65534);
# a root-owned bind-mounted directory is not writable for it, so ownership is
# handed over right after the directory is created.
mkdir -p prometheus_data
chown 65534:65534 prometheus_data

4. 创建 Docker Compose 配置

新建 docker-compose.yml

# Open the file in an editor and paste the configuration shown below.
vim docker-compose.yml

内容如下:

version: '3.7'
services:
  # Prometheus server: the configuration file and the data directory are
  # bind-mounted from /opt/prometheus-grafana on the host.
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    ports:
      - "9090:9090"
    volumes:
      - /opt/prometheus-grafana/prometheus.yml:/etc/prometheus/prometheus.yml
      - /opt/prometheus-grafana/prometheus_data:/prometheus
    restart: unless-stopped

  # Grafana UI. Its data lives in the named volume declared at the bottom of
  # this file: a host directory created by Docker is owned by root and is not
  # writable by the non-root Grafana user, while a named volume is initialised
  # with the ownership the image expects.
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      # Initial admin credentials; change them for anything but a test setup.
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
    restart: unless-stopped

  # node_exporter, published on host port 9100.
  node_exporter:
    image: prom/node-exporter:latest
    container_name: node_exporter
    ports:
      - "9100:9100"
    restart: unless-stopped

volumes:
  grafana_data:

5. 启动服务

运行以下命令启动容器:

# Create and start the services from docker-compose.yml in the current
# directory; -d runs them detached (in the background).
docker-compose up -d

检查服务状态:

# List the running containers.
docker ps

6. 访问 Prometheus 和 Grafana

  1. Prometheus:访问 http://<your_server_ip>:9090
  2. Grafana:访问 http://<your_server_ip>:3000
    • 默认用户名:admin
    • 默认密码:admin

7. 配置 Grafana 数据源

  1. 登录 Grafana 后,进入 Configuration > Data Sources
  2. 点击 Add data source,选择 Prometheus
  3. 在 URL 栏输入 http://prometheus:9090(Grafana 与 Prometheus 位于同一个 Compose 网络中,可直接用服务名 prometheus 访问),点击 Save & Test

8. 添加监控面板

在 Grafana 中:

  1. 点击 + > Import
  2. 输入面板的 ID,例如:
    • Node Exporter Full:1860
  3. 点击 Load,选择数据源后保存。

9. 验证监控效果

  • 在 Prometheus 中,可以通过 http://<your_server_ip>:9090/targets(菜单 Status > Targets)查看监控目标状态。
  • 在 Grafana 中,可以通过导入的面板查看实时图表。

注意事项

  • 数据持久化:Prometheus 和 Grafana 数据通过 docker-compose.yml 中的挂载路径实现持久化。
  • 安全性:生产环境建议修改 Grafana 默认用户名密码。
  • 扩展性:可以通过修改 prometheus.yml 添加其他监控目标,例如数据库、应用程序等。

这套方案简单易用,非常适合快速部署监控环境。