ESXI与PVE监控告警
ESXI
配置文件
[root@192 exporter]# cat vmware_exporter/config.env
VSPHERE_USER=administrator@vm.com
VSPHERE_PASSWORD=P@sswor
VSPHERE_HOST=192.168.0.20
VSPHERE_IGNORE_SSL=TRUE
VSPHERE_SPECS_SIZE=2000
启动脚本
[root@192 exporter]# cat vmware_exporter/start.sh
docker run -it -d --rm -p 9272:9272 --name vmware_exporter --env-file config.env pryorda/vmware_exporter
prometheus配置
- job_name: 'vmware_vcenter'
metrics_path: '/metrics'
static_configs:
- targets:
- '192.168.0.20'
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 192.168.0.241:9272
grafana图表:11243
告警规则
[root@192 data]# cat /home/prometheus/rules/vmware-exporter.rules
groups:
- name: vmwarestatus
rules:
- alert: HighNumberOfSnapshots
expr: vmware_vm_snapshots > 5
for: 30m
labels:
severity: critical
department: 大学城
annotations:
summary: High Number of Snapshots (instance {{ $labels.instance }})
description: "High snapshots number on {{ $labels.instance }}: {{ $value }}\n Num = {{ $value }}\n VMware_Name = {{ $labels.vm_name }}"
- alert: VirtualMachineMemoryCritical
expr: vmware_vm_mem_usage_average / 100 >= 90
for: 30m
labels:
severity: critical
department: 大学城
annotations:
summary: Virtual Machine Memory Critical (instance {{ $labels.instance }})
description: "High memory usage on {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: EsxiHostCPUCritical
expr: ((vmware_host_cpu_usage / vmware_host_cpu_max) * 100) > 90
for: 5m
labels:
severity: critical
department: 大学城
annotations:
summary: Esxi Host CPU Warning (instance {{ $labels.instance }})
description: "High CPU usage on ESXi host {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: EsxiDiskUsage
expr: ((vmware_datastore_capacity_size - vmware_datastore_freespace_size) / vmware_datastore_capacity_size) * 100 >99
for: 5m
labels:
severity: critical
department: 大学城
annotations:
summary: Esxi Host Disk Warning (instance {{ $labels.instance }})
description: "High datastore usage on {{ $labels.instance }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: EsxiHostStatus
expr: vmware_host_power_state == 0
for: 5m
labels:
severity: critical
department: 大学城
annotations:
summary: Esxi Host Status Warning (instance {{ $labels.instance }})
description: "ESXi host is not powered on ({{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
PVE
配置文件
说明:需要先在 PVE 上创建对应的监控用户(即下文配置中的 prometheus@pve),并赋予只读的监控权限(例如 PVEAuditor 角色)。创建方法可参考:https://github.com/nbuchwitz/check_pve
[root@192 exporter]# cat pve_exporter/pve.yml
default:
user: prometheus@pve
password: P@ssword7
verify_ssl: false
启动脚本
[root@192 exporter]# cat pve_exporter/start.sh
docker run --name prometheus-pve-exporter -d -p 9221:9221 -v /data/exporter/pve_exporter/pve.yml:/etc/pve.yml prompve/prometheus-pve-exporter
prometheus配置
- job_name: 'pve'
static_configs:
- targets:
- 192.168.0.150
metrics_path: /pve
params:
module: [default]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 192.168.0.241:9221
grafana图表:10347
告警规则
[root@192 exporter]# cat /home/prometheus/rules/pve-exporter.rules
groups:
- name: pvestatus
rules:
- alert: PVEMemoryWarning
expr: ((pve_memory_usage_bytes / pve_memory_size_bytes * on(id, instance) group_left(name, type) pve_guest_info) and on(id, instance) pve_up == 1) * 100 >98
for: 5m
labels:
severity: critical
department: 大学城
annotations:
summary: pve Memory Warning (instance {{ $labels.name }})
description: "High memory usage on {{ $labels.name }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PVEHostCPUCritical
expr: (((pve_cpu_usage_ratio / pve_cpu_usage_limit) * on(id, instance) group_left(name, type) pve_guest_info) and on(id, instance) pve_up == 1) *100 >90
for: 5m
labels:
severity: critical
department: 大学城
annotations:
summary: PVE Host CPU Warning (instance {{ $labels.name }})
description: "High CPU usage on PVE guest {{ $labels.name }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PVEDiskUsage
expr: pve_disk_usage_bytes{id=~"storage/.+"} / pve_disk_size_bytes * on (id, instance) group_left(storage) pve_storage_info *100 >95
for: 5m
labels:
severity: critical
department: 大学城
annotations:
summary: PVE disk usage > 95%!!! (storage {{ $labels.storage }})
description: "High disk usage on PVE storage {{ $labels.storage }}: {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PVENodeStatus
expr: pve_up{id=~"node/.+"} * on(id, instance) group_left(name) pve_node_info == 0
for: 5m
labels:
severity: critical
department: 大学城
annotations:
summary: PVE node is down!!! (instance {{ $labels.name }})
description: "PVE node {{ $labels.name }} is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
如对您有帮助,支持下呗!
微信

支付宝

分类:
Prometheus
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
2017-12-05 maps.reg
2017-12-05 dnion的remap.conf文件