【 PushGateway 】采集Nginx stub_status指标
一、Nginx开启状态页
Nginx 各项指标的具体含义这里不再赘述，详情请参考：Nginx 配置主动健康检查 - https://www.cnblogs.com/szz1113/p/11772055.html
server {
    listen 80;
    server_name status.pyenv.cc;

    # Upstream health-check status page consumed by the monitoring script.
    location /upstream-status {
        # check_status comes from nginx_upstream_check_module (bundled with
        # Tengine, patched into nginx/OpenResty builds). It serves the
        # {"servers": {"total": ..., "server": [...]}} document when queried
        # with ?format=json. stub_status only emits plain-text connection
        # counters and cannot be parsed as JSON.
        check_status;
        access_log off;
        # Only local callers may read the status page.
        allow 127.0.0.1;
        deny all;
    }
}
二、将状态信息推送至PushGateway
# cat openresty_status_monitoring.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
try:
import sys
import os
import json
import time
import requests
import subprocess
import prometheus_client
from prometheus_client import Counter, Gauge, push_to_gateway
from prometheus_client.core import CollectorRegistry
REGISTRY = CollectorRegistry(auto_describe=False)
except ImportError:
print("Import Error")
exit()
class Component_monitor:
    """Read the nginx upstream health-check status page and push it to a Pushgateway.

    The status page is expected to return JSON shaped like this sample
    (from ``http://FQDN/status?format=json``)::

        {"servers": {
          "total": 2,
          "generation": 2,
          "server": [
            {"index": 0, "upstream": "wt_teams_dev", "name": "10.6.1.211:31231", "status": "down", "rise": 0, "fall": 1860, "type": "http", "port": 0},
            {"index": 1, "upstream": "wt_teams_dev", "name": "10.6.1.211:31232", "status": "up", "rise": 0, "fall": 1860, "type": "http", "port": 0},
            {"index": 2, "upstream": "wt_rd_nodes", "name": "10.6.1.211:31230", "status": "up", "rise": 4053, "fall": 0, "type": "http", "port": 0}
          ]
        }}
    """

    # data_type -> (metric name, help text, label names, job-name suffix).
    _METRICS = {
        "total": (
            "openresty_total",
            "Openresty Upstream Total",
            ("indicators",),
            "_openresty_total",
        ),
        "upstream_node_sum": (
            "openresty_upstream_node_sum",
            "Openresty Upstream Sum",
            ("indicators", "upstream_name"),
            "_upstream_node_sum",
        ),
        "upstream_node_status": (
            "openresty_upstream_node_status",
            "Openresty Upstream Status",
            ("indicators", "upstream_name", "name"),
            "_upstream_node_status",
        ),
        "upstream_node_rise": (
            "openresty_upstream_node_rise",
            "Openresty Upstream Rise",
            ("indicators", "upstream_name", "name"),
            "_upstream_node_rise",
        ),
        "upstream_node_fall": (
            "openresty_upstream_node_fall",
            "Openresty Upstream Fall",
            ("indicators", "upstream_name", "name"),
            "_upstream_node_fall",
        ),
    }

    # Per-node data types whose value is copied verbatim from a server entry.
    _NODE_FIELDS = {
        "upstream_node_rise": "rise",
        "upstream_node_fall": "fall",
    }

    def __init__(self, gateway_ip, gateway_port, status_url, env, timeout=10):
        """Store the connection settings.

        Args:
            gateway_ip: Pushgateway host or IP.
            gateway_port: Pushgateway port; a string or an int is accepted.
            status_url: URL of the JSON status page to read.
            env: Environment tag, used as the prefix of every pushed job name.
            timeout: Seconds to wait for the status page before giving up.
        """
        self.gateway_ip = gateway_ip
        self.gateway_port = gateway_port
        # format() instead of "+" so an integer port does not raise TypeError.
        self.gateway_url = "{}:{}".format(gateway_ip, gateway_port)
        self.env = env
        self.status_url = status_url
        self.timeout = timeout

    @staticmethod
    def _build_samples(data_type, data):
        """Turn ``data`` into a list of ``(label_values, value)`` pairs without mutating it."""
        if data_type == "total":
            return [((data_type,), data)]
        if data_type == "upstream_node_sum":
            return [((data_type, upstream), count) for upstream, count in data.items()]

        samples = []
        for node in data:
            if data_type == "upstream_node_status":
                # 0 = up, 1 = anything else. Computed locally: writing the
                # number back into the entry would make a second push of the
                # same list report every "up" node as down.
                value = 0 if node.get("status") == "up" else 1
            else:
                value = node.get(Component_monitor._NODE_FIELDS[data_type])
            samples.append(((data_type, node.get("upstream"), node.get("name")), value))
        return samples

    def post_pushgateway(self, data_type, data,):
        """Push one group of gauges to the Pushgateway.

        Args:
            data_type: One of ``total``, ``upstream_node_sum``,
                ``upstream_node_status``, ``upstream_node_rise`` or
                ``upstream_node_fall``.
            data: A number for ``total``, a ``{upstream: count}`` mapping for
                ``upstream_node_sum``, otherwise the list of server entries.

        Returns:
            A human-readable status string starting with ``【 Success 】`` or
            ``【 Error 】``. Failures are reported through the string, never raised.
        """
        spec = self._METRICS.get(data_type)
        if spec is None:
            return "【 Error 】- {} - unsupported data_type".format(data_type)
        metric_name, help_text, label_names, job_suffix = spec

        # Best-effort by design: any failure (bad data, gateway unreachable)
        # becomes an error string so the remaining groups can still be pushed.
        try:
            samples = self._build_samples(data_type, data)
            registry = CollectorRegistry()
            gauge = Gauge(metric_name, help_text, list(label_names), registry=registry)
            for label_values, value in samples:
                gauge.labels(*label_values).set(value)
            push_to_gateway(self.gateway_url, job=self.env + job_suffix, registry=registry)
            return "【 Success 】- {} is ok".format(data_type)
        except Exception as e:
            return "【 Error 】- {} - {}".format(data_type, e)

    def http_status_response(self):
        """Fetch the status page and push every metric group.

        Returns:
            A dict with the run timestamp under ``date`` and the status string
            of each push under ``total``, ``node_sum``, ``node_status``,
            ``node_rise`` and ``node_fall``.

        Raises:
            requests.RequestException: the page cannot be fetched in time or
                answers with an HTTP error status.
            KeyError: the JSON lacks the ``servers`` structure shown above.
        """
        with requests.get(url=self.status_url, timeout=self.timeout) as reply:
            # Fail on 4xx/5xx instead of trying to parse an error page as JSON.
            reply.raise_for_status()
            result = reply.json()

        response = {
            "date": time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime()),
            "total": {},
            "node_sum": {},
            "node_status": {},
            "node_rise": {},
            "node_fall": {},
        }

        # total
        total = result['servers']['total']
        response['total'] = self.post_pushgateway(data_type='total', data=total)

        # upstream_node_sum: number of nodes per upstream, counted in one pass,
        # e.g. {'wt-ios': 2, 'wt_teams_dev': 2, 'wt_rd_nodes': 1}
        server = result['servers']['server']
        num = {}
        for sv in server:
            num[sv['upstream']] = num.get(sv['upstream'], 0) + 1
        response['node_sum'] = self.post_pushgateway(data_type='upstream_node_sum', data=num)

        # upstream_node_status / upstream_node_rise / upstream_node_fall
        response['node_status'] = self.post_pushgateway(data_type='upstream_node_status', data=server)
        response['node_rise'] = self.post_pushgateway(data_type='upstream_node_rise', data=server)
        response['node_fall'] = self.post_pushgateway(data_type='upstream_node_fall', data=server)
        return response
if __name__ == '__main__':
    # Settings for a single collect-and-push run.
    settings = {
        # Upstream status page, requested in JSON format.
        "status_url": "http://status.pyenv.cc/upstream-status?format=json",
        # Pushgateway service address.
        "gateway_ip": "127.0.0.1",
        # Pushgateway service port (described as a NodePort in the original note).
        "gateway_port": "9091",
        # Environment tag.
        "env": "alpha",
    }
    # Print the per-group push results returned by the monitor.
    print(Component_monitor(**settings).http_status_response())
# 执行脚本测试
root@yc-dev-k8s-control-plane-01-ningxia:~/xxx_scripts/python_scripts/pushgateway_scripts# python3 openresty_status_monitoring.py
{'date': '2022-01-20-02_50_31', 'total': '【 Success 】- total is ok', 'node_sum': '【 Success 】- upstream_node_sum is ok', 'node_status': '【 Success 】- upstream_node_status is ok', 'node_rise': '【 Success 】- upstream_node_rise is ok', 'node_fall': '【 Success 】- upstream_node_fall is ok'}
- pushgateway job
- prometheus graph
三、Grafana Template
模版过于简单，这里不再阐述。
四、Prometheus Rules
# cat prometheus-rules.yaml
- name: kubernetes-absent
  rules:
    # OpenResty upstream node is down (alpha environment).
    #
    # The monitoring script pushes the gauge openresty_upstream_node_status
    # (0 = up, 1 = down) under the Pushgateway job "alpha_upstream_node_status".
    # The gauge is compared directly: rate() is meant for counters and reads 0
    # for a gauge that stays at 1, so a node that remains down would stop
    # alerting once the range window passed.
    # NOTE(review): the exported_job label assumes Prometheus scrapes the
    # Pushgateway without honor_labels, as the annotation below already
    # implies; use job="..." instead if honor_labels is enabled.
    - alert: OpenrestyNodeDownOther01
      annotations:
        message: "Alpha Node group {{ $labels.exported_job }}/{{ $labels.upstream_name }} ({{ $labels.name }}) is down;"
      expr: |
        openresty_upstream_node_status{exported_job="alpha_upstream_node_status"} > 0
      for: 1m
      labels:
        severity: critical
向往的地方很远,喜欢的东西很贵,这就是我努力的目标。