【 PushGateway 】采集Nginx stub_status指标

⭐️欢迎关注⭐️

一、Nginx开启状态页

Nginx 具体指标含义这里不再赘述,详情参考:Nginx配置主动健康检查 - https://www.cnblogs.com/szz1113/p/11772055.html

# Dedicated virtual host that exposes the Nginx status page.
server {
    listen 80;
    server_name status.pyenv.cc;
    location /upstream-status {
        # NOTE(review): the monitoring script below requests this location with
        # ?format=json and parses a "servers" object, whereas stub_status emits
        # plain-text connection counters -- presumably the check_status
        # directive of nginx_upstream_check_module is what is intended; confirm.
        stub_status on;
        # Do not write status polls to the access log.
        access_log off;
        # Only requests from the local host are allowed; everything else is rejected.
        allow 127.0.0.1;
        deny all;
    }
}

image.png

image.png

二、将状态信息推送至PushGateway

# cat openresty_status_monitoring.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

try:
    import sys
    import os
    import json
    import time
    import requests
    import subprocess
    import prometheus_client
    from prometheus_client import Counter, Gauge, push_to_gateway
    from prometheus_client.core import CollectorRegistry
    REGISTRY = CollectorRegistry(auto_describe=False)
except ImportError:
    print("Import Error")
    exit()

class Component_monitor:
    """
    Collect upstream health data from a status endpoint and push it to a
    Prometheus Pushgateway.

    The status endpoint is expected to return JSON shaped like this:

    // http://FQDN/status?format=json
    {"servers": {
      "total": 2,
      "generation": 2,
      "server": [
        {"index": 0, "upstream": "wt_teams_dev", "name": "10.6.1.211:31231", "status": "down", "rise": 0, "fall": 1860, "type": "http", "port": 0},
        {"index": 1, "upstream": "wt_teams_dev", "name": "10.6.1.211:31232", "status": "up", "rise": 0, "fall": 1860, "type": "http", "port": 0},
        {"index": 2, "upstream": "wt_rd_nodes", "name": "10.6.1.211:31230", "status": "up", "rise": 4053, "fall": 0, "type": "http", "port": 0}
      ]
    }}

    Every supported ``data_type`` is pushed as its own gauge family named
    ``openresty_<data_type>`` under its own Pushgateway job.
    """

    # data_type -> (gauge help text, label names, suffix appended to env to form the job name)
    _METRICS = {
        'total': (
            'Openresty Upstream Total',
            ['indicators'],
            '_openresty_total',
        ),
        'upstream_node_sum': (
            'Openresty Upstream Sum',
            ['indicators', 'upstream_name'],
            '_upstream_node_sum',
        ),
        'upstream_node_status': (
            'Openresty Upstream Status',
            ['indicators', 'upstream_name', 'name'],
            '_upstream_node_status',
        ),
        'upstream_node_rise': (
            'Openresty Upstream Rise',
            ['indicators', 'upstream_name', 'name'],
            '_upstream_node_rise',
        ),
        'upstream_node_fall': (
            'Openresty Upstream Fall',
            ['indicators', 'upstream_name', 'name'],
            '_upstream_node_fall',
        ),
    }

    # Per-server data types whose gauge value is copied from a field of the server entry.
    _SERVER_FIELDS = {
        'upstream_node_rise': 'rise',
        'upstream_node_fall': 'fall',
    }

    def __init__(self, gateway_ip, gateway_port, status_url, env):
        """
        :param gateway_ip: Pushgateway host or IP address.
        :param gateway_port: Pushgateway port; a string or an integer.
        :param status_url: URL of the JSON status endpoint to poll.
        :param env: environment identifier, used as the prefix of every job name.
        """
        self.gateway_ip = gateway_ip
        self.gateway_port = gateway_port
        # format() instead of "+" so that an integer port does not raise TypeError
        self.gateway_url = '{}:{}'.format(gateway_ip, gateway_port)
        self.env = env
        self.status_url = status_url

    @classmethod
    def _build_samples(cls, data_type, data):
        """Return ``[(label_values, value), ...]`` for *data_type* without modifying *data*."""
        if data_type == 'total':
            return [(('total',), data)]
        if data_type == 'upstream_node_sum':
            return [((data_type, upstream), count) for upstream, count in data.items()]
        if data_type == 'upstream_node_status':
            # 0 means "up", anything else is reported as 1; the value is
            # derived here instead of being written back into the entry.
            return [
                (
                    (data_type, sv.get('upstream'), sv.get('name')),
                    0 if sv.get('status') == 'up' else 1,
                )
                for sv in data
            ]
        field = cls._SERVER_FIELDS[data_type]
        return [
            ((data_type, sv.get('upstream'), sv.get('name')), sv.get(field))
            for sv in data
        ]

    @staticmethod
    def _count_upstreams(servers):
        """Return ``{upstream name: number of server entries}`` in first-seen order."""
        counts = {}
        for sv in servers:
            upstream = sv['upstream']
            counts[upstream] = counts.get(upstream, 0) + 1
        return counts

    def post_pushgateway(self, data_type, data,):
        """
        Push one gauge family to the Pushgateway.

        :param data_type: one of ``total``, ``upstream_node_sum``,
            ``upstream_node_status``, ``upstream_node_rise``, ``upstream_node_fall``.
        :param data: the total (number) for ``total``, a ``{upstream: count}``
            dict for ``upstream_node_sum``, otherwise the list of server entries.
        :return: a "Success" or "Error" message string; failures while building
            or pushing the metric are reported in the string, not raised.
        """
        spec = self._METRICS.get(data_type)
        if spec is None:
            # Report an unknown type explicitly rather than returning None.
            return "【 Error 】- {} - unsupported data_type".format(data_type)
        help_text, label_names, job_suffix = spec

        # A fresh registry per call, so each job only carries its own gauge.
        registry = CollectorRegistry()
        try:
            gauge = Gauge('openresty_' + data_type, help_text, label_names, registry=registry)
            for label_values, value in self._build_samples(data_type, data):
                gauge.labels(*label_values).set(value)
            push_to_gateway(self.gateway_url, job=self.env + job_suffix, registry=registry)
            return "【 Success 】- {} is ok".format(data_type)
        except Exception as e:
            return "【 Error 】- {} - {}".format(data_type, e)

    def http_status_response(self, timeout=10):
        """
        Fetch the status JSON and push every supported metric.

        :param timeout: seconds to wait for the status endpoint; without a
            timeout a stalled endpoint would block the script indefinitely.
        :return: dict with a ``date`` stamp and the result message of each push
            under ``total``, ``node_sum``, ``node_status``, ``node_rise``
            and ``node_fall``.
        """
        with requests.get(url=self.status_url, timeout=timeout) as resp:
            result = resp.json()

        response = {
            "date": time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime()),
        }

        # total
        response['total'] = self.post_pushgateway(
            data_type='total', data=result['servers']['total'])

        # upstream_node_sum, e.g. {'wt_teams_dev': 2, 'wt_rd_nodes': 1}
        servers = result['servers']['server']
        response['node_sum'] = self.post_pushgateway(
            data_type='upstream_node_sum', data=self._count_upstreams(servers))

        # upstream_node_status / upstream_node_rise / upstream_node_fall
        response['node_status'] = self.post_pushgateway(
            data_type='upstream_node_status', data=servers)
        response['node_rise'] = self.post_pushgateway(
            data_type='upstream_node_rise', data=servers)
        response['node_fall'] = self.post_pushgateway(
            data_type='upstream_node_fall', data=servers)
        return response

if __name__ == '__main__':
    # Environment identifier; it prefixes every Pushgateway job name
    environment = "alpha"
    # Pushgateway Service Address
    gateway_ip = "127.0.0.1"
    # Pushgateway Service NodePort Port
    gateway_port = "9091"
    # Status endpoint polled for the upstream state (JSON output)
    status_url = "http://status.pyenv.cc/upstream-status?format=json"

    monitor = Component_monitor(
        gateway_ip=gateway_ip,
        gateway_port=gateway_port,
        status_url=status_url,
        env=environment,
    )
    # Print the per-metric push results
    print(monitor.http_status_response())
# 执行脚本测试
root@yc-dev-k8s-control-plane-01-ningxia:~/xxx_scripts/python_scripts/pushgateway_scripts# python3 openresty_status_monitoring.py
{'date': '2022-01-20-02_50_31', 'total': '【 Success 】- total is ok', 'node_sum': '【 Success 】- upstream_node_sum is ok', 'node_status': '【 Success 】- upstream_node_status is ok', 'node_rise': '【 Success 】- upstream_node_rise is ok', 'node_fall': '【 Success 】- upstream_node_fall is ok'}
  • pushgateway job

image.png

  • prometheus graph

image.png

image.png

image.png

image.png

image.png

三、Grafana Template

模版过于简单,这里不再阐述;

image.png

四、Prometheus Rules

# cat prometheus-rules.yaml
  - name: kubernetes-absent
    rules:
    # An upstream node of the alpha environment is down.
    # The monitoring script pushes the gauge openresty_upstream_node_status
    # (0 = up, 1 = down) under the Pushgateway job alpha_upstream_node_status.
    # The gauge is compared directly because rate() is only meaningful for
    # counters. Assumes the pushed job label surfaces as exported_job
    # (honor_labels disabled), which the annotation below already relies on.
    - alert: OpenrestyNodeDownOther01
      annotations:
        message: Alpha Node group {{ $labels.exported_job }}/{{ $labels.upstream_name }} ({{ $labels.name }}) is down;
      expr: |
        openresty_upstream_node_status{exported_job="alpha_upstream_node_status"} == 1
      # The node must stay down for a full minute before the alert fires.
      for: 1m
      labels:
        severity: critical

posted @ 2022-01-24 17:11  SRE运维充电站  阅读(61)  评论(0)  编辑  收藏  举报