开发Prometheus的redis bigkey exporter,完善k8s下redis容器的性能监控

    好久没来博客园写技术总结了,回顾这些年的职业路径,从自动化测试到python后端到golang后端直到现在的devops,确实积累了很多心得,可惜都没时间来写写这些心得(主要是懒)。自从转为devops后确实有比较多的时间了(主要是加班少: ) ),看来以后还是要多写写技术总结 :)。

     废话不多说,直接进入今天的主题。前段时间公司要求对k8s下的redis容器进行bigkey监控,监控指标需要接入Prometheus,并用grafana做可视化。分析了具体需求后,在网上查了一下,没有找到支持redis bigkey指标的exporter,最后决定基于redis的内置工具进行定制开发,具体的实现可参考我的github:https://github.com/zhenghan008/redisKeyMetrics 。目前已实现的功能支持bigkey、hotkey和memkey的监控。接入Prometheus后,在grafana中的展示效果大概如下图所示,其中24小时内redis bigkey变化率的promql可以使用文末给出的表达式,均值统计的表达式也与之类似(参见下面dashboard json中的avg by查询)。结合redis其它exporter提供的key监控指标,算是可以比较完善地监控整个redis的性能。

对应的grafana dashboard的json可以参考下面的:

 


{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "10.1.2"
},
{
"type": "panel",
"id": "graph",
"name": "Graph (old)",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "redis key mean statistics",
"fieldConfig": {
"defaults": {
"unit": "decbytes"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"hiddenSeries": false,
"id": 2,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "10.1.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "avg by (key_group,instance,key_type,sample_type) (label_replace(redis_key_metrics{instance=~\"$instance\", sample_type=~\"$sample_type\"}, \"key_group\", \"$1\", \"key_name\", \"([^:]*)[:]*.*\"))",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "redis key mean statistics",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:143",
"format": "decbytes",
"logBase": 1,
"show": true
},
{
"$$hashKey": "object:144",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Change rate of redis key within 24 hours",
"fieldConfig": {
"defaults": {
"unit": "percent"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 9
},
"hiddenSeries": false,
"id": 1,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "10.1.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum by(key_group,instance,key_type,sample_type) (label_replace(rate(redis_key_metrics{instance=\"$instance\", sample_type=~\"$sample_type\"}[24h]) * 100, \"key_group\", \"$1\", \"key_name\", \"([^:]*)[:]*.*\"))",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Change rate of redis key within 24 hours",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:250",
"format": "percent",
"logBase": 1,
"show": true
},
{
"$$hashKey": "object:251",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
}
],
"refresh": "",
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"definition": "label_values(redis_key_metrics,instance)",
"description": "instance",
"hide": 0,
"includeAll": false,
"label": "instance",
"multi": false,
"name": "instance",
"options": [],
"query": {
"query": "label_values(redis_key_metrics,instance)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"definition": "label_values(redis_key_metrics,sample_type)",
"description": "sample_type",
"hide": 0,
"includeAll": false,
"label": "sample_type",
"multi": false,
"name": "sample_type",
"options": [],
"query": {
"query": "label_values(redis_key_metrics,sample_type)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Redis MEM HOT BIG Key Statistics",
"uid": "ef713c79-9bab-4e3e-ab09-d2476c527f60",
"version": 11,
"weekStart": ""
}

 

 

 

 

24小时内redis key变化率的promql示例:
sum by(key_group) (label_replace(rate(redis_key_metrics[24h]) * 100, "key_group", "$1", "key_name", "([^:]*)[:]*.*"))

 

posted @ 2024-06-19 14:52  zhenghan_郑撼  阅读(98)  评论(2)  编辑  收藏  举报