使用 Zabbix LLD 自动发现监控多实例 redis
zabbix 可以通过常见的手段监控到各种服务,通过编写脚本来获取返回值并将获取到的值通过图形来展现出来,包括(系统、服务、业务)层面。可是有些时候在一些不固定的场合监控一些不固定的服务就比较麻烦。例如,服务器上运行四个 redis 实例,端口分别是 6379 6380 6381 6382,那么这时候如果需要监控则需要建立四个模板,分别对应不同的模板又要设置不同的 Shell Script 及 UserParameter 来监控不同端口的服务。 zabbix 有 LLD 特性(low level discovery),属于自动发现的范畴(该自动发现属于多服务的监控,是系统层面的),你会发现有很多要监控的对象大部分都是不固定的,到处都是LLD。换句话说,我们要监控的对象如果是固定的,那直接添加一个item就可以了,但是如果不是固定的,那就需要用LLD。 使用方法: 1)使用netstat来捕获redis-server启用的端口,但zabbix-agent运行在zabbix账号下,故须给zabbix账号授权运行netstat -nltp的权限,在root用户下执行如下命令: echo "zabbix ALL=(root) NOPASSWD:/bin/netstat" > /etc/sudoers.d/zabbix echo 'Defaults:zabbix !requiretty' >> /etc/sudoers.d/zabbix 2)将qiueer目录、redis.py复制到 /usr/local/zabbix-agent/scripts 目录,供参考: mkdir -p /usr/local/zabbix-agent/scripts 将 https://github.com/qiueer/zabbix/tree/master/Redis/qiueer 目录下的所有文件拷贝到/usr/local/zabbix-agent/scripts目录中 添加redis.py这个自动发现的脚本 # vim /usr/local/zabbix-agent/scripts/redis.py #!/usr/bin/env python #encoding=utf-8 import sys import os from optparse import OptionParser import re from qiueer.python.slog import slog from qiueer.python.cmds import cmds from qiueer.python.filecache import filecache from qiueer.python.utils import which class Redis(object): def __init__(self, logpath, password=None, port=6379, debug=False): self._logpath = logpath self._password = password self._port = port if port else 6379 self._debug = debug self._file_cache_path = "/tmp/.zabbix_memcache_%s.log" % (port) self._file_cache = filecache(self._file_cache_path) self._logger = slog(self._logpath, debug=debug, size=5, count=5) def get_redis_port_list(self): # sudo权限,必须授予,在root用户下执行如下命令 """ echo "zabbix ALL=(root) NOPASSWD:/bin/netstat" > /etc/sudoers.d/zabbix echo 'Defaults:zabbix !requiretty' >> /etc/sudoers.d/zabbix chmod 600 /etc/sudoers.d/zabbix """ cmdstr = "sudo netstat -nlpt | grep 'redis' | awk '{print $4}'|awk -F: '{print $2}'" c2 = cmds(cmdstr, timeout=3) stdo = c2.stdo() stde = c2.stde() retcode = c2.code() (stdo_list, stde_list) = (re.split("\n",
stde)) logdict = { "cmdstr": cmdstr, "stdo": stdo, "stde": stde, "retcode": retcode, "orders": ["cmdstr", "stdo", "stde", "retcode"], } if retcode !=0: self._logger.dictlog(width=8, level="error", **logdict) return else: self._logger.dictlog(width=8, level="info", **logdict) data = list() for port in stdo_list: if not port:continue port = int(str(port).strip()) data.append({"{#REDIS_PORT}": port}) import json return json.dumps({'data': data}, sort_keys=True, indent=7, separators=(",",":")) def get_item(self, key, port=None, password=None, force=False): """ 参数: """ # cmdstr = "redis-cli -h 172.16.155.21 -p 6379 info | grep 'used_cpu_sys' " port = port if port else self._port password = password if password else self._password if force == False: value = self._file_cache.get_val_from_json(key) logdict = { "msg": "Try To Get From Cache File: %s" % self._file_cache_path, "key": key, "value": value, "orders": ["msg", "key", "value"], } self._logger.dictlog(width=8, level="info", **logdict) if value: return value rds_cli_path = which("redis-cli") ## 适配编译安装,这里设置常用的路径 rds_paths_def = ["/usr/local/bin/redis-cli", "/bin/redis-cli", "/usr/local/redis-server/bin/redis-cli"] cmdstr = None if rds_cli_path: cmdstr = "%s -h 172.16.155.21 -p %s info" % (rds_cli_path, port) if password: cmdstr = "%s -h 172.16.155.21 -a %s -p %s info" % (rds_cli_path, password, port) else: for p in rds_paths_def: if os.path.exists(p) == False: continue cmdstr = "%s -h 172.16.155.21 -p %s info" % (p, port) if password: cmdstr = "%s -h 172.16.155.21 -a %s -p %s info" % (p, password, port) break c2 = cmds(cmdstr, timeout=3) stdo = c2.stdo() stde = c2.stde() retcode = c2.code() (stdo_list, stde_list) = (re.split("\n", stdo), re.split("\n", stde)) logdict = { "cmdstr": cmdstr, "stdo": stdo, "stde": stde, "retcode": retcode, "orders": ["cmdstr", "stdo", "stde", "retcode"], } if retcode !=0: self._logger.dictlog(width=8, level="error", **logdict) return else: self._logger.dictlog(width=8, level="info", 
**logdict) resobj = {} for line in stdo_list: line = str(line).strip() ln_ary = re.split(":", line) if ln_ary and len(ln_ary) != 2:continue dst_key = str(ln_ary[0]).strip() dst_val = str(ln_ary[1]).strip() resobj[dst_key] = dst_val self._file_cache.save_to_cache_file(resobj) return resobj.get(key, "") def main(passwd_file): try: usage = "usage: %prog [options]\ngGet Redis Stat" parser = OptionParser(usage) parser.add_option("-l", "--list", action="store_true", dest="is_list", default=False, help="if list all redis port") parser.add_option("-k", "--key", action="store", dest="key", type="string", default='blocked_clients', help="execute 'redis-cli info' to see more infomation") parser.add_option("-a", "--password", action="store", dest="password", type="string", default=None, help="the password for redis-server") parser.add_option("-p", "--port", action="store", dest="port", type="int", default=6379, help="the port for redis-server, for example: 6379") parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False, help="if output all") parser.add_option("-f", "--force", action="store_true", dest="force", default=False, help="if force to parse command oupout") (options, args) = parser.parse_args() if 1 >= len(sys.argv): parser.print_help() return password = options.password if not password and os.path.exists(passwd_file): fd = open(passwd_file, 'r') lines = fd.readlines() fd.close() for line in lines: line = str(line).strip() if line == "" or line.startswith("#"):continue ln_ary = re.split(r"[|;|,|\s]+", line) fport = int(ln_ary[0]) if fport == int(options.port): password = ln_ary[1] break logpath = "/tmp/zabbix_redis_info.log" redis_ins = Redis(logpath, password=password, port=options.port, debug=options.debug) if options.is_list == True: print redis_ins.get_redis_port_list() return print redis_ins.get_item(options.key, port=options.port, force=options.force) except Exception as expt: import traceback tb = traceback.format_exc() print tb if 
__name__ == '__main__': # redis密码存放的文件 redis_passwd_file = "/usr/local/zabbix-agent/scripts/.redis.passwd" main(redis_passwd_file) 3)zabbix_agentd.conf配置文件中需包含如下配置,注意脚本的位置: ## qiueer redis-stat for discovery UserParameter=custom.redis.discovery, python /usr/local/zabbix-agent/scripts/redis.py --list UserParameter=custom.redis.item[*],python /usr/local/zabbix-agent/scripts/redis.py -p $1 -k $2 4)配置完成后,重启zabbix agent,例如: service zabbix-agent restart 5)在zabbix前端导入模板: Qiueer-Template: Business-Redis-Discovery.xml 6)如果连接Redis需要账号密码,则需要配置端口、密码的对应关系,配置文件路径如下: /usr/local/zabbix-agent/scripts/.redis.passwd(注意需给zabbix用户赋予只读权限) 内容类似如下,格式是: 端口 密码 6379 42s#qdd [root@sdtw02 scripts]# cat .redis.passwd 6379 cX8RvegIER0S 6380 YwvmTGqD5YpP 6381 8RJ4QYXen9Q 6382 If4krh6x7cj2 PS: 2)、3)步骤中的路径/usr/local/zabbix-agent/scripts/根据实际情况修改 使用示例: 1)获取redis端口列表: [root@sdtw02 scripts]# python redis.py -l /usr/local/zabbix-agent/scripts/qiueer/python/slog.py:53: DeprecationWarning: object.__new__() takes no parameters slog.__logger = object.__new__(cls, *args, **kwd) { "data":[ { "{#REDIS_PORT}":6379 }, { "{#REDIS_PORT}":6380 }, { "{#REDIS_PORT}":6381 }, { "{#REDIS_PORT}":6382 } ] } 2)采集数据 [root@sdtw02 scripts]# python redis.py -p 6379 -k used_memory_rss 7819264 其他: ## 使用如下命令解析其输出来获取redis的端口 sudo netstat -nlpt | grep 'redis' | awk '{print $4}'|awk -F: '{print $2}' 几个坑: ①如果不能正常获取采集的数据,可以检查日志: /tmp/zabbix_redis_info.log [2018-01-03 13:41:22] ERROR cmdstr: /usr/local/bin/redis-cli -h 127.0.0.1 -a 3i0uULCGcX8RvegIER0S -p 6379 info stdo: stde: Could not connect to Redis at 127.0.0.1:6379: Connection refused retcode: 1 从上面的日志可以看到连接被拒绝,我们自己手动连接也是一样,这是因为redis的监听绑定在了内网ip上,而不是127.0.0.1上,将脚本中的连接地址修改为redis实际绑定的ip即可 # sed -i 's#127.0.0.1#172.16.155.21#g' redis.py ②我们使用的是系统默认的python2.6.6,执行redis.py时可能会出现如下警告信息,导致无法正常获取数据 [root@sdtw02 scripts]# python redis.py -p 6379 -k used_memory_rss /usr/local/zabbix-agent/scripts/qiueer/python/slog.py:53: DeprecationWarning: object.__new__() takes no parameters slog.__logger =
object.__new__(cls, *args, **kwd) 将 /usr/local/zabbix-agent/scripts/qiueer/python/slog.py 的第53行修改为如下内容即可: slog.__logger = object.__new__(cls) ③获取不到数据 在zabbix服务端调试: [root@u04zbx01 ~]# zabbix_get -s 1.1.1.1 -p 10050 -k "custom.redis.discovery" Traceback (most recent call last): File "/usr/local/zabbix-agent/scripts/qiueer/python/slog.py", line 81, in __init__ file_handler = RotatingFileHandler(self._filename, mode='a',maxBytes=self._size*1024*1024,backupCount=self._count) File "/usr/lib64/python2.6/logging/handlers.py", line 112, in __init__ BaseRotatingHandler.__init__(self, filename, mode, encoding, delay) File "/usr/lib64/python2.6/logging/handlers.py", line 64, in __init__ logging.FileHandler.__init__(self, filename, mode, encoding, delay) File "/usr/lib64/python2.6/logging/__init__.py", line 827, in __init__ StreamHandler.__init__(self, self._open()) File "/usr/lib64/python2.6/logging/__init__.py", line 846, in _open stream = open(self.baseFilename, self.mode) IOError: [Errno 13] Permission denied: '/tmp/zabbix_redis_info.log' { "data":[ { "{#REDIS_PORT}":6379 }, { "{#REDIS_PORT}":6380 }, { "{#REDIS_PORT}":6381 }, { "{#REDIS_PORT}":6382 } ] } 这是因为zabbix用户对日志文件 /tmp/zabbix_redis_info.log 没有写权限,解决: chown -R zabbix.zabbix /tmp/zabbix_redis_info.log 继续调试: [root@u04zbx01 ~]# zabbix_get -s 1.1.1.1 -p 10050 -k custom.redis.item[6380,total_commands_processed] 6869 [root@u04zbx01 ~]# zabbix_get -s 1.1.1.1 -p 10050 -k custom.redis.item[6381,total_commands_processed] 14926408 [root@u04zbx01 ~]# zabbix_get -s 1.1.1.1 -p 10050 -k custom.redis.item[6382,total_commands_processed] 5 [root@u04zbx01 ~]# zabbix_get -s 1.1.1.1 -p 10050 -k custom.redis.item[6382,used_memory_peak] 822880 [root@u04zbx01 ~]# zabbix_get -s 1.1.1.1 -p 10050 -k custom.redis.item[6379,used_memory_peak] 1029192 [root@u04zbx01 ~]# zabbix_get -s 1.1.1.1 -p 10050 -k custom.redis.item[6380,used_memory_peak] 948128 [root@u04zbx01 ~]# zabbix_get -s 1.1.1.1 -p 10050 -k custom.redis.item[6381,used_memory_peak] 1072568 [root@u04zbx01 ~]# zabbix_get
-s 1.1.1.1 -p 10050 -k custom.redis.item[6382,used_memory_peak] 822880