zabbix批量监控域名下nginx的访问50x状态码数量
背景: 购物车相关的站点某些页面经常出现502,如果超过一定阈值则需要报警通知管理员。
1.自动发现脚本的编写
# vim /usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh
#!/bin/bash
# function:monitor store nginx access error code
# Emit Zabbix low-level-discovery JSON listing every monitored site.
# Arguments:
#   $1 - optional path of the site list (whitespace-separated site names;
#        lines starting with "#" are ignored). Defaults to
#        /usr/local/zabbix_agents_3.2.0/scripts/web_nginx_code_site.txt
# Outputs:
#   A {"data":[{"{#SITENAME}":"<site>"},...]} document on stdout. When the
#   list is missing, unreadable or has no entries, an empty "data" array is
#   printed so the output is still valid JSON.
web_nginx_code_discovery () {
  local site_file=${1:-/usr/local/zabbix_agents_3.2.0/scripts/web_nginx_code_site.txt}
  local -a sites=() words=()
  local line i last

  if [[ -r "$site_file" ]]; then
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" == '#'* ]] && continue
      # Split on blanks like the original word-splitting did, but without
      # exposing the names to pathname expansion.
      IFS=$' \t' read -r -a words <<<"$line"
      sites+=("${words[@]}")
    done <"$site_file"
  fi

  if (( ${#sites[@]} == 0 )); then
    printf '{\n\t"data":[]}\n'
    return 0
  fi

  last=$(( ${#sites[@]} - 1 ))
  printf '{\n'
  printf '\t"data":[\n'
  for (( i = 0; i <= last; i++ )); do
    printf '\t\t{ \n'
    # Site names go through %s so "%" or "\" in a name cannot be
    # interpreted as printf directives.
    if (( i < last )); then
      printf '\t\t\t"{#SITENAME}":"%s"},\n' "${sites[i]}"
    else
      printf '\t\t\t"{#SITENAME}":"%s"}]}\n' "${sites[i]}"
    fi
  done
}
# Count the 50x entries in a site's nginx access log.
# Arguments:
#   $1 - site name, used as the log file prefix
#   $2 - optional log directory (default: /data/www/logs/nginx_log/access)
# Outputs:
#   On stdout, the number of lines whose fields 1, 10 and 11 contain
#   'HTTP/1.1" 50', after adjacent identical triples are collapsed by uniq.
#   A missing log file yields 0 (plus the tool's error on stderr).
# Returns:
#   1, with a message on stderr, when the site name is empty or contains "/".
error_code_count () {
  local web_site=$1
  local log_dir=${2:-/data/www/logs/nginx_log/access}

  # The name becomes part of a file path, so refuse anything that is empty
  # or could step outside the log directory.
  if [[ -z "$web_site" || "$web_site" == */* ]]; then
    printf 'error_code_count: invalid site name: "%s"\n' "$web_site" >&2
    return 1
  fi

  if [[ "$web_site" == 'store.chinasoft.jp' ]]; then
    # This site's log is named "<site>.access.log" and is scanned in full.
    # NOTE(review): every other site only inspects the last 1000 lines;
    # confirm the whole-file scan here is intentional.
    awk '{print $1" "$10" "$11}' <"${log_dir}/${web_site}.access.log" \
      | grep 'HTTP/1.1" 50' | uniq | wc -l
    # return (not exit) so the function is safe to call from other code.
    return 0
  fi

  tail -n 1000 -- "${log_dir}/${web_site}_access.log" \
    | awk '{print $1" "$10" "$11}' \
    | grep 'HTTP/1.1" 50' | uniq | wc -l
}
# Count the 40x entries in the last 1000 lines of a site's nginx access log.
# Arguments:
#   $1 - site name, used as the log file prefix ("<site>_access.log")
#   $2 - optional log directory (default: /data/www/logs/nginx_log/access)
# Outputs:
#   On stdout, the number of lines whose fields 1, 10 and 11 contain
#   'HTTP/1.1" 40', after adjacent identical triples are collapsed by uniq.
#   A missing log file yields 0 (plus tail's error on stderr).
# Returns:
#   1, with a message on stderr, when the site name is empty or contains "/".
error_code400_count () {
  local web_site=$1
  local log_dir=${2:-/data/www/logs/nginx_log/access}

  # The name becomes part of a file path, so refuse anything that is empty
  # or could step outside the log directory.
  if [[ -z "$web_site" || "$web_site" == */* ]]; then
    printf 'error_code400_count: invalid site name: "%s"\n' "$web_site" >&2
    return 1
  fi

  tail -n 1000 -- "${log_dir}/${web_site}_access.log" \
    | awk '{print $1" "$10" "$11}' \
    | grep 'HTTP/1.1" 40' | uniq | wc -l
}
# Entry point: the first command-line argument selects the action; the
# second, where used, is the site name handed to the counting function.
case "$1" in
  # store_domain_discovery is accepted as well because that is the action
  # name the agent's UserParameter definition passes to this script.
  web_nginx_code_discovery|store_domain_discovery)
    web_nginx_code_discovery
    ;;
  error_code_count)
    error_code_count "$2"
    ;;
  error_code400_count)
    error_code400_count "$2"
    ;;
  *)
    echo "Usage:$0 {web_nginx_code_discovery|error_code_count|error_code400_count [URL]}"
    ;;
esac # chmod +x /usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh # 需要监控的日志(按照站点分类) vim /usr/local/zabbix_agents_3.2.0/scripts/store_site.txt store.aa.com store.bb.net # 核心代码 [root@ws_store_web01:/usr/local/zabbix_agents_3.2.0/scripts]# cat error_status_count.sh #!/bin/bash # Functions to return nginx stats function main { web_site=$1 if [ ${web_site} == 'store.cc.jp' ];then /usr/bin/cat /data/www/logs/nginx_log/access/${web_site}.access.log |awk '{print $1" "$10" "$11}'|grep 'HTTP/1.1" 50'|uniq |wc -l exit 0 fi /usr/bin/cat /data/www/logs/nginx_log/access/${web_site}_access.log |awk '{print $1" "$10" "$11}'|grep 'HTTP/1.1" 50'|uniq |wc -l } # Run the requested function main $1 3.配置编写 vim /usr/local/zabbix_agents_3.2.0/conf/zabbix_agentd/store_domain_discovery.conf UserParameter=store.domain.discovery,/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh store_domain_discovery UserParameter=store.domain.code[*],/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh error_code_count $1 模板关键配置:
zabbix模板: <?xml version="1.0" encoding="UTF-8"?> <zabbix_export> <version>3.2</version> <date>2019-01-26T08:48:38Z</date> <groups> <group> <name>Templates</name> </group> </groups> <templates> <template> <template>store nginx access error_code count</template> <name>store nginx access error_code count</name> <description/> <groups> <group> <name>Templates</name> </group> </groups> <applications/> <items/> <discovery_rules> <discovery_rule> <name>store.domain.discovery</name> <type>0</type> <snmp_community/> <snmp_oid/> <key>store.domain.discovery</key> <delay>180</delay> <status>0</status> <allowed_hosts/> <snmpv3_contextname/> <snmpv3_securityname/> <snmpv3_securitylevel>0</snmpv3_securitylevel> <snmpv3_authprotocol>0</snmpv3_authprotocol> <snmpv3_authpassphrase/> <snmpv3_privprotocol>0</snmpv3_privprotocol> <snmpv3_privpassphrase/> <delay_flex/> <params/> <ipmi_sensor/> <authtype>0</authtype> <username/> <password/> <publickey/> <privatekey/> <port/> <filter> <evaltype>0</evaltype> <formula/> <conditions/> </filter> <lifetime>30</lifetime> <description/> <item_prototypes> <item_prototype> <name>store.domain.code ON $1</name> <type>0</type> <snmp_community/> <multiplier>0</multiplier> <snmp_oid/> <key>store.domain.code[{#SITENAME},]</key> <delay>30</delay> <history>30</history> <trends>90</trends> <status>0</status> <value_type>3</value_type> <allowed_hosts/> <units/> <delta>0</delta> <snmpv3_contextname/> <snmpv3_securityname/> <snmpv3_securitylevel>0</snmpv3_securitylevel> <snmpv3_authprotocol>0</snmpv3_authprotocol> <snmpv3_authpassphrase/> <snmpv3_privprotocol>0</snmpv3_privprotocol> <snmpv3_privpassphrase/> <formula>1</formula> <delay_flex/> <params/> <ipmi_sensor/> <data_type>0</data_type> <authtype>0</authtype> <username/> <password/> <publickey/> <privatekey/> <port/> <description/> <inventory_link>0</inventory_link> <applications/> <valuemap/> <logtimefmt/> <application_prototypes/> </item_prototype> </item_prototypes> <trigger_prototypes> 
<trigger_prototype> <expression>{store nginx access error_code count:store.domain.code[{#SITENAME},].max(#2)}>10</expression> <recovery_mode>0</recovery_mode> <recovery_expression/> <name>{#SITENAME} nginx error code is greater than 10</name> <correlation_mode>0</correlation_mode> <correlation_tag/> <url/> <status>0</status> <priority>0</priority> <description/> <type>0</type> <manual_close>0</manual_close> <dependencies/> <tags/> </trigger_prototype> </trigger_prototypes> <graph_prototypes/> <host_prototypes/> </discovery_rule> </discovery_rules> <httptests/> <macros/> <templates/> <screens/> </template> </templates> </zabbix_export>
监控十分钟内的50x错误,脚本升级记录
#!/bin/bash
# function:monitor store nginx access error code
#
# Usage:
#   store_nginx_code.sh store_domain_discovery
#   store_nginx_code.sh error_code_count SITE
#   store_nginx_code.sh last10_mins_error_code_count SITE

# Print the access-log path for a site.
# Arguments: $1 - site name, $2 - log directory
# Returns:   1, with a message on stderr, when the site name is empty or
#            contains "/" (it becomes part of a file path).
_store_access_log () {
  local site=$1 log_dir=$2
  if [[ -z "$site" || "$site" == */* ]]; then
    printf 'invalid site name: "%s"\n' "$site" >&2
    return 1
  fi
  # store.chinasoft.jp logs to "<site>.access.log"; every other site uses
  # "<site>_access.log".
  if [[ "$site" == 'store.chinasoft.jp' ]]; then
    printf '%s\n' "${log_dir}/${site}.access.log"
  else
    printf '%s\n' "${log_dir}/${site}_access.log"
  fi
}

# Emit Zabbix low-level-discovery JSON listing every monitored site.
# Arguments:
#   $1 - optional path of the site list (whitespace-separated site names;
#        lines starting with "#" are ignored). Defaults to
#        /usr/local/zabbix_agents_3.2.0/scripts/store_site.txt
# Outputs:
#   A {"data":[{"{#SITENAME}":"<site>"},...]} document on stdout; an empty
#   "data" array when the list is missing, unreadable or has no entries.
store_domain_discovery () {
  local site_file=${1:-/usr/local/zabbix_agents_3.2.0/scripts/store_site.txt}
  local -a sites=() words=()
  local line i last

  if [[ -r "$site_file" ]]; then
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" == '#'* ]] && continue
      IFS=$' \t' read -r -a words <<<"$line"
      sites+=("${words[@]}")
    done <"$site_file"
  fi

  if (( ${#sites[@]} == 0 )); then
    printf '{\n\t"data":[]}\n'
    return 0
  fi

  last=$(( ${#sites[@]} - 1 ))
  printf '{\n'
  printf '\t"data":[\n'
  for (( i = 0; i <= last; i++ )); do
    printf '\t\t{ \n'
    # %s keeps "%" or "\" in a site name from being read as a directive.
    if (( i < last )); then
      printf '\t\t\t"{#SITENAME}":"%s"},\n' "${sites[i]}"
    else
      printf '\t\t\t"{#SITENAME}":"%s"}]}\n' "${sites[i]}"
    fi
  done
}

# Count the 50x entries in a site's whole nginx access log.
# Arguments:
#   $1 - site name
#   $2 - optional log directory (default: /data/www/logs/nginx_log/access)
# Outputs:
#   On stdout, the number of lines whose fields 1, 10 and 11 contain
#   'HTTP/1.1" 50', after adjacent identical triples are collapsed by uniq.
#   A missing log file yields 0.
error_code_count () {
  local web_site=$1
  local log_dir=${2:-/data/www/logs/nginx_log/access}
  local log_file
  log_file=$(_store_access_log "$web_site" "$log_dir") || return 1

  awk '{print $1" "$10" "$11}' <"$log_file" \
    | grep 'HTTP/1.1" 50' | uniq | wc -l
}

# Count the 50x entries logged during the last 10 minutes.
# Arguments:
#   $1 - site name
#   $2 - optional log directory (default: /data/www/logs/nginx_log/access)
# Outputs:
#   Same count as error_code_count, restricted to entries whose timestamp
#   (field 4, e.g. "[26/Jan/2019:08:48:38") is later than "now - 10 min".
# Notes:
#   Needs GNU date (-d) and tac. The log timestamps are compared against the
#   local clock of this host, so both are assumed to use the same time zone.
last10_mins_error_code_count () {
  local web_site=$1
  local log_dir=${2:-/data/www/logs/nginx_log/access}
  local log_file cutoff
  log_file=$(_store_access_log "$web_site" "$log_dir") || return 1
  cutoff=$(LC_ALL=C date -d '-10 minute' '+%Y-%m-%d %H:%M:%S') || return 1

  # Read the log newest-first and stop at the first entry outside the
  # window, so only the tail of a large log is processed. The comparison
  # uses the full date and time; comparing the clock time alone fails
  # around midnight and on logs that span several days. The second tac
  # restores the original order before uniq collapses adjacent duplicates.
  tac -- "$log_file" \
    | awk -v cutoff="$cutoff" '
        {
          # Skip lines whose 4th field is not a "[dd/Mon/yyyy:HH:MM:SS" stamp.
          if ($4 !~ /^\[[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)
            next
          mon = (index("JanFebMarAprMayJunJulAugSepOctNovDec", substr($4, 5, 3)) + 2) / 3
          stamp = sprintf("%s-%02d-%s %s", substr($4, 9, 4), mon, substr($4, 2, 2), substr($4, 14, 8))
          if (stamp > cutoff)
            print
          else
            exit
        }' \
    | tac \
    | awk '{print $1" "$10" "$11}' \
    | grep 'HTTP/1.1" 50' | uniq | wc -l
}

case "$1" in
  store_domain_discovery)
    store_domain_discovery
    ;;
  error_code_count)
    error_code_count "$2"
    ;;
  last10_mins_error_code_count)
    last10_mins_error_code_count "$2"
    ;;
  *)
    echo "Usage:$0 {store_domain_discovery|error_code_count|last10_mins_error_code_count [URL]}"
    ;;
esac
监控项
UserParameter=store.domain.discovery,/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh store_domain_discovery
UserParameter=store.domain.code[*],/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh error_code_count $1
UserParameter=store.domain.10mins.code[*],/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh last10_mins_error_code_count $1