zabbix批量监控域名下nginx的访问50x状态码数量

背景:
购物车相关站点的某些页面经常出现 502 错误;当错误数量超过设定的阈值时,需要报警通知管理员。

1.自动发现脚本的编写
# vim /usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh 

#!/bin/bash 
# function:monitor store nginx access error code

#######################################
# Print the Zabbix low-level-discovery JSON for the monitored sites.
# Every whitespace-separated entry of the site list file becomes one
# {#SITENAME} value; lines starting with "#" are skipped.
# Arguments:
#   $1 - optional path of the site list file; defaults to
#        /usr/local/zabbix_agents_3.2.0/scripts/web_nginx_code_site.txt
# Outputs:
#   The discovery JSON on stdout.
# Returns:
#   0 on success, 1 if the site list file cannot be read (an empty but
#   well-formed data list is still printed in that case).
#######################################
web_nginx_code_discovery () {
  local site_file=${1:-/usr/local/zabbix_agents_3.2.0/scripts/web_nginx_code_site.txt}
  local -a sites=() fields=()
  local line i last status=0

  if [[ -r "$site_file" ]]; then
    # Read line by line instead of word-splitting $(cat ...), so entries are
    # never glob-expanded against the current directory.
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" == \#* ]] && continue
      read -r -a fields <<< "$line"
      sites+=("${fields[@]}")
    done < "$site_file"
  else
    status=1
  fi

  printf '{\n'
  printf '\t"data":[\n'
  last=$(( ${#sites[@]} - 1 ))
  for (( i = 0; i <= last; i++ )); do
    printf '\t\t{ \n'
    # The site name is passed as an argument, never as part of the format
    # string, so a "%" or "\" in it is printed literally.
    if (( i < last )); then
      printf '\t\t\t"{#SITENAME}":"%s"},\n' "${sites[i]}"
    else
      printf '\t\t\t"{#SITENAME}":"%s"}]}\n' "${sites[i]}"
    fi
  done

  # With no sites the loop above never closes the document; do it here so the
  # output is still valid JSON.
  if (( last < 0 )); then
    printf '\t]}\n'
  fi

  return "$status"
}

#######################################
# Count the 50x responses in a site's nginx access log.
# Each log line is reduced to fields 1, 10 and 11, filtered for
# 'HTTP/1.1" 50', and consecutive identical results are collapsed by uniq
# before counting. For store.chinasoft.jp the whole
# "<site>.access.log" file is scanned; for every other site only the last
# 1000 lines of "<site>_access.log" are scanned.
# NOTE(review): the filter only matches requests logged as HTTP/1.1 -
# confirm whether other protocol versions should be counted as well.
# Globals:
#   NGINX_ACCESS_LOG_DIR (read, optional) - directory holding the logs;
#   defaults to /data/www/logs/nginx_log/access
# Arguments:
#   $1 - site name used to build the log file name
# Outputs:
#   The count on stdout; diagnostics on stderr.
# Returns:
#   0 on success, 1 if the site name is missing or invalid, or if the log
#   cannot be read (no count is printed, so a missing log is not reported
#   as "zero errors").
#######################################
error_code_count () {
  local web_site=$1
  local log_dir=${NGINX_ACCESS_LOG_DIR:-/data/www/logs/nginx_log/access}
  local log_file

  if [[ -z "$web_site" ]]; then
    printf 'error_code_count: missing site name\n' >&2
    return 1
  fi
  # The site name becomes part of a path; refuse anything that could leave
  # the log directory.
  if [[ "$web_site" == */* ]]; then
    printf 'error_code_count: invalid site name: %s\n' "$web_site" >&2
    return 1
  fi

  if [[ "$web_site" == 'store.chinasoft.jp' ]]; then
    log_file="${log_dir}/${web_site}.access.log"
  else
    log_file="${log_dir}/${web_site}_access.log"
  fi
  if [[ ! -r "$log_file" ]]; then
    printf 'error_code_count: cannot read %s\n' "$log_file" >&2
    return 1
  fi

  if [[ "$web_site" == 'store.chinasoft.jp' ]]; then
    awk '{print $1" "$10" "$11}' < "$log_file" \
      | grep 'HTTP/1.1" 50' \
      | uniq \
      | wc -l
  else
    tail -n 1000 -- "$log_file" \
      | awk '{print $1" "$10" "$11}' \
      | grep 'HTTP/1.1" 50' \
      | uniq \
      | wc -l
  fi
}

#######################################
# Count the 40x responses in the last 1000 lines of a site's nginx access
# log ("<site>_access.log").
# Each line is reduced to fields 1, 10 and 11, filtered for 'HTTP/1.1" 40',
# and consecutive identical results are collapsed by uniq before counting.
# Globals:
#   NGINX_ACCESS_LOG_DIR (read, optional) - directory holding the logs;
#   defaults to /data/www/logs/nginx_log/access
# Arguments:
#   $1 - site name used to build the log file name
# Outputs:
#   The count on stdout; diagnostics on stderr.
# Returns:
#   0 on success, 1 if the site name is missing or invalid, or if the log
#   cannot be read (no count is printed in that case).
#######################################
error_code400_count () {
  local web_site=$1
  local log_dir=${NGINX_ACCESS_LOG_DIR:-/data/www/logs/nginx_log/access}
  local log_file

  if [[ -z "$web_site" ]]; then
    printf 'error_code400_count: missing site name\n' >&2
    return 1
  fi
  # The site name becomes part of a path; refuse anything that could leave
  # the log directory.
  if [[ "$web_site" == */* ]]; then
    printf 'error_code400_count: invalid site name: %s\n' "$web_site" >&2
    return 1
  fi

  log_file="${log_dir}/${web_site}_access.log"
  if [[ ! -r "$log_file" ]]; then
    printf 'error_code400_count: cannot read %s\n' "$log_file" >&2
    return 1
  fi

  tail -n 1000 -- "$log_file" \
    | awk '{print $1" "$10" "$11}' \
    | grep 'HTTP/1.1" 40' \
    | uniq \
    | wc -l
}
 
#######################################
# Dispatch the sub-command given on the command line to the matching
# function of this script.
# Arguments:
#   $1 - sub-command: web_nginx_code_discovery, error_code_count or
#        error_code400_count
#   $2 - site name, passed through to the two counting sub-commands
# Outputs:
#   Whatever the selected function prints; a usage message on stderr for an
#   unknown or missing sub-command.
# Returns:
#   The status of the selected function, or 2 for an unknown sub-command.
#######################################
main () {
  case "$1" in
    web_nginx_code_discovery)
      web_nginx_code_discovery
      ;;
    error_code_count)
      # Quoted so a site name is always handed over as a single argument.
      error_code_count "$2"
      ;;
    error_code400_count)
      error_code400_count "$2"
      ;;
    *)
      printf 'Usage:%s {web_nginx_code_discovery|error_code_count [URL]|error_code400_count [URL]}\n' "$0" >&2
      return 2
      ;;
  esac
}

main "$@"
# chmod +x /usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh

# 需要监控的日志(按照站点分类)
vim /usr/local/zabbix_agents_3.2.0/scripts/store_site.txt
store.aa.com
store.bb.net

# 核心代码
[root@ws_store_web01:/usr/local/zabbix_agents_3.2.0/scripts]# cat error_status_count.sh
#!/bin/bash
# Functions to return nginx stats
function main {
    web_site=$1
    if [ ${web_site} == 'store.cc.jp' ];then
        /usr/bin/cat /data/www/logs/nginx_log/access/${web_site}.access.log |awk '{print $1" "$10" "$11}'|grep 'HTTP/1.1" 50'|uniq |wc -l
        exit 0
    fi
    /usr/bin/cat /data/www/logs/nginx_log/access/${web_site}_access.log |awk '{print $1" "$10" "$11}'|grep 'HTTP/1.1" 50'|uniq |wc -l
}
# Run the requested function
main $1

3.配置编写
vim /usr/local/zabbix_agents_3.2.0/conf/zabbix_agentd/store_domain_discovery.conf
UserParameter=store.domain.discovery,/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh store_domain_discovery
UserParameter=store.domain.code[*],/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh error_code_count $1

模板关键配置:



zabbix模板:
<?xml version="1.0" encoding="UTF-8"?> <zabbix_export> <version>3.2</version> <date>2019-01-26T08:48:38Z</date> <groups> <group> <name>Templates</name> </group> </groups> <templates> <template> <template>store nginx access error_code count</template> <name>store nginx access error_code count</name> <description/> <groups> <group> <name>Templates</name> </group> </groups> <applications/> <items/> <discovery_rules> <discovery_rule> <name>store.domain.discovery</name> <type>0</type> <snmp_community/> <snmp_oid/> <key>store.domain.discovery</key> <delay>180</delay> <status>0</status> <allowed_hosts/> <snmpv3_contextname/> <snmpv3_securityname/> <snmpv3_securitylevel>0</snmpv3_securitylevel> <snmpv3_authprotocol>0</snmpv3_authprotocol> <snmpv3_authpassphrase/> <snmpv3_privprotocol>0</snmpv3_privprotocol> <snmpv3_privpassphrase/> <delay_flex/> <params/> <ipmi_sensor/> <authtype>0</authtype> <username/> <password/> <publickey/> <privatekey/> <port/> <filter> <evaltype>0</evaltype> <formula/> <conditions/> </filter> <lifetime>30</lifetime> <description/> <item_prototypes> <item_prototype> <name>store.domain.code ON $1</name> <type>0</type> <snmp_community/> <multiplier>0</multiplier> <snmp_oid/> <key>store.domain.code[{#SITENAME},]</key> <delay>30</delay> <history>30</history> <trends>90</trends> <status>0</status> <value_type>3</value_type> <allowed_hosts/> <units/> <delta>0</delta> <snmpv3_contextname/> <snmpv3_securityname/> <snmpv3_securitylevel>0</snmpv3_securitylevel> <snmpv3_authprotocol>0</snmpv3_authprotocol> <snmpv3_authpassphrase/> <snmpv3_privprotocol>0</snmpv3_privprotocol> <snmpv3_privpassphrase/> <formula>1</formula> <delay_flex/> <params/> <ipmi_sensor/> <data_type>0</data_type> <authtype>0</authtype> <username/> <password/> <publickey/> <privatekey/> <port/> <description/> <inventory_link>0</inventory_link> <applications/> <valuemap/> <logtimefmt/> <application_prototypes/> </item_prototype> </item_prototypes> <trigger_prototypes> <trigger_prototype> 
<expression>{store nginx access error_code count:store.domain.code[{#SITENAME},].max(#2)}&gt;10</expression> <recovery_mode>0</recovery_mode> <recovery_expression/> <name>{#SITENAME} nginx error code is great than 10</name> <correlation_mode>0</correlation_mode> <correlation_tag/> <url/> <status>0</status> <priority>0</priority> <description/> <type>0</type> <manual_close>0</manual_close> <dependencies/> <tags/> </trigger_prototype> </trigger_prototypes> <graph_prototypes/> <host_prototypes/> </discovery_rule> </discovery_rules> <httptests/> <macros/> <templates/> <screens/> </template> </templates> </zabbix_export>

 监控十分钟内的50x错误,脚本升级记录

#!/bin/bash 
# function:monitor store nginx access error code

#######################################
# Print the Zabbix low-level-discovery JSON for the monitored store sites.
# Every whitespace-separated entry of the site list file becomes one
# {#SITENAME} value; lines starting with "#" are skipped.
# Arguments:
#   $1 - optional path of the site list file; defaults to
#        /usr/local/zabbix_agents_3.2.0/scripts/store_site.txt
# Outputs:
#   The discovery JSON on stdout.
# Returns:
#   0 on success, 1 if the site list file cannot be read (an empty but
#   well-formed data list is still printed in that case).
#######################################
store_domain_discovery () {
  local site_file=${1:-/usr/local/zabbix_agents_3.2.0/scripts/store_site.txt}
  local -a sites=() fields=()
  local line i last status=0

  if [[ -r "$site_file" ]]; then
    # Read line by line instead of word-splitting $(cat ...), so entries are
    # never glob-expanded against the current directory.
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" == \#* ]] && continue
      read -r -a fields <<< "$line"
      sites+=("${fields[@]}")
    done < "$site_file"
  else
    status=1
  fi

  printf '{\n'
  printf '\t"data":[\n'
  last=$(( ${#sites[@]} - 1 ))
  for (( i = 0; i <= last; i++ )); do
    printf '\t\t{ \n'
    # The site name is passed as an argument, never as part of the format
    # string, so a "%" or "\" in it is printed literally.
    if (( i < last )); then
      printf '\t\t\t"{#SITENAME}":"%s"},\n' "${sites[i]}"
    else
      printf '\t\t\t"{#SITENAME}":"%s"}]}\n' "${sites[i]}"
    fi
  done

  # With no sites the loop above never closes the document; do it here so the
  # output is still valid JSON.
  if (( last < 0 )); then
    printf '\t]}\n'
  fi

  return "$status"
}

#######################################
# Count the 50x responses in a site's whole nginx access log.
# Each log line is reduced to fields 1, 10 and 11, filtered for
# 'HTTP/1.1" 50', and consecutive identical results are collapsed by uniq
# before counting. The log is "<site>.access.log" for store.chinasoft.jp
# and "<site>_access.log" for every other site.
# NOTE(review): the filter only matches requests logged as HTTP/1.1 -
# confirm whether other protocol versions should be counted as well.
# Globals:
#   NGINX_ACCESS_LOG_DIR (read, optional) - directory holding the logs;
#   defaults to /data/www/logs/nginx_log/access
# Arguments:
#   $1 - site name used to build the log file name
# Outputs:
#   The count on stdout; diagnostics on stderr.
# Returns:
#   0 on success, 1 if the site name is missing or invalid, or if the log
#   cannot be read (no count is printed, so a missing log is not reported
#   as "zero errors").
#######################################
error_code_count () {
  local web_site=$1
  local log_dir=${NGINX_ACCESS_LOG_DIR:-/data/www/logs/nginx_log/access}
  local log_file

  if [[ -z "$web_site" ]]; then
    printf 'error_code_count: missing site name\n' >&2
    return 1
  fi
  # The site name becomes part of a path; refuse anything that could leave
  # the log directory.
  if [[ "$web_site" == */* ]]; then
    printf 'error_code_count: invalid site name: %s\n' "$web_site" >&2
    return 1
  fi

  # Only the file name differs between the two cases; the counting pipeline
  # is the same.
  if [[ "$web_site" == 'store.chinasoft.jp' ]]; then
    log_file="${log_dir}/${web_site}.access.log"
  else
    log_file="${log_dir}/${web_site}_access.log"
  fi
  if [[ ! -r "$log_file" ]]; then
    printf 'error_code_count: cannot read %s\n' "$log_file" >&2
    return 1
  fi

  awk '{print $1" "$10" "$11}' < "$log_file" \
    | grep 'HTTP/1.1" 50' \
    | uniq \
    | wc -l
}

#######################################
# Count the 50x responses logged during the last 10 minutes for a site.
# The access log is read from its end and reading stops at the first entry
# that is not newer than "now - 10 minutes". The remaining lines are
# reduced to fields 1, 10 and 11, filtered for 'HTTP/1.1" 50', and
# consecutive identical results are collapsed by uniq before counting.
# The log is "<site>.access.log" for store.chinasoft.jp and
# "<site>_access.log" for every other site; the 10-minute window applies to
# both.
# Assumes field 4 of each line looks like "[26/Jan/2019:17:07:00" and is
# written in the same time zone that `date` reports - TODO confirm against
# the nginx log_format in use.
# Globals:
#   NGINX_ACCESS_LOG_DIR (read, optional) - directory holding the logs;
#   defaults to /data/www/logs/nginx_log/access
# Arguments:
#   $1 - site name used to build the log file name
# Outputs:
#   The count on stdout; diagnostics on stderr.
# Returns:
#   0 on success, 1 if the site name is missing or invalid, the log cannot
#   be read, or the cut-off time cannot be computed.
#######################################
last10_mins_error_code_count () {
  local web_site=$1
  local log_dir=${NGINX_ACCESS_LOG_DIR:-/data/www/logs/nginx_log/access}
  local log_file cutoff

  if [[ -z "$web_site" ]]; then
    printf 'last10_mins_error_code_count: missing site name\n' >&2
    return 1
  fi
  # The site name becomes part of a path; refuse anything that could leave
  # the log directory.
  if [[ "$web_site" == */* ]]; then
    printf 'last10_mins_error_code_count: invalid site name: %s\n' "$web_site" >&2
    return 1
  fi

  if [[ "$web_site" == 'store.chinasoft.jp' ]]; then
    log_file="${log_dir}/${web_site}.access.log"
  else
    log_file="${log_dir}/${web_site}_access.log"
  fi
  if [[ ! -r "$log_file" ]]; then
    printf 'last10_mins_error_code_count: cannot read %s\n' "$log_file" >&2
    return 1
  fi

  # Full date and time as a sortable key. Comparing the time of day alone
  # breaks just after midnight and also lets entries from earlier days
  # through.
  cutoff=$(date -d '-10 minute' '+%Y%m%d%H%M%S') || return 1

  # The lines are not put back into chronological order: the number of runs
  # that uniq collapses is the same in either direction.
  tac -- "$log_file" \
    | awk -v cutoff="$cutoff" '
        BEGIN {
          split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
          for (i = 1; i <= 12; i++) month[names[i]] = sprintf("%02d", i)
        }
        {
          # "[26/Jan/2019:17:07:00" -> "20190126170700"
          split(substr($4, 2), t, "[/:]")
          key = t[3] month[t[2]] t[1] t[4] t[5] t[6]
          # Appending "" forces a string comparison of the two keys.
          if ((key "") > (cutoff "")) print; else exit
        }' \
    | awk '{print $1" "$10" "$11}' \
    | grep 'HTTP/1.1" 50' \
    | uniq \
    | wc -l
}

 
#######################################
# Dispatch the sub-command given on the command line to the matching
# function of this script.
# Arguments:
#   $1 - sub-command: store_domain_discovery, error_code_count or
#        last10_mins_error_code_count
#   $2 - site name, passed through to the two counting sub-commands
# Outputs:
#   Whatever the selected function prints; a usage message on stderr for an
#   unknown or missing sub-command.
# Returns:
#   The status of the selected function, or 2 for an unknown sub-command.
#######################################
main () {
  case "$1" in
    store_domain_discovery)
      store_domain_discovery
      ;;
    error_code_count)
      # Quoted so a site name is always handed over as a single argument.
      error_code_count "$2"
      ;;
    last10_mins_error_code_count)
      last10_mins_error_code_count "$2"
      ;;
    *)
      # Lists exactly the sub-commands handled above.
      printf 'Usage:%s {store_domain_discovery|error_code_count [URL]|last10_mins_error_code_count [URL]}\n' "$0" >&2
      return 2
      ;;
  esac
}

main "$@"

监控项

UserParameter=store.domain.discovery,/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh store_domain_discovery
UserParameter=store.domain.code[*],/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh error_code_count $1
UserParameter=store.domain.10mins.code[*],/usr/local/zabbix_agents_3.2.0/scripts/store_nginx_code.sh last10_mins_error_code_count $1

 

posted @ 2019-01-26 17:07  reblue520  阅读(902)  评论(0编辑  收藏  举报