Linux服务器定时健康检查,发生故障自动微信告警
此脚本适用于各种Linux环境,可以实现各种监控项目,可自定义阈值,实现不同监控效果
已在原有脚本基础上做了简化,提取了主要功能
目前实现的有:
1、磁盘监控
2、内存监控
3、cpu负荷监控
4、进程数监控
5、iptables 状态检查(若防火墙未开启,启动防火墙)
6、多端口检查
其中若一项失败,微信提醒对应失败的监控项
脚本如下所示:
#!/bin/bash
#
# Periodic Linux server health check with WeChat Work alerting.
#
# Collects CPU, memory, swap, disk, NIC, iptables and process metrics, builds
# a table containing every metric that crosses its threshold, and posts that
# table through the WeChat Work (qyapi.weixin.qq.com) message API.
#
# Alerting is switched by /root/config, which holds a line "alarm:ON" or
# "alarm:OFF".
#
# Optional environment overrides:
#   WEIXIN_CORP_ID  - corp ID used to request the access token
#   WEIXIN_SECRET   - secret used to request the access token

if [[ -r /etc/profile ]]; then
  source /etc/profile
fi

#######################################
# Print a bold, coloured, bracketed label followed by a newline.
# Arguments: $1 - ANSI colour code (e.g. 31 red, 32 green)
#            $2 - text to print
#######################################
Echo_Colour() {
  echo -e "[\033[$1;1m$2\033[0m]"
}

#######################################
# Print one table row with three fixed-width columns.
# Arguments: $1 - left-aligned, width 12
#            $2 - right-aligned, width 15
#            $3 - right-aligned, width 10
#######################################
Print_Format() {
  printf "|%-12s|%15s|%10s|\n" "$1" "$2" "$3"
}

#######################################
# Print the horizontal rule that matches Print_Format's column widths.
#######################################
Print_Format2() {
  printf "+%-12s+%15s+%10s+\n" "------------" "---------------" "----------"
}

#######################################
# Succeed when $1 is a plain decimal number (optional sign and fraction).
#######################################
Is_Number() {
  local Pattern='^-?([0-9]+\.?[0-9]*|\.[0-9]+)$'
  [[ "$1" =~ $Pattern ]]
}

#######################################
# Print a table row when $1 is numerically greater than $2.
# Empty or non-numeric operands print nothing, so a metric that could not be
# collected never produces a row or an error message.
# Arguments: $1, $2      - numbers to compare
#            $3, $4, $5  - columns handed to Print_Format
# Returns:   always 0
#######################################
Print_Select() {
  local Num1="$1" Num2="$2"
  if ! Is_Number "$Num1" || ! Is_Number "$Num2"; then
    return 0
  fi
  # awk handles fractional values such as load averages.
  if awk -v a="$Num1" -v b="$Num2" 'BEGIN { exit !(a + 0 > b + 0) }'; then
    Print_Format "$3" "$4" "$5"
  fi
}

#######################################
# Print either a timestamped step label or a coloured result.
# Arguments: $1 - "" to print the label, "0" for success, "1" for failure
#            $2 - label text (used when $1 is empty)
#            $3 - result text (used when $1 is 0 or 1)
# Exits with status 1 on any other argument combination.
#######################################
Output_() {
  local TrueFalse="$1" PrintVar="$2" PrintResult="$3"
  if [[ -z "$TrueFalse" && -n "$PrintVar" ]]; then
    PrintVar=$(printf "%-45s" "${PrintVar}")
    # No trailing newline: the result is meant to follow on the same line.
    echo -n -e "\033[32;49;1m[$(date '+%F %T')]\t${PrintVar}\033[39;49;0m"
  elif [[ "$TrueFalse" == "0" ]]; then
    Echo_Colour "32" "$PrintResult"
  elif [[ "$TrueFalse" == "1" ]]; then
    Echo_Colour "31" "$PrintResult"
  else
    Echo_Colour "31" "syntax error"
    exit 1
  fi
}

#######################################
# Compare two strings and print the success or failure text.
# Arguments: $1, $2 - values compared for string equality
#            $3     - text printed when they are equal
#            $4     - text printed when they differ
#            $5     - optional command (e.g. exit or return) invoked with
#                     argument 1 after a mismatch
#######################################
Output_Select() {
  local Num1="$1" Num2="$2" PrintTrue="$3" PrintFalse="$4" RetrunExit="$5"
  if [[ "$Num1" == "$Num2" ]]; then
    Output_ "0" "" "$PrintTrue"
  else
    Output_ "1" "" "$PrintFalse"
    # Only run the follow-up command when one was supplied; an empty value
    # would otherwise try to execute "1".
    if [[ -n "$RetrunExit" ]]; then
      $RetrunExit 1
    fi
  fi
}

#######################################
# Escape text so it can be embedded inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   escaped text, without a trailing newline
#######################################
Json_Escape() {
  local Text="$1"
  Text=${Text//\\/\\\\}
  Text=${Text//\"/\\\"}
  Text=${Text//$'\n'/\\n}
  Text=${Text//$'\r'/\\r}
  Text=${Text//$'\t'/\\t}
  printf '%s' "$Text"
}

#######################################
# Send the contents of a file as a WeChat Work text message.
# Globals:   TMP (read when no argument is given),
#            WEIXIN_CORP_ID, WEIXIN_SECRET (optional overrides)
# Arguments: $1 - file holding the message body (default: $TMP)
# Outputs:   curl output or the failure reason is written to /tmp/weixin.log
# Returns:   1 when no access token could be obtained, else curl's status
#######################################
weixin() {
  local MsgFile="${1:-$TMP}"
  # CropID: corp ID.  Secret: credential secret of the management group.
  local CropID="${WEIXIN_CORP_ID:-wx80179d3a3eb67***}"
  local Secret="${WEIXIN_SECRET:-ZyqFs4qfUiXcz8plHFbhCWkF3JEjj7vASkZjs8YTRqKxq1yAx-U46foyNXNKz2qw}"
  local GURL="https://qyapi.weixin.qq.com/cgi-bin/gettoken?corpid=$CropID&corpsecret=$Secret"
  local Gtoken PURL Content Payload

  # The access token must accompany every API call. Extract it by key so the
  # order of fields in the response does not matter.
  Gtoken=$(/usr/bin/curl -s -G "$GURL" \
    | sed -n 's/.*"access_token"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p')
  if [[ -z "$Gtoken" ]]; then
    echo "weixin: could not obtain access_token" >/tmp/weixin.log
    return 1
  fi

  PURL="https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token=$Gtoken"
  # Raw newlines are not valid inside a JSON string, so escape the table.
  Content=$(Json_Escape "$(cat "$MsgFile")")
  Payload="{\"touser\":\"@all\",\"msgtype\":\"text\",\"toparty\":\"1\",\"agentid\":\"1\",\"text\":{\"content\": \"Server Check Monitor:\n$Content\"}, \"safe\":\"0\"}"
  curl -sS -H "Content-type: application/json" -X POST -d "$Payload" "$PURL" \
    &>/tmp/weixin.log
}

#######################################
# Print the free share of a resource as a whole percentage.
# Prints nothing when an argument is not a non-negative integer or the total
# is zero (for example a host without swap).
# Arguments: $1 - total amount, $2 - used amount
#######################################
Percent_Free() {
  local Total="$1" Used="$2"
  [[ "$Total" =~ ^[0-9]+$ && "$Used" =~ ^[0-9]+$ ]] || return 0
  Total=$((10#$Total))
  Used=$((10#$Used))
  ((Total > 0)) || return 0
  echo $(((Total - Used) * 100 / Total))
}

#######################################
# Convert a "use%" value such as "45%" into the free percentage.
# Prints nothing when the value is missing (file system not mounted).
# Arguments: $1 - used percentage, with or without a trailing %
#######################################
Disk_Free_Percent() {
  local UsedPct="${1%\%}"
  [[ "$UsedPct" =~ ^[0-9]+$ ]] || return 0
  echo $((100 - 10#$UsedPct))
}

#######################################
# Succeed when the iptables INPUT chain has policy DROP.
#######################################
Iptables_Policy_Is_Drop() {
  /sbin/iptables -L -n | grep -q "Chain INPUT (policy DROP)"
}

#######################################
# Collect host metrics into global variables.
# Globals written: Cpucore Cpuload Cpuidle MemTotal MemUse MemFree SwapTotal
#                  SwapUse SwapFree DiskUse1 DiskUse1Free DiskUse2
#                  DiskUse2Free eth0Link eth0Speed eth0Duplex eth1Link
#                  eth1Speed eth1Duplex Iptables Iptables_bc
# Globals read:    LogFile
# Side effect:     runs /data/shelltools/web_iptable.sh when the INPUT policy
#                  is not DROP, then re-checks the policy.
#######################################
server_check() {
  Cpucore=$(grep -c processor /proc/cpuinfo)
  # Second-to-last comma-separated field of uptime.
  Cpuload=$(uptime | awk -F, '{print $(NF-1)}' | tr -d " ")
  Cpuidle=$(vmstat | awk '/[0-9]+/{print $(NF-2)}')

  MemTotal=$(free -m | awk '/Mem:/{print $2}')
  MemUse=$(free -m | awk '/-\/+/{print $3}')
  if [[ -z "$MemUse" ]]; then
    # No "-/+ buffers/cache" row in this free output: derive usage from the
    # "available" column instead (7th field of the Mem: row).
    MemUse=$(free -m | awk '/Mem:/ && NF >= 7 {print $2 - $7}')
  fi
  MemFree=$(Percent_Free "$MemTotal" "$MemUse")

  SwapTotal=$(free -m | awk '/Swap:/{print $2}')
  SwapUse=$(free -m | awk '/Swap/{print $3}')
  SwapFree=$(Percent_Free "$SwapTotal" "$SwapUse")

  DiskUse1=$(df -h | awk '/\/$/{print $(NF-3)}')
  DiskUse1Free=$(Disk_Free_Percent "$(df -h | awk '/\/$/{print $(NF-1)}')")
  DiskUse2=$(df -h | awk '/\/data$/{print $(NF-3)}')
  DiskUse2Free=$(Disk_Free_Percent "$(df -h | awk '/\/data$/{print $(NF-1)}')")

  eth0Link=$(ethtool eth0 | awk '/Link/{print $NF}')
  eth0Speed=$(ethtool eth0 | awk '/Speed/{print $NF}')
  eth0Duplex=$(ethtool eth0 | awk '/Duplex/{print $NF}')
  eth1Link=$(ethtool eth1 | awk '/Link/{print $NF}')
  eth1Speed=$(ethtool eth1 | awk '/Speed/{print $NF}')
  eth1Duplex=$(ethtool eth1 | awk '/Duplex/{print $NF}')

  Iptables=No
  Iptables_bc=0
  if Iptables_Policy_Is_Drop; then
    Iptables=Yes
    Iptables_bc=1
  else
    # Firewall looks inactive: try to start it, then check once more.
    /bin/sh /data/shelltools/web_iptable.sh >>"${LogFile:-/dev/null}" 2>&1
    if Iptables_Policy_Is_Drop; then
      Iptables=Yes
      Iptables_bc=1
    fi
  fi
}

#######################################
# Count application connections and processes into global variables.
# Globals written: GameOnLineNum GameJavaProNum GameTomcatProNum
#                  GameNginxProNum
#######################################
Game_Check() {
  GameOnLineNum=$(netstat -ntp | awk '/ESTABLISHED/{print $4}' \
    | grep -E -c ":9200|:9300")
  GameJavaProNum=$(jps | grep -c "Server")
  GameTomcatProNum=$(jps | grep -c Bootstrap)
  GameNginxProNum=$(ps -ef | grep -v grep | grep -c "nginx: master process")
}

#######################################
# Build the warning table and send it when at least one check failed.
# A row is added when: CPU load > core count, CPU idle < 20, memory free < 5,
# free space on / or /data < 10, Java processes < 4, Tomcat processes < 1,
# Nginx master processes < 1, iptables flag < 1, or nc cannot reach port 80
# or 22 on $IP.
# Globals read:    Site IP LogFile and the metrics set by server_check and
#                  Game_Check
# Globals written: TMP (temporary message file, removed before returning)
#######################################
Send_Warning() {
  # The title line plus the three header lines are always present.
  local -r HeaderLines=4

  TMP=$(mktemp) || return 1
  {
    echo "${Site}:${IP} ERROR"
    Print_Format2
    Print_Format "Site" "Name" "Warning"
    Print_Format2
    Print_Select "$Cpuload" "$Cpucore" "$Site" "CPU Load" "$Cpuload"
    Print_Select "20" "$Cpuidle" "$Site" "CPU Idle" "$Cpuidle%"
    Print_Select "5" "$MemFree" "$Site" "Mem Idle" "$MemFree%"
    Print_Select "10" "$DiskUse1Free" "$Site" "/" "$DiskUse1Free%"
    Print_Select "10" "$DiskUse2Free" "$Site" "/data" "$DiskUse2Free%"
    Print_Select "4" "$GameJavaProNum" "$Site" "JavaPro" "$GameJavaProNum"
    Print_Select "1" "$GameTomcatProNum" "$Site" "TomcatPro" "$GameTomcatProNum"
    Print_Select "1" "$GameNginxProNum" "$Site" "NginxPro" "$GameNginxProNum"
    Print_Select "1" "$Iptables_bc" "$Site" "iptables" "$Iptables_bc"
    # A non-zero nc exit status (> 0) marks the port as unreachable.
    nc -nvz -w 2 "$IP" 80 >>"${LogFile:-/dev/null}" 2>&1
    Print_Select "$?" "0" "$Site" "$IP" "Web 80 Port Fail"
    nc -nvz -w 2 "$IP" 22 >>"${LogFile:-/dev/null}" 2>&1
    Print_Select "$?" "0" "$Site" "$IP" "SSH 22 Port Fail"
  } >>"$TMP" 2>&1

  if (($(wc -l <"$TMP") > HeaderLines)); then
    # Close the table only when it is actually sent, so the footer never
    # counts as a warning row.
    Print_Format2 >>"$TMP"
    weixin
  fi
  rm -f -- "$TMP"
}

#######################################
# Entry point: gather metrics and alert when /root/config says alarm:ON.
# Globals written: LogFile Site IP Alarm
#######################################
main() {
  LogFile=/root/check.log
  Site=$(hostname)
  # Accepts both "inet addr:1.2.3.4" and "inet 1.2.3.4" ifconfig layouts.
  IP=$(ifconfig eth0 \
    | awk '$1 == "inet" { sub(/^addr:/, "", $2); print $2; exit }')
  Alarm=$(awk -F":" '/alarm/{print $2}' /root/config | sed 's/ //g')
  server_check >/dev/null
  Game_Check >/dev/null
  if [[ "$Alarm" == "ON" ]]; then
    Send_Warning
  fi
}

main "$@"

# To disable WeChat alerts:
#   echo alarm:OFF > /root/config
需要微信告警执行:
#echo alarm:ON > /root/config 【此为微信告警开关】
效果图如下(Java进程低于5个、80端口不通,微信告警):
Learn how to learn~~