#!/bin/sh
# Server monitoring: restart the service when it misbehaves and send an
# alert email.
# func: watch Tomcat automatically and restart it when necessary.

# PIDs of the running Tomcat processes. The '/home/tomcat' argument of
# `grep -w` must be replaced with the directory Tomcat is actually deployed in.
TomcatID=$(ps -ef | grep tomcat | grep -w '/home/tomcat' | grep -v 'grep' | awk '{print $2}')
# Number of instances of that same Tomcat. Counted from the PID list captured
# above (non-empty lines) so both values describe one process snapshot.
TomcatCount=$(printf '%s\n' "$TomcatID" | grep -c .)
# Tomcat start script (adjust to the real installation path).
StartTomcat=/home/tomcat/bin/startup.sh
# Tomcat work directory; it is removed before each restart.
TomcatCache=/home/tomcat/work
# URL of the page used as the health probe.
WebUrl=http://127.0.0.1
# Recipient of the alert email.
mailAddress=diguanglin@moko.cc
# Output files: body of the probed page, and the monitor's own log.
GetPageInfo=/home/tmp/tomcat_oip_back_visit.info
TomcatMonitorLog=/home/tmp/tomcat_oip_back_monitor.log
# Clear Tomcat's work directory and run the start script.
# Globals: TomcatCache (read), StartTomcat (read)
_MonitorRestartTomcat() {
  # Skip the cleanup when the path is empty so rm never runs without a target.
  if [ -n "$TomcatCache" ]; then
    rm -rf -- "$TomcatCache"
  fi
  "$StartTomcat"
}

#######################################
# Run one health check of Tomcat and recover it when needed.
#   - more than one matching process: force-kill all of them (no restart is
#     issued in this branch)
#   - one process: fetch WebUrl; on any status other than 200 send an alert
#     mail, force-kill the process, clear the work directory and start Tomcat
#   - no process: clear the work directory and start Tomcat
# Globals: TomcatCount, TomcatID, WebUrl, GetPageInfo, mailAddress,
#          TomcatCache, StartTomcat (read); TomcatServiceCode (written)
# Outputs: progress messages on stdout
#######################################
Monitor()
{
  echo "[info]开始监控tomcat...[$(date +'%F %H:%M:%S')]"
  # An empty or unset count is treated as 0 instead of breaking the test.
  if [ "${TomcatCount:-0}" -gt 1 ]; then
    # The same Tomcat was started several times: kill every captured PID.
    # shellcheck disable=SC2086 # splitting into one PID per word is intended
    kill -9 $TomcatID
    echo "同一tomcat开启开启 $TomcatCount 个进程,统统kill掉"
    sleep 5
  elif [ -n "$TomcatID" ]; then
    echo "[info]当前tomcat进程ID为:$TomcatID,继续检测页面..."
    # Probe the page; curl prints only the HTTP status code, the page body
    # goes to $GetPageInfo. Give up after 100 seconds.
    TomcatServiceCode=$(curl -s -o "$GetPageInfo" -m 100 --connect-timeout 100 \
      -w '%{http_code}' "$WebUrl")
    if [ "$TomcatServiceCode" = 200 ]; then
      echo "[info]页面返回码为$TomcatServiceCode,tomcat启动成功,测试页面正常"
    else
      echo "[error]tomcat页面出错,请注意...状态码为$TomcatServiceCode,错误日志已输出到$GetPageInfo" \
        | mail -v -s "系统报警" "$mailAddress"
      echo "[error]页面访问出错,开始重启tomcat"
      # shellcheck disable=SC2086 # splitting into one PID per word is intended
      kill -9 $TomcatID # kill the old Tomcat process
      sleep 5
      _MonitorRestartTomcat
    fi
  else
    echo "[error]tomcat进程不存在!tomcat开始自动重启..."
    echo "[info]$StartTomcat,请稍候..."
    _MonitorRestartTomcat
  fi
  echo "--------------------------"
}
# Run the check and append its report to the monitor log. The log directory
# is created first: when the redirection target cannot be opened, the shell
# reports the error and does not run Monitor at all.
mkdir -p -- "$(dirname -- "$TomcatMonitorLog")"
Monitor >> "$TomcatMonitorLog"