常用故障排查监控shell脚本
计算脚本执行耗时(毫秒级)
#!/bin/bash
# Measure how long a section of a script takes to run, in milliseconds.
#
# The timestamps come from `date +%s%N` (seconds followed by nanoseconds),
# so a `date` implementation that supports %N is required.

# Start timestamp, in nanoseconds.
start=$(date +%s%N)
echo 'start nanosecond:' "$start"

# --- commands being timed go here ---
sleep 0.001

# End timestamp, in nanoseconds.
end=$(date +%s%N)
echo 'end nanosecond:' "$end"

# Convert the nanosecond difference to milliseconds (divide by 1,000,000).
take=$(( (end - start) / 1000000 ))

# Report the result.
echo "Time taken to execute commands is ${take} millisecond ."
ping 延迟/超时 临时排查 企微/协作报警
#!/bin/bash
# ping_monitor.sh - ping a target once per second, log every result, and
# optionally push latency / timeout alerts to a chat webhook.
#
# Usage: ping_monitor.sh <IP or hostname> [timeout_seconds webhook_url]
#   $1  target to ping (required)
#   $2  ping reply timeout in seconds (default 2)
#   $3  webhook URL; URLs containing "xz.wps.cn" or "weixin.qq.com" are
#       supported, anything else is silently ignored
#
# Alerts are only sent when both $2 and $3 are given.
# NOTE(review): as in the original script, the latency alert fires when the
# round-trip time in *milliseconds* is greater than the number passed as $2
# (the timeout in *seconds*). Confirm this threshold is what is wanted.
#
# Logs are appended in the current directory:
#   PING_<target>_OK.log     one line per successful reply
#   PING_<target>_TMOUT.log  one line per missed reply

# Print the round-trip time (ms) found in one line of ping output, or nothing
# when the line has no "time=<number>" token. Matching on the token itself
# works both for "from 1.2.3.4:" and "from host (1.2.3.4):" reply formats,
# which put the value in different whitespace-separated fields.
extract_ping_ms() {
  local reply=$1
  local rtt_re='time=([0-9]+([.][0-9]+)?)'
  if [[ "$reply" =~ $rtt_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  fi
}

# Succeed when $1 is numerically greater than $2 (decimals allowed).
# Fails quietly when either value is empty or not a plain number.
exceeds_threshold() {
  local value=$1 limit=$2
  local num_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  [[ "$value" =~ $num_re && "$limit" =~ $num_re ]] || return 1
  awk -v v="$value" -v l="$limit" 'BEGIN { exit !(v + 0 > l + 0) }'
}

# Print the markdown JSON body for a webhook message.
#   $1  name of the JSON field carrying the text ("text" or "content")
#   $2  message text (inserted verbatim; it is not JSON-escaped)
markdown_payload() {
  printf '{"msgtype": "markdown","markdown": {"%s": "<font size=\\"16\\" color=\\"red\\">%s</font>\\n\\r\\n"}}' \
    "$1" "$2"
}

# POST an alert to the webhook. The JSON field name depends on which service
# the URL belongs to; unknown URLs are ignored.
#   $1  webhook URL
#   $2  message text
send_alert() {
  local webhook=$1 message=$2 field
  case "$webhook" in
    *xz.wps.cn*) field=text ;;
    *weixin.qq.com*) field=content ;;
    *) return 0 ;;
  esac
  curl -X POST -H "Content-Type: application/json" -H "Cache-Control: no-cache" \
    -d "$(markdown_payload "$field" "$message")" "$webhook"
}

# Entry point: validate arguments, then ping forever at one-second intervals.
main() {
  local target=$1 timeout=${2:-2} webhook=$3
  local reply ms now host

  if [[ -z "$target" ]]; then
    echo "用法:$0 <IP or 域名> [超时秒数 协作/企微 webhook地址]"
    echo "监控日志请到当前目录下获取"
    return 0
  fi

  host=$(hostname)
  while :; do
    # The pipeline status is grep's: success only when a reply line with
    # "time=" was printed.
    if reply=$(ping -c 1 -W "$timeout" "$target" | grep "time="); then
      now=$(date +"%Y-%m-%d--%H:%M:%S")
      echo "$reply $now" >> "PING_${target}_OK.log"
      ms=$(extract_ping_ms "$reply")
      if [[ -n "$2" && -n "$webhook" ]] && exceeds_threshold "$ms" "$2"; then
        send_alert "$webhook" "$now $host ping $target 延迟${ms}ms"
      fi
    else
      now=$(date +"%Y-%m-%d--%H:%M:%S")
      # Report the timeout that was actually used, not a fixed "2seconds".
      echo "PING $target ${timeout}seconds TMOUT... $(date)" >> "PING_${target}_TMOUT.log"
      if [[ -n "$2" && -n "$webhook" ]]; then
        send_alert "$webhook" "$now $host ping $target ping超时(${timeout}S)"
      fi
    fi
    sleep 1
  done
}

main "$@"
dig dns 解析耗时排查监控脚本
#!/bin/bash
# DIG_monitor.sh - resolve a domain against a given DNS server once per
# second (under `strace -c`), log the query time, and optionally alert a chat
# webhook on slow or failed lookups.
#
# Usage: DIG_monitor.sh <domain> <dns_server> [warn_ms webhook_url]
#   $1  domain to resolve (required)
#   $2  DNS server address (required)
#   $3  alert when the reported query time exceeds this many milliseconds
#   $4  webhook URL; URLs containing "xz.wps.cn" or "weixin.qq.com" are
#       supported, anything else is silently ignored
#
# Alerts and the *_STRACE.log files are only produced when both $3 and $4
# are given. Logs are appended in the current directory.

# dig defaults to a 5 second timeout and 3 tries; the log and alert texts
# below state a 2 second timeout, so enforce exactly that.
DIG_TIMEOUT_SECONDS=2

# Print the number from dig's ";; Query time: N msec" line, or nothing when
# the output has no such line.
#   $1  combined dig output
dig_query_time_ms() {
  awk '/Query time/ { print $4; exit }' <<<"$1"
}

# Succeed when $1 is numerically greater than $2 (decimals allowed).
# Fails quietly when either value is empty or not a plain number, instead of
# feeding a broken expression to a calculator.
exceeds_threshold() {
  local value=$1 limit=$2
  local num_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  [[ "$value" =~ $num_re && "$limit" =~ $num_re ]] || return 1
  awk -v v="$value" -v l="$limit" 'BEGIN { exit !(v + 0 > l + 0) }'
}

# Print the markdown JSON body for a webhook message.
#   $1  name of the JSON field carrying the text ("text" or "content")
#   $2  message text (inserted verbatim; it is not JSON-escaped)
markdown_payload() {
  printf '{"msgtype": "markdown","markdown": {"%s": "<font size=\\"16\\" color=\\"red\\">%s</font>\\n\\r\\n"}}' \
    "$1" "$2"
}

# POST an alert to the webhook; the JSON field name depends on the service.
#   $1  webhook URL
#   $2  message text
send_alert() {
  local webhook=$1 message=$2 field
  case "$webhook" in
    *xz.wps.cn*) field=text ;;
    *weixin.qq.com*) field=content ;;
    *) return 0 ;;
  esac
  curl -X POST -H "Content-Type: application/json" -H "Cache-Control: no-cache" \
    -d "$(markdown_payload "$field" "$message")" "$webhook"
}

# Entry point: validate arguments, then query forever at one-second intervals.
main() {
  local domain=$1 dns_server=$2 warn_ms=$3 webhook=$4
  local output now query_line answers ms

  if [[ -z "$dns_server" || -z "$domain" ]]; then
    echo "用法:$0 <域名> DNS地址 [延迟毫秒数 协作/企微 webhook地址]"
    echo "监控日志请到当前目录下获取"
    return 0
  fi

  while :; do
    # strace exits with the traced command's status, so this branch is taken
    # when dig itself succeeded. stderr is merged in to capture the strace
    # summary alongside dig's output.
    if output=$(strace -c dig "+time=${DIG_TIMEOUT_SECONDS}" +tries=1 \
        "$domain" "@${dns_server}" 2>&1); then
      now=$(date +"%Y-%m-%d--%H:%M:%S")
      query_line=$(grep "Query time" <<<"$output")
      answers=$(grep "IN" <<<"$output")
      echo "$now $query_line $answers" >> "DIG_${dns_server}_OK.log"
      ms=$(dig_query_time_ms "$output")
      if [[ -n "$warn_ms" && -n "$webhook" ]] && exceeds_threshold "$ms" "$warn_ms"; then
        # Keep the full strace summary for slow lookups.
        echo "${now}${output}" >> "DIG_${dns_server}_OK_DELAY_STRACE.log"
        send_alert "$webhook" "$now $dns_server DIG解析 $domain 耗时${ms}ms"
      fi
    else
      now=$(date +"%Y-%m-%d--%H:%M:%S")
      echo "$now DIG $dns_server 2seconds TMOUT... $(date)" >> "DIG__${dns_server}_TMOUT.log"
      # The original tested "/$4", which is never empty; test the webhook.
      if [[ -n "$warn_ms" && -n "$webhook" ]]; then
        echo "${now}${output}" >> "DIG__${dns_server}_TMOUT_DELAY_STRACE.log"
        send_alert "$webhook" "$now $dns_server DIG解析 $domain 超时(2000ms)"
      fi
    fi
    sleep 1
  done
}

main "$@"
DNS 解析耗时监控脚本(支持指定 DNS 服务器 IP 和端口)
#!/bin/bash
# DIG_monitor.sh - resolve a domain against a given DNS server and port once
# per second (under `strace -c`), log the query time, and optionally alert a
# chat webhook on slow or failed lookups.
#
# Usage: DIG_monitor.sh -d <domain> -n <dns_server> [-v warn_ms] [-p port] [-u webhook_url]
#   -d  domain to resolve (required)
#   -n  DNS server address (required)
#   -p  DNS server port (default 53)
#   -v  alert when the reported query time exceeds this many milliseconds
#   -u  webhook URL; URLs containing "xz.wps.cn" or "weixin.qq.com" are
#       supported, anything else is silently ignored
# The older positional form is still accepted for values not given as options:
#   DIG_monitor.sh <domain> <dns_server> [warn_ms webhook_url]
#
# Alerts and the *_STRACE.log files are only produced when both a warn value
# and a webhook are given. Logs are appended in the current directory.

# dig defaults to a 5 second timeout and 3 tries; the log and alert texts
# below state a 2 second timeout, so enforce exactly that.
DIG_TIMEOUT_SECONDS=2

# Print the usage line.
usage() {
  echo "Usage: $(basename "$0") <-d 域名> <-n dns服务器地址> [-v 报警毫秒数] [-p dns 端口] [-u webhook地址(协作/企微)]"
}

# Parse options into the globals DOMAIN, IP_ADDRESS, PORT, WARN_VALUE and
# kdoc_webhook. Options are read first; whatever is still unset afterwards is
# filled from the remaining positional arguments, so a flag such as "-n" can
# never end up stored as the domain. Returns 2 on an invalid option.
parse_args() {
  local opt OPTIND=1 OPTARG
  DOMAIN='' IP_ADDRESS='' PORT='' WARN_VALUE='' kdoc_webhook=''
  while getopts 'd:n:p:v:u:' opt; do
    case "$opt" in
      d) DOMAIN=$OPTARG ;;
      n) IP_ADDRESS=$OPTARG ;;
      p) PORT=$OPTARG ;;
      v) WARN_VALUE=$OPTARG ;;
      u) kdoc_webhook=$OPTARG ;;
      *)
        usage
        return 2
        ;;
    esac
  done
  shift $((OPTIND - 1))
  DOMAIN=${DOMAIN:-$1}
  IP_ADDRESS=${IP_ADDRESS:-$2}
  WARN_VALUE=${WARN_VALUE:-$3}
  kdoc_webhook=${kdoc_webhook:-$4}
  PORT=${PORT:-53}
}

# Print the number from dig's ";; Query time: N msec" line, or nothing when
# the output has no such line.
#   $1  combined dig output
dig_query_time_ms() {
  awk '/Query time/ { print $4; exit }' <<<"$1"
}

# Succeed when $1 is numerically greater than $2 (decimals allowed).
# Fails quietly when either value is empty or not a plain number, instead of
# feeding a broken expression to a calculator.
exceeds_threshold() {
  local value=$1 limit=$2
  local num_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  [[ "$value" =~ $num_re && "$limit" =~ $num_re ]] || return 1
  awk -v v="$value" -v l="$limit" 'BEGIN { exit !(v + 0 > l + 0) }'
}

# Print the markdown JSON body for a webhook message.
#   $1  name of the JSON field carrying the text ("text" or "content")
#   $2  message text (inserted verbatim; it is not JSON-escaped)
markdown_payload() {
  printf '{"msgtype": "markdown","markdown": {"%s": "<font size=\\"16\\" color=\\"red\\">%s</font>\\n\\r\\n"}}' \
    "$1" "$2"
}

# POST an alert to the webhook; the JSON field name depends on the service.
#   $1  webhook URL
#   $2  message text
send_alert() {
  local webhook=$1 message=$2 field
  case "$webhook" in
    *xz.wps.cn*) field=text ;;
    *weixin.qq.com*) field=content ;;
    *) return 0 ;;
  esac
  curl -X POST -H "Content-Type: application/json" -H "Cache-Control: no-cache" \
    -d "$(markdown_payload "$field" "$message")" "$webhook"
}

# Entry point: parse arguments, then query forever at one-second intervals.
main() {
  local output now query_line answers ms log_prefix

  parse_args "$@" || return

  if [[ -z "$IP_ADDRESS" || -z "$DOMAIN" ]]; then
    usage
    echo "监控日志请到当前目录下获取"
    return 0
  fi

  log_prefix="DIG_${DOMAIN}_${IP_ADDRESS}_${PORT}"
  while :; do
    # strace exits with the traced command's status, so this branch is taken
    # when dig itself succeeded. stderr is merged in to capture the strace
    # summary alongside dig's output.
    if output=$(strace -c dig "+time=${DIG_TIMEOUT_SECONDS}" +tries=1 \
        "$DOMAIN" "@${IP_ADDRESS}" -p "$PORT" 2>&1); then
      now=$(date +"%Y-%m-%d--%H:%M:%S")
      query_line=$(grep "Query time" <<<"$output")
      answers=$(grep "IN" <<<"$output")
      echo "$now $query_line $answers" >> "${log_prefix}_OK.log"
      ms=$(dig_query_time_ms "$output")
      if [[ -n "$WARN_VALUE" && -n "$kdoc_webhook" ]] \
          && exceeds_threshold "$ms" "$WARN_VALUE"; then
        # Keep the full strace summary for slow lookups.
        echo "${now}${output}" >> "${log_prefix}_OK_DELAY_STRACE.log"
        send_alert "$kdoc_webhook" \
          "$now ${IP_ADDRESS}端口:$PORT DIG解析 $DOMAIN 耗时${ms}ms"
      fi
    else
      now=$(date +"%Y-%m-%d--%H:%M:%S")
      echo "$now DIG $IP_ADDRESS 2seconds TMOUT... $(date)" >> "${log_prefix}_TMOUT.log"
      if [[ -n "$WARN_VALUE" && -n "$kdoc_webhook" ]]; then
        echo "${now}${output}" >> "${log_prefix}_TMOUT_DELAY_STRACE.log"
        send_alert "$kdoc_webhook" \
          "$now ${IP_ADDRESS}端口:$PORT DIG解析 $DOMAIN 超时(2000ms)"
      fi
    fi
    sleep 1
  done
}

main "$@"
tcp端口持续监测
#!/bin/bash
# Continuously monitor whether a TCP port is accepting connections.
#
# Setup:
#   - tcping binary: https://ks3-cn-beijing.ksyun.com/zhangmingda/tcping
#     Place it in the directory the monitor is started from and make it
#     executable: chmod +x tcping
#   - Make this script executable, e.g. if it is saved as tcping-monitor.sh:
#     chmod +x tcping-monitor.sh
#   - Run it in the background and check the log periodically, e.g. to watch
#     port 80 on 8.8.8.8:
#     nohup ./tcping-monitor.sh 8.8.8.8 80 &
#
# Usage: tcping-monitor.sh <IP or hostname> <port>
# Log:   <target>_TcpPort<port>_monitor.log in the current directory.

# Entry point: validate arguments and the tcping binary, then probe forever.
main() {
  local guest_ip=$1 guest_port=$2
  local logfile="${1}_TcpPort${2}_monitor.log"
  local result

  if [[ -z "$guest_ip" || -z "$guest_port" ]]; then
    echo "用法:$0 <IP or 域名> <端口>"
    echo "监控日志请到当前目录下获取"
    return 0
  fi

  # Without this check a missing binary fails instantly on every iteration,
  # and because failures skip the sleep the loop would spin and flood the log.
  if [[ ! -x ./tcping ]]; then
    echo "错误:当前目录下未找到可执行的 ./tcping" >&2
    return 1
  fi

  while :; do
    # -t is the connect timeout in seconds. A successful probe returns
    # quickly, so pause one second before the next; a failed probe is
    # retried immediately.
    if result=$(./tcping -t 2 "$guest_ip" "$guest_port"); then
      sleep 1
    fi
    # Keep each log entry on a single line even if tcping prints several.
    echo "$(date) ${result//$'\n'/ }" >> "$logfile"
  done
}

main "$@"
curl 持续监控返回值
#!/bin/bash
#
# Author: zhangmingda
# Date:   20191021
# Use:    continuously monitor the status of an https/http request.
#
# Usage: <script> <url>
# Once per second the URL is fetched with curl (IPv4 only, 3 second limit,
# body discarded) and one line of timings plus the HTTP status code is
# printed and appended to curl_monitor.log in the current directory.

# Print the curl --write-out format for one sample.
#   $1  timestamp to embed in the line
# The trailing "\n" is left as a literal backslash-n for curl to expand.
write_out_format() {
  printf '%s' "curl_tid:${1} "'%{time_namelookup} %{time_connect} %{time_starttransfer} %{time_total} code:%{http_code} \n'
}

# Entry point: validate the URL argument, write the log header, then sample
# forever at one-second intervals.
main() {
  local url=$1
  local logfile='curl_monitor.log'
  local tid

  # Without a URL curl would only fail, once per second, forever.
  if [[ -z "$url" ]]; then
    echo "用法:$0 <URL>" >&2
    return 2
  fi

  # Log header (tee -a creates the file when it does not exist yet).
  echo
  echo "curl_log result from $1 " | tee -a "$logfile"
  echo ' DNS_OK: TCP_OK: DATA_START: TOTAL_TIME: http_code:' | tee -a "$logfile"

  while true; do
    tid="$(date '+%F %H:%M:%S')"
    curl -m 3 -4 -o /dev/null -s -w "$(write_out_format "$tid")" "$url" \
      | tee -a "$logfile"
    sleep 1
  done
}

main "$@"
持续监控https/http连接请求状态
#!/bin/bash
#
# Author: zhangmingda
# Date:   2019-12-30
# Use:    continuously monitor the status of an https/http request.
#
# Usage: <script> <url>
# Once per second the URL is fetched with curl (IPv4 only, 3 second limit,
# body discarded) and one line with timings, status codes, the remote
# address, the effective URL, download speed and redirect time is printed and
# appended to curl_monitor.log in the current directory.

# Print the curl --write-out format for one sample.
#   $1  timestamp to embed in the line
# The trailing "\n" is left as a literal backslash-n for curl to expand.
write_out_format() {
  local fields
  fields='%{time_namelookup} %{time_connect} %{time_starttransfer} %{time_total} '
  fields+='code:%{http_code} http_connect:%{http_connect} '
  fields+='remote_tcp/ip:%{remote_ip}:%{remote_port} url_effective:%{url_effective} '
  fields+='speed_download:%{speed_download} time_redirect:%{time_redirect}\n'
  printf '%s' "curl_tid:${1} ${fields}"
}

# Entry point: validate the URL argument, write the log header, then sample
# forever at one-second intervals.
main() {
  local url=$1
  local logfile='curl_monitor.log'
  local tid

  # Without a URL curl would only fail, once per second, forever.
  if [[ -z "$url" ]]; then
    echo "用法:$0 <URL>" >&2
    return 2
  fi

  # Log header (tee -a creates the file when it does not exist yet).
  echo
  echo "curl_log result from $1 " | tee -a "$logfile"
  echo ' DNS_OK: TCP_OK: DATA_START: TOTAL_TIME: http_code:' | tee -a "$logfile"

  while true; do
    tid="$(date '+%F %H:%M:%S')"
    curl -m 3 -4 -o /dev/null -s -w "$(write_out_format "$tid")" "$url" \
      | tee -a "$logfile"
    sleep 1
  done
}

main "$@"
posted on 2019-12-11 10:20 zhangmingda 阅读(667) 评论(0) 编辑 收藏 举报