常用故障排查监控shell脚本


计算脚本执行耗时(毫秒级)

#!/bin/bash
# Measure how long a section of a script takes and report it in milliseconds.

# Timestamp (seconds followed by nanoseconds) taken before the measured work.
start="$(date +%s%N)"
printf 'start nanosecond: %s\n' "$start"

# --- measured section: replace with the commands to be timed ---
sleep 0.001

# Timestamp taken after the measured work.
end="$(date +%s%N)"
printf 'end   nanosecond: %s\n' "$end"

# The difference is in nanoseconds; 1 ms = 1000000 ns (integer division).
elapsed_ns=$((end - start))
take=$((elapsed_ns / 1000000))

# Report the result.
printf 'Time taken to execute commands is %s millisecond .\n' "$take"

 ping 延迟/超时 临时排查  企微/协作报警

#!/bin/bash
# ping_monitor.sh
#
# Ping a host once per second, append every result to a log file in the
# current directory and, optionally, post an alert to a webhook.
#
# Usage: ping_monitor.sh <IP or domain> [threshold webhook_url]
#   $1  target host (required)
#   $2  optional; used BOTH as the ping reply timeout in seconds (ping -W,
#       default 2) AND as the latency threshold in ms above which an alert
#       is sent
#   $3  optional webhook URL; alerts are only sent when $2 and $3 are both
#       given and the URL contains "xz.wps.cn" or "weixin.qq.com"
#
# Logs: PING_<host>_OK.log (replies) and PING_<host>_TMOUT.log (no reply).

IP_ADDRESS=$1
kdoc_webhook=$3
PING_TIME_OUT_SECOND=${2:-2}

# Print the round-trip time (ms) found in a ping reply line, or nothing.
# Anchoring on "time=" instead of a fixed field index keeps the result
# correct whether or not ping prints a reverse-resolved host name.
_ping_rtt_ms() {
    printf '%s\n' "$1" | sed -n 's/.*time=\([0-9.]*\).*/\1/p' | head -n 1
}

# Post a red markdown message ($1) to the configured webhook.
# The payload key is "text" for xz.wps.cn URLs and "content" for
# weixin.qq.com URLs; any other URL is silently ignored.
_ping_alert() {
    local field
    if [[ "$kdoc_webhook" =~ "xz.wps.cn" ]]; then
        field=text
    elif [[ "$kdoc_webhook" =~ "weixin.qq.com" ]]; then
        field=content
    else
        return 0
    fi
    curl -X POST -H "Content-Type: application/json" -H "Cache-Control: no-cache" \
        -d '{"msgtype": "markdown","markdown": {"'"$field"'": "<font size=\"16\" color=\"red\">'"$1"'</font>\n\r\n"}}' \
        "$kdoc_webhook"
}

if [[ -n "$IP_ADDRESS" ]]; then

    while :; do
        # The pipeline status is grep's: success only when a reply line
        # containing "time=" was printed.
        if PING_OK=$(ping -c 1 -W "$PING_TIME_OUT_SECOND" "$IP_ADDRESS" | grep "time="); then
            now=$(date +"%Y-%m-%d--%H:%M:%S")
            echo "$PING_OK  $now" >> "PING_${IP_ADDRESS}_OK.log"
            ms=$(_ping_rtt_ms "$PING_OK")
            # Skip the comparison when no time could be parsed; feeding an
            # empty operand to bc yields "(standard_in) 1: syntax error".
            if [[ -n "$2" && -n "$3" && -n "$ms" ]]; then
                if [[ $(echo "$ms > $2" | bc) -eq 1 ]]; then
                    _ping_alert "$now $(hostname) ping $IP_ADDRESS 延迟${ms}ms"
                fi
            fi
        else
            # Report the timeout that was actually used, not a fixed "2".
            echo "PING $IP_ADDRESS ${PING_TIME_OUT_SECOND}seconds TMOUT...  $(date)" >> "PING_${IP_ADDRESS}_TMOUT.log"
            if [[ -n "$2" && -n "$3" ]]; then
                _ping_alert "$(date +"%Y-%m-%d--%H:%M:%S") $(hostname) ping $IP_ADDRESS ping超时(${PING_TIME_OUT_SECOND}S)"
            fi
        fi

        sleep 1
    done
else
    echo "用法:$0 <IP or 域名> [超时秒数 协作/企微 webhook地址]"
    echo "监控日志请到当前目录下获取"
fi

 

 dig dns 解析耗时排查监控脚本

#!/bin/bash
# DIG_monitor.sh
#
# Run `dig` (under `strace -c`) against a DNS server once per second, log
# the outcome to the current directory and, optionally, post an alert to a
# webhook when the query is slow or fails.
#
# Usage: DIG_monitor.sh <domain> <dns server> [warn_ms webhook_url]
#   $1  domain to resolve (required)
#   $2  DNS server address (required)
#   $3  optional query-time threshold in ms
#   $4  optional webhook URL; alerting needs both $3 and $4, and the URL
#       must contain "xz.wps.cn" or "weixin.qq.com"
#
# NOTE(review): the "2seconds" / "2000ms" wording in the failure messages is
# not backed by any dig option set here (no +time/+tries is passed) --
# confirm the intended timeout.

IP_ADDRESS=$2
kdoc_webhook=$4
DOMAIN=$1
WARN_VALUE=$3

# Print the number from the first ";; Query time: N msec" line of dig
# output ($1), or nothing when no such line exists.
_dig_query_ms() {
    printf '%s\n' "$1" | awk '/Query time/ {print $4; exit}'
}

# Post a red markdown message ($1) to the configured webhook.
# The payload key is "text" for xz.wps.cn URLs and "content" for
# weixin.qq.com URLs; any other URL is silently ignored.
_dig_alert() {
    local field
    if [[ "$kdoc_webhook" =~ "xz.wps.cn" ]]; then
        field=text
    elif [[ "$kdoc_webhook" =~ "weixin.qq.com" ]]; then
        field=content
    else
        return 0
    fi
    curl -X POST -H "Content-Type: application/json" -H "Cache-Control: no-cache" \
        -d '{"msgtype": "markdown","markdown": {"'"$field"'": "<font size=\"16\" color=\"red\">'"$1"'</font>\n\r\n"}}' \
        "$kdoc_webhook"
}

if [[ -n "$IP_ADDRESS" && -n "$DOMAIN" ]]; then

    while :; do
        # stderr is merged in so the strace summary is captured with dig's
        # own output; the exit status tested below is the one strace returns.
        STRACE_DIG=$(strace -c dig "$DOMAIN" @"$IP_ADDRESS" 2>&1)
        dig_status=$?
        NOWTIME=$(date +"%Y-%m-%d--%H:%M:%S")

        if [[ $dig_status -eq 0 ]]; then
            DIG_OK=$(printf '%s\n' "$STRACE_DIG" | grep "Query time")
            DIG_RESU=$(printf '%s\n' "$STRACE_DIG" | grep "IN")
            echo "$NOWTIME $DIG_OK  $DIG_RESU" >> "DIG_${IP_ADDRESS}_OK.log"
            ms=$(_dig_query_ms "$STRACE_DIG")
            # An empty $ms would make bc print
            # "(standard_in) 1: syntax error", so require it to be set.
            if [[ -n "$WARN_VALUE" && -n "$kdoc_webhook" && -n "$ms" ]]; then
                if [[ $(echo "$ms > $WARN_VALUE" | bc) -eq 1 ]]; then
                    # Keep the full strace/dig output of slow queries.
                    echo "${NOWTIME}${STRACE_DIG}" >> "DIG_${IP_ADDRESS}_OK_DELAY_STRACE.log"
                    _dig_alert "$NOWTIME $IP_ADDRESS DIG解析 $DOMAIN 耗时${ms}ms"
                fi
            fi
        else
            echo "$NOWTIME DIG $IP_ADDRESS 2seconds TMOUT...  $(date)" >> "DIG__${IP_ADDRESS}_TMOUT.log"
            # The original tested "/$4", which is never empty; test the
            # webhook argument itself.
            if [[ -n "$WARN_VALUE" && -n "$kdoc_webhook" ]]; then
                echo "${NOWTIME}${STRACE_DIG}" >> "DIG__${IP_ADDRESS}_TMOUT_DELAY_STRACE.log"
                _dig_alert "$NOWTIME $IP_ADDRESS DIG解析 $DOMAIN 超时(2000ms)"
            fi
        fi

        sleep 1
    done
else
    echo "用法:$0 <域名>  DNS地址 [延迟毫秒数 协作/企微 webhook地址]"
    echo "监控日志请到当前目录下获取"
fi

 dig dns 解析耗时排查监控脚本(支持指定 DNS 服务器 IP 和端口)

#!/bin/bash
# DIG_monitor.sh (variant that accepts a DNS server port)
#
# Run `dig` (under `strace -c`) against a DNS server and port once per
# second, log the outcome to the current directory and, optionally, post an
# alert to a webhook when the query is slow or fails.
#
# Usage (options):    <-d domain> <-n dns server> [-v warn_ms] [-p port] [-u webhook_url]
# Usage (positional): <domain> <dns server> [warn_ms webhook_url]
#
# Alerting needs both a threshold and a webhook URL containing "xz.wps.cn"
# or "weixin.qq.com". The port defaults to 53.
#
# NOTE(review): the "2seconds" / "2000ms" wording in the failure messages is
# not backed by any dig option set here (no +time/+tries is passed) --
# confirm the intended timeout.

# Print the usage line.
_usage() {
    echo "Usage: $(basename "$0") <-d 域名> <-n dns服务器地址> [-v 报警毫秒数]  [-p dns 端口] [-u webhook地址(协作/企微)]"
}

# Fill DOMAIN, IP_ADDRESS, WARN_VALUE, kdoc_webhook and PORT from "$@".
# Positional values are honoured only when the first argument is not an
# option; otherwise option words such as "-n" would be mistaken for the
# threshold or the webhook and spuriously enable alerting.
_parse_args() {
    local OPT OPTARG OPTIND=1
    DOMAIN= IP_ADDRESS= WARN_VALUE= kdoc_webhook= PORT=

    if [[ $# -gt 0 && "$1" != -* ]]; then
        DOMAIN=$1
        IP_ADDRESS=$2
        WARN_VALUE=$3
        kdoc_webhook=$4
    fi

    while getopts 'd:n:p:v:u:' OPT; do
        case "$OPT" in
            d) DOMAIN=$OPTARG ;;
            n) IP_ADDRESS=$OPTARG ;;
            p) PORT=$OPTARG ;;
            v) WARN_VALUE=$OPTARG ;;
            u) kdoc_webhook=$OPTARG ;;
            *)
                # Unknown option or missing argument: stop instead of
                # monitoring with a half-parsed configuration.
                _usage
                exit 1
                ;;
        esac
    done

    PORT=${PORT:-53}
}

# Print the number from the first ";; Query time: N msec" line of dig
# output ($1), or nothing when no such line exists.
_dig_query_ms() {
    printf '%s\n' "$1" | awk '/Query time/ {print $4; exit}'
}

# Post a red markdown message ($1) to the configured webhook.
# The payload key is "text" for xz.wps.cn URLs and "content" for
# weixin.qq.com URLs; any other URL is silently ignored.
_dig_alert() {
    local field
    if [[ "$kdoc_webhook" =~ "xz.wps.cn" ]]; then
        field=text
    elif [[ "$kdoc_webhook" =~ "weixin.qq.com" ]]; then
        field=content
    else
        return 0
    fi
    curl -X POST -H "Content-Type: application/json" -H "Cache-Control: no-cache" \
        -d '{"msgtype": "markdown","markdown": {"'"$field"'": "<font size=\"16\" color=\"red\">'"$1"'</font>\n\r\n"}}' \
        "$kdoc_webhook"
}

_parse_args "$@"

if [[ -n "$IP_ADDRESS" && -n "$DOMAIN" ]]; then

    log_prefix="DIG_${DOMAIN}_${IP_ADDRESS}_${PORT}"

    while :; do
        # stderr is merged in so the strace summary is captured with dig's
        # own output; the exit status tested below is the one strace returns.
        STRACE_DIG=$(strace -c dig "$DOMAIN" @"$IP_ADDRESS" -p "$PORT" 2>&1)
        dig_status=$?
        NOWTIME=$(date +"%Y-%m-%d--%H:%M:%S")

        if [[ $dig_status -eq 0 ]]; then
            DIG_OK=$(printf '%s\n' "$STRACE_DIG" | grep "Query time")
            DIG_RESU=$(printf '%s\n' "$STRACE_DIG" | grep "IN")
            echo "$NOWTIME $DIG_OK  $DIG_RESU" >> "${log_prefix}_OK.log"
            ms=$(_dig_query_ms "$STRACE_DIG")
            # An empty $ms would make bc print
            # "(standard_in) 1: syntax error", so require it to be set.
            if [[ -n "$WARN_VALUE" && -n "$kdoc_webhook" && -n "$ms" ]]; then
                if [[ $(echo "$ms > $WARN_VALUE" | bc) -eq 1 ]]; then
                    # Keep the full strace/dig output of slow queries.
                    echo "${NOWTIME}${STRACE_DIG}" >> "${log_prefix}_OK_DELAY_STRACE.log"
                    _dig_alert "$NOWTIME ${IP_ADDRESS}端口:$PORT DIG解析 $DOMAIN 耗时${ms}ms"
                fi
            fi
        else
            echo "$NOWTIME DIG $IP_ADDRESS 2seconds TMOUT...  $(date)" >> "${log_prefix}_TMOUT.log"
            if [[ -n "$WARN_VALUE" && -n "$kdoc_webhook" ]]; then
                echo "${NOWTIME}${STRACE_DIG}" >> "${log_prefix}_TMOUT_DELAY_STRACE.log"
                _dig_alert "$NOWTIME ${IP_ADDRESS}端口:$PORT DIG解析 $DOMAIN 超时(2000ms)"
            fi
        fi

        sleep 1
    done
else
    _usage
    echo "监控日志请到当前目录下获取"
fi

 

 tcp端口持续监测

#!/bin/bash
# Continuously probe a TCP port with ./tcping and log every result.
#
# tcping binary: https://ks3-cn-beijing.ksyun.com/zhangmingda/tcping
# It is invoked as ./tcping, i.e. from the current working directory; make
# it executable with: chmod +x tcping
# Make this script executable as well, e.g. if it is saved as
# tcping-monitor.sh: chmod +x tcping-monitor.sh
# Typical background run (watch the log periodically):
#   nohup ./tcping-monitor.sh 8.8.8.8 80 &
#
# Usage: <script> <IP or domain> <port>
# Log:   <host>_TcpPort<port>_monitor.log in the current directory.

# Probe host $1 port $2 forever, appending one line per probe to file $3.
# Returns 1 immediately when ./tcping is not executable; without this check
# every probe would fail instantly and the loop would spin without sleeping,
# growing the log without bound.
monitor_tcp_port() {
    local host=$1 port=$2 logfile=$3 result

    if [[ ! -x ./tcping ]]; then
        echo "错误:当前目录下未找到可执行的 ./tcping" >&2
        return 1
    fi

    while :; do
        # -t 2: probe timeout in seconds.
        result=$(./tcping -t 2 "$host" "$port")
        # Pause only after a successful probe.
        if [[ $? -eq 0 ]]; then
            sleep 1
        fi
        # Intentionally unquoted so multi-line output collapses to one line.
        echo $(date) $result >> "$logfile"
    done
}

GUEST_IP=$1
GUEST_PORT=$2
LOGFILE=$1_TcpPort$2_monitor.log

if [[ -n "$GUEST_IP" && -n "$GUEST_PORT" ]]; then
    monitor_tcp_port "$GUEST_IP" "$GUEST_PORT" "$LOGFILE" || exit 1
else
    echo "用法:$0 <IP or 域名> <端口>"
    echo "监控日志请到当前目录下获取"
fi

 

curl 持续监控返回值

#!/bin/bash
#
# Author: zhangmingda
# Date: 20191021
# Purpose: continuously monitor the status of HTTP/HTTPS requests to a URL.
#
# Usage: <script> <URL>
# Once per second one IPv4 request (3 s max) is made; the DNS lookup,
# connect, start-transfer and total times plus the HTTP status code are
# printed and appended to curl_monitor.log in the current directory.
#########################################################
logfile='curl_monitor.log'

# Print the curl --write-out format for a sample taken at timestamp $1.
# The format must stay on a single line: a line break inside "%{http_code}"
# stops curl from recognising the variable. The trailing "\n" is a literal
# backslash-n that curl itself turns into a newline.
_curl_write_out() {
    printf '%s' "curl_tid:$1   %{time_namelookup}    %{time_connect}    %{time_starttransfer}     %{time_total}     code:%{http_code} \n"
}

url=$1

if [[ -n "$url" ]]; then
    # tee -a creates the log file when it does not exist yet.
    echo
    echo "curl_log  result from  $1 " | tee -a "$logfile"
    #########################################################
    echo '                              DNS_OK: TCP_OK: DATA_START: TOTAL_TIME: http_code:' | tee -a "$logfile"

    while true; do
        tid="$(date '+%F %H:%M:%S')"
        curl -m 3 -4 -o /dev/null -s -w "$(_curl_write_out "$tid")" "$url" \
            | tee -a "$logfile"
        sleep 1
    done
else
    # Without a URL the loop would run curl with nothing to fetch forever.
    echo "用法:$0 <URL>"
    echo "监控日志请到当前目录下获取"
fi

 

持续监控https/http连接请求状态

#!/bin/bash
#
# Author: zhangmingda
# Date: 2019-12-30
# Purpose: continuously monitor the status of HTTP/HTTPS requests to a URL.
#
# Usage: <script> <URL>
# Once per second one IPv4 request (3 s max) is made; timing figures, the
# HTTP status code, the remote address/port, the effective URL, the download
# speed and the redirect time are printed and appended to curl_monitor.log
# in the current directory.
#########################################################
logfile='curl_monitor.log'

# Print the curl --write-out format for a sample taken at timestamp $1.
# The trailing "\n" is a literal backslash-n that curl itself turns into a
# newline.
_curl_write_out() {
    printf '%s' "curl_tid:$1 %{time_namelookup} %{time_connect} %{time_starttransfer} %{time_total} code:%{http_code} http_connect:%{http_connect} remote_tcp/ip:%{remote_ip}:%{remote_port} url_effective:%{url_effective} speed_download:%{speed_download} time_redirect:%{time_redirect}\n"
}

url=$1

if [[ -n "$url" ]]; then
    # tee -a creates the log file when it does not exist yet.
    echo
    echo "curl_log result from $1 " | tee -a "$logfile"
    #########################################################
    echo ' DNS_OK: TCP_OK: DATA_START: TOTAL_TIME: http_code:' | tee -a "$logfile"

    while true; do
        tid="$(date '+%F %H:%M:%S')"
        curl -m 3 -4 -o /dev/null -s -w "$(_curl_write_out "$tid")" "$url" \
            | tee -a "$logfile"
        sleep 1
    done
else
    # Without a URL the loop would run curl with nothing to fetch forever.
    echo "用法:$0 <URL>"
    echo "监控日志请到当前目录下获取"
fi

posted on 2019-12-11 10:20  zhangmingda  阅读(650)  评论(0)  编辑  收藏  举报

导航