Linux 巡检服务器资源服务脚本

说明:该脚本通过数组变量 serverInfo 定义各服务器的 IP、SSH 端口、账号、密码以及需要巡检的服务(端口或进程名),并指定磁盘和内存的告警阈值;随后逐台巡检磁盘、内存的剩余空间以及各服务是否在运行,当剩余空间小于告警阈值或服务未运行时,把告警信息输出到 /tmp/result.txt。

 

#!/bin/bash

#==================== Parameter section: start ====================
# Servers to inspect, one entry per host. Fields are separated by "@@":
#   IP@@ssh-port@@user@@password[@@port-or-process-name ...]
# A single "@" inside a field (e.g. in a password) is not a separator.
serverInfo=(
    '192.168.1.137@@22@@root@@sykj@2021@@8080'
    '192.168.1.15@@22@@root@@ZbxQgs3Nmu4rF4k6@@80@@videocore@@facecore'
    '192.168.1.14@@22@@root@@78307ef7-152e-23nv.@@80@@videocore@@facecore'
    '192.168.1.137@@22@@root@@sykj@2021@@8080'
    '192.168.1.71@@22@@root@@sykj@2022@@8080@@80@@3306@@facecore'
    '192.168.1.146@@22@@root@@sykj@2022@@8080@@80@@3306@@Iotcore'
    '192.168.1.141@@22@@root@@sykj_2022@@80'
    '192.168.1.119@@22@@root@@sykj@2020@@facecore'
    '192.168.1.21@@22@@root@@32213ce8-87ed-89h28v.'
    '192.168.1.23@@22@@root@@21ops.com-2fdub09.'
    '192.168.1.127@@22@@root@@DaN-e7eb5f.9-a9893-EQXfe8TPnz'
    '192.168.1.195@@22@@root@@c7b053f2bf99639c'
    '192.168.1.154@@22@@root@@@sykj2020.987-ubvon'
)
# root password of the local (master) server
mastServerPasswd="sykj@2022"

# /tmp/result.txt collects the warnings; every run starts with an empty file
: > /tmp/result.txt

#==================== Parameter section: end ======================

# Alarm thresholds: a number followed by a unit letter.
#   memory_warn - minimum free memory per server
#   part_warn   - minimum free space per disk partition
memory_warn="1g"
part_warn="10G"

echo -e "==============开始读取存储空间阈值=============="
echo "memory_warn=${memory_warn}"
echo "part_warn=${part_warn}"

#================ Compare a free-space value against a warning threshold ================
# Usage:  compare <threshold> <free> <host> <item>
#   $1  warning threshold: a number followed by a unit letter, e.g. "1g", "10G", "500M"
#   $2  free space in the same notation, e.g. "3.2G", "512M", "1.5T" (may be empty)
#   $3  host label used in the result text
#   $4  item label used in the result text (e.g. "memory" or a partition name)
#
# Result: sets the global variable compare_info to
#           "<host>_<item>_warning!<free>"  when the free space is considered too low
#           "<host>_<item>_runing!"         otherwise
#         (the spelling "runing" is kept unchanged so existing output stays the same).
# Output: echoes <free> to stdout; for a G-vs-G comparison it additionally echoes
#         "<threshold number>,<free number>".
#
# Rules:
#   * Both values carry the same unit letter (G, M, T or K, any case): the numeric
#     parts are compared and a warning is raised when threshold > free.
#   * Threshold in M and free in G, or free in T: enough space.
#   * Every other combination (free in a smaller unit, no unit, empty): warning.
#
# The numeric comparison is done with awk, so a missing `bc` or a value without
# digits can no longer make the test fail and silently report "runing".
function compare(){
    local threshold=$1
    local available=$2
    local ok_info="${3}_${4}_runing!"
    local low_info="${3}_${4}_warning!${2}"
    local unit_class threshold_num available_num

    echo "$available"

    for unit_class in '[Gg]' '[Mm]' '[Tt]' '[Kk]'; do
        if [[ $threshold =~ $unit_class && $available =~ $unit_class ]]; then
            # Strip the unit letters, keep digits and the decimal point.
            threshold_num=$(printf '%s' "$threshold" | tr -cd '0-9.')
            available_num=$(printf '%s' "$available" | tr -cd '0-9.')
            if [[ $unit_class == '[Gg]' ]]; then
                # Trace line that has always been printed for G-vs-G comparisons.
                echo "$threshold_num,$available_num"
            fi
            # "+ 0" forces a numeric comparison; an empty value counts as 0.
            if awk -v w="$threshold_num" -v f="$available_num" \
                'BEGIN { exit !((w + 0) > (f + 0)) }'; then
                compare_info=$low_info
            else
                compare_info=$ok_info
            fi
            return 0
        fi
    done

    # Different units: only coarse rules apply, no conversion is attempted.
    if [[ $threshold =~ [Mm] && $available =~ [Gg] ]] || [[ $available =~ [Tt] ]]; then
        compare_info=$ok_info
    else
        compare_info=$low_info
    fi
}
#======= Check whether a port is open or a process is present in a snapshot file =======
# Usage:  check_live <port-or-process-name> <snapshot-name>
#   $1  a port number (digits only) or a process name
#   $2  name of the snapshot file below /tmp to search
#
# Result: sets the global variable check_live_info to
#           ""                             when the port/process was found
#           "<snapshot>_<value>_stoped!"   when it was not found
#         and in the latter case also echoes "<value>  stoppp" to stdout.
#         An empty <value> is ignored (check_live_info stays empty).
#
# Lookup:
#   * Port number: only lines starting with tcp/udp are searched, and the port
#     must appear as ":<port>" followed by whitespace or end of line. A plain
#     substring search would treat port 80 as open whenever 8080, a PID or a
#     size containing "80" shows up anywhere in the snapshot.
#   * Process name: first a case-insensitive whole-word match on the tcp/udp
#     lines, then a case-insensitive substring match on any snapshot line that
#     does not contain "grep".
function check_live(){
    local process=$1
    local snapshot="/tmp/${2}"
    check_live_info=""

    [[ -n "$process" ]] || return 0

    if [[ $process =~ ^[0-9]+$ ]]; then
        if grep -iE '^(tcp|udp)' -- "$snapshot" \
            | grep -qE -e ":${process}([[:space:]]|\$)"; then
            return 0
        fi
    else
        if grep -iE '^(tcp|udp)' -- "$snapshot" | grep -qiw -e "$process"; then
            return 0
        fi
        if grep -i -e "$process" -- "$snapshot" | grep -qv 'grep'; then
            return 0
        fi
    fi

    echo "${process}  stoppp"
    check_live_info="${2}_${process}_stoped!"
}

#==================== Function definitions: end ====================
 

# Phase 1: for every entry of serverInfo, log in over ssh (driven by expect),
# write a snapshot of the host (kernel, release, DMI, memory, disks, CPU,
# network, listening sockets, processes) to /tmp/<IP> on the remote side, then
# copy that file to /tmp/<IP> on this machine with scp.
#
# NOTE(review): both here-documents are unquoted so that the shell fills in the
# ${Server...} values; a password containing characters such as ", $, [ or \
# would be re-interpreted by the shell or by Tcl - confirm passwords avoid them.
echo -e "\n=============开始循环生成记录文件========================"
for (( i = 0; i < ${#serverInfo[@]}; i++ )); do
    # Entry layout: IP@@ssh-port@@user@@password[@@service ...]
    ServerIp=$(printf '%s\n' "${serverInfo[i]}" | awk -F@@ '{print $1}')
    ServerPort=$(printf '%s\n' "${serverInfo[i]}" | awk -F@@ '{print $2}')
    ServerUser=$(printf '%s\n' "${serverInfo[i]}" | awk -F@@ '{print $3}')
    ServerPasswd=$(printf '%s\n' "${serverInfo[i]}" | awk -F@@ '{print $4}')

    # Without an IP the cleanup below would become "rm -rf /tmp/".
    if [[ -z "${ServerIp}" ]]; then
        echo "serverInfo 第$((i + 1))项缺少IP,跳过" >&2
        continue
    fi

    # Remove a stale snapshot so a failed connection is detected later on.
    rm -rf -- "/tmp/${ServerIp}"
    echo -e "远程连接到${ServerIp}生成记录文件"
    # The password is sent to ssh only; it is no longer echoed to the console.
    /usr/bin/expect <<EOF
    set timeout 10
    spawn ssh -p${ServerPort} ${ServerUser}@${ServerIp}
    expect {
        "Last" {send "pwd\r"}
        "again" { send_error "\n远程连接到${ServerIp}服务器密码错误!"; exit 1;}
        "*assword*" {send "${ServerPasswd}\n";exp_continue}
        "*yes*" {send "yes\n";exp_continue }
        timeout  { send_error "远程连接到${ServerIp}服务器超时,请检查网络环境或延长超时时间\n";  exit 1; }
    }
    expect "]#"
    send "uname -a > /tmp/${ServerIp}\r"
    send "cat /etc/redhat-release >> /tmp/${ServerIp}\r"
    send "grep 'DMI'  /var/log/dmesg >>/tmp/${ServerIp} || echo '' >>/tmp/${ServerIp}\r"
    send "free -h >>/tmp/${ServerIp}\r"
    send "df -h >>/tmp/${ServerIp}\r"
    send "grep -i 'core id' /proc/cpuinfo >>/tmp/${ServerIp}\r"
    send "grep -i 'processor' /proc/cpuinfo >>/tmp/${ServerIp}\r"
    send "grep -i 'physical id' /proc/cpuinfo >>/tmp/${ServerIp}\r"
    send "cat /proc/cpuinfo |grep 'model name'>>/tmp/${ServerIp}\r"
    send "ifconfig >>/tmp/${ServerIp}\r"
    send "netstat -npltu >>/tmp/${ServerIp}\r"
    send "ps -axu >>/tmp/${ServerIp}\r"
    send "exit\r"

    expect eof
EOF
    echo -e "\n复制${ServerIp}记录文件到master主机"
    /usr/bin/expect <<EOF
    spawn scp -q -r -P${ServerPort} ${ServerUser}@${ServerIp}:/tmp/${ServerIp} /tmp/
    set timeout 10
    expect {
        "Last" {send "pwd\r"}
        "again" { send_error "\n远程连接到${ServerIp}服务器密码错误!"; exit 1;}
        "*assword*" {send "${ServerPasswd}\n";exp_continue}
        "*yes*" {send "yes\n";exp_continue }
        timeout  { send_error "连接到${ServerIp}服务器超时,请检查网络环境或延长超时时间\n";  exit 1; }
    }
EOF
    echo -e "完成\n"
done

# Phase 2: read the snapshot /tmp/<IP> of every server, print the report and
# append every warning (low memory, low disk space, stopped service, missing
# snapshot) to /tmp/result.txt.
echo -e "=============读取收到的记录文件并打印结果================\n\n\n"
for (( i = 0; i < ${#serverInfo[@]}; i++ )); do
    ServerIp=$(printf '%s\n' "${serverInfo[i]}" | awk -F@@ '{print $1}')
    snapshot="/tmp/${ServerIp}"
    echo -e "==================================================${ServerIp}服务器信息======================================================"
    # The snapshot must exist as a regular file directly below /tmp; a same-named
    # file elsewhere under /tmp no longer influences the check.
    if [[ -f "${snapshot}" ]]; then
        # NOTE(review): the vendor, kernel and CPU sections are appended to the
        # snapshot file instead of being printed - kept as found; confirm intent.
        # Snapshot line 1 is `uname -a`, line 2 the release file, line 3 the DMI line.
        echo -e "【硬件厂商】\n$(awk 'NR==3' "${snapshot}")" >> "${snapshot}"
        echo -e "\n【系统内核】\n$(awk 'NR==1 {print $3}' "${snapshot}")" >> "${snapshot}"
        echo -e "\n【系统版本】\n$(awk 'NR==2' "${snapshot}")"
        echo -e "\n【CPU 信息】" >> "${snapshot}"
        echo -e "c p u 型号:$(grep 'model name' "${snapshot}" | head -n 1 | awk -F: '{print $2}')" >> "${snapshot}"
        echo -e "物理cpu个数:$(grep -i 'physical id' "${snapshot}" | sort -u | wc -l)" >> "${snapshot}"
        echo -e "cpu核心数:$(grep -i 'core id' "${snapshot}" | sort -u | wc -l)" >> "${snapshot}"
        echo -e "线  程  数:$(grep -i 'processor' "${snapshot}" | sort -u | wc -l)" >> "${snapshot}"

        echo -e "\n【内存信息】"
        # 4th column of the first Mem/Swap line of the `free -h` output.
        free_memory=$(grep -iE '^Mem|^Swap' "${snapshot}" | awk '{print $4}' | head -n 1)
        # Quoted so that an empty value cannot shift the remaining arguments.
        compare "${memory_warn}" "${free_memory}" "${ServerIp}" "memory"
        echo "$compare_info" | grep "warning" >> /tmp/result.txt
        grep -i -E '^mem|^swap|total' "${snapshot}"

        echo -e "\n【磁盘存储信息】"
        # One "<device> <available>" pair per line; the df header (first match)
        # and boot partitions are skipped. The list is streamed straight into the
        # loop instead of going through a fixed scratch file in /tmp.
        while read -r line; do
            echo "$line"
            read -r part_name part_free _ <<< "$line"
            compare "${part_warn}" "${part_free}" "${ServerIp}" "${part_name}"
            echo "$compare_info" | grep "warning" >> /tmp/result.txt
        done < <(grep -i -E '^文件系统|^Filesystem|^/dev' "${snapshot}" \
                    | grep -v 'boot' \
                    | awk 'NR>1 {print $1, $4}')

        echo -e "\n【网络设置】\n$(grep -i -E 'inet|flags' "${snapshot}")"
        echo -e "\n【服务检测】"
        # Fields 5..NF are the ports/process names; word splitting is intended
        # here so that empty fields are dropped.
        processs=($(printf '%s\n' "${serverInfo[i]}" | awk -F@@ '{for(i=5;i<=NF;i++){print $i}}'))
        for j in "${processs[@]}"; do
            check_live "$j" "${ServerIp}"
            echo "$check_live_info" | grep "stoped" >> /tmp/result.txt
        done
        echo -e "\n\n\n"
    else
        echo -e "   连接到${ServerIp}服务器异常,跳过检测\n\n\n"
        # Plain echo does not interpret "\n", so the trailing escape was written
        # literally into the result file; it is dropped here.
        echo "连接到${ServerIp}服务器异常,跳过检测" >> /tmp/result.txt
    fi
done

 

posted @ 2023-04-07 15:17  苦逼yw  阅读(156)  评论(0)  编辑  收藏  举报