Linux 巡检服务器资源服务脚本
说明:该脚本定义一个数组变量,数组中每个元素对应一台服务器,包含该服务器的 IP、SSH 端口、账号、密码以及需要巡检的服务(端口号或进程名);同时指定磁盘和内存的告警阈值。脚本逐台巡检各服务器的磁盘剩余空间、内存剩余空间以及指定的服务是否在运行,当剩余空间小于告警阈值、服务未运行或服务器无法连接时,把告警信息输出到 /tmp/result.txt。
#!/bin/bash
#
# Server inspection script.
#
# For every entry in serverInfo the script logs in over ssh (driven by
# expect), dumps system facts into /tmp/<ip> on the remote host, copies that
# record back with scp, then prints a report and appends warnings (low free
# memory or disk space, missing ports or processes, unreachable hosts) to
# /tmp/result.txt.
#
# Requires: bash 4+, expect (at /usr/bin/expect), ssh, scp, awk, grep.

#==================== parameter section start ====================
# Servers to inspect, one entry per host:
#   IP@@ssh_port@@user@@password[@@port_or_process_name ...]
# NOTE(review): credentials are stored here in plain text; consider ssh keys
# or a protected credentials file instead.
serverInfo=(
  '192.168.1.137@@22@@root@@sykj@2021@@8080'
  '192.168.1.15@@22@@root@@ZbxQgs3Nmu4rF4k6@@80@@videocore@@facecore'
  '192.168.1.14@@22@@root@@78307ef7-152e-23nv.@@80@@videocore@@facecore'
  '192.168.1.137@@22@@root@@sykj@2021@@8080'
  '192.168.1.71@@22@@root@@sykj@2022@@8080@@80@@3306@@facecore'
  '192.168.1.146@@22@@root@@sykj@2022@@8080@@80@@3306@@Iotcore'
  '192.168.1.141@@22@@root@@sykj_2022@@80'
  '192.168.1.119@@22@@root@@sykj@2020@@facecore'
  '192.168.1.21@@22@@root@@32213ce8-87ed-89h28v.'
  '192.168.1.23@@22@@root@@21ops.com-2fdub09.'
  '192.168.1.127@@22@@root@@DaN-e7eb5f.9-a9893-EQXfe8TPnz'
  '192.168.1.195@@22@@root@@c7b053f2bf99639c'
  '192.168.1.154@@22@@root@@@sykj2020.987-ubvon'
)

# root password of the machine running this script (not referenced below).
mastServerPasswd='sykj@2022'

# Free-space thresholds: a warning is recorded when free memory / free space
# on a partition drops below these values.
memory_warn="1g"
part_warn="10G"

# File that collects all warnings; truncated at the start of every run.
result_file=/tmp/result.txt
#==================== parameter section end ======================

#######################################
# Split one serverInfo entry on the "@@" separator.
# Arguments: $1 - entry string
# Outputs:   one field per line on stdout
#######################################
_split_entry() {
  awk -F'@@' '{ for (k = 1; k <= NF; k++) print $k }' <<<"$1"
}

#######################################
# Convert a human-readable size such as 512M, 1.5G, 3.8Gi, 2T or 0 to bytes
# (powers of 1024). A decimal comma is accepted as well.
# Arguments: $1 - size string
# Outputs:   byte count on stdout
# Returns:   1 when the string cannot be parsed
#######################################
_size_to_bytes() {
  local raw=${1//,/.}
  local re='^([0-9]+(\.[0-9]+)?)([A-Za-z]*)$'
  local exp

  [[ $raw =~ $re ]] || return 1
  # Only the first letter of the suffix matters (G, Gi and GB are all giga).
  case ${BASH_REMATCH[3]:0:1} in
    '' | B | b) exp=0 ;;
    K | k) exp=1 ;;
    M | m) exp=2 ;;
    G | g) exp=3 ;;
    T | t) exp=4 ;;
    P | p) exp=5 ;;
    E | e) exp=6 ;;
    *) return 1 ;;
  esac
  awk -v n="${BASH_REMATCH[1]}" -v e="$exp" \
    'BEGIN { printf "%.0f\n", n * (1024 ^ e) }'
}

#######################################
# Compare a free-space value against a warning threshold.
# Both values are normalised to bytes first, so mixed units (for example a
# threshold in M and a free value in G) are compared numerically.
# Globals:   compare_info (written): "<host>_<item>_warning!<free>" when the
#            free value is below the threshold or cannot be parsed,
#            otherwise "<host>_<item>_runing!"
# Arguments: $1 - warning threshold, e.g. 10G
#            $2 - free value, e.g. 3.2G
#            $3 - host label
#            $4 - item label (e.g. "memory" or a device name)
# Outputs:   echoes the free value to stdout
#######################################
compare() {
  local warn_raw=$1 free_raw=$2 host=$3 item=$4
  local warn_bytes free_bytes

  echo "$free_raw"

  # Default to a warning so that missing or unparsable data is never
  # silently reported as healthy.
  compare_info="${host}_${item}_warning!${free_raw}"
  warn_bytes=$(_size_to_bytes "$warn_raw") || return 0
  free_bytes=$(_size_to_bytes "$free_raw") || return 0

  if awk -v w="$warn_bytes" -v f="$free_bytes" \
    'BEGIN { exit !(f + 0 >= w + 0) }'; then
    compare_info="${host}_${item}_runing!"
  fi
}

#######################################
# Check whether a port is open or a process is present, based on the record
# file /tmp/<host>.
# A purely numeric target is treated as a port and must appear as the port
# of the local-address column on a tcp/udp netstat line. Any other target is
# treated as a name and is searched for (case-insensitively) in the whole
# record, ignoring lines that contain "grep".
# Globals:   check_live_info (written): "" when found,
#            "<host>_<target>_stoped!" otherwise
# Arguments: $1 - port number or process name
#            $2 - host label (also the record file name under /tmp)
# Outputs:   "<target> stoppp" on stdout when the target is not found
#######################################
check_live() {
  local target=$1 host=$2
  local record="/tmp/${host}"

  check_live_info=""
  if [[ $target =~ ^[0-9]+$ ]]; then
    # Match only the port part of the local address ("0.0.0.0:80", ":::80"),
    # so that 80 does not match 8080, a PID, or an address ending in .80.
    if awk -v port="$target" '
      tolower($1) ~ /^(tcp|udp)/ {
        n = split($4, parts, ":")
        if (parts[n] == port) found = 1
      }
      END { exit !found }' "$record"; then
      return 0
    fi
  elif grep -i -- "$target" "$record" | grep -v 'grep' | grep -q .; then
    return 0
  fi

  echo "${target} stoppp"
  check_live_info="${host}_${target}_stoped!"
}

#######################################
# Log in to one server, write its record file on the remote side and copy it
# to /tmp/<ip> on this machine.
# Connection details are handed to expect through the environment and the
# expect programs use quoted here-documents, so characters that are special
# to Tcl inside a password ($, [, ", \) are sent literally. The password is
# never echoed to the terminal.
# Arguments: $1 - ip, $2 - ssh port, $3 - user, $4 - password
#######################################
collect_record() {
  local ip=$1 port=$2 user=$3 passwd=$4

  rm -rf -- "/tmp/${ip}"
  printf '远程连接到%s生成记录文件\n' "$ip"
  INSPECT_IP="$ip" INSPECT_PORT="$port" INSPECT_USER="$user" \
    INSPECT_PASSWD="$passwd" /usr/bin/expect <<'EOF'
set timeout 10
set ip $env(INSPECT_IP)
set record "/tmp/${ip}"
spawn ssh -p$env(INSPECT_PORT) $env(INSPECT_USER)@${ip}
expect {
  "Last" { send "pwd\r" }
  "again" { send_error "\n远程连接到${ip}服务器密码错误!"; exit 1 }
  "*assword*" { send -- "$env(INSPECT_PASSWD)\n"; exp_continue }
  "*yes*" { send "yes\n"; exp_continue }
  timeout { send_error "远程连接到${ip}服务器超时,请检查网络环境或延长超时时间\n"; exit 1 }
}
expect "]#"
send "uname -a > ${record}\r"
send "cat /etc/redhat-release >> ${record}\r"
send "grep 'DMI' /var/log/dmesg >>${record} || echo '' >>${record}\r"
send "free -h >>${record}\r"
send "df -h >>${record}\r"
send "grep -i 'core id' /proc/cpuinfo >>${record}\r"
send "grep -i 'processor' /proc/cpuinfo >>${record}\r"
send "grep -i 'physical id' /proc/cpuinfo >>${record}\r"
send "cat /proc/cpuinfo |grep 'model name'>>${record}\r"
send "ifconfig >>${record}\r"
send "netstat -npltu >>${record}\r"
send "ps -axu >>${record}\r"
send "exit\r"
expect eof
EOF

  printf '\n复制%s记录文件到master主机\n' "$ip"
  INSPECT_IP="$ip" INSPECT_PORT="$port" INSPECT_USER="$user" \
    INSPECT_PASSWD="$passwd" /usr/bin/expect <<'EOF'
set timeout 10
set ip $env(INSPECT_IP)
spawn scp -q -r -P$env(INSPECT_PORT) $env(INSPECT_USER)@${ip}:/tmp/${ip} /tmp/
expect {
  "Last" { send "pwd\r" }
  "again" { send_error "\n远程连接到${ip}服务器密码错误!"; exit 1 }
  "*assword*" { send -- "$env(INSPECT_PASSWD)\n"; exp_continue }
  "*yes*" { send "yes\n"; exp_continue }
  timeout { send_error "连接到${ip}服务器超时,请检查网络环境或延长超时时间\n"; exit 1 }
}
EOF
  printf '完成\n\n'
}

#######################################
# Print the report for one server from its record file and append any
# warnings to the result file.
# Record layout relied upon: line 1 = `uname -a`, line 2 = release string,
# line 3 = DMI line (or empty), followed by free/df/cpuinfo/ifconfig/
# netstat/ps output.
# Globals:   memory_warn, part_warn, result_file (read)
# Arguments: $1 - serverInfo entry
#######################################
report_host() {
  local entry=$1
  local -a fields
  local ip record free_memory part_name part_free service

  mapfile -t fields < <(_split_entry "$entry")
  ip=${fields[0]:-}
  record="/tmp/${ip}"

  printf '%s\n' "==================================================${ip}服务器信息======================================================"

  if [[ -z $ip || ! -f $record ]]; then
    printf ' 连接到%s服务器异常,跳过检测\n\n\n\n' "$ip"
    printf '连接到%s服务器异常,跳过检测\n' "$ip" >> "$result_file"
    return 0
  fi

  # NOTE(review): the vendor, kernel and CPU summaries are appended to the
  # record file rather than printed, while the other sections go to stdout;
  # kept as is - confirm whether these were meant to be printed.
  printf '【硬件厂商】\n%s\n' "$(awk 'NR==3' "$record")" >> "$record"
  printf '\n【系统内核】\n%s\n' "$(awk 'NR==1 { print $3 }' "$record")" >> "$record"
  printf '\n【系统版本】\n%s\n' "$(awk 'NR==2' "$record")"
  printf '\n【CPU 信息】\n' >> "$record"
  # The cpuinfo patterns are anchored to the start of the line so that a
  # "model name ... Processor" line is not counted as a logical processor.
  printf 'c p u 型号:%s\n' \
    "$(awk -F: '/^model name/ { print $2; exit }' "$record")" >> "$record"
  printf '物理cpu个数:%s\n' \
    "$(grep -i '^physical id' "$record" | sort -u | wc -l)" >> "$record"
  printf 'cpu核心数:%s\n' \
    "$(grep -i '^core id' "$record" | sort -u | wc -l)" >> "$record"
  printf '线 程 数:%s\n' \
    "$(grep -i '^processor' "$record" | sort -u | wc -l)" >> "$record"

  printf '\n【内存信息】\n'
  # 4th column of the first Mem/Swap line of `free -h` is the free amount.
  free_memory=$(grep -iE '^Mem|^Swap' "$record" | awk 'NR==1 { print $4 }')
  compare "$memory_warn" "$free_memory" "$ip" "memory"
  if [[ $compare_info == *warning* ]]; then
    printf '%s\n' "$compare_info" >> "$result_file"
  fi
  grep -i -E '^mem|^swap|total' "$record"

  printf '\n【磁盘存储信息】\n'
  # Device name and "Avail" column of every /dev filesystem except boot
  # partitions; selecting on /dev makes the df header language irrelevant.
  while read -r part_name part_free; do
    echo "$part_name $part_free"
    compare "$part_warn" "$part_free" "$ip" "$part_name"
    if [[ $compare_info == *warning* ]]; then
      printf '%s\n' "$compare_info" >> "$result_file"
    fi
  done < <(awk '/^\/dev/ && !/boot/ { print $1, $4 }' "$record")

  printf '\n【网络设置】\n%s\n' "$(grep -i -E 'inet|flags' "$record")"

  printf '\n【服务检测】\n'
  # Fields 5..N of the entry are the ports / process names to verify.
  for service in "${fields[@]:4}"; do
    check_live "$service" "$ip"
    if [[ $check_live_info == *stoped* ]]; then
      printf '%s\n' "$check_live_info" >> "$result_file"
    fi
  done
  printf '\n\n\n\n'
}

#######################################
# Entry point: collect a record from every server, then report on each.
#######################################
main() {
  local entry
  local -a fields

  : > "$result_file"

  echo "==============开始读取存储空间阈值=============="
  echo "memory_warn=${memory_warn}"
  echo "part_warn=${part_warn}"

  printf '\n=============开始循环生成记录文件========================\n'
  for entry in "${serverInfo[@]}"; do
    mapfile -t fields < <(_split_entry "$entry")
    if [[ -z ${fields[0]:-} ]]; then
      # Without an IP the cleanup step would target /tmp/ itself.
      printf 'serverInfo 中存在缺少IP的条目,已跳过\n' >&2
      continue
    fi
    collect_record "${fields[0]}" "${fields[1]:-}" "${fields[2]:-}" "${fields[3]:-}"
  done

  printf '=============读取收到的记录文件并打印结果================\n\n\n\n'
  for entry in "${serverInfo[@]}"; do
    report_host "$entry"
  done
}

main "$@"