监控cpu使用率、load、内存、磁盘,并实现邮件报警
#!/bin/bash
#
# Host resource monitor.
#
# Runs four one-shot checks (user CPU percentage, free memory, disk usage and
# the 1-minute load average). A metric that crosses its threshold is mailed
# to the configured recipients; a metric within bounds is appended to a
# per-metric log file under ${log_dir}.
#
# Requires: date (with -d support), ip, top, free, df, awk and mail.
#
# `set -e` is deliberately NOT enabled: a failing probe (or a failing mail
# delivery) must not stop the remaining checks from running.
set -u

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Timestamp used in every message: current time shifted to UTC+8.
now=$(date -u -d '+8 hour' +'%Y-%m-%d %H:%M:%S')
# CPU threshold in percent; an alert is sent when usage is above it.
cpu_warn='30'
# Free-memory threshold in MB; an alert is sent when free memory is below it.
mem_warn='2048'
# Disk-usage threshold in percent; an alert is sent when usage is above it.
disk_warn='60'
# 1-minute load-average threshold; an alert is sent when load is above it.
load_warn_1='15'
# Directory that receives cpu.log, mem.log, disk.log and load.log.
log_dir='/opt'
# Host identifier used in subjects and messages: the first global IPv4
# address. Plain awk `split` is used instead of the gawk-only `gensub`, and
# only one address is kept so the value is always a single line.
hostip=$(ip -o -4 addr show scope global \
  | awk '{ split($4, cidr, "/"); print cidr[1]; exit }')
# Fall back to the host name when no global address could be determined.
hostip=${hostip:-${HOSTNAME:-unknown-host}}
# Alert recipients; separate multiple addresses with commas.
email_sender=(xx@163.com)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

#######################################
# Tests whether a value is a plain non-negative decimal number.
# Arguments: $1 - value to test
# Returns:   0 if $1 looks like 12 or 12.5, non-zero otherwise
#######################################
is_number() {
  [[ ${1-} =~ ^[0-9]+([.][0-9]+)?$ ]]
}

#######################################
# Numeric "greater than" that also works for fractional values.
# `[ a \> b ]` compares strings, so "5.0" would be greater than "30".
# Arguments: $1 - left operand, $2 - right operand
# Returns:   0 if $1 > $2 numerically, non-zero otherwise
#######################################
num_gt() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
}

#######################################
# Extracts the user-CPU ("us") percentage from `top -b` output.
# Handles both "%Cpu(s):  1.5 us," and the older "Cpu(s):  1.5%us," layout.
# Inputs:  top batch output on stdin
# Outputs: the number (e.g. 1.5) on stdout; nothing if no Cpu(s) line exists
#######################################
parse_cpu_user() {
  awk '/Cpu\(s\):/ {
    line = $0
    sub(/^.*Cpu\(s\):[ \t]*/, "", line)
    split(line, fields, ",")
    value = fields[1]
    gsub(/[^0-9.]/, "", value)
    print value
    exit
  }'
}

#######################################
# Extracts the 1-minute load average from a line containing
# "load average: a, b, c". Splitting on that label (instead of counting
# colons) works regardless of how the uptime part is formatted.
# Inputs:  top/uptime output on stdin
# Outputs: the 1-minute load on stdout; nothing if the label is absent
#######################################
parse_load_1() {
  awk -F 'load average: *' 'NF > 1 {
    split($2, loads, /[, ]+/)
    print loads[1]
    exit
  }'
}

#######################################
# Selects the filesystems to check from `df -P` output: lines containing
# "/dev" and not containing "tmp" or "boot".
# Inputs:  `df -P` output on stdin
# Outputs: one "<use-percent> <mount-point>" line per selected filesystem
#######################################
parse_disk_usage() {
  awk '/\/dev/ && !/tmp|boot/ {
    use = $5
    sub(/%$/, "", use)
    print use, $6
  }'
}

#######################################
# Mails an alert to the configured recipients.
# Globals:   email_sender (read)
# Arguments: $1 - subject, $2 - message body
# Returns:   exit status of mail
#######################################
send_alert() {
  local subject=$1 body=$2
  printf '%s\n' "$body" | mail -s "$subject" "${email_sender[@]}"
}

#######################################
# Appends one line to a log file.
# Arguments: $1 - log file path, $2 - message
# Returns:   non-zero if the file cannot be written
#######################################
append_log() {
  local file=$1 message=$2
  printf '%s\n' "$message" >>"$file"
}

# ---------------------------------------------------------------------------
# Checks
# ---------------------------------------------------------------------------

#######################################
# Checks the user-CPU percentage reported by top against cpu_warn.
# Globals: now, hostip, cpu_warn, log_dir (read)
# Returns: non-zero if the value cannot be read or reporting fails
#######################################
item_cpu() {
  local cpu_used
  # LC_ALL=C keeps the decimal separator a dot regardless of system locale.
  cpu_used=$(LC_ALL=C top -b -n 1 | parse_cpu_user)
  if ! is_number "$cpu_used"; then
    printf '%s\n' 'item_cpu: could not read CPU usage from top' >&2
    return 1
  fi

  if num_gt "$cpu_used" "$cpu_warn"; then
    send_alert "${hostip} cpu预警" \
      "$now $hostip 当前cpu使用率为${cpu_used}%,超出阈值$cpu_warn 请及时处理"
  else
    append_log "${log_dir}/cpu.log" \
      "$now $hostip 当前cpu使用率为${cpu_used}%,未超过阈值"
  fi
}

#######################################
# Checks free memory (columns 4 + 6 of the "Mem" row of `free -m`, in MB)
# against mem_warn.
# Globals: now, hostip, mem_warn, log_dir (read)
# Returns: non-zero if the value cannot be read or reporting fails
#######################################
item_mem() {
  local mem_free
  mem_free=$(LC_ALL=C free -m | awk '/Mem/ { print $4 + $6; exit }')
  if [[ ! $mem_free =~ ^[0-9]+$ ]]; then
    printf '%s\n' 'item_mem: could not read free memory from free' >&2
    return 1
  fi

  if (( mem_free < mem_warn )); then
    send_alert "${hostip} 内存预警" \
      "$now $hostip 当前内存剩余空间为${mem_free}MB,低于阈值${mem_warn}MB请及时处理"
  else
    append_log "${log_dir}/mem.log" \
      "$now $hostip 当前内存剩余空间为${mem_free}MB,未超过阈值"
  fi
}

#######################################
# Checks every selected filesystem against disk_warn, one at a time, so a
# host with several filesystems no longer breaks the comparison.
# Globals: now, hostip, disk_warn, log_dir (read)
# Returns: non-zero if no filesystem was found or any report failed
#######################################
item_disk() {
  local use mount
  local checked=0 status=0

  while read -r use mount; do
    [[ $use =~ ^[0-9]+$ ]] || continue
    checked=1
    if (( use > disk_warn )); then
      send_alert "${hostip} 磁盘预警" \
        "$now $hostip 磁盘使用率超过阈值,${mount} 当前使用率为${use}%,超过阈值${disk_warn}%请及时处理" \
        || status=1
    else
      append_log "${log_dir}/disk.log" \
        "$now $hostip 磁盘使用率未过阈值,${mount} 当前使用率为${use}%,未超过阈值" \
        || status=1
    fi
  done < <(LC_ALL=C df -P | parse_disk_usage)

  if (( checked == 0 )); then
    printf '%s\n' 'item_disk: no filesystem usage could be read from df' >&2
    return 1
  fi
  return "$status"
}

#######################################
# Checks the 1-minute load average reported by top against load_warn_1.
# Globals: now, hostip, load_warn_1, log_dir (read)
# Returns: non-zero if the value cannot be read or reporting fails
#######################################
item_load_1() {
  local load_1
  load_1=$(LC_ALL=C top -b -n 1 | parse_load_1)
  if ! is_number "$load_1"; then
    printf '%s\n' 'item_load_1: could not read load average from top' >&2
    return 1
  fi

  if num_gt "$load_1" "$load_warn_1"; then
    send_alert "${hostip} load_1预警" \
      "${now} ${hostip} average 1 = ${load_1},已超过阈值 ${load_warn_1} 请及时处理"
  else
    append_log "${log_dir}/load.log" \
      "${now} ${hostip} average 1 = ${load_1},未超过阈值"
  fi
}

#######################################
# Runs every check; a failing check does not prevent the others.
# Returns: 0 if all checks succeeded, 1 otherwise
#######################################
main() {
  local rc=0
  item_cpu || rc=1
  item_mem || rc=1
  item_disk || rc=1
  item_load_1 || rc=1
  return "$rc"
}

# Run the checks when executed as a script; stay quiet when sourced.
if [[ "${BASH_SOURCE[0]:-$0}" == "$0" ]]; then
  main "$@"
fi
如果是内网环境,且只有一台机器可以发送报警邮件,请参考《服务监控+邮箱报警》一文。