shell脚本分析nginx日志

注意:如果该脚本是在 Windows 下编辑或保存的,复制到 Linux 系统中使用之前,需要先安装 dos2unix 工具转换换行符格式,否则脚本会因行尾的回车符(\r)而执行出错:

yum install dos2unix -y

 

安装成功后,使用以下命令将脚本中的 Windows 换行符(CRLF)转换为 Unix 换行符(LF):

dos2unix a.sh

 

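正在被写入的文件不能直接用 tar 进行压缩,否则 tar 会提示 "file changed as we read it" 并返回失败状态。因此脚本先把 access.log 复制一份,再压缩复制出来的文件。直接压缩 access.log 时,报告中的输出如下: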

--------压缩日志----------------------
94 access.log
95 tar: access.log: file changed as we read it
96 #### 压缩日志失败 ####

#!/bin/bash
# Analyse the nginx access log, archive it, and mail a report.
# bash is required (not plain sh): the script uses arrays, [[ ]], (( ))
# and the `function` keyword, none of which dash/ash accept.

# Timestamp taken once so the archive and the report share the same suffix.
DATE=$(date '+%Y%m%d-%H%M')
# Compressed archive produced at the end of the run.
ARCHIVE="/usr/log_bak/nginx_${DATE}.tar.gz"
# Report file that every step appends to; later used as the mail body.
MESSAGE="/usr/log_bak/Nginx_Analysis${DATE}"
# Live nginx access log.
FILENAME=/data/nginx/logs/access.log
# Working copy of the access log that is analysed and archived.
BACKNAME=/usr/log_bak/nginx_bak

#######################################
# Mail the accumulated report to the hard-coded recipient.
# Globals:  MESSAGE (read) - path of the report file, used as the mail body
# Returns:  exit status of mail(1)
#######################################
Mail() {
        # The path is quoted: bash rejects an unquoted redirect target that
        # word-splits into several words with "ambiguous redirect".
        mail -s "***Nginx Report***" 666@qq.com < "$MESSAGE"
}

#######################################
# Append a table of crawler / browser / platform / device / file-type hit
# counts (and their share of all log lines) to the report.
# Globals:
#   BACKNAME (read) - log copy that is searched
#   MESSAGE  (read) - report file that is appended to
#   totle    (read) - total number of log lines, used as the denominator
# Outputs:  one "<label> <count> <percent>" line per pattern, then a
#           success/failure line, all appended to MESSAGE
#######################################
Bowser() {
    local -a key word
    # Entries 0-3 are HTTP status patterns; they are defined but never
    # reported here because the loop starts at index 4.
    local -r first_reported=4
    local idx hits pct rc
    local failed=0

    key[0]='" 200 [0-9]{3}';word[0]='http 200'
    key[1]='" 206 [0-9]{3}';word[1]='http 206'
    key[2]='" 404 [0-9]{3}';word[2]='http 404'
    key[3]='" 503 [0-9]{3}';word[3]='http 503'
    ##########
    # Search-engine crawlers
    key[4]='Googlebot.*google.com/bot.html';word[4]='Google Browser'
    key[5]='Baiduspider.*baidu.com/search/spider.html';word[5]='Baidu Browser'
    key[6]='bingbot.*bing.com/bingbot.htm';word[6]='Bing Browser'
    # Soso        'Sosospider.*soso.com/webspider.htm'
    # Youdao      'YoudaoBot.*youdao.com/help/webmaster/spider/'
    # Yahoo China 'Yahoo! Slurp China'
    ##########
    # Browser engines
    key[7]='MSIE';word[7]='MSIE'
    key[8]='Gecko/.*Firefox';word[8]='Firefox'
    key[9]='AppleWebKit.*like Gecko';word[9]='Webkit'
    key[10]='Opera.*Presto';word[10]='Opera'

    # Operating systems
    key[11]='Windows NT 6.1';word[11]='Windows 7 访问'
    key[12]='Macintosh; Intel Mac OS X';word[12]='Mac OS X 访问'
    key[13]='X11.*Linux';word[13]='Linux with X11'
    key[14]='Android;';word[14]='Android'
    # Windows family: win2000 'Windows NT 5.0', winxp 'Windows NT 5.1',
    #                 vista 'Windows NT 6.0', win7 'Windows NT 6.1'
    # SymbianOS 'SymbianOS'
    ##########
    # Devices
    key[15]='iPad.*like Mac OS X';word[15]='iPad 访问'
    key[16]='Nokia';word[16]='Nokia'
    key[17]='Nokia5800';word[17]='Nokia5800 XpressMusic'
    # iPhone 'iPhone.*like Mac OS X'
    ##########
    # Requested file types. The dot before the extension is escaped so that
    # e.g. "GET /xmp3 HTTP" is not counted as an mp3 request.
    key[18]='GET /.*\.mp3 HTTP';word[18]="访问 mp3 file"
    key[19]='GET /.*\.jpg HTTP';word[19]="访问 jpg file"

    echo "----浏览器来源----" >> "$MESSAGE"
    echo "--浏览器-----总计------占比--" >> "$MESSAGE"
    for (( idx = first_reported; idx < ${#key[@]}; idx++ )); do
        # grep -c exits 1 when nothing matches (still prints 0); only an
        # exit status above 1 is a real error such as an unreadable file.
        rc=0
        hits=$(grep -c -- "${key[idx]}" "$BACKNAME") || rc=$?
        if (( rc > 1 )); then
            failed=1
            hits=0
        fi
        # Values are passed with -v instead of being spliced into the awk
        # program, and a zero/empty total yields 0.00% rather than an awk
        # division-by-zero abort. "%%" prints a literal percent sign.
        pct=$(awk -v hits="${hits:-0}" -v total="${totle:-0}" \
            'BEGIN {
                 if (total + 0 > 0) printf "%.2f%%", hits / total * 100
                 else printf "0.00%%"
             }')
        echo "${word[idx]} ${hits:-0} ${pct}" >> "$MESSAGE"
    done

    if (( failed == 0 )); then
        echo "分析浏览器标示成功" >> "$MESSAGE"
    else
        echo "分析浏览器标示失败" >> "$MESSAGE"
    fi
    echo "--------------------" >> "$MESSAGE"
}

Check_http_status()
{
#grep -ioE "HTTP\/1\.[1|0]\"[[:blank:]][0-9]{3}" access.log
#拿到日志中所有的包含HTTP状态码的部分,拿出第二段来判断,并将结果分配到数组中
codes=(`grep -ioE "HTTP\/1\.[1|0]\"[[:blank:]][0-9]{3}" $BACKNAME | awk -F"[ ]+"    'BEGIN{i=0;j=0;k=0;n=0;p=0;}{ if($2>=100&&$2<200)               
                        {i++}
                else if($2>=200&&$2<300)
                        {j++}
                else if($2>=300&&$2<400)
                        {k++}
                else if($2>=400&&$2<500)
                        {n++}
                else if($2>=500)
                        {p++}
        }END{
                print i?i:0,j?j:0,k?k:0,n?n:0,p?p:0,i+j+k+n+p
                }'`)    
echo "--HTTP状态码---COUNT---PERCENT------" >> $MESSAGE
echo "status[100+]:--${codes[0]}--$(awk 'BEGIN{printf "%.2f%",('${codes[0]}'/'${codes[5]}')*100}')" >> $MESSAGE
echo "status[200+]:--${codes[1]}--$(awk 'BEGIN{printf "%.2f%",('${codes[1]}'/'${codes[5]}')*100}')" >> $MESSAGE
echo "status[300+]:--${codes[2]}--$(awk 'BEGIN{printf "%.2f%",('${codes[2]}'/'${codes[5]}')*100}')" >> $MESSAGE
echo "status[400+]:--${codes[3]}--$(awk 'BEGIN{printf "%.2f%",('${codes[3]}'/'${codes[5]}')*100}')" >> $MESSAGE
echo "status[500+]:--${codes[4]}--$(awk 'BEGIN{printf "%.2f%",('${codes[4]}'/'${codes[5]}')*100}')" >> $MESSAGE
echo "----所有的状态码: ${codes[5]}----" >> $MESSAGE
}

#######################################
# Append the distinct-client count and the top-20 client IPs, request
# URLs and HH:MM time points to the report.
# Globals:
#   BACKNAME (read) - log copy that is analysed
#   MESSAGE  (read) - report file that is appended to
# Outputs:  four sections appended to MESSAGE
#######################################
IpUrlTime() {
    # Locals, so the caller's global `ip` (server address) is not clobbered.
    local unique_ips top_ips top_urls top_minutes

    # Number of distinct values in field 1 (client address).
    unique_ips=$(awk '!seen[$1]++ { n++ } END { print n + 0 }' "$BACKNAME")
    echo "来源IP共--${unique_ips:-0}--个" >> "$MESSAGE"

    top_ips=$(awk '{print $1}' "$BACKNAME" | sort | uniq -c | sort -nr | head -n 20)
    echo "----访问前20个IP统计----" >> "$MESSAGE"
    echo "$top_ips" >> "$MESSAGE"

    # Most requested URLs (field 7 of each log line).
    top_urls=$(awk '{print $7}' "$BACKNAME" | sort | uniq -c | sort -nr | head -n 20)
    echo "----访问前20个URL统计----" >> "$MESSAGE"
    echo "$top_urls" >> "$MESSAGE"

    # Busiest time points: characters 14-18 of field 4 are the HH:MM part
    # of a "[dd/Mon/yyyy:HH:MM:SS" timestamp. A single `head -n 20` is used;
    # an extra bare `head` would silently cap the list at 10 rows.
    top_minutes=$(awk '{print $4}' "$BACKNAME" | cut -c 14-18 | sort | uniq -c | sort -nr | head -n 20)
    echo "----访问前20个时间点统计----" >> "$MESSAGE"
    echo "$top_minutes" >> "$MESSAGE"
}
#----------start---------------
# Driver: write the report header, snapshot the access log, run the three
# analyses, archive the snapshot, truncate the live log and mail the report.
# Every failure path exits non-zero so cron or a wrapper can detect it.

# Server address, parsed from the legacy net-tools "inet addr:" output.
# NOTE(review): newer ifconfig versions print "inet <addr>" without "addr:",
# in which case this stays empty - confirm on the target host.
ip=$(ifconfig | grep 'inet addr:' | grep -v '127.0.0.1' | awk -F '[ :]+' '{print $4}')
echo "--------Server $ip---------------" >> "$MESSAGE"
echo "--------$(df -h)---------------" >> "$MESSAGE"

if cd /usr/log_bak; then
    echo "进入目录/usr/log_bak" >> "$MESSAGE"
else
    echo "####进入目录失败,退出####" >> "$MESSAGE"
    exit 1
fi

echo "---------------------" >> "$MESSAGE"
echo "备份日志: $(date +'%y-%m-%d %H:%M:%S')" >> "$MESSAGE"
echo "---------------------" >> "$MESSAGE"

# Snapshot the live log; all analysis and archiving works on the copy
# because tar refuses a file that changes while it is being read.
if cp -- "$FILENAME" "$BACKNAME"; then
    echo "日志复制成功" >> "$MESSAGE"
else
    echo "####日志复制失败,退出####" >> "$MESSAGE"
    exit 1
fi


echo "-------------------------------" >> "$MESSAGE"
echo "分析时间: $(date +'%y-%m-%d %H:%M:%S')" >> "$MESSAGE"
echo "-------------------------------" >> "$MESSAGE"
# totle (sic) is read by Bowser as the denominator for its percentages.
totle=$(wc -l < "$BACKNAME")
size=$(ls -sh -- "$BACKNAME" | awk '{print $1}')
echo "nginx日志,${size},一共${totle}行 " >> "$MESSAGE"
IpUrlTime
Check_http_status
Bowser

echo "--------压缩日志----------------------" >> "$MESSAGE"

# Archive the snapshot rather than the live access.log. -C plus the base
# name keeps the archive member name relative while no longer depending
# on the current directory or on a hard-coded file name.
if tar czvf "$ARCHIVE" -C "${BACKNAME%/*}" "${BACKNAME##*/}" >> "$MESSAGE" 2>&1; then
    echo "[$ARCHIVE] 日志压缩成功!" >> "$MESSAGE"
    # Truncate the live log in place.
    # NOTE(review): lines nginx wrote between the cp above and this
    # truncation are not in the archive and are lost here.
    if : > "$FILENAME"; then
        echo "清空日志清空日志成功" >> "$MESSAGE"
        # Only the compressed archive needs to be kept.
        rm -f -- "$BACKNAME"
    else
        echo "###清空日志失败 Failed #####" >> "$MESSAGE"
    fi
else
    echo "#### 压缩日志失败 ####" >> "$MESSAGE"
    exit 1
fi


echo "---------------------" >> "$MESSAGE"
echo "结束时间: $(date +'%y-%m-%d %H:%M:%S')" >> "$MESSAGE"
echo "---------------------" >> "$MESSAGE"

Mail

 

posted @ 2017-04-20 23:46  kangjie  阅读(1257)  评论(0)  编辑  收藏  举报