Shell PV/UV 统计示例
#!/bin/sh
#
# Print PV / UV statistics for one "newlive" access log.
#
# Usage:   <this script> /path/to/newlive_YYMMDD.log
# Output:  total PV, the most requested URLs (> 1000 hits), distinct client
#          count, distinct clients for the dominant "sd=" value, number of
#          external referers and the most frequent ones (> 70 hits).
# Exit:    3 when the argument count is wrong or the log cannot be read.
#
# The code reads whitespace-separated fields as:
#   $1 = client IP, $2 = requested URL, $3 = referer
# NOTE(review): inferred from how the fields are used below - confirm against
# the real log format.

set -u

if [ "$#" -ne 1 ]; then
  {
    echo
    echo "please input file!"
    echo "eg:/home/liqiu/newlive_090807.log"
    echo
  } >&2
  exit 3
fi

SOURCELOGS=$1

if [ ! -r "$SOURCELOGS" ]; then
  echo "cannot read file: $SOURCELOGS" >&2
  exit 3
fi

# Requests whose URL matches this pattern are left out of every figure.
# Kept in one place so all passes over the log agree on the filter.
SKIP_RE='(img|js|css|amf|png|dns|html|ico|xml|port)'

# Print the referer ($3) of every counted request, skipping requests with no
# referer ("-", optionally quoted) and referers pointing back at the site.
# The "-" test is an exact match on purpose: filtering with `grep -v "-"`
# would also discard every referer URL that merely contains a hyphen.
outlinks() {
  awk -v skip="$SKIP_RE" \
    '$2 !~ skip && $3 != "-" && $3 != "\"-\"" { print $3 }' "$SOURCELOGS" \
    | grep -vF "music.sina.com.cn/newlive"
}

# Total page views.
ALLPVNUM=$(awk -v skip="$SKIP_RE" \
  '$2 !~ skip { n++ } END { print n + 0 }' "$SOURCELOGS")

# URLs requested more than 1000 times, most frequent first ("count url").
# Doubled slashes are collapsed so "/a//b" and "/a/b" count as the same URL.
LISTPV=$(awk -v skip="$SKIP_RE" \
  '$2 !~ skip { gsub(/\/\//, "/", $2); print $2 }' "$SOURCELOGS" \
  | sort | uniq -c | sort -nr | awk '$1 > 1000')

# Distinct client IPs.
ALLIP=$(awk -v skip="$SKIP_RE" '$2 !~ skip { print $1 }' "$SOURCELOGS" \
  | sort -u | wc -l)

# First character following the first "sd=" in the ranking above, i.e. the
# sd value of the highest-ranked URL that carries one (empty if none does).
SD=$(printf '%s\n' "$LISTPV" \
  | awk -F'sd=' 'NF > 1 { print substr($2, 1, 1); exit }')

# Distinct client IPs whose URL contains "sd=<SD>". The value is handed to
# awk with -v and matched literally instead of being spliced into the
# program text.
SDN=$(awk -v skip="$SKIP_RE" -v tag="sd=$SD" \
  '$2 !~ skip && index($2, tag) { print $1 }' "$SOURCELOGS" \
  | sort -u | wc -l)

# Number of requests that arrived with an external referer.
OUTLINKNUM=$(outlinks | wc -l)

echo
echo "pv总量:$ALLPVNUM"
echo "排名前几位的为:"
# Quoted so that every "count url" pair stays on its own line.
printf '%s\n' "$LISTPV"
echo
echo "独立ip:$ALLIP"
echo "其中来自 /newlive/index.php?sd=$SD 为:$SDN"
echo
echo "外链:$OUTLINKNUM"
echo "排名前几位的为:"
# Strip query strings and quote characters before ranking the referers.
outlinks \
  | sed -e 's/?.*$//' -e 's/"//g' \
  | sort | uniq -c | sort -nr \
  | awk '$1 > 70'
上面是很早以前写的代码了。随着数据量逐步增多,已经不可能把所有数据都放到一个文件中处理,比如 UV 的统计。
下面的统计方法是:先将数据散列到不同的文件里面,然后再统计。
#!/bin/bash
#
# Count the distinct values of field 4 across gzip-compressed trace logs.
#
# Every matching archive under the source directories is decompressed, the
# characters "[", "|" and "]" are removed, and field 4 of each line is written
# to a bucket file named after its first character (1.log ... 9.log).
# Values that do not start with a digit 1-9 are ignored.
# The buckets are disjoint, so the overall distinct count is the sum of the
# per-bucket distinct counts; no sort over the full data set is needed.
#
# Output: one integer on stdout.
# Side effects: creates/overwrites ${targetPath}[1-9].log.

set -uo pipefail

sourcePath=("/TICKET_LOGS_CN6/CN6_ticket/" "/TICKET_LOGS_CN1/CN6_ticket/")
sourceFile="trace.log.2014-09-01.gz"
targetPath="/tmp/log/"

mkdir -p -- "$targetPath" || exit 1

# Drop buckets left over from an earlier run so they are not counted again.
rm -f -- "${targetPath}"[1-9].log

# Single pass: each archive is decompressed once and awk routes every value
# to its bucket, instead of re-reading all archives once per leading digit.
if ! find "${sourcePath[@]}" -name "$sourceFile" -exec zcat {} + \
  | tr -d "[|]" \
  | awk -v dir="$targetPath" '{
      c = substr($4, 1, 1)
      if (c ~ /^[1-9]$/) print $4 > (dir c ".log")
    }'; then
  echo "warning: some input could not be read; the count may be incomplete" >&2
fi

total=0
for ((i = 1; i <= 9; i++)); do
  bucket="${targetPath}${i}.log"
  # A bucket that received no values does not exist (or is empty).
  [[ -s "$bucket" ]] || continue
  count=$(sort -u -- "$bucket" | wc -l)
  total=$((total + count))
done

printf '%d\n' "$total"