SHELL pv uv 统计事例

#!/bin/sh
#statistics newplive logs
SOURCELOGS=$1
if [ "$#" != 1 ];then
echo 
echo "please input file!"
echo "eg:/home/liqiu/newlive_090807.log"
echo 
exit 3;
fi

ALLPVNUM=`/bin/cat $SOURCELOGS  | /usr/bin/awk -F" " '$2!~/(img|js|css|amf|png|dns|html|ico|xml|port)/ {print $2}'  | /usr/bin/wc -l`
LISTPV=`/bin/cat $SOURCELOGS  | /usr/bin/awk -F" " '$2!~/(img|js|css|amf|png|dns|html|ico|xml|port)/ {gsub("\/\/","\/");print $2}'   | /bin/sort | /usr/bin/uniq -c | /bin/sort -nr | /usr/bin/awk -F' ' '{if($1>1000) print $0"\n\r"}'`

ALLIP=`/bin/cat $SOURCELOGS  | /usr/bin/awk -F" " '$2!~/(img|js|css|amf|png|dns|html|ico|xml|port)/ {print $1}' | /bin/sort | /usr/bin/uniq | /usr/bin/wc -l`

#get SD
SD=`echo $LISTPV | /usr/bin/awk -F'sd=' '{print substr($2,1,1);}'`
#echo $SD
SDN=`/bin/cat $SOURCELOGS  | /usr/bin/awk -F" " ' {if(($2!~/(img|js|css|amf|png|dns|html|ico|xml|port)/) && ($2~/sd='$SD'/)) print $1}' | /bin/sort | /usr/bin/uniq | /usr/bin/wc -l`

#/usr/bin/awk 'BEGIN {print "'$SD'"}'
#echo $SDN;
#exit
FROMID3=`/bin/cat $SOURCELOGS  | /usr/bin/awk -F" " '$2!~/(img|js|css|amf|png|dns|html|ico|xml|port)/ && $2~/sd=3/ {print $1,$2}'| /bin/sort | /usr/bin/uniq | /usr/bin/wc -l`
OUTLINKNUM=`/bin/cat $SOURCELOGS  | /usr/bin/awk -F" " '$2!~/(img|js|css|amf|png|dns|html|ico|xml|port)/{print $3}' | /bin/grep -v "-" | /bin/grep -v "music.sina.com.cn/newlive" | /usr/bin/wc -l`

echo 
echo "pv总量:"$ALLPVNUM
echo "排名前几位的为:"
echo $LISTPV

echo
echo "独立ip:"$ALLIP
echo "其中来自 /newlive/index.php?sd=$SD 为:"$SDN

echo
echo "外链:"$OUTLINKNUM
echo "排名前几位的为:"
/bin/cat $SOURCELOGS  | /usr/bin/awk -F" " '$2!~/(img|js|css|amf|png|dns|html|ico|xml|port)/{print $3}' | /bin/grep -v "-" | /bin/grep -v "music.sina.com.cn/newlive" | /bin/sed -e "s/?.*$//" -e"s/\"//g" | /bin/sort | /usr/bin/uniq -c | /bin/sort -nr| /usr/bin/awk -F' ' '{if($1>70) print $0}'

 上面是很早之前的代码了,随着数据逐步的增多,不可能都放到一个文件中处理,比如UV。

 下面的统计方法是:先将文件散列到不同的文件里面,然后在统计

#!/bin/bash
sourcePath="/TICKET_LOGS_CN6/CN6_ticket/ /TICKET_LOGS_CN1/CN6_ticket/"
sourceFile="trace.log.2014-09-01.gz"
targetPath="/tmp/log/"
for ((i=1;i<=9;i++))
do
        `find $sourcePath -name $sourceFile -exec zcat {} \; | tr -d "[|]" | awk '{if ($4~/^'"${i}"'/) print($4) > "'"${targetPath}${i}"'.log"}'`
done
cat ${targetPath}*.log | sort | uniq -c | wc -l

 

posted @ 2013-09-25 19:00  李秋  阅读(1106)  评论(0编辑  收藏  举报