Linux awk使用方法

awk使用简介:

awk 'BEGIN{statements} pattern{commands} END{ end statements}'
awk脚本包括三个部分:BEGIN+可以使用模式匹配的通用语句块+END语句块
执行步骤:
1. 执行 BEGIN 语句块(在读取任何输入之前执行,且只执行一次)
2. 从输入流中逐行读取数据,每读入一行,就对匹配 pattern 的行执行一次对应的 commands(省略 pattern 时对每一行都执行)
3. 输入流读完后,执行 END 语句块(只执行一次)
例如:
[root@TopInsight 3760_04_code]#awk 'BEGIN {i=0} {i++} END{print i}'  word_freq.sh
17
或者(使用双引号;注意双引号内的 $1、$0 等会先被 shell 展开,此例中程序不含 $ 才可以这样写,一般推荐使用单引号)
[root@TopInsight 3760_04_code]#awk "BEGIN {i=0} {i++} END{print i}"  word_freq.sh
17

示例1:打印 ~/.bash_history 文件中使用次数较多的命令

[root@Moneytu /var/log/httpd]# printf "%-20s\tCount\n" Commands;cat ~/.bash_history  | grep -Ev '#.*' | awk 'BEGIN {} {list[$1]++;} \
>  END { for(i in list) 
>  {
>  printf("%-20s\t%d\n",i,list[i]);} 
>  }'  v1="Commands" v2="Count" | sort -nrk 2 | head
Commands            	Count
ls                  	151
cd                  	92
service             	83
top                 	77
vi                  	72
df                  	63
mysql               	47
mysqlcheck          	40
iptables            	39
tail                	36

示例2:对apache访问ip进行排序,比较性能:

[root@Moneytu /var/log/httpd]# cat sum_ip.sh 
#!/bin/bash

##对访问ip进行排序

# Print a one-line usage synopsis (the script name followed by the two
# expected arguments) on stdout, then terminate the script with status 1.
useage() {
    printf '    %s filename topN\n' "$0"
    exit 1
}

# Exactly two positional arguments are required; anything else shows the
# usage text (useage terminates the script).
(( $# == 2 )) || useage

# Print a hit-count table for a log file, highest counts first.
#
# Arguments:
#   $1 - path to the log file; the first whitespace-separated field of each
#        line is the key that gets counted
#   $2 - optional number of rows to print; when omitted or empty the global
#        TOP is used instead
# Outputs:
#   a two-line header followed by up to N "key<TAB>count" rows on stdout
sumip() {
    local logfile=$1
    local limit=${2:-$TOP}

    printf "%-20s\tCount\n------------------------------\n" IP
    # The path is quoted so a name containing spaces or glob characters
    # reaches awk as a single operand.
    awk '{ hits[$1]++ }
    END {
        for (ip in hits) {
            printf("%-20s\t%d\n", ip, hits[ip])
        }
    }' "$logfile" | sort -nrk 2 | head -n "$limit"
}

# Capture the two command-line arguments (log file path and row limit) and
# run the report. The expansions are quoted so that a path containing
# spaces or glob characters is passed on as a single word.
FILE=$1
TOP=$2
sumip "$FILE" "$TOP"

结果:

[root@Moneytu /var/log/httpd]# time ./sum_ip.sh access_log 20
IP                  	Count
------------------------------
222.223.40.185      	19787
60.28.116.222       	17468
112.124.26.17       	13891
::1                 	13426
111.162.65.49       	10201
61.183.230.130      	9926
219.140.149.244     	8891
113.247.155.122     	8023
218.106.119.137     	7776
61.189.184.55       	6532
218.69.24.74        	5931
220.181.125.198     	5665
221.226.105.178     	5115
123.126.68.36       	4655
182.87.49.132       	4620
113.116.173.16      	4428
122.96.24.195       	3878
60.166.75.85        	3557
183.54.191.41       	3399
222.88.66.49        	3004

real	0m0.400s
user	0m0.347s
sys	0m0.056s

比下面的命令要快很多:

[root@Moneytu /var/log/httpd]# time cat access_log | awk '{print $1}' | sort | uniq -c | sort -nr | head -n 20
  19787 222.223.40.185
  17468 60.28.116.222
  13862 112.124.26.17
  13422 ::1
  10201 111.162.65.49
   9926 61.183.230.130
   8891 219.140.149.244
   8023 113.247.155.122
   7776 218.106.119.137
   6532 61.189.184.55
   5931 218.69.24.74
   5650 220.181.125.198
   5115 221.226.105.178
   4642 123.126.68.36
   4620 182.87.49.132
   4428 113.116.173.16
   3878 122.96.24.195
   3557 60.166.75.85
   3399 183.54.191.41
   3004 222.88.66.49

real	0m4.052s
user	0m3.823s
sys	0m0.237s

awk引用外部变量的方法:

# For every regular file in the current directory, append the first field of
# each line to "<name>.file".
# The shell variable is handed to awk with -v instead of being spliced into
# the program text, and the loop uses a glob instead of parsing `ls` output,
# so names containing spaces or quotes are handled correctly. Input is
# redirected on stdin so a file name containing '=' is not mistaken by awk
# for a variable assignment operand.
for f in *; do
    [ -f "$f" ] || continue   # skip directories and the unmatched-glob case
    awk -v out="$f.file" '{ print $1 >> out }' < "$f"
done

 

posted @ 2019-03-15 19:15  hexel  阅读(389)  评论(0)  编辑  收藏  举报