awk

AWK

awk支持条件判断、数组、循环等功能。所以，我们也可以把awk理解成一个脚本语言解释器。

awk -v FS=: 'NR==2{print $1} NR==5{print $1}' /etc/passwd

awk -v FS=: 'BEGIN{print "username \t userid"} {print $1 "\t\t" $3}' /etc/passwd

awk 'BEGIN{for(i=1;i<10;i++){for(j=1;j<=i;j++){printf j "*" i"="i*j;printf "\t" };print "\n"}}'

1*1=1
1*2=2   2*2=4
1*3=3   2*3=6   3*3=9
1*4=4   2*4=8   3*4=12  4*4=16
1*5=5   2*5=10  3*5=15  4*5=20  5*5=25
1*6=6   2*6=12  3*6=18  4*6=24  5*6=30  6*6=36
1*7=7   2*7=14  3*7=21  4*7=28  5*7=35  6*7=42  7*7=49
1*8=8   2*8=16  3*8=24  4*8=32  5*8=40  6*8=48  7*8=56  8*8=64
1*9=9   2*9=18  3*9=27  4*9=36  5*9=45  6*9=54  7*9=63  8*9=72  9*9=81

语法格式：

awk [options] 'Pattern{Action}' file1 file2

options

参数	释意
-F	指定分隔符	echo 'http://baidu.com??search=awk' | awk -F '[?]+' '{print $2}'
-f	指定脚本	例子
-v	指定变量	awk -v name="$HOSTNAME" 'BEGIN {print name}'
--posix	兼容正则
-FS
-OFS
-RS
-ORS

cat  file.awk
BEGIN{print "the latest list of user and shells"
print "userid \t shell"
print "----- \t ------"
FS=":"
}
{print $1 "\t" $7}
END{print "end of test"}

awk -f file.awk /etc/passwd

Pattern

空模式
```
awk '{print $0}' /etc/passwd
```

匹配正则

awk '/^root/{print $0}' /etc/passwd

当使用{x,y}或[[:space:]]这种正则时，要加上参数--posix

awk -F ":" '$1 ~ /root/{print $0}' /etc/passwd

awk -F ":" '$1 !~ /root/{print $0}' /etc/passwd

匹配字符串
```
awk '/root/{print $0}' /etc/passwd
```

匹配范围

awk 'NR>2 && NR<5 {print $0}' /etc/passwd

特殊模式BEGIN END

action

动作		事例
print		awk '{print NR,$0}' /etc/passwd
printf		awk '{printf "%s\t%s\t%s\n", NR, "-", $0}' /etc/passwd
if		awk 'BEGIN {if (3 > 2) {print "AAA"} else {print "BBB"}}'
三元运算		awk -F ":" '{usertype=$3<500?"系统用户":"普通用户"}{print $1, usertype}' /etc/passwd
for		awk 'BEGIN{for(i=1;i<10;i++){for(j=1;j<=i;j++){printf j"*"i"="j*i"\t"};print ""}}'
		awk '{counts[$1]++}; END {for(url in counts) print counts[url], url}' /var/log/httpd/access_log
while		awk 'BEGIN{num=0;while (num<10){print num;num++}}'
do while		awk 'BEGIN{num=0;do {print num;num++}while (num<10)}'
continue		awk 'BEGIN{num=0;do {num++;if (num ==2) continue; print num;}while (num<10)}'
break		awk 'BEGIN{num=0;do {num++;if (num ==2) break; print num;}while (num<10)}'
next		提前结束对本行文本的处理，并接着处理下一行；例如，下面的命令将显示其ID号为奇数的用户： awk -F: '{if($3%2==0) next;print $1,$3}' /etc/passwd

变量

系统变量

变量	释意
$0	代表整行	awk -F ":" '{if($3>500)print $0}' /etc/passwd
$1	代表第一个域	awk -F ":" '{if($3>500)print $1}' /etc/passwd
$2	代表第二个域
$3	代表第三个域	echo "test many program" | awk '{$3="procedure";print $0}'
$n	代表第n个域
$NF $(NF-1)	最后一列、倒数第二列	netstat -ant | awk '{print $NF}'
NF	number of fields（当前行的字段个数）	awk -F ":" '{print NF}' /etc/passwd
FNR	file number rows	awk '{print FNR,$0}' /etc/passwd /etc/services
NR	number rows	awk 'BEGIN{print "序列","\t", "内容"};{print NR"\t"$0}' /etc/passwd
FS	field separator	awk -v FS=: '{print $NF}' /etc/passwd
OFS	out field separator	awk -v FS=: -v OFS=- '{print $1,$NF}' /etc/passwd
RS	row separator	awk -v RS=: '{print $0}' /etc/passwd
ORS	out row separator	awk -v RS=: -v ORS=: '{print $0}' /etc/passwd
ARGV	参数组成的数组	数组，保存命令行本身这个字符串，如awk '{print $0}' a.txt b.txt这个命令中，ARGV[0]保存awk，ARGV[1]保存a.txt；
ARGC	参数的个数	awk命令的参数的个数；
FILENAME	文件名称	awk '{print FILENAME}' /etc/passwd
BEGIN	最先执行的动作	awk 'BEGIN{name="AA";print name}'
END	最后执行的动作	awk 'BEGIN{name="AA";print name}{print }END{print "end"}'

自定义变量

str="中文"
awk -v str=$str 'BEGIN{print str}'

awk  'BEGIN{str="中文";print str}'

运算符

算数运算

-x: 负值
+x: 转换为数值；
x^y: 
x**y: 次方
x*y: 乘法
x/y：除法
x+y:
x-y:
x%y:
++
--

赋值运算

=
+=
-=
*=
/=
%=
^=
**=

比较运算

x < y
x <= y
x > y
x >= y
x == y
x != y
x ~ y	True if the string x matches the regexp denoted by y. 
x !~ y	True if the string x does not match the regexp denoted by y. 
subscript in array

逻辑运算

||
&&

函数

内置函数

函数名	释义	语法
rand	伪随机	awk 'BEGIN{print rand()}' 0.237788
srand		awk 'BEGIN{print srand()}' 1
	生成随机数	awk 'BEGIN{srand();print rand()}'
int	整数	awk 'BEGIN{srand();print int(100000*rand())}'
length([s])	变量或字符串长度	awk 'BEGIN{for(i=0;i<length(123);i++) print i}'
gsub(r, s [, t])	替换字符串，但是无法直接修改文件	awk '/^SELINUX/{gsub("disabled","permissive",$0);print $0}' /etc/selinux/config
system(command)	执行系统command并将结果返回至awk命令	`tail -f access_log
split(string, array [, fieldsep [, seps ] ])		功能：将string表示的字符串以fieldsep为分隔符进行分隔，并将分隔后的结果保存至array为名的数组中；数组下标为从1开始的序列；例子

[root@allinone ~]# echo "hello world"|awk '{gsub("l","L");print $0}'
heLLo worLd
[root@allinone ~]# echo "hello world"|awk '{gsub("l","L",$1);print $0}'
heLLo world

[root@allinone ~]# echo "hello world"|awk '{sub("l","L");print $0}'    
heLlo world
[root@allinone ~]# echo "hello world"|awk '{sub("l","L",$2);print $0}' 
hello worLd

用户自定义函数

自定义函数使用function关键字。函数还可以使用return语句返回值，格式为“return value”。

function F_NAME([variable])
{
	statements
}

用例收集

按照连接状态统计系统tcp 连接数

netstat -nat|awk '{count[$NF]++} END{for(i in count){print count[i] ,i}}'|sort -nr
netstat -nat|awk '{print $NF}' |sort|uniq -c|sort -rn
netstat -ant | awk 'NR>2{print $NF}'|sort|uniq -c

网站访问统计

netstat -antlp|awk '/10.0.16.14:80/{split($5,ip,":");count[ip[1]]++}END {for (i in count)print count[i],i}'|sort -rn|head

netstat -antlp|awk -F "[: ]+" '/10.0.16.14:80/{print $6}'|sort|uniq -c|sort -rn|head

查找较多time_wait连接

netstat -n|awk '/TIME_WAIT/{print $5}'|sort|uniq -c|sort -rn|head -n20

查找较多的SYN连接

netstat -an|awk '/SYN/{print $5}'|awk -F ":" '{print $1}'|sort|uniq -c|sort -nr

获取对应端口的pid

netstat -ntlp|awk '/80 /{print $7}'| cut -d/ -f1

列出传输最大的几个exe文件（分析下载站的时候常用)

cat access.log |awk '($7~/.exe/){print $10 " " $1 " " $4 " " $7}'|sort -nr|head -20

列出输出大于200000byte(约200kb)的exe文件以及对应文件发生次数

cat access.log |awk '($10 > 200000 && $7~/.exe/){print $7}' |sort -n|uniq -c|sort -nr|head -100

如果日志最后一列记录的是页面文件传输时间，则有列出到客户端最耗时的页面

cat access.log |awk '($7~/.php/){print $NF " " $1 " " $4 " " $7}'|sort -nr|head -100

列出最最耗时的页面(超过60秒的)的以及对应页面发生次数

cat access.log |awk '($NF > 60 && $7~/.php/){print $7}'|sort -n|uniq -c|sort -nr|head -100

列出传输时间超过 30 秒的文件

cat access.log |awk '($NF > 30){print $7}'|sort -n|uniq -c|sort -nr|head -20

统计网站流量（G)

cat access.log |awk '{sum+=$10} END {print sum/1024/1024/1024}'

统计404的连接

cat access.log | awk '($9 ~/404/){print $9,$7}'| sort

统计http status

cat access.log |awk '{counts[$(9)]+=1}; END {for(code in counts) print code, counts[code]}'

cat access.log |awk '{print $9}'|sort|uniq -c|sort -rn

蜘蛛分析，查看是哪些蜘蛛在抓取内容。

/usr/sbin/tcpdump -i eth0 -l -s 0 -w - dst port 80 | strings | grep -i user-agent | grep -i -E 'bot|crawler|slurp|spider'

网站日分析2(Squid篇）按域统计流量

zcat squid_access.log.tar.gz| awk '{print $10,$7}' |awk 'BEGIN{FS="[ /]"}{trfc[$4]+=$1}END{for(domain in trfc){printf "%s\t%d\n",domain,trfc[domain]}}'

查看数据库执行的sql

tcpdump -i eth0 -s 0 -l -w - dst port 3306 | strings | egrep -i 'SELECT|UPDATE|DELETE|INSERT|SET|COMMIT|ROLLBACK|CREATE|DROP|ALTER|CALL'

查找用户和id

awk -v FS=':' 'BEGIN{printf "%-10s\t %s\n","用户名称","用户ID"} {printf "%-10s\t %s\n",$1,$3}' /etc/passwd

posted @ 2022-03-28 22:06 mingtian是吧阅读(52) 评论(0) 收藏举报

刷新页面返回顶部

awk

AWK

options

Pattern

action

变量

运算符

函数

用例收集

公告