文本处理awk

1、基本用法

[root@netposa13501206 ~]# cat grade.txt 
M.Tansley    05/99      48311   Green   8       40      44
J.Lulu       06/99      48317   green   9       24      26
P.Bunny      02/99      48      Yellow  12      35      28
J.Troll      07/99      4842    Brown-3 12      26      26
L.Tansley    05/99      4712    Brown-2 12      30      28
# 打印所有记录
[root@netposa13501206 ~]# awk '{print $0}' grade.txt
# 打印指定的域(第1列和第4列)
[root@netposa13501206 ~]# awk '{print $1,$4}' grade.txt
# 打印开头和结尾
[root@netposa13501206 ~]# awk 'BEGIN{print "Name\n----------"}{print $1}END{print "----------\nend-of-report"}' grade.txt
Name
----------
M.Tansley
J.Lulu
P.Bunny
J.Troll
L.Tansley
----------
end-of-report

2、条件操作

2.1 匹配

# 输出第4列存在Brown的行
[root@netposa13501206 ~]# awk '{if($4~/Brown/)print $0}' grade.txt
J.Troll      07/99      4842    Brown-3 12      26      26
L.Tansley    05/99      4712    Brown-2 12      30      28
# 输出整行中存在Brown的行(用正则匹配整行$0,不限于第4列)
[root@netposa13501206 ~]# awk '$0 ~ /Brown/' grade.txt
J.Troll      07/99      4842    Brown-3 12      26      26
L.Tansley    05/99      4712    Brown-2 12      30      28

2.2 精准匹配

# 输出第3列值为48的行
[root@netposa13501206 ~]# awk '$3=="48" {print $0}' grade.txt
P.Bunny      02/99      48      Yellow  12      35      28

2.3 反向匹配

# 输出第4列不存在Brown的行(第二条命令匹配的是整行$0,本例中两者结果相同)
[root@netposa13501206 ~]# awk '{if($4!~/Brown/)print $0}' grade.txt
[root@netposa13501206 ~]# awk '$0 !~ /Brown/' grade.txt
M.Tansley    05/99      48311   Green   8       40      44
J.Lulu       06/99      48317   green   9       24      26
P.Bunny      02/99      48      Yellow  12      35      28
# 输出第3列值不为48的行
[root@netposa13501206 ~]# awk '$3!="48" {print $0}' grade.txt
M.Tansley    05/99      48311   Green   8       40      44
J.Lulu       06/99      48317   green   9       24      26
J.Troll      07/99      4842    Brown-3 12      26      26
L.Tansley    05/99      4712    Brown-2 12      30      28

2.4 小于

[root@netposa13501206 ~]# awk '{if($6<$7) print $1" did better"}' grade.txt
M.Tansley did better
J.Lulu did better

2.5 模式匹配

# 输出匹配Green或green的行
[root@netposa13501206 ~]# awk '/[Gg]reen/' grade.txt
M.Tansley    05/99      48311   Green   8       40      44
J.Lulu       06/99      48317   green   9       24      26
# 第1列的前三个字符任意,第四个字符是a
[root@netposa13501206 ~]# awk '$1 ~ /^...a/' grade.txt
M.Tansley    05/99      48311   Green   8       40      44
L.Tansley    05/99      4712    Brown-2 12      30      28
# 或关系匹配|
[root@netposa13501206 ~]# awk '$0 ~ /(Yellow|Brown)/' grade.txt
P.Bunny      02/99      48      Yellow  12      35      28
J.Troll      07/99      4842    Brown-3 12      26      26
L.Tansley    05/99      4712    Brown-2 12      30      28
# 或关系匹配||
[root@netposa13501206 ~]# awk '{if($4=="Yellow" || $4~/Brown/ ) print $0}' grade.txt
P.Bunny      02/99      48      Yellow  12      35      28
J.Troll      07/99      4842    Brown-3 12      26      26
L.Tansley    05/99      4712    Brown-2 12      30      28
# 与关系匹配&&
[root@netposa13501206 ~]# awk '{if($1=="P.Bunny" && $4=="Yellow" ) print $0}' grade.txt
P.Bunny      02/99      48      Yellow  12      35      28

2.6 NF、NR与-F

# 输出倒数第二列大于27,并且行号大于等于4的行(注意:与带引号的"27"比较是字符串比较,数值比较应写成 $(NF-1)>27)
[root@netposa ~]# awk '{if($(NF-1)>"27" && NR>=4) print $0}' grade.txt
L.Tansley    05/99      4712    Brown-2 12      30      28
# 以 . / - 三个符号为分隔符
[root@netposa ~]# echo "192.168-14/25.143" | awk -F '[-./]' '{print $4}'
25

2.7 变量赋值

# BEGIN方式
[root@netposa ~]# awk 'BEGIN{a="27";b="30"}{if($6>a && $7>b) print $0}' grade.txt
M.Tansley    05/99      48311   Green   8       40      44
# 普通方式
[root@netposa ~]# awk '{a="27";b="30";if($6>a && $7>b) print $0}' grade.txt
M.Tansley    05/99      48311   Green   8       40      44

2.8 修改数值域取值

# 显示所有行
[root@netposa ~]# awk '{if($1=="M.Tansley")$6=$6-1;print $1,$6,$7}' grade.txt
M.Tansley 39 44
J.Lulu 24 26
P.Bunny 35 28
J.Troll 26 26
L.Tansley 30 28
# 只显示修改的行
[root@netposa ~]# awk '{if($1=="M.Tansley"){$6=$6-1;print $1,$6,$7}}' grade.txt
M.Tansley 39 44

2.9 创建新域

[root@netposa ~]# awk '{if($6<$7){diff=$7-$6;print$1,diff}}' grade.txt
M.Tansley 4
J.Lulu 2

2.10 列值相加

[root@netposa ~]# awk '{(total+=$6)};END{print "total : " total}' grade.txt
total : 155

3、内置函数

3.1 gsub

# 4842变为4899
[root@netposa ~]# awk 'gsub(/4842/,4899){print $0}' grade.txt
J.Troll      07/99      4899    Brown-3 12      26      26
# 将第4列中匹配.*reen的部分替换为TOM,并只输出发生了替换的行
[root@netposa ~]# awk 'gsub(/.*reen/,"TOM",$4){print $1,$4,$NF}' grade.txt 
M.Tansley TOM 44
J.Lulu TOM 26

3.2 substr

# 截取第一列前5个字符
[root@node110 ~]# awk '$1=="L.Tansley"{print substr($1,1,5)}' grade.txt
L.Tan
# 截取名字
[root@node110 ~]# awk '{print substr($1,3)}' grade.txt
Tansley
Lulu
Bunny
Troll
Tansley

3.3 printf

# 打印学生名字和序列号,左对齐15个字符长度,后跟序列号
[root@netposa ~]# awk '{printf "%-15s %s\n",$1,$3}' grade.txt
M.Tansley       48311
J.Lulu          48317
P.Bunny         48
J.Troll         4842
L.Tansley       4712

3.4 向awk传值

# 查询10岁以下的学生
[root@netposa ~]# awk '{if($5<AGE) print $0}' AGE=10 grade.txt
M.Tansley    05/99      48311   Green   8       40      44
J.Lulu       06/99      48317   green   9       24      26
# 打印可用空间(第4列,单位KB)大于15000000KB(约15G)的挂载点
[root@netposa ~]# df -k | awk '($4 ~ /^[0-9]/){if($4>TRRIGER)printf "%-15s %s\n",$6,$4}' TRRIGER=15000000
/               863365324
/dev            16399432
/dev/shm        16410208
/run            15752456
/sys/fs/cgroup  16410208

3.5 数组

# 定义数组并打印
[root@netposa ~]# awk 'BEGIN{print split("123#456#789",x,"#")}END{for(i in x){print x[i]}}' /dev/null
3
123
456
789
# 按连接状态统计tcp连接的数量
[root@netposa sh]# netstat -an | awk '/^tcp/ {++S[$NF]} END{for(a in S) print a, S[a]}'
LISTEN 16
ESTABLISHED 378
TIME_WAIT 12

3.6 数组应用

[root@netposa ~]# cat grade_student.txt
Yellow#Junior
Orange#Senior
Yellow#Junior
Purple#Junior
Brown-2#Junior
White#Senior
Orange#Senior
Red#Junior
Brown-2#Senior
Yellow#Senior
Blue#Junior
Green#Senior
Purple#Junior
White#Junior
# 按颜色统计数量
[root@netposa ~]# awk -F '#' '{++C[$1]}END{for (i in C)print i,C[i]}' grade_student.txt 
Red 1
Orange 2
Blue 1
White 2
Brown-2 2
Purple 2
Green 1
Yellow 3
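# 统计Senior数量(注意:C[Senior]中的Senior未加引号,是未初始化的变量,数组下标实际为空字符串;结果虽然正确,但更直接的写法是 awk -F '#' '$2=="Senior"{n++}END{print "Senior\t"n}')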
[root@netposa ~]# awk -F '#' 'BEGIN{C[Senior]}{for (i in C){if($2=="Senior")C[Senior]++}}END{print "Senior""\t"C[i]}' grade_student.txt 
Senior  6
posted @ 2020-11-18 13:50  那就这样吧~  阅读(127)  评论(0)  编辑  收藏  举报