awk书上练习
文件car:
plym fury 1970 73 2500
chevy malibu 1999 60 3000
ford mustang 1965 45 10000
volvo s80 1998 102 9850
ford thundbd 2003 15 10500
chevy malibu 2000 50 3500
bmw 325i 1985 115 450
honda accord 2001 30 6000
ford taurus 2004 10 17000
toyota rav4 2002 180 750
chevy impala 1985 85 1550
ford explor 2003 25 9500
基本
awk '{print}' car #没有模式时对每一行都执行print,原样输出整个文件,效果类似 cat car
awk '/chevy/' car #输出所有包含字符串chevy的行(省略动作时默认执行print) chevy malibu 1999 60 3000 chevy malibu 2000 50 3500 chevy impala 1985 85 1550
dahu@dahu-OptiPlex-3046:~/myfile$ awk '/10/' car #只要行内任意位置包含字符串10就匹配(按字符串/正则匹配,不是按数值比较) ford mustang 1965 45 10000 volvo s80 1998 102 9850 ford thundbd 2003 15 10500 ford taurus 2004 10 17000
dahu@dahu-OptiPlex-3046:~/myfile$ awk '{print $1,$3}' car #显示第一列,空格,第三列
dahu@dahu-OptiPlex-3046:~/myfile$ awk '/chevy/{print $1,$3}' car #匹配行,选中列 chevy 1999 chevy 2000 chevy 1985
dahu@dahu-OptiPlex-3046:~/myfile$ awk '$2 ~ /[0-9]/' car #第二列匹配正则(含有数字)的行,输出匹配行的整行内容 volvo s80 1998 102 9850 bmw 325i 1985 115 450 toyota rav4 2002 180 750
dahu@dahu-OptiPlex-3046:~/myfile$ awk '$2 ~ /^[tm]/{print $3,$2,"$"$5}' car #第2个字段匹配正则,并按要求显示 1999 malibu $3000 1965 mustang $10000 2003 thundbd $10500 2000 malibu $3500 2004 taurus $17000
dahu@dahu-OptiPlex-3046:~/myfile$ awk '$5<=3000' car #按大小查找 plym fury 1970 73 2500 chevy malibu 1999 60 3000 bmw 325i 1985 115 450 toyota rav4 2002 180 750 chevy impala 1985 85 1550
dahu@dahu-OptiPlex-3046:~/myfile$ awk '/volvo/,/bmw/' car #匹配两个之间的行 volvo s80 1998 102 9850 ford thundbd 2003 15 10500 chevy malibu 2000 50 3500 bmw 325i 1985 115 450
dahu@dahu-OptiPlex-3046:~/myfile$ awk '/chevy/,/ford/' car #范围模式并不贪心:从匹配chevy的行开始,到其后第一个匹配ford的行结束;范围结束后再遇到chevy会重新开始一个新范围,所以这里输出了三段 chevy malibu 1999 60 3000 ford mustang 1965 45 10000 chevy malibu 2000 50 3500 bmw 325i 1985 115 450 honda accord 2001 30 6000 ford taurus 2004 10 17000 chevy impala 1985 85 1550 ford explor 2003 25 9500
awk -f file car
file 里可以写awk程序,也就是上面引号的内容,不需要加引号.
BEGIN
dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa #前缀 BEGIN{ print "Make Mode Year Miles Price" print "----------------------------" } {print} dahu@dahu-OptiPlex-3046:~/myfile$ awk -f aaa car Make Mode Year Miles Price ---------------------------- plym fury 1970 73 2500 chevy malibu 1999 60 3000 ford mustang 1965 45 10000 volvo s80 1998 102 9850 ford thundbd 2003 15 10500 chevy malibu 2000 50 3500 bmw 325i 1985 115 450 honda accord 2001 30 6000 ford taurus 2004 10 17000 toyota rav4 2002 180 750 chevy impala 1985 85 1550 ford explor 2003 25 9500
length
dahu@dahu-OptiPlex-3046:~/myfile$ awk '{print length,$0}' car|sort -n #显示每一行的字符数(包含空格的数量),并按顺序排序,length后也可加括号 21 bmw 325i 1985 115 450 22 plym fury 1970 73 2500 23 volvo s80 1998 102 9850 24 ford explor 2003 25 9500 24 toyota rav4 2002 180 750 25 chevy impala 1985 85 1550 25 chevy malibu 1999 60 3000 25 chevy malibu 2000 50 3500 25 ford taurus 2004 10 17000 25 honda accord 2001 30 6000 26 ford mustang 1965 45 10000 26 ford thundbd 2003 15 10500
NR
dahu@dahu-OptiPlex-3046:~/myfile$ awk '{print NR,$0}' car #在每行前显示行号:NR是当前记录(行)的编号,NF是当前记录的字段数目 1 plym fury 1970 73 2500 2 chevy malibu 1999 60 3000 3 ford mustang 1965 45 10000 4 volvo s80 1998 102 9850 5 ford thundbd 2003 15 10500 6 chevy malibu 2000 50 3500 7 bmw 325i 1985 115 450 8 honda accord 2001 30 6000 9 ford taurus 2004 10 17000 10 toyota rav4 2002 180 750 11 chevy impala 1985 85 1550 12 ford explor 2003 25 9500
dahu@dahu-OptiPlex-3046:~/myfile$ awk 'NR==2,NR==4' car #显示第2行到第4行的内容,太方便了! chevy malibu 1999 60 3000 ford mustang 1965 45 10000 volvo s80 1998 102 9850
END
dahu@dahu-OptiPlex-3046:~/myfile$ awk 'END {print NR,"cars for sale"}' car #end表示数据已处理之后,此时NR就是总行数了 12 cars for sale
if
dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa #if的简短用法,没有使用花括号 { if ($1 ~ /ply/) $1 ="plymouth" if ($1 ~ /chev/) $1 ="chevrolet" print } dahu@dahu-OptiPlex-3046:~/myfile$ awk -f aaa car plymouth fury 1970 73 2500 chevrolet malibu 1999 60 3000 ford mustang 1965 45 10000 volvo s80 1998 102 9850 ford thundbd 2003 15 10500 chevrolet malibu 2000 50 3500 bmw 325i 1985 115 450 honda accord 2001 30 6000 ford taurus 2004 10 17000 toyota rav4 2002 180 750 chevrolet impala 1985 85 1550 ford explor 2003 25 9500
dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa #程序稍加改进,把awk程序文件直接改成可直接运行的,增加一下可执行的权限chmod 744 aaa #!/usr/bin/awk -f { if ($1 ~ /ply/) $1 ="plymouth" if ($1 ~ /chev/) $1 ="chevrolet" print } dahu@dahu-OptiPlex-3046:~/myfile$ ./aaa car
dahu@dahu-OptiPlex-3046:~/myfile$ cat price_range #{ #这边是将第五列改成评价 #if ($5 <=5000) $5="cheap"; #else if (5000<$5 && $5<10000) $5="please ask"; #else if ($5>=10000) $5="expensive"; #print $0 #} BEGIN{ s="cheap" } { #保留第5列 if ($5 <=5000) s="cheap"; else if (5000<$5 && $5<10000) s="please ask"; else if ($5>=10000) s="expensive"; print $0,s #显示的时候,注意$ } dahu@dahu-OptiPlex-3046:~/myfile$ awk -f price_range car plym fury 1970 73 2500 cheap chevy malibu 1999 60 3000 cheap ford mustang 1965 45 10000 expensive volvo s80 1998 102 9850 please ask ford thundbd 2003 15 10500 expensive chevy malibu 2000 50 3500 cheap bmw 325i 1985 115 450 cheap honda accord 2001 30 6000 please ask ford taurus 2004 10 17000 expensive toyota rav4 2002 180 750 cheap chevy impala 1985 85 1550 cheap ford explor 2003 25 9500 please ask
OFS
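什么样的情况下才会按照新格式输出呢? 答:只有当某个字段被赋值(修改)之后,awk才会用OFS把各字段重新拼接成$0;没有修改过字段的行,print $0 输出的仍是原始文本。想让所有行都按OFS输出,可以先执行 $1=$1 强制重建$0,或者用 print $1,$2,$3,$4,$5 逐个字段输出。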
dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa #OFS输出字段分隔符,默认是空格,但是我改了之后发现只有满足if条件的才会按照新格式输出,如果注释掉了if,就都不会按照新格式输出,存疑. #!/usr/bin/awk -f BEGIN{OFS=" >> "} { if ($1 ~ /ply/) $1 ="plymouth" if ($1 ~ /chev/) $1 ="chevrolet" print $0 } dahu@dahu-OptiPlex-3046:~/myfile$ ./aaa car plymouth >> fury >> 1970 >> 73 >> 2500 chevrolet >> malibu >> 1999 >> 60 >> 3000 ford mustang 1965 45 10000 volvo s80 1998 102 9850 ford thundbd 2003 15 10500 chevrolet >> malibu >> 2000 >> 50 >> 3500 bmw 325i 1985 115 450 honda accord 2001 30 6000 ford taurus 2004 10 17000 toyota rav4 2002 180 750 chevrolet >> impala >> 1985 >> 85 >> 1550 ford explor 2003 25 9500
printf
dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa #printf改善输出格式 #!/usr/bin/awk -f BEGIN{ print " Miles" print "Make Mode Year (000) Price " print \ "--------------------------------------------" } { if ($1 ~ /ply/) $1 ="plymouth" if ($1 ~ /chev/) $1 ="chevrolet" printf "%-10s %-8s %2d %5d $ %8.2f\n",\ $1,$2,$3,$4,$5 } dahu@dahu-OptiPlex-3046:~/myfile$ ./aaa car Miles Make Mode Year (000) Price -------------------------------------------- plymouth fury 1970 73 $ 2500.00 chevrolet malibu 1999 60 $ 3000.00 ford mustang 1965 45 $ 10000.00 volvo s80 1998 102 $ 9850.00 ford thundbd 2003 15 $ 10500.00 chevrolet malibu 2000 50 $ 3500.00 bmw 325i 1985 115 $ 450.00 honda accord 2001 30 $ 6000.00 ford taurus 2004 10 $ 17000.00 toyota rav4 2002 180 $ 750.00 chevrolet impala 1985 85 $ 1550.00 ford explor 2003 25 $ 9500.00
dahu@dahu-OptiPlex-3046:~/myfile$ cat redirect #重定向输出 #!/usr/bin/awk -f /chevy/ {print > "chevfile"} /ford/ {print > "fordfile"} END {print "done."} dahu@dahu-OptiPlex-3046:~/myfile$ ./redirect car done. dahu@dahu-OptiPlex-3046:~/myfile$ cat chevfile chevy malibu 1999 60 3000 chevy malibu 2000 50 3500 chevy impala 1985 85 1550 dahu@dahu-OptiPlex-3046:~/myfile$ cat fordfile ford mustang 1965 45 10000 ford thundbd 2003 15 10500 ford taurus 2004 10 17000 ford explor 2003 25 9500
FS
输入字段分隔符
for
dahu@dahu-OptiPlex-3046:~/myfile$ cat manuf #for结构,第一列的内容放入这个字典 awk '{manuu[$1]++} END {for (name in manuu) {print name ,manuu[name]}}' car #里面的花括号也可以不用加,命令之间用;间隔 dahu@dahu-OptiPlex-3046:~/myfile$ ./manuf honda 1 bmw 1 volvo 1 ford 4 plym 1 chevy 3 toyota 1
dahu@dahu-OptiPlex-3046:~/myfile$ cat mmanuf #这个程序感觉厉害了(注意:下面的 [ $# !=2 ] 在 != 后面少了一个空格,test把 !=2 当成了一个参数,所以运行时会报 unary operator expected,参数个数检查也随之失效;正确写法是 [ $# != 2 ] 或 [ "$#" -ne 2 ]) if [ $# !=2 ] then echo "something wrong!" exit 1 fi awk < $2 ' {count[$'$1']++} #注意这里,单引号成对出现,两端引号内容连接起来,中间还是直接引用传入的第一个参数,在这里是1,所以调用第一列的内容,666,在END里也试过,单引号随便加,反正连起来的.哪怕是把变量名拆掉也行...吊 END{for (item in count) print item,count[item]}
#END{fo''r (item in count) ''print it''em,count[item]} #你敢信吗? ' dahu@dahu-OptiPlex-3046:~/myfile$ ./mmanuf 1 car ./mmanuf: line 1: [: 2: unary operator expected honda 1 bmw 1 volvo 1 ford 4 plym 1 chevy 3 toyota 1
实操:
1.列出某一列不重复的取值(去重):
xch27@lanzhou:/asrdata/users/ql826/lmwork/comm_cloud/aicar_solution/v28_24Apr2017/data/radio/slot$ awk 'BEGIN{FS="&"}{print $1}' slot.map |sort -u CLASS-FM频道 CLASS-主持人 CLASS-序列号 CLASS-频道 CLASS-频道类型
2.统计某一项的个数:
xch27@lanzhou:/asrdata/users/ql826/lmwork/comm_cloud/aicar_solution/v28_24Apr2017/data/radio/slot$ awk 'BEGIN{FS="&";a=0}{if($1 =="CLASS-频道")a++}END{print a}' slot.map 114716
3.找到某几行的内容
dahu@dahu-OptiPlex-3046:~/Downloads$ awk '{if(NR<=4900 && NR>=4800)print $2}' enwords.oov.cnt-gt800.cnt-pron > en
4.匹配我要找的CLASS-XXX,且不重复,for的简单应用
xch27@lanzhou:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v30_23May2017/data/life/music/pat$ head gequ_geshou <s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 唱 的 </s> <s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 的 </s> <s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 的 </s> <s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 的 </s> <s> CLASS-动作二 CLASS-歌手名 的 CLASS-语种 歌曲 CLASS-歌曲名 </s> <s> CLASS-动作三 CLASS-歌曲名 的 歌词 CLASS-歌手名 唱 的 </s> <s> CLASS-动作三 CLASS-歌曲名 的 歌词 CLASS-歌手名 的 </s> <s> CLASS-动作三 CLASS-歌曲名 的 歌词 CLASS-歌手名 演唱 的 </s> <s> CLASS-动作三 CLASS-歌手名 唱 的 CLASS-歌曲名 的 歌词 </s> <s> CLASS-动作三 CLASS-歌手名 的 CLASS-歌曲名 的 歌词 </s>
xch27@lanzhou:/.../pat$ awk '{for(i=2;i<NF;i++)if($i ~ "CLASS-"){print $i}}' gequ_geshou |sort -u CLASS-动作三 CLASS-动作二 CLASS-操作 CLASS-歌手名 CLASS-歌曲名 CLASS-语种
统计 不匹配"CLASS-"的行数
awk '{a=0;for(i=1;i<=NF;i++){if($i ~ "CLASS-"){a=1}};if(a!=1){count++}}END{print count}' music.comm.mrg.v3.pat.wseg
修改文件内容,多变量传递
#!/bin/bash #awk传入变量练习,直接修改小麦 fs=`awk '/你好小迈/{print $2}' wakeup.logp` #echo $fs #多个变量这样添加 #p=321 #echo |awk -v tt="$fs" -v tg="$p" 'BEGIN{print tt,tg}' ft=`awk '/小迈你好/{print $2}' wakeup.logp`
awk -v nhxm="$fs" -v xmnh="$ft" '{if($1~"你好小麦"){a=nhxm+2;print $1,a}else if($1~"小麦你好"){a=xmnh+2;print $1,a}else{print $0}}' wakeup.logp >tmp
awk根据不同名称输出到不同文件.知识点:FS,RS,substr,split,awk内部输出重定向
xch27@taiyuan:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v32_30june2017/data/navi/slot$ head t1 名称:北京南顺油脂有限公司 拼音:BeiJing NanShun YouZhi YouXianGongSi 别称: 地址:良乡南肖庄道口西 类型:公司企业;公司;公司 省:北京市 市:北京市 区县:房山区 热度:9.6901977 xch27@taiyuan:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v32_30june2017/data/navi/slot$ awk 'BEGIN{FS="\n";RS="\n\n"}{a=substr($5,4);split(a,A,";");print substr($1,4)>>A[1];}' t1 xch27@taiyuan:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v32_30june2017/data/navi/slot$ ls gaode.alldata.0620.txt readme.sh tmp 体育休闲服务 公司企业 商务住宅 搜索词_0620.txt 生活服务 购物服务 餐饮服务 lineprocess.py t1 住宿服务 全量数据_0620.txt 医疗保健服务 地名地址信息 政府机构及社会团体 科教文化服务 金融保险服务