awk书上练习

文件car:

plym fury 1970 73 2500
chevy malibu 1999 60 3000
ford mustang 1965 45 10000
volvo s80 1998 102 9850
ford thundbd 2003 15 10500
chevy malibu 2000 50 3500
bmw 325i 1985 115 450
honda accord 2001 30 6000
ford taurus 2004 10 17000
toyota rav4 2002 180 750
chevy impala 1985 85 1550
ford explor 2003 25 9500

基本

awk '{print}' car    #原样输出文件的每一行,效果类似 cat car
awk '/chevy/' car    #输出所有包含字符串 chevy 的行
chevy malibu 1999 60 3000
chevy malibu 2000 50 3500
chevy impala 1985 85 1550
dahu@dahu-OptiPlex-3046:~/myfile$ awk '/10/' car    #只要行内任意位置出现"10"就匹配(按文本/正则匹配,不是数值比较)
ford mustang 1965 45 10000
volvo s80 1998 102 9850
ford thundbd 2003 15 10500
ford taurus 2004 10 17000
dahu@dahu-OptiPlex-3046:~/myfile$ awk '{print $1,$3}' car    #显示第一列,空格,第三列
dahu@dahu-OptiPlex-3046:~/myfile$ awk '/chevy/{print $1,$3}' car    #匹配行,选中列
chevy 1999
chevy 2000
chevy 1985
dahu@dahu-OptiPlex-3046:~/myfile$ awk '$2 ~ /[0-9]/' car   #第二列中含有数字的行才匹配,匹配后输出整行
volvo s80 1998 102 9850
bmw 325i 1985 115 450
toyota rav4 2002 180 750
dahu@dahu-OptiPlex-3046:~/myfile$ awk '$2 ~ /^[tm]/{print $3,$2,"$"$5}' car    #第2个字段匹配正则,并按要求显示
1999 malibu $3000
1965 mustang $10000
2003 thundbd $10500
2000 malibu $3500
2004 taurus $17000
dahu@dahu-OptiPlex-3046:~/myfile$ awk '$5<=3000' car    #按大小查找
plym fury 1970 73 2500
chevy malibu 1999 60 3000
bmw 325i 1985 115 450
toyota rav4 2002 180 750
chevy impala 1985 85 1550
dahu@dahu-OptiPlex-3046:~/myfile$ awk '/volvo/,/bmw/' car    #匹配两个之间的行
volvo s80 1998 102 9850
ford thundbd 2003 15 10500
chevy malibu 2000 50 3500
bmw 325i 1985 115 450
dahu@dahu-OptiPlex-3046:~/myfile$ awk '/chevy/,/ford/' car    #范围模式会重复触发:每遇到一个chevy行就开始输出,直到下一个ford行(含)为止,然后等待下一个chevy行;并不是"贪心"匹配
chevy malibu 1999 60 3000
ford mustang 1965 45 10000
chevy malibu 2000 50 3500
bmw 325i 1985 115 450
honda accord 2001 30 6000
ford taurus 2004 10 17000
chevy impala 1985 85 1550
ford explor 2003 25 9500
awk -f file car

file 里可以写awk程序,也就是上面引号的内容,不需要加引号.

BEGIN

dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa    #前缀
BEGIN{
print "Make Mode Year Miles Price"
print "----------------------------"
}
{print}
dahu@dahu-OptiPlex-3046:~/myfile$ awk -f aaa car
Make Mode Year Miles Price
----------------------------
plym fury 1970 73 2500
chevy malibu 1999 60 3000
ford mustang 1965 45 10000
volvo s80 1998 102 9850
ford thundbd 2003 15 10500
chevy malibu 2000 50 3500
bmw 325i 1985 115 450
honda accord 2001 30 6000
ford taurus 2004 10 17000
toyota rav4 2002 180 750
chevy impala 1985 85 1550
ford explor 2003 25 9500

length

dahu@dahu-OptiPlex-3046:~/myfile$ awk '{print length,$0}' car|sort -n    #显示每一行的字符数(包含空格的数量),并按顺序排序,length后也可加括号
21 bmw 325i 1985 115 450
22 plym fury 1970 73 2500
23 volvo s80 1998 102 9850
24 ford explor 2003 25 9500
24 toyota rav4 2002 180 750
25 chevy impala 1985 85 1550
25 chevy malibu 1999 60 3000
25 chevy malibu 2000 50 3500
25 ford taurus 2004 10 17000
25 honda accord 2001 30 6000
26 ford mustang 1965 45 10000
26 ford thundbd 2003 15 10500

NR

dahu@dahu-OptiPlex-3046:~/myfile$ awk '{print NR,$0}' car    #显示行数,NR记录编号,NF字段数目
1 plym fury 1970 73 2500
2 chevy malibu 1999 60 3000
3 ford mustang 1965 45 10000
4 volvo s80 1998 102 9850
5 ford thundbd 2003 15 10500
6 chevy malibu 2000 50 3500
7 bmw 325i 1985 115 450
8 honda accord 2001 30 6000
9 ford taurus 2004 10 17000
10 toyota rav4 2002 180 750
11 chevy impala 1985 85 1550
12 ford explor 2003 25 9500
dahu@dahu-OptiPlex-3046:~/myfile$ awk 'NR==2,NR==4' car    #显示第2行到第4行的内容,太方便了!
chevy malibu 1999 60 3000
ford mustang 1965 45 10000
volvo s80 1998 102 9850

END

dahu@dahu-OptiPlex-3046:~/myfile$ awk 'END {print NR,"cars for sale"}' car    #end表示数据已处理之后,此时NR就是总行数了
12 cars for sale

if

dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa  #if的简短用法,没有使用花括号
{
    if ($1 ~ /ply/) $1 ="plymouth"
    if ($1 ~ /chev/) $1 ="chevrolet"
    print
}
dahu@dahu-OptiPlex-3046:~/myfile$ awk -f aaa car
plymouth fury 1970 73 2500
chevrolet malibu 1999 60 3000
ford mustang 1965 45 10000
volvo s80 1998 102 9850
ford thundbd 2003 15 10500
chevrolet malibu 2000 50 3500
bmw 325i 1985 115 450
honda accord 2001 30 6000
ford taurus 2004 10 17000
toyota rav4 2002 180 750
chevrolet impala 1985 85 1550
ford explor 2003 25 9500
dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa  #程序稍加改进,把awk程序文件直接改成可直接运行的,增加一下可执行的权限chmod 744  aaa
#!/usr/bin/awk -f
{
    if ($1 ~ /ply/) $1 ="plymouth"
    if ($1 ~ /chev/) $1 ="chevrolet"
    print
}
dahu@dahu-OptiPlex-3046:~/myfile$ ./aaa car
dahu@dahu-OptiPlex-3046:~/myfile$ cat price_range
#{                                #这边是将第五列改成评价
#if ($5 <=5000) $5="cheap";
#else if (5000<$5 && $5<10000) $5="please ask";
#else if ($5>=10000) $5="expensive";
#print $0
#}
BEGIN{
    s="cheap"
}
{                                  #保留第5列
if ($5 <=5000) s="cheap";
else if (5000<$5 && $5<10000) s="please ask";
else if ($5>=10000) s="expensive";
print $0,s                          #s是awk变量,引用时不加$;只有字段才写成$0、$5这种形式
}
dahu@dahu-OptiPlex-3046:~/myfile$ awk -f price_range car
plym fury 1970 73 2500 cheap
chevy malibu 1999 60 3000 cheap
ford mustang 1965 45 10000 expensive
volvo s80 1998 102 9850 please ask
ford thundbd 2003 15 10500 expensive
chevy malibu 2000 50 3500 cheap
bmw 325i 1985 115 450 cheap
honda accord 2001 30 6000 please ask
ford taurus 2004 10 17000 expensive
toyota rav4 2002 180 750 cheap
chevy impala 1985 85 1550 cheap
ford explor 2003 25 9500 please ask

 

OFS  

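什么样的情况下才会按照新格式输出呢?(答:只有当某个字段被赋值之后,awk 才会用 OFS 重新拼接 $0;没有修改过字段的行,$0 保持输入时的原样。如果想让所有行都按 OFS 输出,可以在 print 之前执行一次 $1=$1,或者写成 print $1,$2,$3,$4,$5。)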

dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa  #OFS输出字段分隔符,默认是空格,但是我改了之后发现只有满足if条件的才会按照新格式输出,如果注释掉了if,就都不会按照新格式输出,存疑.
#!/usr/bin/awk -f
BEGIN{OFS=" >> "}
{
    if ($1 ~ /ply/) $1 ="plymouth"
    if ($1 ~ /chev/) $1 ="chevrolet"
    print $0
}

dahu@dahu-OptiPlex-3046:~/myfile$ ./aaa car
plymouth >> fury >> 1970 >> 73 >> 2500
chevrolet >> malibu >> 1999 >> 60 >> 3000
ford mustang 1965 45 10000
volvo s80 1998 102 9850
ford thundbd 2003 15 10500
chevrolet >> malibu >> 2000 >> 50 >> 3500
bmw 325i 1985 115 450
honda accord 2001 30 6000
ford taurus 2004 10 17000
toyota rav4 2002 180 750
chevrolet >> impala >> 1985 >> 85 >> 1550
ford explor 2003 25 9500

printf

dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa  #printf改善输出格式
#!/usr/bin/awk -f
BEGIN{
    print "                               Miles"
    print "Make       Mode       Year     (000)   Price "
    print \
    "--------------------------------------------"
}
{
    if ($1 ~ /ply/) $1 ="plymouth"
    if ($1 ~ /chev/) $1 ="chevrolet"
    printf "%-10s %-8s   %2d   %5d     $ %8.2f\n",\
           $1,$2,$3,$4,$5
}

dahu@dahu-OptiPlex-3046:~/myfile$ ./aaa car
                               Miles
Make       Mode       Year     (000)   Price 
--------------------------------------------
plymouth   fury       1970      73     $  2500.00
chevrolet  malibu     1999      60     $  3000.00
ford       mustang    1965      45     $ 10000.00
volvo      s80        1998     102     $  9850.00
ford       thundbd    2003      15     $ 10500.00
chevrolet  malibu     2000      50     $  3500.00
bmw        325i       1985     115     $   450.00
honda      accord     2001      30     $  6000.00
ford       taurus     2004      10     $ 17000.00
toyota     rav4       2002     180     $   750.00
chevrolet  impala     1985      85     $  1550.00
ford       explor     2003      25     $  9500.00
dahu@dahu-OptiPlex-3046:~/myfile$ cat redirect     #重定向输出
#!/usr/bin/awk -f
/chevy/ {print > "chevfile"}
/ford/ {print > "fordfile"}
END {print "done."}

dahu@dahu-OptiPlex-3046:~/myfile$ ./redirect car
done.
dahu@dahu-OptiPlex-3046:~/myfile$ cat chevfile 
chevy malibu 1999 60 3000
chevy malibu 2000 50 3500
chevy impala 1985 85 1550
dahu@dahu-OptiPlex-3046:~/myfile$ cat fordfile 
ford mustang 1965 45 10000
ford thundbd 2003 15 10500
ford taurus 2004 10 17000
ford explor 2003 25 9500

FS

输入字段分隔符

for

dahu@dahu-OptiPlex-3046:~/myfile$ cat manuf   #for结构,第一列的内容放入这个字典
awk '{manuu[$1]++}
END {for (name in manuu) {print name ,manuu[name]}}' car    #里面的花括号也可以不用加,命令之间用;间隔
dahu@dahu-OptiPlex-3046:~/myfile$ ./manuf 
honda 1
bmw 1
volvo 1
ford 4
plym 1
chevy 3
toyota 1
dahu@dahu-OptiPlex-3046:~/myfile$ cat mmanuf   #这个程序感觉厉害了
if [ $# !=2 ]
    then
        echo "something wrong!"
        exit 1
fi
awk < $2 '
{count[$'$1']++}      #注意这里,单引号成对出现,两端引号内容连接起来,中间还是直接引用传入的第一个参数,在这里是1,所以调用第一列的内容,666,在END里也试过,单引号随便加,反正连起来的.哪怕是把变量名拆掉也行...吊
END{for (item in count) print item,count[item]}
#END{fo''r (item in count) ''print it''em,count[item]}  #你敢信吗?
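'
dahu@dahu-OptiPlex-3046:~/myfile$ ./mmanuf 1 car
./mmanuf: line 1: [: 2: unary operator expected
honda 1
bmw 1
volvo 1
ford 4
plym 1
chevy 3
toyota 1
(注:上面的报错是因为脚本第1行 `!=2` 之间缺少空格,test 把 "!=2" 当成了一个整体参数;应写成 [ $# != 2 ]。由于判断失败,if 分支被跳过,后面的 awk 仍然照常执行。)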

 

实操:

1.列出第一列所有不重复的取值(如需统计个数,可在末尾再接 |wc -l):

xch27@lanzhou:/asrdata/users/ql826/lmwork/comm_cloud/aicar_solution/v28_24Apr2017/data/radio/slot$ awk 'BEGIN{FS="&"}{print $1}' slot.map |sort -u
CLASS-FM频道
CLASS-主持人
CLASS-序列号
CLASS-频道
CLASS-频道类型

2.统计某一项的个数:

xch27@lanzhou:/asrdata/users/ql826/lmwork/comm_cloud/aicar_solution/v28_24Apr2017/data/radio/slot$ awk 'BEGIN{FS="&";a=0}{if($1 =="CLASS-频道")a++}END{print a}' slot.map 
114716

 3.找到某几行的内容

dahu@dahu-OptiPlex-3046:~/Downloads$ awk '{if(NR<=4900 && NR>=4800)print $2}' enwords.oov.cnt-gt800.cnt-pron > en

 4.匹配我要找的CLASS-XXX,且不重复,for的简单应用

xch27@lanzhou:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v30_23May2017/data/life/music/pat$ head gequ_geshou
<s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 唱 的 </s>
<s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 的 </s>
<s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 的 </s>
<s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 的 </s>
<s> CLASS-动作二 CLASS-歌手名 的 CLASS-语种 歌曲 CLASS-歌曲名 </s>
<s> CLASS-动作三 CLASS-歌曲名 的 歌词 CLASS-歌手名 唱 的 </s>
<s> CLASS-动作三 CLASS-歌曲名 的 歌词 CLASS-歌手名 的 </s>
<s> CLASS-动作三 CLASS-歌曲名 的 歌词 CLASS-歌手名 演唱 的 </s>
<s> CLASS-动作三 CLASS-歌手名 唱 的 CLASS-歌曲名 的 歌词 </s>
<s> CLASS-动作三 CLASS-歌手名 的 CLASS-歌曲名 的 歌词 </s>
xch27@lanzhou:/.../pat$ awk '{for(i=2;i<NF;i++)if($i ~ "CLASS-"){print $i}}' gequ_geshou |sort -u
CLASS-动作三
CLASS-动作二
CLASS-操作
CLASS-歌手名
CLASS-歌曲名
CLASS-语种

 统计 不匹配"CLASS-"的行数

awk '{a=0;for(i=1;i<=NF;i++){if($i ~ "CLASS-"){a=1}};if(a!=1){count++}}END{print count}' music.comm.mrg.v3.pat.wseg

 修改文件内容,多变量传递

#!/bin/bash
#awk传入变量练习,直接修改小麦
fs=`awk '/你好小迈/{print $2}' wakeup.logp`
#echo $fs
#多个变量这样添加
#p=321
#echo |awk -v tt="$fs" -v tg="$p" 'BEGIN{print tt,tg}'
ft=`awk '/小迈你好/{print $2}' wakeup.logp`
awk -v nhxm="$fs" -v xmnh="$ft" '{if($1~"你好小麦"){a=nhxm+2;print $1,a}else if($1~"小麦你好"){a=xmnh+2;print $1,a}else{print $0}}' wakeup.logp >tmp

 awk根据不同名称输出到不同文件.知识点:FS,RS,substr,split,awk内部输出重定向

xch27@taiyuan:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v32_30june2017/data/navi/slot$ head t1
名称:北京南顺油脂有限公司
拼音:BeiJing NanShun YouZhi YouXianGongSi
别称:
地址:良乡南肖庄道口西
类型:公司企业;公司;公司
省:北京市
市:北京市
区县:房山区
热度:9.6901977

xch27@taiyuan:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v32_30june2017/data/navi/slot$ awk 'BEGIN{FS="\n";RS="\n\n"}{a=substr($5,4);split(a,A,";");print substr($1,4)>>A[1];}' t1

xch27@taiyuan:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v32_30june2017/data/navi/slot$ ls
gaode.alldata.0620.txt  readme.sh  tmp       体育休闲服务       公司企业      商务住宅      搜索词_0620.txt     生活服务      购物服务      餐饮服务
lineprocess.py          t1         住宿服务  全量数据_0620.txt  医疗保健服务  地名地址信息  政府机构及社会团体  科教文化服务  金融保险服务

 

posted @ 2017-05-10 22:25  dahu1  Views(315)  Comments(0)  Edit  收藏  举报