Linux 中实现输出指定列元素的重复次数编号
001、 awk数组实现
[root@PC1 test01]# ls
a.txt
[root@PC1 test01]# cat a.txt    ## 测试数据
ERR219543 Africa
ERR219546 kkkk
ERR229467 qqqq
ERR229468 qqqq
ERR229471 mmnn
ERR219547 kkkk
ERR2214955 qqqq
ERR219544 Africa
ERR219545 Africa
ERR2214969 qqqq
[root@PC1 test01]# awk '{OFS = "\t"; ay[$2]++; print $0, ay[$2]}' a.txt    ## 借助于awk中数组来实现
ERR219543 Africa 1
ERR219546 kkkk 1
ERR229467 qqqq 1
ERR229468 qqqq 2
ERR229471 mmnn 1
ERR219547 kkkk 2
ERR2214955 qqqq 3
ERR219544 Africa 2
ERR219545 Africa 3
ERR2214969 qqqq 4
002、 cut、sort、uniq 结合 grep 循环实现（先按第二列分组编号，再按原始顺序输出）
[root@PC1 test01]# ls
a.txt
[root@PC1 test01]# cat a.txt
ERR219543 Africa
ERR219546 kkkk
ERR229467 qqqq
ERR229468 qqqq
ERR229471 mmnn
ERR219547 kkkk
ERR2214955 qqqq
ERR219544 Africa
ERR219545 Africa
ERR2214969 qqqq
## 输出编号
[root@PC1 test01]# cut -f 2 a.txt | sort | uniq | while read i; do grep "[[:space:]]$i" a.txt | awk '{OFS = "\t"; print $0, NR}' >> xxx; done
[root@PC1 test01]# cut -f 1 a.txt | while read i; do grep "$i[[:space:]]" xxx >> yyy; done
[root@PC1 test01]# cat yyy    ## 原序输出
ERR219543 Africa 1
ERR219546 kkkk 1
ERR229467 qqqq 1
ERR229468 qqqq 2
ERR229471 mmnn 1
ERR219547 kkkk 2
ERR2214955 qqqq 3
ERR219544 Africa 2
ERR219545 Africa 3
ERR2214969 qqqq 4
。