Remove duplicate gene pairs using awk

cat input.txt


TRINITY_DN106621_c0_g1_i1       TRINITY_DN129833_c0_g1_i2
TRINITY_DN106621_c0_g1_i1       TRINITY_DN140628_c4_g2_i2
TRINITY_DN106621_c0_g1_i1       TRINITY_DN135041_c0_g1_i1
TRINITY_DN135041_c0_g1_i1       TRINITY_DN106621_c0_g1_i1
TRINITY_DN140628_c4_g2_i2       TRINITY_DN106621_c0_g1_i1
TRINITY_DN129833_c0_g1_i2       TRINITY_DN106621_c0_g1_i1
# Collapse reciprocal gene pairs ("A B" and "B A") into a single record.
# awk rewrites every pair so the smaller ID comes first, which makes both
# orientations the same tab-separated line; sort -u then drops the repeats.
#   - NF skips blank lines instead of emitting a lone tab for them.
#   - ($1 "") forces a string comparison. awk compares numeric-looking fields
#     as numbers, so IDs such as "007" and "7" would tie and both
#     orientations of that pair would survive.
#   - LC_ALL=C keeps comparison, ordering and de-duplication byte-wise, so the
#     result does not depend on the caller's locale.
LC_ALL=C awk 'BEGIN { OFS = "\t" }
NF {
  if (($1 "") < ($2 "")) print $1, $2
  else print $2, $1
}' input.txt | LC_ALL=C sort -u > output.txt
cat output.txt

TRINITY_DN106621_c0_g1_i1       TRINITY_DN129833_c0_g1_i2
TRINITY_DN106621_c0_g1_i1       TRINITY_DN135041_c0_g1_i1
TRINITY_DN106621_c0_g1_i1       TRINITY_DN140628_c4_g2_i2
posted @ 2016-02-24 17:01  liuhui_pine  阅读(137)  评论(0编辑  收藏  举报