筛选特定ID的条目信息
使用perl
# Look up a fixed list of transcript IDs in a tabular best-hit file and write
# one summary line per ID.
#
# Input : nr_bestout.xls, read as TAB-separated text. Column 0 is compared
#         against the wanted IDs; columns 1 and 10 are copied to the output
#         (presumably subject_id and e_value -- confirm against the file's
#         header row).
# Output: nr_res.txt, one line per wanted ID in the order of @wanted_ids:
#             <id> TAB <column 1> TAB <column 10>
#         IDs that never appear in column 0 are reported as
#             <id> TAB Unknown TAB -
# Dies  : if either file cannot be opened or closed.
use strict;
use warnings;

my $input_file  = 'nr_bestout.xls';
my $output_file = 'nr_res.txt';

my @wanted_ids = qw(
    TR10479|c2_g1 TR12583|c0_g1 TR15586|c0_g1 TR18003|c0_g1 TR19319|c0_g1
    TR25053|c0_g1 TR25636|c0_g1 TR6050|c2_g3  TR6472|c0_g2  TR9989|c2_g1
    TR10892|c0_g1 TR11415|c0_g1 TR11655|c0_g1 TR14439|c0_g1 TR14516|c1_g1
    TR15693|c0_g1 TR17096|c0_g1 TR17184|c0_g5 TR17244|c0_g1 TR17475|c0_g1
    TR20118|c0_g2 TR20179|c0_g1 TR22269|c0_g1 TR26674|c0_g1 TR4575|c0_g3
    TR4743|c0_g5  TR5307|c1_g2  TR5430|c1_g2  TR7186|c5_g3  TR7292|c0_g1
    TR7356|c0_g1  TR7991|c1_g2  TR8051|c2_g4  TR8198|c2_g1  TR8371|c1_g3
    TR8569|c0_g1  TR9018|c1_g4  TR9310|c2_g2
);

# Set of wanted IDs, so each input row is checked with a single hash lookup.
my %is_wanted = map { $_ => 1 } @wanted_ids;

open my $in_fh, '<', $input_file
    or die "cannot open $input_file: $!";

# Maps a wanted ID to [column 1, column 10] of the FIRST row carrying that ID;
# later rows with the same ID are ignored.
my %hit_for;

LINE:
while ( my $line = <$in_fh> ) {
    $line =~ s/\r?\n\z//;    # strip LF or CRLF line endings

    # Split on TAB rather than a single space: the second column holds a
    # free-text description containing spaces, which would otherwise shift
    # every later column. The -1 limit keeps trailing empty fields.
    my @fields = split /\t/, $line, -1;
    next LINE if !@fields;

    my $id = $fields[0];
    next LINE if !$is_wanted{$id};
    next LINE if exists $hit_for{$id};

    # Short rows yield undef for a missing column; store an empty string so
    # the output line is still well-formed and no warning is raised.
    $hit_for{$id} = [ map { defined $_ ? $_ : q{} } @fields[ 1, 10 ] ];
}

close $in_fh or die "cannot close $input_file: $!";

open my $out_fh, '>', $output_file
    or die "cannot open $output_file for writing: $!";

for my $id (@wanted_ids) {
    my ( $subject, $e_value )
        = exists $hit_for{$id} ? @{ $hit_for{$id} } : ( 'Unknown', '-' );
    print {$out_fh} join( "\t", $id, $subject, $e_value ), "\n";
}

# Buffered write errors only surface at close, so check it.
close $out_fh or die "cannot close $output_file: $!";
输入文件格式
query_id subject_id identity alignment_length mismatches gaps query_start query_end subject_start subject_end e_value bit_score
TR7308|c0_g1 gi|661899672|emb|CDO97666.1| unnamed protein product [Coffea canephora] 97.01 67 2 0 813 1013 67 133 4.2e-35 146.0
TR19212|c0_g1 gi|747102893|ref|XP_011099626.1| PREDICTED: zinc-finger homeodomain protein 6 [Sesamum indicum] 76.92 143 33 0 680 1108 211 353 5.9e-60 228.0
TR21369|c2_g1 gi|604321542|gb|EYU32118.1| hypothetical protein MIMGU_mgv1a019324mg, partial [Erythranthe guttata] 83.61 122 20 0 1 366 288 409 1.1e-58 224.0
TR17922|c0_g1 gi|604302721|gb|EYU22278.1| hypothetical protein MIMGU_mgv1a025105mg, partial [Erythranthe guttata] 80.81 370 71 0 3 1112 243 612 2.2e-180 628.0