把文件内容按照染色体分开写出
测试数据如下,保存为 test.txt(每行一条记录,以空格分隔):
chr2 43995310 43995986
chr17 49788603 49789067
chr17 59565573 59566163
chr19 8390308 8390745
chr12 49188033 49189033
chr7 974903 975570
chr7 98878532 98879500
chr7 44044672 44045322
chr1 153634052 153634772
chr11 60905850 60906575
直接看代码:
# -*- coding: utf-8 -*-
"""Split a whitespace-delimited record file into one file per chromosome.

Every non-blank input line must look like ``<chr_id> <rest of record>``,
for example ``chr17 49788603 49789067``.  All records sharing a ``chr_id``
are written to ``<outfile stem>_<chr_id><outfile extension>``.
"""

import os
import sys
from collections import OrderedDict


def readfasta(filename):
    """Group the records in *filename* by their first column.

    The first whitespace-delimited field of each line is the chromosome id;
    the remainder of the line is kept verbatim as that record's features.
    Blank lines are skipped.

    Returns an ``OrderedDict`` mapping each chromosome id (in order of first
    appearance) to a string holding its records, one record per line.

    Raises ``ValueError`` when a non-blank line has no features after the id.
    """
    grouped = OrderedDict()

    with open(filename) as f:
        for lineno, raw in enumerate(f, 1):
            # split(None, 1) accepts spaces or tabs and ignores padding.
            fields = raw.strip().split(None, 1)
            if not fields:
                continue  # blank line
            if len(fields) < 2:
                raise ValueError(
                    '%s: line %d has no fields after the chromosome id: %r'
                    % (filename, lineno, raw))
            grouped.setdefault(fields[0], []).append(fields[1])

    # Join with newlines so records of one chromosome stay distinct instead
    # of being fused into a single run of digits.
    merged = OrderedDict()
    for chr_id, rows in grouped.items():
        merged[chr_id] = '\n'.join(rows)
    return merged


def seperatefile(filename, outfile):
    """Write the records of *filename* into one file per chromosome.

    *outfile* is used as a template: for ``output.txt`` the records of
    ``chr7`` go to ``output_chr7.txt``.  Each output file starts with a line
    holding the chromosome id, followed by that chromosome's records.
    """
    data = readfasta(filename)
    # The stem and extension do not depend on the chromosome; compute once.
    name, ext = os.path.splitext(outfile)

    for chr_id, features in data.items():
        with open('%s_%s%s' % (name, chr_id, ext), 'w') as f_out:
            f_out.write('%s\n' % chr_id)
            f_out.write('%s\n' % features)


if __name__ == '__main__':
    # Usage: script.py [INPUT OUTPUT]; without arguments the original
    # hard-coded file names are used.
    if len(sys.argv) == 3:
        seperatefile(sys.argv[1], sys.argv[2])
    else:
        seperatefile('test.txt', 'output.txt')
推荐论坛:生信技能树,http://biotrainee.com/forum.php/