Python 中统计不同 scaffold 的 GC 含量并输出 GC 含量最高的 scaffold
001、方法1
#!/usr/bin/env python3
"""Report the FASTA record with the highest GC content (method 1: str.count).

By default reads ``a.fasta`` from the current directory and writes two lines
to ``result.txt``: the ID of the record with the highest GC fraction, then
that fraction.

Usage::

    python test.py
    cat result.txt

Sample input (a.fasta)::

    >Rosalind_6404
    CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
    TCCCACTAATAATTCTGAGG
    >Rosalind_5959
    CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
    ATATCCATTTGTCAGCAGACACGC
    >Rosalind_0808
    CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
    TGGGAACCTGCGGGCAGTAGGTGGAAT

Resulting result.txt for the sample input::

    Rosalind_0808
    0.6091954022988506
"""


def parse_fasta(lines):
    """Group FASTA lines into a dict mapping record ID -> joined sequence.

    Args:
        lines: any iterable of text lines (an open file works).

    Returns:
        dict of ``{record_id: sequence}`` in file order. A later record with
        the same ID replaces the earlier one.

    Raises:
        ValueError: if sequence text appears before the first ``>`` header.
    """
    chunks = {}
    current = None
    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank lines carry no sequence data.
            continue
        if line.startswith(">"):
            current = line[1:]  # everything after the leading '>'
            chunks[current] = []
        elif current is None:
            raise ValueError("sequence data found before the first '>' header")
        else:
            chunks[current].append(line)
    return {name: "".join(parts) for name, parts in chunks.items()}


def gc_fraction(seq):
    """Return the fraction of G/C bases in ``seq`` (case-insensitive).

    An empty sequence yields 0.0 instead of dividing by zero.
    """
    if not seq:
        return 0.0
    seq = seq.upper()
    return (seq.count("C") + seq.count("G")) / len(seq)


def highest_gc(sequences):
    """Return ``(record_id, gc_fraction)`` for the record with the highest GC.

    On ties the first record in iteration order wins. Raises ValueError when
    ``sequences`` is empty.
    """
    scored = ((name, gc_fraction(seq)) for name, seq in sequences.items())
    return max(scored, key=lambda item: item[1])


def main(in_path="a.fasta", out_path="result.txt"):
    """Read ``in_path``, find the highest-GC record, write it to ``out_path``."""
    with open(in_path, "r") as in_file:
        sequences = parse_fasta(in_file)
    if not sequences:
        raise SystemExit("no FASTA records found in " + in_path)
    best_id, best_gc = highest_gc(sequences)
    # The output file is opened only once there is a result to write.
    with open(out_path, "w") as out_file:
        out_file.write(best_id + "\n" + str(best_gc) + "\n")


if __name__ == "__main__":
    main()
002、方法2
#!/usr/bin/env python3
"""Report the FASTA record with the highest GC content (method 2: re.findall).

By default reads ``a.fasta`` from the current directory and writes two lines
to ``result.txt``: the ID of the record with the highest GC fraction, then
that fraction formatted to four decimal places.

Usage::

    python test.py
    cat result.txt

Sample input (a.fasta)::

    >Rosalind_6404
    CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
    TCCCACTAATAATTCTGAGG
    >Rosalind_5959
    CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
    ATATCCATTTGTCAGCAGACACGC
    >Rosalind_0808
    CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
    TGGGAACCTGCGGGCAGTAGGTGGAAT

Resulting result.txt for the sample input::

    Rosalind_0808
    0.6092
"""
import re

# Matches a single G or C base; sequences are upper-cased before matching.
_GC_PATTERN = re.compile("[GC]")


def parse_fasta(lines):
    """Group FASTA lines into a dict mapping record ID -> joined sequence.

    Args:
        lines: any iterable of text lines (an open file works).

    Returns:
        dict of ``{record_id: sequence}`` in file order. A later record with
        the same ID replaces the earlier one.

    Raises:
        ValueError: if sequence text appears before the first ``>`` header.
    """
    chunks = {}
    current = None
    for raw in lines:
        line = raw.strip()
        if not line:
            # Skip blank lines; indexing line[0] on them would raise IndexError.
            continue
        if line.startswith(">"):
            current = line[1:]  # everything after the leading '>'
            chunks[current] = []
        elif current is None:
            raise ValueError("sequence data found before the first '>' header")
        else:
            chunks[current].append(line)
    return {name: "".join(parts) for name, parts in chunks.items()}


def gc_fraction(seq):
    """Return the fraction of G/C bases in ``seq`` (case-insensitive).

    Counts matches of ``[GC]`` with ``re.findall``. An empty sequence yields
    0.0 instead of dividing by zero.
    """
    if not seq:
        return 0.0
    gc_count = len(_GC_PATTERN.findall(seq.upper()))
    return gc_count / len(seq)


def highest_gc(sequences):
    """Return ``(record_id, gc_fraction)`` for the record with the highest GC.

    Fractions are compared as floats, not as formatted strings, so records
    that round to the same four decimals are still ranked correctly. On exact
    ties the first record in iteration order wins. Raises ValueError when
    ``sequences`` is empty.
    """
    scored = ((name, gc_fraction(seq)) for name, seq in sequences.items())
    return max(scored, key=lambda item: item[1])


def main(in_path="a.fasta", out_path="result.txt"):
    """Read ``in_path``, find the highest-GC record, write it to ``out_path``."""
    with open(in_path, "r") as in_file:
        sequences = parse_fasta(in_file)
    if not sequences:
        raise SystemExit("no FASTA records found in " + in_path)
    best_id, best_gc = highest_gc(sequences)
    # Format only at output time; ranking above uses the unrounded value.
    with open(out_path, "w") as out_file:
        out_file.write(best_id + "\n" + "%.4f" % best_gc + "\n")


if __name__ == "__main__":
    main()
003、方法3
#!/usr/bin/env python3
"""Report the FASTA record with the highest GC content (method 3: explicit loop).

By default reads ``a.fasta`` from the current directory and writes two lines
to ``result.txt``: the ID of the record with the highest GC fraction, then
that fraction formatted to four decimal places.

Usage::

    python test.py
    cat result.txt

Sample input (a.fasta)::

    >Rosalind_6404
    CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
    TCCCACTAATAATTCTGAGG
    >Rosalind_5959
    CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
    ATATCCATTTGTCAGCAGACACGC
    >Rosalind_0808
    CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
    TGGGAACCTGCGGGCAGTAGGTGGAAT

Resulting result.txt for the sample input::

    Rosalind_0808
    0.6092
"""


def parse_fasta(lines):
    """Group FASTA lines into a dict mapping record ID -> joined sequence.

    Args:
        lines: any iterable of text lines (an open file works).

    Returns:
        dict of ``{record_id: sequence}`` in file order. A later record with
        the same ID replaces the earlier one.

    Raises:
        ValueError: if sequence text appears before the first ``>`` header.
    """
    chunks = {}
    current = None
    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank lines carry no sequence data.
            continue
        if line.startswith(">"):
            current = line[1:]  # everything after the leading '>'
            chunks[current] = []
        elif current is None:
            raise ValueError("sequence data found before the first '>' header")
        else:
            chunks[current].append(line)
    return {name: "".join(parts) for name, parts in chunks.items()}


def gc_fraction(seq):
    """Return the fraction of G/C bases in ``seq`` (case-insensitive).

    Walks the sequence base by base. An empty sequence yields 0.0 instead of
    dividing by zero.
    """
    if not seq:
        return 0.0
    gc_count = 0
    for base in seq.upper():
        if base in ("G", "C"):
            gc_count += 1
    return gc_count / len(seq)


def highest_gc(sequences):
    """Return ``(record_id, gc_fraction)`` for the record with the highest GC.

    Fractions are compared as floats, not as formatted strings, so records
    that round to the same four decimals are still ranked correctly. On exact
    ties the first record in iteration order wins. Raises ValueError when
    ``sequences`` is empty.
    """
    scored = ((name, gc_fraction(seq)) for name, seq in sequences.items())
    return max(scored, key=lambda item: item[1])


def main(in_path="a.fasta", out_path="result.txt"):
    """Read ``in_path``, find the highest-GC record, write it to ``out_path``."""
    with open(in_path, "r") as in_file:
        sequences = parse_fasta(in_file)
    if not sequences:
        raise SystemExit("no FASTA records found in " + in_path)
    best_id, best_gc = highest_gc(sequences)
    # Format only at output time; ranking above uses the unrounded value.
    with open(out_path, "w") as out_file:
        out_file.write(best_id + "\n" + "%.4f" % best_gc + "\n")


if __name__ == "__main__":
    main()
参考:https://mp.weixin.qq.com/s?__biz=MzIxMjQxMDYxNA==&mid=2247484172&idx=1&sn=d8dec9ae5ffea81ef02e8f0d7ea4672b&chksm=9747ca95a030438313f483f6c62c9c32551e23682f98be6868edf423ea88180165e21c5dedc8&scene=178&cur_album_id=1635727573621997580#rd