Python 中统计不同 scaffold 的 GC 含量并输出 GC 含量最高的 scaffold
001、方法1
root@PC1:/home/test# ls
a.fasta test.py
root@PC1:/home/test# cat a.fasta ## 测试fasta文件
>Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
ATATCCATTTGTCAGCAGACACGC
>Rosalind_0808
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT
root@PC1:/home/test# cat test.py ## 测试程序
#!/usr/bin/python

in_file = open("a.fasta", "r")
out_file = open("result.txt", "w")
dict1 = {}
dict2 = {}

for i in in_file:
    i = i.strip()
    if i.startswith(">"):
        key = i.split(">")[1]
        dict1[key] = []
    else:
        dict1[key].append(i)

for i,j in dict1.items():
    j = "".join(j).upper()
    dict2[i] = (j.count("C") + j.count("G"))/len(j)

result = max(dict2.items(), key = lambda x: x[1])
print(result[0] + "\n" + str(result[1]), file = out_file, end = "\n")

in_file.close()
out_file.close()
root@PC1:/home/test# python test.py ## 运行程序
root@PC1:/home/test# ls
a.fasta result.txt test.py
root@PC1:/home/test# cat result.txt ## 运行结果
Rosalind_0808
0.6091954022988506
002、方法2
root@PC1:/home/test# ls
a.fasta test.py
root@PC1:/home/test# cat a.fasta ## 测试fasta文件
>Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
ATATCCATTTGTCAGCAGACACGC
>Rosalind_0808
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT
root@PC1:/home/test# cat test.py ## 测试程序
#!/usr/bin/python
import re ## 利用re包中 findall函数查找指定的碱基数目

in_file = open("a.fasta", "r")
out_file = open("result.txt", "w")
dict1 = {}
dict2 = {}

for i in in_file:
    i = i.strip()
    if i[0] == ">":
        key = i.split(">")[1]
        dict1[key] = []
    else:
        dict1[key].append(i)

for i,j in dict1.items():
    seq = "".join(j).upper()
    gc_count = len(re.findall("[GC]", seq))
    dict2[i] = "%.4f" % (gc_count/len(seq))

result = max(dict2.items(), key = lambda x: x[1])
out_file.write(result[0] + "\n" + result[1] + "\n")

in_file.close()
out_file.close()
root@PC1:/home/test# python test.py ## 执行程序
root@PC1:/home/test# ls
a.fasta result.txt test.py
root@PC1:/home/test# cat result.txt ## 运行结果
Rosalind_0808
0.6092
003、方法3
root@PC1:/home/test# ls
a.fasta test.py
root@PC1:/home/test# cat a.fasta ## 测试fasta文件
>Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
ATATCCATTTGTCAGCAGACACGC
>Rosalind_0808
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT
root@PC1:/home/test# cat test.py ## 测试程序
#!/usr/bin/python

in_file = open("a.fasta", "r")
out_file = open("result.txt", "w")
dict1 = dict()
dict2 = dict()

for i in in_file:
    i = i.strip()
    if i.startswith(">"):
        key = i.split(">")[1]
        dict1[key] = []
    else:
        dict1[key].append(i)

for i,j in dict1.items():
    seq = "".join(j).upper()
    gc_count = 0
    for k in seq:
        if k == "G" or k == "C":
            gc_count += 1
    gc_ratio = "%.4f" % (gc_count/len(seq))
    dict2[i] = gc_ratio

result = max(dict2.items(), key = lambda x: x[1])
out_file.write(result[0] + "\n" + result[1] + "\n")

in_file.close()
out_file.close()
root@PC1:/home/test# python test.py ## 执行程序
root@PC1:/home/test# ls
a.fasta result.txt test.py
root@PC1:/home/test# cat result.txt ## 运行结果
Rosalind_0808
0.6092
参考:https://mp.weixin.qq.com/s?__biz=MzIxMjQxMDYxNA==&mid=2247484172&idx=1&sn=d8dec9ae5ffea81ef02e8f0d7ea4672b&chksm=9747ca95a030438313f483f6c62c9c32551e23682f98be6868edf423ea88180165e21c5dedc8&scene=178&cur_album_id=1635727573621997580#rd
分类:
python
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律