Python 中实现 DNA 一致序列(consensus)及碱基计数矩阵的计算

 

001、

 

[root@pc1 test01]# ls
a.fa  test.py
[root@pc1 test01]# cat a.fa        ## 测试fasta文件
>Rosalind_1
ATCCAGCT
>Rosalind_2
GGGCAACT
>Rosalind_3
ATGGATCT
>Rosalind_4
AAGCAACC
>Rosalind_5
TTGGAACT
>Rosalind_6
ATGCCATT
>Rosalind_7
ATGGCACT
[root@pc1 test01]# cat test.py       ## 统计程序
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pysam
# Row order of the profile matrix; ties in the consensus are resolved in
# favour of the base that appears first in this string.
base = "ACGT"


def build_profile(sequences, alphabet=base):
    """Count, per position, how often each symbol of *alphabet* occurs.

    Args:
        sequences: Iterable of sequence strings. The width of the matrix is
            taken from the first sequence.
        alphabet: Symbols to count; row ``r`` of the result belongs to
            ``alphabet[r]``.

    Returns:
        A list with one row per alphabet symbol, each row holding one count
        per position. If *sequences* is empty, every row is an empty list.

    Raises:
        ValueError: If a sequence contains a symbol that is not in
            *alphabet*.
        IndexError: If a later sequence is longer than the first one.
    """
    profile = None
    for seq in sequences:
        if profile is None:
            # Size the matrix lazily: the width is unknown until the first
            # record has been read.
            profile = [[0] * len(seq) for _ in alphabet]
        for pos, nucleotide in enumerate(seq):
            row = alphabet.find(nucleotide)
            if row < 0:
                raise ValueError(
                    "unexpected symbol %r at position %d" % (nucleotide, pos)
                )
            profile[row][pos] += 1
    if profile is None:
        # No records at all: return a zero-width matrix so callers can still
        # derive an (empty) consensus and print the row labels.
        profile = [[] for _ in alphabet]
    return profile


def consensus(profile, alphabet=base):
    """Return the most frequent symbol of every column as one string.

    ``zip(*profile)`` walks the matrix column by column. ``index(max(...))``
    picks the first row holding the maximum, so ties go to the symbol that
    comes first in *alphabet*.
    """
    return "".join(
        alphabet[column.index(max(column))] for column in zip(*profile)
    )


def format_profile(profile, alphabet=base, sep=""):
    """Render the profile as one ``"<symbol>:<counts>"`` line per row.

    Args:
        profile: Matrix as returned by :func:`build_profile`.
        alphabet: Symbols labelling the rows, in row order.
        sep: String placed between counts. The default ``""`` reproduces the
            script's historical output; pass ``" "`` when any count can
            reach 10, otherwise the digits run together ambiguously.

    Returns:
        A list of formatted lines, one per row.
    """
    return [
        symbol + ":" + sep.join(str(count) for count in row)
        for symbol, row in zip(alphabet, profile)
    ]


def main(path="a.fa"):
    """Read the FASTA file at *path* and print consensus and profile."""
    # The context manager closes the underlying file handle even if a
    # record turns out to be malformed.
    with pysam.FastxFile(path) as in_file:
        profile = build_profile(record.sequence for record in in_file)

    print(consensus(profile))
    for line in format_profile(profile):
        print(line)


if __name__ == "__main__":
    main()

 

[root@pc1 test01]# ls
a.fa  test.py
[root@pc1 test01]# python3 test.py      ## 计算结果
ATGCAACT
A:51005500
C:00142061
G:11630100
T:15000116

 。

 

posted @ 2023-09-12 11:46  小鲨鱼2018  阅读(28)  评论(0)  编辑  收藏  举报