Python 中实现 DNA 一致性序列(consensus sequence)的计算

 

001、统计每个位点上各碱基的出现次数

root@PC1:/home/test# ls
a.fasta  test.py
root@PC1:/home/test# cat a.fasta              ## 测试数据
>Rosalind_1
ATCCAGCT
>Rosalind_2
GGGCAACT
>Rosalind_3
ATGGATCT
>Rosalind_4
AAGCAACC
>Rosalind_5
TTGGAACT
>Rosalind_6
ATGCCATT
>Rosalind_7
ATGGCACT
root@PC1:/home/test# cat test.py                     ## 测试程序
#!/usr/bin/python
"""Build a per-position nucleotide count matrix from a FASTA file.

Reads ``a.fasta`` and writes ``result.txt`` with one row per base, in the
order A, T, C, G.  Each row is the base letter followed by the number of
times that base occurs at each sequence position, separated by spaces.
"""

# Row order of the output matrix.
BASES = "ATCG"


def read_fasta(lines):
    """Parse FASTA text into a ``{header: sequence}`` dict.

    Args:
        lines: Iterable of text lines (e.g. an open file).

    Returns:
        Dict mapping each header line (including the leading ``>``) to its
        sequence, with multi-line sequences joined into one string.  A
        header that appears twice keeps only the data after its last
        occurrence.

    Raises:
        ValueError: If sequence data appears before the first header.
    """
    records = {}
    header = None
    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no data; indexing
            # into them would fail.
            continue
        if line.startswith(">"):
            header = line
            records[header] = []
        elif header is None:
            raise ValueError("sequence data found before the first FASTA header")
        else:
            records[header].append(line)
    return {name: "".join(parts) for name, parts in records.items()}


def count_matrix(sequences):
    """Count each base at every position across equal-length sequences.

    Args:
        sequences: Iterable of sequence strings.

    Returns:
        Dict keyed by base in ``BASES`` order; each value is a list holding
        that base's count at position 0, 1, 2, ...  With no sequences every
        list is empty.

    Raises:
        ValueError: If the sequences do not all have the same length.
    """
    sequences = list(sequences)
    if not sequences:
        return {base: [] for base in BASES}
    length = len(sequences[0])
    if any(len(seq) != length for seq in sequences):
        raise ValueError("all sequences must have the same length")
    # zip(*sequences) transposes rows of sequences into per-position columns.
    columns = ["".join(column) for column in zip(*sequences)]
    return {base: [column.count(base) for column in columns] for base in BASES}


def main(in_path="a.fasta", out_path="result.txt"):
    """Read the FASTA file at *in_path* and write the matrix to *out_path*."""
    with open(in_path, "r") as in_file:
        records = read_fasta(in_file)
    matrix = count_matrix(records.values())
    with open(out_path, "w") as out_file:
        for base, counts in matrix.items():
            print(base, " ".join(str(count) for count in counts), file=out_file)


if __name__ == "__main__":
    main()
root@PC1:/home/test# python test.py                    ## 执行程序
root@PC1:/home/test# ls
a.fasta  result.txt  test.py
root@PC1:/home/test# cat result.txt                    ## 程序结果
A 5 1 0 0 5 5 0 0
T 1 5 0 0 0 1 1 6
C 0 0 1 4 2 0 6 1
G 1 1 6 3 0 1 0 0

 

002、根据计数矩阵输出一致性序列

(base) root@PC1:/home/test# ls
a.fasta  result.txt  test2.py  test.py
(base) root@PC1:/home/test# cat result.txt                     ## 测试文件
A 5 1 0 0 5 5 0 0
T 1 5 0 0 0 1 1 6
C 0 0 1 4 2 0 6 1
G 1 1 6 3 0 1 0 0
(base) root@PC1:/home/test# cat test2.py                       ## 测试程序
#!/usr/bin/python
"""Derive a consensus sequence from a per-position base count matrix.

Reads ``result.txt``, where each line is a base letter followed by
space-separated counts (one per sequence position), and writes the
consensus sequence to ``seq.txt``.
"""


def consensus_sequence(lines):
    """Return the most frequent base at each position.

    Args:
        lines: Iterable of text lines, each ``"<base> <count> <count> ..."``.
            Blank lines are ignored.

    Returns:
        A string with one character per position: the label of the row
        whose count is largest there.  On a tie the row that appears first
        wins.  Returns ``""`` when there are no rows.

    Raises:
        ValueError: If a count is not an integer or the rows have
            different numbers of counts.
    """
    rows = [line.split() for line in lines if line.strip()]
    if not rows:
        return ""
    labels = [row[0] for row in rows]
    # Counts must be compared as integers: as strings, "9" sorts above "10",
    # which would select the wrong base once any count reaches two digits.
    counts = [[int(value) for value in row[1:]] for row in rows]
    width = len(counts[0])
    if any(len(row) != width for row in counts):
        raise ValueError("all rows must have the same number of counts")
    # zip(*counts) yields one tuple per position; index(max(...)) finds the
    # first row holding the largest count.
    return "".join(
        labels[column.index(max(column))] for column in zip(*counts)
    )


def main(in_path="result.txt", out_path="seq.txt"):
    """Read the matrix at *in_path* and write the consensus to *out_path*."""
    with open(in_path, "r") as in_file:
        sequence = consensus_sequence(in_file)
    with open(out_path, "w") as out_file:
        print(sequence, file=out_file)


if __name__ == "__main__":
    main()
(base) root@PC1:/home/test# python test2.py                   ## 执行程序
(base) root@PC1:/home/test# ls
a.fasta  result.txt  seq.txt  test2.py  test.py
(base) root@PC1:/home/test# cat seq.txt                       ## 程序结果
ATGCAACT
(base) root@PC1:/home/test# cat result.txt
A 5 1 0 0 5 5 0 0
T 1 5 0 0 0 1 1 6
C 0 0 1 4 2 0 6 1
G 1 1 6 3 0 1 0 0

 

参考:https://mp.weixin.qq.com/s?__biz=MzIxMjQxMDYxNA==&mid=2247484219&idx=1&sn=45fe0f337db0c2bb078bbbac0c0a3b77&chksm=9747caa2a03043b4e348e7d4a3946a2dcfdbc0dd33f533aa5a615d74da2571f52729f3ae86bd&scene=178&cur_album_id=1635727573621997580#rd

 

posted @ 2022-08-18 19:14  小鲨鱼2018  阅读(160)  评论(0)  编辑  收藏  举报