Python 实现统计 FASTA 文件中每一条序列的长度

 

001、

a、

[root@pc1 test1]# ls
a.fa  test.py
[root@pc1 test1]# cat a.fa            ## 测试fasta
>chr1
tttcccggg
>chr2
tttggg
ccc
>chr3
cccttt
>chr4
aaaaattt
[root@pc1 test1]# cat test.py         ## 统计每条序列的长度
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Print the name and length of every sequence in the FASTA file ``a.fa``."""


def iter_seq_lengths(lines):
    """Yield ``(name, length)`` for each FASTA record in *lines*.

    Args:
        lines: An iterable of text lines (for example an open file).
            A line starting with ``>`` begins a new record; every other
            non-blank line is sequence data belonging to the current record.

    Yields:
        Tuples ``(name, length)`` in file order, where ``name`` is the header
        line without the leading ``>`` and ``length`` is the total number of
        characters over all of the record's sequence lines.

    Raises:
        ValueError: If sequence data appears before the first header line.
    """
    name = None
    length = 0
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            # Blank lines carry no data; indexing line[0] on them would crash.
            continue
        if line.startswith(">"):
            if name is not None:
                # A new header closes the previous record, so report it now.
                yield name, length
            name = line[1:]
            length = 0
        elif name is None:
            raise ValueError("sequence data found before the first '>' header")
        else:
            length += len(line)
    # The last record has no following header to trigger its output.
    if name is not None:
        yield name, length


def main():
    """Read ``a.fa`` from the current directory and print ``name length`` lines."""
    with open("a.fa", "r") as in_file:
        for name, length in iter_seq_lengths(in_file):
            print(name, length)


if __name__ == "__main__":
    main()

 

[root@pc1 test1]# ls
a.fa  test.py
[root@pc1 test1]# cat a.fa
>chr1
tttcccggg
>chr2
tttggg
ccc
>chr3
cccttt
>chr4
aaaaattt
[root@pc1 test1]# python3 test.py      ## 运算结果
chr1 9
chr2 9
chr3 6
chr4 8

 

b、

[root@pc1 test1]# ls
a.fa  test.py
[root@pc1 test1]# cat a.fa           ## 测试fasta
>chr1
tttcccggg
>chr2
tttggg
ccc
>chr3
cccttt
>chr4
aaaaattt
[root@pc1 test1]# cat test.py    ## 计算程序
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Print the name and length of every sequence in the FASTA file ``a.fa``."""


def count_seq_lengths(lines):
    """Return a dict mapping each FASTA record name to its sequence length.

    Args:
        lines: An iterable of text lines (for example an open file).
            A line starting with ``>`` begins a new record; every other
            non-blank line is sequence data belonging to the current record.

    Returns:
        A dict, in order of first appearance, from the header text without
        the leading ``>`` to the total number of sequence characters. A
        repeated header restarts that record's count at zero.

    Raises:
        ValueError: If sequence data appears before the first header line.
    """
    lengths = {}
    name = None
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            # Blank lines carry no data; indexing line[0] on them would crash.
            continue
        if line.startswith(">"):
            name = line[1:]
            lengths[name] = 0
        elif name is None:
            raise ValueError("sequence data found before the first '>' header")
        else:
            lengths[name] += len(line)
    return lengths


def main():
    """Read ``a.fa`` from the current directory and print ``name length`` lines."""
    with open("a.fa", "r") as in_file:
        lengths = count_seq_lengths(in_file)
    for name, length in lengths.items():
        print(name, length)


if __name__ == "__main__":
    main()
[root@pc1 test1]# ls
a.fa  test.py
[root@pc1 test1]# cat a.fa
>chr1
tttcccggg
>chr2
tttggg
ccc
>chr3
cccttt
>chr4
aaaaattt
[root@pc1 test1]# python3 test.py     ## 运算结果
chr1 9
chr2 9
chr3 6
chr4 8

 。

 

posted @ 2023-10-13 23:00  小鲨鱼2018  阅读(52)  评论(0)  编辑  收藏  举报