Python 中实现根据序列长度过滤 FASTA 文件

 

001、

 

[root@pc1 test1]# ls
a.fa  test.py
[root@pc1 test1]# cat a.fa
>chr1
tttcccggg
>chr2
tttgggjjj
cccjjjjjj
>chr3
ccc
>chr4
aaaaatt
[root@pc1 test1]# cat test.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Print the FASTA records of a.fa whose sequence is longer than 8 characters."""


def filter_fasta(lines, min_len=8):
    """Collect the FASTA records whose sequence is longer than ``min_len``.

    Args:
        lines: Iterable of text lines in FASTA format (an open file works).
            Surrounding whitespace is stripped from every line and blank
            lines are ignored.
        min_len: A record is kept only when the length of its concatenated
            sequence is strictly greater than this value.

    Returns:
        A dict mapping each kept record name (the header line without the
        leading ``>``) to its sequence joined into a single string, in
        order of appearance. If a name occurs more than once, the last
        qualifying record wins. Sequence lines that appear before the first
        header are collected under the empty name ``""``.
    """
    kept = {}
    name = ""
    parts = []

    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank lines carry no data; indexing line[0] on them would
            # raise IndexError, so skip them explicitly.
            continue
        if line.startswith(">"):
            # A new header closes the record that was being accumulated.
            seq = "".join(parts)
            if len(seq) > min_len:
                kept[name] = seq
            name = line[1:]
            parts = []
        else:
            parts.append(line)

    # The last record has no following header to close it.
    seq = "".join(parts)
    if len(seq) > min_len:
        kept[name] = seq
    return kept


def main():
    """Filter a.fa in the current directory and print ``name sequence`` pairs."""
    with open("a.fa", "r") as in_put:
        records = filter_fasta(in_put)
    for name, seq in records.items():
        print(name, seq)


if __name__ == "__main__":
    main()

 

[root@pc1 test1]# ls
a.fa  test.py
[root@pc1 test1]# cat a.fa
>chr1
tttcccggg
>chr2
tttgggjjj
cccjjjjjj
>chr3
ccc
>chr4
aaaaatt
[root@pc1 test1]# python3 test.py      ## 运算结果
chr1 tttcccggg
chr2 tttgggjjjcccjjjjjj

 。

 

posted @ 2023-10-13 23:27  小鲨鱼2018  阅读(23)  评论(0)  编辑  收藏  举报