Python 中实现根据序列长度过滤 FASTA 文件
001、
[root@pc1 test1]# ls
a.fa  test.py
[root@pc1 test1]# cat a.fa
>chr1
tttcccggg
>chr2
tttgggjjj
cccjjjjjj
>chr3
ccc
>chr4
aaaaatt
[root@pc1 test1]# cat test.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

seq = ""
key = ""
dict1 = {}

with open("a.fa", "r") as in_put:
    for i in in_put:
        i = i.strip()
        if i[0] == ">":
            if len(seq) > 8:
                dict1[key] = seq
            key = i[1:]
            seq = ""
        else:
            seq += i

if len(seq) > 8:
    dict1[key] = seq

for i in dict1:
    print(i, dict1[i])
[root@pc1 test1]# ls
a.fa  test.py
[root@pc1 test1]# cat a.fa
>chr1
tttcccggg
>chr2
tttgggjjj
cccjjjjjj
>chr3
ccc
>chr4
aaaaatt
[root@pc1 test1]# python3 test.py     ## 运算结果
chr1 tttcccggg
chr2 tttgggjjjcccjjjjjj
。