python 中将单条scaffold的碱基序列按照指定数目输出

 

 

001、

[root@pc1 test3]# cat a.fa
>chr1
cccccggggggggttttgg
cccgggggg
>chr2
uuuuutttttNNNNNddffff
dddgggggggggggcccccccccc
[root@pc1 test3]# cat test.py
#!/usr/bin/python
"""Re-wrap the sequence lines of a FASTA file to a fixed line width."""


def rewrap_fasta(in_path, out_path, step):
    """Write the sequence of ``in_path`` to ``out_path`` with ``step`` bases per line.

    The first line of the input is copied (stripped) to the output as the
    header.  Every line that does not start with ">" is treated as sequence:
    the lines are joined end to end and re-emitted in chunks of ``step``
    characters, with any shorter remainder written as the last line.

    Note that later header lines are skipped, so the sequences of all records
    in the input end up merged under the first header.  An empty input file
    produces an empty output file.

    Args:
        in_path: path of the FASTA file to read.
        out_path: path of the file to (over)write.
        step: number of characters per output sequence line; must be >= 1.

    Raises:
        ValueError: if ``step`` is smaller than 1 (the chunking loop would
            otherwise never terminate).
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    # Characters left over from previous lines that did not fill a full chunk.
    tail = ""

    # A single pass over the input; both files are closed even on error.
    with open(in_path, "r") as in_file, open(out_path, "w") as out_file:
        for index, line in enumerate(in_file):
            if index == 0:
                # The first line is always echoed as the output header.
                print(line.strip(), file=out_file)
            if line.startswith(">"):
                continue
            chunk = tail + line.strip()
            while len(chunk) >= step:
                print(chunk[:step], file=out_file)
                chunk = chunk[step:]
            tail = chunk

        if tail:
            print(tail, file=out_file)


if __name__ == "__main__":
    rewrap_fasta("a.fa", "result.fa", 8)

 

002、

[root@pc1 test3]# ls
a.fa  test.py
[root@pc1 test3]# python3 test.py
[root@pc1 test3]# ls
a.fa  result.fa  test.py
[root@pc1 test3]# cat result.fa
>chr1
cccccggg
gggggttt
tggcccgg
gggguuuu
utttttNN
NNNddfff
fdddgggg
gggggggc
cccccccc
c

 

posted @ 2022-11-04 10:05  小鲨鱼2018  阅读(61)  评论(0)  编辑  收藏  举报