Python 中实现将 FASTA 文件中的碱基按每行指定数目输出

 

001、测试数据

[root@PC1 test2]# ls
a.fa  test.py
[root@PC1 test2]# cat a.fa      ## 测试数据
>OR4F5_ENSG00000186092_ENST00000641515_61_1038_2618
CCCAGATCTCTTCAGTTTTTATGCCTCATTCTGTGAAAATTGCTGTAGTCTCTTCCAGTTATGAAGAAGGTAACTGCAGAGGCTATTTCCTGGAATGAATCAACGAGTGAAACGAATAACTCTATGGTGACTGAATTCATTTTTCTGGGTCTCTCTGATTCTCAGGAACTCCAGACCTTCCTATTTATGTTGTTTTTT
>OR4F29_ENSG00000284733_ENST00000426406_20_955_995
AGCCCAGTTGGCTGGACCAATGGATGGAGAGAATCACTCAGTGGTATCTGAGTTTTTGTTTCTGGGACTCACTCATTCATGGGAGATCCAGCTCCTCCTCCTAGTGTTTTCCTCTGTGCTCTATGTGGCAAGCATTACTGGAAACATCCTCATTGTGTTTTCTGTGACCACTGACCCTCACTTAC
[root@PC1 test2]# cat test.py    ## 转换脚本
#!/usr/bin/python
# Re-wrap a FASTA file.
#
# Reads "a.fa" and writes "result.fa": every line starting with ">" is copied
# through unchanged, and the sequence that follows it is re-emitted with at
# most len_perline characters per line (the last line of a record may be
# shorter).
#
# Records are streamed one input line at a time, so:
#   * records appear in the output in input order, and records that share a
#     header are all kept;
#   * memory use is bounded by the longest input line, not the whole file;
#   * a record with no sequence produces only its header line (no blank line).
#
# Raises ValueError if len_perline is not positive, or if sequence data
# appears before the first ">" header.

len_perline = 20                 ## number of bases written per output line

if len_perline < 1:
    # A non-positive width could never consume the sequence.
    raise ValueError("len_perline must be a positive integer")

# "with" guarantees both files are closed even if a line below raises.
with open("a.fa", "r") as in_file, open("result.fa", "w") as out_file:
    seen_header = False
    pending = ""                 # bases of the current record not yet written

    for i in in_file:
        i = i.strip()
        if not i:
            continue             # blank lines carry no sequence data

        if i.startswith(">"):
            # Finish the previous record's trailing short line, if any,
            # before starting the next record.
            if pending:
                out_file.write(pending + "\n")
                pending = ""
            out_file.write(i + "\n")
            seen_header = True
            continue

        if not seen_header:
            raise ValueError("a.fa: sequence data found before the first '>' header")

        # Emit every complete line available so far. Indexing by stride
        # copies each base once instead of re-slicing the remainder on every
        # output line.
        pending += i
        full = len(pending) - len(pending) % len_perline
        for start in range(0, full, len_perline):
            out_file.write(pending[start:start + len_perline] + "\n")
        pending = pending[full:]

    # Last record: write whatever did not fill a complete line.
    if pending:
        out_file.write(pending + "\n")

 

[root@PC1 test2]# python test.py        ## 执行程序
[root@PC1 test2]# ls
a.fa  result.fa  test.py
[root@PC1 test2]# cat result.fa        ## 查看结果
>OR4F5_ENSG00000186092_ENST00000641515_61_1038_2618
CCCAGATCTCTTCAGTTTTT
ATGCCTCATTCTGTGAAAAT
TGCTGTAGTCTCTTCCAGTT
ATGAAGAAGGTAACTGCAGA
GGCTATTTCCTGGAATGAAT
CAACGAGTGAAACGAATAAC
TCTATGGTGACTGAATTCAT
TTTTCTGGGTCTCTCTGATT
CTCAGGAACTCCAGACCTTC
CTATTTATGTTGTTTTTT
>OR4F29_ENSG00000284733_ENST00000426406_20_955_995
AGCCCAGTTGGCTGGACCAA
TGGATGGAGAGAATCACTCA
GTGGTATCTGAGTTTTTGTT
TCTGGGACTCACTCATTCAT
GGGAGATCCAGCTCCTCCTC
CTAGTGTTTTCCTCTGTGCT
CTATGTGGCAAGCATTACTG
GAAACATCCTCATTGTGTTT
TCTGTGACCACTGACCCTCA
CTTAC

 

posted @ 2022-06-07 22:18  小鲨鱼2018  阅读(215)  评论(0)  编辑  收藏  举报