使用Python提取FASTQ文件中的序列并转换为FASTA格式

 

001、

root@PC1:/home/test# ls
test.fastq  test.py
root@PC1:/home/test# cat test.fastq        ## 测试fastq文件
@A00530:26:H35FTDSXX:4:1101:6614:1047 1:N:0:AACGTGAT
GNCCTGATGCTTTCCGTAAAACGGAACAATCTGCTCCTCCTCTTTTTCATCCTTTTTCGATGGCTTAGCCGCAGTCTGAACAAGCGGAGCGAGACCGCCGAGACCGCTGGCACCGATCGCAACGGCTGCCCCCGCCATCGCTCCCCATTT
+
F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFF,FFFFFFFFF:FFFFFFFFF,:F,
@A00530:26:H35FTDSXX:4:1101:9200:1047 1:N:0:AACGTGAT
GNTTTAGAAACAAACTCGAGAGCACCATCGAATTTCCGATGACCATCCCGCTGATCGGAATGACATATCGTGCCGTAAGCGGAATGATATGAAGAGACAGCAGAATTCCCTGCGTCACAATTTCTACGATTGCTAACGCGGCAAACACTC
+
F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,F:FFF
root@PC1:/home/test# cat test.py      ## 脚本
#!/usr/bin/python
"""Extract the reads of a FASTQ file and write them out in FASTA format.

Run as a script, this reads ``test.fastq`` from the current directory and
writes ``result.txt`` next to it.
"""


def fastq_to_fasta(in_path="test.fastq", out_path="result.txt"):
    """Write the header and sequence of every FASTQ record as FASTA.

    Each record is expected to occupy four lines: ``@header``, sequence,
    a ``+`` separator line, and the quality string. Only the header (with
    its leading ``@`` replaced by ``>``) and the sequence are written.

    Args:
        in_path: Path of the FASTQ file to read.
        out_path: Path of the FASTA file to create (overwritten if present).
    """
    # ``with`` guarantees both files are closed even if an error occurs.
    with open(in_path, "r") as in_file, open(out_path, "w") as out_file:
        line = in_file.readline()
        while line:
            if not line.startswith("@"):
                # Not a record header: skip it and keep scanning.
                line = in_file.readline()
                continue

            # Drop only the single leading "@" marker and the line ending.
            # str.strip("[@\n]") would treat its argument as a character
            # set and also eat "[", "]" or "@" at either end of the header.
            out_file.write(">" + line[1:].rstrip("\n") + "\n")
            out_file.write(in_file.readline().strip() + "\n")

            line = in_file.readline()
            if line.startswith("+"):
                # Consume the quality line explicitly: a quality string may
                # itself begin with "@" and must not be mistaken for the
                # header of the next record.
                in_file.readline()
                line = in_file.readline()


if __name__ == "__main__":
    fastq_to_fasta()
root@PC1:/home/test# python test.py
root@PC1:/home/test# ls
result.txt  test.fastq  test.py
root@PC1:/home/test# cat result.txt     ## 结果
>A00530:26:H35FTDSXX:4:1101:6614:1047 1:N:0:AACGTGAT
GNCCTGATGCTTTCCGTAAAACGGAACAATCTGCTCCTCCTCTTTTTCATCCTTTTTCGATGGCTTAGCCGCAGTCTGAACAAGCGGAGCGAGACCGCCGAGACCGCTGGCACCGATCGCAACGGCTGCCCCCGCCATCGCTCCCCATTT
>A00530:26:H35FTDSXX:4:1101:9200:1047 1:N:0:AACGTGAT
GNTTTAGAAACAAACTCGAGAGCACCATCGAATTTCCGATGACCATCCCGCTGATCGGAATGACATATCGTGCCGTAAGCGGAATGATATGAAGAGACAGCAGAATTCCCTGCGTCACAATTTCTACGATTGCTAACGCGGCAAACACTC

 

参考:https://mp.weixin.qq.com/s?__biz=MzIxNzc1Mzk3NQ==&mid=2247491469&idx=1&sn=1a931b75a7737ccbf948d0aaaaf9e67b&chksm=97f5af95a0822683feb89faf7e45cf27697660a96ba4532e675e4a67452c49f898b5748e1673&scene=178&cur_album_id=2403674812188688386#rd

 

posted @ 2022-08-06 22:58  小鲨鱼2018  阅读(583)  评论(0)  编辑  收藏  举报