将顺序的fqlist拆解成sample、fqsize、fq格式,涉及用正则提取样本名

# -*- coding: utf-8 -*-
# @Time : 2022/1/20 下午10:10
# @Author : lizhichao
# @Email : 13652030758@163.com
# @File : sample_fqsize_fq.py
# @Software: PyCharm

import os,sys,re

def _format_fq_row(sample, total_bytes, paths):
    """Build one CSV row: sample name, size in GiB (2 decimals), then every path."""
    size_gib = total_bytes / float(1024 * 1024 * 1024)
    return "%s,%s,%s\n" % (sample, round(size_gib, 2), ",".join(paths))


def fqlist_new_fqsize(rawfqlist,output):
    """Collapse an ordered FASTQ path list into one CSV row per sample.

    Each line of ``rawfqlist`` is expected to be a path to a ``.fq.gz`` file
    that contains a directory component starting with ``CK``; that component
    is used as the sample name.  Consecutive lines that share a sample name
    are merged into a single row of the form::

        <sample>,<total size in GiB, rounded to 2 decimals>,<path>[,<path>...]

    The rows are written to ``output`` after the header line
    ``Sample,FQ SIZE,FQ PATH``.

    Only *consecutive* lines are grouped: if a sample name shows up again
    after a different sample, it starts a new row.  Lines that do not match
    the expected path layout are reported on stdout and skipped.  When no
    line matches, only the header is written.

    Args:
        rawfqlist: Path of the text file listing one FASTQ path per line.
        output: Path of the CSV file to create (overwritten if present).

    Raises:
        OSError: If either file cannot be opened, or a listed FASTQ file
            cannot be stat'ed by ``os.path.getsize``.
    """
    # Raw string so the regex escapes are not treated as (invalid) string
    # escapes; the pattern itself is unchanged.  Group 1 is the sample
    # directory, group 2 (the file name) is captured but not used here.
    pattern = re.compile(r'\S+\/(CK.*)\/\S+\/(\S+\.fq.gz)')

    with open(rawfqlist,"r") as raw,open(output,"w") as pu:
        rows = ["Sample,FQ SIZE,FQ PATH\n"]
        sample = None      # None means "no group has been started yet"
        paths = []
        total_bytes = 0

        for line in raw:
            linepath = line.strip()
            match = pattern.search(linepath)
            if match is None:
                print ("line not normal:%s"%line)
                continue

            name = match.group(1)
            # Flush on a sample change.  The decision is based on whether a
            # group is open, not on the accumulated size, so samples made of
            # empty files still get their own row.
            if sample is not None and name != sample:
                rows.append(_format_fq_row(sample, total_bytes, paths))
                paths = []
                total_bytes = 0

            sample = name
            paths.append(linepath)
            total_bytes += os.path.getsize(linepath)

        # Emit the last open group, if any line matched at all.
        if sample is not None:
            rows.append(_format_fq_row(sample, total_bytes, paths))

        pu.write("".join(rows))


if __name__=="__main__":
    # Usage: python sample_fqsize_fq.py <rawfqlist> <output.csv>
    # Fail with a readable message instead of a bare IndexError when the
    # two required positional arguments are missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <rawfqlist> <output.csv>" % sys.argv[0])
    rawfqlist=sys.argv[1]
    output=sys.argv[2]
    fqlist_new_fqsize(rawfqlist,output)

posted on 2022-02-10 11:49  BioinformaticsMaster  阅读(27)  评论(0)  编辑  收藏  举报

导航