将按样本顺序排列的 fqlist 拆解成 sample、fqsize、fq 格式,涉及用正则表达式提取样本名
# -*- coding: utf-8 -*-
# @Time : 2022/1/20 下午10:10
# @Author : lizhichao
# @Email : 13652030758@163.com
# @File : sample_fqsize_fq.py
# @Software: PyCharm
import os,sys,re
def _format_fq_row(sample, fqsize, fq_paths):
    """Return one CSV row: sample name, size rounded to 2 decimals, then each path."""
    return ",".join([sample, "%s" % round(fqsize, 2)] + fq_paths) + "\n"


def fqlist_new_fqsize(rawfqlist,output):
    """Collapse a list of fastq paths into one CSV row per sample.

    Each line of ``rawfqlist`` is stripped and searched for the pattern
    ``<prefix>/<CK...>/<dir>/<name>.fq.gz``; the ``CK...`` capture is used as
    the sample name.  Consecutive lines that share a sample name are merged
    into a single row holding the sample name, the summed on-disk size of its
    files in GiB (rounded to two decimals) and every matching path.  Lines of
    one sample must therefore be adjacent in the input; a sample that
    reappears later starts a new row.

    Lines that do not match the pattern are reported on stdout and skipped.

    Args:
        rawfqlist: Path of the text file listing one fastq path per line.
        output: Path of the CSV file to write (overwritten if it exists).

    Raises:
        OSError: If either file cannot be opened, or if a listed fastq path
            does not exist when its size is queried.
    """
    # Raw string so that \S and \/ reach the regex engine untouched; the dot
    # in front of "gz" is escaped so that only a literal ".fq.gz" matches.
    pattern = re.compile(r'\S+\/(CK.*)\/\S+\/(\S+\.fq\.gz)')
    bytes_per_gib = float(1024 * 1024 * 1024)

    rows = ["Sample,FQ SIZE,FQ PATH\n"]
    # None means "no sample seen yet"; using a real-looking sample name as the
    # initial value would emit a bogus row for inputs without any valid line.
    sample = None
    fq_paths = []
    fqsize = 0.0

    with open(rawfqlist, "r") as raw, open(output, "w") as pu:
        for line in raw:
            linepath = line.strip()
            match = pattern.search(linepath)
            if match is None:
                print ("line not normal:%s"%line)
                continue

            name = match.group(1)
            # Flush on every change of sample, regardless of the accumulated
            # size, so that a sample made of empty files still gets its own row.
            if sample is not None and name != sample:
                rows.append(_format_fq_row(sample, fqsize, fq_paths))
                fq_paths = []
                fqsize = 0.0

            sample = name
            fq_paths.append(linepath)
            fqsize += os.path.getsize(linepath) / bytes_per_gib

        # Emit the last sample, if the input contained any valid line at all.
        if sample is not None:
            rows.append(_format_fq_row(sample, fqsize, fq_paths))

        # Written in one go at the end so a failure midway leaves no partial table.
        pu.write("".join(rows))
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the fastq list
    # to read, the second is the CSV file to write.
    rawfqlist, output = sys.argv[1], sys.argv[2]
    fqlist_new_fqsize(rawfqlist, output)
本文来自博客园,作者:BioinformaticsMaster,转载请注明原文链接:https://www.cnblogs.com/koujiaodahan/p/15878471.html
posted on 2022-02-10 11:49 BioinformaticsMaster 阅读(27) 评论(0) 编辑 收藏 举报