将按样本顺序排列的 fqlist 拆解成 sample、fqsize、fq 格式,涉及用正则表达式提取样本名

# -*- coding: utf-8 -*-
# @Time : 2022/1/20 下午10:10
# @Author : lizhichao
# @Email : 13652030758@163.com
# @File : sample_fqsize_fq.py
# @Software: PyCharm

import os,sys,re

def fqlist_new_fqsize(rawfqlist, output):
    """Collapse a sample-ordered FASTQ path list into one CSV row per sample.

    Every line of ``rawfqlist`` is expected to hold a single path shaped like
    ``<prefix>/<CK sample>/<subdir>/<name>.fq.gz``. Consecutive lines that
    share the same sample name are merged into one row::

        sample,<total size in GiB, 2 decimals>,path1,path2,...

    The rows are written to ``output`` below the header
    ``Sample,FQ SIZE,FQ PATH``. Lines that do not match the path pattern are
    reported on stdout and skipped. If no line matches, only the header is
    written.

    Args:
        rawfqlist: Path of the text file listing one FASTQ path per line,
            already ordered so that files of one sample are adjacent.
        output: Path of the CSV file to create (overwritten if it exists).

    Raises:
        OSError: If ``rawfqlist`` cannot be read, ``output`` cannot be
            written, or the size of a listed FASTQ file cannot be queried.
    """
    # Raw string so the regex escapes are not parsed as (invalid) string escapes.
    pattern = re.compile(r'\S+\/(CK.*)\/\S+\/(\S+\.fq.gz)')
    bytes_per_gib = 1024 * 1024 * 1024.0
    row_format = "%s,%s,%s\n"

    rows = ["Sample,FQ SIZE,FQ PATH\n"]
    sample = None  # None means "no sample seen yet"
    fq_paths = []
    fqsize = 0.0

    with open(rawfqlist, "r") as raw, open(output, "w") as pu:
        for line in raw:
            linepath = line.strip()
            match = pattern.search(linepath)
            if match is None:
                print("line not normal:%s" % line)
                continue

            current = match.group(1)
            # Flush on every sample change, tracked by name rather than by
            # accumulated size, so samples made of empty files still get a row.
            if sample is not None and current != sample:
                rows.append(
                    row_format % (sample, round(fqsize, 2), ",".join(fq_paths))
                )
                fq_paths = []
                fqsize = 0.0

            sample = current
            fq_paths.append(linepath)
            fqsize += os.path.getsize(linepath) / bytes_per_gib

        # Emit the last pending sample, if any line matched at all.
        if sample is not None:
            rows.append(
                row_format % (sample, round(fqsize, 2), ",".join(fq_paths))
            )
        pu.write("".join(rows))


if __name__ == "__main__":
    # Command-line entry point: <rawfqlist> <output>.
    # Fail with a usage message instead of a bare IndexError traceback when
    # an argument is missing; extra arguments are ignored as before.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <rawfqlist> <output>" % sys.argv[0])
    rawfqlist = sys.argv[1]
    output = sys.argv[2]
    fqlist_new_fqsize(rawfqlist, output)

posted on   BioinformaticsMaster  阅读(29)  评论(0)  编辑  收藏  举报

相关博文:
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
历史上的今天:
2018-02-10 coursera 有比较丰富的生物信息等课程 win7 访问设置

导航

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5
点击右上角即可分享
微信分享提示