#!/usr/bin/python
# coding:utf-8
"""Children's talent genotyping delivery script.

Pipeline driven by ``main()``:

1. pick a ``.rar`` delivery (``-f`` option, otherwise the first ``.rar``
   found in ``rarpath``);
2. unpack it into ``extractpath``;
3. locate the ``.xlsx`` workbook inside and dump one sheet to a
   tab-separated text file;
4. run the ``LDR2Vcf.pl`` Perl script to turn that text file into VCF files;
5. move the processed rar files to ``oldpath`` and e-mail a summary.

The code is written to run unchanged on Python 2.7 and Python 3.
"""
__author__ = 'similarface'

import datetime
import io
import optparse
import os
import re
import smtplib
import subprocess
import sys
import time
from email.header import Header
from email.mime.text import MIMEText
from email.utils import formataddr
from itertools import islice

try:
    # Python 2 only. No longer used below (subprocess replaces it) but kept
    # importable so the module namespace stays backward-compatible.
    import commands
except ImportError:
    commands = None

try:
    # Third-party Excel reader.
    # NOTE(review): xlrd 2.0+ reads only .xls files; reading the .xlsx
    # deliveries this script expects needs xlrd < 2.0 - confirm the version
    # installed on the production host.
    import xlrd
except ImportError:
    xlrd = None

# ---------------------------------------------------------------------------
# Mail settings
# ---------------------------------------------------------------------------
mailto_list = ["xx@qq.com", 'xx@23mofang.com', 'xx@23mofang.com']
# SMTP server
mail_host = "smtp.qq.com"
# SMTP login name
mail_user = "xx@23mofang.com"
# SMTP password
mail_pass = "xx"
# Domain appended to mail_user when mail_user is not already a full address
mail_postfix = "qq.com"

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
basedir = "/data3/childtalent"
# Directory holding incoming rar files
rarpath = os.path.join(basedir, "current")
# Directory the processed rar files are moved to
oldpath = os.path.join(basedir, "oldrar")
# Directory the rar files are extracted into
extractpath = os.path.join(basedir, "extract")
# Base directory the VCF result folders are created in
__resultebase = '/data2/vsftp/ertongtianfu/'
# Reference allele table passed to the converter
__ref = os.path.join(basedir, "db", "refAllele.txt")
# Perl converter script
__run = os.path.join(basedir, "run", "LDR2Vcf.pl")
# File recording the batch (version) history, newest entry first
versionfile = os.path.join(basedir, "version", "version.pid")
# Directory the txt files converted from Excel are written to
txtoutputdir = os.path.join(basedir, "result")

# Text type: ``unicode`` on Python 2, ``str`` on Python 3.
_TEXT = type(u'')

try:
    _readLine = raw_input  # Python 2
except NameError:
    _readLine = input  # Python 3


def _runCommand(args, shell=False):
    """Run a command and return ``(exit_status, combined stdout+stderr text)``.

    A command that cannot be started is reported as status 127 with the
    OS error message as output instead of raising.
    """
    try:
        proc = subprocess.Popen(args, shell=shell, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
    except OSError as e:
        return 127, str(e)
    output = proc.communicate()[0]
    if not isinstance(output, str):  # bytes on Python 3
        output = output.decode('utf-8', 'replace')
    return proc.returncode, output.rstrip('\n')


def _cellText(value):
    """Return a worksheet cell value as text.

    Whole-number floats are rendered without the trailing ``.0`` so that a
    numeric cell such as ``1001.0`` is written as ``1001``.
    """
    if isinstance(value, float) and value.is_integer():
        return _TEXT(int(value))
    return _TEXT(value)


def send_mail(to_list, sub, content):
    """Send an HTML e-mail through ``mail_host``.

    :param to_list: list of recipient addresses
    :param sub: subject line
    :param content: HTML body
    :return: True when the message was handed to the server, False otherwise
             (the error is printed, not raised)
    """
    # mail_user may already be a complete address; only append the postfix
    # when it is a bare account name.
    if "@" in mail_user:
        sender = mail_user
    else:
        sender = mail_user + "@" + mail_postfix
    msg = MIMEText(content, _subtype='html', _charset='UTF-8')
    # Non-ASCII header text has to be RFC 2047 encoded.
    msg['Subject'] = Header(sub, 'utf-8')
    msg['From'] = formataddr((Header("数据部运行消息", 'utf-8').encode(), sender))
    msg['To'] = ", ".join(to_list)
    server = None
    try:
        server = smtplib.SMTP()
        server.connect(mail_host)
        server.login(mail_user, mail_pass)
        server.sendmail(sender, to_list, msg.as_string())
        return True
    except Exception as e:
        print(str(e))
        return False
    finally:
        # Release the socket on failure as well as on success.
        if server is not None:
            server.close()


def unrarPackage(inputPath, outputDir):
    """Extract a rar archive with ``/usr/local/bin/rar x``.

    :param inputPath: path of the archive; must exist and end with "rar"
    :param outputDir: directory passed to rar as the extraction target
    :return: True on success, False when the input is not an existing rar
             file or rar exits with a non-zero status (its output is printed)
    """
    if not (inputPath and os.path.exists(inputPath)
            and inputPath.endswith("rar")):
        return False
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through untouched.
    status, output = _runCommand(
        ['/usr/local/bin/rar', 'x', inputPath, outputDir])
    if status != 0:
        print(output)
        return False
    return True


def getXlsxFromunrarDic(unrardir):
    """Return the first ``.xlsx`` file found one level below *unrardir*.

    Only the immediate sub-directories of *unrardir* are searched.

    :param unrardir: directory the archive was extracted into
    :return: path of the workbook, or None when none is found
    """
    for entry in os.listdir(unrardir):
        subdir = os.path.join(unrardir, entry)
        if not os.path.isdir(subdir):
            continue
        for name in os.listdir(subdir):
            if name.endswith("xlsx"):
                return os.path.join(subdir, name)
    return None


def readExcelConvertoTxt(excel, txtPath):
    """Dump the ``ET样品`` sheet of *excel* to a tab-separated UTF-8 file.

    Row 0 becomes the header: ``Sample`` followed by every cell that starts
    with ``rs``. Row 1 is skipped. Every later row is written without
    columns 0, 2 and 3.

    :param excel: path of the workbook
    :param txtPath: path of the text file to write
    :return: True on success, False on any failure (the error is printed)
    """
    if xlrd is None:
        print('xlrd is not installed; cannot read ' + str(excel))
        return False
    skippedColumns = (0, 2, 3)
    try:
        table = xlrd.open_workbook(excel).sheet_by_name(u'ET样品')
        lines = []
        for rowIndex in range(table.nrows):
            cells = [_cellText(cell) for cell in table.row_values(rowIndex)]
            if rowIndex == 0:
                fields = [u'Sample'] + [c for c in cells
                                        if c.startswith(u'rs')]
            elif rowIndex == 1:
                continue
            else:
                fields = [c for col, c in enumerate(cells)
                          if col not in skippedColumns]
            lines.append(u'\t'.join(fields) + u'\n')
        # The output is opened only once the sheet was read, so a broken
        # workbook does not leave an empty txt file behind.
        with io.open(txtPath, 'w', encoding='utf-8') as out:
            out.writelines(lines)
        return True
    except Exception as e:
        print(e)
        return False


def getDateFlag():
    """Return today's date as ``YYYYMMDD``, e.g. ``20000101``."""
    return str(time.strftime("%Y%m%d"))


def getDateDetailFlag():
    """Return the current time as ``YYYYMMDD-HH:MM:SS``."""
    return str(time.strftime("%Y%m%d-%H:%M:%S"))


def getVersionFromFile(vfile):
    """Read the current batch number from the version file.

    The number is the first tab-separated field of the first line.

    :param vfile: path of the version file
    :return: the batch number, or 0 when the file is missing, empty or its
             first field is not an integer
    """
    try:
        with open(vfile, 'r') as handle:
            firstLine = handle.readline()
        return int(firstLine.split('\t')[0])
    except (IOError, OSError, ValueError):
        return 0


def getOutPutTxtFile():
    """Return the txt path for today's date and the current batch number."""
    name = (getDateFlag() + "GeneResult."
            + str(getVersionFromFile(versionfile)) + '.txt')
    return os.path.join(txtoutputdir, name)


def writeVersionFrom(file):
    """Prepend a new ``<version+1>\\t<timestamp>`` entry to the version file.

    The file is created when it does not exist yet.

    :param file: path of the version file
    """
    entry = (str(getVersionFromFile(file) + 1) + "\t"
             + str(time.strftime("%Y-%m-%d %H:%M:%S")))
    try:
        with open(file, 'r') as handle:
            history = handle.read().split('\n')
    except (IOError, OSError):
        history = []
    history.insert(0, entry)
    with open(file, 'w') as handle:
        handle.write('\n'.join(history))


def txtConvertToVcf(txtPath):
    """Convert the txt file to VCF files with the Perl converter.

    :param txtPath: path of the tab-separated input
    :return: the number of VCF files in the result directory as a string on
             success; the integer 0 when the input is missing or the
             converter fails (types kept as callers have always seen them)
    """
    # Check the input first so that a missing txt file neither creates the
    # result directory nor triggers the overwrite prompt.
    if not os.path.isfile(txtPath):
        print('源TXT文件不存在')
        print('VCF生成数:0')
        return 0
    resultpath = getResultDirWithTxt()
    print('源TXT:' + txtPath)
    print('源TXT行数:' + getTxtLineSum(txtPath))
    command = ['perl', __run, txtPath, __ref, resultpath]
    print(' '.join(command))
    try:
        status = subprocess.call(command)
    except OSError as e:  # perl itself could not be started
        print(str(e))
        status = 1
    if status != 0:
        print('txt转换成vcf失败!')
        return 0
    print('TXT转换成VCF成功!')
    print('VCF文件生成路径:' + resultpath)
    vcfCount = getVcfSum(resultpath)
    print('VCF生成数:' + vcfCount)
    return vcfCount


def getTxtLineSum(txtPath):
    """Return the number of data lines (all lines minus the header).

    :param txtPath: path of the text file
    :return: the count as a string; '0' when the file is missing or empty
    """
    if not os.path.isfile(txtPath):
        return '0'
    with io.open(txtPath, 'r', encoding='utf-8', errors='replace') as handle:
        total = sum(1 for _ in handle)
    # An empty file has no header to subtract.
    return str(max(total - 1, 0))


def getResultDirWithTxt():
    """Return the result directory for the current txt file.

    The directory is named after the 8-digit date prefix of the txt file
    name, falling back to today's date. It is created when missing. When it
    already exists the user is asked whether to continue; answering ``n``
    (or closing stdin) exits the program with status -1.
    """
    # Match against the file name, not the full path, which never starts
    # with the date.
    match = re.match(r'(\d{8})GeneResult',
                     os.path.basename(getOutPutTxtFile()))
    if match:
        path = os.path.join(__resultebase, match.group(1))
    else:
        path = getResultDirWithCurrData()
    if not os.path.exists(path):
        os.makedirs(path)
        return path
    print('输出的文件夹已经存在, 存在的文件夹中有文件数:' + getVcfSum(path))
    print("请选择是否继续y/n?")
    while True:
        try:
            answer = _readLine()
        except EOFError:
            # Non-interactive run: do not write into an existing folder.
            sys.exit(-1)
        if answer in ('y', 'Y'):
            return path
        if answer in ('n', 'N'):
            sys.exit(-1)
        sys.stdout.write("请输入y/n ?")
        sys.stdout.flush()


def getResultDirWithCurrData():
    """Return the result directory path named after today's date."""
    return os.path.join(__resultebase, getDateFlag())


def getVcfSum(path):
    """Return, as a string, how many entries in *path* end with ``vcf``."""
    return str(sum(1 for name in os.listdir(path) if name.endswith("vcf")))


def getFileSum(dir):
    """Return, as a string, how many entries *dir* contains."""
    return str(len(os.listdir(dir)))


def main():
    """Run the whole pipeline for one rar delivery."""
    usage = "ertongtianfu[ -f <rar>]"
    opter = optparse.OptionParser(usage)
    opter.add_option("-f", "--file", action="store", dest="rar",
                     help="append rar file")
    opt, args = opter.parse_args()
    if xlrd is None:
        # Fail before anything on disk is touched.
        sys.exit('xlrd is required to read the Excel workbook')
    rarfile = opt.rar
    if not rarfile:
        for name in os.listdir(rarpath):
            if name.endswith("rar"):
                rarfile = os.path.join(rarpath, name)
                break
    if not rarfile:
        opter.print_help()
        sys.exit(0)

    # Start from an empty extraction directory.
    print("清理解压文件")
    status, output = _runCommand('rm -rf ' + extractpath + "/*", shell=True)
    if status != 0:
        print(output)

    print('解压缩文件')
    if not unrarPackage(rarfile, extractpath):
        sys.exit('failed to extract ' + rarfile)

    print("读取EXCEL文件")
    excelfile = getXlsxFromunrarDic(extractpath)
    if excelfile is None:
        # Abort before the batch number is bumped.
        sys.exit('no xlsx file found under ' + extractpath)

    print("写入批次文件")
    writeVersionFrom(versionfile)
    txtPath = getOutPutTxtFile()

    print("EXCEL到txt文件")
    if not readExcelConvertoTxt(excelfile, txtPath):
        sys.exit('failed to convert ' + excelfile)

    print("txt到vcf")
    vcfnum = txtConvertToVcf(txtPath)

    print("清理解压文件")
    status, output = _runCommand('rm -rf ' + extractpath + "/*", shell=True)
    if status != 0:
        print(output)
    # Archive the processed deliveries.
    status, output = _runCommand('mv ' + rarpath + "/* " + oldpath + "/",
                                 shell=True)
    if status != 0:
        print(output)

    batch = getVersionFromFile(versionfile)
    print(' '.join(str(item) for item in
                   (batch, vcfnum, getDateFlag(), rarfile,
                    getTxtLineSum(txtPath))))
    mailcontext = ("批次:" + str(batch)
                   + "<br/>样本个数:" + str(vcfnum)
                   + "<br/>标志位:" + str(getDateDetailFlag())
                   + "<br/>原始文件:" + os.path.basename(rarfile)
                   + "<br/>获取路径:ftp://192.168.30.252/ertongtianfu/"
                   + getDateFlag())
    send_mail(mailto_list, "儿童天赋分析结果", mailcontext)


if __name__ == '__main__':
    main()