oma同源基因查找
oma查找同源基因【最终需要从HOGFasta文件夹中提取文件】
#同源基因的另一种查找方法oma (https://omabrowser.org/standalone)##################################################### wget -O oma.tgz https://omabrowser.org/standalone/OMA.2.4.2.tgz # on MacOS you can also use curl: # curl https://omabrowser.org/standalone/OMA.2.4.2.tgz -o oma.tgz tar xvzf oma.tgz cd OMA.2.4.2 ./install.sh prefix=files #下载安装 # copy FASTA files into DB directory cp /home/you/fasta/yourFirstGenomeFile.fa DB/ cp /home/you/fasta/yourSecondGenomeFile.fa DB/ cp /home/you/fasta/yourContigFile.contig.fa DB/ #将以.fa结尾的所有蛋白文件放在DB文件夹中 # generate default parameter file oma -p #生成.drw结尾的配置文件 # adjust parameters vim parameters.drw # run oma oma -n 10 #运行命令
#【ly-2021/10/22】 注意文件中有个参数UseOnlyOneSplicingVariant,我将其默认的ture改为了false,因为是true时,发现OrthologousGroupsFasta文件夹下的序列文件,每个文件对于每个物种都至多包含一条序列,这样我就无法获得对于4倍体,可以包含两条序列的情况。改了之后具体有什么变化,出来后再看【结果:跟上面结果一样,每个文件对于每个物种照样至多包含一条序列】
# 从HOGFasta文件夹下提取特定的序列文件
特别注意:
1,放在DB路径里的序列文件必须都是.fa结尾,否则会出错,提示找不到基因组序列;
2,修改parameters.drw的必须参数,一般修改outgroup就行(将outgroup对应的文件名改了,注意不要后缀)
结果文件:主要根据OrthologousGroupsFasta文件夹下的序列文件【值得注意的是,每个文件对于每个物种至多只包含一条序列】,提取单拷贝直系同源基因。【发现在文件夹HOGFasta下的序列文件中,可能存在一个文件包含一个物种的多条序列的情况,因此感觉可以从该文件提取特定拷贝数的文件】
提取但拷贝基因保存到已有的Single_Copy文件夹中:
#!/usr/bin/python #ly-2021/10/08 #Goals: 提取符合条件的序列文件,存到拧一个文件夹中 #version_01
#Usage: python /home/data/vip10t01/manscript/py/06_extrSingle-copy.py
################################# import os import sys import shutil path = "/home/data/vip10t01/project/03_dark/oma/Output/OrthologousGroupsFasta/" #文件路径【修改】 desPath = "/home/data/vip10t01/project/03_dark/oma/Output/single_copy2/" #提取出来后存放的路径【修改】 fileList = os.listdir(path) #获取路径下的文件,存为列表 #print(fileList) def get_files(path, desPath): for filename in fileList: #遍历列表 tmp = path + filename #获得文件的绝对路径 with open(tmp, "r") as file: #打开文件 id=0 #用于统计该文件有多少个“>” a=[] #用于统计同一物种,是否只包含一条序列 for line in file.readlines(): #读取文件 if ">" in line: id += 1 #统计该文件有多少个“>” line_str=line[0:4] #这里,我们只要知道前4个字符相同,那么就可以确定这两条序列属于同一物种 # print(line_str) a.append(line_str) #print(a) #print(id) if id == 9 and len(a)==len(set(a)): #满足两个条件:1、包含9个“>”;2、每个物种只有一条序列 shutil.copy(tmp, desPath) #满足条件的文件复制到指定文件夹中 # a=[] # for line in file.readlines(): # line_str = line[0:2] # print(line) # if line_str not in a: # a.append(line_str) # if len(a) == 9: # shutil.copy(tmp, desPath) get_files(path, desPath)
#!/usr/bin/python #ly2021/10/23 修改第一版,实用性更好 import os import sys import shutil #path = "/home/data/vip10t01/project/03_dark/oma/Output/OrthologousGroupsFasta/" path = "/home/data/vip10t01/project/03_dark/oma_2/Output/OrthologousGroupsFasta/" #desPath = "/home/data/vip10t01/project/03_dark/oma/Output/single_copy3/" #desPath = "/home/data/vip10t01/project/03_dark/oma_2/Output/single_copy/" desPath = "/home/data/vip10t01/project/03_dark/oma_2/Output/single_copy_1/" fileList = os.listdir(path) #print(fileList) def get_files(path, desPath): for filename in fileList: tmp = path + filename with open(tmp, "r") as file: id=0 a=[] for line in file.readlines(): if ">" in line: id += 1 # line_str=line[0:8] line_str=line # print(line_str) a.append(line_str) # print(a) # print(id) req1 = "A_mexicanus" req2 = "C_idella" req3 = "L_oculatus" req4 = "L_tanaka" req5 = "P_nattereri" req6 = "P_swirei" req7 = "P_transmontana" req8 = "S_anshuiensis" req9 = "T_subterraneus" if id == 9 and len(a)==len(set(a)) and req1 in str(a) and req2 in str(a) and req3 in str(a) and req4 in str(a) and req5 in str(a) and req6 in str(a) and req7 in str(a) and req8 in str(a) and req9 in str(a): shutil.copy(tmp, desPath) # a=[] # for line in file.readlines(): # line_str = line[0:2] # print(line) # if line_str not in a: # a.append(line_str) # if len(a) == 9: # shutil.copy(tmp, desPath) get_files(path, desPath)
#!/usr/bin/python #ly-2021/10/22 修改第二版,可以使多倍体保留两条序列 import os import sys import shutil #path = "/home/data/vip10t01/project/03_dark/oma/Output/OrthologousGroupsFasta/" path = "/home/data/vip10t01/project/03_dark/oma_2/Output/OrthologousGroupsFasta/" #desPath = "/home/data/vip10t01/project/03_dark/oma/Output/single_copy3/" desPath = "/home/data/vip10t01/project/03_dark/oma_2/Output/single_copy/" #desPath = "/home/data/vip10t01/project/03_dark/oma_2/Output/single_copy_1/" fileList = os.listdir(path) #print(fileList) def get_files(path, desPath): for filename in fileList: tmp = path + filename with open(tmp, "r") as file: id=0 a=[] for line in file.readlines(): if ">" in line: id += 1 # line_str=line[0:8] line_str=line # print(line_str) a.append(line_str) # print(a) print(id) req1 = "A_mexicanus" req2 = "C_idella" req3 = "L_oculatus" req4 = "L_tanaka" req5 = "P_nattereri" req6 = "P_swirei" req7 = "P_transmontana" req8 = "S_anshuiensis" req9 = "T_subterraneus" # if id == 9 and len(a)==len(set(a)) and req1 in str(a) and req2 in str(a) and req3 in str(a) and req4 in str(a) and req5 in str(a) and req6 in str(a) and req7 in str(a) and req8 in str(a) and req9 in str(a) or id == 10 and req1 in str(a) and req2 in str(a) and req3 in str(a) and req4 in str(a) and req5 in str(a) and req6 in str(a) and req7 in str(a) and req8 in str(a) and req9 in str(a) and str(a).count(req8) == 2: if id == 10 and str(a).count(req8) == 2 and req1 in str(a) and req2 in str(a) and req3 in str(a) and req4 in str(a) and req5 in str(a) and req6 in str(a) and req7 in str(a) and req9 in str(a): shutil.copy(tmp, desPath) # a=[] # for line in file.readlines(): # line_str = line[0:2] # print(line) # if line_str not in a: # a.append(line_str) # if len(a) == 9: # shutil.copy(tmp, desPath) get_files(path, desPath)
#ly-2021/11/01 根据需要,提取文件 #!/usr/bin/python import os import sys import shutil #path = "/home/data/vip10t01/project/03_dark/oma/Output/OrthologousGroupsFasta/" #path = "/home/data/vip10t01/project/03_dark/oma_2/Output/OrthologousGroupsFasta/" path = "/home/data/vip10t01/project/03_dark/oma_2/Output/HOGFasta/" #desPath = "/home/data/vip10t01/project/03_dark/oma/Output/single_copy3/" #desPath = "/home/data/vip10t01/project/03_dark/oma_2/Output/single_copy3/" desPath = "/home/data/vip10t01/project/03_dark/oma_2/Output/single_copy_1/" fileList = os.listdir(path) #print(fileList) def get_files(path, desPath): for filename in fileList: tmp = path + filename with open(tmp, "r") as file: id=0 a=[] for line in file.readlines(): if ">" in line: id += 1 # line_str=line[0:8] line_str=line # print(line_str) a.append(line_str) # print(a) print(id) req1 = "A_mexicanus" req2 = "C_idella" req3 = "L_oculatus" req4 = "L_tanaka" req5 = "P_nattereri" req6 = "P_swirei" req7 = "P_transmontana" req8 = "S_anshuiensis" req9 = "T_subterraneus" if id == 9 and len(a)==len(set(a)) and req1 in str(a) and req2 in str(a) and req3 in str(a) and req4 in str(a) and req5 in str(a) and req6 in str(a) and req7 in str(a) and req8 in str(a) and req9 in str(a) or id == 10 and req1 in str(a) and req2 in str(a) and req3 in str(a) and req4 in str(a) and req5 in str(a) and req6 in str(a) and req7 in str(a) and req8 in str(a) and req9 in str(a) and str(a).count(req8) == 2: # if id == 9 and req1 in str(a) and req2 in str(a) and req3 in str(a) and req4 in str(a) and req5 in str(a) and req6 in str(a) and req7 in str(a) and req8 in str(a) and req9 in str(a): # if id == 10 and str(a).count(req8) == 2 and req1 in str(a) and req2 in str(a) and req3 in str(a) and req4 in str(a) and req5 in str(a) and req6 in str(a) and req7 in str(a) and req9 in str(a): shutil.copy(tmp, desPath) # a=[] # for line in file.readlines(): # line_str = line[0:2] # print(line) # if line_str not in a: # a.append(line_str) # if len(a) == 9: # shutil.copy(tmp, desPath) get_files(path, desPath)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了