import re
import xlrd
# Output files written by message(): f1 receives the records shared by all
# four gene sets, f2..f5 the records unique to one set. All five are opened
# for writing (truncating any existing file) as soon as the module runs.
# NOTE(review): the handles are not closed anywhere in the visible code, so
# flushing relies on interpreter shutdown.
# NOTE(review): "a9_not_c8a3a16.txt" looks like a typo for "v9_not_..." when
# compared with the other names; the path is left as-is because it is
# runtime output.
f1, f2, f3, f4, f5 = (
    open(name, "w")
    for name in (
        "v9_c8_a3_a16.txt",
        "a9_not_c8a3a16.txt",
        "c8_not_v9a3a16.txt",
        "a3_not_v9c8a16.txt",
        "a16_not_v9c8a3.txt",
    )
)
def read(file, sheet_index=0):
    """Load every row of one worksheet from a spreadsheet file.

    Args:
        file: Path handed to ``xlrd.open_workbook``.
        sheet_index: Index of the sheet to read, passed to
            ``sheet_by_index`` (defaults to 0).

    Returns:
        A list with one entry per row; each entry is the value returned by
        ``row_values`` for that row.

    Side effects:
        Prints the sheet name together with its row and column counts.
    """
    # NOTE(review): recent xlrd releases reportedly read only .xls files,
    # while this script passes .xlsx paths -- confirm the installed version.
    book = xlrd.open_workbook(file)
    worksheet = book.sheet_by_index(sheet_index)
    print("工作表名称:", worksheet.name, "行数:", worksheet.nrows, "列数:", worksheet.ncols)
    return [worksheet.row_values(row) for row in range(worksheet.nrows)]
def red(text):
    """Collect the distinct ``MGG_`` identifiers found in a text file.

    Args:
        text: Path of the file to read (opened in text mode with the
            platform default encoding).

    Returns:
        A set of every distinct substring matching ``MGG_`` followed by
        five digits.
    """
    with open(text, 'r') as handle:
        contents = handle.read()
    # The set comprehension collapses repeated identifiers.
    return {hit.group() for hit in re.finditer(r'MGG_\d{5}', contents)}
# Load the four inputs at import time, in the same order as before.
# v9 and c8 hold the raw worksheet rows returned by read(); a3 and a16 hold
# the sets of MGG_ identifiers that red() extracts from the CSV text.
v9, c8 = read(r'zhu.xlsx'), read(r'liu.xlsx')
a3, a16 = red(r'ATG3.csv'), red(r'ATG16.csv')
def reg(data):
    """Extract the distinct ``MGG_`` identifiers from arbitrary data.

    Args:
        data: Any object; it is converted with ``str()`` before searching,
            so nested lists, sets and plain strings are all accepted.

    Returns:
        A set of every distinct substring matching ``MGG_`` followed by
        five digits in the string form of ``data``.
    """
    # The regex needs a string, so search the str() rendering of the input.
    rendered = str(data)
    return {hit.group() for hit in re.finditer(r'MGG_\d{5}', rendered)}
def vps9():
    """Return the set of identifiers ``reg`` extracts from the module-level ``v9`` data."""
    identifiers = reg(v9)
    return identifiers
def cdk8():
    """Return the set of identifiers ``reg`` extracts from the module-level ``c8`` data."""
    identifiers = reg(c8)
    return identifiers
def Atg3():
    """Return the set of identifiers ``reg`` extracts from the module-level ``a3`` data."""
    identifiers = reg(a3)
    return identifiers
def Atg16():
    """Return the set of identifiers ``reg`` extracts from the module-level ``a16`` data."""
    identifiers = reg(a16)
    return identifiers
def Mgg1_Mgg2():
    """Partition the four identifier sets into shared and exclusive groups.

    Calls ``vps9()``, ``cdk8()``, ``Atg3()`` and ``Atg16()`` (in that order)
    and combines the sets they return.

    Returns:
        A 5-tuple of sets, in this order:
        1. identifiers present in all four sets,
        2. identifiers only in the ``vps9()`` set,
        3. identifiers only in the ``cdk8()`` set,
        4. identifiers only in the ``Atg3()`` set,
        5. identifiers only in the ``Atg16()`` set.
    """
    vps9_ids = vps9()
    cdk8_ids = cdk8()
    atg3_ids = Atg3()
    atg16_ids = Atg16()

    in_all_four = vps9_ids.intersection(cdk8_ids, atg3_ids, atg16_ids)
    # Each "only" group removes everything found in any of the other three.
    only_vps9 = vps9_ids.difference(cdk8_ids, atg3_ids, atg16_ids)
    only_cdk8 = cdk8_ids.difference(vps9_ids, atg3_ids, atg16_ids)
    only_atg3 = atg3_ids.difference(vps9_ids, cdk8_ids, atg16_ids)
    only_atg16 = atg16_ids.difference(vps9_ids, atg3_ids, cdk8_ids)

    return in_all_four, only_vps9, only_cdk8, only_atg3, only_atg16
def message():
    """Write each matching record of 'magnaporthe.txt' to its group's file.

    The input file is read whole and split on '>' (presumably FASTA-style
    records -- confirm against the data). For every record, each ``MGG_``
    identifier it contains is looked up in the five groups returned by
    ``Mgg1_Mgg2()``; for every hit the line ``<id> <record>`` is written to
    the matching module-level handle:

        f1 -- identifiers shared by all four sets
        f2 -- identifiers only in the first (vps9) set
        f3 -- identifiers only in the second (cdk8) set
        f4 -- identifiers only in the third (Atg3) set
        f5 -- identifiers only in the fourth (Atg16) set

    Identifiers are pulled out of each record once with the same
    ``MGG_\\d{5}`` pattern that produced the sets, instead of substring-testing
    every identifier of every set against every record. When one record
    matches several identifiers of a group they are written in sorted order,
    so the output no longer depends on set iteration order (which varies
    between runs under string hash randomisation).

    Returns:
        None. The output handles are left open.
    """
    shared, only_v9, only_c8, only_a3, only_a16 = Mgg1_Mgg2()
    # Pair each identifier group with the handle it is written to.
    routes = (
        (shared, f1),
        (only_v9, f2),
        (only_c8, f3),
        (only_a3, f4),
        (only_a16, f5),
    )
    id_pattern = re.compile(r'MGG_\d{5}')

    with open('magnaporthe.txt', 'r') as f:
        records = f.read().split('>')

    for record in records:
        # Every identifier in the groups matches id_pattern exactly, so the
        # identifiers found here are the ones a substring test would hit.
        found = set(id_pattern.findall(record))
        if not found:
            continue
        for gene_ids, out in routes:
            for gene_id in sorted(found & gene_ids):
                out.write(gene_id + ' ' + record)
# Run the export immediately; there is no __main__ guard, so importing this
# file triggers it as well.
message()