Python 批处理 Excel 文件实现数据的提取

import re
import xlrd
# Output files, one per identifier group; message() writes the matching
# records into them. They are opened (and truncated) as soon as the module
# runs and are never explicitly closed in the visible code.
f1 = open("v9_c8_a3_a16.txt","w")  # identifiers present in all four sets
# NOTE(review): "a9" looks like a typo for "v9" (this file receives the
# v9-only group) — confirm before renaming, since the path is runtime output.
f2 = open("a9_not_c8a3a16.txt","w")
f3 = open("c8_not_v9a3a16.txt","w")  # identifiers found only in the c8 set
f4 = open("a3_not_v9c8a16.txt","w")  # identifiers found only in the a3 set
f5 = open("a16_not_v9c8a3.txt","w")  # identifiers found only in the a16 set
def read(file, sheet_index=0):
    """Load every row of one worksheet from an Excel workbook.

    Args:
        file: Path of the workbook, passed to ``xlrd.open_workbook``.
        sheet_index: Index of the worksheet to read (defaults to 0).

    Returns:
        A list with one entry per row, each entry being whatever
        ``sheet.row_values`` returns for that row.
    """
    book = xlrd.open_workbook(file)
    worksheet = book.sheet_by_index(sheet_index)
    # Report the sheet name and its dimensions before loading the rows.
    print("工作表名称:", worksheet.name, "行数:", worksheet.nrows, "列数:", worksheet.ncols)
    return [worksheet.row_values(row_no) for row_no in range(worksheet.nrows)]

def red(text):
    """Collect the distinct ``MGG_`` + five-digit identifiers in a text file.

    Args:
        text: Path of the file to scan.

    Returns:
        A set of the unique identifier strings found in the file.
    """
    with open(text, 'r') as handle:
        contents = handle.read()
    gene_id = re.compile(r'MGG_\d{5}')
    # A set drops the duplicates that findall reports.
    return set(gene_id.findall(contents))
# Load the four input data sets as soon as the module runs.
# v9 / c8: worksheet rows as returned by read() from the Excel files.
v9 = read(r'zhu.xlsx')
c8 = read(r'liu.xlsx')
# a3 / a16: sets of MGG_ identifiers already extracted by red() from the CSVs.
a3 = red(r'ATG3.csv')
a16 = red(r'ATG16.csv')
def reg(data):
    """Extract the distinct ``MGG_`` + five-digit identifiers from any object.

    Args:
        data: Any object; it is converted with ``str()`` before searching,
            so nested lists of cell values and sets of strings both work.

    Returns:
        A set of the unique identifier strings found in ``str(data)``.
    """
    # The regex operates on text, so stringify the input first.
    as_text = str(data)
    return set(re.compile(r'MGG_\d{5}').findall(as_text))
def vps9():
        """Return the set of MGG_ identifiers found in the module-level ``v9`` rows (read from zhu.xlsx)."""
        return reg(v9)
def cdk8():
        """Return the set of MGG_ identifiers found in the module-level ``c8`` rows (read from liu.xlsx)."""
        return reg(c8)
def Atg3():
    """Return the set of MGG_ identifiers held in the module-level ``a3`` (from ATG3.csv).

    ``a3`` is already a set produced by ``red``; ``reg`` re-extracts the same
    identifiers from its string form.
    """
    return reg(a3)
def Atg16():
    """Return the set of MGG_ identifiers held in the module-level ``a16`` (from ATG16.csv).

    ``a16`` is already a set produced by ``red``; ``reg`` re-extracts the same
    identifiers from its string form.
    """
    return reg(a16)
def Mgg1_Mgg2():
    """Partition the four identifier sets into shared and exclusive groups.

    Returns:
        A 5-tuple of sets, in this order: identifiers present in all four
        sets, then those found only in the ``vps9``, ``cdk8``, ``Atg3`` and
        ``Atg16`` sets respectively.
    """
    vps9_ids = vps9()
    cdk8_ids = cdk8()
    atg3_ids = Atg3()
    atg16_ids = Atg16()

    # Identifiers shared by every data set.
    in_all = vps9_ids.intersection(cdk8_ids, atg3_ids, atg16_ids)

    # Identifiers exclusive to a single data set.
    only_vps9 = vps9_ids.difference(cdk8_ids, atg3_ids, atg16_ids)
    only_cdk8 = cdk8_ids.difference(vps9_ids, atg3_ids, atg16_ids)
    only_atg3 = atg3_ids.difference(vps9_ids, cdk8_ids, atg16_ids)
    only_atg16 = atg16_ids.difference(vps9_ids, atg3_ids, cdk8_ids)

    return in_all, only_vps9, only_cdk8, only_atg3, only_atg16
def message():
    """Write matching records from magnaporthe.txt to the five output files.

    The file content is split on ``'>'`` into records. For every record and
    every identifier of a group that occurs in it (substring test), the text
    ``identifier + ' ' + record`` is written to that group's file: ``f1`` to
    ``f5``, in the order of the groups returned by ``Mgg1_Mgg2``.
    """
    in_all, only_v9, only_c8, only_a3, only_a16 = Mgg1_Mgg2()
    with open('magnaporthe.txt','r') as source:
        records = source.read().split('>')
    # Pair each identifier group with the handle its records go to.
    targets = (
        (in_all, f1),
        (only_v9, f2),
        (only_c8, f3),
        (only_a3, f4),
        (only_a16, f5),
    )
    for record in records:
        for gene_ids, out in targets:
            for gene_id in gene_ids:
                if gene_id in record:
                    out.write(gene_id + ' ' + record)
# Run the extraction immediately when the module is executed (or imported).
message()
posted @ 2017-10-25 22:14 pyming 阅读(4070) 评论(0) 编辑收藏举报
会员力量，点亮园子希望
刷新页面返回顶部
pyming

Python 批处理 Excel 文件实现数据的提取

公告