返回顶部

Python代码整合两个Excel表格(很多有相同的名字)

一个表(基因信息.xls)包含另外一个表(clinical.xls)所需的信息,将基因信息.xls里面的信息根据名字(id)利用re提取出来,放到clinical.xls后面,每行对应,并且基因信息.xls可能有多个不同的信息,不一样的进行标黄,最后将合并后的信息存到new.xls

import xlrd
import xlwt
import re
import operator

filename0 = r'F:\paper\excel\clinical.xls'
data00 = xlrd.open_workbook(filename0)#文件名以及路径,如果路径或者文件名有中文给前面加一个 r
table00 = data00.sheets()[0]          			#通过索引顺序获取

filename1 = r'F:\paper\excel\基因信息.xls'
data11 = xlrd.open_workbook(filename1)#文件名以及路径,如果路径或者文件名有中文给前面加一个 r
table11 = data11.sheets()[0]          			#通过索引顺序获取


workbook = xlwt.Workbook(encoding= 'ascii')
worksheet = workbook.add_sheet("new Sheet")
# 创建样式用于表格背景标黄
pattern = xlwt.Pattern()
pattern.pattern = xlwt.Pattern.SOLID_PATTERN
pattern.pattern_fore_colour = 5
style = xlwt.XFStyle()
style.pattern = pattern

# 往表格写入内容
d0 = []     # 用列表存clinical03.xls数据,方便操作
sheet = table00
for r in range(sheet.nrows): #将表中数据按行逐步添加到列表中,最后转换为list结构
    data1 = []
    for c in range(sheet.ncols):
        data1.append(sheet.cell_value(r,c))
    d0.append(list(data1))
# print(d0)

d1 = []     #新建一个列表基因信息
sheet = table11
for r in range(sheet.nrows): #将表中数据按行逐步添加到列表中,最后转换为list结构
    data1 = []
    for c in range(sheet.ncols):
        data1.append(sheet.cell_value(r,c))
    d1.append(list(data1))

# 存表头
list0 = d0[0]
list0.extend(d1[0])
for i in range(len(list0)):
    worksheet.write(0, i, list0[i])

# 存所有数据
t = 0
for rowx0 in range(len(d0)):
    if rowx0 == 0:
        continue
    for i in range(len(d0[rowx0])):
        worksheet.write(rowx0+t, i, d0[rowx0][i])
    tt = 0
    for rowx1 in range(len(d1)):
        if rowx1 == 0:
            continue
        if len(re.findall(d0[rowx0][0], d1[rowx1][0])) != 0:
            if tt==0:
                list1 = d1[rowx1][1:]
                for i in range(len(d1[rowx1])):
                    worksheet.write(rowx0 + t + tt, i + len(d0[rowx0]), d1[rowx1][i])
            else:
                if operator.eq(list1,d1[rowx1][1:]):
                    continue#一样的就不显示
                    for i in range(len(d1[rowx1])):
                        worksheet.write(rowx0 + t+tt, i+len(d0[rowx0]), d1[rowx1][i])
                else:
                    for i in range(len(d1[rowx1])):
                        worksheet.write(rowx0 + t+tt, i+len(d0[rowx0]), d1[rowx1][i], style)
            tt+=1
    if tt>1:
        t=t+tt-1

workbook.save("new.xls")
posted @ 2022-08-25 11:21  #wr  阅读(451)  评论(0编辑  收藏  举报
         1 2 3