根据excel表头,合并其他excel数据

# -*- coding: utf-8 -*-
"""
Created on Thu Jul  6 20:57:19 2023

@author: noah
"""

# -*- coding: utf-8 -*-
"""
Created on Thu Jul  6 20:41:17 2023

@author: noah
"""

import xlrd
#import xlwt
import time
import xlsxwriter
import os

def get_title(file):
    #读取结果宽表表头
    #title_file = r"w.xls"# 打开指定路径中的xls文件
    book = xlrd.open_workbook(file)#得到Excel文件的book对象,实例化对象
    #sheet0 = book.sheet_by_index(0) # 通过sheet索引获得sheet对象
    sheet_name = book.sheet_names()[0]# 获得指定索引的sheet表名字
    sheet1 = book.sheet_by_name(sheet_name)# 通过sheet名字来获取,当然如果知道sheet名字就可以直接指定
    #循环打印每一行的内容
    return sheet1.row_values(0)

def str_deal(str):
    #处理字符串
    str = str.strip()#空格
    str = str.replace(' ','')#空格
    str = str.replace('_','-')#下划线
    str = str.replace('-','-')#中文横杆1
    str = str.replace('','-')#中文横杆2
    return str
def get_diff_list(write,read):
    #获取源和目标的差异列表
    result = []
    b_len = len(read)
    i = 0
    res_w = []
    res_r = []
    for i in range(0,len(write)-1):
        for j in range(0,len(read)-1):
            #处理特殊字符和空格
            #
            #
            left =  str_deal(write[i])
            right = str_deal(read[j])
            if left == right:

                res_w.append(i)
                res_r.append(j)
            j = j + 1
        i = i + 1#
    result.append(res_w)
    result.append(res_r)
    return result


#获取表数据
#返回第一个参数:行数-1
#其余为每行数据
def get_data(xlsfile):
    array_list = []
    #xlsfile = r"r.xls"# 打开指定路径中的xls文件
    book = xlrd.open_workbook(xlsfile)#得到Excel文件的book对象,实例化对象
    sheet0 = book.sheet_by_index(0) # 通过sheet索引获得sheet对象
    sheet_name = book.sheet_names()[0]# 获得指定索引的sheet表名字
    sheet1 = book.sheet_by_name(sheet_name)# 通过sheet名字来获取,当然如果知道sheet名字就可以直接指定
    nrows = sheet0.nrows    # 获取行总数
    array_list.append(nrows-1)
    #循环打印每一行的内容
    for i in range(nrows):
        if i > 0:
            array_list.append(sheet1.row_values(i))
    #ncols = sheet0.ncols    #获取列总数
    #row_data = sheet0.row_values(0)     # 获得第1行的数据列表
    return array_list


def create_sheet(start_rows,res_w,res_r,array_list,sheet):
    #sheet.write(0, 0, 'EnglishName')   
    #sheet.write(1, 0, 'Marcovaldo')
    result = []
    k = 1
    for i in range(0,array_list[0]):
        for j in range(0,len(res_w)-1):
            #sheet.write(i,j,str(i)+str(j))
            sheet.write(start_rows,res_w[j],array_list[k][res_r[j]])
        k = k + 1
        start_rows = start_rows + 1
    result.append(start_rows)
    result.append(sheet)
    return result

print('run start ' + time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()))
#修改
ny = '202201'
dir_path = '\\'+ ny +'\\'
writebook = xlsxwriter.Workbook(ny + '.xlsx')                #打开excel
sheet = writebook.add_worksheet(ny)

#获取表头
w = get_title(r'w.xls')

#安装表头
for i in range(0,len(w)-1):
    sheet.write(0,i,w[i])

#get_diff_list_result = get_diff_list(w,r)
#create_sheet_list = create_sheet(1,get_diff_list_result[0],get_diff_list_result[1],get_data("D:\\2023\\2023-07\\2023-07-07\\py\\r.xls"),sheet)
#sheet = create_sheet_list[1]
#current_row = create_sheet_list[0]


dir_path = os.getcwd()+dir_path
files = os.listdir(dir_path)


i = 0
start_rows = 0
for f in files:
    ff = dir_path + f
    print("当前处理文件:%s"%ff)
    r = get_title(ff)
    get_diff_list_result = get_diff_list(w,r)
    if i == 0:
        start_rows = 1
    create_sheet_list = create_sheet(start_rows,get_diff_list_result[0],get_diff_list_result[1],get_data(ff),sheet)
    start_rows = create_sheet_list[0]
    sheet = create_sheet_list[1]
    i = i + 1
print(start_rows)



writebook.close()  

print('run end ' + time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()))

 

posted @ 2023-07-12 17:45  aongao  阅读(72)  评论(0编辑  收藏  举报