饮冰三年-人工智能-Django淘宝拾遗-84-常用的Excel库性能比对
说明
在处理大数据量且数据格式多样的Excel文件时,Python中的xlwt、openpyxl和xlsxwriter是常用的Excel库。本篇技术博客将对这三个库在性能方面进行对比,通过生成大数据,并设置多种数据格式和样式,来评估它们在处理复杂Excel文件时的性能表现,以帮助开发者在选择库时做出明智的决策。
一、基础版
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
import timeit import xlwt import openpyxl import xlsxwriter # 使用xlwt生成Excel文件 def test_xlwt(): workbook = xlwt.Workbook() worksheet = workbook.add_sheet('Sheet1') for i in range(1000): for j in range(100): worksheet.write(i, j, f'Cell ({i}, {j})') workbook.save('xlwt_example.xls') # 使用openpyxl生成Excel文件 def test_openpyxl(): workbook = openpyxl.Workbook() worksheet = workbook.active for i in range(1, 1001): for j in range(1, 101): worksheet.cell(i, j, value=f'Cell ({i}, {j})') workbook.save('openpyxl_example.xlsx') # 使用openpyxl生成Excel文件 def test_openpyxl_fast(): workbook = openpyxl.Workbook(write_only=True) worksheet = workbook.create_sheet() for i in range(1, 1001): row_data = [f'Cell ({i}, {j})' for j in range(1, 101)] worksheet.append(row_data) workbook.save('openpyxl_example_fast.xlsx') # 使用xlsxwriter生成Excel文件 def test_xlsxwriter(): workbook = xlsxwriter.Workbook('xlsxwriter_example.xlsx') worksheet = workbook.add_worksheet() for i in range(1000): for j in range(100): worksheet.write(i, j, f'Cell ({i}, {j})') workbook.close() if __name__ == '__main__': xlwt_time = timeit.timeit(test_xlwt, number=1) print(f"xlwt: {xlwt_time:.5f} seconds") xlsxwriter_time = timeit.timeit(test_xlsxwriter, number=1) print(f"xlsxwriter: {xlsxwriter_time:.5f} seconds") openpyxl_time = timeit.timeit(test_openpyxl_fast, number=1) print(f"openpyxl_fast: {openpyxl_time:.5f} seconds") openpyxl_time = timeit.timeit(test_openpyxl, number=1) print(f"openpyxl: {openpyxl_time:.5f} seconds")
结果
xlwt: 0.36486 seconds
xlsxwriter: 0.53246 seconds
openpyxl_fast: 0.59879 seconds
openpyxl: 0.67676 seconds
二、进阶版
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
import timeit import xlwt import openpyxl import xlsxwriter import random import string from datetime import datetime, timedelta LINK_DICT = { "百度": 'https://www.baidu.com/', "谷歌": 'https://www.google.com/' } def generate_random_data(num_rows, num_cols): data = [] head_row = ["第" + str(i + 1) + "列" for i in range(num_cols)] data.append(head_row) for i in range(num_rows): row_data = [] for j in range(num_cols): # 生成随机数据,包括数字、字符串、长字符串和日期 if j % 8 == 0: row_data.append(''.join(random.choices(list(LINK_DICT)))) # 随机生成10位字符串 elif j % 8 == 1: row_data.append(random.randint(20, 60)) # 随机生成20到60之间的整数 elif j % 8 == 2: row_data.append(''.join( random.choices(string.ascii_letters + string.digits, k=random.randint(20, 50)))) # 随机生成20到50位字符串 elif j % 8 == 3: row_data.append(''.join(random.choices(string.ascii_letters + string.digits + string.punctuation, k=random.randint(50, 100)))) # 随机生成50到100位字符串 elif j % 8 == 4: random_date = datetime(2023, 1, 1) + timedelta(days=random.randint(0, 365)) # 随机生成2023年内的日期 row_data.append(random_date.strftime('%Y-%m-%d')) else: row_data.append(random.random() * 1000) # 随机生成0到1000之间的小数 data.append(row_data) return data def set_style_xlwt(bold=False, underline=False, color=None, bg_color=None): font = xlwt.Font() font.name = 'SimSun' font.bold = bold font.underline = underline if color: font.colour_index = color alignment = xlwt.Alignment() alignment.horz = xlwt.Alignment.HORZ_CENTER alignment.vert = xlwt.Alignment.VERT_CENTER borders = xlwt.Borders() borders.left = xlwt.Borders.THIN borders.right = xlwt.Borders.THIN borders.top = xlwt.Borders.THIN borders.bottom = xlwt.Borders.THIN pattern = xlwt.Pattern() pattern.pattern = xlwt.Pattern.SOLID_PATTERN if bg_color: pattern.pattern_fore_colour = xlwt.Style.colour_map[bg_color] style = xlwt.XFStyle() style.font = font style.alignment = alignment style.borders = borders style.pattern = pattern return style # 使用xlwt生成Excel文件 def test_xlwt(): workbook = xlwt.Workbook() worksheet = workbook.add_sheet('Sheet1', cell_overwrite_ok=True) # 设置表头样式 header_style = set_style_xlwt(bold=True) # 设置内容样式 content_style = set_style_xlwt() # 设置链接样式,添加蓝色下划线 link_style = set_style_xlwt(underline=True, color=xlwt.Style.colour_map['blue']) bg_clolor_style_dict = { 'white': set_style_xlwt(bg_color='white'), 'green': set_style_xlwt(bg_color='green'), 'yellow': set_style_xlwt(bg_color='yellow'), 'red': set_style_xlwt(bg_color='red') } data = generate_random_data(10000, 100) for row_idx, row_data in enumerate(data): for col_idx, value in enumerate(row_data): if row_idx == 0: worksheet.write(row_idx, col_idx, value, header_style) worksheet.col(col_idx).width = 17 * 256 elif isinstance(value, int) or isinstance(value, float): # 根据数字值设置背景颜色 if value < 60: bg_color = 'red' elif value < 80: bg_color = 'yellow' elif value < 100: bg_color = 'green' else: bg_color = 'white' worksheet.write(row_idx, col_idx, value, bg_clolor_style_dict.get(bg_color)) else: # 为超链接列添加超链接 if value in LINK_DICT.keys(): link = LINK_DICT.get(value) worksheet.write(row_idx, col_idx, xlwt.Formula(f'HYPERLINK("{link}"; "{value}")'), link_style) else: worksheet.write(row_idx, col_idx, value, content_style) workbook.save('xlwt_example.xls') # 使用xlsxwriter生成Excel文件 def test_xlsxwriter(): workbook = xlsxwriter.Workbook('xlsxwriter_example.xlsx') worksheet = workbook.add_worksheet() content_style = {'font_name': 'SimSun', 'align': 'center', 'valign': 'vcenter', 'border': 1} content_format = workbook.add_format(content_style) content_style.update({'bold': True}) header_format = workbook.add_format(content_style) # 设置表头样式 del content_style['bold'] # 根据不同条件创建不同的格式对象 content_style['bg_color'] = 'red' red_bg_format = workbook.add_format(content_style) content_style['bg_color'] = 'yellow' yellow_bg_format = workbook.add_format(content_style) content_style['bg_color'] = 'green' green_bg_format = workbook.add_format(content_style) del content_style['bg_color'] content_style.update({'color': 'blue', 'underline': 1, }) link_format = workbook.add_format(content_style) # 设置超链接样式 del content_style['color'] del content_style['underline'] # 写入数据和样式 data = generate_random_data(10000, 100) for row_index, row_data in enumerate(data): for col_index, value in enumerate(row_data): if row_index == 0: worksheet.write(row_index, col_index, value, header_format) worksheet.set_column(row_index, col_index, 17) else: if isinstance(value, int) or isinstance(value, float): bg_color_format = content_format if value < 60: bg_color_format = red_bg_format elif value < 80: bg_color_format = yellow_bg_format elif value < 100: bg_color_format = green_bg_format worksheet.write(row_index, col_index, value, bg_color_format) elif value in LINK_DICT.keys(): # 为超链接列添加超链接 link = LINK_DICT.get(value) worksheet.write_url(row_index, col_index, link, string=value, cell_format=link_format) else: worksheet.write(row_index, col_index, value, content_format) workbook.close() # 使用openpyxl生成Excel文件 def test_openpyxl(): workbook = openpyxl.Workbook() worksheet = workbook.active content_border = openpyxl.styles.Border(left=openpyxl.styles.Side(style='thin'), right=openpyxl.styles.Side(style='thin'), top=openpyxl.styles.Side(style='thin'), bottom=openpyxl.styles.Side(style='thin')) content_alignment = openpyxl.styles.Alignment(vertical='center', horizontal='center') content_fill = openpyxl.styles.PatternFill(start_color=None, end_color=None, fill_type='solid') # 设置表头样式 header_style = openpyxl.styles.Font(bold=True, name='SimSun') header_cell_style = openpyxl.styles.NamedStyle(name='header_style', font=header_style, alignment=content_alignment, border=content_border) # 设置内容样式 content_style = openpyxl.styles.Font(name='SimSun') content_cell_style = openpyxl.styles.NamedStyle(name='content_style', font=content_style, alignment=content_alignment, border=content_border, fill=content_fill) # 设置超链接样式,添加蓝色下划线 link_style = openpyxl.styles.Font(name='SimSun', underline='single', color='0000FF') link_cell_style = openpyxl.styles.NamedStyle(name='link_style', font=link_style, alignment=content_alignment, border=content_border, fill=content_fill) data = generate_random_data(10000, 100) for row_idx, row_data in enumerate(data): for col_idx, value in enumerate(row_data): if row_idx == 0: cell = worksheet.cell(row=row_idx + 1, column=col_idx + 1, value=value) cell.style = header_cell_style worksheet.column_dimensions[openpyxl.utils.get_column_letter(col_idx + 1)].width = 17 else: content_fill = openpyxl.styles.PatternFill(start_color=None, end_color=None, fill_type='solid') content_cell_style.fill = content_fill if isinstance(value, int) or isinstance(value, float): # 根据数字值设置背景颜色 if value < 60: bg_color = 'FF0000' # 红色 elif value < 80: bg_color = 'FFFF00' # 黄色 elif value < 100: bg_color = '00FF00' # 绿色 else: bg_color = 'FFFFFF' # 白色 if bg_color: content_fill = openpyxl.styles.PatternFill(start_color=bg_color, end_color=bg_color, fill_type='solid') content_cell_style.fill = content_fill cell = worksheet.cell(row=row_idx + 1, column=col_idx + 1, value=value) cell.style = content_cell_style elif value in LINK_DICT.keys(): # 为超链接列添加超链接 link = LINK_DICT.get(value) cell = worksheet.cell(row=row_idx + 1, column=col_idx + 1, value=value) cell.hyperlink = link cell.style = link_cell_style else: cell = worksheet.cell(row=row_idx + 1, column=col_idx + 1, value=value) cell.style = content_cell_style workbook.save('openpyxl_example.xlsx') # 使用openpyxl生成Excel文件 def test_openpyxl_fast(): workbook = openpyxl.Workbook(write_only=True) worksheet = workbook.create_sheet() content_border = openpyxl.styles.Border(left=openpyxl.styles.Side(style='thin'), right=openpyxl.styles.Side(style='thin'), top=openpyxl.styles.Side(style='thin'), bottom=openpyxl.styles.Side(style='thin')) content_alignment = openpyxl.styles.Alignment(vertical='center', horizontal='center') content_fill = openpyxl.styles.PatternFill(start_color=None, end_color=None, fill_type='solid') # 设置表头样式 header_style = openpyxl.styles.NamedStyle(name="header_style") header_style.font = openpyxl.styles.Font(bold=True, name='SimSun') header_style.alignment = content_alignment header_style.border = content_border header_style.fill = content_fill # 设置内容样式 content_cell_style = openpyxl.styles.NamedStyle(name="content_cell_style") content_cell_style.font = openpyxl.styles.Font(name='SimSun') content_cell_style.alignment = content_alignment content_cell_style.border = content_border content_cell_style.fill = content_fill # 设置超链接样式,添加蓝色下划线 link_cell_style = openpyxl.styles.NamedStyle(name="link_cell_style") link_cell_style.font = openpyxl.styles.Font(name='SimSun', underline='single', color='0000FF') link_cell_style.alignment = content_alignment link_cell_style.border = content_border link_cell_style.fill = content_fill data = generate_random_data(10000, 100) for row_idx, row_data in enumerate(data): row = [] for col_idx, value in enumerate(row_data): cell = openpyxl.cell.cell.WriteOnlyCell(worksheet, value=value) if row_idx == 0: cell.style = header_style worksheet.column_dimensions[openpyxl.utils.get_column_letter(col_idx + 1)].width = 17 else: content_fill = openpyxl.styles.PatternFill(start_color=None, end_color=None, fill_type='solid') content_cell_style.fill = content_fill if isinstance(value, int) or isinstance(value, float): # 根据数字值设置背景颜色 if value < 60: bg_color = 'FF0000' # 红色 elif value < 80: bg_color = 'FFFF00' # 黄色 elif value < 100: bg_color = '00FF00' # 绿色 else: bg_color = 'FFFFFF' # 白色 if bg_color: content_fill = openpyxl.styles.PatternFill(start_color=bg_color, end_color=bg_color, fill_type='solid') content_cell_style.fill = content_fill cell.style = content_cell_style elif value in LINK_DICT.keys(): # 为超链接列添加超链接 link = LINK_DICT.get(value) cell.hyperlink = link cell.style = link_cell_style else: cell.style = content_cell_style row.append(cell) worksheet.append(row) workbook.save('openpyxl_example_fast.xlsx') if __name__ == '__main__': xlwt_time = timeit.timeit(test_xlwt, number=1) print(f"xlwt: {xlwt_time:.5f} seconds") xlsxwriter_time = timeit.timeit(test_xlsxwriter, number=1) print(f"xlsxwriter: {xlsxwriter_time:.5f} seconds") openpyxl_time = timeit.timeit(test_openpyxl, number=1) print(f"openpyxl: {openpyxl_time:.5f} seconds") openpyxl_time = timeit.timeit(test_openpyxl_fast, number=1) print(f"openpyxl (write_only=True): {openpyxl_time:.5f} seconds")
丰富内容和样式
1:内容 数字(整数、小数)、字符串(长度不一)、日期格式、链接
2:样式 表头(字体、居中、加粗)(边框)(数字类型、背景色)(链接)
结果
百列千行
xlwt: 1.13747 seconds
xlsxwriter: 0.76269 seconds
openpyxl: 14.06028 seconds
openpyxl (write_only=True): 14.18843 seconds
xlwt: 1.13747 seconds
xlsxwriter: 0.76269 seconds
openpyxl: 14.06028 seconds
openpyxl (write_only=True): 14.18843 seconds
百列万行
xlwt: 11.28110 seconds
xlsxwriter: 7.14836 seconds
openpyxl: 757.70731 seconds
openpyxl (write_only=True): 751.23908 seconds