饮冰三年-人工智能-Django淘宝拾遗-84-常用的Excel库性能比对

说明

 在处理大数据量且数据格式多样的Excel文件时,Python中的xlwt、openpyxl和xlsxwriter是常用的Excel库。本篇技术博客将对这三个库在性能方面进行对比,通过生成大数据,并设置多种数据格式和样式,来评估它们在处理复杂Excel文件时的性能表现,以帮助开发者在选择库时做出明智的决策。

一、基础版

import timeit
import xlwt
import openpyxl
import xlsxwriter


# 使用xlwt生成Excel文件
def test_xlwt():
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('Sheet1')
    for i in range(1000):
        for j in range(100):
            worksheet.write(i, j, f'Cell ({i}, {j})')
    workbook.save('xlwt_example.xls')


# 使用openpyxl生成Excel文件
def test_openpyxl():
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    for i in range(1, 1001):
        for j in range(1, 101):
            worksheet.cell(i, j, value=f'Cell ({i}, {j})')
    workbook.save('openpyxl_example.xlsx')


# 使用openpyxl生成Excel文件
def test_openpyxl_fast():
    workbook = openpyxl.Workbook(write_only=True)
    worksheet = workbook.create_sheet()
    for i in range(1, 1001):
        row_data = [f'Cell ({i}, {j})' for j in range(1, 101)]
        worksheet.append(row_data)
    workbook.save('openpyxl_example_fast.xlsx')


# 使用xlsxwriter生成Excel文件
def test_xlsxwriter():
    workbook = xlsxwriter.Workbook('xlsxwriter_example.xlsx')
    worksheet = workbook.add_worksheet()
    for i in range(1000):
        for j in range(100):
            worksheet.write(i, j, f'Cell ({i}, {j})')
    workbook.close()


if __name__ == '__main__':
    xlwt_time = timeit.timeit(test_xlwt, number=1)
    print(f"xlwt: {xlwt_time:.5f} seconds")

    xlsxwriter_time = timeit.timeit(test_xlsxwriter, number=1)
    print(f"xlsxwriter: {xlsxwriter_time:.5f} seconds")

    openpyxl_time = timeit.timeit(test_openpyxl_fast, number=1)
    print(f"openpyxl_fast: {openpyxl_time:.5f} seconds")

    openpyxl_time = timeit.timeit(test_openpyxl, number=1)
    print(f"openpyxl: {openpyxl_time:.5f} seconds")
基础版 

结果

xlwt: 0.36486 seconds
xlsxwriter: 0.53246 seconds
openpyxl_fast: 0.59879 seconds
openpyxl: 0.67676 seconds

二、进阶版

 

import timeit
import xlwt
import openpyxl
import xlsxwriter
import random
import string
from datetime import datetime, timedelta

LINK_DICT = {
    "百度": 'https://www.baidu.com/',
    "谷歌": 'https://www.google.com/'
}


def generate_random_data(num_rows, num_cols):
    data = []
    head_row = ["" + str(i + 1) + "" for i in range(num_cols)]
    data.append(head_row)
    for i in range(num_rows):
        row_data = []
        for j in range(num_cols):
            # 生成随机数据,包括数字、字符串、长字符串和日期
            if j % 8 == 0:
                row_data.append(''.join(random.choices(list(LINK_DICT))))  # 随机生成10位字符串
            elif j % 8 == 1:
                row_data.append(random.randint(20, 60))  # 随机生成20到60之间的整数
            elif j % 8 == 2:
                row_data.append(''.join(
                    random.choices(string.ascii_letters + string.digits, k=random.randint(20, 50))))  # 随机生成20到50位字符串
            elif j % 8 == 3:
                row_data.append(''.join(random.choices(string.ascii_letters + string.digits + string.punctuation,
                                                       k=random.randint(50, 100))))  # 随机生成50到100位字符串
            elif j % 8 == 4:
                random_date = datetime(2023, 1, 1) + timedelta(days=random.randint(0, 365))  # 随机生成2023年内的日期
                row_data.append(random_date.strftime('%Y-%m-%d'))
            else:
                row_data.append(random.random() * 1000)  # 随机生成0到1000之间的小数
        data.append(row_data)
    return data


def set_style_xlwt(bold=False, underline=False, color=None, bg_color=None):
    font = xlwt.Font()
    font.name = 'SimSun'
    font.bold = bold
    font.underline = underline
    if color:
        font.colour_index = color

    alignment = xlwt.Alignment()
    alignment.horz = xlwt.Alignment.HORZ_CENTER
    alignment.vert = xlwt.Alignment.VERT_CENTER

    borders = xlwt.Borders()
    borders.left = xlwt.Borders.THIN
    borders.right = xlwt.Borders.THIN
    borders.top = xlwt.Borders.THIN
    borders.bottom = xlwt.Borders.THIN

    pattern = xlwt.Pattern()
    pattern.pattern = xlwt.Pattern.SOLID_PATTERN
    if bg_color:
        pattern.pattern_fore_colour = xlwt.Style.colour_map[bg_color]

    style = xlwt.XFStyle()
    style.font = font
    style.alignment = alignment
    style.borders = borders
    style.pattern = pattern

    return style


# 使用xlwt生成Excel文件
def test_xlwt():
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('Sheet1', cell_overwrite_ok=True)

    # 设置表头样式
    header_style = set_style_xlwt(bold=True)
    # 设置内容样式
    content_style = set_style_xlwt()
    # 设置链接样式,添加蓝色下划线
    link_style = set_style_xlwt(underline=True, color=xlwt.Style.colour_map['blue'])
    bg_clolor_style_dict = {
        'white': set_style_xlwt(bg_color='white'),
        'green': set_style_xlwt(bg_color='green'),
        'yellow': set_style_xlwt(bg_color='yellow'),
        'red': set_style_xlwt(bg_color='red')
    }

    data = generate_random_data(10000, 100)
    for row_idx, row_data in enumerate(data):
        for col_idx, value in enumerate(row_data):
            if row_idx == 0:
                worksheet.write(row_idx, col_idx, value, header_style)
                worksheet.col(col_idx).width = 17 * 256
            elif isinstance(value, int) or isinstance(value, float):
                # 根据数字值设置背景颜色
                if value < 60:
                    bg_color = 'red'
                elif value < 80:
                    bg_color = 'yellow'
                elif value < 100:
                    bg_color = 'green'
                else:
                    bg_color = 'white'
                worksheet.write(row_idx, col_idx, value, bg_clolor_style_dict.get(bg_color))
            else:
                # 为超链接列添加超链接
                if value in LINK_DICT.keys():
                    link = LINK_DICT.get(value)
                    worksheet.write(row_idx, col_idx, xlwt.Formula(f'HYPERLINK("{link}"; "{value}")'), link_style)
                else:
                    worksheet.write(row_idx, col_idx, value, content_style)

    workbook.save('xlwt_example.xls')


# 使用xlsxwriter生成Excel文件
def test_xlsxwriter():
    workbook = xlsxwriter.Workbook('xlsxwriter_example.xlsx')
    worksheet = workbook.add_worksheet()
    content_style = {'font_name': 'SimSun', 'align': 'center', 'valign': 'vcenter', 'border': 1}
    content_format = workbook.add_format(content_style)

    content_style.update({'bold': True})
    header_format = workbook.add_format(content_style)  # 设置表头样式
    del content_style['bold']

    # 根据不同条件创建不同的格式对象
    content_style['bg_color'] = 'red'
    red_bg_format = workbook.add_format(content_style)
    content_style['bg_color'] = 'yellow'
    yellow_bg_format = workbook.add_format(content_style)
    content_style['bg_color'] = 'green'
    green_bg_format = workbook.add_format(content_style)
    del content_style['bg_color']

    content_style.update({'color': 'blue', 'underline': 1, })
    link_format = workbook.add_format(content_style)  # 设置超链接样式
    del content_style['color']
    del content_style['underline']

    # 写入数据和样式
    data = generate_random_data(10000, 100)
    for row_index, row_data in enumerate(data):
        for col_index, value in enumerate(row_data):
            if row_index == 0:
                worksheet.write(row_index, col_index, value, header_format)
                worksheet.set_column(row_index, col_index, 17)
            else:
                if isinstance(value, int) or isinstance(value, float):
                    bg_color_format = content_format
                    if value < 60:
                        bg_color_format = red_bg_format
                    elif value < 80:
                        bg_color_format = yellow_bg_format
                    elif value < 100:
                        bg_color_format = green_bg_format
                    worksheet.write(row_index, col_index, value, bg_color_format)
                elif value in LINK_DICT.keys():
                    # 为超链接列添加超链接
                    link = LINK_DICT.get(value)
                    worksheet.write_url(row_index, col_index, link, string=value, cell_format=link_format)
                else:
                    worksheet.write(row_index, col_index, value, content_format)

    workbook.close()


# 使用openpyxl生成Excel文件
def test_openpyxl():
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    content_border = openpyxl.styles.Border(left=openpyxl.styles.Side(style='thin'),
                                            right=openpyxl.styles.Side(style='thin'),
                                            top=openpyxl.styles.Side(style='thin'),
                                            bottom=openpyxl.styles.Side(style='thin'))
    content_alignment = openpyxl.styles.Alignment(vertical='center', horizontal='center')

    content_fill = openpyxl.styles.PatternFill(start_color=None, end_color=None, fill_type='solid')

    # 设置表头样式
    header_style = openpyxl.styles.Font(bold=True, name='SimSun')
    header_cell_style = openpyxl.styles.NamedStyle(name='header_style', font=header_style,
                                                   alignment=content_alignment, border=content_border)
    # 设置内容样式
    content_style = openpyxl.styles.Font(name='SimSun')
    content_cell_style = openpyxl.styles.NamedStyle(name='content_style', font=content_style,
                                                    alignment=content_alignment, border=content_border,
                                                    fill=content_fill)
    # 设置超链接样式,添加蓝色下划线
    link_style = openpyxl.styles.Font(name='SimSun', underline='single', color='0000FF')
    link_cell_style = openpyxl.styles.NamedStyle(name='link_style', font=link_style,
                                                 alignment=content_alignment, border=content_border,
                                                 fill=content_fill)

    data = generate_random_data(10000, 100)
    for row_idx, row_data in enumerate(data):
        for col_idx, value in enumerate(row_data):
            if row_idx == 0:
                cell = worksheet.cell(row=row_idx + 1, column=col_idx + 1, value=value)
                cell.style = header_cell_style
                worksheet.column_dimensions[openpyxl.utils.get_column_letter(col_idx + 1)].width = 17
            else:
                content_fill = openpyxl.styles.PatternFill(start_color=None, end_color=None, fill_type='solid')
                content_cell_style.fill = content_fill
                if isinstance(value, int) or isinstance(value, float):
                    # 根据数字值设置背景颜色
                    if value < 60:
                        bg_color = 'FF0000'  # 红色
                    elif value < 80:
                        bg_color = 'FFFF00'  # 黄色
                    elif value < 100:
                        bg_color = '00FF00'  # 绿色
                    else:
                        bg_color = 'FFFFFF'  # 白色
                    if bg_color:
                        content_fill = openpyxl.styles.PatternFill(start_color=bg_color, end_color=bg_color,
                                                                   fill_type='solid')
                    content_cell_style.fill = content_fill
                    cell = worksheet.cell(row=row_idx + 1, column=col_idx + 1, value=value)
                    cell.style = content_cell_style
                elif value in LINK_DICT.keys():
                    # 为超链接列添加超链接
                    link = LINK_DICT.get(value)
                    cell = worksheet.cell(row=row_idx + 1, column=col_idx + 1, value=value)
                    cell.hyperlink = link
                    cell.style = link_cell_style
                else:
                    cell = worksheet.cell(row=row_idx + 1, column=col_idx + 1, value=value)
                    cell.style = content_cell_style

    workbook.save('openpyxl_example.xlsx')


# 使用openpyxl生成Excel文件
def test_openpyxl_fast():
    workbook = openpyxl.Workbook(write_only=True)
    worksheet = workbook.create_sheet()
    content_border = openpyxl.styles.Border(left=openpyxl.styles.Side(style='thin'),
                                            right=openpyxl.styles.Side(style='thin'),
                                            top=openpyxl.styles.Side(style='thin'),
                                            bottom=openpyxl.styles.Side(style='thin'))
    content_alignment = openpyxl.styles.Alignment(vertical='center', horizontal='center')
    content_fill = openpyxl.styles.PatternFill(start_color=None, end_color=None, fill_type='solid')

    # 设置表头样式

    header_style = openpyxl.styles.NamedStyle(name="header_style")
    header_style.font = openpyxl.styles.Font(bold=True, name='SimSun')
    header_style.alignment = content_alignment
    header_style.border = content_border
    header_style.fill = content_fill

    # 设置内容样式
    content_cell_style = openpyxl.styles.NamedStyle(name="content_cell_style")
    content_cell_style.font = openpyxl.styles.Font(name='SimSun')
    content_cell_style.alignment = content_alignment
    content_cell_style.border = content_border
    content_cell_style.fill = content_fill

    # 设置超链接样式,添加蓝色下划线
    link_cell_style = openpyxl.styles.NamedStyle(name="link_cell_style")
    link_cell_style.font = openpyxl.styles.Font(name='SimSun', underline='single', color='0000FF')
    link_cell_style.alignment = content_alignment
    link_cell_style.border = content_border
    link_cell_style.fill = content_fill

    data = generate_random_data(10000, 100)
    for row_idx, row_data in enumerate(data):
        row = []
        for col_idx, value in enumerate(row_data):
            cell = openpyxl.cell.cell.WriteOnlyCell(worksheet, value=value)
            if row_idx == 0:
                cell.style = header_style
                worksheet.column_dimensions[openpyxl.utils.get_column_letter(col_idx + 1)].width = 17
            else:
                content_fill = openpyxl.styles.PatternFill(start_color=None, end_color=None, fill_type='solid')
                content_cell_style.fill = content_fill
                if isinstance(value, int) or isinstance(value, float):
                    # 根据数字值设置背景颜色
                    if value < 60:
                        bg_color = 'FF0000'  # 红色
                    elif value < 80:
                        bg_color = 'FFFF00'  # 黄色
                    elif value < 100:
                        bg_color = '00FF00'  # 绿色
                    else:
                        bg_color = 'FFFFFF'  # 白色
                    if bg_color:
                        content_fill = openpyxl.styles.PatternFill(start_color=bg_color, end_color=bg_color,
                                                                   fill_type='solid')
                    content_cell_style.fill = content_fill
                    cell.style = content_cell_style
                elif value in LINK_DICT.keys():
                    # 为超链接列添加超链接
                    link = LINK_DICT.get(value)
                    cell.hyperlink = link
                    cell.style = link_cell_style
                else:
                    cell.style = content_cell_style
            row.append(cell)
        worksheet.append(row)

    workbook.save('openpyxl_example_fast.xlsx')


if __name__ == '__main__':
    xlwt_time = timeit.timeit(test_xlwt, number=1)
    print(f"xlwt: {xlwt_time:.5f} seconds")

    xlsxwriter_time = timeit.timeit(test_xlsxwriter, number=1)
    print(f"xlsxwriter: {xlsxwriter_time:.5f} seconds")

    openpyxl_time = timeit.timeit(test_openpyxl, number=1)
    print(f"openpyxl: {openpyxl_time:.5f} seconds")

    openpyxl_time = timeit.timeit(test_openpyxl_fast, number=1)
    print(f"openpyxl (write_only=True): {openpyxl_time:.5f} seconds")
进阶版

 

 

丰富内容和样式

  1:内容 数字(整数、小数)、字符串(长度不一)、日期格式、链接

      2:样式 表头(字体、居中、加粗)(边框)(数字类型、背景色)(链接)

结果

 

 

百列千行
xlwt: 1.13747 seconds
xlsxwriter: 0.76269 seconds
openpyxl: 14.06028 seconds
openpyxl (write_only=True): 14.18843 seconds
百列万行
xlwt: 11.28110 seconds
xlsxwriter: 7.14836 seconds
openpyxl: 757.70731 seconds
openpyxl (write_only=True): 751.23908 seconds

 

posted @ 2023-07-21 07:18  逍遥小天狼  阅读(24)  评论(0编辑  收藏  举报