# 使用 Python 生成 30 万条 Excel 测试数据
from openpyxl import Workbook
from concurrent.futures import ThreadPoolExecutor
# Row generator used by the worker threads.
def generate_data(start: int, end: int, sheet) -> None:
    """Append one generated test row per id in ``start..end`` (inclusive) to *sheet*.

    Each row holds ten fields derived from the id: id, name, age, city,
    gender, occupation, e-mail, phone, address and education.  The id is
    printed after its row has been appended.

    Args:
        start: First id to generate.
        end: Last id to generate (inclusive).  Nothing is generated when
            ``end < start``.
        sheet: Any object with an ``append(row)`` method; the only thing this
            function does with it is call ``append`` with a list.

    Note:
        No locking is performed here.  If several calls run concurrently
        against the same target, that target's ``append`` must be safe for
        concurrent use.
    """
    for i in range(start, end + 1):
        row = [
            i,
            f"姓名{i}",
            20 + i % 10,  # age cycles through 20..29
            f"城市{i % 100}",
            f"性别{i % 2}",
            f"职业{i % 5}",
            f"邮箱{i}@example.com",
            f"电话{i}",
            f"地址{i}",
            f"学历{i % 3}",
        ]
        sheet.append(row)
        # Progress output: one line per generated row.
        print(i)
# Create the workbook and take its default worksheet.
workbook = Workbook()
sheet = workbook.active
# Header row.
sheet.append(["编号", "姓名", "年龄", "城市", "性别", "职业", "邮箱", "电话", "地址", "学历"])
# Number of worker threads.
num_threads = 10
# Total number of rows and the share handled by each worker.
total_data = 300000
data_per_thread = total_data // num_threads
# Worksheet.append updates the sheet's current-row counter without any locking,
# so concurrent appends to one sheet can overwrite each other's rows.  Workers
# therefore never touch the shared sheet: each one fills a private list
# (generate_data only needs an object with an ``append`` method).
buffers = [[] for _ in range(num_threads)]
futures = []
# The ``with`` block shuts the pool down even if a worker raises.
with ThreadPoolExecutor(max_workers=num_threads) as pool:
    for i in range(num_threads):
        start = i * data_per_thread + 1
        end = (i + 1) * data_per_thread
        # The last worker also takes the remainder left by the integer division.
        if i == num_threads - 1:
            end = total_data
        future = pool.submit(generate_data, start, end, buffers[i])
        futures.append(future)
    # Wait for every worker; result() re-raises any exception raised in it.
    for future in futures:
        future.result()
# Copy the buffered rows into the sheet from this single thread, in id order.
for buffer in buffers:
    for row in buffer:
        sheet.append(row)
    buffer.clear()  # release each chunk as soon as it has been written
# Save the workbook to an Excel file.
workbook.save("data.xlsx")