百万千万级Excel导出
一、前提
- 一个Excel的sheet页最多104万行数据
- 需分页处理-数据库分页或内存分页(这里需要注意内存大小问题)
二、处理思路
2.1、同步处理
*单线程处理-前端轮训等待时间过长,不可取
*多线程处理-前端依然轮训等待时间过长,后端还会因为大量线程消耗内存,导致内存不够用,不可取
2.2、异步处理
*前端点击导出按钮后,提示用户,导出任务已建立,待导出成功后,发消息通知用户下载
*引入MQ,自发自收,前端每次点击导出,任务需到MQ中去排队,这样能保证每个Pod中只有一个导出任务在执行
*消费者消费到下载消息时,做分页查询(数据库或内存)处理,并使用EasyExcel做多sheet也写入
*写入完毕,上传文件到OSS,并获取下载链接
*钉消息通知用户下载完成,附带下载地址
三、伪代码
/**
* @author chong.du
* @description
* @date 2023/3/20 16:41
*/
@Slf4j
@Component
public class ExcelUtil {
/**
* 每页的条数
*/
private static final int PER_SHEET_ROW_COUNT = 1000000;
@Autowired
private OssClientUtils ossClientUtils;
@Autowired
private GroupService groupService;
@Autowired
private DetailService detailService;
public void export(String empId){
ExcelWriter writer =null;
try {
StopWatch stopWatch = new StopWatch();
stopWatch.start("归集数据查询耗时");
List<Group> sheetOneList = groupService.queryGroup();
//分组归集sheet页查询
log.info("分组-导出-共计:{}条", sheetOneList.size());
stopWatch.stop();
log.info(stopWatch.prettyPrint());
stopWatch.start("明细数据查询耗时");
List<Detail> sheetTwoList = Lists.newArrayList();
//按分组
Map<String, List<Group>> groupList = sheetOneList.stream()
.collect(Collectors.groupingBy(Group::getGroupId));
groupList.entrySet().stream().forEach(entry->{
List<Detail> exportDtoList = detailService.getDetailDtoList(entry.getKey());
if (!CollectionUtils.isEmpty(exportDtoList)) {
sheetTwoList.addAll(exportDtoList);
}
});
sheetTwoList.sort(Comparator.comparing(Detail::getHeadId));
stopWatch.stop();
log.info(stopWatch.prettyPrint());
log.info("导出明细-导出-共计:{}条", sheetTwoList.size());
ByteArrayOutputStream out = new ByteArrayOutputStream();
writer = EasyExcel.write(out).autoCloseStream(Boolean.FALSE)
.registerWriteHandler(new AutoColumnWidthWriteHandler())
.build();
//1、分组sheet写入
WriteSheet sheet1 = EasyExcel.writerSheet(0, "分组列表").head(Group.class).build();
writer.write(sheetOneList, sheet1);
//2、明细sheet表分页,每1000000数据为一个sheet
int totalPage = sheetTwoList.size() / PER_SHEET_ROW_COUNT + (sheetTwoList.size() % PER_SHEET_ROW_COUNT == 0 ? 0 : 1);
List<Detail> list = null;
for (int i = 1; i <= totalPage; i++) {
int skipNum = PER_SHEET_ROW_COUNT * (i - 1);
list = sheetTwoList.stream()
.skip(skipNum)
.limit(PER_SHEET_ROW_COUNT)
.collect(Collectors.toList());
WriteSheet sheet2 = EasyExcel.writerSheet(i, "明细列表" + i).head(Detail.class).build();
writer.write(list, sheet2);
}
//3、关闭流
writer.finish();
//4、上传文件到OSS
log.info("=====>{}-文件下载消费者接收到消息后,写入Excel完成,开始上传OSS",empId);
String yyyyMMdd_hHmmss = DateUtil.format(new Date(), "yyyyMMdd_HHmmss");
String fileName = StringUtils.join("明细列表导出_", yyyyMMdd_hHmmss, ".xlsx");
String downloadUrl = ossClientUtils.putFileReturnUrl(new ByteArrayInputStream(out.toByteArray()), fileName);
log.info("=====>{}-文件下载消费者接收到消息后,写入Excel完成,上传OSS完成",empId);
//4、异步通知-发送钉钉模板消息通知
Map<String, Object> params = Maps.newHashMap();
params.put("msgTitle", "文件下载完成通知");
params.put("content",fileName);
params.put("linkWords","查看详情");
params.put("link",downloadUrl);
dingMsgSendUtil.sendTemplateMsgToEmp(empId,params);
log.info("=====>{}-文件下载消费者接收到消息后,写入Excel完成,发送钉钉消息完成",empId);
} catch (Exception e) {
log.error("=====>文件下载消费消息异常, msg:{}", e);
e.printStackTrace();
}
finally{
if (Objects.nonNull(writer)) {
writer.finish();
}
}
}
}