百万千万级Excel导出

一、前提

  • 一个Excel的sheet页最多104万行数据
  • 需分页处理-数据库分页或内存分页(这里需要注意内存大小问题)

二、处理思路

2.1、同步处理

*单线程处理-前端轮训等待时间过长,不可取
*多线程处理-前端依然轮训等待时间过长,后端还会因为大量线程消耗内存,导致内存不够用,不可取

2.2、异步处理

*前端点击导出按钮后,提示用户,导出任务已建立,待导出成功后,发消息通知用户下载

*引入MQ,自发自收,前端每次点击导出,任务需到MQ中去排队,这样能保证每个Pod中只有一个导出任务在执行

*消费者消费到下载消息时,做分页查询(数据库或内存)处理,并使用EasyExcel做多sheet也写入

*写入完毕,上传文件到OSS,并获取下载链接

*钉消息通知用户下载完成,附带下载地址

三、伪代码

/**
 * @author chong.du
 * @description
 * @date 2023/3/20 16:41
 */
@Slf4j
@Component
public class ExcelUtil {

  /**
   * 每页的条数
   */
  private static final int PER_SHEET_ROW_COUNT = 1000000;

  @Autowired
  private OssClientUtils ossClientUtils;
  @Autowired
  private GroupService groupService;
  @Autowired
  private DetailService detailService;

   public  void export(String empId){

    ExcelWriter writer =null;
    try {

        StopWatch stopWatch = new StopWatch();
        stopWatch.start("归集数据查询耗时");
        List<Group> sheetOneList = groupService.queryGroup();
        //分组归集sheet页查询
        log.info("分组-导出-共计:{}条", sheetOneList.size());
        stopWatch.stop();
        log.info(stopWatch.prettyPrint());
        stopWatch.start("明细数据查询耗时");
        List<Detail> sheetTwoList = Lists.newArrayList();
        //按分组
        Map<String, List<Group>> groupList = sheetOneList.stream()
            .collect(Collectors.groupingBy(Group::getGroupId));
        groupList.entrySet().stream().forEach(entry->{
          List<Detail> exportDtoList = detailService.getDetailDtoList(entry.getKey());
          if (!CollectionUtils.isEmpty(exportDtoList)) {
            sheetTwoList.addAll(exportDtoList);
          }
        });
        sheetTwoList.sort(Comparator.comparing(Detail::getHeadId));
        stopWatch.stop();
        log.info(stopWatch.prettyPrint());
        log.info("导出明细-导出-共计:{}条", sheetTwoList.size());

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        writer = EasyExcel.write(out).autoCloseStream(Boolean.FALSE)
            .registerWriteHandler(new AutoColumnWidthWriteHandler())
            .build();
        //1、分组sheet写入
        WriteSheet sheet1 = EasyExcel.writerSheet(0, "分组列表").head(Group.class).build();
        writer.write(sheetOneList, sheet1);
        //2、明细sheet表分页,每1000000数据为一个sheet
        int totalPage = sheetTwoList.size() / PER_SHEET_ROW_COUNT + (sheetTwoList.size() % PER_SHEET_ROW_COUNT == 0 ? 0 : 1);
        List<Detail> list = null;
        for (int i = 1; i <= totalPage; i++) {
          int skipNum = PER_SHEET_ROW_COUNT * (i - 1);
          list = sheetTwoList.stream()
              .skip(skipNum)
              .limit(PER_SHEET_ROW_COUNT)
              .collect(Collectors.toList());
          WriteSheet sheet2 = EasyExcel.writerSheet(i, "明细列表" + i).head(Detail.class).build();
          writer.write(list, sheet2);
        }
        //3、关闭流
        writer.finish();

        //4、上传文件到OSS
        log.info("=====>{}-文件下载消费者接收到消息后,写入Excel完成,开始上传OSS",empId);
        String yyyyMMdd_hHmmss = DateUtil.format(new Date(), "yyyyMMdd_HHmmss");
        String fileName = StringUtils.join("明细列表导出_", yyyyMMdd_hHmmss, ".xlsx");
        String downloadUrl = ossClientUtils.putFileReturnUrl(new ByteArrayInputStream(out.toByteArray()), fileName);
        log.info("=====>{}-文件下载消费者接收到消息后,写入Excel完成,上传OSS完成",empId);

        //4、异步通知-发送钉钉模板消息通知
        Map<String, Object> params = Maps.newHashMap();
        params.put("msgTitle", "文件下载完成通知");
        params.put("content",fileName);
        params.put("linkWords","查看详情");
        params.put("link",downloadUrl);
        dingMsgSendUtil.sendTemplateMsgToEmp(empId,params);
        log.info("=====>{}-文件下载消费者接收到消息后,写入Excel完成,发送钉钉消息完成",empId);

    } catch (Exception e) {
      log.error("=====>文件下载消费消息异常, msg:{}", e);
      e.printStackTrace();
    }
    finally{
      if (Objects.nonNull(writer)) {
        writer.finish();
      }
    }
  }
}
posted @ 2023-03-20 17:17  npe0  阅读(299)  评论(0编辑  收藏  举报