easyExcel分批导入文件
原文地址 : https://blog.csdn.net/qq_42060055/article/details/116139016
一些关于easyExcel导入文件操作
需求: 导入大数据量文件 其中数据达到万级、十万级, 错误文件进行错误单元格标红, 可导出修改完继续导入
由于数据量多大 一次行全部读到内存中可能会导致内存溢出问题
使用easyExcel poi的监听器进行操作
三步曲:
1、解析excel为inputStream流, 读取流,解析excel
2、判断excel中每条数据的格式, 正确和错误相对记录
3、通过监听器每解析150条数据, 进行入库操作, 错误数据存在内存中(考虑错误数据不多的情况)
// 这里用到ossfs 反正就是读取excel为input流, 涉及到两个系统之间流的传输, 这里直接把文件上传到oss try { in = new FileInputStream(localFileName); } catch (FileNotFoundException e) { in = HttpUtil.io(HttpUtil.Atom.builder().url(diseaseDto.getFileUrl()).build()); }
// 这里解析excel其中 OltHosIcdDiseaseListener为自定义监听器 try { LoggerUtil.info(LOGGER, "开始解析IcdDisease"); OltHosIcdDiseaseListener oltHosIcdDiseaseListener = new OltHosIcdDiseaseListener(isCfgPrd, icdCodeList, delIcdCodeList, diseaseDto, oltConfigService, exportTaskHandler); excelReader = EasyExcel.read(in, oltHosIcdDiseaseListener).build(); ReadSheet readSheet = EasyExcel.readSheet(0).build(); excelReader.read(readSheet); } finally { try { if (in != null) { in.close(); } if (excelReader != null) { // 这里千万别忘记关闭,读的时候会创建临时文件,到时磁盘会崩的 excelReader.finish(); } } catch (Exception e) { LoggerUtil.error(LOGGER, "{0},{1}", e.getMessage(), e); } }
// 通过构造函数, 初始化一些list与对象 // 这个是导入的核心方法, 所有的导入逻辑, 判断逻辑与入库都在这里操作 // 采用无对象方式 @Slf4j public class OltHosIcdDiseaseListener extends AnalysisEventListener<Map<Integer, String>> { private OltConfigService oltConfigService; private ExportTaskHandler exportTaskHandler; private static final int batchCount = 150; private int countNum = 0; private boolean isCfgPrd; private int successCount = 0; private int errorCount = 0; private List<String> checkRepeatCode = new ArrayList<>(); private List<String> icdCodeList; private List<String> delIcdCodeList; private OltHosIcdDiseaseDto diseaseDto; private List<OltHosIcdDiseaseDto> successList = new ArrayList<>(); private List<OltHosIcdDiseaseDto> errorList = new ArrayList<>(); private List<OltHosIcdDiseaseDto> tempErrorList = new ArrayList<>(); public OltHosIcdDiseaseListener(boolean isCfgPrd, List<String> icdCodeList, List<String> delIcdCodeList, OltHosIcdDiseaseDto diseaseDto, OltConfigService oltConfigService, ExportTaskHandler exportTaskHandler) { this.isCfgPrd = isCfgPrd; this.icdCodeList = icdCodeList; this.delIcdCodeList = delIcdCodeList; this.diseaseDto = diseaseDto; this.oltConfigService = oltConfigService; this.exportTaskHandler = exportTaskHandler; } /** * 这个每一条数据解析都会来调用 * data --> 实体类 * analysisContext excel信息 */ @Override public void invoke(Map<Integer, String> data, AnalysisContext context) { int rouNumber = context.readRowHolder().getRowIndex() + 1; // 这里是因为表头在第二行 if (rouNumber == 2) { // 这里是校验表头 checkExcelHead(data); } else if (rouNumber > 2) { // 这里是校验数据 checkReadData(data); } // 超过150条就先入库 if (countNum >= batchCount) { // 处理excel导出的正确数据 batchOperateData(); } countNum++; } /** * @author songhc * @create * @desc 调用完成监听, 确保数据已全部处理完 **/ @Override public void doAfterAllAnalysed(AnalysisContext analysisContext) { // 这里也要保存数据,确保最后遗留的数据也存储到数据库 Map<String, Object> objMap = new HashMap<>(); // 处理excel导出的正确数据 batchOperateData(); // 错误数据填充oss表格 Object object = uploadErrorData(errorList, diseaseDto); objMap.put("errorInfo", object); objMap.put("successCount", successCount); objMap.put("errorCount", errorCount); // 错误数据记录redis, 下次使用 RedisStringHandler.set(String.format(RedisKeyConstants.EXPORT_ERROR_RESULT, "disease" + diseaseDto.getUserId() + "_" + diseaseDto.getRgId() + "_" + diseaseDto.getHosId()), JSONObject.toJSONString(objMap)); } // 这里是封装所有的错误数据 // 包括封装单元格 private Object uploadErrorData (List<OltHosIcdDiseaseDto> errorList, OltHosIcdDiseaseDto dto) { Map<Integer, List<Integer>> map = new HashMap<>(); LinkedList<OltHosIcdDiseaseDto> newErrorList = new LinkedList<>(); if (CollectionUtils.isNotEmpty(errorList)) { for (int i = 0; i < errorList.size(); i++) { OltHosIcdDiseaseDto e = errorList.get(i); List<Integer> integerList = new ArrayList<>(); if (e.getErrorReasonMap() != null && !e.getErrorReasonMap().isEmpty()) { List<String> reasonList = new ArrayList<>(); for (Integer key: e.getErrorReasonMap().keySet()) { // 标红单元格 integerList.add(key); reasonList.add(e.getErrorReasonMap().get(key)); } map.put(i + 2, integerList); e.setErrorReason(String.join("、", reasonList)); } newErrorList.add(e); } } // 封装导出服务入参 String uuid = UUIDUtil.create(); String errorFileName = dto.getHosName() + "(待处理诊断数据)" + dto.getStatDataStr() + ".xlsx"; SysExportRecordDto sysExportRecordDto = SysExportRecordDto.builder().batchId(uuid).userId(dto.getUserId()).pfCode(dto.getPfCode()) .source(dto.getSource()).fileName(errorFileName).creator(dto.getCreator()).operator(dto.getCreator()).build(); // 创建导出记录 QueueHandler.createTaskRecord(sysExportRecordDto); // 获取url // 伪代码 String fileName = "aaa.xlsx"; String BUCKET_NAME = "bbb"; String fileUrl = String.format(OssClientConfig.OSS_REAL_PATH, BUCKET_NAME, UploadFileType.getFolderByType(UploadFileType.REPORT)).concat(fileName); // 加入异步线程任务 this.exportTaskHandler.exportIcdErrorDiseaseData(OltErrorResult.builder().map(map).errorList(newErrorList) .fileName(errorFileName).source(dto.getSource()).build(), uuid, errorFileName, fileUrl); // 构建返回队列信息 return QueueHandler.buildQueueInfo(sysExportRecordDto); } private void batchOperateData() { checkErrorExcelList(tempErrorList, icdCodeList); checkSuccessExcelList(successList, tempErrorList, icdCodeList); // 将临时错误数据存储到所有错误数据列表 this.errorList.addAll(tempErrorList); // 清理list this.successList.clear(); this.tempErrorList.clear(); this.countNum = 0; } private void checkExcelHead(Map<Integer, String> data) { boolean templateFlag = true; // 第二行 校验excel标题 try { String diseaseCategoryStr = data.get(0); if (StringUtils.isBlank(diseaseCategoryStr) || !"诊eee(必填)".equals(diseaseCategoryStr)) { templateFlag = false; } } catch (Exception e) { templateFlag = false; } try { String icdNameStr = data.get(1); if (StringUtils.isBlank(icdNameStr) || !"医vv称(必填)".equals(icdNameStr)) { templateFlag = false; } } catch (Exception e) { templateFlag = false; } try { String icdCodeStr = data.get(2); if (StringUtils.isBlank(icdCodeStr) || !"医aa(必填)".equals(icdCodeStr)) { templateFlag = false; } } catch (Exception e) { templateFlag = false; } if (!templateFlag) { throw new PlatException("文件模版不匹配"); } } private void checkReadData(Map<Integer, String> data) { // 循环cell OltHosIcdDiseaseDto temDisDto = OltHosIcdDiseaseDto.buildDefault(); temDisDto.setHosId(diseaseDto.getHosId()); // key为所在的列, value为错误原因 Map<Integer, String> map = new HashMap<>(); boolean flag = true; try { // 解析第二列 String diseaseCategory = data.get(0); if (StringUtils.isBlank(diseaseCategory)) { temDisDto.setDiseaseCategoryStr(StringUtils.EMPTY); map.put(0, "aaa为空"); flag = false; } else { temDisDto.setDiseaseCategoryStr(diseaseCategory); } } catch (Exception e) { temDisDto.setDiseaseCategoryStr(StringUtils.EMPTY); map.put(0, "bbb为空"); flag = false; } try { String icdName = data.get(1); if (StringUtils.isBlank(icdName)) { temDisDto.setIcdName(StringUtils.EMPTY); map.put(1, "为空"); flag = false; } else { temDisDto.setIcdName(icdName); } } catch (Exception e) { temDisDto.setIcdName(StringUtils.EMPTY); map.put(1, "ccc称为空"); flag = false; } try { String icdCode = data.get(2); if (StringUtils.isBlank(icdCode)) { temDisDto.setIcdCode(StringUtils.EMPTY); map.put(2, "ddd为空"); flag = false; } else { temDisDto.setIcdCode(icdCode); } } catch (Exception e) { temDisDto.setIcdCode(StringUtils.EMPTY); map.put(2, "ddd为空"); flag = false; } try { if (!DiseaseCategory.TCM_SYNDROME.getDesc().equals(temDisDto.getDiseaseCategoryStr())) { String standardIcdName = data.get(3); if (isCfgPrd && StringUtils.isBlank(standardIcdName)) { temDisDto.setStandardIcdName(StringUtils.EMPTY); map.put(3, "vvv为空"); flag = false; } else { temDisDto.setStandardIcdName(standardIcdName); } } } catch (Exception e) { temDisDto.setStandardIcdName(StringUtils.EMPTY); map.put(3, "vvv为空"); flag = false; } try { if (!DiseaseCategory.TCM_SYNDROME.getDesc().equals(temDisDto.getDiseaseCategoryStr())) { String standardIcdCode = data.get(4); if (isCfgPrd && StringUtils.isBlank(standardIcdCode)) { temDisDto.setStandardIcdCode(StringUtils.EMPTY); map.put(4, "eee为空"); flag = false; } else { temDisDto.setStandardIcdCode(standardIcdCode); } } } catch (Exception e) { temDisDto.setStandardIcdCode(StringUtils.EMPTY); map.put(4, "eee为空"); flag = false; } temDisDto.setErrorReasonMap(map); // 如果flag为 false 说明数据有问题 if (!flag) { tempErrorList.add(temDisDto); } else { successList.add(temDisDto); } } private void checkErrorExcelList(List<OltHosIcdDiseaseDto> errorList, List<String> icdCodeList) { if (CollectionUtils.isNotEmpty(errorList)) { // 错误就往里加, 正确重新定义列表 errorList.forEach(e -> { Map<Integer, String> map = new HashMap<>(); if (!DiseaseCategory.belongTo(e.getDiseaseCategoryStr())) { map.put(0, "aaa不正确"); } else { e.setDiseaseCategory(DiseaseCategory.getCodeByDesc(e.getDiseaseCategoryStr())); } // excel是否存在重复数据 if (checkRepeatCode.contains(e.getIcdCode())) { map.put(2, "bbb重复"); } if (CollectionUtils.isNotEmpty(icdCodeList) && icdCodeList.contains(e.getIcdCode())) { map.put(2, "ttt重复"); } if (e.getErrorReasonMap() != null && !e.getErrorReasonMap().isEmpty()) { Map<Integer, String> errorReasonMap = e.getErrorReasonMap(); errorReasonMap.putAll(map); e.setErrorReasonMap(errorReasonMap); } errorCount++; }); } } /** * 侵入式给errorList赋值 * @param list * @param errorList * @param icdCodeList */ private void checkSuccessExcelList(List<OltHosIcdDiseaseDto> list, List<OltHosIcdDiseaseDto> errorList, List<String> icdCodeList) { List<OltHosIcdDiseaseDto> newList = new ArrayList<>(); if (CollectionUtils.isNotEmpty(list)) { // 错误就往里加, 正确重新定义列表 list.forEach(e -> { Map<Integer, String> map = new HashMap<>(); boolean flag = false; // 判 if (!DiseaseCategory.belongTo(e.getDiseaseCategoryStr())) { map.put(0, "不正确"); flag = true; } else { e.setDiseaseCategory(DiseaseCategory.getCodeByDesc(e.getDiseaseCategoryStr())); } // excel是否存在重复数据 if (checkRepeatCode.contains(e.getIcdCode())) { map.put(2, "重复"); flag = true; } else { // 判断诊断编码 if (CollectionUtils.isNotEmpty(icdCodeList) && icdCodeList.contains(e.getIcdCode())) { map.put(2, "重复"); flag = true; } } e.setErrorReasonMap(map); if (flag) { errorCount++; errorList.add(e); } else { e.setIcdPinyin(HzUtils.getPinyinCap(e.getIcdName(), HzUtils.CaseType.UPPERCASE)); e.setIcdWb(HzUtils.getWbCap(e.getIcdName(), HzUtils.CaseType.UPPERCASE)); newList.add(e); checkRepeatCode.add(e.getIcdCode()); successCount++; } }); } // 正确数据入库 if (CollectionUtils.isNotEmpty(newList)) { oltConfigService.batchAddHosIcdDisease(delIcdCodeList, newList); } }
其中,导入错误数据用了easyExcel的模版填充方式, 模版存于oss上
/** * @author songhc * @create * @desc 导出错误数据 **/ @Async public void exportIcdErrorDiseaseData(OltErrorResult dto, String fileBatch, String fileName, String fileUrl) { Map<Integer, List> map = new HashMap<>(); map.put(0, dto.getErrorList()); Map<Integer, Map<Integer, List<Integer>>> styleMap = new HashMap<>(); styleMap.put(0, dto.getMap()); ExportExistHandler.exportExistTemplateData(map, styleMap, fileBatch, fileName, fileUrl); }
接下来就是填充错误模版的实现
/** * @param errorMap key为sheetNo, value为填充的数据 * @param styleMap key为sheetNo, value为错误数据坐标 * @param fileBatch 批次号 * @param fileName 文件名 * @param fileUrl 文件路径 * @description 导出服务封装方法(无需分页查询, 数据为动态传入) * @className exportNoModelData */ public static void exportExistTemplateData(Map<Integer, List> errorMap, Map<Integer, Map<Integer, List<Integer>>> styleMap, String fileBatch, String fileName, String fileUrl) { String ossFileName = fileName.substring(0, fileName.lastIndexOf('.')) .concat("-").concat(LocalDateTime.now() .format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"))).concat(fileName.substring(fileName.lastIndexOf('.'))); InputStream inputStream = HttpUtil.io(HttpUtil.Atom.builder().url(fileUrl).build()); if (null == inputStream) { return; } String localFileName = String.format(TaskNoteHandler.staticExportConfig.getExportPath(), ossFileName); ExcelWriter excelWriter = null; int resultCount = 0; try { if (errorMap != null && !errorMap.isEmpty()) { excelWriter = EasyExcel.write(localFileName) .withTemplate(inputStream) .build(); // for循环是一个excel可能有多个sheet的兼容写法 for (Integer i: errorMap.keySet()) { // 这里使用easyExcel的 registerWriteHandler 方法, 自定义CellColorSheetWriteHandler实现, 给每一个单元格填充颜色 WriteSheet writeSheet = EasyExcel.writerSheet(i).registerWriteHandler(new CellColorSheetWriteHandler(styleMap.get(i), IndexedColors.RED1.getIndex())).build(); excelWriter.fill(errorMap.get(i), writeSheet); } } } catch (Exception e){ LoggerUtil.error(LOGGER, "文件写入异常,error{0}", e); // 文件导出失败 TaskNoteHandler.doUploadFailed(fileBatch, resultCount); return; } finally { // 关闭流 if (excelWriter != null) { excelWriter.finish(); } } // 1、上传文件(多种方案);2、更新记录 TaskNoteHandler.doUploadAndNote(fileBatch, ossFileName, localFileName, resultCount); }
/** * @description 自定义单元格格式拦截器 * @className CellColorSheetWriteHandler * @package * @Author songhc */ public class CellColorSheetWriteHandler implements CellWriteHandler { /** * map * key:第i行 * value:第i行中单元格索引集合 */ private Map<Integer, List<Integer>> map; /** * 颜色 */ private Short colorIndex; /** * 有参构造 */ public CellColorSheetWriteHandler(Map<Integer, List<Integer>> map, Short colorIndex) { this.map = map; this.colorIndex = colorIndex; } /** * 无参构造 */ public CellColorSheetWriteHandler() { } @Override public void beforeCellCreate(WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, Row row, Head head, Integer integer, Integer integer1, Boolean aBoolean) { } /** * 在单元格创建后调用 */ @Override public void afterCellCreate(WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, Cell cell, Head head, Integer integer, Boolean aBoolean) { } @Override public void afterCellDataConverted(WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, CellData cellData, Cell cell, Head head, Integer integer, Boolean aBoolean) { } /** * 在单元上的所有操作完成后调用 */ @Override public void afterCellDispose(WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, List<CellData> cellDataList, Cell cell, Head head, Integer relativeRowIndex, Boolean isHead) { //当前行的第i列 int i = cell.getColumnIndex(); // 根据单元格获取workbook Workbook workbook = cell.getSheet().getWorkbook(); //不处理第一行 if (0 != cell.getRowIndex()) { List<Integer> integerList = map.get(cell.getRowIndex()); // 自定义单元格样式 if (CollectionUtils.isNotEmpty(integerList)) { if (integerList.contains(i)) { // 单元格策略 WriteCellStyle contentWriteCellStyle = new WriteCellStyle(); // 设置背景颜色白色 contentWriteCellStyle.setFillForegroundColor(colorIndex); // 设置垂直居中为居中对齐 contentWriteCellStyle.setVerticalAlignment(VerticalAlignment.CENTER); // 设置左右对齐为中央对齐 contentWriteCellStyle.setHorizontalAlignment(HorizontalAlignment.RIGHT); // 设置单元格上下左右边框为细边框 contentWriteCellStyle.setBorderBottom(BorderStyle.THIN); contentWriteCellStyle.setBorderLeft(BorderStyle.THIN); contentWriteCellStyle.setBorderRight(BorderStyle.THIN); contentWriteCellStyle.setBorderTop(BorderStyle.THIN); // 创建字体实例 WriteFont cellWriteFont = new WriteFont(); // 设置字体大小 cellWriteFont.setFontName("宋体"); cellWriteFont.setFontHeightInPoints((short) 10); //设置字体颜色 // cellWriteFont.setColor(IndexedColors.BLACK1.getIndex()); //单元格颜色 //contentWriteCellStyle.setFillForegroundColor(IndexedColors.GREY_25_PERCENT.getIndex()); contentWriteCellStyle.setWriteFont(cellWriteFont); CellStyle cellStyle = StyleUtil.buildHeadCellStyle(workbook, contentWriteCellStyle); //设置当前行第i列的样式 cell.getRow().getCell(i).setCellStyle(cellStyle); } } } } }
对于一个excel多sheet, 操作也是一样
// 不同的是这里可以定义多个监听器 // readSheet(0) —-> 这里的数据代表sheet的位置 OltDrugFrequencyListener oltDrugFrequencyListener = new OltDrugFrequencyListener(isCfgPrd, dfCodeList, frequencyDto, oltConfigService); OltDrugUsageListener oltDrugUsageListener = new OltDrugUsageListener(isCfgPrd, dUCodeList, OltDrugUsageDto.builder().hosId(frequencyDto.getHosId()).build(), oltConfigService); OltDrugDurationListener oltDrugDurationListener = new OltDrugDurationListener(durationCodeList, OltDrugDurationDefDto.builder().hosId(frequencyDto.getHosId()).build(), oltConfigService); ReadSheet readSheet = EasyExcel.readSheet(0).registerReadListener(oltDrugFrequencyListener).build(); ReadSheet readSheet2 = EasyExcel.readSheet(2).registerReadListener(oltDrugUsageListener).build(); ReadSheet readSheet4 = EasyExcel.readSheet(4).registerReadListener(oltDrugDurationListener).build(); excelReader.read(readSheet, readSheet2, readSheet4);
经过测试, 该方法导入2W条数据差不多需要10秒, 也不会影响内存