POI处理excel数据量大文件工具类

POI处理excel数据量大文件工具类(单个)

maven依赖

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.17</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.7</version>
</dependency>
<dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
    <version>25.1-jre</version>
</dependency>

ExcelUtils

package com.test.util;

import com.google.common.collect.Lists;
import com.test.entity.WayInfo;
import org.apache.commons.lang3.StringUtils;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class ExcelUtils {
    public ExcelUtils() {
    }
    /**
     * 获取excel的所有数据<br/>
     * 所有数据类型都是String<br/>
     * 会以第一行数据的列数为总列数,所以第一行的数据必须都不为空,否则可能出java.lang.IndexOutOfBoundsException
     * @param filePath 文件路径
     * @return
     */
    public static List<List<String>> getAllData(String filePath) {
        if (StringUtils.isBlank(filePath)) {
            throw new IllegalArgumentException("传入文件路径不能为空");
        }
        try {
            return LargeExcelFileReadUtil.getRowsFromSheetOne(filePath);
        } catch (Exception e) {
            // LOGGER.info("获取excel[" + filePath + "]表头失败,原因:", e);
            e.printStackTrace();
        }
        return Lists.newArrayList();
    }

    /**
     * 获取excel标题头部 title
     * @param excelTitle excel List0的list
     * @return title
     */
    public static Map<String,Integer> getExcelTitle(List<String> excelTitle){
        HashMap hashMap = new HashMap();
        for (int j = 0; j < excelTitle.size(); j++){
            hashMap.put(excelTitle.get(j),j);
        }
        return hashMap;
    }



    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        String filepath = "D:\\a_war\\execl.xlsx";
        List<List<String>> result = ExcelUtils.getAllData(filepath);
        long end = System.currentTimeMillis();
        Map<String, Integer> excelTitle = getExcelTitle(result.get(0));
        System.out.println("解析时间:"+(end - start) / 1000+"秒。");
        //根据下标标题下标获取该行下标内容。
        for (int i = 1; i < result.size(); i++){
            List<String> row = result.get(i);
            WayInfo wayInfo = new WayInfo();
            wayInfo.setWayStartTime(row.get(excelTitle.get("运单提货日期")));
            wayInfo.setWaySourceAddr(row.get(excelTitle.get("起运地")));
            wayInfo.setWayDestAddr(row.get(excelTitle.get("目的地")));
        }
    }


}

ExcelClass类

package utils;
public class ExcelClass {
    private String userName;//名称
    private String sex;//性别
    private String phone;//手机
    //省略get,set方法...
}

LargeExcelFileReadUtil

package utils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.InputStream;
import java.util.List;
import java.util.Objects;
public class LargeExcelFileReadUtil {
    // 处理一个sheet
    public static List<List<String>> getRowsFromSheetOne(String filename) throws Exception {
        InputStream inputStream = null;
        OPCPackage pkg = null;
        MultiRowHandler multiRowHandler = null;
        try {
            pkg = OPCPackage.open(filename);
            XSSFReader r = new XSSFReader(pkg);
            SharedStringsTable sst = r.getSharedStringsTable();
            multiRowHandler = new MultiRowHandler(sst);
            XMLReader parser = XMLReaderFactory.createXMLReader("com.sun.org.apache.xerces.internal.parsers.SAXParser");
            parser.setContentHandler(multiRowHandler);
            inputStream = r.getSheet("rId1");
            InputSource sheetSource = new InputSource(inputStream);
            parser.parse(sheetSource);
            return multiRowHandler.getRows();
        } catch (Exception e){
            throw e;
        } finally {
            if (Objects.nonNull(pkg)){
                pkg.close();
            }
            if (Objects.nonNull(inputStream)) {
                inputStream.close();
            }
        }
    }
}

MultiRowHandler

package utils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;
/**
 * 获取完整excel数据的handler<br/>
 * @author Administrator
 */
public class MultiRowHandler extends DefaultHandler {
    private int curRowNum = 0;// 行号,从1开始
    private int curColIndex = -1;// 列索引,从0开始
    private int colCnt = 0;// 列数,取第一行列数做为列总数
    private String cellType = "";
    private SharedStringsTable sst;
    private String lastContents;
    private boolean nextIsString;
    private String cellPosition;
    private List<String> head = null;
    private List<String> curRowData = null;
    private boolean curRowIsBlank = true;// 当前是个空行
    private List<List<String>> rows = new ArrayList<>();
    public List<List<String>> getRows() {
        return rows;
    }
    public MultiRowHandler(SharedStringsTable sst) {
        this.sst = sst;
    }
    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
        if (name.equals("c")) {
            cellPosition = attributes.getValue("r");
            curColIndex = getColIndex(cellPosition);
            // 这是一个新行
            if (isNewRow(cellPosition)) {
                curRowNum = getRowNum(cellPosition);
                if (2 == curRowNum && Objects.nonNull(curRowData)) {
                    head = curRowData;
                    colCnt = head.size();
                }
                curRowData = getBlankRow(colCnt);
            }
            cellType = "";
            cellType = attributes.getValue("t");
            if ("s".equals(cellType)) {
                nextIsString = true;
            } else {
                nextIsString = false;
            }
        }
        // 清楚缓存内容
        lastContents = "";
    }

    private boolean isNewRow(String cellPosition) {
        // 坐标以A开头,后面跟数字 或者坐标行和当前行不一致的
        boolean newRow = Pattern.compile("^A[0-9]+$").matcher(cellPosition).find();
        if (!newRow) {
            int cellRowNum = getRowNum(cellPosition);
            newRow = (cellRowNum != curRowNum);
        }
        return newRow;
    }

    /**
     * 根据列坐标获取行号,从1开始,返回0时标示出错
     *
     * @param cellPosition
     *      列坐标,为A1,B23等
     * @return 行号,从1开始,返回0是为失败
     */
    private static int getRowNum(String cellPosition) {
        String strVal = Pattern.compile("[^0-9]").matcher(cellPosition).replaceAll("").trim();// 获取坐标中的数字
        if (StringUtils.isNotBlank(strVal)) {
            return Integer.valueOf(strVal);
        }
        return 0;
    }

    /**
     * 根据列坐标返回当前列索引,从0开始,返回-1时标示出错<br/>
     * A1->0; B1->1...AA1->26
     *
     * @param cellPosition
     *      列坐标,为A1,B23等
     * @return 列索引,从0开始,返回-1是为失败,A1->0; B1->1...AA1->26
     */
    private static int getColIndex(String cellPosition) {
        int index = -1;
        int num = 65;// A的Unicode码
        int length = cellPosition.length();
        for (int i = 0; i < length; i++) {
            char c = cellPosition.charAt(i);
            if (Character.isDigit(c)) {
                break;// 确定指定的char值是否为数字
            }
            index = (index + 1) * 26 + (int) c - num;
        }
        return index;
    }

    /**
     * 返回一个全部为空字符串的空行
     *
     * @param cnt
     * @return
     */
    private List<String> getBlankRow(int cnt) {
        List<String> result = new ArrayList<>(cnt);
        for (int i = 0; i < cnt; i++) {
            result.add(i, "");
        }
        curRowIsBlank = true;
        return result;
    }

    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        if (nextIsString) {
            int idx = Integer.parseInt(lastContents);
            lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
            nextIsString = false;
        }

        if (name.equals("v")) {
            // System.out.println(MessageFormat.format("当前列定位:{0},当前行:{1},当前列:{2},当前值:{3}",
            // cellPosition, curRowNum,
            // curColIndex, lastContents));
            if (Objects.isNull(head)) {
                curRowData.add(lastContents);
            } else {
                curRowData.set(curColIndex, lastContents);
            }
            curRowIsBlank = false;
            // 这是一个新行
            if (isNewRow(cellPosition)) {
                if (Objects.nonNull(curRowData)) {
                    if (curRowIsBlank) {
                        curRowData.clear();// 如果当前行是空行,则清空当前行数据
                    }
                    rows.add(curRowData);
                }
            }

        }
    }

    @Override
    public void endDocument() throws SAXException {
        if (Objects.nonNull(curRowData) && !curRowIsBlank) {
            rows.add(curRowData);// 最后一行在上面不好加入,最后一行全是空行的不加入
        }
        super.endDocument();
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        lastContents += new String(ch, start, length);
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        lastContents += "";
    }

    public static void main(String[] args) {
        System.out.println(getColIndex("BC2"));
    }
}
posted @   渝思  阅读(371)  评论(0编辑  收藏  举报
(评论功能已被禁用)
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 【杭电多校比赛记录】2025“钉耙编程”中国大学生算法设计春季联赛(1)
点击右上角即可分享
微信分享提示