解析 CSV、PDF 文件

/** Matches one cell together with its terminating comma; the cell may begin with double-quoted sections. */
private static final Pattern CSV_CELL = Pattern.compile("(\"[^\"]*(\"{2})*[^\"]*\")*[^,]*,");

/** Captures (group 1) the cell text without its enclosing quotes and without the terminating comma. */
private static final Pattern CSV_CELL_CONTENT = Pattern.compile("(?sm)\"?([^\"]*(\"{2})*[^\"]*)\"?.*,");

/** Matches a doubled double quote, i.e. an escaped quote inside a quoted cell. */
private static final Pattern CSV_DOUBLED_QUOTE = Pattern.compile("(?sm)(\"(\"))");

/**
 * Parses an uploaded CSV file into a list of rows.
 * Each cell is one String, each line of the file becomes one inner list,
 * and all rows are collected, in file order, into the returned list.
 *
 * <p>The input is decoded with the JVM's default charset. The file is read
 * line by line, so a quoted cell that spans several lines is not supported.
 *
 * <p>Failures are not propagated: if reading fails, the stack trace is printed
 * and the rows parsed up to that point are returned.
 *
 * @param file the uploaded CSV file
 * @return one list of cell values per line; never {@code null}
 */
public static List<List<String>> importCsv(MultipartFile file) {
    List<List<String>> dataList = new ArrayList<>();
    // try-with-resources closes the reader (and the wrapped stream) on every path.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(file.getInputStream()))) {
        String rec; // one line of the file
        while ((rec = reader.readLine()) != null) {
            dataList.add(splitCsvLine(rec));
        }
    } catch (Exception e) {
        // Keep the best-effort contract (return what was parsed so far),
        // but no longer hide the failure completely.
        e.printStackTrace();
    }
    return dataList;
}

/** Splits a single CSV line into its unquoted cell values. */
private static List<String> splitCsvLine(String rec) {
    // CSV_CELL only recognises cells that are followed by a comma, so a line
    // without a trailing comma would lose its last cell. Terminate such lines
    // explicitly; empty lines and lines already ending in a comma are left as-is.
    String line = (rec.isEmpty() || rec.endsWith(",")) ? rec : rec + ",";

    List<String> cells = new ArrayList<>();
    Matcher cellMatcher = CSV_CELL.matcher(line);
    while (cellMatcher.find()) {
        String cell = cellMatcher.group();
        // Strip enclosing quotes and the terminating comma.
        cell = CSV_CELL_CONTENT.matcher(cell).replaceAll("$1");
        // Collapse escaped quotes ("") into a single quote.
        cell = CSV_DOUBLED_QUOTE.matcher(cell).replaceAll("$2");
        cells.add(cell);
    }
    return cells;
}

解析 PDF 文件
需要的jar包,配置到maven <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.6</version> </dependency> //demo public static void main(String[] args) { try (PDDocument document = PDDocument.load(new File("pdf文件路径"))) { document.getClass(); if(!document.isEncrypted()) { PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.setSortByPosition(true); PDFTextStripper tStripper = new PDFTextStripper(); String pdfFileInText = tStripper.getText(document); String[] lines = pdfFileInText.split("\\r?\\n"); for(String line : lines) { System.out.println(line); } } } catch (InvalidPasswordException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }  

  

posted @ 2018-11-09 13:40  令狐る侠  阅读(433)  评论(0编辑  收藏  举报