解析 CSV、PDF 文件
/** * 解析csv文件 到一个list中 * 每个单元个为一个String类型记录,每一行为一个list。 * 再将所有的行放到一个总list中 * * @return * @throws IOException */ public static List<List<String>> importCsv(MultipartFile file) { List<List<String>> dataList = new ArrayList<>(); BufferedReader brReader = null; InputStreamReader inReader = null; try { inReader = new InputStreamReader(file.getInputStream()); brReader = new BufferedReader(inReader); String rec = null;//一行 String str;//一个单元格 while ((rec = brReader.readLine()) != null) { Pattern pCells = Pattern.compile("(\"[^\"]*(\"{2})*[^\"]*\")*[^,]*,"); Matcher mCells = pCells.matcher(rec); List<String> cells = new ArrayList<>(); //每行记录一个list //读取每个单元格 while (mCells.find()) { str = mCells.group(); str = str.replaceAll("(?sm)\"?([^\"]*(\"{2})*[^\"]*)\"?.*,", "$1"); str = str.replaceAll("(?sm)(\"(\"))", "$2"); cells.add(str); } dataList.add(cells); } } catch (Exception e) { } finally { if (brReader != null) { try { brReader.close(); } catch (IOException e) { e.printStackTrace(); } } if (inReader != null) { try { inReader.close(); } catch (IOException e) { e.printStackTrace(); } } } return dataList; }
解析pdf文件
需要的jar包,配置到maven <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.6</version> </dependency> //demo public static void main(String[] args) { try (PDDocument document = PDDocument.load(new File("pdf文件路径"))) { document.getClass(); if(!document.isEncrypted()) { PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.setSortByPosition(true); PDFTextStripper tStripper = new PDFTextStripper(); String pdfFileInText = tStripper.getText(document); String[] lines = pdfFileInText.split("\\r?\\n"); for(String line : lines) { System.out.println(line); } } } catch (InvalidPasswordException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }