Java 读取 PDF、Word、Excel 文件
用到的jar:
itextpdf-5.5.8.jar (PDF)
poi.jar、poi-scratchpad.jar (Word、Excel;读取 .doc 所用的 WordExtractor 位于 poi-scratchpad 中)
public class FileUtils { /** * 判断文件是否存在 * * @Title: isExcite * @param @param filePath * @param @return * @return boolean 返回类型 * @throws */ public static boolean isExcite(String filePath) { File file = new File(filePath); // 如果文件夹不存在则创建 if (!file.exists() && !file.isDirectory()) { return false; } else { return true; } } /** * * @Title: getPdfFileText * @Description: 获取指定位置pdf的文件内容 * @param @param fileName * @param @return * @param @throws IOException * @return String 返回类型 * @throws */ public static String getPdfFileText(String fileName) throws IOException { PdfReader reader = new PdfReader(fileName); PdfReaderContentParser parser = new PdfReaderContentParser(reader); StringBuffer buff = new StringBuffer(); TextExtractionStrategy strategy; for (int i = 1; i <= reader.getNumberOfPages(); i++) { strategy = parser.processContent(i, new SimpleTextExtractionStrategy()); buff.append(strategy.getResultantText()); } return buff.toString(); } /** * 获取doc文档 * * @Title: getTextFromWord * @param @param filePath * @param @return * @return String 返回类型 * @throws */ public static String getTextFromWord(String filePath) { String result = null; File file = new File(filePath); try { FileInputStream fis = new FileInputStream(file); WordExtractor wordExtractor = new WordExtractor(fis); result = wordExtractor.getText(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return result; } /** * 读取excel内容 * * @Title: getTextFromExcel * @param @param filePath * @param @return * @return String 返回类型 * @throws */ public static String getTextFromExcel(String filePath) { StringBuffer buff = new StringBuffer(); try { // 创建对Excel工作簿文件的引用 HSSFWorkbook wb = new HSSFWorkbook(new FileInputStream(filePath)); // 创建对工作表的引用。 for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) { if (null != wb.getSheetAt(numSheets)) { HSSFSheet aSheet = wb.getSheetAt(numSheets);// 获得一个sheet for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet 
.getLastRowNum(); rowNumOfSheet++) { if (null != aSheet.getRow(rowNumOfSheet)) { HSSFRow aRow = aSheet.getRow(rowNumOfSheet); // 获得一个行 for (int cellNumOfRow = 0; cellNumOfRow <= aRow .getLastCellNum(); cellNumOfRow++) { if (null != aRow.getCell(cellNumOfRow)) { HSSFCell aCell = aRow.getCell(cellNumOfRow);// 获得列值 switch (aCell.getCellType()) { case HSSFCell.CELL_TYPE_FORMULA: break; case HSSFCell.CELL_TYPE_NUMERIC: buff .append( aCell .getNumericCellValue()) .append('\t'); break; case HSSFCell.CELL_TYPE_STRING: buff.append(aCell.getStringCellValue()) .append('\t'); break; } } } buff.append('\n'); } } } } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return buff.toString(); } /** * 替换文件内容 * @Title: replaceContentToFile * @param @param path 文件路径 * @param @param str 要替换的内容 * @param @param con 替换称的内容 * @return void 返回类型 * @throws */ public static void replaceContentToFile(String path, String str, String con) { try { if (isExcite(path)) { FileReader read = new FileReader(path); BufferedReader br = new BufferedReader(read); StringBuilder content = new StringBuilder(); while (br.ready() != false) { content.append(br.readLine()); content.append("\r\n"); } int dex = content.indexOf(str); if (dex != -1) { System.out.println("找到标记!"); } else { System.out.println("指定标记不存在!"); } content.replace(dex, dex, con); br.close(); read.close(); FileOutputStream fs = new FileOutputStream(path); fs.write(content.toString().getBytes()); fs.close(); } else { System.out.println("文件不存在!"); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
留着以后直接拿过来用。