使用 PDFBox 解析 PDF 文件
Spring Boot 项目
1.添加依赖
<!-- Apache PDFBox: library used below to parse PDF files and extract their text. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.16</version>
</dependency>
2.PDFTest.java
package com.nenu.pdf;

import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Small demo that extracts the plain text of a PDF file using Apache PDFBox.
 */
public class PDFTest {

    /**
     * Parses the PDF at the given path and returns all of its text.
     *
     * <p>Failures are reported on standard error and signalled to the caller by a
     * {@code null} return value; no checked exception escapes this method.
     *
     * @param pdfFilePath file-system path of the PDF to read
     * @return the extracted text, or {@code null} if the file is missing or cannot be
     *         read or parsed
     * @throws NullPointerException if {@code pdfFilePath} is {@code null}
     */
    public static String getTextFromPDF(String pdfFilePath) {
        File file = new File(pdfFilePath);

        // Open read-only. The file is never written, and "rw" mode would fail on
        // read-only files and create an empty file when the path does not exist
        // (the mode string follows java.io.RandomAccessFile semantics).
        try (RandomAccessFile source = new RandomAccessFile(file, "r")) {
            PDFParser parser = new PDFParser(source);
            parser.parse();

            // Closing the document also closes the underlying source; the outer
            // try-with-resources additionally covers the case where parsing fails
            // before a document exists, so the file handle is never leaked.
            try (PDDocument document = parser.getPDDocument()) {
                PDFTextStripper stripper = new PDFTextStripper();
                return stripper.getText(document);
            }
        } catch (FileNotFoundException e) {
            // The path does not exist or cannot be opened for reading.
            e.printStackTrace();
        } catch (IOException e) {
            // The file could be opened but not parsed, read or closed.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Demo entry point: prints the text of the sample PDF {@code D:\pdf.pdf}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String str = PDFTest.getTextFromPDF("D:\\pdf.pdf");
        System.out.println(str);
    }
}