PDFBox 解析PDF文件-解析服务器文件

1. 首先在 pom.xml 中引入 PDFBox 依赖

<!-- Apache PDFBox dependency, used for reading/parsing PDF files. -->
<!-- NOTE(review): 2.0.4 appears to be an early 2.0.x release; consider checking for a newer 2.0.x patch version. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.4</version>
</dependency>

 

2. 在 controller 层直接编写解析代码

/**
 * Downloads a PDF from the file server and returns its text content with all
 * line-feed characters removed.
 *
 * <p>The request body must contain a {@code filePath} entry; it is appended to
 * {@code fileServer} (a field declared outside this method, presumably the base
 * URL of the file server) to build the URL that is fetched.
 *
 * @param jsonObject request body; its {@code filePath} value is the path of the
 *                   PDF relative to {@code fileServer}
 * @return the extracted text without {@code '\n'} characters, or {@code null}
 *         when the document is encrypted or cannot be parsed
 * @throws IOException if the URL is malformed, the connection to the file
 *                     server fails, or the response stream cannot be closed
 */
@PostMapping("/getPdf")
public StringBuffer getPdf(@RequestBody JSONObject jsonObject) throws IOException {
    // NOTE(review): filePath comes straight from the request body and is
    // concatenated into the URL unvalidated. Consider rejecting blank values
    // and path-traversal sequences ("..") before fetching.
    String filePath = fileServer + "/" + jsonObject.getString("filePath");
    URL url = new URL(filePath);

    // Connection failures propagate to the caller as IOException; the stream
    // is always closed, even when parsing fails.
    try (InputStream inputStream = url.openConnection().getInputStream()) {
        // PDDocument holds parser buffers and must be closed after use.
        try (PDDocument document = PDDocument.load(inputStream)) {
            if (document.isEncrypted()) {
                // Encrypted documents are not extracted; callers receive null.
                return null;
            }
            String text = new PDFTextStripper().getText(document);
            // Joining the '\n'-separated lines without a separator is the same
            // as removing every '\n' from the extracted text.
            return new StringBuffer(text.replace("\n", ""));
        } catch (Exception e) {
            // Best-effort contract: a parse failure yields null instead of an error.
            // NOTE(review): replace with the project's logger once one is available here.
            e.printStackTrace();
            return null;
        }
    }
}
posted @ 2020-03-06 16:51  new海  阅读(1179)  评论(0编辑  收藏  举报