PDFBox 解析PDF文件-解析服务器文件

1. 首先在 pom.xml 中引入 PDFBox 依赖

<!-- Apache PDFBox: the library the controller method below uses to load a PDF and extract its text -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.4</version>
</dependency>

2. controller 层代码如下

/**
 * Downloads a PDF from the file server and returns its text with line feeds removed.
 *
 * <p>The download URL is built as {@code fileServer + "/" + filePath}, where
 * {@code filePath} is read from the request body.
 *
 * @param jsonObject request body; its {@code filePath} entry is the location of the PDF
 *                   relative to {@code fileServer}
 * @return the extracted text with every {@code \n} removed, or {@code null} when the
 *         document is encrypted
 * @throws IllegalArgumentException if {@code filePath} is missing or blank
 * @throws IOException if the file cannot be downloaded or cannot be parsed as a PDF
 */
@PostMapping("/getPdf")
public StringBuffer getPdf(@RequestBody JSONObject jsonObject) throws IOException {
    String filePath = jsonObject.getString("filePath");
    if (filePath == null || filePath.trim().isEmpty()) {
        // Fail fast instead of requesting "<fileServer>/null".
        throw new IllegalArgumentException("filePath is required");
    }

    // NOTE(review): filePath is client-supplied and concatenated into the URL unchecked;
    // consider rejecting ".." segments or absolute URLs before this goes to production.
    URL url = new URL(fileServer + "/" + filePath);
    URLConnection connection = url.openConnection();

    // try-with-resources closes both the network stream and the parsed document,
    // including when text extraction throws.
    try (InputStream inputStream = connection.getInputStream();
         PDDocument document = PDDocument.load(inputStream)) {
        if (document.isEncrypted()) {
            // Encrypted documents are not extracted; callers receive null.
            return null;
        }
        PDFTextStripper textStripper = new PDFTextStripper();
        String text = textStripper.getText(document);
        // Equivalent to splitting on "\n" and concatenating the pieces.
        return new StringBuffer(text.replace("\n", ""));
    }
}

posted @ 2020-03-06 16:51 new海 阅读(1232) 评论(0) 收藏 举报

刷新页面返回顶部

new海

PDFBox 解析PDF文件-解析服务器文件

公告