PDF解析返回一个对象

1.首先引入Maven jar包
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.pdf</artifactId>
<version>2.2.0</version>
</dependency>

<repository>
<id>com.e-iceblue</id>
<name>e-iceblue</name>
<url>http://repo.e-iceblue.com/nexus/content/groups/public/</url>
</repository>



2.直接在Controller层里面开工
import com.spire.pdf.PdfDocument;
import com.spire.pdf.PdfPageBase;


/**
 * Fetches a sample PDF to local disk via {@code OkHttpUtil.download}, then collects the
 * text of every page and writes every image found on each page to its own PNG file.
 *
 * <p>The download step is specific to the original project, where the file is first
 * uploaded to a file server (the post calls it "fastdns" — presumably FastDFS, TODO
 * confirm) which hands back a URL. When the PDF is already on disk it is normally not
 * needed.
 *
 * @return the concatenated text of all pages; never {@code null}. If parsing fails, the
 *         text gathered before the failure (possibly empty) is returned and the
 *         exception's stack trace is printed.
 */
@PostMapping("/getPdf")
public StringBuilder getPdf() {
    final String localDir = "D://";
    final String localName = "test.pdf";
    final String localPdf = localDir + localName;

    // Created up front so the caller never receives null, even when loading fails.
    StringBuilder stringBuilder = new StringBuilder();

    String filePath = "http://dev.file.aq-100.com:80/group1/M00/00/04/wKgCt15fCcuAEXm9AABV0XpTz3g800.pdf";
    OkHttpUtil.download(filePath, localDir, localName);

    PdfDocument doc = null;
    try {
        doc = new PdfDocument();
        doc.loadFromFile(localPdf);

        int pageCount = doc.getPages().getCount();
        for (int i = 0; i < pageCount; i++) {
            // Page object for the current page index.
            PdfPageBase page = doc.getPages().get(i);
            stringBuilder.append(page.extractText(true));

            // Extract the images once per page instead of once for the null check
            // and again for the loop.
            BufferedImage[] images = page.extractImages();
            if (images == null) {
                continue;
            }
            for (int j = 0; j < images.length; j++) {
                BufferedImage image = images[j];
                if (image == null) {
                    continue;
                }
                // One file per image, keyed by page and image index. Writing to the
                // source PDF's own path would clobber the document being parsed and
                // every image would overwrite the previous one.
                File output = new File(localDir + "test_page" + i + "_img" + j + ".png");
                ImageIO.write(image, "PNG", output);
            }
        }
        System.out.println(stringBuilder);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release the document even when extraction failed part-way through.
        if (doc != null) {
            try {
                doc.close();
            } catch (Exception closeError) {
                closeError.printStackTrace();
            }
        }
    }
    return stringBuilder;
}
posted @ 2020-03-04 10:28  new海  阅读(418)  评论(0)  编辑  收藏  举报