Java 实现截取 PDF 指定页并将其转换为图片格式
1、引入依赖
<dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.16</version> </dependency> <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>fontbox</artifactId> <version>2.0.16</version> </dependency>
jar包下载地址:
https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox
https://mvnrepository.com/artifact/org.apache.pdfbox/fontbox
2、实现DEMO
package com.dddpeter.app; import org.apache.pdfbox.multipdf.Splitter; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.rendering.PDFRenderer; import javax.imageio.ImageIO; import javax.imageio.stream.ImageOutputStream; import java.awt.image.BufferedImage; import java.io.*; import java.util.List; import java.util.ListIterator; public class PDFUtils { public static String splitPdf(int pageNum, String source, String dest) { File indexFile = new File(source); File outFile = new File(dest); PDDocument document = null; try { document = PDDocument.load(indexFile); // document.getNumberOfPages(); Splitter splitter = new Splitter(); splitter.setStartPage(pageNum); splitter.setEndPage(pageNum); List<PDDocument> pages = splitter.split(document); ListIterator<PDDocument> iterator = pages.listIterator(); while (iterator.hasNext()) { PDDocument pd = iterator.next(); if (outFile.exists()) { outFile.delete(); } pd.save(outFile); pd.close(); if (outFile.exists()) { return outFile.getPath(); } } document.close(); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return null; } public static void pdfFileToImage(File pdffile,String targetPath){ try { FileInputStream instream = new FileInputStream(pdffile); InputStream byteInputStream=null; try { PDDocument doc = PDDocument.load(instream); PDFRenderer renderer = new PDFRenderer(doc); int pageCount = doc.getNumberOfPages(); if (pageCount > 0) { BufferedImage image = renderer.renderImage(0, 4.0f); image.flush(); ByteArrayOutputStream bs = new ByteArrayOutputStream(); ImageOutputStream imOut; imOut = ImageIO.createImageOutputStream(bs); ImageIO.write(image, "png", imOut); byteInputStream = new ByteArrayInputStream(bs.toByteArray()); byteInputStream.close(); } doc.close(); } catch (IOException e) { e.printStackTrace(); } File uploadFile = new File(targetPath); FileOutputStream fops; fops = new FileOutputStream(uploadFile); fops.write(readInputStream(byteInputStream)); 
fops.flush(); fops.close(); } catch (Exception e) { e.printStackTrace(); } } public static byte[] readInputStream(InputStream inStream) throws Exception { ByteArrayOutputStream outStream = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len = 0; while ((len = inStream.read(buffer)) != -1) { outStream.write(buffer, 0, len); } inStream.close(); return outStream.toByteArray(); } public static void main(String[] args) { String path = splitPdf(4,"D:\\data\\11.pdf","D:\\data\\out11.pdf"); File file =new File(path); //上传的是png格式的图片结尾 String targetfile="D:\\data\\out11.png"; pdfFileToImage(file,targetfile); } }
其中,要截取的页码可以配合 https://www.cnblogs.com/xsdty/p/11463174.html 中的方法动态获取:
先根据关键字查出其所在页码,再截取该页并转换为所需格式的图片。