openOffice word转pdf,pdf转图片优化版
之前写了一个版本的,不过代码繁琐而且不好用,效率有些问题。尤其pdf转图片速度太慢。下面是优化版本的代码。
Spring Boot 版本信息:2.0.1.RELEASE
优化版代码:https://gitee.com/liran123/transfer_easy_fast
1、配置信息:
package com.yunfatong.conf;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.jodconverter.DocumentConverter;
import org.jodconverter.LocalConverter;
import org.jodconverter.office.LocalOfficeManager;
import org.jodconverter.office.OfficeManager;
import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.HashSet;
import java.util.Set;

/**
 * Spring configuration that wires a JODConverter {@link OfficeManager} and
 * {@link DocumentConverter} from the {@code jodconverter.*} properties.
 *
 * <p>Active only when {@link DocumentConverter} is on the classpath and
 * {@code jodconverter.enabled} is either {@code true} or not set at all.
 *
 * @author liran
 * @date 20190517
 */
@Configuration
@ConditionalOnClass({DocumentConverter.class})
@ConditionalOnProperty(prefix = "jodconverter", name = {"enabled"}, havingValue = "true", matchIfMissing = true)
@EnableConfigurationProperties({JodConverterProperties.class})
@Slf4j
public class JodConverterAutoConfiguration {

    private final JodConverterProperties properties;

    public JodConverterAutoConfiguration(JodConverterProperties properties) {
        this.properties = properties;
    }

    /**
     * Builds a local office manager from the bound properties.
     *
     * <p>The port list is split on commas and spaces; duplicates collapse because the
     * ports are collected in a set, and a token that is not a number becomes 2002.
     */
    private OfficeManager createOfficeManager() {
        LocalOfficeManager.Builder builder = LocalOfficeManager.builder();

        String configuredPorts = this.properties.getPortNumbers();
        if (StringUtils.isNotBlank(configuredPorts)) {
            Set<Integer> ports = new HashSet<>();
            for (String token : StringUtils.split(configuredPorts, ", ")) {
                ports.add(NumberUtils.toInt(token, 2002));
            }
            Integer[] boxedPorts = ports.toArray(new Integer[ports.size()]);
            builder.portNumbers(ArrayUtils.toPrimitive(boxedPorts));
        }

        builder.officeHome(this.properties.getOfficeHome());
        builder.workingDir(this.properties.getWorkingDir());
        builder.templateProfileDir(this.properties.getTemplateProfileDir());
        builder.killExistingProcess(this.properties.isKillExistingProcess());
        builder.processTimeout(this.properties.getProcessTimeout());
        builder.processRetryInterval(this.properties.getProcessRetryInterval());
        builder.taskExecutionTimeout(this.properties.getTaskExecutionTimeout());
        builder.maxTasksPerProcess(this.properties.getMaxTasksPerProcess());
        builder.taskQueueTimeout(this.properties.getTaskQueueTimeout());
        return builder.build();
    }

    /**
     * Registers the office manager unless another bean of that type already exists.
     * Spring invokes {@code start} after creation and {@code stop} on shutdown.
     */
    @Bean(initMethod = "start", destroyMethod = "stop")
    @ConditionalOnMissingBean
    public OfficeManager officeManager() {
        return this.createOfficeManager();
    }

    /**
     * Registers the document converter backed by the given office manager, unless a
     * converter bean has already been defined.
     */
    @Bean
    @ConditionalOnMissingBean
    @ConditionalOnBean({OfficeManager.class})
    public DocumentConverter jodConverter(OfficeManager officeManager) {
        return LocalConverter.make(officeManager);
    }
}
package com.yunfatong.conf;

import org.springframework.boot.context.properties.ConfigurationProperties;

import java.io.File;
import java.util.regex.Pattern;

/**
 * Settings bound from the {@code jodconverter.*} configuration namespace and used to
 * build the local office manager.
 *
 * @author liran
 * @date 20190517
 */
@ConfigurationProperties("jodconverter")
public class JodConverterProperties {

    /** Office installation directory used on Linux and on any OS that is not Windows. */
    private static final String DEFAULT_UNIX_OFFICE_HOME = "/opt/openoffice4";
    /** Office installation directory used on Windows. */
    private static final String DEFAULT_WINDOWS_OFFICE_HOME = "C:\\Program Files (x86)\\OpenOffice 4";
    /** Compiled once; matches the {@code os.name} values reported by Windows JVMs. */
    private static final Pattern WINDOWS_OS_NAME = Pattern.compile("Windows.*");

    private boolean enabled;
    private String officeHome;
    private String portNumbers = "2002";
    private String workingDir;
    private String templateProfileDir;
    private boolean killExistingProcess = true;
    private long processTimeout = 120000L;
    private long processRetryInterval = 250L;
    private long taskExecutionTimeout = 120000L;
    private int maxTasksPerProcess = 200;
    private long taskQueueTimeout = 30000L;

    public JodConverterProperties() {
    }

    public boolean isEnabled() {
        return this.enabled;
    }

    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    /**
     * Returns the office installation directory.
     *
     * <p>A configured {@code office-home} is used when it names an existing directory.
     * Previously the configured value was always overwritten by the OS default, so the
     * property could never take effect. Any other value (unset, or a placeholder such as
     * {@code linuxOrwindows}) falls back to the per-OS default: the Windows path when
     * {@code os.name} starts with "Windows", otherwise {@code /opt/openoffice4}.
     *
     * <p>The getter no longer modifies the stored field.
     *
     * @return the directory to pass to the office manager, never {@code null}
     */
    public String getOfficeHome() {
        if (this.officeHome != null && new File(this.officeHome).isDirectory()) {
            return this.officeHome;
        }
        // Empty default keeps the match from failing if the system property is missing.
        String osName = System.getProperty("os.name", "");
        return WINDOWS_OS_NAME.matcher(osName).matches()
                ? DEFAULT_WINDOWS_OFFICE_HOME
                : DEFAULT_UNIX_OFFICE_HOME;
    }

    public void setOfficeHome(String officeHome) {
        this.officeHome = officeHome;
    }

    /** Returns the raw port list as configured, e.g. {@code "2002"} or {@code "2002, 2003"}. */
    public String getPortNumbers() {
        return this.portNumbers;
    }

    public void setPortNumbers(String portNumbers) {
        this.portNumbers = portNumbers;
    }

    public String getWorkingDir() {
        return this.workingDir;
    }

    public void setWorkingDir(String workingDir) {
        this.workingDir = workingDir;
    }

    public String getTemplateProfileDir() {
        return this.templateProfileDir;
    }

    public void setTemplateProfileDir(String templateProfileDir) {
        this.templateProfileDir = templateProfileDir;
    }

    public boolean isKillExistingProcess() {
        return this.killExistingProcess;
    }

    public void setKillExistingProcess(boolean killExistingProcess) {
        this.killExistingProcess = killExistingProcess;
    }

    public long getProcessTimeout() {
        return this.processTimeout;
    }

    public void setProcessTimeout(long processTimeout) {
        this.processTimeout = processTimeout;
    }

    public long getProcessRetryInterval() {
        return this.processRetryInterval;
    }

    public void setProcessRetryInterval(long processRetryInterval) {
        this.processRetryInterval = processRetryInterval;
    }

    public long getTaskExecutionTimeout() {
        return this.taskExecutionTimeout;
    }

    public void setTaskExecutionTimeout(long taskExecutionTimeout) {
        this.taskExecutionTimeout = taskExecutionTimeout;
    }

    public int getMaxTasksPerProcess() {
        return this.maxTasksPerProcess;
    }

    public void setMaxTasksPerProcess(int maxTasksPerProcess) {
        this.maxTasksPerProcess = maxTasksPerProcess;
    }

    public long getTaskQueueTimeout() {
        return this.taskQueueTimeout;
    }

    public void setTaskQueueTimeout(long taskQueueTimeout) {
        this.taskQueueTimeout = taskQueueTimeout;
    }
}
application.yml
jodconverter:
  enabled: true
  # office-home 按操作系统自动选择(见 JodConverterProperties#getOfficeHome),此处的值仅为占位:
  #   linux 默认:/opt/openoffice4(不用修改)
  #   windows 默认:C:\Program Files (x86)\OpenOffice 4(不用修改)
  office-home: linuxOrwindows
  port-numbers: 2002
  max-tasks-per-process: 10
2、转换入口
package com.yunfatong.ojd.util.pdf;
import cn.hutool.core.date.DatePattern;
import com.yunfatong.ojd.common.exception.CommonException;
import com.yunfatong.ojd.service.FileSystemStorageService;
import com.yunfatong.ojd.util.SpringUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.jodconverter.DocumentConverter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.File;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
/**
* word 转pdf
*
* @author lr
*/
@Component
@Slf4j
public class TransferUtil {
//这里没有@Autowired 主要是配置不启用的话 无法注入
private DocumentConverter documentConverter;
@Autowired
private FileSystemStorageService fileSystemStorageService;
/**
* fileSystemStorageService 就是拼接出本地路径的作用
* storage.winLocation=D:\\ojd\\upload\\images\\
* ##上传图片linux存储路径
* storage.linuxLocation=/home/ojd/upload/images/
*/
final static String WORD_SUFFIX_DOC = "doc";
final static String WORD_SUFFIX_DOCX = "docx";
/**
* word ->pdf
*
* @param webPath 浏览器可访问路径(数据库存的)如 /test/wd.word
* @return 相同文件夹下的转换后的pdf 路径 如/test/wd_20190517151515333.pdf
* @throws Exception
*/
public String transferWordToPdf(String webPath) throws Exception {
if(documentConverter==null){
documentConverter = SpringUtil.getBean(DocumentConverter.class);
}
//转换成本地实际磁盘路径
String originLocalFilePath = fileSystemStorageService.getLocation(webPath);
File inputFile = new File(originLocalFilePath);
if (!inputFile.exists() ||
!inputFile.isFile() ||
(!StringUtils.contains(inputFile.getName(), WORD_SUFFIX_DOC) &&
!StringUtils.contains(inputFile.getName(), WORD_SUFFIX_DOCX))) {
throw new CommonException("word -> pdf转换错误 当前文件不是word或 文件不存在: " + webPath);
}
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(DatePattern.PURE_DATETIME_MS_PATTERN);
String timeNow = formatter.format(LocalDateTime.now());
String newPdfWebPath = StringUtils.substringBeforeLast(webPath, ".") + "_" + timeNow + ".pdf";
try {
File outputFile = new File(fileSystemStorageService.getLocation(newPdfWebPath));
documentConverter.convert(inputFile).to(outputFile).execute();
} catch (Exception e) {
log.error("word->pdf 转换错误------------> Exception:{}", e);
throw e;
}
return newPdfWebPath;
}
public List<String> transferPdfToImage(String webPath) throws Exception {
String originLocalFilePath = fileSystemStorageService.getLocation(webPath);
File inputFile = new File(originLocalFilePath);
if (!inputFile.exists() ||
!inputFile.isFile() ||
webPath.lastIndexOf(".pdf") < 0) {
throw new CommonException("pdf-> img 源文件不是pdf文件 或者文件不存在!" + webPath);
}
String localPdfpath = fileSystemStorageService.getLocation(webPath);
String newImgWebPathPreSuffix = StringUtils.substringBeforeLast(webPath, ".");
String localImgPath = fileSystemStorageService.getLocation(newImgWebPathPreSuffix);
PdfTransferUtil pdfTranfer = new PdfTransferUtil();
List<byte[]> ins = pdfTranfer.pdf2Image(localPdfpath, "png", 1.5f);
List<String> webPaths = new ArrayList<>(ins.size());
for (int i = 0; i < ins.size(); i++) {
byte[] data = ins.get(i);
String pathReal = localImgPath + "_ojd_" + i + ".png";
FileUtils.writeByteArrayToFile(new File(pathReal), data);
webPaths.add(pathReal);
}
return webPaths;
}
}
pdf 转图片参考 https://gitee.com/cycmy/pdftranfer.git
package com.yunfatong.ojd.util.pdf;

import lombok.extern.slf4j.Slf4j;
import org.icepdf.core.pobjects.Document;
import org.icepdf.core.pobjects.Page;
import org.icepdf.core.util.GraphicsRenderingHints;

import javax.imageio.ImageIO;
import javax.imageio.stream.ImageOutputStream;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

/**
 * Renders the pages of a PDF document to encoded images, one byte array per page.
 *
 * @author lr
 */
@Slf4j
public class PdfTransferUtil {

    /** Image format used when the caller passes a null or blank image type. */
    private static final String DEFAULT_IMAGE_TYPE = "png";
    /** Size of the thread pool that renders pages in parallel. */
    private static final int RENDER_THREADS = 5;

    //*********************************pdf to image **********************************************************

    /**
     * Renders a PDF held in memory.
     *
     * @param pdfBytes  the PDF content
     * @param imageType output image format; png when null or blank
     * @param zoom      scale factor: 1 means no scaling, 0.3 shrinks to 30%
     * @return one encoded image per page, in page order
     * @throws Exception if the document cannot be loaded or a page cannot be rendered
     */
    public List<byte[]> pdf2Image(byte[] pdfBytes, String imageType, float zoom) throws Exception {
        Document document = new Document();
        document.setByteArray(pdfBytes, 0, pdfBytes.length, null);
        return pageExtraction(document, imageType, 0f, zoom);
    }

    /**
     * Renders a PDF read from a stream.
     *
     * @param inputPDF  stream supplying the PDF content
     * @param imageType output image format; png when null or blank
     * @param zoom      scale factor: 1 means no scaling, 0.3 shrinks to 30%
     * @return one encoded image per page, in page order
     * @throws Exception if the document cannot be loaded or a page cannot be rendered
     */
    public List<byte[]> pdf2Image(InputStream inputPDF, String imageType, float zoom) throws Exception {
        Document document = new Document();
        document.setInputStream(inputPDF, null);
        return pageExtraction(document, imageType, 0f, zoom);
    }

    /**
     * Renders a PDF file.
     *
     * @param pdfPath   path of the source file, e.g. d:/test.pdf
     * @param imageType output image format; png when null or blank
     * @param zoom      scale factor: 1 means no scaling, 0.3 shrinks to 30%
     * @return one encoded image per page, in page order
     * @throws Exception if the document cannot be loaded or a page cannot be rendered
     */
    public List<byte[]> pdf2Image(String pdfPath, String imageType, float zoom) throws Exception {
        Document document = new Document();
        document.setFile(pdfPath);
        return pageExtraction(document, imageType, 0f, zoom);
    }

    //*********************************pdf to image **********************************************************

    /**
     * Renders all pages on a fixed-size thread pool and collects the results in page order.
     *
     * <p>Failures are logged with their stack trace and rethrown. Earlier versions
     * swallowed them and returned {@code null}. The document is disposed and the pool
     * shut down on every path.
     */
    private List<byte[]> pageExtraction(Document document, String imageType, float rotation, float zoom)
            throws InterruptedException, ExecutionException {
        String format = (imageType == null || imageType.trim().isEmpty()) ? DEFAULT_IMAGE_TYPE : imageType;
        ExecutorService executorService = Executors.newFixedThreadPool(RENDER_THREADS);
        try {
            int pages = document.getNumberOfPages();
            List<Callable<byte[]>> callables = new ArrayList<>(pages);
            for (int i = 0; i < pages; i++) {
                callables.add(new CapturePage(document, i, format, rotation, zoom));
            }
            // invokeAll returns only after every page task has completed.
            List<Future<byte[]>> futures = executorService.invokeAll(callables);
            List<byte[]> result = new ArrayList<>(pages);
            for (Future<byte[]> future : futures) {
                result.add(future.get());
            }
            return result;
        } catch (InterruptedException ex) {
            // Restore the flag so code further up the stack can see the interruption.
            Thread.currentThread().interrupt();
            log.error(" pdf 转换图片错误 Error handling PDF document", ex);
            throw ex;
        } catch (ExecutionException | RuntimeException ex) {
            log.error(" pdf 转换图片错误 Error handling PDF document", ex);
            throw ex;
        } finally {
            executorService.shutdown();
            new DocumentCloser(document).call();
        }
    }

    /** Renders a single page of the document and encodes it in the requested format. */
    public static class CapturePage implements Callable<byte[]> {
        private final Document document;
        private final int pageNumber;
        private final String imageType;
        private final float rotation;
        private final float zoom;

        private CapturePage(Document document, int pageNumber, String imageType, float rotation, float zoom) {
            this.document = document;
            this.pageNumber = pageNumber;
            this.imageType = imageType;
            this.rotation = rotation;
            this.zoom = zoom;
        }

        @Override
        public byte[] call() throws Exception {
            BufferedImage image = (BufferedImage) document.getPageImage(pageNumber,
                    GraphicsRenderingHints.SCREEN, Page.BOUNDARY_CROPBOX, rotation, zoom);
            try {
                ByteArrayOutputStream bs = new ByteArrayOutputStream();
                // The OutputStream overload opens and closes its own image stream, so nothing
                // is left unclosed. It returns false when no writer supports the format.
                if (!ImageIO.write(image, imageType, bs)) {
                    throw new IOException("No ImageIO writer available for image type: " + imageType);
                }
                return bs.toByteArray();
            } finally {
                image.flush();
            }
        }
    }

    /**
     * Disposes the document.
     */
    public static class DocumentCloser implements Callable<Void> {
        private final Document document;

        private DocumentCloser(Document document) {
            this.document = document;
        }

        @Override
        public Void call() {
            if (document != null) {
                document.dispose();
                log.info("Document disposed");
            }
            return null;
        }
    }
}
SpringUtil
package com.yunfatong.ojd.util; /** * @Auther liran * @Date 2018/8/30 14:49 * @Description */ import org.springframework.beans.BeansException; import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContextAware; import org.springframework.stereotype.Component; @Component public class SpringUtil implements ApplicationContextAware { private static ApplicationContext applicationContext; @Override public void setApplicationContext(ApplicationContext applicationContext) throws BeansException { if(SpringUtil.applicationContext == null) { SpringUtil.applicationContext = applicationContext; } System.out.println("========ApplicationContext配置成功,在普通类可以通过调用SpringUtils.getAppContext()获取applicationContext对象,applicationContext="+SpringUtil.applicationContext+"========"); } //获取applicationContext public static ApplicationContext getApplicationContext() { return applicationContext; } //通过name获取 Bean. public static Object getBean(String name){ return getApplicationContext().getBean(name); } //通过class获取Bean. public static <T> T getBean(Class<T> clazz){ return getApplicationContext().getBean(clazz); } //通过name,以及Clazz返回指定的Bean public static <T> T getBean(String name,Class<T> clazz){ return getApplicationContext().getBean(name, clazz); } }
pom.xml
<!-- Word-to-PDF conversion (JODConverter): begin -->
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-core</artifactId>
    <version>4.2.2</version>
</dependency>
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-local</artifactId>
    <version>4.2.2</version>
</dependency>
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-spring-boot-starter</artifactId>
    <version>4.2.2</version>
</dependency>
<!-- Word-to-PDF conversion: end -->
<!-- PDF-to-image rendering (ICEpdf): begin -->
<dependency>
    <groupId>org.icepdf.os</groupId>
    <artifactId>icepdf-core</artifactId>
    <version>6.2.2</version>
    <exclusions>
        <exclusion>
            <groupId>javax.media</groupId>
            <artifactId>jai_core</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.icepdf.os</groupId>
    <artifactId>icepdf-viewer</artifactId>
    <version>6.2.2</version>
</dependency>
<!-- PDF-to-image rendering: end -->
3、调用测试:
import com.yunfatong.ojd.service.FileSystemStorageService;
import lombok.extern.slf4j.Slf4j;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.List;

/**
 * Integration test that runs both conversions against files under the configured storage
 * root and logs how long each one takes.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
@Slf4j
public class WordTransferPdfUtilTest {

    @Autowired
    TransferUtil wordTransferPdfUtil;

    @Autowired
    FileSystemStorageService fileSystemStorageService;

    /**
     * Converts a sample .docx to PDF and a sample PDF to images.
     *
     * <p>Exceptions now propagate so that a failed conversion fails the test. Previously
     * they were caught and printed, which made the test pass unconditionally.
     *
     * @throws Exception if either conversion fails
     */
    @Test
    public void transferLocalFile() throws Exception {
        /*******************word to pdf******************/
        long time = System.currentTimeMillis();
        System.out.println("start :======" + time);
        String pdfPath = wordTransferPdfUtil.transferWordToPdf("courtChongqing/test_new/555.docx");
        Assert.assertNotNull(pdfPath);
        // Timing output is informational, so it is logged at info rather than error.
        log.info(System.currentTimeMillis() + " time============================== :" + ((System.currentTimeMillis() - time) / 1000));

        /*******************pdf to images******************/
        long time2 = System.currentTimeMillis();
        List<String> pdfImages2 = wordTransferPdfUtil.transferPdfToImage("courtChongqing/test_new/333.pdf");
        Assert.assertNotNull(pdfImages2);
        Assert.assertFalse(pdfImages2.isEmpty());
        for (String pdfImage : pdfImages2) {
            log.info(pdfImage);
        }
        log.info(" time===============================22222222 :" + ((System.currentTimeMillis() - time2) / 1000));
    }
}