doc或docx(word)或image类型文件批量转PDF脚本

1.实际生产环境中遇到文件展示只能适配PDF版本的文件，奈何一万个文件有七千个都是word或者image类型的，由此搞个脚本批量转换下上传至OSS，为前端提供数据支撑。

2.环境准备，这里使用的是aspose-words-18.6-jdk16-crack.jar工具包，资源包就不提供了，网上百度一下即可。

3.Java Maven项目，JDK 1.8，Maven 3.6；图片转PDF依赖iText（com.itextpdf），日志依赖log4j。

4.使用aspose-words-18.6-jdk16-crack.jar工具包转换时会产生水印，需要在resources目录下添加license.xml配置文件来去除水印：

<?xml version="1.0" encoding="UTF-8" ?>
<License>
    <Data>
        <Products>
            <Product>Aspose.Total for Java</Product>
            <Product>Aspose.Words for Java</Product>
        </Products>
        <EditionType>Enterprise</EditionType>
        <SubscriptionExpiry>20991231</SubscriptionExpiry>
        <LicenseExpiry>20991231</LicenseExpiry>
        <SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber>
    </Data>
    <Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature>
</License>

license.xml

5.工具类编写：

package org.utiles.dongl.tools;
import com.aspose.words.License;

import com.aspose.words.SaveFormat;

import com.itextpdf.text.*;

import com.itextpdf.text.pdf.PdfWriter;

import org.apache.log4j.Logger;

import org.utiles.dongl.comment.WordTranPDF;
import java.io.File;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.util.*;

import java.util.List;
/**


@ClassName: FileTranPDFTool


@Description TODO


@Author: 东霖


@Date: 2022/7/23 10:50


@Version 1.0

/

public class FileTranPDFTool {

private static Logger logger = Logger.getLogger(FileTranPDFTool.class);
public static boolean getLicense() {

boolean result = false;

try {

InputStream is = WordTranPDF.class.getClassLoader().getResourceAsStream("\license.xml"); // license.xml应放在..\WebRoot\WEB-INF\classes路径下

License aposeLic = new License();

aposeLic.setLicense(is);

result = true;

} catch (Exception e) {

e.printStackTrace();

}

return result;

}
/**

ImageToPDF
支持类型：jpg/tif/..

@param source
@param target

*/

public static void ImageToPDF(String source, String target) {

Document document = new Document();

//设置文档页边距

document.setMargins(0, 0, 0, 0);

FileOutputStream fos = null;

try {

fos = new FileOutputStream(target);

PdfWriter.getInstance(document, fos);

//打开文档

            document.open();

//获取图片的宽高

Image image = Image.getInstance(source);

float imageHeight = image.getScaledHeight();

float imageWidth = image.getScaledWidth();

//设置页面宽高与图片一致

Rectangle rectangle = new Rectangle(imageWidth, imageHeight);

document.setPageSize(rectangle);

//图片居中

            image.setAlignment(Image.ALIGN_CENTER);

//新建一页添加图片

            document.newPage();

document.add(image);

} catch (Exception ioe) {

System.out.println(ioe.getMessage());

} finally {

//关闭文档

            document.close();

try {

fos.flush();

fos.close();

} catch (IOException e) {

e.printStackTrace();

}

}

}

/**

word 文档类型转pdf

@param inPath
@param outPath
@return

*/

public static boolean doc2pdf(String inPath, String outPath) {

if (!getLicense()) { // 验证License 若不验证则转化出的pdf文档会有水印产生

return false;

}

FileOutputStream os = null;

try {

File file = new File(outPath); // 新建一个空白pdf文档

os = new FileOutputStream(file);

com.aspose.words.Document doc = new com.aspose.words.Document(inPath); // Address是将要被转化的word文档

//            doc.save(os, SaveFormat.PDF);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,

doc.save(os, SaveFormat.DOCX);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,

// EPUB, XPS, SWF 相互转换

} catch (Exception e) {

e.printStackTrace();

return false;

} finally {

if (os != null) {

try {

os.flush();

os.close();

} catch (IOException e) {

e.printStackTrace();

}

}

}

return true;

}

/**

遍历指定目录取文件名称

@param foldPath 文件目录绝对路径
@return

*/

public static List<String> listFileName(String foldPath) {

List<String> listFiles = new ArrayList<>();

//创建文件对象

File f = new File(foldPath);

//列出文件名称存入数组

File[] files = f.listFiles();

for (int i = 0; i < Objects.requireNonNull(files).length; i++) {

listFiles.add(files[i].getName());

}

return listFiles;

}

/**

删除指定文件
@param filePath
@return

*/

public static boolean deleteByFilePath(String filePath) {

File file = new File(filePath);

return file.delete();

}

/**

遍历指定目录取文件名称并接入路径

@param oldPath 遍历文件目录绝对路径，也是要删除的文件目录
@return

*/

public static Map<String, String> listFileNameAndPath(String oldPath) {

Map<String, String> listFiles = new HashMap();

//创建文件对象

File f = new File(oldPath);

//列出文件名称存入数组

File[] files = f.listFiles();

for (int i = 0; i < Objects.requireNonNull(files).length; i++) {

listFiles.put(files[i].getPath(), files[i].getName());

}

return listFiles;

}

/**

获取指定文件目录文件大小为0Size的
@param foldPath
@return

*/

public static Integer getFileSize(String foldPath,String newFoldPath) {

int j=1;

//创建文件对象

File file = new File(foldPath);

File[] files = file.listFiles();

for (int i = 0; i < files.length; i++) {

if (files[i].length()0){

Boolean aBoolean = WriteToFileExample.moveFileToTarget("D:\OSS\ghwb\ghksj_1_copy\《**************》.pdf", newFoldPath+files[i].getName(),null);

if (aBooleantrue){

j++;

logger.info("移动："+files[i].getPath()+"到"+newFoldPath);

}

System.out.println(files[i].getPath());

}

}

return j;

}

/**

文件对比删除重复文件
@param oldFileNames
@param newPath 对比文件目录
@return

*/

public static Integer deleteByFileName(Map<String, String> oldFileNames, String newPath) {

int j = 0;

List<String> newListNames = listFileName(newPath);

for (Map.Entry<String, String> entry : oldFileNames.entrySet()) {

for (int i = 0; i < newListNames.size(); i++) {

String value = entry.getValue();

String s = newListNames.get(i);

if (value.substring(0,value.lastIndexOf(".")).equals(s.substring(0,s.lastIndexOf(".")))) {

boolean b = deleteByFilePath(entry.getKey());

if (b==true){

logger.info("成功删除指定文件："+entry.getKey()+"，共计："+j+"个");

j++;

}else{

logger.error("指定文件不存在："+entry.getKey());

}

}

}

}

return j;

}

public static void main(String[] args) {

//文件对比删除

Map<String, String> map = listFileNameAndPath("D:\OSS\ghwb\word");

int b = deleteByFileName(map, "D:\OSS\ghwb\ghksj - 副本");

//word转pdf

doc2pdf("D:\OSS\ghwb\13c5ad939a0b2001.doc",

"D:\OSS\ghwb\doc2docx\13c5ad939a0b2001.docx");

//移动文件size为0的数据到指定文件夹

//        getFileSize("D:\OSS\ghwb\ghksj_3_copy","D:\OSS\ghwb\test");

    }

}

WordORImageTranPDF

6.逻辑代码：

package org.utiles.dongl.comment;
import org.apache.log4j.Logger;

import org.utiles.dongl.tools.FileTranPDFTool;

import org.utiles.dongl.tools.WriteToFileExample;
import java.io.*;

import java.util.HashMap;

import java.util.Map;
import static org.utiles.dongl.tools.FileTranPDFTool.doc2pdf;
/**


@ClassName: WordTranPDF


@Description TODO


@Author: 东霖


@Date: 2022/7/22 8:55


@Version 1.0

/

public class WordTranPDF {

private static Logger logger = Logger.getLogger(WordTranPDF.class);
/**

获取指定文件路径下所有文件对象

@param inFilePath
@return

*/

public static Map<String, String> getFilePathName(String inFilePath,String replacePathOld

,String replacePathNew,String wjjl,String pdfToPath) {

Map<String, String> fileList = new HashMap();

//创建文件对象

File f = new File(inFilePath);

//列出文件名称存入数组

File[] files = f.listFiles();

for (int i = 0; i < files.length; i++) {

if (files[i].getName().endsWith("docx") || files[i].getName().endsWith("doc")

|| files[i].getName().endsWith("wps") || files[i].getName().endsWith("rtf"))

{

//                String str=files[i].getPath().substring(0,files[i].getPath().lastIndexOf(".")+1)+"pdf";

String str=files[i].getPath().substring(0,files[i].getPath().lastIndexOf(".")+1)+"docx";

fileList.put(files[i].getPath()+"&"+"word",str.replace(replacePathOld,replacePathNew));

//                logger.info("当前文件路径为："+files[i].getPath());

} else if (files[i].getName().endsWith(".png") || files[i].getName().endsWith(".jpg") || files[i].getName().endsWith(".gif")

|| files[i].getName().endsWith(".jpeg") || files[i].getName().endsWith(".tif"))

{

String str=files[i].getPath().substring(0,files[i].getPath().lastIndexOf(".")+1)+"pdf";

fileList.put(files[i].getPath()+"&"+"image", str.replace(replacePathOld,replacePathNew));

//                logger.info("当前文件路径为："+files[i].getPath());

}else if(files[i].getName().endsWith(".pdf")) {

WriteToFileExample.moveFileToTarget(files[i].getPath(),pdfToPath+files[i].getName(),"");

logger.info("移动："+files[i].getPath()+"到"+pdfToPath);

}else{

WriteToFileExample.writeFileSQL("当前文件无法转换："+files[i].getPath(),wjjl);

}

}

return fileList;

}

public static void start(Map<String, String> hashMap) throws InterruptedException {

long old = System.currentTimeMillis();

int j = 0;

for (Map.Entry<String, String> entry : hashMap.entrySet()) {

//            doc2pdf(entry.getKey(),entry.getValue());

String[] split = entry.getKey().split("&");

if(split[1].equals("word")){

System.out.println(entry.getValue());

doc2pdf(split[0],entry.getValue());

Thread.sleep(Long.parseLong("15"));

}else if (split[1].equals("image")){

FileTranPDFTool.ImageToPDF(split[0],entry.getValue());

Thread.sleep(Long.parseLong("15"));

}else {

//                break;

            }

j++;

logger.info("转换第："+j+"个！"+"文件名称为："+entry.getKey());

}

long now = System.currentTimeMillis();

logger.info("pdf转换成功，共耗时：" + ((now - old) / 1000.0) + "秒");

logger.info("共转换：" + j + "个文件!");

}
public static void main(String[] args) throws InterruptedException {

/**

* inFilePath: 需要转换的文件夹路径

* replacePathOld: 抓换后的文件要写入新文件，直接替换文件的上级目录关键字即可

* replacePathNew: 新的文件父路径

* wjjl: 不能转换的文件记录位置及记录名称

* pdfToPath：当文件中已有pdf不用抓换的需配置文件留存方向。会从原文件目录移动至新文件目录

*/

Map<String, String> filePathName = getFilePathName("D:\OSS\ghwb\doc11",

"doc11","doc2docx",

"D:\OSS\ghwb\"+System.currentTimeMillis()+".txt"

,"D:\OSS\yjbg\gjxxzx\ghksj_copy\");

start(filePathName);

}

}

View Code

7.上述就是word或者image类型文件的批量转换脚本，可以先在工具类中做单元测试，之后再使用批量逻辑代码。

posted @ 2022-08-07 22:10 zhuzhu&you 阅读(676) 评论(0) 编辑收藏举报

刷新页面返回顶部

追码人

doc或docx(word)或image类型文件批量转PDF脚本

doc或docx(word)或image类型文件批量转PDF脚本

公告