SpringBoot整合openoffice实现word文档的读取和导入及报错处理

 

先安装openoffice4

Linux系统安装参考:https://www.cnblogs.com/pxblog/p/11622969.html

Windows系统安装参考:https://www.cnblogs.com/pxblog/p/14346148.html

 

引入jar包

https://yvioo.lanzous.com/b00o97q6d
密码:1cjp

 

如果是pom文件的话

复制代码
<dependency>
            <groupId>local</groupId>
            <artifactId>jodconverter</artifactId>
            <version>2.2.2</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/jodconverter-2.2.2.jar
            </systemPath>
        </dependency>
        <dependency>
            <groupId>local</groupId>
            <artifactId>jodconverter-cli</artifactId>
            <version>2.2.2</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/jodconverter-cli-2.2.2.jar
            </systemPath>
        </dependency>
        <dependency>
            <groupId>local</groupId>
            <artifactId>jodconverter-core</artifactId>
            <version>3.0-beta-4</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/jodconverter-core-3.0-beta-4.jar
            </systemPath>
        </dependency>
复制代码

然后把jar包放到项目的 src/main/webapp/WEB-INF/lib/ 目录下(与上面pom文件中的systemPath保持一致)。存放位置可以根据自己的项目调整,调整后pom文件中的systemPath也要做相应修改。

 

日志注解(@Slf4j)用到了lombok,需要引入下面的依赖:

<!--lombok插件-->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency> 

 

 

 

application.yml

复制代码
spring:
  main:
    allow-bean-definition-overriding: true
  servlet:
    multipart:
      enabled: true #是否处理上传
      max-file-size: 50MB #允许最大的单个上传大小,单位可以是kb
      max-request-size: 50MB #允许最大请求大小

#文件上传目录
fileUploadPath: E://test//

openoffice:
  officeHome: D:\openoffice4  #openoffice的安装路径
  officePort: 8002   #openoffice启动端口
复制代码

 

 

UploadUtils.java

复制代码
import org.apache.commons.lang.RandomStringUtils;


/**
 * Helper for building randomised file names.
 */
public class UploadUtils {

    /**
     * Alphabet used for the random part of a generated name: the digits
     * {@code 0-9} followed by the lower-case letters {@code a-z} (36 symbols).
     */
    public static final char[] N36_CHARS =
            "0123456789abcdefghijklmnopqrstuvwxyz".toCharArray();

    /**
     * Builds a file name of the form {@code path + <8 random characters> + "." + ext}.
     *
     * @param path text placed in front of the random part (typically a directory
     *             path ending with a separator; no separator is added here)
     * @param ext  file extension without the leading dot
     * @return the concatenated file name
     */
    public static String generateFilename(String path, String ext) {
        String randomPart = RandomStringUtils.random(8, N36_CHARS);
        StringBuilder name = new StringBuilder();
        name.append(path).append(randomPart).append('.').append(ext);
        return name.toString();
    }
}
复制代码

 

 

 

转换服务类(负责启动、关闭openoffice服务并执行文档转换)

OpenOfficeConverter.java

复制代码
import com.test.UploadUtils;
import lombok.extern.slf4j.Slf4j;
import org.artofsolving.jodconverter.OfficeDocumentConverter;
import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration;
import org.artofsolving.jodconverter.office.OfficeManager;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import java.io.File;
import java.io.FileNotFoundException;


@Component
@Slf4j
public class OpenOfficeConverter {


    @Value("${openoffice.officeHome}")
    public String officeHome;

    @Value("${openoffice.officePort}")
    public Integer officePort;


    public void startService() {
        DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration();
        try {
            log.info("准备启动服务....");
            //设置OpenOffice.org安装目录
            configuration.setOfficeHome(getOfficeHome());
            //设置转换端口,默认为8100
            configuration.setPortNumber(getPort());
            //设置任务执行超时为5分钟
            configuration.setTaskExecutionTimeout(1000 * 60 * 5L);
            //设置任务队列超时为24小时
            configuration.setTaskQueueTimeout(1000 * 60 * 60 * 24L);

            officeManager = configuration.buildOfficeManager();
            //启动服务
            officeManager.start();
            log.info("office转换服务启动成功!");
        } catch (Exception ce) {
            log.error("office转换服务启动失败!详细信息:" + ce);
        }
    }

    public void stopService() {
        log.info("关闭office转换服务....");
        if (officeManager != null) {
            officeManager.stop();
        }
        log.info("关闭office转换成功!");
    }


    /**
     * 转换格式
     *
     * @param inputFile 需要转换的原文件路径
     * @param fileType  要转换的目标文件类型 html,pdf
     */
    public File convert(String inputFile, String fileType) {
        String outputFile = UploadUtils.generateFilename(getFilePath(), fileType);
        if (inputFile.endsWith(".txt")) {
            String odtFile = FileUtils.getFilePrefix(inputFile) + ".odt";
            if (new File(odtFile).exists()) {
                log.error("odt文件已存在!");
                inputFile = odtFile;
            } else {
                try {
                    FileUtils.copyFile(inputFile, odtFile);
                    inputFile = odtFile;
                } catch (FileNotFoundException e) {
                    log.error("文档不存在!");
                    e.printStackTrace();
                }
            }
        }
        OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);
        File output = new File(outputFile);
        converter.convert(new File(inputFile), output);
        return output;
    }


    public void init() {
        OpenOfficeConverter coverter = new OpenOfficeConverter(officeHome, officePort);
        coverter.startService();
        this.openOfficeConverter = coverter;
    }

    public void destroy() {
        this.openOfficeConverter.stopService();
    }


    @Autowired
    private OpenOfficeConverter openOfficeConverter;
    private static OfficeManager officeManager;
    public static final String HTML = "html";
    public static final String PDF = "pdf";
    public static final String TXT = "txt";
    public static final String DOC = "doc";
    public static final String DOCX = "docx";
    public static final String XLS = "xls";
    public static final String XLSX = "xlsx";
    public static final String PPT = "ppt";
    public static final String PPTX = "pptx";
    public static final String WPS = "wps";
    private int port = 8100;
    private String filePath;


    public OpenOfficeConverter(String officeHome, int port, String filePath) {
        super();
        this.officeHome = officeHome;
        this.port = port;
        this.filePath = filePath;
    }

    public OpenOfficeConverter(String officeHome, int port) {
        super();
        this.officeHome = officeHome;
        this.port = port;
    }

    public OpenOfficeConverter() {
        super();
    }


    public String getOfficeHome() {
        return officeHome;
    }


    public int getPort() {
        return port;
    }


    public String getFilePath() {
        return filePath;
    }

    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

}
复制代码

 

配置类

OpenOfficeConfig.java

复制代码
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Java configuration that declares the {@link OpenOfficeConverter} bean.
 */
@Configuration
public class OpenOfficeConfig {

    /**
     * Creates the converter bean. As named in the annotation, the container calls
     * {@code init} once the bean is set up and {@code destroy} when it is disposed.
     *
     * @return a new, not yet initialised converter
     */
    @Bean(initMethod = "init", destroyMethod = "destroy")
    public OpenOfficeConverter openOfficeConverter() {
        OpenOfficeConverter converter = new OpenOfficeConverter();
        return converter;
    }

}
复制代码

 

文件工具类

FileUtils.java

复制代码
import java.io.*;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * File and HTML helpers used when turning converted office documents into
 * embeddable HTML fragments.
 *
 * @author Tom
 */
public class FileUtils {

    private static final int COPY_BUFFER_SIZE = 8192;

    /** Charset used by {@link #toHtmlString(File)}. */
    private static final String DEFAULT_HTML_CHARSET = "gb2312";

    /** Upper-case BODY element, with or without attributes, spanning line breaks. */
    private static final Pattern BODY_PATTERN =
            Pattern.compile("<BODY\\b.*</BODY>", Pattern.DOTALL);

    /** Upper-case P element; the word boundary keeps {@code <PRE>} from matching. */
    private static final Pattern PARAGRAPH_PATTERN =
            Pattern.compile("<P\\b[^>]*(>.*?)</P>", Pattern.DOTALL);

    /** Opening/closing tags that are dropped entirely (their content is kept). */
    private static final Pattern UNWANTED_TAG_PATTERN = Pattern.compile(
            "</?(?:font|span|xml|del|ins|meta|[ovwxp]:\\w+)\\b[^>]*>",
            Pattern.CASE_INSENSITIVE);

    private static final Pattern TAG_PATTERN = Pattern.compile("<[^>]+>");

    /** One attribute inside a tag: name plus optional quoted or bare value. */
    private static final Pattern ATTRIBUTE_PATTERN = Pattern.compile(
            "\\s+([\\w:.-]+)(?:\\s*=\\s*(?:'[^']*'|\"[^\"]*\"|[^\\s>]+))?");

    /** Attribute names that are removed from every tag. */
    private static final Pattern UNWANTED_ATTRIBUTE_NAME = Pattern.compile(
            "lang|class|style|size|face|[ovwxp]:\\w+", Pattern.CASE_INSENSITIVE);

    /**
     * Returns the file name without its extension.
     *
     * @param fileName file name or path
     * @return everything before the last dot of the final path segment, or
     *         {@code fileName} unchanged when that segment has no dot
     */
    public static String getFilePrefix(String fileName) {
        int dot = fileName.lastIndexOf('.');
        int lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        // A dot that belongs to a directory name is not an extension separator.
        if (dot <= lastSeparator) {
            return fileName;
        }
        return fileName.substring(0, dot);
    }

    /**
     * Copies a file, overwriting the target if it exists.
     *
     * @param inputFile  path of the file to read
     * @param outputFile path of the file to write
     * @throws FileNotFoundException if the source cannot be opened for reading or
     *                               the target cannot be opened for writing
     * @throws UncheckedIOException  if reading or writing fails part-way, so a
     *                               truncated copy is never silently accepted
     */
    public static void copyFile(String inputFile, String outputFile)
            throws FileNotFoundException {
        try (InputStream in = new FileInputStream(new File(inputFile));
             OutputStream out = new FileOutputStream(new File(outputFile))) {
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } catch (FileNotFoundException e) {
            throw e;
        } catch (IOException e) {
            throw new UncheckedIOException(
                    "Failed to copy " + inputFile + " to " + outputFile, e);
        }
    }

    /**
     * Reads an HTML file decoded as {@code gb2312} and deletes it afterwards.
     *
     * @param file temporary HTML file
     * @return the file content with all line breaks removed
     * @see #toHtmlString(File, String)
     */
    public static String toHtmlString(File file) {
        return toHtmlString(file, DEFAULT_HTML_CHARSET);
    }

    /**
     * Reads an HTML file into a single string and deletes the file once it has
     * been read completely.
     *
     * <p>Lines are concatenated without separators. If reading fails the stack
     * trace is printed, the file is left in place and whatever was read so far
     * (possibly an empty string) is returned.
     *
     * @param file        temporary HTML file
     * @param charsetName name of the charset used to decode the file
     * @return the raw (not yet cleaned) HTML text without line breaks
     */
    public static String toHtmlString(File file, String charsetName) {
        StringBuilder html = new StringBuilder();
        boolean complete = false;
        try (InputStream in = new FileInputStream(file);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, charsetName))) {
            String line;
            // readLine() == null is the reliable end-of-stream signal; ready() is not.
            while ((line = reader.readLine()) != null) {
                html.append(line);
            }
            complete = true;
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (complete) {
            // the converted file is only a temporary artefact
            file.delete();
        }
        return html.toString();
    }

    /**
     * Returns the text between the first {@code prefix} and the first
     * {@code subfix} that follows it.
     *
     * @param html   text to cut from
     * @param prefix start marker; when absent the result starts at the beginning
     * @param subfix end marker; when absent the result runs to the end
     * @return the text between the two markers, markers excluded
     */
    public static String subString(String html, String prefix, String subfix) {
        int prefixAt = html.indexOf(prefix);
        int start = prefixAt < 0 ? 0 : prefixAt + prefix.length();
        int end = html.indexOf(subfix, start);
        if (end < 0) {
            end = html.length();
        }
        return html.substring(start, end);
    }

    /**
     * Strips markup that is not wanted in an embedded HTML fragment.
     *
     * <p>Steps, in order: keep only the {@code <BODY>} element and rename it to
     * {@code <DIV>}; prefix every {@code <IMG SRC="} value with
     * {@code docImgPath + "/"}; replace {@code <P ...>} by a bare {@code <p>};
     * remove font/span/xml/del/ins/meta and namespaced (o:, v:, w:, x:, p:) tags;
     * remove lang/class/style/size/face and namespaced attributes from the
     * remaining tags while keeping all other attributes.
     *
     * @param htmlStr    HTML with complex markup
     * @param docImgPath path placed in front of every image source; inserted
     *                   literally, so backslashes and {@code $} are safe
     * @return the cleaned HTML
     */
    public static String clearFormat(String htmlStr, String docImgPath) {
        Matcher bodyMatcher = BODY_PATTERN.matcher(htmlStr);
        if (bodyMatcher.find()) {
            // keep the BODY content and turn the BODY tag into a DIV
            htmlStr = bodyMatcher.group().replaceFirst("<BODY", "<DIV")
                    .replace("</BODY>", "</DIV>");
        }
        // Literal replacement: docImgPath must not be interpreted as a regex replacement.
        htmlStr = htmlStr.replace("<IMG SRC=\"", "<IMG SRC=\"" + docImgPath + "/");
        // <P ...>...</P> -> <p>...</p>, dropping the paragraph attributes
        htmlStr = PARAGRAPH_PATTERN.matcher(htmlStr).replaceAll("<p$1</p>");
        htmlStr = UNWANTED_TAG_PATTERN.matcher(htmlStr).replaceAll("");
        return stripUnwantedAttributes(htmlStr);
    }

    /** Rewrites every tag in {@code html} without its unwanted attributes. */
    private static String stripUnwantedAttributes(String html) {
        Matcher tag = TAG_PATTERN.matcher(html);
        StringBuffer out = new StringBuffer(html.length());
        while (tag.find()) {
            tag.appendReplacement(out, Matcher.quoteReplacement(cleanTag(tag.group())));
        }
        tag.appendTail(out);
        return out.toString();
    }

    /** Removes the unwanted attributes from a single tag, keeping all others. */
    private static String cleanTag(String tag) {
        Matcher attribute = ATTRIBUTE_PATTERN.matcher(tag);
        StringBuffer out = new StringBuffer(tag.length());
        while (attribute.find()) {
            boolean unwanted = UNWANTED_ATTRIBUTE_NAME.matcher(attribute.group(1)).matches();
            attribute.appendReplacement(out,
                    unwanted ? "" : Matcher.quoteReplacement(attribute.group()));
        }
        attribute.appendTail(out);
        return out.toString();
    }
}
复制代码

 

 

控制器使用类

OpenOfficeController.java

 

复制代码
import org.json.JSONObject;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.util.FileCopyUtils;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import java.io.File;
import java.io.FileOutputStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.UUID;

/**
 * REST endpoint that stores an uploaded office document, converts it to HTML
 * through {@link OpenOfficeConverter} and returns the cleaned HTML as JSON.
 *
 * @author yvioo
 */
@RestController
public class OpenOfficeController {

    /**
     * Kept for callers that reference it. {@link SimpleDateFormat} is not
     * thread-safe, so this shared instance is no longer used by
     * {@link #generateMonthname()}.
     */
    public static final DateFormat YEAR_MONTH_FORMAT = new SimpleDateFormat(
            "yyyyMM");


    @Autowired
    private OpenOfficeConverter openOfficeConverter;


    /** Base directory for uploads, injected from {@code fileUploadPath}. */
    @Value("${fileUploadPath}")
    private String fileUploadPath ;


    /**
     * Receives a document in the {@code Filedata} multipart field, converts it
     * to HTML and returns a JSON string.
     *
     * @param file uploaded document; may be {@code null} because the parameter is optional
     * @return on success {@code {"status":0,"txt":<html>,"title":<original name>}};
     *         on any failure {@code {"status":1}}
     */
    @RequestMapping(value = "/o_docUpload", method = RequestMethod.POST)
    public String docUpload(@RequestParam(value = "Filedata", required = false) MultipartFile file) {
        if (file == null || file.isEmpty()) {
            return failure();
        }
        String origName = file.getOriginalFilename();
        // TODO check the extension against a list of allowed suffixes
        String extName = extensionOf(origName);
        if (extName == null) {
            return failure();
        }

        // store the upload under a generated name so client-supplied names never reach the file system
        String fileName = UUID.randomUUID().toString() + extName;
        String path = fileUploadPath + fileName;
        File toFile = new File(path);
        File uploadDir = toFile.getParentFile();
        if (uploadDir != null && !uploadDir.exists()) {
            uploadDir.mkdirs();
        }
        try {
            // FileCopyUtils closes both streams when it is done
            FileCopyUtils.copy(file.getInputStream(), new FileOutputStream(toFile));
        } catch (Exception e) {
            // Without the stored file there is nothing to convert.
            e.printStackTrace();
            return failure();
        }

        // directory that receives the converted HTML and the images extracted from the document
        String docImgPath = fileUploadPath + generateMonthname() + "/";
        new File(docImgPath).mkdirs();
        openOfficeConverter.setFilePath(docImgPath);
        path = path.replace("\\", "/");
        try {
            File outFile = openOfficeConverter.convert(path, OpenOfficeConverter.HTML);
            String html = FileUtils.toHtmlString(outFile);
            String txt = FileUtils.clearFormat(FileUtils.subString(html, "<HTML>", "</HTML>"), docImgPath);
            JSONObject data = new JSONObject();
            data.put("status", 0);
            data.put("txt", txt);
            data.put("title", origName);
            return data.toString();
        } catch (Exception e) {
            e.printStackTrace();
            // Report the failure to the client instead of returning an empty body.
            return failure();
        }
    }

    /** Builds the JSON body returned for every failed request. */
    private static String failure() {
        JSONObject data = new JSONObject();
        data.put("status", 1);
        return data.toString();
    }

    /**
     * Extracts the extension, including the leading dot, from a file name.
     *
     * @return the extension, or {@code null} when the name is {@code null}, has no
     *         extension, or the extension contains anything but letters and digits
     */
    private static String extensionOf(String fileName) {
        if (fileName == null) {
            return null;
        }
        int dot = fileName.lastIndexOf('.');
        if (dot < 0 || dot == fileName.length() - 1) {
            return null;
        }
        String extension = fileName.substring(dot);
        for (int i = 1; i < extension.length(); i++) {
            // rejects path separators and other characters smuggled in via the file name
            if (!Character.isLetterOrDigit(extension.charAt(i))) {
                return null;
            }
        }
        return extension;
    }


    /**
     * Returns the folder name for the current month.
     *
     * @return the current date formatted as {@code yyyyMM}
     */
    public static String generateMonthname() {
        // A fresh formatter per call: request threads must not share a SimpleDateFormat.
        return new SimpleDateFormat("yyyyMM").format(new Date());
    }
}
复制代码

 

 

如果idea启动报错了 

复制代码
Description:

The bean 'openOfficeConverter', defined in class path resource [com/web/openoffice/OpenOfficeConfig.class], could not be registered. A bean with that name has already been defined in file [D:\admin\target\classes\com\web\openoffice\OpenOfficeConverter.class] and overriding is disabled.

Action:

Consider renaming one of the beans or enabling overriding by setting spring.main.allow-bean-definition-overriding=true

Disconnected from the target VM, address: '127.0.0.1:50132', transport: 'socket'

Process finished with exit code 0
复制代码

 

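出现这个错误是因为OpenOfficeConverter同时通过@Component注解和OpenOfficeConfig里的@Bean方法注册了同名的bean(openOfficeConverter)。在application.properties配置里面增加下面这一行即可(如果使用application.yml,对应的就是上文已经配置的 spring.main.allow-bean-definition-overriding: true):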

spring.main.allow-bean-definition-overriding=true

 

 

如果报错

Caused by: java.lang.ClassNotFoundException: com.sun.star.lang.XEventListener
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    ... 60 common frames omitted

 

加入maven

 <!-- https://mvnrepository.com/artifact/org.openoffice/ridl -->
        <dependency>
            <groupId>org.openoffice</groupId>
            <artifactId>ridl</artifactId>
            <version>2.2.1</version>
        </dependency>

 

报错

Caused by: java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: com/sun/star/comp/helper/Bootstrap
    at java.util.concurrent.FutureTask.report(FutureTask.java:122)
    at java.util.concurrent.FutureTask.get(FutureTask.java:192)
    at org.artofsolving.jodconverter.office.ManagedOfficeProcess.startAndWait(ManagedOfficeProcess.java:62)
    ... 45 more

 

引入maven

<!-- https://mvnrepository.com/artifact/org.openoffice/juh -->
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>juh</artifactId>
    <version>2.2.1</version>
</dependency>

 

报错

复制代码
Caused by: java.lang.NoClassDefFoundError: com/sun/star/frame/XComponentLoader
    at org.artofsolving.jodconverter.AbstractConversionTask.loadDocument(AbstractConversionTask.java:86)
    at org.artofsolving.jodconverter.AbstractConversionTask.execute(AbstractConversionTask.java:59)
    at org.artofsolving.jodconverter.office.PooledOfficeManager$2.run(PooledOfficeManager.java:80)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run$$$capture(FutureTask.java:266)
    at java.util.concurrent.FutureTask.run(FutureTask.java)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    ... 1 more
复制代码

 

引入

 <!-- https://mvnrepository.com/artifact/org.openoffice/unoil -->
        <dependency>
            <groupId>org.openoffice</groupId>
            <artifactId>unoil</artifactId>
            <version>2.2.1</version>
        </dependency>

 

posted @   yvioo  阅读(3360)  评论(0编辑  收藏  举报
编辑推荐:
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
阅读排行:
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· DeepSeek 开源周回顾「GitHub 热点速览」
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
点击右上角即可分享
微信分享提示