Word 转 HTML
1. Maven 依赖
<!-- Apache POI: OOXML (.docx) support. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- Apache POI scratchpad: provides the HWPF classes used for legacy .doc files. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- xdocreport converter: XWPF (.docx) to XHTML. -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
    <version>2.0.2</version>
</dependency>
<!-- NOTE: the WordUtils example also imports cn.hutool (ImgUtil) and org.slf4j,
     which are not declared in this snippet and must be on the classpath. -->
2. 实例
package com.baidu.cms.utils;
import cn.hutool.core.img.ImgUtil;
import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.awt.image.BufferedImage;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.List;
import java.util.Locale;
/**
 * Converts Word documents ({@code .doc} and {@code .docx}) to a single HTML file
 * with images embedded as base64 data URIs.
 *
 * <p>All methods report success through their boolean return value; failures are
 * logged and never propagated to the caller.
 */
public class WordUtils {
    private static final Logger log = LoggerFactory.getLogger(WordUtils.class);

    /** Meta tag inserted before {@code </head>} so browsers decode the page as UTF-8. */
    private static final String CHARSET_META =
            "<meta http-equiv='Content-Type' content='text/html;charset=utf-8'/>";

    /** Marker in front of which {@link #CHARSET_META} is inserted. */
    private static final String HEAD_CLOSE = "</head>";

    /**
     * Converts a Word file to HTML, choosing the converter from the file extension,
     * then post-processes the result with {@link #editHtml(String)}.
     *
     * @param sourcePath path of the {@code .doc} or {@code .docx} file to convert
     * @param outPath    path of the HTML file to write
     * @return {@code true} if the document was converted; {@code false} if the source
     *         does not exist, has an unsupported extension, or conversion failed
     */
    public static boolean word2Html(String sourcePath, String outPath) {
        boolean flag = false;
        try {
            File file = new File(sourcePath);
            if (!file.exists()) {
                return false;
            }
            String fName = file.getName();
            String suffix = fName.substring(fName.lastIndexOf('.') + 1).toLowerCase(Locale.ROOT);
            // Exact match: endsWith("doc") would also accept extensions such as "xdoc".
            if ("doc".equals(suffix)) {
                flag = docToHtml(sourcePath, outPath);
            } else if ("docx".equals(suffix)) {
                flag = docxToHtml(sourcePath, outPath);
            }
            // Only post-process a file that this call actually produced.
            boolean editFlag = flag && editHtml(outPath);
            log.info("word2html({}->{}):parser({});edit({})", sourcePath, outPath, flag, editFlag);
        } catch (Exception e) {
            // Trailing throwable makes SLF4J log the stack trace as well.
            log.error("word2htmlError({}->{}):{}", sourcePath, outPath, String.valueOf(e), e);
        }
        return flag;
    }

    /**
     * Converts a legacy binary {@code .doc} file to HTML using POI's
     * {@link WordToHtmlConverter}. Pictures are inlined as base64 data URIs.
     *
     * @param wordPath path of the {@code .doc} file
     * @param htmlPath path of the HTML file to write; missing parent directories are created
     * @return {@code true} on success, {@code false} if any step failed (the error is logged)
     */
    public static boolean docToHtml(String wordPath, String htmlPath) {
        boolean flag = false;
        try {
            File htmlFile = new File(htmlPath);
            Document htmlDocument;
            try (InputStream input = new FileInputStream(wordPath);
                 HWPFDocument wordDocument = new HWPFDocument(input)) {
                WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                        DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
                wordToHtmlConverter.setPicturesManager(
                        (content, pictureType, suggestedName, widthInches, heightInches) -> {
                            // Re-encode the picture and return it as a data URI so the HTML is self-contained.
                            BufferedImage bufferedImage = ImgUtil.toImage(content);
                            String base64Img = ImgUtil.toBase64(bufferedImage, pictureType.getExtension());
                            return "data:;base64," + base64Img;
                        });
                wordToHtmlConverter.processDocument(wordDocument);
                htmlDocument = wordToHtmlConverter.getDocument();
            }

            ensureParentDirectory(htmlFile);
            try (OutputStream outStream = new FileOutputStream(htmlFile)) {
                Transformer serializer = TransformerFactory.newInstance().newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
            }
            flag = true;
        } catch (Exception e) {
            log.error("Doc解析异常({}->{}):{}", wordPath, htmlPath, String.valueOf(e), e);
        }
        return flag;
    }

    /**
     * Converts a {@code .docx} file to XHTML using the xdocreport {@link XHTMLConverter}.
     * Images are embedded through {@link Base64EmbedImgManager}.
     *
     * @param wordPath path of the {@code .docx} file
     * @param htmlPath path of the HTML file to write; missing parent directories are created
     * @return {@code true} on success, {@code false} if any step failed (the error is logged)
     */
    public static boolean docxToHtml(String wordPath, String htmlPath) {
        boolean flag = false;
        try {
            // SECURITY NOTE: a negative ratio disables POI's zip-bomb detection, and the
            // setting is static (JVM-wide). Kept for compatibility with highly compressed
            // documents; do not feed untrusted files through this method without review.
            ZipSecureFile.setMinInflateRatio(-1.0d);

            File htmlFile = new File(htmlPath);
            ensureParentDirectory(htmlFile);

            XHTMLOptions options = XHTMLOptions.create()
                    .indent(4)
                    .setImageManager(new Base64EmbedImgManager());
            // Resources are opened in declaration order, so a missing source file fails
            // before the output file is created, and everything is closed afterwards.
            try (InputStream in = new FileInputStream(wordPath);
                 XWPFDocument document = new XWPFDocument(in);
                 OutputStream out = new FileOutputStream(htmlFile)) {
                XHTMLConverter.getInstance().convert(document, out, options);
            }
            flag = true;
        } catch (Exception e) {
            log.error("Docx解析异常({}->{}):{}", wordPath, htmlPath, String.valueOf(e), e);
        }
        return flag;
    }

    /**
     * Inserts a UTF-8 {@code Content-Type} meta tag immediately before the first
     * {@code </head>} of the given HTML file, rewriting the file in place.
     *
     * <p>The file is read and written as UTF-8, matching the encoding the converters
     * in this class are configured to produce; line breaks are preserved.
     *
     * @param htmlPath path of the HTML file to modify
     * @return {@code true} if the tag was inserted; {@code false} if the file has no
     *         {@code </head>} or could not be read or written (the error is logged)
     */
    public static boolean editHtml(String htmlPath) {
        try {
            File htmlFile = new File(htmlPath);
            String content = new String(Files.readAllBytes(htmlFile.toPath()), StandardCharsets.UTF_8);
            int headEnd = content.indexOf(HEAD_CLOSE);
            if (headEnd < 0) {
                // Nothing to anchor the tag to; leave the file untouched.
                log.warn("editHtml({}): no {} found, file left unchanged", htmlPath, HEAD_CLOSE);
                return false;
            }
            String newContent = new StringBuilder(content.length() + CHARSET_META.length())
                    .append(content, 0, headEnd)
                    .append(CHARSET_META)
                    .append(content, headEnd, content.length())
                    .toString();
            Files.write(htmlFile.toPath(), newContent.getBytes(StandardCharsets.UTF_8));
            return true;
        } catch (Exception e) {
            log.error("editHtml({}) failed: {}", htmlPath, String.valueOf(e), e);
            return false;
        }
    }

    /** Creates the parent directory of {@code target} if it has one and it does not exist yet. */
    private static void ensureParentDirectory(File target) throws IOException {
        File parent = target.getParentFile();
        // getParentFile() is null for a bare file name; the working directory is used then.
        if (parent != null && !parent.exists() && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Unable to create directory: " + parent);
        }
    }

    /** Manual smoke test converting two sample files from a hard-coded local folder. */
    public static void main(String[] args) {
        word2Html("G:\\test\\download\\test.docx", "G:\\test\\download\\1.html");
        word2Html("G:\\test\\download\\test.doc", "G:\\test\\download\\2.html");
    }
}


【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!