Java + POI导出富文本的内容到word文档
一、需求:
使用富文本编辑器编辑后的数据,在传输到后台时都带有html标签。
如:<h1>标题头</h1><h2>第二个标题</h2><a href="www.baidu.com">百度搜索</a>。我们想把这样的富文本数据转换为Word内容。
二、依赖
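<!-- jsoup依赖 用于解析html中的图片标签(图片缩放由后面的ImageUtils完成)--> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.12.1</version> </dependency> <!-- poi依赖--> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>4.1.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>4.1.0</version> </dependency> <!-- commons-lang3依赖 接口类中用到了StringUtils;版本由Spring Boot父POM管理,否则需要显式指定version--> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-lang3</artifactId> </dependency>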
三、解决方案
Word是完全支持html标签的,但是我们获取到的富文本内容并不是完整的html代码,所以我们需要先补全html标签,然后转码,最后输出。
1,接口类
package com.zl.exportword;

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Endpoints that export a rich-text (HTML fragment) sample as a Word document.
 *
 * <p>Before the export, the {@code src} of every {@code <img>} tag is treated as Base64 image
 * data: images wider than {@link #MAX_IMAGE_WIDTH} pixels are scaled down proportionally and
 * re-encoded as PNG, and sources that cannot be decoded as an image are emptied.
 *
 * @author lei
 * @version 1.0
 * @date 2022/11/14 10:25
 */
@RestController
@RequestMapping("/export")
public class ExportController {

    /** Widest image, in pixels, that is embedded without being scaled down. */
    private static final int MAX_IMAGE_WIDTH = 650;

    /** Data-URI prefix used for images that this class re-encodes (always PNG). */
    private static final String PNG_DATA_URI_PREFIX = "data:image/png;base64,";

    /** Matches the leading {@code data:image/<type>;base64,} part of a data URI, for any image type. */
    private static final Pattern DATA_URI_PREFIX =
            Pattern.compile("^data:image/[^;,]+;base64,", Pattern.CASE_INSENSITIVE);

    /** Sample rich-text content shared by both endpoints (everything before the image tag). */
    private static final String SAMPLE_HTML =
            "<h1>如何将富文本内容导出到word文档</h1><p style=\"color:red;font-size:20px;\">采用poi将富文本内容导出到word文档</p><div style=\"background-color:green;\">这是有背景颜色的div内容</div>\n";

    /** Text file holding the pre-computed Base64 encoding of the sample image (local test only). */
    private static final String LOCAL_BASE64_FILE = "/Users/lei/base.txt";

    /** Directory the local test export is written to; must end with a path separator. */
    private static final String LOCAL_OUTPUT_DIR = "/Users/lei/";

    /**
     * Exports the sample rich-text content as a Word download.
     *
     * @param request  current HTTP request
     * @param response HTTP response the document is streamed to
     * @throws Exception if an image cannot be processed or the document cannot be written
     */
    @RequestMapping(value = "/exportWord")
    public void export(HttpServletRequest request, HttpServletResponse response) throws Exception {
        String tmpContent = SAMPLE_HTML + "<img src=\"这里写base64后的图片编码\">这是base64编码后的图片";
        WordUtil.exportHtmlToWord(request, response, shrinkEmbeddedImages(tmpContent), "富文本内容导出word");
    }

    /**
     * Local test variant: reads the Base64 image text from {@link #LOCAL_BASE64_FILE}, embeds it
     * into the sample content and writes the Word document into {@link #LOCAL_OUTPUT_DIR}.
     *
     * @throws Exception if the input file cannot be read, an image cannot be processed or the
     *                   document cannot be written
     */
    @RequestMapping(value = "/export")
    public void export() throws Exception {
        // The encoded image is too long to inline, so it is kept in a text file. Base64 is pure
        // ASCII, hence reading it as UTF-8 is safe regardless of the platform default charset.
        String base64Image =
                new String(Files.readAllBytes(Paths.get(LOCAL_BASE64_FILE)), StandardCharsets.UTF_8);
        String tmpContent = SAMPLE_HTML + "<img src=\"" + base64Image + "\">这是base64编码后的图片";
        // WordUtil produces an OLE2 container, so the file is named .doc like the web variant.
        WordUtil.exportHtmlToWord(LOCAL_OUTPUT_DIR, shrinkEmbeddedImages(tmpContent), "富文本内容导出word.doc");
    }

    /**
     * Returns {@code html} with every non-blank image source replaced by its processed form
     * (see {@link #rescaledSource(String)}).
     */
    private static String shrinkEmbeddedImages(String html) throws IOException {
        Document document = Jsoup.parse(html);
        Elements images = document.getElementsByTag("img");

        // Pass 1: swap each distinct source for a short placeholder. Literal replacement is used
        // on purpose: a source is arbitrary text and must not be interpreted as a regex. Blank
        // sources are skipped because replacing "" or " " would hit every position/space.
        List<String> sources = new ArrayList<>();
        String result = html;
        for (Element image : images) {
            String src = image.attr("src");
            if (StringUtils.isBlank(src) || sources.contains(src)) {
                continue;
            }
            result = result.replace(src, placeholder(sources.size()));
            sources.add(src);
        }

        // Pass 2: put the (possibly scaled) image data back in place of the placeholders.
        for (int i = 0; i < sources.size(); i++) {
            result = result.replace(placeholder(i), rescaledSource(sources.get(i)));
        }
        return result;
    }

    /** Builds the placeholder token that temporarily stands in for the image with this index. */
    private static String placeholder(int index) {
        return "{{image_src" + index + "}}";
    }

    /**
     * Computes the value to write back for one image source: an empty string when the data is
     * not a decodable image, the image unchanged when it is narrow enough, otherwise a PNG data
     * URI of the image scaled to {@link #MAX_IMAGE_WIDTH} with its aspect ratio preserved.
     */
    private static String rescaledSource(String src) throws IOException {
        Matcher prefix = DATA_URI_PREFIX.matcher(src);
        boolean hasPrefix = prefix.find();
        String payload = hasPrefix ? src.substring(prefix.end()) : src;

        BufferedImage image = ImageUtils.bytesToBufferedImage(ImageUtils.base64ToByte(payload));
        if (image == null) {
            return "";
        }

        int width = image.getWidth();
        if (width <= MAX_IMAGE_WIDTH) {
            // Keep the original data URI (and thus its real media type); bare Base64 gets a prefix.
            return hasPrefix ? src : PNG_DATA_URI_PREFIX + payload;
        }

        // Scale the height by the same factor as the width; never let it collapse to 0 pixels.
        int height = Math.max(1, (int) (image.getHeight() * (double) MAX_IMAGE_WIDTH / width));
        BufferedImage scaled = ImageUtils.resizeImage(image, MAX_IMAGE_WIDTH, height);
        return PNG_DATA_URI_PREFIX + ImageUtils.imageToBase64(ImageUtils.imageToBytes(scaled));
    }
}
2,工具类
package com.zl.exportword;

import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URLEncoder;

/**
 * Utility for exporting rich-text (HTML) content as a Word document with Apache POI.
 *
 * <p>The HTML fragment is wrapped in a complete HTML page carrying the Microsoft Office XML
 * namespaces and stored as the {@code WordDocument} entry of an OLE2 container.
 *
 * @author lei
 * @version 1.0
 * @date 2022/11/14 10:23
 */
public class WordUtil {

    /** Charset the wrapped HTML is encoded with before it is stored in the container. */
    private static final String HTML_CHARSET = "GBK";

    /** Opening {@code <html>} tag with the Office namespaces, so that Word recognises the page. */
    private static final String HTML_OPEN = "<html "
            + "xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:w=\"urn:schemas-microsoft-com:office:word\" xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\" xmlns=\"http://www.w3.org/TR/REC-html40\""
            + ">";

    /** Head section; {@code <w:View>Print</w:View>} switches Word from web layout to page layout. */
    private static final String HTML_HEAD = "<head>"
            + "<!--[if gte mso 9]><xml><w:WordDocument><w:View>Print</w:View><w:TrackMoves>false</w:TrackMoves>"
            + "<w:TrackFormatting/><w:ValidateAgainstSchemas/><w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>"
            + "<w:IgnoreMixedContent>false</w:IgnoreMixedContent>"
            + "<w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText><w:DoNotPromoteQF/>"
            + "<w:LidThemeOther>EN-US</w:LidThemeOther><w:LidThemeAsian>ZH-CN</w:LidThemeAsian>"
            + "<w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript>"
            + "<w:Compatibility><w:BreakWrappedTables/><w:SnapToGridInCell/><w:WrapTextWithPunct/>"
            + "<w:UseAsianBreakRules/><w:DontGrowAutofit/><w:SplitPgBreakAndParaMark/>"
            + "<w:DontVertAlignCellWithSp/><w:DontBreakConstrainedForcedTables/><w:DontVertAlignInTxbx/>"
            + "<w:Word11KerningPairs/><w:CachedColBalance/><w:UseFELayout/></w:Compatibility>"
            + "<w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>"
            + "<m:mathPr><m:mathFont m:val=\"Cambria Math\"/><m:brkBin m:val=\"before\"/>"
            + "<m:brkBinSub m:val=\"--\"/><m:smallFrac m:val=\"off\"/><m:dispDef/><m:lMargin m:val=\"0\"/> "
            + "<m:rMargin m:val=\"0\"/><m:defJc m:val=\"centerGroup\"/><m:wrapIndent m:val=\"1440\"/>"
            + "<m:intLim m:val=\"subSup\"/><m:naryLim m:val=\"undOvr\"/></m:mathPr></w:WordDocument></xml><![endif]-->"
            + "</head>";

    /**
     * Exports rich-text content as a Word download.
     *
     * @param request  current HTTP request
     * @param response HTTP response the document is streamed to; its output stream is closed
     * @param content  HTML fragment to export
     * @param fileName download file name without extension ({@code .doc} is appended)
     * @throws Exception if the document cannot be built or written
     */
    public static void exportHtmlToWord(HttpServletRequest request, HttpServletResponse response, String content, String fileName) throws Exception {
        // Build the document first so that no headers are sent when building fails.
        try (POIFSFileSystem poifs = buildWordFileSystem(content)) {
            request.setCharacterEncoding("utf-8");
            response.setContentType("application/msword");

            // Percent-encode the name as UTF-8 and send it via filename* (RFC 6266), with the
            // same value as the plain filename fallback. URLEncoder writes spaces as '+', which
            // is only valid in form data, so they are turned into %20.
            String encodedName = URLEncoder.encode(fileName + ".doc", "UTF-8").replace("+", "%20");
            response.addHeader("Content-Disposition",
                    "attachment; filename=\"" + encodedName + "\"; filename*=UTF-8''" + encodedName);

            try (ServletOutputStream ostream = response.getOutputStream()) {
                poifs.writeFilesystem(ostream);
            }
        }
    }

    /**
     * Exports rich-text content to a Word file on the local file system.
     *
     * @param filepath target directory; it is concatenated with {@code fileName} as is, so it
     *                 must end with a path separator
     * @param content  HTML fragment to export
     * @param fileName name of the file to create, including its extension
     * @throws Exception if the document cannot be built or written
     */
    public static void exportHtmlToWord(String filepath, String content, String fileName) throws Exception {
        try (POIFSFileSystem poifs = buildWordFileSystem(content);
             FileOutputStream out = new FileOutputStream(new File(filepath + fileName))) {
            poifs.writeFilesystem(out);
        }
    }

    /**
     * Wraps {@code content} in the Word-aware HTML page and stores it as the
     * {@code WordDocument} entry of a new OLE2 file system. The caller must close the result.
     */
    private static POIFSFileSystem buildWordFileSystem(String content) throws IOException {
        StringBuilder page = new StringBuilder();
        page.append(HTML_OPEN);
        page.append(HTML_HEAD);
        page.append("<body>");
        page.append(content);
        page.append("</body></html>");

        // An explicit charset is required; otherwise Chinese text comes out garbled.
        byte[] html = page.toString().getBytes(HTML_CHARSET);

        POIFSFileSystem poifs = new POIFSFileSystem();
        try {
            DirectoryEntry root = poifs.getRoot();
            // The entry must be named "WordDocument"; otherwise the export is garbled.
            root.createDocument("WordDocument", new ByteArrayInputStream(html));
            return poifs;
        } catch (IOException | RuntimeException e) {
            // Do not leak the file system when the entry cannot be created.
            poifs.close();
            throw e;
        }
    }
}
package com.zl.exportword;

import javax.imageio.ImageIO;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Base64;

/**
 * Image helper methods: resizing, Base64 conversion and byte[] conversion.
 *
 * @author lei
 * @date 2022/11/14 10:20
 * @version 1.0
 */
public class ImageUtils {

    /**
     * Scales an image to the given size.
     *
     * @param originalImage image to scale
     * @param targetWidth   width of the result in pixels
     * @param targetHeight  height of the result in pixels
     * @return a new image of the requested size; it has an alpha channel when the original has
     *         one, so transparent areas are not flattened to black
     * @throws IOException declared for backward compatibility with existing callers
     */
    public static BufferedImage resizeImage(BufferedImage originalImage, int targetWidth, int targetHeight) throws IOException {
        Image scaled = originalImage.getScaledInstance(targetWidth, targetHeight, Image.SCALE_AREA_AVERAGING);
        int type = originalImage.getColorModel().hasAlpha()
                ? BufferedImage.TYPE_INT_ARGB
                : BufferedImage.TYPE_INT_RGB;
        BufferedImage outputImage = new BufferedImage(targetWidth, targetHeight, type);
        Graphics2D graphics = outputImage.createGraphics();
        try {
            graphics.drawImage(scaled, 0, 0, null);
        } finally {
            // Graphics contexts hold native resources and must be released explicitly.
            graphics.dispose();
        }
        return outputImage;
    }

    /**
     * Encodes bytes as a Base64 string.
     *
     * @param data bytes to encode
     * @return the Base64 text on a single line (no line breaks), suitable for a data URI
     */
    public static String imageToBase64(byte[] data) {
        return Base64.getEncoder().encodeToString(data);
    }

    /**
     * Decodes a Base64 string into bytes. Line breaks and other characters outside the Base64
     * alphabet are ignored.
     *
     * @param base64 Base64 text
     * @return the decoded bytes
     * @throws IOException if the input is not valid Base64
     */
    public static byte[] base64ToByte(String base64) throws IOException {
        try {
            return Base64.getMimeDecoder().decode(base64);
        } catch (IllegalArgumentException e) {
            throw new IOException("Invalid Base64 input", e);
        }
    }

    /**
     * Serialises an image as PNG.
     *
     * @param bImage image to serialise
     * @return the PNG bytes
     * @throws UncheckedIOException if the image cannot be written, instead of silently
     *                              returning incomplete data
     */
    public static byte[] imageToBytes(BufferedImage bImage) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            ImageIO.write(bImage, "png", out);
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to encode image as PNG", e);
        }
        return out.toByteArray();
    }

    /**
     * Decodes image bytes into a {@link BufferedImage}.
     *
     * @param ImageByte encoded image bytes; may be {@code null} or empty
     * @return the decoded image, or {@code null} when the bytes are missing or are not a
     *         readable image
     */
    public static BufferedImage bytesToBufferedImage(byte[] ImageByte) {
        if (ImageByte == null || ImageByte.length == 0) {
            return null;
        }
        try {
            return ImageIO.read(new ByteArrayInputStream(ImageByte));
        } catch (IOException e) {
            // Corrupt image data is reported the same way as an unknown format: null.
            return null;
        }
    }
}
四、测试
注意:
<h1>如何将富文本内容导出到word文档</h1><p style="color:red;font-size:20px;">采用poi将富文本内容导出到word文档</p><div style="background-color:green;">这是有背景颜色的div内容</div> <img src="这里写base64后的图片编码">这是base64编码后的图片
首先将图片转换成base64编码,然后把编码赋值到上面的【img】标签中的【src】属性里面即可测试。
图片在线转base64编码:http://www.jsons.cn/img2base64/
package com.zl;

import com.zl.exportword.ExportController;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

import javax.annotation.Resource;

/**
 * Spring Boot test that triggers the local, file-based Word export of {@link ExportController}.
 */
@SpringBootTest
class JavaSeBaseApplicationTests {

    // Controller under test, injected from the Spring test context.
    @Resource
    private ExportController exportController;

    /**
     * Calls the no-argument {@code export()} endpoint method directly.
     *
     * <p>The test contains no assertions: it can only fail if the call throws. The export works
     * on files under {@code /Users/lei/}, so the result has to be inspected manually.
     */
    @Test
    void contextLoads() throws Exception {
        exportController.export();
    }
}
心有所想,必有回响