Java 合并多个 Word 2007(.docx)文档(基于 docx4j)
参考文章:http://dh.swzhinan.com/post/185.html
引入的jar包
<dependency>
    <groupId>org.docx4j</groupId>
    <artifactId>docx4j</artifactId>
    <version>6.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-compress</artifactId>
    <version>1.8.1</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.58</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.10-FINAL</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.10-FINAL</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.5.0</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlgraphics</groupId>
    <artifactId>xmlgraphics-commons</artifactId>
    <version>1.3.1</version>
</dependency>
代码
1 package com.htsoft.oa.action.sjrh.tool; 2 3 import java.io.File; 4 import java.io.FileInputStream; 5 import java.io.FileOutputStream; 6 import java.io.IOException; 7 import java.io.InputStream; 8 import java.io.OutputStream; 9 import java.io.RandomAccessFile; 10 import java.nio.MappedByteBuffer; 11 import java.nio.channels.FileChannel; 12 import java.nio.channels.FileChannel.MapMode; 13 import java.text.SimpleDateFormat; 14 import java.util.ArrayList; 15 import java.util.Date; 16 import java.util.Iterator; 17 import java.util.List; 18 19 import org.apache.commons.io.IOUtils; 20 import org.docx4j.dml.wordprocessingDrawing.Inline; 21 import org.docx4j.jaxb.Context; 22 import org.docx4j.openpackaging.exceptions.Docx4JException; 23 import org.docx4j.openpackaging.packages.WordprocessingMLPackage; 24 import org.docx4j.openpackaging.parts.PartName; 25 import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart; 26 import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage; 27 import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart; 28 import org.docx4j.relationships.Relationship; 29 import org.docx4j.wml.Br; 30 import org.docx4j.wml.CTAltChunk; 31 import org.docx4j.wml.Drawing; 32 import org.docx4j.wml.ObjectFactory; 33 import org.docx4j.wml.P; 34 import org.docx4j.wml.R; 35 import org.docx4j.wml.STBrType; 36 37 import com.alibaba.fastjson.JSONObject; 38 import com.htsoft.oa.action.sjrh.pojo.MergeResult; 39 40 public class WordMergeUtils { 41 private static ObjectFactory factory = new ObjectFactory(); 42 43 /** 44 * 合并docx 45 * 46 * @param streams 47 * 要合并的word文件的输入流 48 * @param path 49 * 合并后的文件的路径 50 * @return 51 * @throws Docx4JException 52 * @throws IOException 53 */ 54 public static File mergeDocx(final List<InputStream> streams, String path) throws Docx4JException, IOException { 55 56 WordprocessingMLPackage target = null; 57 final File generated = new File(path); 58 59 int chunkId = 0; 60 
Iterator<InputStream> it = streams.iterator(); 61 while (it.hasNext()) { 62 InputStream is = it.next(); 63 if (is != null) { 64 try { 65 if (target == null) { 66 // Copy first (master) document 67 OutputStream os = new FileOutputStream(generated); 68 os.write(IOUtils.toByteArray(is)); 69 os.close(); 70 71 target = WordprocessingMLPackage.load(generated); 72 } else { 73 MainDocumentPart documentPart = target.getMainDocumentPart(); 74 75 // addPageBreak(documentPart); // 另起一页,换页 76 77 insertDocx(documentPart, IOUtils.toByteArray(is), chunkId++); 78 } 79 } catch (Exception e) { 80 e.printStackTrace(); 81 } finally { 82 is.close(); 83 } 84 } 85 } 86 87 if (target != null) { 88 target.save(generated); 89 // Docx4J.save(target, generated, Docx4J.FLAG_NONE); 90 return generated; 91 } else { 92 return null; 93 } 94 } 95 96 // 插入文档 97 private static void insertDocx(MainDocumentPart main, byte[] bytes, int chunkId) { 98 try { 99 AlternativeFormatInputPart afiPart = new AlternativeFormatInputPart( 100 new PartName("/part" + chunkId + ".docx")); 101 // afiPart.setContentType(new ContentType(CONTENT_TYPE)); 102 afiPart.setBinaryData(bytes); 103 Relationship altChunkRel = main.addTargetPart(afiPart); 104 105 CTAltChunk chunk = Context.getWmlObjectFactory().createCTAltChunk(); 106 chunk.setId(altChunkRel.getId()); 107 108 main.addObject(chunk); 109 } catch (Exception e) { 110 e.printStackTrace(); 111 } 112 } 113 114 /** 115 * wordML转word,原文件不变,返回转换完成的word文件对象。 116 * 117 * @param file 118 * @return 119 * @throws Docx4JException 120 * @throws IOException 121 */ 122 public static File wordMLToWord(File file) throws Docx4JException, IOException { 123 WordprocessingMLPackage target = WordprocessingMLPackage.load(file); 124 File temp = File.createTempFile(file.getName(), ".doc"); 125 target.save(temp); 126 return temp; 127 } 128 129 /** 130 * xml转docx,原文件不变,返回转换完成的word文件对象。 131 * 132 * @param file 133 * @return 134 * @throws Docx4JException 135 * @throws IOException 136 */ 137 public 
static File xmlToWord(File file) throws Docx4JException, IOException { 138 WordprocessingMLPackage target = WordprocessingMLPackage.load(file); 139 File temp = File.createTempFile(file.getName(), ".doc"); 140 target.save(temp); 141 return temp; 142 } 143 144 /** 145 * 合并wordML文档 146 * 147 * @param list 148 * @param path 149 * @throws Docx4JException 150 * @throws IOException 151 */ 152 public static File mergeWordML(List<File> list, String path) throws Docx4JException, IOException { 153 final List<InputStream> streams = new ArrayList<InputStream>(); 154 for (int i = 0; i < list.size(); i++) { 155 File file = list.get(i); 156 // file = WordMLUtil.wordMLToWord(file); // wordML转word 157 streams.add(new FileInputStream(file)); 158 } 159 return WordMergeUtils.mergeDocx(streams, path); 160 } 161 162 /** 163 * 把文件转换成Byte[] Mapped File way MappedByteBuffer 可以在处理大文件时,提升性能 164 * 165 * @param filename 166 * @return 167 * @throws IOException 168 */ 169 public static byte[] fileToByteArray(String filename) throws IOException { 170 171 RandomAccessFile raf = null; 172 FileChannel fc = null; 173 try { 174 raf = new RandomAccessFile(filename, "r"); 175 fc = raf.getChannel(); 176 MappedByteBuffer byteBuffer = fc.map(MapMode.READ_ONLY, 0, fc.size()).load(); 177 System.out.println(byteBuffer.isLoaded()); 178 byte[] result = new byte[(int) fc.size()]; 179 if (byteBuffer.remaining() > 0) { 180 byteBuffer.get(result, 0, byteBuffer.remaining()); 181 } 182 return result; 183 } catch (IOException e) { 184 e.printStackTrace(); 185 throw e; 186 } finally { 187 try { 188 fc.close(); 189 raf.close(); 190 } catch (IOException e) { 191 e.printStackTrace(); 192 } 193 } 194 } 195 196 /** 197 * Docx4j拥有一个由字节数组创建图片部件的工具方法, 随后将其添加到给定的包中. 为了能将图片添加 到一个段落中, 198 * 我们需要将图片转换成内联对象. 这也有一个方法, 方法需要文件名提示, 替换文本, 两个id标识符和一个是嵌入还是链接到的指示作为参数. 199 * 一个id用于文档中绘图对象不可见的属性, 另一个id用于图片本身不可见的绘制属性. 最后我们将内联 对象添加到段落中并将段落添加到包的主文档部件. 
200 * 201 * @param word 202 * 需要编辑的文件 203 * @param imageList 204 * 图片对象集合( 图片对象属性: url 图片文件路径 keyword 文档中的图片占位符 name 图片文件名 ) 205 * @throws Exception 206 * 不幸的createImageInline方法抛出一个异常(没有更多具体的异常类型) 207 */ 208 public static void addImageToPackage(File word, List<JSONObject> imageList) throws Exception { 209 210 WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(word); 211 212 for (int i = 0; i < imageList.size(); i++) { 213 JSONObject image = imageList.get(i); 214 215 byte[] bytes = fileToByteArray(image.getString("url")); 216 217 BinaryPartAbstractImage imagePart = BinaryPartAbstractImage.createImagePart(wordMLPackage, bytes); 218 219 int docPrId = 1; 220 int cNvPrId = 2; 221 Inline inline = imagePart.createImageInline(image.getString("name"), image.getString("keyword"), docPrId, 222 cNvPrId, false); 223 224 P paragraph = addInlineImageToParagraph(inline); 225 226 wordMLPackage.getMainDocumentPart().addObject(paragraph); 227 } 228 229 wordMLPackage.save(word); 230 } 231 232 /** 233 * Docx4j拥有一个由字节数组创建图片部件的工具方法, 随后将其添加到给定的包中. 为了能将图片添加 到一个段落中, 234 * 我们需要将图片转换成内联对象. 这也有一个方法, 方法需要文件名提示, 替换文本, 两个id标识符和一个是嵌入还是链接到的指示作为参数. 235 * 一个id用于文档中绘图对象不可见的属性, 另一个id用于图片本身不可见的绘制属性. 最后我们将内联 对象添加到段落中并将段落添加到包的主文档部件. 236 * 237 * @param wordFilePath 238 * 文件路径 239 * @param imageList 240 * 图片对象集合( 图片对象属性: url 图片文件路径 keyword 文档中的图片占位符 name 图片文件名 ) 241 * @throws Exception 242 * 不幸的createImageInline方法抛出一个异常(没有更多具体的异常类型) 243 */ 244 public static void addImageToPackage(String wordFilePath, List<JSONObject> imageList) throws Exception { 245 addImageToPackage(new File(wordFilePath), imageList); 246 } 247 248 /** 249 * 创建一个对象工厂并用它创建一个段落和一个可运行块R. 然后将可运行块添加到段落中. 接下来创建一个图画并将其添加到可运行块R中. 最后我们将内联 250 * 对象添加到图画中并返回段落对象. 251 * 252 * @param inline 253 * 包含图片的内联对象. 
254 * @return 包含图片的段落 255 */ 256 private static P addInlineImageToParagraph(Inline inline) { 257 // 添加内联对象到一个段落中 258 P paragraph = factory.createP(); 259 R run = factory.createR(); 260 paragraph.getContent().add(run); 261 Drawing drawing = factory.createDrawing(); 262 run.getContent().add(drawing); 263 drawing.getAnchorOrInline().add(inline); 264 return paragraph; 265 } 266 267 /** 268 * 文档结尾添加一个空白页 269 * 270 * @throws Docx4JException 271 */ 272 public static void addPageBreak(File word) throws Docx4JException { 273 274 WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(word); 275 276 MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart(); 277 278 Br breakObj = new Br(); 279 breakObj.setType(STBrType.PAGE); 280 281 P paragraph = factory.createP(); 282 paragraph.getContent().add(breakObj); 283 documentPart.getJaxbElement().getBody().getContent().add(paragraph); 284 wordMLPackage.save(word); 285 } 286 287 /** 288 * 文档结尾添加一个空白页 289 * 290 * @throws Docx4JException 291 */ 292 public static void addPageBreak(MainDocumentPart documentPart) { 293 Br breakObj = new Br(); 294 breakObj.setType(STBrType.PAGE); 295 296 P paragraph = factory.createP(); 297 paragraph.getContent().add(breakObj); 298 documentPart.getJaxbElement().getBody().getContent().add(paragraph); 299 } 300 301 /** 302 * 文档结尾添加一个空白页 303 * 304 * @throws Docx4JException 305 */ 306 public static void addPageBreak(String wordFilePath) throws Docx4JException { 307 addPageBreak(new File(wordFilePath)); 308 } 309 310 /** 311 * 合并word文档 接口方法 312 * 313 * @param sourceFiles待合并文件 314 * @param mergedFileName合并后的文件名称 315 * @throws Exception 316 */ 317 public static MergeResult merge(String djxh, List<String> sourceFiles, String mergedFileName) { 318 319 if (djxh == null || djxh.isEmpty()) { 320 return new MergeResult(-1, null, "登记序号为空!", null); 321 } else if (sourceFiles == null || sourceFiles.size() <= 0) { 322 return new MergeResult(-1, null, "待合并文件路径为空!", null); 323 } 324 325 try { 326 
List<File> files = new ArrayList<File>(); 327 for (String filePath : sourceFiles) { 328 File file = new File(filePath); 329 files.add(file); 330 } 331 332 // 保存基础路径 333 String path = ""; 334 if ("1".equals(WordStaticFileds.open_Fixed_path)) { 335 // 创建固定路径 336 path = WordStaticFileds.create_word_path + "word/fixed/" + djxh; 337 } else { 338 // 创建不固定路径 339 path = WordStaticFileds.create_word_path + "word/notFixed/" 340 + new SimpleDateFormat("yyyyMMdd").format(new Date()) + "/" + djxh; 341 } 342 343 if (mergedFileName == null || mergedFileName.isEmpty()) { 344 if (files.size() > 0) { 345 String oldName = files.get(0).getName(); 346 int lastIndexOf = oldName.lastIndexOf("."); 347 if (lastIndexOf > 0) { 348 mergedFileName = oldName.substring(0, lastIndexOf) + "-合并后.docx"; 349 } 350 } 351 } 352 353 File mergedfile = new File(path); 354 355 if (!mergedfile.exists()) { 356 mergedfile.mkdirs(); 357 } 358 359 String mergedFullPath = path + "/" + mergedFileName; 360 File mergeWordML = WordMergeUtils.mergeWordML(files, mergedFullPath); 361 362 363 return new MergeResult(0, mergeWordML, "合并word文件成功!", mergeWordML.getAbsolutePath()); 364 } catch (Exception e) { 365 return new MergeResult(-1, null, "合并word文件出错!错误信息:" + e.getMessage(), null); 366 } 367 368 } 369 }