word文档操作-doc转docx、合并多个docx
前言:
临时来了一条新的需求:多个doc文档进行合并。
在网上苦苦搜罗了很久才找到可用的文件(原文出处找不到了,所以暂时不能附上链接地址),现在记录下来留给有需要的人。
一:doc转docx
所需jar包:链接: https://pan.baidu.com/s/1WQ33HDsON8lpFQKgLu8pCQ 提取码: n1xt
具体代码
public class Doc2Docx { public static void main(String[] args) { String docFile = "D:/files/dfa3cbb9-a0a0-497a-aa9f-d26cbee9a25b_linux.doc"; String docxFile = "D:/print/linux.docx"; doc2docx(docFile, docxFile); } /** * doc转docx * @param docFile 源文件 * @param docxFile 目标文件 */ public static void doc2docx(String docFile, String docxFile) { Document doc; try { String tempFile = docxFile.substring(0, docxFile.lastIndexOf(".")) + "_temp" + docxFile.substring(docxFile.lastIndexOf("."), docxFile.length()); doc = new Document(docFile); doc.save(tempFile); Map<String, String> map = new HashMap<String, String>(); map.put("Evaluation Only. Created with Aspose.Words. Copyright 2003-2018 Aspose Pty Ltd.", ""); DocxReplace.replaceAndGenerateWord(tempFile, docxFile, map); // forceDelete(new File(tempFile)); } catch (Exception e1) { log.error("doc 2 docx exception:{}",e1.getMessage()); } } }
public class DocxReplace { // 返回Docx中需要替换的特殊字符,没有重复项 // 推荐传入正则表达式参数"\\$\\{[^{}]+\\}" public ArrayList<String> getReplaceElementsInWord(String filePath, String regex) { String[] p = filePath.split("\\."); if (p.length > 0) {// 判断文件有无扩展名 // 比较文件扩展名 if (p[p.length - 1].equalsIgnoreCase("doc")) { ArrayList<String> al = new ArrayList<>(); File file = new File(filePath); HWPFDocument document = null; try { InputStream is = new FileInputStream(file); document = new HWPFDocument(is); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } Range range = document.getRange(); String rangeText = range.text(); CharSequence cs = rangeText.subSequence(0, rangeText.length()); Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(cs); int startPosition = 0; while (matcher.find(startPosition)) { if (!al.contains(matcher.group())) { al.add(matcher.group()); } startPosition = matcher.end(); } return al; } else if (p[p.length - 1].equalsIgnoreCase("docx")) { ArrayList<String> al = new ArrayList<>(); XWPFDocument document = null; try { document = new XWPFDocument( POIXMLDocument.openPackage(filePath)); } catch (IOException e) { e.printStackTrace(); } // 遍历段落 Iterator<XWPFParagraph> itPara = document .getParagraphsIterator(); while (itPara.hasNext()) { XWPFParagraph paragraph = (XWPFParagraph) itPara.next(); String paragraphString = paragraph.getText(); CharSequence cs = paragraphString.subSequence(0, paragraphString.length()); Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(cs); int startPosition = 0; while (matcher.find(startPosition)) { if (!al.contains(matcher.group())) { al.add(matcher.group()); } startPosition = matcher.end(); } } // 遍历表 Iterator<XWPFTable> itTable = document.getTablesIterator(); while (itTable.hasNext()) { XWPFTable table = (XWPFTable) itTable.next(); int rcount = table.getNumberOfRows(); for (int i = 0; i < rcount; i++) { XWPFTableRow row = table.getRow(i); 
List<XWPFTableCell> cells = row.getTableCells(); for (XWPFTableCell cell : cells) { String cellText = ""; cellText = cell.getText(); CharSequence cs = cellText.subSequence(0, cellText.length()); Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(cs); int startPosition = 0; while (matcher.find(startPosition)) { if (!al.contains(matcher.group())) { al.add(matcher.group()); } startPosition = matcher.end(); } } } } return al; } else { return null; } } else { return null; } } // 替换word中需要替换的特殊字符 public static boolean replaceAndGenerateWord(String srcPath, String destPath, Map<String, String> map) { String[] sp = srcPath.split("\\."); String[] dp = destPath.split("\\."); if ((sp.length > 0) && (dp.length > 0)) {// 判断文件有无扩展名 // 比较文件扩展名 if (sp[sp.length - 1].equalsIgnoreCase("docx")) { try { XWPFDocument document = new XWPFDocument(POIXMLDocument.openPackage(srcPath)); // 替换段落中的指定文字 Iterator<XWPFParagraph> itPara = document.getParagraphsIterator(); while (itPara.hasNext()) { XWPFParagraph paragraph = (XWPFParagraph) itPara.next(); List<XWPFRun> runs = paragraph.getRuns(); for (int i = 0; i < runs.size(); i++) { String oneparaString = runs.get(i).getText(runs.get(i).getTextPosition()); for (Entry<String, String> entry : map.entrySet()) { if(oneparaString.indexOf(entry.getKey())!=-1){ oneparaString = oneparaString.replace(entry.getKey(), entry.getValue()); runs.get(i).setText(oneparaString, 0); } } } } // 替换表格中的指定文字 Iterator<XWPFTable> itTable = document.getTablesIterator(); while (itTable.hasNext()) { XWPFTable table = (XWPFTable) itTable.next(); int rcount = table.getNumberOfRows(); for (int i = 0; i < rcount; i++) { XWPFTableRow row = table.getRow(i); List<XWPFTableCell> cells = row.getTableCells(); for (XWPFTableCell cell : cells) { String cellTextString = cell.getText(); for (Entry<String, String> e : map.entrySet()) { if (cellTextString.contains(e.getKey())) { cellTextString = cellTextString.replace(e.getKey(),e.getValue()); 
cell.removeParagraph(0); cell.setText(cellTextString); cell.setColor(cell.getColor()); } } } } } FileOutputStream outStream = null; outStream = new FileOutputStream(destPath); document.write(outStream); outStream.close(); return true; } catch (Exception e) { e.printStackTrace(); return false; } } else // doc只能生成doc,如果生成docx会出错 if ((sp[sp.length - 1].equalsIgnoreCase("doc")) && (dp[dp.length - 1].equalsIgnoreCase("doc"))) { HWPFDocument document = null; try { document = new HWPFDocument(new FileInputStream(srcPath)); Range range = document.getRange(); for (Entry<String, String> entry : map.entrySet()) { range.replaceText(entry.getKey(), entry.getValue()); } FileOutputStream outStream = null; outStream = new FileOutputStream(destPath); document.write(outStream); outStream.close(); return true; } catch (FileNotFoundException e) { e.printStackTrace(); return false; } catch (IOException e) { e.printStackTrace(); return false; } } else { return false; } } else { return false; } } }
二:合并多个docx
public class WordMergeUtil { public static void main (String[] args) throws Exception { File newFile = new File("D:\\wdpj\\t.docx"); List<File> srcfile = new ArrayList<>(); // File file1 = new File("D:\\wdpj\\函.docx"); // File file2 = new File("D:\\wdpj\\1.docx"); File file1 = new File("D:\\wdpj\\2.docx"); File file2 = new File("D:\\wdpj\\3.docx"); File file3 = new File("D:\\wdpj\\函.docx"); // srcfile.add(file2); // srcfile.add(file1); srcfile.add(file1); srcfile.add(file2); srcfile.add(file3); try { OutputStream dest = new FileOutputStream(newFile); ArrayList<XWPFDocument> documentList = new ArrayList<>(); XWPFDocument doc = null; for (int i = 0; i < srcfile.size(); i++) { FileInputStream in = new FileInputStream(srcfile.get(i).getPath()); OPCPackage open = OPCPackage.open(in); XWPFDocument document = new XWPFDocument(open); documentList.add(document); } for (int i = 0; i < documentList.size(); i++) { doc = documentList.get(0); if(i != 0){ documentList.get(i).createParagraph().setPageBreak(true); appendBody(doc,documentList.get(i)); } } // doc.createParagraph().setPageBreak(true); doc.write(dest); } catch (Exception e) { e.printStackTrace(); } } public static void appendBody(XWPFDocument src, XWPFDocument append) throws Exception { XWPFParagraph p = src.createParagraph(); //设置分页符 p.setPageBreak(true); CTBody src1Body = src.getDocument().getBody(); CTBody src2Body = append.getDocument().getBody(); List<XWPFPictureData> allPictures = append.getAllPictures(); // 记录图片合并前及合并后的ID Map<String,String> map = new HashMap(); for (XWPFPictureData picture : allPictures) { String before = append.getRelationId(picture); //将原文档中的图片加入到目标文档中 String after = src.addPictureData(picture.getData(), Document.PICTURE_TYPE_PNG); map.put(before, after); } appendBody(src1Body, src2Body,map); } private static void appendBody(CTBody src, CTBody append,Map<String,String> map) throws Exception { XmlOptions optionsOuter = new XmlOptions(); optionsOuter.setSaveOuter(); String appendString = 
append.xmlText(optionsOuter); String srcString = src.xmlText(); String prefix = srcString.substring(0,srcString.indexOf(">")+1); String mainPart = srcString.substring(srcString.indexOf(">")+1,srcString.lastIndexOf("<")); String sufix = srcString.substring( srcString.lastIndexOf("<") ); String addPart = appendString.substring(appendString.indexOf(">") + 1, appendString.lastIndexOf("<")); if (map != null && !map.isEmpty()) { //对xml字符串中图片ID进行替换 for (Map.Entry<String, String> set : map.entrySet()) { addPart = addPart.replace(set.getKey(), set.getValue()); } } //将两个文档的xml内容进行拼接 CTBody makeBody = CTBody.Factory.parse(prefix+mainPart+addPart+sufix); src.set(makeBody); }
作者:
不二尘
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利。