使用 POI 将 Word 完美转换为 HTML(支持表格、图片、样式)

直入正题:需求是在页面中预览 Word 文档,使用的是 POI 3.8。以下代码支持表格和图片,不支持分页;只支持 .doc 格式,不支持 .docx 格式。
Java代码  收藏代码
  1. /** 
  2.  *  
  3.  */  
  4.   
  5.   
  6. import java.io.BufferedWriter;  
  7. import java.io.File;  
  8. import java.io.FileInputStream;  
  9. import java.io.FileNotFoundException;  
  10. import java.io.FileOutputStream;  
  11. import java.io.IOException;  
  12. import java.io.OutputStream;  
  13. import java.io.OutputStreamWriter;  
  14. import java.util.List;  
  15.   
  16. import javax.xml.parsers.DocumentBuilderFactory;  
  17. import javax.xml.parsers.ParserConfigurationException;  
  18. import javax.xml.transform.OutputKeys;  
  19. import javax.xml.transform.Transformer;  
  20. import javax.xml.transform.TransformerException;  
  21. import javax.xml.transform.TransformerFactory;  
  22. import javax.xml.transform.dom.DOMSource;  
  23. import javax.xml.transform.stream.StreamResult;  
  24.   
  25. import org.apache.commons.io.output.ByteArrayOutputStream;  
  26. import org.apache.poi.hwpf.HWPFDocument;  
  27. import org.apache.poi.hwpf.converter.PicturesManager;  
  28. import org.apache.poi.hwpf.converter.WordToHtmlConverter;  
  29. import org.apache.poi.hwpf.model.PicturesTable;  
  30. import org.apache.poi.hwpf.usermodel.CharacterRun;  
  31. import org.apache.poi.hwpf.usermodel.Paragraph;  
  32. import org.apache.poi.hwpf.usermodel.Picture;  
  33. import org.apache.poi.hwpf.usermodel.PictureType;  
  34. import org.apache.poi.hwpf.usermodel.Range;  
  35. import org.apache.poi.hwpf.usermodel.Table;  
  36. import org.apache.poi.hwpf.usermodel.TableCell;  
  37. import org.apache.poi.hwpf.usermodel.TableIterator;  
  38. import org.apache.poi.hwpf.usermodel.TableRow;  
  39. import org.w3c.dom.Document;  
  40.   
  41. /** 
  42.  * @author: Chembo Huang 
  43.  * @since: May 3, 2012 
  44.  * @modified: May 3, 2012 
  45.  * @version: 
  46.  */  
  47. public class Word2Html {  
  48.   
  49.     public static void main(String argv[]) {  
  50.         try {  
  51.             convert2Html("D://1.doc","D://1.html");  
  52.         } catch (Exception e) {  
  53.             e.printStackTrace();  
  54.         }  
  55.     }  
  56.   
  57.     public static void writeFile(String content, String path) {  
  58.         FileOutputStream fos = null;  
  59.         BufferedWriter bw = null;  
  60.         try {  
  61.             File file = new File(path);  
  62.             fos = new FileOutputStream(file);  
  63.             bw = new BufferedWriter(new OutputStreamWriter(fos,"GB2312"));  
  64.             bw.write(content);  
  65.         } catch (FileNotFoundException fnfe) {  
  66.             fnfe.printStackTrace();  
  67.         } catch (IOException ioe) {  
  68.             ioe.printStackTrace();  
  69.         } finally {  
  70.             try {  
  71.                 if (bw != null)  
  72.                     bw.close();  
  73.                 if (fos != null)  
  74.                     fos.close();  
  75.             } catch (IOException ie) {  
  76.             }  
  77.         }  
  78.     }  
  79.   
  80.     public static void convert2Html(String fileName, String outPutFile)  
  81.             throws TransformerException, IOException,  
  82.             ParserConfigurationException {  
  83.         HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));  
  84.         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(  
  85.                 DocumentBuilderFactory.newInstance().newDocumentBuilder()  
  86.                         .newDocument());  
  87.          wordToHtmlConverter.setPicturesManager( new PicturesManager()  
  88.          {  
  89.              public String savePicture( byte[] content,  
  90.                      PictureType pictureType, String suggestedName,  
  91.                      float widthInches, float heightInches )  
  92.              {  
  93.                  return "test/"+suggestedName;  
  94.              }  
  95.          } );  
  96.         wordToHtmlConverter.processDocument(wordDocument);  
  97.         //save pictures  
  98.         List pics=wordDocument.getPicturesTable().getAllPictures();  
  99.         if(pics!=null){  
  100.             for(int i=0;i<pics.size();i++){  
  101.                 Picture pic = (Picture)pics.get(i);  
  102.                 System.out.println();  
  103.                 try {  
  104.                     pic.writeImageContent(new FileOutputStream("D:/test/"  
  105.                             + pic.suggestFullFileName()));  
  106.                 } catch (FileNotFoundException e) {  
  107.                     e.printStackTrace();  
  108.                 }    
  109.             }  
  110.         }  
  111.         Document htmlDocument = wordToHtmlConverter.getDocument();  
  112.         ByteArrayOutputStream out = new ByteArrayOutputStream();  
  113.         DOMSource domSource = new DOMSource(htmlDocument);  
  114.         StreamResult streamResult = new StreamResult(out);  
  115.   
  116.         TransformerFactory tf = TransformerFactory.newInstance();  
  117.         Transformer serializer = tf.newTransformer();  
  118.         serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");  
  119.         serializer.setOutputProperty(OutputKeys.INDENT, "yes");  
  120.         serializer.setOutputProperty(OutputKeys.METHOD, "html");  
  121.         serializer.transform(domSource, streamResult);  
  122.         out.close();  
  123.         writeFile(new String(out.toByteArray()), outPutFile);  
  124.     }  
  125. }  




下载 poi-bin-3.9-20121203.tar.gz 并解压,从中提取查看 Office 文档所依赖的 jar 包。
posted on 2013-10-08 09:44  记性特差  阅读(8628)  评论(0)  编辑  收藏  举报