利用POI抽取word中的图片并保存在文件中
利用POI抽取word中的图片并保存在文件中
poi.apache.org/hwpf/quick-guide.html
1.抽取word doc中的图片
package parse;

import java.io.*;
import java.util.*;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;

/**
 * Extracts the pictures embedded in a binary Word (.doc) file with Apache POI HWPF
 * and writes each one to its own file in a fixed output directory.
 */
public class ReadImgDoc {

    /** Directory prefix that every extracted picture is written under. */
    private static final String OUTPUT_DIR = "E:\\上海项目测试\\docImage\\";

    /**
     * Runs the extraction against a hard-coded sample document.
     *
     * @param args unused
     * @throws Exception if the document cannot be read or a picture cannot be written
     */
    public static void main(String[] args) throws Exception {
        new ReadImgDoc().readPicture("E:\\上海项目测试\\文档\\模板.doc");
    }

    /**
     * Scans the document one character at a time and saves every picture that a
     * character run refers to.
     *
     * @param path location of the .doc file to read
     * @throws Exception if the document cannot be opened or parsed, or an image file
     *                   cannot be written
     */
    private void readPicture(String path) throws Exception {
        // try-with-resources: the input stream is released even when parsing or writing fails.
        try (InputStream in = new FileInputStream(new File(path))) {
            HWPFDocument doc = new HWPFDocument(in);
            int length = doc.characterLength();
            PicturesTable pTable = doc.getPicturesTable();

            for (int i = 0; i < length; i++) {
                // One-character range, so each character's run is tested individually.
                Range range = new Range(i, i + 1, doc);
                CharacterRun cr = range.getCharacterRun(0);
                if (!pTable.hasPicture(cr)) {
                    continue;
                }

                // 'false' as in the original: the image bytes are streamed out by
                // writeImageContent below rather than requested up front.
                Picture pic = pTable.extractPicture(cr, false);
                String afileName = pic.suggestFullFileName();

                // A random UUID prefix keeps pictures with the same suggested name apart.
                File target = new File(OUTPUT_DIR + UUID.randomUUID() + afileName);
                // Close (and thereby flush) each output file as soon as it is written.
                try (OutputStream out = new FileOutputStream(target)) {
                    pic.writeImageContent(out);
                }
            }
        }
    }
}
2.抽取 word docx文件中的图片
package parse;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;

import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;

/**
 * Prints the text of a Word (.docx) file and saves every picture it contains,
 * using Apache POI XWPF.
 */
public class GetPicsDocx {

    /**
     * Reads a hard-coded sample document, prints its extracted text, then prints a
     * description line for each picture and writes the picture bytes to a file named
     * after the picture inside a fixed output directory.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String path = "E:\\上海项目测试\\文档\\35.docx";
        File file = new File(path);

        // try-with-resources: the input stream is closed on both the success and failure paths.
        try (FileInputStream fis = new FileInputStream(file)) {
            XWPFDocument document = new XWPFDocument(fis);
            XWPFWordExtractor xwpfWordExtractor = new XWPFWordExtractor(document);
            String text = xwpfWordExtractor.getText();
            System.out.println(text);

            List<XWPFPictureData> picList = document.getAllPictures();
            for (XWPFPictureData pic : picList) {
                // File.separator is static; reference it through the class, not an instance.
                System.out.println(pic.getPictureType() + File.separator + pic.suggestFileExtension()
                        + File.separator + pic.getFileName());

                byte[] bytev = pic.getData();
                // Close each output file right after writing so handles do not pile up.
                try (FileOutputStream fos =
                        new FileOutputStream("E:\\上海项目测试\\docxImage\\" + pic.getFileName())) {
                    fos.write(bytev);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
版权声明:欢迎转载,但请注明出处
一些博文中有一些参考内容因时间久远找不到来源了没有注明,如果侵权请联系我删除。
在校每年国奖、每年专业第一,加拿大留学,先后工作于华东师范大学和香港教育大学。
2024-10-30:27岁,宅加太忙,特此在网上找女朋友,坐标上海,非诚勿扰,vx:fan404006308
AI交流资料群:753014672