java提取docx文件中的图片
使用的Maven依赖为:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
1. 获取docx文件中的所有图片
// 获取word中的所有图片 public static void readImageInParagraph() throws IOException { XWPFDocument document = new XWPFDocument(new FileInputStream("C:\\Users\\Administrator\\Desktop\\JP6-0-2006.docx")); // 用XWPFDocument的getAllPictures来获取所有的图片 List<XWPFPictureData> picList = document.getAllPictures(); for (XWPFPictureData pic : picList) { System.out.println(pic.getPictureType() + pic.getFileName()); byte[] bytev = pic.getData(); // System.out.println(bytev.length); // 大于1000bites的图片我们才弄下来,消除word中莫名的小图片的影响 if (bytev.length > 300) { FileOutputStream fos = new FileOutputStream("C:\\Users\\Administrator\\Desktop\\cc\\" + pic.getFileName()); fos.write(bytev); } } }
2. 根据docx的内容,获取每段内容对应的图片
/**
 * Walks the paragraphs of the docx file in order, saves the pictures embedded in
 * each run to the static picture directory, and collects their web paths.
 *
 * <p>Pictures whose data is 20 bytes or shorter are skipped. For every saved picture
 * the path {@code "/picture/" + fileName} is printed and appended to the result, so
 * the returned list follows the order in which the pictures appear in the paragraphs.
 *
 * @return the {@code /picture/...} paths of all saved pictures, in paragraph order;
 *         empty if no picture qualified
 * @throws IOException if the docx file cannot be read or a picture cannot be written
 */
public static List<String> readImageInParagraphTest() throws IOException {
    List<String> picArr = new ArrayList<>();
    // try-with-resources closes the document and the input stream even if a write fails.
    try (InputStream is = new FileInputStream("C:\\Users\\Administrator\\Desktop\\JP6-0-2006.docx");
         XWPFDocument doc = new XWPFDocument(is)) {
        // The paragraph list of the document.
        List<XWPFParagraph> paras = doc.getParagraphs();
        for (XWPFParagraph paragraph : paras) {
            for (XWPFRun run : paragraph.getRuns()) {
                // Iterating an empty list is a no-op, so no explicit isEmpty() guard is needed.
                for (XWPFPicture picture : run.getEmbeddedPictures()) {
                    XWPFPictureData pictureData = picture.getPictureData();
                    byte[] bytev = pictureData.getData();
                    // Only export pictures larger than 20 bytes.
                    if (bytev.length > 20) {
                        String fileName = "D:\\programming\\fileex\\src\\main\\resources\\static\\picture\\" + pictureData.getFileName();
                        // Close each output stream right after writing so the data is
                        // flushed and the file handle is released.
                        try (FileOutputStream fos = new FileOutputStream(fileName)) {
                            fos.write(bytev);
                        }
                        String picturePath = "/picture/" + pictureData.getFileName();
                        picArr.add(picturePath);
                        System.out.println(picturePath);
                    }
                }
            }
        }
    }
    return picArr;
}