Java 使用 POI 读取 Word 文件内容
1、下载poi的jar包
下载地址:https://www.apache.org/dyn/closer.lua/poi/release/bin/poi-bin-3.17-20170915.tar.gz
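下载并解压后,需要用到其中的若干 jar 包(例如 poi、poi-ooxml、poi-ooxml-schemas、poi-scratchpad,以及 lib 目录下的 xmlbeans、commons-collections4 等依赖),这些包都能在解压后的文件夹中找到。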
2、代码
//package com.word; import java.io.*; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; /** * @Author:sks * @Description: * @Date:Created in 15:49 2018/1/24 * @Modified by: **/ public class poi_doc { public static void main(String[] args) { String filePath = "D:/work/Solr/ImportData/test.docx"; String content = readWord(filePath); } public static String readWord(String path) { String buffer = ""; try { if (path.endsWith(".doc")) { InputStream is = new FileInputStream(new File(path)); WordExtractor ex = new WordExtractor(is); buffer = ex.getText(); ex.close(); } else if (path.endsWith("docx")) { OPCPackage opcPackage = POIXMLDocument.openPackage(path); POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage); buffer = extractor.getText(); extractor.close(); } else { System.out.println("此文件不是word文件!"); } } catch (Exception e) { e.printStackTrace(); } return buffer; } }