Java 中使用 Apache POI 读取 doc、docx、ppt、pptx、xls、xlsx、txt、csv 格式文件的示例代码

Java 使用 Apache POI 读取 doc、docx、ppt、pptx、xls、xlsx、txt、csv 格式文件的示例代码

1、maven依赖添加

在 pom 文件中添加如下依赖

<!-- Apache POI artifacts used by the reading examples in this article. -->
<!-- poi: used here for the xls example (HSSFWorkbook, ExcelExtractor). -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.0</version>
</dependency>
<!-- poi-ooxml: used here for the docx / pptx / xlsx examples (XWPF, XSLF, XSSF classes). -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.0</version>
</dependency>
<!-- poi-ooxml-schemas: schema classes backing poi-ooxml. -->
<!-- NOTE(review): presumably already pulled in transitively by poi-ooxml; confirm before keeping it explicit. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.0</version>
</dependency>
<!-- poi-scratchpad: used here for the doc / ppt examples (HWPF, HSLF classes). -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.0</version>
</dependency>
<!-- NOTE(review): per the Apache POI component docs, ooxml-schemas appears to be the full variant of -->
<!-- poi-ooxml-schemas; declaring both looks redundant and may put duplicate classes on the classpath. -->
<!-- Confirm which single one is needed. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.4</version>
</dependency>

2、文件读取代码示例

doc 格式文件

// --------- doc -----------
File file = new File("E:\\search-file\\22.doc");
FileInputStream fis = null;
HWPFDocument document = null;
WordExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new HWPFDocument(fis);
    extractor = new WordExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

docx 格式文件

// --------- docx -----------
File file = new File("E:\\search-file\\11.docx");
FileInputStream fis = null;
XWPFDocument document = null;
XWPFWordExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new XWPFDocument(fis);
    extractor = new XWPFWordExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

pptx 格式文件

// --------- pptx -----------
File file = new File("E:\\search-file\\33.pptx");
FileInputStream fis = null;
XMLSlideShow document = null;
SlideShowExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new XMLSlideShow(fis);
    extractor = new SlideShowExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

ppt 格式文件

// --------- ppt -----------
File file = new File("E:\\search-file\\44.ppt");
FileInputStream fis = null;
HSLFSlideShow document = null;
SlideShowExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new HSLFSlideShow(fis);
    extractor = new SlideShowExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

xlsx 格式文件

// --------- xlsx -----------

File file = new File("E:\\search-file\\55.xlsx");
FileInputStream fis = null;
XSSFWorkbook document = null;
XSSFExcelExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new XSSFWorkbook(fis);
    extractor = new XSSFExcelExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

xls 格式文件

// --------- xls -----------
File file = new File("E:\\search-file\\66.xls");
FileInputStream fis = null;
HSSFWorkbook document = null;
ExcelExtractor extractor = null;
try {
    fis = new FileInputStream(file);
    document = new HSSFWorkbook(fis);
    extractor = new ExcelExtractor(document);
    log.info("extractor.getText:{}", extractor.getText());
} catch (Exception e) {
    e.printStackTrace();
}

txt,csv 格式文件

// --------- txt,csv -----------
// Reads a UTF-8 encoded text/CSV file line by line and logs the accumulated content.
File file = new File("E:\\search-file\\77.txt");
// The buffer is a local that is only appended to here, so the unsynchronized
// StringBuilder is sufficient.
StringBuilder buffer = new StringBuilder();
// Charset constant instead of the "utf8" name string: no runtime charset lookup by name.
// Fully qualified because this snippet has no import section to extend.
try (BufferedReader reader = new BufferedReader(new InputStreamReader(
        new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
        // readLine() drops the line terminator, so each line is re-terminated with '\n'.
        buffer.append(line).append('\n');
    }
} catch (Exception e) {
    // Report through the logger (with the cause) instead of printing to stderr.
    log.error("Failed to read " + file, e);
}
// Logged even after a failure; in that case it holds whatever was read before the error.
log.info("txt-context:{}", buffer);
posted @   向宁的光  阅读(1308)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 本地部署 DeepSeek:小白也能轻松搞定!
· 如何给本地部署的DeepSeek投喂数据,让他更懂你
· 从 Windows Forms 到微服务的经验教训
· 李飞飞的50美金比肩DeepSeek把CEO忽悠瘸了,倒霉的却是程序员
· 超详细,DeepSeek 接入PyCharm实现AI编程!(支持本地部署DeepSeek及官方Dee
点击右上角即可分享
微信分享提示