JAVA - 实现 - 利用POI读取word文档实例

package read.document;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;

import pers.mysql.DBUtil;
import pers.mysql.MysqlDao;
import pers.mysql.MysqlDaoImp;

public class WordReading {

    /**
     * Program entry point: runs the Word import once against a fixed document path.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Path literal exactly as given in the source; the file name part is masked.
        final String docPath = "*****.doc";
        readOnWord(docPath);
    }

    /**
     * Reads headword/explanation pairs from a Word {@code .doc} file and stores each
     * pair through {@code MysqlDaoImp.addData(word, explaining)}.
     *
     * <p>Document decomposition used here: whole range -> section -> paragraph ->
     * character run. Only the third section (index 2) is scanned; per the original
     * author's note, section 0 is the cover, 1 the table of contents, 2 the body.
     * A paragraph whose first character run reports font size 26 is treated as a
     * headword; every other paragraph is appended to the current explanation
     * (the original note gives 18 as the explanation size).
     *
     * <p>Scanning starts at paragraph index 2 of that section and stops after 24
     * pairs have been stored or the section has no more paragraphs, whichever comes
     * first.
     *
     * <p>Errors are reported on standard output (and the stack trace printed) rather
     * than thrown: a {@code null} path or a path not ending in {@code .doc}, a file
     * that cannot be opened, an I/O failure while loading, or a document with fewer
     * than three sections.
     *
     * @param filePath path of the {@code .doc} file to read
     */
    public static void readOnWord(String filePath) {

        // A null path is reported like a wrong extension instead of throwing an NPE.
        if (filePath == null || !filePath.endsWith(".doc")) {
            System.out.println("文件格式 error:not .doc");
            return;
        }

        final int bodySection = 2;   // 0 - cover, 1 - table of contents, 2 - body text
        final int wordFontSize = 26; // getFontSize() value that marks a headword paragraph
        final int count = 24;        // maximum number of pairs written to the database

        // try-with-resources closes the stream on every path, and a failed open no
        // longer falls through to HWPFDocument with a null stream.
        try (InputStream is = new FileInputStream(filePath)) {

            HWPFDocument doc = new HWPFDocument(is);
            Range text = doc.getRange(); // the whole document

            if (text.numSections() <= bodySection) {
                System.out.println("文档小节不足,找不到正文小节。");
                return;
            }
            Range hotWord = text.getSection(bodySection);
            int total = hotWord.numParagraphs();

            // State for the pair currently being assembled.
            String word = "";
            StringBuilder explaining = new StringBuilder();
            boolean wordOK = false;
            boolean explainOK = false;

            int stored = 0; // pairs written so far
            int begin = 2;  // index of the next paragraph to read

            // Both loops are bounded by the paragraph count so the scan cannot run
            // past the end of the section.
            while (stored < count && begin < total) {
                Range para = hotWord.getParagraph(begin);
                if (firstRunFontSize(para) == wordFontSize) {
                    word = para.text();
                    wordOK = true;
                    begin++;
                } else {
                    // Collect consecutive non-headword paragraphs. Comparing with
                    // "!=" (not "<") guarantees at least one paragraph is consumed,
                    // so a font size above the headword size cannot stall the loop.
                    while (begin < total) {
                        Range next = hotWord.getParagraph(begin);
                        if (firstRunFontSize(next) == wordFontSize) {
                            break;
                        }
                        explaining.append(next.text());
                        begin++;
                    }
                    explainOK = true;
                }

                // Store the pair once both halves are present, then reset the state.
                if (wordOK && explainOK) {
                    MysqlDaoImp.addData(word, explaining.toString());
                    stored++;
                    wordOK = false;
                    explainOK = false;
                    word = "";
                    explaining.setLength(0);
                }
            }

        } catch (FileNotFoundException e) {
            e.printStackTrace();
            System.out.println("文件打开失败。");
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("IO错误。");
        }

    }

    /**
     * Returns the font size reported by the first character run of a paragraph,
     * or -1 when the paragraph has no character runs.
     */
    private static int firstRunFontSize(Range para) {
        if (para.numCharacterRuns() == 0) {
            return -1;
        }
        CharacterRun field = para.getCharacterRun(0);
        return field.getFontSize();
    }

   
posted @ 2019-03-21 16:00  丨Kouch  阅读(3403)  评论(0编辑  收藏  举报