【Java】Word题库解析

一、需求场景:

一共四种题型,单选、多选、判断、简答

题目构成要素:题目、选项、答案、解析

一种题型一个Word文档存放,需要把这些题目写入DB维护

 

二、题库格式:

单选案例:

多选案例:

 判断案例:

简答题案例:

可以看出,单选,多选和判断都是一样的

- 题目由数字和点开头,并设置了标题样式

- 选项由ABCDEF和点组成

- 每一个答案的前缀固定有【答案:】

- 每一个解析的前缀固定有【解析:】

简答题没有选项,只由题目 + 答案组成

三、解析实现

依赖poi实现,mvn坐标:

<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>

文档读取:

/**
 * Opens the Word document at {@code path} and loads it into an {@link XWPFDocument}.
 *
 * <p>The file stream is closed before returning, including when loading fails.
 * The returned document is handed to the caller, who is responsible for closing it.
 *
 * @param path file system path of the .docx file
 * @return the loaded document
 */
@SneakyThrows
public static XWPFDocument getWordFile(String path) {
    // try-with-resources: the original leaked the stream if the XWPFDocument constructor threw.
    try (FileInputStream fileInputStream = new FileInputStream(path)) {
        return new XWPFDocument(fileInputStream);
    }
}

获取所有段落:

// Fetch the document's paragraph list; the parser walks it in order, expecting each
// question title, option, answer and explanation to be a paragraph of its own.
List<XWPFParagraph> paragraphs = xwpfDocument.getParagraphs();  

根据格式得知,每一个题目和题型都是一个段落,选项,答案,解析也是段落

相互之间没有关联性,和上一次的HTML报告相似

但是每个标题存在一个序号数前缀,使用一个迭代值进行计数

循环至下一个带序号数前缀的段落对象时,就是下一道题目了

 

为了保存每次读取的段落,需要创建一个原始的Item类

序列值用来分组管理,把题目、选项、答案、解析合并起来

/**
 * One raw paragraph read from the document, tagged with the sequence number of the
 * question it belongs to so that title, options, answer and explanation can be
 * merged back together afterwards.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
@ToString
public static final class RoughItem {
    /** Sequence number used to group paragraphs of the same question. */
    public int serial;
    /** Text of the paragraph. */
    public String content;
}

最终要保存成一个题目对象

题目对象只有四个属性,题目、题型、答案、解析

/**
 * A fully assembled question: title, question type, answer and explanation.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
@ToString
public static final class ExamItem {
    /** Question text. */
    public String title;
    /** Question type code. */
    public String type;
    /** Answer text. */
    public String answer;
    /** Explanation text. */
    public String explain;
}

  

完整工具类实现:

package jnpf.util;

import lombok.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;

import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.stream.Collectors;

/**
 * Parses exam question banks stored in Word (.docx) documents into {@link ExamItem} objects.
 *
 * <p>One document holds one question type. Choice-style documents (single choice, multiple
 * choice, true/false) share a layout: a question paragraph starting with a number and a dot,
 * option paragraphs starting with a letter and a dot, then paragraphs starting with the
 * answer marker and the explanation marker. Short-answer documents have no options: a
 * paragraph carrying a style is a question title and the following unstyled paragraphs
 * form its answer.
 *
 * <p>Every reader loads the document, closes it, and only then hands the parsed items to
 * the consumer, in document order.
 */
public class DbcpExamUtil {

    /** Letters that may introduce an option paragraph (the text before the first dot). */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");
    /** Marker that starts the answer paragraph. */
    private static final String ANSWER_PREFIX = "答案:";
    /** Marker that starts the explanation paragraph. */
    private static final String EXPLAIN_PREFIX = "解析:";
    /** Question number: a positive integer without leading zeros. */
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";

    private static final String TYPE1_RADIO = "0";
    private static final String TYPE2_CHECKBOX = "1";
    private static final String TYPE3_TRUE_OR_FALSE = "2";
    private static final String TYPE4_SHORT_QA = "3";
    /** Character separating a question number or option letter from the text after it. */
    private static final char PREFIX_SEPARATOR = '.';


    /**
     * One raw paragraph read from the document, tagged with the sequence number of the
     * question it belongs to.
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        /** 1-based sequence number of the owning question. */
        public int serial;
        /** Text of the paragraph. */
        public String content;
    }

    /**
     * A fully assembled question handed to the consumer.
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        /** Question text; for choice-style questions the option lines are appended to it. */
        public String title;
        /** Question type code, one of the {@code TYPE*} constants of the enclosing class. */
        public String type;
        /** Answer text with the answer marker removed. */
        public String answer;
        /** Explanation text with the explanation marker removed; empty when absent. */
        public String explain;
    }

    /**
     * Opens the Word document at {@code path} and loads it into an {@link XWPFDocument}.
     *
     * <p>The file stream is closed before returning, including when loading fails.
     * The returned document is handed to the caller, who is responsible for closing it.
     *
     * @param path file system path of the .docx file
     * @return the loaded document
     */
    @SneakyThrows
    public static XWPFDocument getWordFile(String path) {
        // try-with-resources: the original leaked the stream if the constructor threw.
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new XWPFDocument(fileInputStream);
        }
    }

    /**
     * Reads a single-choice question document and passes each question to {@code consumer}.
     *
     * @param path     file system path of the .docx file
     * @param consumer receives one {@link ExamItem} per question, in document order
     */
    public static void radioTypeRead(String path, Consumer<ExamItem> consumer) {
        readChoiceQuestions(path, TYPE1_RADIO, consumer);
    }

    /**
     * Reads a multiple-choice question document and passes each question to {@code consumer}.
     *
     * @param path     file system path of the .docx file
     * @param consumer receives one {@link ExamItem} per question, in document order
     */
    public static void checkBoxTypeRead(String path, Consumer<ExamItem> consumer) {
        readChoiceQuestions(path, TYPE2_CHECKBOX, consumer);
    }

    /**
     * Reads a true/false question document and passes each question to {@code consumer}.
     *
     * @param path     file system path of the .docx file
     * @param consumer receives one {@link ExamItem} per question, in document order
     */
    public static void trueOrFalseTypeRead(String path, Consumer<ExamItem> consumer) {
        readChoiceQuestions(path, TYPE3_TRUE_OR_FALSE, consumer);
    }

    /**
     * Reads a short-answer question document and passes each question to {@code consumer}.
     *
     * <p>A non-blank paragraph that carries a style starts a new question; every following
     * unstyled paragraph is joined (with line breaks) into that question's answer. Text
     * that appears before the first styled paragraph belongs to no question and is ignored.
     *
     * @param path     file system path of the .docx file
     * @param consumer receives one {@link ExamItem} per question, in document order
     */
    @SneakyThrows
    public static void shortQaTypeRead(String path, Consumer<ExamItem> consumer) {
        List<List<RoughItem>> questions = new ArrayList<>();
        try (XWPFDocument document = getWordFile(path)) {
            List<RoughItem> current = null;
            for (XWPFParagraph paragraph : document.getParagraphs()) {
                String text = paragraph.getText();
                if (StringUtils.isBlank(text)) {
                    continue;
                }
                boolean isTitle = StringUtils.isNotBlank(paragraph.getStyle());
                if (isTitle) {
                    current = new ArrayList<>();
                    questions.add(current);
                    current.add(roughItem(questions.size(), text));
                } else if (current != null) {
                    current.add(roughItem(questions.size(), text));
                }
            }
        }
        // The document is closed at this point; only plain strings are handed on.
        for (List<RoughItem> question : questions) {
            String answer = question.stream()
                    .skip(1)
                    .map(RoughItem::getContent)
                    .collect(Collectors.joining("\n"));
            consumer.accept(ExamItem.builder()
                    .title(question.get(0).getContent())
                    .type(TYPE4_SHORT_QA)
                    .answer(answer)
                    .explain("")
                    .build());
        }
    }

    /**
     * Shared reader for the three choice-style layouts.
     *
     * <p>Paragraphs are collected per question while scanning, so questions are emitted in
     * document order without relying on map iteration order. Option, answer and explanation
     * paragraphs that appear before the first numbered question are ignored; paragraphs
     * matching none of the known prefixes are skipped.
     */
    @SneakyThrows
    private static void readChoiceQuestions(String path, String type, Consumer<ExamItem> consumer) {
        List<List<RoughItem>> questions = new ArrayList<>();
        try (XWPFDocument document = getWordFile(path)) {
            List<RoughItem> current = null;
            for (XWPFParagraph paragraph : document.getParagraphs()) {
                String text = paragraph.getText();
                if (StringUtils.isBlank(text)) {
                    continue;
                }
                String token = leadingToken(text);
                if (token.matches(NUMBER_REGEXP)) {
                    // A numbered paragraph starts the next question.
                    current = new ArrayList<>();
                    questions.add(current);
                    current.add(roughItem(questions.size(), text));
                } else if (current != null && isQuestionPart(token, text)) {
                    current.add(roughItem(questions.size(), text));
                }
            }
        }
        // The document is closed at this point; only plain strings are handed on.
        for (List<RoughItem> question : questions) {
            consumer.accept(toChoiceItem(question, type));
        }
    }

    /** Tells whether a paragraph is an option, the answer or the explanation of a question. */
    private static boolean isQuestionPart(String token, String text) {
        return OPTIONS.contains(token)
                || text.startsWith(ANSWER_PREFIX)
                || text.startsWith(EXPLAIN_PREFIX);
    }

    /**
     * Assembles one choice-style question from its paragraphs; the first one is the title.
     *
     * <p>The title is followed by a line break and the option lines. The line break is
     * appended even when there are no options, as the original implementation did.
     */
    private static ExamItem toChoiceItem(List<RoughItem> question, String type) {
        List<String> lines = question.stream()
                .map(RoughItem::getContent)
                .collect(Collectors.toList());
        String options = lines.stream()
                .skip(1)
                .filter(line -> OPTIONS.contains(leadingToken(line)))
                .collect(Collectors.joining("\n"));
        return ExamItem.builder()
                .title(lines.get(0) + "\n" + options)
                .type(type)
                .answer(firstWithoutPrefix(lines, ANSWER_PREFIX))
                .explain(firstWithoutPrefix(lines, EXPLAIN_PREFIX))
                .build();
    }

    /**
     * Returns the first line starting with {@code prefix}, with that leading prefix removed,
     * or an empty string when no line matches. Only the leading marker is stripped, so the
     * same characters occurring later in the text are preserved.
     */
    private static String firstWithoutPrefix(List<String> lines, String prefix) {
        for (String line : lines) {
            if (line.startsWith(prefix)) {
                return line.substring(prefix.length());
            }
        }
        return "";
    }

    /**
     * Returns the text before the first dot, or the whole text when it contains no dot.
     * Unlike {@code text.split("\\.")[0]} this cannot fail on a paragraph made only of dots.
     */
    private static String leadingToken(String text) {
        int dot = text.indexOf(PREFIX_SEPARATOR);
        return dot < 0 ? text : text.substring(0, dot);
    }

    /** Builds the raw item for a paragraph of the question with the given sequence number. */
    private static RoughItem roughItem(int serial, String content) {
        return RoughItem.builder().serial(serial).content(content).build();
    }
}

  

调用工具方法:

/**
 * Imports the four question-bank documents (single choice, multiple choice, true/false,
 * short answer) and inserts every parsed question through {@code baseMapper}.
 */
@Override
public void qaImport() {
    String radioPath = "D:\\exam-repo\\单选题-答案.docx";
    String checkBoxPath = "D:\\exam-repo\\多选题-答案.docx";
    String trueOrFalsePath = "D:\\exam-repo\\判断题-答案.docx";
    String shortQaPath = "D:\\exam-repo\\简答题.docx";
    // All four types are persisted identically, so they share one insert routine.
    DbcpExamUtil.radioTypeRead(radioPath, this::insertExamItem);
    DbcpExamUtil.checkBoxTypeRead(checkBoxPath, this::insertExamItem);
    DbcpExamUtil.trueOrFalseTypeRead(trueOrFalsePath, this::insertExamItem);
    DbcpExamUtil.shortQaTypeRead(shortQaPath, this::insertExamItem);
}

/** Copies one parsed question into a {@code TrnExQabank} and inserts it. */
private void insertExamItem(DbcpExamUtil.ExamItem ei) {
    baseMapper.insert(TrnExQabank.builder()
            .qaSubject(ei.getTitle())
            .qaType(ei.getType())
            .qaAnswer(ei.getAnswer())
            .qaAnaly(ei.getExplain())
            .build());
}

  

 

posted @ 2024-08-16 09:15  emdzz  阅读(65)  评论(0)  编辑  收藏  举报