【Java】Word题库解析
一、需求场景:
一共四种题型,单选、多选、判断、简答
题目构成要素:题目、选项、答案、解析
一种题型一个Word文档存放,需要把这些题目写入DB维护
二、题库格式:
单选案例:
多选案例:
判断案例:
简答题案例:
可以看出,单选、多选和判断三种题型的格式都是一样的:
- 题目有数字和点开头,并设置了标题样式
- 选项由ABCDEF和点组成
- 每一个答案的前缀固定有【答案:】
- 每一个解析的前缀固定有【解析:】
简答题的组成部分没有选项,只有题目 + 答案
三、解析实现
依赖poi实现,mvn坐标:
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>4.1.2</version> </dependency>
文档读取:
/**
 * Loads the Word document stored at {@code path}.
 *
 * <p>The file stream is opened in a try-with-resources block, so it is closed
 * both after a successful load and when constructing the document throws.
 * The returned document is still open; closing it is left to the caller.
 * Checked exceptions are rethrown undeclared by {@code @SneakyThrows}.
 *
 * @param path file system path of the .docx file
 * @return the loaded document
 */
@SneakyThrows
public static XWPFDocument getWordFile(String path) {
    try (FileInputStream fileInputStream = new FileInputStream(path)) {
        return new XWPFDocument(fileInputStream);
    }
}
获取所有段落:
// Fetch the paragraph list of the loaded document; in the question-bank format
// every question title, option, answer and explanation is a paragraph of its own.
List<XWPFParagraph> paragraphs = xwpfDocument.getParagraphs();
根据格式得知,每一个题目和题型都是一个段落,选项,答案,解析也是段落
相互之间没有关联性,和上一次的HTML报告相似
但是每个标题存在一个序号数前缀,使用一个迭代值进行计数
循环至下一个带序号数前缀的段落对象时,就是下一道题目了
为了保存每次读取的段落,需要创建一个原始的Item类
序列值用来分组管理,把题目、选项、答案、解析合并起来
/**
 * Raw holder for one paragraph read from the document.
 *
 * <p>The serial number ties a paragraph to the question it belongs to, so the
 * title, options, answer and explanation can be grouped together afterwards.
 */
@Data
@Builder
@ToString
@NoArgsConstructor
@AllArgsConstructor
public static final class RoughItem {

    /** Sequence number of the question this paragraph belongs to. */
    public int serial;

    /** Text of the paragraph. */
    public String content;
}
最终要保存成一个题目对象
题目对象只有四个属性,题目、题型、答案、解析
/**
 * Final question object handed to the caller: title, type, answer and
 * explanation.
 */
@Data
@Builder
@ToString
@NoArgsConstructor
@AllArgsConstructor
public static final class ExamItem {

    /** Question text. */
    public String title;

    /** Question type code. */
    public String type;

    /** Answer text. */
    public String answer;

    /** Explanation text. */
    public String explain;
}
完整工具类实现:
package jnpf.util;

import lombok.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;

import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Parses question-bank Word documents (one document per question type) into
 * {@link ExamItem} objects and hands each of them to a caller-supplied consumer.
 *
 * <p>Choice-style documents (single choice, multiple choice, true/false) are
 * recognised by paragraph prefixes: a question title starts with a number and a
 * dot, an option starts with one of {@code A}..{@code G} and a dot, the answer
 * starts with {@code ANSWER_PREFIX} and the explanation with
 * {@code EXPLAIN_PREFIX}. Short-answer documents are recognised by paragraph
 * style instead.
 */
public class DbcpExamUtil {

    /** Letters accepted as the leading token of an option paragraph. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");
    private static final String ANSWER_PREFIX = "答案:";
    private static final String EXPLAIN_PREFIX = "解析:";
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";
    /** Compiled once; the expression is evaluated for every paragraph. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile(NUMBER_REGEXP);
    private static final String TYPE1_RADIO = "0";
    private static final String TYPE2_CHECKBOX = "1";
    private static final String TYPE3_TRUE_OR_FASE = "2";
    private static final String TYPE4_SHORT_QA = "3";
    private static final String SPLIT_IDENTIFY = "\\.";

    /**
     * Raw holder for one paragraph; {@code serial} is the running number of the
     * question the paragraph belongs to and is used to group paragraphs.
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        public int serial;
        public String content;
    }

    /** One parsed question: title, type code, answer and explanation. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        public String title;
        public String type;
        public String answer;
        public String explain;
    }

    /**
     * Loads the Word document stored at {@code path}.
     *
     * <p>The file stream is closed both after a successful load and when
     * constructing the document throws. The returned document is still open;
     * closing it is left to the caller.
     *
     * @param path file system path of the .docx file
     * @return the loaded document
     */
    @SneakyThrows
    public static XWPFDocument getWordFile(String path) {
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new XWPFDocument(fileInputStream);
        }
    }

    /**
     * Reads a single-choice document and passes every question to {@code consumer}.
     *
     * @param path     path of the .docx file
     * @param consumer receives each parsed question, in document order
     */
    public static void radioTypeRead(String path, Consumer<ExamItem> consumer) {
        readChoiceQuestions(path, TYPE1_RADIO, consumer);
    }

    /**
     * Reads a multiple-choice document and passes every question to {@code consumer}.
     *
     * @param path     path of the .docx file
     * @param consumer receives each parsed question, in document order
     */
    public static void checkBoxTypeRead(String path, Consumer<ExamItem> consumer) {
        readChoiceQuestions(path, TYPE2_CHECKBOX, consumer);
    }

    /**
     * Reads a true/false document and passes every question to {@code consumer}.
     *
     * @param path     path of the .docx file
     * @param consumer receives each parsed question, in document order
     */
    public static void trueOrFalseTypeRead(String path, Consumer<ExamItem> consumer) {
        readChoiceQuestions(path, TYPE3_TRUE_OR_FASE, consumer);
    }

    /**
     * Reads a short-answer document and passes every question to {@code consumer}.
     *
     * <p>A non-blank paragraph whose style name is not blank starts a new
     * question; the unstyled paragraphs that follow are joined with line breaks
     * and become its answer. Unstyled paragraphs in front of the first title are
     * skipped, since they belong to no question.
     * NOTE(review): any styled paragraph counts as a title, so a styled document
     * heading would be read as a question - confirm against the source documents.
     *
     * @param path     path of the .docx file
     * @param consumer receives each parsed question, in document order
     */
    @SneakyThrows
    public static void shortQaTypeRead(String path, Consumer<ExamItem> consumer) {
        List<RoughItem> roughItems = new ArrayList<>();
        int examCount = 0;
        /* Close the document once its text is extracted, before the consumer runs. */
        try (XWPFDocument xwpfDocument = getWordFile(path)) {
            for (XWPFParagraph xwpfParagraph : xwpfDocument.getParagraphs()) {
                String text = xwpfParagraph.getText();
                /* Skip paragraphs without content. */
                if (StringUtils.isBlank(text)) {
                    continue;
                }
                boolean isTitle = StringUtils.isNotBlank(xwpfParagraph.getStyle());
                if (isTitle) {
                    ++examCount;
                } else if (examCount == 0) {
                    /* Nothing to attach this paragraph to yet. */
                    continue;
                }
                roughItems.add(RoughItem.builder().serial(examCount).content(text).build());
            }
        }
        groupBySerial(roughItems).values().forEach(group -> {
            /* The first entry is the title, everything after it is the answer. */
            String answer = group.stream()
                    .skip(1)
                    .map(RoughItem::getContent)
                    .collect(Collectors.joining("\n"));
            consumer.accept(ExamItem.builder()
                    .title(group.get(0).getContent())
                    .type(TYPE4_SHORT_QA)
                    .answer(answer)
                    .explain("")
                    .build());
        });
    }

    /**
     * Shared reader for the three choice-style documents, which differ only in
     * the type code written into the resulting items.
     */
    @SneakyThrows
    private static void readChoiceQuestions(String path, String type, Consumer<ExamItem> consumer) {
        List<RoughItem> roughItems = new ArrayList<>();
        int examCount = 0;
        /* Close the document once its text is extracted, before the consumer runs. */
        try (XWPFDocument xwpfDocument = getWordFile(path)) {
            for (XWPFParagraph xwpfParagraph : xwpfDocument.getParagraphs()) {
                String text = xwpfParagraph.getText();
                /* Skip paragraphs without content. */
                if (StringUtils.isBlank(text)) {
                    continue;
                }
                String head = leadingToken(text);
                boolean isExamNo = NUMBER_PATTERN.matcher(head).matches();
                boolean isPart = OPTIONS.contains(head)
                        || text.startsWith(ANSWER_PREFIX)
                        || text.startsWith(EXPLAIN_PREFIX);
                if (isExamNo) {
                    /* A numbered paragraph opens the next question. */
                    ++examCount;
                } else if (!isPart || examCount == 0) {
                    /* Unrecognised text, or a part that precedes the first question. */
                    continue;
                }
                roughItems.add(RoughItem.builder().serial(examCount).content(text).build());
            }
        }
        groupBySerial(roughItems).values()
                .forEach(group -> consumer.accept(toChoiceItem(group, type)));
    }

    /**
     * Returns the text in front of the first dot, or the whole text when it has no dot.
     */
    private static String leadingToken(String text) {
        /*
         * The limit of 2 keeps the (possibly empty) first token for text such as ".",
         * for which a plain split returns an empty array.
         */
        return text.split(SPLIT_IDENTIFY, 2)[0];
    }

    /** Groups the paragraphs by question number, keeping the questions in ascending order. */
    private static Map<Integer, List<RoughItem>> groupBySerial(List<RoughItem> roughItems) {
        return roughItems.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));
    }

    /** Builds the question object from the paragraphs of one choice-style question. */
    private static ExamItem toChoiceItem(List<RoughItem> group, String type) {
        /* The first paragraph of a group is always the numbered title. */
        String title = group.get(0).getContent();
        String options = group.stream()
                .map(RoughItem::getContent)
                .filter(content -> OPTIONS.contains(leadingToken(content)))
                .collect(Collectors.joining("\n"));
        /* Options are stored as part of the title; no trailing line break when there are none. */
        if (!options.isEmpty()) {
            title = title + "\n" + options;
        }
        return ExamItem.builder()
                .title(title)
                .type(type)
                .answer(firstWithPrefix(group, ANSWER_PREFIX))
                /* The explanation is optional, hence the empty-string default. */
                .explain(firstWithPrefix(group, EXPLAIN_PREFIX))
                .build();
    }

    /**
     * Returns the first paragraph starting with {@code prefix}, with only that leading
     * prefix removed, or an empty string when the group has no such paragraph.
     */
    private static String firstWithPrefix(List<RoughItem> group, String prefix) {
        return group.stream()
                .map(RoughItem::getContent)
                .filter(content -> content.startsWith(prefix))
                .map(content -> content.substring(prefix.length()))
                .findFirst()
                .orElse("");
    }
}
调用工具方法:
/**
 * Imports the four question-bank documents (single choice, multiple choice,
 * true/false, short answer) and stores every parsed question through
 * {@code baseMapper}.
 */
@Override
public void qaImport() {
    String radioPath = "D:\\exam-repo\\单选题-答案.docx";
    String checkBoxPath = "D:\\exam-repo\\多选题-答案.docx";
    String trueOrFalsePath = "D:\\exam-repo\\判断题-答案.docx";
    String shortQaPath = "D:\\exam-repo\\简答题.docx";

    DbcpExamUtil.radioTypeRead(radioPath, this::persistExamItem);
    DbcpExamUtil.checkBoxTypeRead(checkBoxPath, this::persistExamItem);
    DbcpExamUtil.trueOrFalseTypeRead(trueOrFalsePath, this::persistExamItem);
    DbcpExamUtil.shortQaTypeRead(shortQaPath, this::persistExamItem);
}

/** Maps one parsed question onto a TrnExQabank entity and inserts it through baseMapper. */
private void persistExamItem(DbcpExamUtil.ExamItem examItem) {
    baseMapper.insert(TrnExQabank.builder()
            .qaSubject(examItem.getTitle())
            .qaType(examItem.getType())
            .qaAnswer(examItem.getAnswer())
            .qaAnaly(examItem.getExplain())
            .build());
}