【Java】Word题库解析2

 

初稿见:https://www.cnblogs.com/mindzone/p/18362194

一、新增需求

在原稿题库之后,还需要生成一份纯题目 + 纯答案

答案放在开头,题目里面去掉答案

在检查题型时还发现部分内容略有区别:

 所以在判断是否为答案的时候,需要同时兼容“答案:”和“参考答案:”这两种前缀写法

二、关于老版本支持

旧版 .doc 格式(如 Word 2000 生成的文档)需要额外追加 poi-scratchpad 依赖才能解析

<!-- Apache POI artifacts used by the utilities below; all three are pinned to the same version (5.0.0). -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>5.0.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>5.0.0</version>
</dependency>
<!-- Extra artifact required for the legacy .doc format; presumably the source of the org.apache.poi.hwpf classes imported by the utilities. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>5.0.0</version>
</dependency>

  

需要导入的资源:

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;

  

三、工具类实现

package cn.cloud9.word;

import com.alibaba.druid.util.StringUtils;
import lombok.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.File;
import java.io.FileInputStream;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Converts a legacy {@code .doc} question bank whose answers sit in their own
 * paragraphs (prefixed with one of {@link #ANSWER_PREFIX}) into a new document
 * where the answer paragraphs are removed from the questions and a compact
 * answer key is inserted at the very beginning.
 */
public class ExamUtil {
    /** Prefixes that mark a whole paragraph as an answer line. */
    private static final List<String> ANSWER_PREFIX = Arrays.asList("答案:", "参考答案:");
    /** Labels that, when found before the first dot, mark a paragraph as an option. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");
    /** A question number: a positive integer without leading zeros. */
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";
    /** Regex used to split a paragraph on dots. */
    private static final String SPLIT_IDENTIFY = "\\.";
    /** How many answers are written on one line of the answer key. */
    private static final int ROW_SIZE = 10;

    /** One relevant paragraph (question title, option or answer) tagged with its question. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        /** Running index of the question this paragraph belongs to (0 before the first question). */
        public int serial;
        /** Question number exactly as written in the document. */
        public String exCode;
        /** Raw paragraph text. */
        public String content;
    }

    /** A question together with its extracted answer. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        public String no;
        public String title;
        public String type;
        public String answer;
        public String explain;
    }

    /**
     * Loads a {@code .docx} file.
     *
     * @param path location of the file
     * @return the parsed document
     */
    @SneakyThrows
    public static XWPFDocument getWordFileDocxType(String path) {
        /* try-with-resources: the stream is released even when parsing fails */
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new XWPFDocument(fileInputStream);
        }
    }

    /**
     * Loads a legacy {@code .doc} file.
     *
     * @param path location of the file
     * @return the parsed document
     */
    @SneakyThrows
    public static HWPFDocument getWordFileDocType(String path) {
        /* try-with-resources: the stream is released even when parsing fails */
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new HWPFDocument(fileInputStream);
        }
    }

    /**
     * Entry point.
     *
     * @param args optional overrides: {@code args[0]} is the input {@code .doc},
     *             {@code args[1]} is the output file; when omitted the original
     *             hard-coded locations are used
     */
    @SneakyThrows
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0)
                ? args[0]
                : "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案  (增加 1301-2100共 800)中级保育师增加题库 .doc";
        String newFilePath = (args != null && args.length > 1)
                ? args[1]
                : "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案  (增加 1301-2100共 800)中级保育师增加题库 " + new Date().getTime() + ".doc";

        try (HWPFDocument wordFile = getWordFileDocType(filePath)) {
            Range range = wordFile.getRange();

            List<RoughItem> roughItems = collectRoughItems(range);
            List<ExamItem> examItems = toExamItems(roughItems);
            examItems.forEach(System.out::println);

            insertAnswerRows(range, examItems, ROW_SIZE);
            wordFile.write(new File(newFilePath));
        }
    }

    /** Returns whether the paragraph text starts with one of the answer prefixes. */
    private static boolean isAnswerLine(String text) {
        return ANSWER_PREFIX.stream().anyMatch(text::startsWith);
    }

    /** Removes the leading answer prefix, if any, and leaves the rest of the text untouched. */
    private static String stripAnswerPrefix(String text) {
        for (String prefix : ANSWER_PREFIX) {
            if (text.startsWith(prefix)) {
                return text.substring(prefix.length());
            }
        }
        return text;
    }

    /**
     * Walks all paragraphs, records question titles, options and answers, and
     * deletes the answer paragraphs from the document.
     */
    private static List<RoughItem> collectRoughItems(Range range) {
        int examCount = 0;
        String exCode = "";
        List<RoughItem> roughItems = new ArrayList<>();

        /* NOTE(review): the count is read once although answer paragraphs are deleted inside the loop — kept as in the original, confirm against POI behaviour */
        int numParagraphs = range.numParagraphs();
        for (int i = 0; i < numParagraphs; i++) {
            Paragraph paragraph = range.getParagraph(i);
            String text = paragraph.text();
            if (StringUtils.isEmpty(text)) {
                continue;
            }

            /* split() yields an empty array for text made only of dots, so guard the first token */
            String[] split = text.split(SPLIT_IDENTIFY);
            String head = split.length > 0 ? split[0] : "";

            boolean isExamNo = head.matches(NUMBER_REGEXP);
            boolean isAnswer = isAnswerLine(text);
            boolean isOptions = OPTIONS.contains(head);

            /* a question number starts a new question */
            if (isExamNo) {
                ++examCount;
                exCode = head;
            }
            /* titles, options and answers are all stored under the current question */
            if (isExamNo || isAnswer || isOptions) {
                roughItems.add(RoughItem.builder()
                        .serial(examCount)
                        .content(text)
                        .exCode(exCode)
                        .build());
            }
            /* the answer occupies a complete paragraph, so removing the paragraph is enough */
            if (isAnswer) {
                paragraph.delete();
            }
        }
        return roughItems;
    }

    /** Groups the collected paragraphs by question and builds one {@link ExamItem} per question. */
    private static List<ExamItem> toExamItems(List<RoughItem> roughItems) {
        /* TreeMap keeps the questions in ascending serial order; plain groupingBy promises no order */
        Map<Integer, List<RoughItem>> grouped = roughItems.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));

        List<ExamItem> examItems = new ArrayList<>();
        for (List<RoughItem> group : grouped.values()) {
            /* the first paragraph of a group is treated as the question title */
            RoughItem titleItem = group.get(0);
            String title = titleItem.getContent().replace("\r", "");

            /* an answer line may be absent, in which case the answer is the empty string */
            String answer = group.stream()
                    .map(RoughItem::getContent)
                    .filter(ExamUtil::isAnswerLine)
                    .map(ExamUtil::stripAnswerPrefix)
                    .findFirst()
                    .orElse("")
                    .replace("\r", "");

            examItems.add(ExamItem.builder()
                    .no(titleItem.getExCode())
                    .title(title)
                    .type(null)
                    .answer(answer)
                    .explain(null)
                    .build());
        }
        return examItems;
    }

    /**
     * Writes the answer key at the start of the document, {@code rowSize}
     * answers per line in the form {@code no.answer }.
     */
    private static void insertAnswerRows(Range range, List<ExamItem> examItems, int rowSize) {
        int examTotal = examItems.size();
        int totalRow = (examTotal + rowSize - 1) / rowSize;

        /* insertBefore always writes at the very beginning, so rows are emitted last-to-first */
        for (int row = totalRow - 1; row >= 0; row--) {
            int begin = row * rowSize;
            int end = Math.min(begin + rowSize, examTotal);

            StringBuilder rowText = new StringBuilder();
            for (ExamItem examItem : examItems.subList(begin, end)) {
                rowText.append(examItem.getNo()).append(".").append(examItem.getAnswer()).append(" ");
            }
            rowText.append("\r");
            range.insertBefore(rowText.toString());
        }
    }
}

  

四、答案嵌套在题目里的处理

答案嵌套在选项、题目中,需要再写逻辑判断

 

 

 

为了处理这种类型的题库文档,单开了一个新的工具类处理

细节部分看代码实现就行

package cn.cloud9.word;

import com.alibaba.druid.util.StringUtils;
import lombok.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.File;
import java.io.FileInputStream;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Variant of the question-bank converter for documents whose answers are
 * embedded inside option or question paragraphs instead of standing in their
 * own paragraph. The embedded answer marker is removed from the paragraph and
 * a compact answer key is inserted at the very beginning of the document.
 */
public class ExamUtil2 {
    /** Markers appended to the correct option paragraph. */
    private static final List<String> ANSWER_IDENT = Arrays.asList("(正确答案)", "【正确答案】");
    /** True/false marks written inside the question paragraph. */
    private static final List<String> ANSWER_IDENT2 = Arrays.asList("×", "√");
    /** Option letters written inside the question paragraph as the answer. */
    private static final List<String> ANSWER_IDENT3 = Arrays.asList("A", "B", "C", "D", "E", "F", "G");
    /** Labels that, when found before the first dot, mark a paragraph as an option. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");
    /** Alternative option labels that may appear anywhere in an option paragraph. */
    private static final List<String> OPTIONS2 = Arrays.asList("A、", "B、", "C、", "D、", "E、", "F、", "G、");
    /** A question number: a positive integer without leading zeros. */
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";
    /** Regex used to split a paragraph on dots. */
    private static final String SPLIT_IDENTIFY = "\\.";
    /** How many answers are written on one line of the answer key. */
    private static final int ROW_SIZE = 10;

    /** One collected entry (question title or extracted answer) tagged with its question. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        /** Running index of the question this entry belongs to (0 before the first question). */
        public int serial;
        /** Question number exactly as written in the document. */
        public String exCode;
        /** Raw title text, or the extracted answer. */
        public String content;
    }

    /** A question together with its extracted answer. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        public String no;
        public String title;
        public String type;
        public String answer;
        public String explain;
    }

    /**
     * Loads a {@code .docx} file.
     *
     * @param path location of the file
     * @return the parsed document
     */
    @SneakyThrows
    public static XWPFDocument getWordFileDocxType(String path) {
        /* try-with-resources: the stream is released even when parsing fails */
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new XWPFDocument(fileInputStream);
        }
    }

    /**
     * Loads a legacy {@code .doc} file.
     *
     * @param path location of the file
     * @return the parsed document
     */
    @SneakyThrows
    public static HWPFDocument getWordFileDocType(String path) {
        /* try-with-resources: the stream is released even when parsing fails */
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new HWPFDocument(fileInputStream);
        }
    }

    /**
     * Entry point.
     *
     * @param args optional overrides: {@code args[0]} is the input {@code .doc},
     *             {@code args[1]} is the output file; when omitted the original
     *             hard-coded locations are used
     */
    @SneakyThrows
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0)
                ? args[0]
                : "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 (   )高级保育师理论题库增加.doc";
        String newFilePath = (args != null && args.length > 1)
                ? args[1]
                : "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 (   )高级保育师理论题库增加- " + new Date().getTime() + ".doc";

        try (HWPFDocument wordFile = getWordFileDocType(filePath)) {
            Range range = wordFile.getRange();

            List<RoughItem> roughItems = collectRoughItems(range);
            List<ExamItem> examItems = toExamItems(roughItems);
            examItems.forEach(System.out::println);

            insertAnswerRows(range, examItems, ROW_SIZE);
            wordFile.write(new File(newFilePath));
        }
    }

    /** Returns whether {@code text} contains at least one of {@code tokens}. */
    private static boolean containsAny(String text, List<String> tokens) {
        for (String token : tokens) {
            if (text.contains(token)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Walks all paragraphs, records question titles and embedded answers, and
     * rewrites every paragraph that carried an answer so the answer is gone.
     */
    private static List<RoughItem> collectRoughItems(Range range) {
        int examCount = 0;
        String exCode = "";
        List<RoughItem> roughItems = new ArrayList<>();

        int numParagraphs = range.numParagraphs();
        for (int i = 0; i < numParagraphs; i++) {
            Paragraph paragraph = range.getParagraph(i);
            String text = paragraph.text();
            if (StringUtils.isEmpty(text)) {
                continue;
            }

            /* split() yields an empty array for text made only of dots, so guard the first token */
            String[] split = text.split(SPLIT_IDENTIFY);
            String head = split.length > 0 ? split[0] : "";

            boolean isExamNo = head.matches(NUMBER_REGEXP);
            boolean isOptions = OPTIONS.contains(head) || containsAny(text, OPTIONS2);
            /* answer marker attached to an option paragraph */
            boolean rightOption = containsAny(text, ANSWER_IDENT) && isOptions;
            /* true/false mark written inside the question paragraph */
            boolean rightOption2 = containsAny(text, ANSWER_IDENT2) && isExamNo;
            /* option letter written inside the question paragraph */
            boolean rightOption3 = containsAny(text, ANSWER_IDENT3) && isExamNo;
            boolean isAnswer = rightOption || rightOption2 || rightOption3;

            /* a question number starts a new question; the title keeps the unmodified text */
            if (isExamNo) {
                ++examCount;
                exCode = head;
                roughItems.add(RoughItem.builder()
                        .serial(examCount)
                        .content(text)
                        .exCode(exCode)
                        .build());
            }

            if (!isAnswer) {
                continue;
            }

            String correctOption = "";
            if (rightOption) {
                /* literal replace: "(正确答案)" would be a capture group under replaceAll and leave the parentheses behind */
                for (String marker : ANSWER_IDENT) {
                    text = text.replace(marker, "");
                }
                paragraph.replaceText(text, false);
                /* the option letter is taken from the first character of the cleaned paragraph */
                correctOption = String.valueOf(text.charAt(0));
            }
            if (rightOption2) {
                correctOption = text.contains(ANSWER_IDENT2.get(0)) ? ANSWER_IDENT2.get(0) : ANSWER_IDENT2.get(1);
                for (String mark : ANSWER_IDENT2) {
                    text = text.replace(mark, "");
                }
                paragraph.replaceText(text, false);
            }
            if (rightOption3) {
                /* the first letter (in A..G order) found in the text is taken as the answer */
                for (String option : ANSWER_IDENT3) {
                    if (text.contains(option)) {
                        correctOption = option;
                        text = text.replace(option, "");
                        break;
                    }
                }
                paragraph.replaceText(text, false);
            }

            roughItems.add(RoughItem.builder()
                    .serial(examCount)
                    .content(correctOption)
                    .exCode(exCode)
                    .build());
        }
        return roughItems;
    }

    /** Groups the collected entries by question and builds one {@link ExamItem} per answered question. */
    private static List<ExamItem> toExamItems(List<RoughItem> roughItems) {
        /* TreeMap keeps the questions in ascending serial order; plain groupingBy promises no order */
        Map<Integer, List<RoughItem>> grouped = roughItems.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));

        List<ExamItem> examItems = new ArrayList<>();
        for (List<RoughItem> group : grouped.values()) {
            /* a single entry means no answer was found for this question */
            if (group.size() == 1) {
                continue;
            }
            /* first entry is treated as the title, second as the answer */
            RoughItem titleItem = group.get(0);
            String title = titleItem.getContent().replace("\r", "");
            String answer = group.get(1).getContent().replace("\r", "");

            examItems.add(ExamItem.builder()
                    .no(titleItem.getExCode())
                    .title(title)
                    .type(null)
                    .answer(answer)
                    .explain(null)
                    .build());
        }
        return examItems;
    }

    /**
     * Writes the answer key at the start of the document, {@code rowSize}
     * answers per line in the form {@code no.answer }.
     */
    private static void insertAnswerRows(Range range, List<ExamItem> examItems, int rowSize) {
        int examTotal = examItems.size();
        int totalRow = (examTotal + rowSize - 1) / rowSize;

        /* insertBefore always writes at the very beginning, so rows are emitted last-to-first */
        for (int row = totalRow - 1; row >= 0; row--) {
            int begin = row * rowSize;
            int end = Math.min(begin + rowSize, examTotal);

            StringBuilder rowText = new StringBuilder();
            for (ExamItem examItem : examItems.subList(begin, end)) {
                rowText.append(examItem.getNo()).append(".").append(examItem.getAnswer()).append(" ");
            }
            rowText.append("\r");
            range.insertBefore(rowText.toString());
        }
    }
}

  

 

posted @ 2024-09-08 19:40  emdzz  阅读(27)  评论(0编辑  收藏  举报