团队冲刺第九天

今天上课展示了第一阶段冲刺的成果：可以读取传输的图片上的表格，并输出到 web 页面，但是保存到本地生成 Excel 表格的功能还有点问题，还需要解决。

观看了其他组的展示后，我看到了他们的优点，比如页面美观、简洁，功能丰富，也认识到了自己的不足，还有许多地方需要改进。

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package com.example.demo;
 
import com.sun.org.slf4j.internal.Logger;
import com.sun.org.slf4j.internal.LoggerFactory;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.opencv.core.*;
import org.opencv.core.Rect;
import org.opencv.highgui.HighGui;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
import org.opencv.objdetect.Objdetect;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
 
public class TableScanner {
    private static final Logger logger = LoggerFactory.getLogger(TableScanner.class);
 
    private static final String TESSERACT_DATA_PATH = "/usr/share/tesseract-ocr/4.00/tessdata"; // Tesseract OCR数据路径
 
    private String imagePath; // 图像文件路径
    private String tablePath; // 表格文件保存路径
 
    public TableScanner(String imagePath, String tablePath) {
        this.imagePath = imagePath;
        this.tablePath = tablePath;
    }
 
    public void scanTable() throws IOException {
        // 加载OpenCV库
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
 
        // 读取图像文件
        Mat image = Imgcodecs.imread(imagePath);
 
        // 转换为灰度图像
        Mat gray = new Mat();
        Imgproc.cvtColor(image, gray, Imgproc.COLOR_BGR2GRAY);
 
        // 对图像进行二值化处理
        Mat binary = new Mat();
        Imgproc.threshold(gray, binary, 0, 255, Imgproc.THRESH_BINARY_INV | Imgproc.THRESH_OTSU);
 
        // 进行表格检测
        Rect tableRect = detectTable(binary);
 
        if (tableRect == null) {
            logger.warn("Failed to detect table in image: {}", imagePath);
            return;
        }
 
        // 提取表格区域
        Mat table = new Mat(image, tableRect);
 
        // 进行表格识别
        ITesseract tesseract = new Tesseract();
        tesseract.setDatapath(TESSERACT_DATA_PATH);
        tesseract.setLanguage("eng"); // 使用英文语言库
        String text = null;
        BufferedImage bufferedImage = null;
        try {
            MatOfByte matOfByte = new MatOfByte();
            Imgcodecs.imencode(".jpg", table, matOfByte);
            byte[] byteArray = matOfByte.toArray();
            InputStream in = new ByteArrayInputStream(byteArray);
            bufferedImage = ImageIO.read(in);
        } catch (IOException e) {
            logger.error("Failed to convert Mat to BufferedImage: {}", e.getMessage());
            return;
        }
        try {
            text = tesseract.doOCR(bufferedImage);
        } catch (TesseractException e) {
            logger.error("Failed to recognize table in image: {}", imagePath, e);
            return;
        }
//        try {
//            text = tesseract.doOCR(table);
//        } catch (TesseractException e) {
//            logger.error("Failed to recognize table in image: {}", imagePath, e);
//            return;
//        }
 
        // 保存为CSV文件
        Path tableFile = Paths.get(tablePath);
        Files.write(tableFile, text.getBytes());
    }
 
    /**
     * 检测图像中的表格区域
     */
    private Rect detectTable(Mat binary) {
        // 进行轮廓检测
        List<MatOfPoint> contours = Lists.newArrayList();
        Mat hierarchy = new Mat();
        Imgproc.findContours(binary, contours, hierarchy, Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_SIMPLE);
 
        // 查找最大的矩形轮廓
        Rect maxRect = null;
        double maxArea = 0;
        for (MatOfPoint contour : contours) {
            Rect rect = (Rect) Imgproc.boundingRect(contour);
            double area = rect.width * rect.height;
            if (area > maxArea && isTable(rect)) {
                maxRect = rect;
                maxArea = area;
            }
        }
 
        return maxRect;
    }
 
    /**
     * 判断矩形是否为表格
     */
    private boolean isTable(Rect rect) {
       return rect.width > 50 && rect.height > 50;
        
    }
 
}

  

posted @   lcz111  阅读(13)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· winform 绘制太阳,地球,月球 运作规律
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
点击右上角即可分享
微信分享提示