文字识别

基于 OpenCV & Tesseract

 

这里记录一下如何找出文字轮廓

1、首先获取到图片的灰度图

1
2
3
4
// Convert the received UIImage into an OpenCV Mat.
Mat img = [MMOpenCVHelper cvMatFromUIImage:self.receiveImage];
// Convert to grayscale; `gray` is the input for the text-region search below.
// NOTE(review): cvMatFromUIImage: is not visible here. If it returns RGBA data,
// COLOR_RGBA2GRAY would weight the channels correctly — confirm against the helper.
Mat gray;
cvtColor(img, gray, COLOR_BGR2GRAY);

2、画出文字轮廓(这一步的操作相对较多)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/// Finds text regions in the grayscale image, outlines each of them in green on
/// the image shown in `showView`, and starts recognition after the last outline
/// has been displayed.
///
/// @param gray Grayscale Mat of the image; it is run through -preprocess: and
///             later handed to -beginTesseract:rects:withImageCopy:.
- (void)findTextRegionWithGrayMat:(Mat )gray{
    Mat img = [MMOpenCVHelper cvMatFromUIImage:self.showView.image];
    // Second Mat built from a copy of the image, so the pieces cropped for
    // recognition do not contain the green outline strokes.
    Mat img_copy = [MMOpenCVHelper cvMatFromUIImage:[self.showView.image copy]];
    // Morphological preprocessing: produces the mask the contour search runs on.
    Mat dilation  = [self preprocess:gray];
    // Find and filter the candidate text regions.
    std::vector<RotatedRect> rects = findTextRegion(dilation);

    // Only rects with a positive size are drawn and reported on the main queue.
    // The completion check below compares against the number of reports, so the
    // expected count must exclude the skipped rects; otherwise a single skipped
    // rect means beginTesseract is never reached.
    size_t drawableCount = 0;
    for (const RotatedRect &candidate : rects) {
        if (candidate.size.height > 0 && candidate.size.width > 0) {
            drawableCount++;
        }
    }
    queue_count = drawableCount;
    begin_Queue_Count = 0;

    // Draw the outlines off the main thread.
    [self.opQueue addOperationWithBlock:^{
        for (RotatedRect rect :  rects){
            if (rect.size.height <=0 || rect.size.width <=0) {
                continue;
            }
            // Draw the four edges of the rotated rectangle.
            Point2f P[4];
            rect.points(P);
            for (int j = 0; j <= 3; j++){
                line(img, P[j], P[(j + 1) % 4], Scalar(0,255,0), 4);
            }
            // UI updates and the progress counter are handled on the main queue.
            [[NSOperationQueue mainQueue] addOperationWithBlock:^{
                self.showView.image = [MMOpenCVHelper UIImageFromCVMat:img];
                begin_Queue_Count ++;
                if (begin_Queue_Count == queue_count) {
                    // Every drawable rect has been shown: start recognition.
                    [self beginTesseract:gray rects:rects withImageCopy:img_copy];
                }
            }];
        }
    }];

}

   图片的预处理,这里应该要做降噪处理,暂时没有研究

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/// Preprocessing: turns the grayscale image into a binary mask for the contour search.
///
/// Steps: morphological gradient -> Otsu binarization -> morphological closing
/// with a wide, flat kernel.
///
/// @param gray Grayscale input Mat.
/// @return The closed binary Mat.
- (Mat )preprocess:(Mat )gray{
    // Morphological gradient with a 3x3 elliptical kernel.
    Mat gradientKernel = getStructuringElement(MORPH_ELLIPSE, cv::Size(3, 3));
    Mat gradient;
    morphologyEx(gray, gradient, MORPH_GRADIENT, gradientKernel);

    // Binarize; THRESH_OTSU is set, so the threshold is chosen by Otsu's method.
    Mat binary;
    threshold(gradient, binary, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);

    // Closing with a 14x2 rectangular kernel, applied 3 times
    // (the library default would be a single iteration).
    Mat closingKernel = getStructuringElement(MORPH_RECT, cv::Size(14, 2));
    const int closingIterations = 3;
    Mat closed;
    morphologyEx(binary, closed, MORPH_CLOSE, closingKernel, cv::Point(-1, -1), closingIterations);
    return closed;
}

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/// Extracts candidate text regions from a preprocessed binary image.
///
/// A contour is kept when its area is at least 100, its bounding box is not
/// noticeably taller than wide, its region lies inside the image, enough of the
/// region is non-zero, and its longer side is at least 8 px.
///
/// @param img Binary mask, as produced by the preprocessing step.
/// @return The minimum-area rotated rectangles of the accepted contours, in
///         descending contour-index order.
std::vector<RotatedRect> findTextRegion(Mat img){
    std::vector<RotatedRect> rects;

    // 1. Find contours. Older OpenCV releases modify the image passed to
    //    findContours, and Mat copies share pixel data, so run it on a clone:
    //    the fill-ratio check below (and the caller's Mat) must see the
    //    unmodified mask.
    std::vector<std::vector<cv::Point>> contours;
    std::vector<Vec4i> hierarchy;
    Mat contourInput = img.clone();
    findContours(contourInput, contours, hierarchy, CV_RETR_CCOMP, CHAIN_APPROX_SIMPLE,cv::Point(0, 0));

    // 2. Filter the contours.
    for (int i = (int)contours.size() - 1; i > -1 ; i--){

        // Drop contours whose area is below 100.
        double area = contourArea(contours[i]);
        if (area < 100)
            continue;

        // Minimum-area rectangle; it may be rotated.
        RotatedRect rect = minAreaRect(contours[i]);

        // Width and height of the upright bounding box of that rectangle.
        cv::Rect bounds = rect.boundingRect();
        int m_width = bounds.width;
        int m_height = bounds.height;

        // Skip regions that are clearly taller than wide.
        if (m_height > m_width * 1.2){
            continue;
        }

        Point2f center = rect.center;
        // Treat the longer side as the width and the shorter one as the height.
        float rect_width ;
        float rect_height;
        if(rect.size.width < rect.size.height){
            rect_width = rect.size.height;
            rect_height = rect.size.width;
        } else {
            rect_width = rect.size.width;
            rect_height = rect.size.height;
        }

        // The upright region centered on the rectangle must lie inside the image.
        if (center.x - rect_width/2  < 0|| rect_width  < 0 || center.x - rect_width/2  + rect_width  > img.cols || center.y - rect_height/2  < 0 || rect_height  <0 || center.y - rect_height/2  + rect_height  > img.rows) {
            NSLog(@"超出范围了");
            continue;

        }

        // Share of non-zero pixels in the region.
        // NOTE(review): the pixel count comes from the rect_width x rect_height
        // region while the denominator is the bounding-box area; for rotated
        // rectangles these differ — confirm this is the intended ratio.
        Mat maskROI = img(cv::Rect(center.x - (rect_width/ 2) , center.y - (rect_height/2)   , rect_width , rect_height ));
        double content = (double)countNonZero(maskROI);
        double r = content/(m_width*m_height);

        // rect_width is the longer side, so `rect_width < 8` means both sides
        // are under 8 px (the original repeated the width test twice, which
        // evaluates to the same thing).
        if (r < 0.4 || rect_width < 8) {
            NSLog(@"内容少于0.4占比");
            continue;
        }
        rects.push_back(rect);
    }
    return rects;
}

文字轮廓基本到这里就可以画出来了,接下来取出每个小块图片丢给tesseract去识别。

因为识别的速度不是很快,所以要充分利用 CPU。我后面识别用的是 NSOperationQueue(即上面代码里的 opQueue),

可以设置它的最大并发数(例如 maxConcurrentOperationCount)。

posted @   小师傅啊小师傅  阅读(364)  评论(0编辑  收藏  举报
编辑推荐:
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
阅读排行:
· 分享 3 个 .NET 开源的文件压缩处理库,助力快速实现文件压缩解压功能!
· Ollama——大语言模型本地部署的极速利器
· 使用C#创建一个MCP客户端
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· Windows编程----内核对象竟然如此简单?
点击右上角即可分享
微信分享提示