图像OCR识别(一)

 

最近研究了一下OCR识别。本文介绍用OpenCV找出字符区域的方法,虽然还不完善,但先记录下来;后续打算往CNN+RNN+CTC方向走,这里就当作练手。

效果1:

 

 

 

 

效果2:

 

 

 

 

效果3:

 

 

 

 

效果4(识别率不太好,只把大框识别了,字符的分割有问题):

 

 

 

 

代码如下:
import cv2
import imutils
import numpy as np
from imageio import imread
import math
import matplotlib.pyplot as plt
 
 
def point_distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components (index 0 and index 1) of each point
    are read.
    """
    dx_squared = math.pow(p2[0] - p1[0], 2)
    dy_squared = math.pow(p2[1] - p1[1], 2)
    return math.sqrt(dx_squared + dy_squared)
 
 
def calc_height_width(box):
    """Measure two adjacent sides of a four-point box.

    Returns a ``(width, height)`` tuple: width is the distance between
    ``box[0]`` and ``box[1]``, height is the distance between ``box[0]``
    and ``box[3]``.
    """
    origin = box[0]
    return (point_distance(box[1], origin), point_distance(origin, box[3]))
 
 
# Base name (without extension) of the input image inside the ``imgs`` folder.
fileName = 'test1'

# A forward slash is accepted as a path separator on Windows too, so this
# path works on every platform (the original hard-coded a backslash).
# NOTE(review): imageio presumably returns RGB channel order while
# cv2.imshow interprets arrays as BGR, so displayed colors may be swapped
# -- confirm before relying on the preview colors.
img = imread('imgs/' + fileName + '.jpg')
# NOTE(review): imutils.resize keeps the aspect ratio; when ``width`` is
# given the ``height`` argument is presumably ignored -- confirm.
img = imutils.resize(img, width=1920, height=2080)

# Morphological close followed by an erosion, both with a 3x3 rectangle.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
closed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
eroded = cv2.erode(closed, kernel)

# Edge map of the cleaned image, then a very wide Gaussian blur so that
# nearby edges smear together before thresholding.
cannyImg = cv2.Canny(eroded, 200, 200)
blurred = cv2.GaussianBlur(cannyImg, (105, 105), 0)

# Otsu picks the binarization threshold automatically (the 0 is ignored).
_, skin = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# cv2.findContours returns 3 values on OpenCV 3.x and 2 values on 2.x/4.x;
# imutils.grab_contours extracts the contour list in either case.
contours = imutils.grab_contours(
    cv2.findContours(skin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE))

# Largest regions first.
contours = sorted(contours, key=cv2.contourArea, reverse=True)

# Rotated bounding boxes (4x2 integer corner arrays) of candidate regions.
boxes = []
for c in contours:
    rect = cv2.minAreaRect(c)
    # np.int0 was removed in NumPy 2.0; it was an alias of np.intp.
    box = cv2.boxPoints(rect).astype(np.intp)
    (w, h) = calc_height_width(box)
    # Skip boxes with a side shorter than 20 px; this also rules out
    # zero-length sides, so the divisions below are safe.
    if w < 20 or h < 20:
        continue
    rate1 = h / w * 100
    rate2 = w / h * 100
    # Keep only elongated boxes whose short side is 10%-20% of the long side.
    if (10 <= rate1 <= 20) or (10 <= rate2 <= 20):
        print((w, h), '--------', rate1, '%', rate2, '%')
        boxes.append(box)

img = img.copy()
# Counter used to give each per-box preview window a unique name.
i = 0
 
 
def parse_chars(positions, min_thresh, min_range, max_range):
    """Split a per-column profile into character spans.

    Args:
        positions: sequence of per-column values (e.g. foreground pixel
            counts), indexed by column.
        min_thresh: a column strictly above this value is "ink", a column
            strictly below it is "gap"; a column equal to it is ignored.
        min_range: a span is closed at a gap column only when it is at
            least this many columns wide; otherwise the span stays open and
            keeps its original start.
        max_range: an open span is force-closed at an ink column once it is
            more than this many columns wide.

    Returns:
        A list of ``{'begin': int, 'end': int}`` dicts, where ``end`` is the
        column at which the span was closed.

    A span that is still open when the profile ends is not reported.
    """
    char_infos = []
    # None means "no span is open". The original used 0 as the sentinel,
    # which made a span starting at column 0 indistinguishable from "unset".
    begin = None
    for idx, value in enumerate(positions):
        if value > min_thresh:
            if begin is None:
                begin = idx
            elif idx - begin > max_range:
                # Span grew too wide: cut it here and start over.
                char_infos.append({'begin': begin, 'end': idx})
                begin = None
        elif value < min_thresh and begin is not None:
            if idx - begin >= min_range:
                char_infos.append({'begin': begin, 'end': idx})
                begin = None
            # Narrower than min_range: keep the span open across the gap.

    return char_infos
 
 
def process_more(windowName, imgSrc):
    """Segment a cropped text strip into characters and display the result.

    The strip is cleaned, edge-detected, dilated and binarized; the number
    of white pixels per column is then passed to ``parse_chars`` and each
    returned span is drawn as a green rectangle on a copy of the input.

    Args:
        windowName: title of the OpenCV window used to show the result.
        imgSrc: image of one candidate text region.

    Returns:
        None. Prints the number of spans found and opens a window.
    """
    ori_imgSrc = imgSrc.copy()

    # Close then erode with a 3x3 cross.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    closed = cv2.morphologyEx(imgSrc, cv2.MORPH_CLOSE, kernel)
    imgSrc = cv2.erode(closed, kernel)

    # Edges, thickened by a 5x5 dilation, then binarized with Otsu.
    imgSrc = cv2.Canny(imgSrc, 300, 300)
    kernel = np.ones((5, 5), np.uint8)
    imgSrc = cv2.dilate(imgSrc, kernel, iterations=1)
    _, imgSrc = cv2.threshold(imgSrc, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    rows = ori_imgSrc.shape[0]
    # Number of white (255) pixels in every column, computed in one
    # vectorized pass instead of a per-pixel Python loop.
    tags = np.count_nonzero(imgSrc == 255, axis=0)

    char_positions = parse_chars(positions=tags, min_thresh=8, min_range=25, max_range=100)
    print(len(char_positions))
    for p in char_positions:
        # Each rectangle runs from row 0 to row ``rows - 2``.
        cv2.rectangle(ori_imgSrc, (p['begin'], 0), (p['end'], rows - 2), (0, 255, 0), 2)

    ori_imgSrc = imutils.resize(ori_imgSrc, width=450)
    cv2.imshow(windowName, ori_imgSrc)
 
 
# Rectify every candidate box into an axis-aligned strip and segment it.
for i, box in enumerate(boxes):
    (w, h) = calc_height_width(box)

    # Pick three box corners to map onto the top-left, bottom-left and
    # top-right of the output strip, so the longer side ends up horizontal.
    # NOTE(review): the corner choice follows the original author's
    # assumption about the vertex order returned by cv2.boxPoints, which
    # may differ between OpenCV versions -- confirm.
    if w > h:
        corners = (box[2], box[1], box[3])
        out_w, out_h = w, h
    else:
        corners = (box[3], box[2], box[0])
        out_w, out_h = h, w

    matSrc = np.float32([[pt[0], pt[1]] for pt in corners])
    matDst = np.float32([
        [0, 0],
        [0, out_h],
        [out_w, 0]
    ])
    matAffine = cv2.getAffineTransform(matSrc, matDst)
    dst = cv2.warpAffine(img, matAffine, (int(out_w), int(out_h)))

    process_more("ffff222asdfas" + str(i), dst.copy())

# Show a downscaled copy of the full image.
# NOTE(review): imutils.resize keeps the aspect ratio; with ``width`` given
# the ``height`` argument is presumably ignored -- confirm.
img = imutils.resize(img, width=600, height=600)
cv2.imshow("Frame6", img)

# Keep the windows open until a key is pressed or 100 seconds elapse.
cv2.waitKey(100000)

cv2.destroyAllWindows()

  

 

posted @   McKay  阅读(952)  评论(0编辑  收藏  举报
编辑推荐:
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
阅读排行:
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 提示词工程——AI应用必不可少的技术
· .NET周刊【3月第1期 2025-03-02】
历史上的今天:
2019-02-02 飞控遥控器原型
点击右上角即可分享
微信分享提示