图像ocr识别(一)
最近研究了一下 OCR 识别。本文介绍基于 OpenCV 的方式——先找出图像中的字符区域。方法目前还不完善,先记录下来;后续计划往 CNN+RNN+CTC 方向走,这里权当练手。
效果1:
效果2:
效果3:
效果4(识别效果不太好:只识别出了大框,字符分割有问题):
import math
import os

import cv2
import imutils
import matplotlib.pyplot as plt
import numpy as np
from imageio import imread


def point_distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Both arguments are indexable as ``p[0]`` (x) and ``p[1]`` (y).
    """
    return math.hypot(p2[0] - p1[0], p2[1] - p1[1])


def calc_height_width(box):
    """Return ``(width, height)`` of a 4-corner box.

    ``width`` is the length of the edge ``box[0]``-``box[1]`` and ``height``
    the length of the edge ``box[0]``-``box[3]``.  Note that, despite the
    function name, the width comes first in the returned tuple.
    """
    width = point_distance(box[1], box[0])
    height = point_distance(box[0], box[3])
    return (width, height)


def parse_chars(positions, min_thresh, min_range, max_range):
    """Split a per-column projection into character spans.

    Args:
        positions: sequence with one count per image column.
        min_thresh: a column whose count is strictly greater than this is
            treated as ink; strictly smaller is treated as a gap.  A count
            exactly equal to the threshold leaves the current state untouched.
        min_range: a run is only closed by a gap once it is at least this
            many columns wide; narrower runs stay open across the gap.
        max_range: a run that grows beyond this many columns is cut even
            though no gap was seen.

    Returns:
        A list of ``{'begin': int, 'end': int}`` dicts (column indices).
    """
    charInfos = []
    # ``None`` means "no run open".  Using 0 as the sentinel would make a run
    # that starts at column 0 indistinguishable from "no run".
    begin = None
    for idx, count in enumerate(positions):
        if count > min_thresh:
            if begin is None:
                begin = idx
            elif idx - begin > max_range:
                # Run is too wide for one character: force a split here.
                charInfos.append({'begin': begin, 'end': idx})
                begin = None
        elif count < min_thresh and begin is not None:
            if idx - begin >= min_range:
                charInfos.append({'begin': begin, 'end': idx})
                begin = None
    # Flush a run that is still open at the right edge of the projection.
    if begin is not None and len(positions) - begin >= min_range:
        charInfos.append({'begin': begin, 'end': len(positions) - 1})
    return charInfos


def process_more(windowName, imgSrc):
    """Segment characters inside one rectified text strip and display it.

    The strip is closed, eroded, edge-detected, dilated and binarised; the
    white pixels are counted per column, the column profile is split with
    ``parse_chars`` and each span is drawn as a green rectangle on a copy of
    the input, which is then shown in the window ``windowName``.
    """
    annotated = imgSrc.copy()

    cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    work = cv2.morphologyEx(imgSrc, cv2.MORPH_CLOSE, cross)
    work = cv2.erode(work, cross)
    work = cv2.Canny(work, 300, 300)
    work = cv2.dilate(work, np.ones((5, 5), np.uint8), iterations=1)
    _, work = cv2.threshold(work, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    rows = annotated.shape[0]
    # Vertical projection: number of white pixels in every column.
    column_hits = np.count_nonzero(work == 255, axis=0)

    char_positions = parse_chars(positions=column_hits, min_thresh=8,
                                 min_range=25, max_range=100)
    print(len(char_positions))

    for span in char_positions:
        cv2.rectangle(annotated, (span['begin'], 0), (span['end'], rows - 2),
                      (0, 255, 0), 2)

    annotated = imutils.resize(annotated, width=450)
    cv2.imshow(windowName, annotated)


def _find_text_boxes(image):
    """Return rotated boxes whose aspect ratio looks like a line of text."""
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(image, cv2.MORPH_CLOSE, rect_kernel)
    eroded = cv2.erode(closed, rect_kernel)
    edges = cv2.Canny(eroded, 200, 200)
    # A very large blur smears neighbouring character edges into one blob.
    blurred = cv2.GaussianBlur(edges, (105, 105), 0)
    _, mask = cv2.threshold(blurred, 0, 255,
                            cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # findContours returns 2 values on OpenCV 2.x/4.x and 3 on 3.x; the
    # contour list is the second-to-last element in every version.
    contours = cv2.findContours(mask, cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    found = []
    for contour in contours:
        rect = cv2.minAreaRect(contour)
        # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
        box = cv2.boxPoints(rect).astype(np.intp)
        w, h = calc_height_width(box)
        if w < 20 or h < 20:  # also rules out zero-length sides
            continue
        rate1 = h / w * 100
        rate2 = w / h * 100
        # Keep boxes whose short side is 10-20 % of the long side.
        if (10 <= rate1 <= 20) or (10 <= rate2 <= 20):
            print((w, h), '--------', rate1, '%', rate2, '%')
            found.append(box)
    return found


def _rectify_box(image, box):
    """Warp the rotated ``box`` out of ``image`` into an upright strip."""
    w, h = calc_height_width(box)
    # NOTE(review): the corner choices below assume a particular corner order
    # from cv2.boxPoints, which differs between OpenCV versions -- confirm
    # against the installed version.
    if w > h:
        # top-left, bottom-left, top-right  (1-based corners 3, 2, 4)
        src = np.float32([box[2], box[1], box[3]])
        dst = np.float32([[0, 0], [0, h], [w, 0]])
        size = (int(w), int(h))
    else:
        # top-right, top-left, bottom-right  (1-based corners 4, 3, 1)
        src = np.float32([box[3], box[2], box[0]])
        dst = np.float32([[0, 0], [0, w], [h, 0]])
        size = (int(h), int(w))
    affine = cv2.getAffineTransform(src, dst)
    return cv2.warpAffine(image, affine, size)


fileName = 'test1'
# NOTE(review): imageio loads channels as RGB while cv2.imshow interprets
# arrays as BGR, so previews are shown with red and blue swapped.
img = imread(os.path.join('imgs', fileName + '.jpg'))
# imutils.resize keeps the aspect ratio and ignores ``height`` whenever
# ``width`` is given, so only the width is passed.
img = imutils.resize(img, width=1920)

boxes = _find_text_boxes(img)

for i, box in enumerate(boxes):
    process_more("ffff222asdfas" + str(i), _rectify_box(img, box))

img = imutils.resize(img, width=600)
cv2.imshow("Frame6", img)
cv2.waitKey(100000)  # keep the windows open for up to 100 s or until a key
cv2.destroyAllWindows()
自省推动进步,视野决定未来。
心怀远大理想。
为了家庭幸福而努力。
商业合作请看此处:https://www.magicube.ai
心怀远大理想。
为了家庭幸福而努力。
商业合作请看此处:https://www.magicube.ai
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 提示词工程——AI应用必不可少的技术
· .NET周刊【3月第1期 2025-03-02】
2019-02-02 飞控遥控器原型