模板匹配法验证码识别
1. 降噪二值化预处理
from skimage.io import imread, imsave, imshow from skimage.filters import threshold_otsu import skimage.morphology as sm from skimage.measure import regionprops %matplotlib inline from skimage.feature import match_template import numpy as np import matplotlib.pyplot as plt def clearNoise(fname): img = imread(fname, as_grey=True) img = 1 * (img < 0.6) return img
2. 连通域切割生成模板,并将模板文件重命名为 label_xxx.jpg(label 为该模板对应的字符)
import os

# Running index that gives every saved template a unique file name.
cnt = 100
path = 'img/origin/'
tpl_dir = 'img/templates/'

# os.listdir() returns entries in arbitrary order; sorting makes the [2::5]
# sample below pick the same source files on every run.
fnames = sorted(os.path.join(path, fname) for fname in os.listdir(path)
                if fname.endswith('jpg'))

# imsave does not create missing directories, so make sure the target exists.
if not os.path.isdir(tpl_dir):
    os.makedirs(tpl_dir)

# Use every fifth image, starting from the third, as a template source.
for fname in fnames[2::5]:
    print(fname)
    # `as_gray` is the keyword spelling accepted by current scikit-image.
    # The name `img` is kept because the recognition step below reads it.
    img = imread(fname, as_gray=True)
    # Otsu chooses the threshold per image; darker pixels become 1.
    img = 1 * (img < threshold_otsu(img))
    # Give every connected foreground region its own integer label.
    img = sm.label(img)
    for region in regionprops(img):
        # region.image is the region's mask cropped to its bounding box;
        # it is scaled to 0/250 so it can be stored as an 8-bit image.
        # Files are saved with the placeholder label "0" and are expected to
        # be renamed by hand to "<label>_xxx.jpg" afterwards.
        imsave(os.path.join(tpl_dir, '0_sx%d.jpg' % cnt),
               250 * np.uint8(region.image))
        cnt += 1
3. 逐个模板匹配,找到最相似,按x轴排序输出
path = 'img/templates/'
from PIL import Image

# Template file names.
tpl_names = [os.path.join(path, fname) for fname in os.listdir(path)
             if fname.endswith('jpg')]

# Load every template as a boolean mask. The image is thresholded explicitly
# instead of using convert('1'), because convert('1') applies Floyd-Steinberg
# dithering by default and would scatter stray pixels over the templates.
tpls = [np.array(Image.open(tpl).convert('L')) > 127 for tpl in tpl_names]

# Template labels: the part of the file name before the first underscore.
tpl_labels = [os.path.basename(fname).split('_')[0] for fname in tpl_names]

# NOTE(review): `img` is not assigned in this step; it is whatever was left
# bound earlier (in the template-generation step that is the labelled array
# of the last processed file). Presumably it should be the preprocessed
# captcha to recognise, e.g. the result of clearNoise(...) -- confirm.

# Best match per horizontal position: column -> (mismatch ratio, label).
best = {}
for tpl, tpl_label in zip(tpls, tpl_labels):
    match_score = match_template(img, tpl)
    # unravel_index yields (row, column) of the highest correlation score.
    row, col = np.unravel_index(np.argmax(match_score), match_score.shape)
    h, w = tpl.shape
    matched_img = img[row:row + h, col:col + w]
    # Count differing pixels with != rather than subtraction, which NumPy
    # rejects when both operands are boolean arrays.
    diff_num = np.count_nonzero(tpl != matched_img)
    dist = diff_num / float(h * w)
    # Accept the template only when at least 95% of its pixels agree.
    if dist < 0.05:
        # If several templates land on the same column, keep the closest
        # one instead of whichever happened to be listed last.
        if col not in best or dist < best[col][0]:
            best[col] = (dist, tpl_label)

# Order the recognised labels left to right and print them as one string.
res = [best[col][1] for col in sorted(best)]
print(''.join(res))
每天一小步,人生一大步!Good luck~