模板匹配法验证码识别

 

1. 降噪二值化预处理

from skimage.io import imread, imsave, imshow
from skimage.filters import threshold_otsu
import skimage.morphology as sm
from skimage.measure import regionprops
%matplotlib inline
from skimage.feature import match_template
import numpy as np
import matplotlib.pyplot as plt


def clearNoise(fname, threshold=0.6):
    """Load an image as greyscale and binarize it with a fixed cut-off.

    Args:
        fname: Path of the image file, handed straight to ``imread``.
        threshold: Grey-level cut-off. Pixels strictly below this value
            become foreground. Defaults to 0.6, the value that used to be
            hard-coded here.

    Returns:
        An integer array with the same shape as the greyscale image:
        1 where the grey value is below ``threshold``, 0 elsewhere.
    """
    img = imread(fname, as_grey=True)
    # Multiplying by 1 converts the boolean mask into a 0/1 integer array.
    return 1 * (img < threshold)

 

2. 连通域切割生成模板, 重命名模板文件label_xxx.jpg

import os

# Cut sampled captcha images into connected regions and save each region as
# a template file. Every file is written with the placeholder label "0";
# per the section heading the files are renamed to <label>_xxx.jpg afterwards.
cnt = 100  # running number embedded in the template file names
path = 'img/origin/'
fnames = [os.path.join(path, fname) for fname in os.listdir(path) if fname.endswith('jpg')]

# Only every fifth file, starting with the third one, is turned into templates.
for fname in fnames[2::5]:
    # Call form: prints the same text on Python 2 and is valid on Python 3.
    print(fname)
    img = imread(fname, as_grey=True)
    # Foreground = pixels darker than the Otsu threshold, as a 0/1 array.
    img = 1 * (img < threshold_otsu(img))
    # Assign an integer label to every connected foreground region.
    img = sm.label(img)
    for region in regionprops(img):
        # Scale the region's mask to 0/250 so it is visible in the saved file.
        imsave('img/templates/0_sx%d.jpg' % cnt, 250*np.uint8(region.image))
        cnt += 1

 

3. 逐个模板匹配,找到最相似,按x轴排序输出

path = 'img/templates/'
from PIL import Image

# Template file names
tpl_names = [os.path.join(path, fname) for fname in os.listdir(path) if fname.endswith('jpg')]
# Load every template and convert it to a 1-bit image array
tpls = [np.array(Image.open(tpl).convert('1')) for tpl in tpl_names]
# Template label = the part of the file name before the first underscore
tpl_labels = [os.path.basename(fname).split('_')[0] for fname in tpl_names]

# NOTE(review): `img` is not assigned in this step. It is whatever the
# earlier steps left bound to that name (after step 2, the label image of the
# last sampled file). Presumably it should be rebound to the binarized captcha
# to recognize, e.g. via clearNoise(...), before this runs -- confirm.

# Matching result: key is the column (x-axis position) of the best match,
# value is the label of the template that matched there.
res = {}
for tpl, tpl_label in zip(tpls, tpl_labels):
    match_score = match_template(img, tpl)
    # unravel_index yields (row, column) of the highest-scoring position.
    row, col = np.unravel_index(np.argmax(match_score), match_score.shape)
    h, w = tpl.shape
    matched_img = img[row:row+h, col:col+w]
    # Count differing pixels. `!=` gives the same count as a nonzero
    # difference and also works when both arrays are boolean, where numpy
    # rejects `-`.
    diff_num = np.count_nonzero(tpl != matched_img)
    dist = diff_num/float(h*w)
    if dist < 0.05:  # accept when at least 95% of the pixels agree
        res[col] = tpl_label

# Output the labels ordered left to right by their x-axis position.
res = [res[col] for col in sorted(res)]
print(''.join(res))
posted on 2017-04-14 15:56  星空守望者--jkmiao  阅读(462)  评论(1)  编辑  收藏  举报