TensorFlow OCR for fixed-length captcha images (4 digits, 0-9) - CNN approach
First, let's generate some captcha images:
```python
import cv2 as cv
import numpy as np
import os

def create_digit_image(dir_path):
    # 24x72 gray background
    image = np.ones(shape=[24, 72], dtype=np.uint8)
    image = image * 127
    # pick four random digits as the captcha text
    a = np.random.randint(0, 10)
    b = np.random.randint(0, 10)
    c = np.random.randint(0, 10)
    d = np.random.randint(0, 10)
    text = str(a) + str(b) + str(c) + str(d)
    print(text)
    cv.putText(image, text, (6, 20), cv.FONT_HERSHEY_PLAIN, 1.5, (255), 2)
    # pepper noise: blacken 100 random pixels
    for i in range(100):
        row = np.random.randint(0, 24)
        col = np.random.randint(0, 72)
        image[row, col] = 0
    # the code itself is the file name (a repeated code overwrites the old file)
    full_path = dir_path + text + ".png"
    cv.imwrite(full_path, image)

# exist_ok avoids a crash when the script is re-run
os.makedirs(os.getcwd() + '\\train\\', exist_ok=True)
os.makedirs(os.getcwd() + '\\test\\', exist_ok=True)
for i in range(1000):
    create_digit_image(os.getcwd() + '\\train\\')
for i in range(100):
    create_digit_image(os.getcwd() + '\\test\\')
```
This generates 1000 training images and 100 test images (in practice slightly fewer unique files, since a repeated code overwrites the image with the same name).
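Note that the 4-digit code doubles as the file name, which is how the label is recovered later. A quick illustrative check (the example file names are made up; yours will differ from run to run):

```python
import os

train_dir = os.getcwd() + '\\train\\'
print(sorted(os.listdir(train_dir))[:3])   # e.g. ['0047.png', '0113.png', '0250.png']
print(len(os.listdir(train_dir)))          # at most 1000; duplicate codes overwrite
```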
One-hot encoding:
```python
import numpy as np

def text2vec(text):
    # encode a 4-digit string as a 40-dim one-hot vector (4 positions x 10 classes)
    text_len = len(text)
    if text_len > 4:
        print("text code : ", text)
        raise ValueError('captcha text is at most 4 characters')
    vector = np.zeros(4 * 10)

    def char2pos(c):
        k = ord(c)
        if 48 <= k <= 57:  # '0'..'9'
            return k - 48
        raise ValueError('only digits 0-9 are supported')

    for i, c in enumerate(text):
        idx = i * 10 + char2pos(c)
        vector[idx] = 1
    return vector

# convert a one-hot vector back to text
def vec2text(vec):
    char_pos = vec.nonzero()[0]
    text = []
    for i, c in enumerate(char_pos):
        char_idx = c % 10              # position within the 10-way block
        char_code = char_idx + ord('0')
        text.append(chr(char_code))
    return "".join(text)

s = text2vec('1030')
print(s)
s = vec2text(s)
print(s)
```
```
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
1030
```
Viewed as four rows of ten (one row per digit position), the vector becomes:
```
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]   row 1 -> digit 1
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]   row 2 -> digit 0
[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]   row 3 -> digit 3
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]   row 4 -> digit 0
```
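To make the row view concrete, here is a small sketch (assuming the text2vec defined above is in scope) that reshapes the 40-dim vector into 4 rows of 10 and recovers each digit with argmax, which is exactly how the accuracy computation in the training code below decodes predictions:

```python
import numpy as np

vec = text2vec('1030')
rows = vec.reshape(4, 10)       # one row per digit position
digits = rows.argmax(axis=1)    # the hot index in each row is the digit
print(digits)                   # [1 0 3 0]
print("".join(str(d) for d in digits))  # '1030'
```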
The full training code:
```python
import os
import tensorflow as tf
from random import choice
from tfdemo3.data_engine import get_one_image, get_image_files

w = 72
h = 24
label_vector_size = 40
train_dir = os.getcwd() + '\\train\\'
test_dir = os.getcwd() + '\\test\\'
train_files = get_image_files(train_dir)
test_files = get_image_files(test_dir)

# placeholders
x_image = tf.placeholder(shape=[None, h, w, 1], dtype=tf.float32)
y = tf.placeholder(shape=[None, label_vector_size], dtype=tf.float32)
keep_prob = tf.placeholder(dtype=tf.float32)

# convolution layer 1
conv1_w = tf.Variable(tf.random_normal(shape=[3, 3, 1, 32], stddev=0.1, dtype=tf.float32))
conv1_bias = tf.Variable(tf.random_normal(shape=[32], stddev=0.1))
conv1_out = tf.nn.conv2d(input=x_image, filter=conv1_w, strides=[1, 1, 1, 1], padding='SAME')
conv1_relu = tf.nn.relu(tf.add(conv1_out, conv1_bias))

# max pooling 1
maxpooling_1 = tf.nn.max_pool(conv1_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# convolution layer 2
conv2_w = tf.Variable(tf.random_normal(shape=[3, 3, 32, 64], stddev=0.1, dtype=tf.float32))
conv2_bias = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv2_out = tf.nn.conv2d(input=maxpooling_1, filter=conv2_w, strides=[1, 1, 1, 1], padding='SAME')
conv2_relu = tf.nn.relu(tf.add(conv2_out, conv2_bias))

# max pooling 2
maxpooling_2 = tf.nn.max_pool(conv2_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# convolution layer 3
conv3_w = tf.Variable(tf.random_normal(shape=[3, 3, 64, 64], stddev=0.1, dtype=tf.float32))
conv3_bias = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv3_out = tf.nn.conv2d(input=maxpooling_2, filter=conv3_w, strides=[1, 1, 1, 1], padding='SAME')
conv3_relu = tf.nn.relu(tf.add(conv3_out, conv3_bias))

# max pooling 3
maxpooling_3 = tf.nn.max_pool(conv3_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# fc-1: after three 2x2 poolings the 24x72 input is reduced to 3x9 with 64 channels
w_fc1 = tf.Variable(tf.random_normal(shape=[3 * 9 * 64, 1024], stddev=0.1, dtype=tf.float32))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))
h_pool2 = tf.reshape(maxpooling_3, [-1, 3 * 9 * 64])
output_fc1 = tf.nn.relu(tf.add(tf.matmul(h_pool2, w_fc1), b_fc1))

# dropout
h2 = tf.nn.dropout(output_fc1, keep_prob=keep_prob)

# fc-2 (takes the dropout output h2, so keep_prob actually has an effect)
w_fc2 = tf.Variable(tf.random_normal(shape=[1024, 40], stddev=0.1, dtype=tf.float32))
b_fc2 = tf.Variable(tf.constant(0.1, shape=[40]))
y_conv = tf.add(tf.matmul(h2, w_fc2), b_fc2)

# loss: sigmoid cross-entropy over the 40 label entries
cross_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_conv, labels=y)
loss = tf.reduce_mean(cross_loss)
step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

# accuracy: argmax over each of the 4 digit positions, averaged per position
saver = tf.train.Saver()
predict = tf.reshape(y_conv, [-1, 4, 10])
max_idx_p = tf.argmax(predict, 2)
max_idx_l = tf.argmax(tf.reshape(y, [-1, 4, 10]), 2)
correct_pred = tf.equal(max_idx_p, max_idx_l)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

def get_train_batch(files, batch_size=128):
    images = []
    labels = []
    for _ in range(batch_size):
        image, label = get_one_image(train_dir, choice(files))
        images.append(image)
        labels.append(label)
    return images, labels

def get_batch(root_dir, files):
    images = []
    labels = []
    for f in files:
        image, label = get_one_image(root_dir, f)
        images.append(image)
        labels.append(label)
    return images, labels

test_images, test_labels = get_batch(test_dir, test_files)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(500):
        batch_xs, batch_ys = get_train_batch(train_files, 100)
        curr_loss, _ = sess.run([loss, step],
                                feed_dict={x_image: batch_xs, y: batch_ys, keep_prob: 0.5})
        if (i + 1) % 100 == 0:
            print("run step (%d) ..., loss : (%f)" % (i + 1, curr_loss))
            curr_acc = sess.run(accuracy,
                                feed_dict={x_image: test_images, y: test_labels, keep_prob: 1.0})
            print("current test Accuracy : %f" % curr_acc)
    saver.save(sess, "./ckp/code_break.ckpt", global_step=500)
```
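After training, the saved checkpoint can be restored to decode a new image: restore the weights and read out the per-position argmax. This is a minimal sketch, assuming the graph definitions above (x_image, keep_prob, max_idx_p, saver) are in scope and that the save call produced ./ckp/code_break.ckpt-500:

```python
# restore the trained weights and predict the code for one test image
with tf.Session() as sess:
    saver.restore(sess, "./ckp/code_break.ckpt-500")
    image, _ = get_one_image(test_dir, test_files[0])
    digit_idx = sess.run(max_idx_p, feed_dict={x_image: [image], keep_prob: 1.0})
    print("predicted code:", "".join(str(d) for d in digit_idx[0]))
    print("file name     :", test_files[0])
```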
data_engine.py
```python
import numpy as np
import cv2 as cv
import os

def text2vec(text):
    # encode a 4-digit string as a 40-dim one-hot vector (4 positions x 10 classes)
    text_len = len(text)
    if text_len > 4:
        print("text code : ", text)
        raise ValueError('captcha text is at most 4 characters')
    vector = np.zeros(4 * 10)

    def char2pos(c):
        k = ord(c)
        if 48 <= k <= 57:  # '0'..'9'
            return k - 48
        raise ValueError('only digits 0-9 are supported')

    for i, c in enumerate(text):
        idx = i * 10 + char2pos(c)
        vector[idx] = 1
    return vector

# convert a one-hot vector back to text
def vec2text(vec):
    char_pos = vec.nonzero()[0]
    text = []
    for i, c in enumerate(char_pos):
        char_idx = c % 10              # position within the 10-way block
        char_code = char_idx + ord('0')
        text.append(chr(char_code))
    return "".join(text)

def get_one_image(root_dir, f):
    # load as grayscale, resize to 72x24 and normalize pixel values to [0, 1]
    gray = cv.imread(os.path.join(root_dir, f), cv.IMREAD_GRAYSCALE)
    resize = cv.resize(gray, (72, 24))
    result = np.zeros(resize.shape, dtype=np.float32)
    cv.normalize(resize, result, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F)
    image = np.expand_dims(result, axis=2)   # shape (24, 72, 1)
    label = text2vec(f[0:4])                 # the label is the first 4 chars of the file name
    return image, label

def get_image_files(root_dir):
    img_list = []
    files = os.listdir(root_dir)
    for f in files:
        if os.path.isfile(os.path.join(root_dir, f)):
            img_list.append(f)
    return img_list
```
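A quick sanity check of what data_engine.py returns; a small sketch, assuming the train\ directory from the generation script exists:

```python
import os
from tfdemo3.data_engine import get_one_image, get_image_files

train_dir = os.getcwd() + '\\train\\'
files = get_image_files(train_dir)
image, label = get_one_image(train_dir, files[0])
print(image.shape, image.dtype)   # (24, 72, 1) float32, values normalized to [0, 1]
print(label.shape)                # (40,) -- four one-hot blocks of 10
```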
Training output:

```
run step (100) ..., loss : (0.023609)
current test Accuracy : 0.992500
run step (200) ..., loss : (0.000665)
current test Accuracy : 1.000000
run step (300) ..., loss : (0.000046)
current test Accuracy : 1.000000
run step (400) ..., loss : (0.000010)
current test Accuracy : 1.000000
run step (500) ..., loss : (0.000005)
current test Accuracy : 1.000000
```
The convolutional network really does perform well on this task. Note that the accuracy reported here is per digit position (a mean over all 4 positions of every test image), so 0.9925 at step 100 means 397 of the 400 digit positions in the 100-image test set were correct.