TensorFlow OCR for fixed-length CAPTCHA images (4 digits, 0-9): the CNN approach
First, let's generate some CAPTCHA images. Each one is a 24x72 grayscale image with four random digits drawn on it plus 100 randomly placed black pixels as noise; the digit string is used as the filename, so every image carries its own label.
```python
import cv2 as cv
import numpy as np
import os

def create_digit_image(dir_path):
    image = np.ones(shape=[24, 72], dtype=np.uint8)
    image = image * 127
    a = np.random.randint(0, 10)
    b = np.random.randint(0, 10)
    c = np.random.randint(0, 10)
    d = np.random.randint(0, 10)
    text = str(a) + str(b) + str(c) + str(d)
    print(text)
    cv.putText(image, text, (6, 20), cv.FONT_HERSHEY_PLAIN, 1.5, (255), 2)
    # noise: 100 random black pixels
    for i in range(100):
        row = np.random.randint(0, 24)
        col = np.random.randint(0, 72)
        image[row, col] = 0
    full_path = dir_path + text + ".png"
    cv.imwrite(full_path, image)

os.mkdir(os.getcwd() + '\\train\\')
os.mkdir(os.getcwd() + '\\test\\')
for i in range(1000):
    create_digit_image(os.getcwd() + '\\train\\')
for i in range(100):
    create_digit_image(os.getcwd() + '\\test\\')
```
This produces 1000 training images and 100 test images (slightly fewer unique files in practice, since a repeated digit string overwrites the earlier image with the same name).
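A quick sanity check (my addition, assuming the generation script above has already been run) that counts the files actually written:

```python
import os

# duplicates of the same 4-digit string collapse into one file,
# so these counts are usually a bit under 1000 and 100
print(len(os.listdir(os.getcwd() + '\\train\\')))
print(len(os.listdir(os.getcwd() + '\\test\\')))
```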
One-hot encoding: each of the four digits becomes a 10-dimensional one-hot slice, so a label is a 40-dimensional vector.
```python
def text2vec(text):
    text_len = len(text)
    if text_len > 4:
        print("text code : ", text)
        raise ValueError('CAPTCHA text is at most 4 characters')
    vector = np.zeros(4 * 10)

    def char2pos(c):
        k = ord(c)
        if 48 <= k <= 57:
            return k - 48
        raise ValueError('character is not a digit')

    for i, c in enumerate(text):
        idx = i * 10 + char2pos(c)
        vector[idx] = 1
    return vector

# convert a vector back to text
def vec2text(vec):
    char_pos = vec.nonzero()[0]
    text = []
    for i, c in enumerate(char_pos):
        char_idx = c % 10
        if char_idx < 10:
            char_code = char_idx + ord('0')
        else:
            raise ValueError('error')
        text.append(chr(char_code))
    return "".join(text)

s = text2vec('1030')
print(s)
s = vec2text(s)
print(s)
```
```
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
1030
```
Reshaped into 4 rows of 10, the same vector looks like this:
```
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.    # row 1 encodes 1
 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.    # row 2 encodes 0
 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.    # row 3 encodes 3
 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]   # row 4 encodes 0
```
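A minimal sketch that makes this row structure explicit (it assumes the text2vec helper above has already been defined):

```python
vec = text2vec('1030')
rows = vec.reshape(4, 10)                    # one one-hot row per digit position
print([int(row.argmax()) for row in rows])   # [1, 0, 3, 0]
```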
The full training script:
```python
import os
import tensorflow as tf
from random import choice
from tfdemo3.data_engine import get_one_image, get_image_files

w = 72
h = 24
label_vector_size = 40

train_dir = os.getcwd() + '\\train\\'
test_dir = os.getcwd() + '\\test\\'
train_files = get_image_files(train_dir)
test_files = get_image_files(test_dir)

# placeholders
x_image = tf.placeholder(shape=[None, h, w, 1], dtype=tf.float32)
y = tf.placeholder(shape=[None, label_vector_size], dtype=tf.float32)
keep_prob = tf.placeholder(dtype=tf.float32)

# convolution layer 1
conv1_w = tf.Variable(tf.random_normal(shape=[3, 3, 1, 32], stddev=0.1, dtype=tf.float32))
conv1_bias = tf.Variable(tf.random_normal(shape=[32], stddev=0.1))
conv1_out = tf.nn.conv2d(input=x_image, filter=conv1_w, strides=[1, 1, 1, 1], padding='SAME')
conv1_relu = tf.nn.relu(tf.add(conv1_out, conv1_bias))

# max pooling 1
maxpooling_1 = tf.nn.max_pool(conv1_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# convolution layer 2
conv2_w = tf.Variable(tf.random_normal(shape=[3, 3, 32, 64], stddev=0.1, dtype=tf.float32))
conv2_bias = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv2_out = tf.nn.conv2d(input=maxpooling_1, filter=conv2_w, strides=[1, 1, 1, 1], padding='SAME')
conv2_relu = tf.nn.relu(tf.add(conv2_out, conv2_bias))

# max pooling 2
maxpooling_2 = tf.nn.max_pool(conv2_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# convolution layer 3
conv3_w = tf.Variable(tf.random_normal(shape=[3, 3, 64, 64], stddev=0.1, dtype=tf.float32))
conv3_bias = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv3_out = tf.nn.conv2d(input=maxpooling_2, filter=conv3_w, strides=[1, 1, 1, 1], padding='SAME')
conv3_relu = tf.nn.relu(tf.add(conv3_out, conv3_bias))

# max pooling 3
maxpooling_3 = tf.nn.max_pool(conv3_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# fc-1: three rounds of 2x2 pooling shrink the 24x72 input to 3x9 with 64 channels
w_fc1 = tf.Variable(tf.random_normal(shape=[3 * 9 * 64, 1024], stddev=0.1, dtype=tf.float32))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))
h_pool2 = tf.reshape(maxpooling_3, [-1, 3 * 9 * 64])
output_fc1 = tf.nn.relu(tf.add(tf.matmul(h_pool2, w_fc1), b_fc1))

# dropout
h2 = tf.nn.dropout(output_fc1, keep_prob=keep_prob)

# fc-2: fed from the dropout output h2 so that keep_prob actually takes effect
w_fc2 = tf.Variable(tf.random_normal(shape=[1024, 40], stddev=0.1, dtype=tf.float32))
b_fc2 = tf.Variable(tf.constant(0.1, shape=[40]))
y_conv = tf.add(tf.matmul(h2, w_fc2), b_fc2)

# loss: sigmoid cross-entropy over all 40 label bits
cross_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_conv, labels=y)
loss = tf.reduce_mean(cross_loss)
step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

# accuracy: compare the argmax within each 10-way digit slot
saver = tf.train.Saver()
predict = tf.reshape(y_conv, [-1, 4, 10])
max_idx_p = tf.argmax(predict, 2)
max_idx_l = tf.argmax(tf.reshape(y, [-1, 4, 10]), 2)
correct_pred = tf.equal(max_idx_p, max_idx_l)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

def get_train_batch(files, batch_size=128):
    images = []
    labels = []
    for _ in range(batch_size):
        image, label = get_one_image(train_dir, choice(files))
        images.append(image)
        labels.append(label)
    return images, labels

def get_batch(root_dir, files):
    images = []
    labels = []
    for f in files:
        image, label = get_one_image(root_dir, f)
        images.append(image)
        labels.append(label)
    return images, labels

test_images, test_labels = get_batch(test_dir, test_files)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(500):
        batch_xs, batch_ys = get_train_batch(train_files, 100)
        curr_loss, curr_ = sess.run([loss, step],
                                    feed_dict={x_image: batch_xs, y: batch_ys, keep_prob: 0.5})
        if (i + 1) % 100 == 0:
            print("run step (%d) ..., loss : (%f)" % (i + 1, curr_loss))
            curr_acc = sess.run(accuracy,
                                feed_dict={x_image: test_images, y: test_labels, keep_prob: 1.0})
            print("current test Accuracy : %f" % (curr_acc))
    saver.save(sess, "./ckp/code_break.ckpt", global_step=500)
```
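Two design notes. The loss is a sigmoid cross-entropy over all 40 output bits rather than a single softmax, because each label vector contains four ones (one per digit slot); accuracy is then measured by taking the argmax within each 10-way slot. Also, the accuracy reported here is per-digit rather than per-image: tf.reduce_mean averages over all four slots of every sample, so a code with one wrong digit still contributes 0.75.

To use the trained model afterwards, something like the following sketch would restore the checkpoint and decode a prediction. This is my addition, not from the original post; it assumes the graph-building code above has already run in the same process (so x_image, keep_prob, y_conv, saver, test_dir, and test_files are in scope):

```python
import numpy as np
from tfdemo3.data_engine import vec2text

with tf.Session() as sess:
    # restore the weights saved at step 500
    saver.restore(sess, "./ckp/code_break.ckpt-500")
    image, label = get_one_image(test_dir, test_files[0])
    logits = sess.run(y_conv, feed_dict={x_image: [image], keep_prob: 1.0})
    # argmax within each 10-way slot gives the four predicted digits
    digits = np.argmax(logits.reshape(4, 10), axis=1)
    print("predicted:", "".join(str(d) for d in digits))
    print("actual   :", vec2text(label))
```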
data_engine.py
```python
import numpy as np
import cv2 as cv
import os

def text2vec(text):
    text_len = len(text)
    if text_len > 4:
        print("text code : ", text)
        raise ValueError('CAPTCHA text is at most 4 characters')
    vector = np.zeros(4 * 10)

    def char2pos(c):
        k = ord(c)
        if 48 <= k <= 57:
            return k - 48
        raise ValueError('character is not a digit')

    for i, c in enumerate(text):
        idx = i * 10 + char2pos(c)
        vector[idx] = 1
    return vector

# convert a vector back to text
def vec2text(vec):
    char_pos = vec.nonzero()[0]
    text = []
    for i, c in enumerate(char_pos):
        char_idx = c % 10
        if char_idx < 10:
            char_code = char_idx + ord('0')
        else:
            raise ValueError('error')
        text.append(chr(char_code))
    return "".join(text)

def get_one_image(root_dir, f):
    # load as grayscale, resize to the network's 72x24 input, scale to [0, 1]
    gray = cv.imread(os.path.join(root_dir, f), cv.IMREAD_GRAYSCALE)
    resize = cv.resize(gray, (72, 24))
    result = np.zeros(resize.shape, dtype=np.float32)
    cv.normalize(resize, result, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F)
    image = np.expand_dims(result, axis=2)
    label = text2vec(f[0:4])  # the first 4 filename characters are the label
    return image, label

def get_image_files(root_dir):
    img_list = []
    files = os.listdir(root_dir)
    for f in files:
        if os.path.isfile(os.path.join(root_dir, f)):
            img_list.append(f)
    return img_list
```
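A quick shape check (my addition, run with the data_engine functions in scope and the generated train images on disk):

```python
files = get_image_files(os.getcwd() + '\\train\\')
image, label = get_one_image(os.getcwd() + '\\train\\', files[0])
print(image.shape)  # (24, 72, 1): height x width x channels, float32 in [0, 1]
print(label.shape)  # (40,): 4 digits x 10 classes
```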
Training output:

```
run step (100) ..., loss : (0.023609)
current test Accuracy : 0.992500
run step (200) ..., loss : (0.000665)
current test Accuracy : 1.000000
run step (300) ..., loss : (0.000046)
current test Accuracy : 1.000000
run step (400) ..., loss : (0.000010)
current test Accuracy : 1.000000
run step (500) ..., loss : (0.000005)
current test Accuracy : 1.000000
```
The convolutional network handles this task well: test accuracy reaches 100% within 200 steps.
Tags: tensorflow, OCR