TensorFlow OCR for fixed-length captcha images (4 digits, 0-9) with a CNN

First, let's generate some captcha images.

import cv2 as cv
import numpy as np
import os


def create_digit_image(dir_path):
    # 24x72 gray background
    image = np.ones(shape=[24, 72], dtype=np.uint8)
    image = image * 127
    # random 4-digit text; it doubles as the label and the filename
    a = np.random.randint(0, 10)
    b = np.random.randint(0, 10)
    c = np.random.randint(0, 10)
    d = np.random.randint(0, 10)
    text = str(a) + str(b) + str(c) + str(d)
    print(text)
    cv.putText(image, text, (6, 20), cv.FONT_HERSHEY_PLAIN, 1.5, (255), 2)
    # sprinkle 100 random black pixels as noise
    for i in range(100):
        row = np.random.randint(0, 24)
        col = np.random.randint(0, 72)
        image[row, col] = 0
    full_path = dir_path + text + ".png"
    cv.imwrite(full_path, image)


os.mkdir(os.getcwd() + '\\train\\')
os.mkdir(os.getcwd() + '\\test\\')

for i in range(1000):
    create_digit_image(os.getcwd() + '\\train\\')

for i in range(100):
    create_digit_image(os.getcwd() + '\\test\\')

 

This generates 1,000 training images and 100 test images.
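Because the random 4-digit text doubles as the filename, duplicate texts overwrite earlier files, so the folders may end up with slightly fewer images than the loop counts suggest. A quick sanity-check sketch (paths match the generator above):

import os
import cv2 as cv

train_dir = os.getcwd() + '\\train\\'
test_dir = os.getcwd() + '\\test\\'
print("train images:", len(os.listdir(train_dir)))
print("test images :", len(os.listdir(test_dir)))

# preview one sample; the label is simply the filename without the extension
sample = os.listdir(train_dir)[0]
img = cv.imread(os.path.join(train_dir, sample), cv.IMREAD_GRAYSCALE)
print(sample[:-4], img.shape)   # the 4-digit label and (24, 72)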

 

 

 

One-hot encoding:

def text2vec(text):
    text_len = len(text)
    if text_len > 4:
        print("text code : ", text)
        raise ValueError('captcha text is at most 4 characters')
    vector = np.zeros(4 * 10)

    def char2pos(c):
        k = ord(c)
        if 48 <= k <= 57:
            return k - 48
    # each character gets its own block of 10 positions
    for i, c in enumerate(text):
        idx = i * 10 + char2pos(c)
        vector[idx] = 1
    return vector


# convert a one-hot vector back to text
def vec2text(vec):
    char_pos = vec.nonzero()[0]
    text = []
    for i, c in enumerate(char_pos):
        char_code = c % 10 + ord('0')  # position within its 10-wide block is the digit
        text.append(chr(char_code))
    return "".join(text)


s = text2vec('1030')
print(s)

s = vec2text(s)
print(s)

  

[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
1030

  

Rearranged into 4 rows of 10, it looks like this:

[
  0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
]
 
Row 1 encodes the digit 1
Row 2 encodes the digit 0
Row 3 encodes the digit 3
Row 4 encodes the digit 0
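This is also exactly how the network's output gets decoded later: reshape the 40-dimensional vector to (4, 10) and take one argmax per row. A short NumPy sketch of that decoding, using the text2vec defined above:

import numpy as np

vec = text2vec('1030')
digits = np.argmax(vec.reshape(4, 10), axis=1)   # one argmax per character slot
print("".join(str(d) for d in digits))           # -> 1030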

  

Complete training code:

import os
import tensorflow as tf
from random import choice
from tfdemo3.data_engine import get_one_image, get_image_files
 
w = 72
h = 24
label_vector_size = 40
train_dir = os.getcwd()+'\\train\\'
test_dir = os.getcwd()+'\\test\\'
train_files = get_image_files(train_dir)
test_files = get_image_files(test_dir)
 
# placeholders
x_image = tf.placeholder(shape=[None, h, w, 1], dtype=tf.float32)
y = tf.placeholder(shape=[None, label_vector_size], dtype=tf.float32)
keep_prob = tf.placeholder(dtype=tf.float32)
 
# convolution layer 1
conv1_w = tf.Variable(tf.random_normal(shape=[3, 3, 1, 32], stddev=0.1, dtype=tf.float32))
conv1_bias = tf.Variable(tf.random_normal(shape=[32], stddev=0.1))
conv1_out = tf.nn.conv2d(input=x_image, filter=conv1_w, strides=[1, 1, 1, 1], padding='SAME')
conv1_relu = tf.nn.relu(tf.add(conv1_out, conv1_bias))
 
# max pooling 1
maxpooling_1 = tf.nn.max_pool(conv1_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 
# convolution layer 2
conv2_w = tf.Variable(tf.random_normal(shape=[3, 3, 32, 64], stddev=0.1, dtype=tf.float32))
conv2_bias = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv2_out = tf.nn.conv2d(input=maxpooling_1, filter=conv2_w, strides=[1, 1, 1, 1], padding='SAME')
conv2_relu = tf.nn.relu(tf.add(conv2_out, conv2_bias))
 
# max pooling 2
maxpooling_2 = tf.nn.max_pool(conv2_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 
# convolution layer 3
conv3_w = tf.Variable(tf.random_normal(shape=[3, 3, 64, 64], stddev=0.1, dtype=tf.float32))
conv3_bias = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv3_out = tf.nn.conv2d(input=maxpooling_2, filter=conv3_w, strides=[1, 1, 1, 1], padding='SAME')
conv3_relu = tf.nn.relu(tf.add(conv3_out, conv3_bias))
 
# max pooling 3
maxpooling_3 = tf.nn.max_pool(conv3_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 
# fc-1
w_fc1 = tf.Variable(tf.random_normal(shape=[3*9*64, 1024], stddev=0.1, dtype=tf.float32))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))
h_pool2 = tf.reshape(maxpooling_3, [-1, 3*9*64])
output_fc1 = tf.nn.relu(tf.add(tf.matmul(h_pool2, w_fc1), b_fc1))
 
# dropout
h2 = tf.nn.dropout(output_fc1, keep_prob=keep_prob)
 
# fc-2
w_fc2 = tf.Variable(tf.random_normal(shape=[1024, 40], stddev=0.1, dtype=tf.float32))
b_fc2 = tf.Variable(tf.constant(0.1, shape=[40]))
y_conv = tf.add(tf.matmul(h2, w_fc2), b_fc2)  # feed the dropout output into the final layer
 
# loss
cross_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_conv, labels=y)
loss = tf.reduce_mean(cross_loss)
step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
 
# accuracy
saver = tf.train.Saver()
predict = tf.reshape(y_conv, [-1, 4, 10])
max_idx_p = tf.argmax(predict, 2)
max_idx_l = tf.argmax(tf.reshape(y, [-1, 4, 10]), 2)
correct_pred = tf.equal(max_idx_p, max_idx_l)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
 
 
def get_train_batch(files, batch_size=128):
    images = []
    labels = []
    for f in range(batch_size):
        image, label = get_one_image(train_dir, choice(files))
        images.append(image)
        labels.append(label)
    return images, labels
 
 
def get_batch(root_dir, files):
    images = []
    labels = []
    for f in files:
        image, label = get_one_image(root_dir, f)
        images.append(image)
        labels.append(label)
    return images, labels
 
 
test_images, test_labels = get_batch(test_dir, test_files)
 
 
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(500):
        batch_xs, batch_ys = get_train_batch(train_files, 100)
        curr_loss, curr_ = sess.run([loss, step], feed_dict={x_image: batch_xs, y: batch_ys, keep_prob: 0.5})
        if (i + 1) % 100 == 0:
            print("run step (%d) ..., loss : (%f)" % (i+1, curr_loss))
            curr_acc = sess.run(accuracy, feed_dict={x_image: test_images, y: test_labels, keep_prob: 1.0})
            print("current test Accuracy : %f" % (curr_acc))
    os.makedirs("./ckp", exist_ok=True)  # saver.save does not create the directory itself
    saver.save(sess, "./ckp/code_break.ckpt", global_step=500)
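A minimal sketch of how one might restore the checkpoint and read off a prediction, assuming it is appended to the end of the training script above (so the graph, saver, and helpers are already in scope) and that training has saved ./ckp/code_break.ckpt-500:

import numpy as np
from tfdemo3.data_engine import vec2text

with tf.Session() as sess:
    saver.restore(sess, "./ckp/code_break.ckpt-500")
    image, label = get_one_image(test_dir, choice(test_files))
    logits = sess.run(y_conv, feed_dict={x_image: [image], keep_prob: 1.0})
    digits = np.argmax(logits.reshape(4, 10), axis=1)   # one argmax per character slot
    print("predicted :", "".join(str(d) for d in digits))
    print("expected  :", vec2text(label))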

  

data_engine.py

import numpy as np
import cv2 as cv
import os


def text2vec(text):
    text_len = len(text)
    if text_len > 4:
        print("text code : ", text)
        raise ValueError('captcha text is at most 4 characters')
    vector = np.zeros(4 * 10)

    def char2pos(c):
        k = ord(c)
        if 48 <= k <= 57:
            return k - 48
    # each character gets its own block of 10 positions
    for i, c in enumerate(text):
        idx = i * 10 + char2pos(c)
        vector[idx] = 1
    return vector


# convert a one-hot vector back to text
def vec2text(vec):
    char_pos = vec.nonzero()[0]
    text = []
    for i, c in enumerate(char_pos):
        char_code = c % 10 + ord('0')  # position within its 10-wide block is the digit
        text.append(chr(char_code))
    return "".join(text)


def get_one_image(root_dir, f):
    # load as grayscale, resize to 72x24 and normalize to [0, 1]
    gray = cv.imread(os.path.join(root_dir, f), cv.IMREAD_GRAYSCALE)
    resize = cv.resize(gray, (72, 24))
    result = np.zeros(resize.shape, dtype=np.float32)
    cv.normalize(resize, result, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F)
    image = np.expand_dims(result, axis=2)   # shape (24, 72, 1)
    label = text2vec(f[0:4])                 # the label comes from the filename
    return image, label


def get_image_files(root_dir):
    # list the image filenames under root_dir
    img_list = []
    files = os.listdir(root_dir)
    for f in files:
        if os.path.isfile(os.path.join(root_dir, f)):
            img_list.append(f)
    return img_list
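For reference, a quick check of what these helpers return (this assumes the module is importable as tfdemo3.data_engine, as in the training script):

import os
from tfdemo3.data_engine import get_image_files, get_one_image, vec2text

train_dir = os.getcwd() + '\\train\\'
files = get_image_files(train_dir)
image, label = get_one_image(train_dir, files[0])
print(image.shape)       # (24, 72, 1), float32 scaled to [0, 1]
print(vec2text(label))   # the 4-digit text recovered from the filename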

  

 

run step (100) ..., loss : (0.023609)
current test Accuracy : 0.992500
run step (200) ..., loss : (0.000665)
current test Accuracy : 1.000000
run step (300) ..., loss : (0.000046)
current test Accuracy : 1.000000
run step (400) ..., loss : (0.000010)
current test Accuracy : 1.000000
run step (500) ..., loss : (0.000005)
current test Accuracy : 1.000000

  

The convolutional network really does perform well here.

 
