tf识别单张图片ocr(0到9的识别)- CNN方式

继上篇文章后,这次使用卷积网络做实验(上篇用的是普通2层网络)

 

 

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import time
import tensorflow as tf
import numpy as np
import cv2 as cv
 
def generate_image(a, rnd_size=100):
    image = np.zeros([28, 28], dtype=np.uint8)
    cv.putText(image, str(a), (7, 21), cv.FONT_HERSHEY_PLAIN, 1.3, 255, 2, 8)
 
    for i in range(rnd_size):
        row = np.random.randint(0, 28)
        col = np.random.randint(0, 28)
        image[row, col] = 0
 
    data = np.reshape(image, [1, 784])
    return image, data / 255
 
 
def display_images(images):
    import matplotlib.pyplot as plt
    size = len(images)
    for i in range(size):
        plt.subplot(2, 5, i + 1)
        plt.imshow(images[i])
 
    plt.show()
 
 
def load_data(sess, rnd_size=100, should_display_images=False):
    zero_image, zero = generate_image(0, rnd_size)
    one_image, one = generate_image(1, rnd_size)
    two_image, two = generate_image(2, rnd_size)
    three_image, three = generate_image(3, rnd_size)
    four_image, four = generate_image(4, rnd_size)
    five_image, five = generate_image(5, rnd_size)
    six_image, six = generate_image(6, rnd_size)
    seven_image, seven = generate_image(7, rnd_size)
    eight_image, eight = generate_image(8, rnd_size)
    nine_image, nine = generate_image(9, rnd_size)
 
    if should_display_images is True:
        display_images(
            [zero_image, one_image, two_image, three_image, four_image, five_image, six_image, seven_image, eight_image,
             nine_image])
 
    x_features = [zero, one, two, three, four, five, six, seven, eight, nine]
    x_features = np.array(x_features)
    x_features = np.reshape(x_features, (-1,784))
 
    y = None
    y_lables = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    y = sess.run(tf.one_hot(y_lables, 10))
 
    return x_features, y
 
 
def build_network():
    x = tf.placeholder(shape=[None, 784], dtype=tf.float32)
    y = tf.placeholder(shape=[None, 10], dtype=tf.float32)
    x_image = tf.reshape(x, [-1, 28, 28, 1])
 
    # convolution layer 1
    conv1_w = tf.Variable(tf.truncated_normal(shape=[5, 5, 1, 32], stddev=0.1, dtype=tf.float32))
    conv1_bias = tf.Variable(tf.truncated_normal(shape=[32], stddev=0.1))
    conv1_out = tf.nn.conv2d(input=x_image, filter=conv1_w, strides=[1, 1, 1, 1], padding='SAME')
    conv1_relu = tf.nn.relu(tf.add(conv1_out, conv1_bias))
 
    # max pooling 1
    maxpooling_1 = tf.nn.max_pool(conv1_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 
    # convolution layer 2
    conv2_w = tf.Variable(tf.truncated_normal(shape=[5, 5, 32, 64], stddev=0.1, dtype=tf.float32))
    conv2_bias = tf.Variable(tf.truncated_normal(shape=[64], stddev=0.1))
    conv2_out = tf.nn.conv2d(input=maxpooling_1, filter=conv2_w, strides=[1, 1, 1, 1], padding='SAME')
    conv2_relu = tf.nn.relu(tf.add(conv2_out, conv2_bias))
 
    # max pooling 2
    maxpooling_2 = tf.nn.max_pool(conv2_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 
    # fc-1
    w_fc1 = tf.Variable(tf.truncated_normal(shape=[7*7*64, 1024], stddev=0.1, dtype=tf.float32))
    b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))
    h_pool2 = tf.reshape(maxpooling_2, [-1, 7*7*64])
    output_fc1 = tf.nn.relu(tf.add(tf.matmul(h_pool2, w_fc1), b_fc1))
 
    # dropout
    keep_prob = tf.placeholder(dtype=tf.float32)
    h2 = tf.nn.dropout(output_fc1, keep_prob=keep_prob)
 
    # fc-2
    w_fc2 = tf.Variable(tf.truncated_normal(shape=[1024, 10], stddev=0.1, dtype=tf.float32))
    b_fc2 = tf.Variable(tf.constant(0.1, shape=[10]))
    y_conv = tf.add(tf.matmul(output_fc1, w_fc2), b_fc2)
 
    cross_loss = tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y)
    loss = tf.reduce_mean(cross_loss)
    step = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
 
    # accuracy
    acc_mat = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
    acc = tf.reduce_sum(tf.cast(acc_mat, tf.float32))
 
    prediction = tf.argmax(y_conv, 1)
 
    tf.summary.scalar("accuracy", acc)
    tf.summary.image('testing', x_image, max_outputs = 10)
 
    return x, y, step, acc, acc_mat, prediction, keep_prob
 
 
def train():
    x, y, step, acc, acc_mat, prediction, keep_prob = build_network()
 
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        summary_merged = tf.summary.merge_all()         #必须放在初始化的后面,否则报错
        writer = tf.summary.FileWriter('logs'+str(time.time()), sess.graph)
        for i in range(50):
            x_features, y_lables = load_data(sess)
            _, summary_ = sess.run([step, summary_merged], feed_dict={x: x_features, y: y_lables, keep_prob: 0.5})
            writer.add_summary(summary_, i)
            if (i + 1) % 5 == 0:
                curr_acc = sess.run(acc, feed_dict={x: x_features, y: y_lables, keep_prob: 1.0})
                print("current test Accuracy : %f" % (curr_acc))
        saver.save(sess, "./checkpoint/tf_mnist.model", global_step=50)
        writer.close()
 
        print('*************************')
        x_features, y_labels = load_data(sess, 300, should_display_images=True)
        pred_ys = sess.run(prediction, feed_dict={x: x_features, keep_prob: 1.0})
        print('图片识别结果:', pred_ys)
 
 
if __name__ == '__main__':
    train()

  

 

 

 

 

复制代码
current test Accuracy : 4.000000
current test Accuracy : 7.000000
current test Accuracy : 7.000000
current test Accuracy : 10.000000
current test Accuracy : 8.000000
current test Accuracy : 10.000000
current test Accuracy : 10.000000
current test Accuracy : 10.000000
current test Accuracy : 10.000000
current test Accuracy : 10.000000
*************************
图片识别结果: [0 1 2 3 4 5 6 7 8 9]
复制代码

 

 

 

 

 

 

 

 

 

我们可以看到,在用卷积网络做训练时,大概35此迭代就实现了正确率达到了10张图片全部识别正确,但是普通2层全连接网络需要花费大概400次迭代才能达到100%正确率

 

posted @   McKay  阅读(586)  评论(0编辑  收藏  举报
编辑推荐:
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
阅读排行:
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 提示词工程——AI应用必不可少的技术
· .NET周刊【3月第1期 2025-03-02】
历史上的今天:
2014-02-12 消息队列工具类(MSMQ)
点击右上角即可分享
微信分享提示