VGGNet Learning: Practice
0 - Dataset
The experiments use the KTH Animals dataset (19 animal classes): http://www.csc.kth.se/~att/Site/Animals.html
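Before writing any TensorFlow code it is worth confirming that the unpacked archive matches the layout the data reader below expects, namely animal_database/<class>/original/*.jpg with 19 class folders. A minimal sanity-check sketch (the directory layout is an assumption taken from the reader code):

import os, glob

DATA_PATH = "animal_database/"
classes = [d for d in os.listdir(DATA_PATH) if os.path.isdir(DATA_PATH + d)]
print("found %d classes" % len(classes))  # expect 19
for c in sorted(classes):
    print(c, len(glob.glob(DATA_PATH + c + "/original/*.jpg")))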
1 - Code
1.1 - Import Packages
import tensorflow as tf
import os, glob
import numpy as np
from skimage import io, transform
1.2 - Initialize Parameters
DATA_PATH = "animal_database/"
INPUT_W = 224
INPUT_H = 224
INPUT_C = 3
OUTPUT_C = 19                  # number of classes in the dataset
TRAINING_STEPS = 50
MODEL_SAVE_PATH = "model"
MODEL_NAME = "model.ckpt"
BATCH_SIZE = 64
LEARNING_RATE_BASE = 1e-6
LEARNING_RATE_DECAY = 0.99
MOMENTUM = 0.9
TRAIN_KEEP_PROB = 0.6          # dropout keep probability during training
VAL_KEEP_PROB = 1.0            # no dropout at validation time
TEST_KEEP_PROB = 1.0           # no dropout at test time
1.3 - Build Data Reader
class DCdataset(object):
    def __init__(self, path, w, h, c, ratio=0.8):
        def onehot(n):
            l = np.zeros([OUTPUT_C])
            l[n] = 1
            return l

        print("Process images start")
        cate = [path + x for x in os.listdir(path) if os.path.isdir(path + x)]
        x = []
        y = []
        for (i, folder) in enumerate(cate):
            for img_path in glob.glob(folder + "/original/*.jpg"):
                # print("reading the image: %s" % img_path)
                img = io.imread(img_path)
                img = transform.resize(img, (w, h, c))
                x.append(img)
                y.append(i)
        x = np.asarray(x, np.float32)
        y = np.asarray(y, np.int32)

        # shuffle once, then split into train/validation by `ratio`
        num_example = x.shape[0]
        arr = np.arange(num_example)
        np.random.shuffle(arr)
        x = x[arr]
        y = y[arr]
        y = np.asarray([onehot(y_) for y_ in y])
        s = int(num_example * ratio)
        self.x_train, self.x_val = x[:s], x[s:]
        self.y_train, self.y_val = y[:s], y[s:]
        self.train_size = s
        self.val_size = num_example - s
        print("Process images end")

    def next_batch(self, batch_size):
        # sample a random training batch without replacement
        arr = np.arange(self.train_size)
        np.random.shuffle(arr)
        arr = arr[:batch_size]
        return self.x_train[arr], self.y_train[arr]

    def next_val_batch(self, batch_size):
        # sample a random validation batch without replacement
        arr = np.arange(self.val_size)
        np.random.shuffle(arr)
        arr = arr[:batch_size]
        return self.x_val[arr], self.y_val[arr]
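To confirm the reader behaves as intended, build it once and pull a batch; with the parameters above the shapes should come out as (64, 224, 224, 3) for images and (64, 19) for one-hot labels. A quick sketch:

dataset = DCdataset(DATA_PATH, INPUT_W, INPUT_H, INPUT_C)
xs, ys = dataset.next_batch(BATCH_SIZE)
print(xs.shape, ys.shape)  # (64, 224, 224, 3) (64, 19)
print(ys[0].argmax())      # the integer class index behind the one-hot label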
1.4 - Build Network
def conv_op(input_op, name, kh, kw, n_out, dh, dw, p):
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope + "w",
                                 shape=[kh, kw, n_in, n_out],
                                 dtype=tf.float32,
                                 # initializer=tf.truncated_normal_initializer(mean=0, stddev=10e-2),
                                 initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv = tf.nn.conv2d(input_op, kernel, (1, dh, dw, 1), padding="SAME")
        bias_init_val = tf.constant(0.0, shape=[n_out], dtype=tf.float32)
        biases = tf.Variable(bias_init_val, trainable=True, name="b")
        z = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(z, name=scope)
        p += [kernel, biases]
        return activation
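A quick way to see what conv_op produces: with SAME padding and stride 1 the spatial size is preserved and only the channel depth changes to n_out. A throwaway-graph sketch (the "demo" layer name is just for illustration):

with tf.Graph().as_default():
    p = []
    t = tf.placeholder(tf.float32, [None, 224, 224, 3])
    out = conv_op(t, name="demo", kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    print(out.get_shape())  # (?, 224, 224, 64)
    print(len(p))           # 2: the kernel and the bias were appended to p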
def fc_op(input_op, name, n_out, p, relu=True):
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope + "w",
                                 shape=[n_in, n_out],
                                 dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(0.1, shape=[n_out], dtype=tf.float32), name="b")
        # hidden fc layers use ReLU; the output layer must stay linear
        # so that it produces real logits for the softmax cross entropy
        if relu:
            activation = tf.nn.relu_layer(input_op, kernel, biases, name=scope)
        else:
            activation = tf.nn.xw_plus_b(input_op, kernel, biases, name=scope)
        p += [kernel, biases]
        return activation
def mpool_op(input_op, name, kh, kw, dh, dw):
    return tf.nn.max_pool(input_op,
                          ksize=[1, kh, kw, 1],
                          strides=[1, dh, dw, 1],
                          padding="SAME",
                          name=name)
def inference_op(input_op, keep_prob):
    p = []

    # block 1: two 3x3/64 convolutions followed by 2x2 max pooling
    conv1_1 = conv_op(input_op, name="conv1_1", kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    conv1_2 = conv_op(conv1_1, name="conv1_2", kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    pool1 = mpool_op(conv1_2, name="pool1", kh=2, kw=2, dh=2, dw=2)

    # block 2: 128 channels
    conv2_1 = conv_op(pool1, name="conv2_1", kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    conv2_2 = conv_op(conv2_1, name="conv2_2", kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    pool2 = mpool_op(conv2_2, name="pool2", kh=2, kw=2, dh=2, dw=2)

    # block 3: three convolutions, 256 channels
    conv3_1 = conv_op(pool2, name="conv3_1", kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_2 = conv_op(conv3_1, name="conv3_2", kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_3 = conv_op(conv3_2, name="conv3_3", kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    pool3 = mpool_op(conv3_3, name="pool3", kh=2, kw=2, dh=2, dw=2)

    # block 4: three convolutions, 512 channels
    conv4_1 = conv_op(pool3, name="conv4_1", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_2 = conv_op(conv4_1, name="conv4_2", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_3 = conv_op(conv4_2, name="conv4_3", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool4 = mpool_op(conv4_3, name="pool4", kh=2, kw=2, dh=2, dw=2)

    # block 5: three convolutions, 512 channels
    conv5_1 = conv_op(pool4, name="conv5_1", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_2 = conv_op(conv5_1, name="conv5_2", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_3 = conv_op(conv5_2, name="conv5_3", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool5 = mpool_op(conv5_3, name="pool5", kh=2, kw=2, dh=2, dw=2)

    # flatten pool5 (7x7x512 for a 224x224 input) into a single vector
    shp = pool5.get_shape()
    flattened_shape = shp[1].value * shp[2].value * shp[3].value
    resh1 = tf.reshape(pool5, [-1, flattened_shape], name="resh1")

    # two 4096-wide fully connected layers with dropout
    fc6 = fc_op(resh1, name="fc6", n_out=4096, p=p)
    fc6_drop = tf.nn.dropout(fc6, keep_prob, name="fc6_drop")
    fc7 = fc_op(fc6_drop, name="fc7", n_out=4096, p=p)
    fc7_drop = tf.nn.dropout(fc7, keep_prob, name="fc7_drop")

    # output layer stays linear so fc8 yields raw logits; softmax is applied
    # by the loss during training and explicitly at test time
    fc8 = fc_op(fc7_drop, name="fc8", n_out=OUTPUT_C, p=p, relu=False)
    return fc8, p
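A useful check before training: each of the five pooling stages halves the spatial resolution, so a 224x224 input reaches pool5 at 224 / 2^5 = 7, giving flattened_shape = 7 * 7 * 512 = 25088. The sketch below builds the network in a throwaway tf.Graph to verify the logits shape without polluting the default graph:

with tf.Graph().as_default():
    x = tf.placeholder(tf.float32, [None, INPUT_W, INPUT_H, INPUT_C])
    keep_prob = tf.placeholder(tf.float32)
    logits, params = inference_op(x, keep_prob)
    print(logits.get_shape())  # (?, 19)
    print(len(params))         # 32: kernel + bias for each of the 16 conv/fc layers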
1.5 - Train
def train():
    x = tf.placeholder(tf.float32, [None, INPUT_W, INPUT_H, INPUT_C], name="x-input")
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_C], name="y-input")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")

    dataset = DCdataset(DATA_PATH, INPUT_W, INPUT_H, INPUT_C)
    global_step = tf.Variable(0, trainable=False)

    y, p = inference_op(x, keep_prob)
    # the sparse variant expects integer class indices, hence the argmax on y_
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1)))
    # softmax is monotonic, so taking argmax over the raw logits is equivalent
    accuracy = tf.reduce_mean(tf.cast(
        tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)), tf.float32))

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        dataset.train_size / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    optimizer = tf.train.MomentumOptimizer(learning_rate, MOMENTUM).minimize(
        loss, global_step=global_step)

    # Saver fails if the target directory does not exist yet
    if not os.path.exists(MODEL_SAVE_PATH):
        os.makedirs(MODEL_SAVE_PATH)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        for i in range(TRAINING_STEPS):
            xs, ys = dataset.next_batch(BATCH_SIZE)
            _, loss_value, accuracy_value, step = sess.run(
                [optimizer, loss, accuracy, global_step],
                feed_dict={x: xs, y_: ys, keep_prob: TRAIN_KEEP_PROB})
            print("After %d training step(s), loss on training batch is %g, "
                  "accuracy on training batch is %g%%."
                  % (step, loss_value, accuracy_value * 100))
            if i % 2 == 0:
                # evaluate only: the optimizer must not run on validation data
                xs, ys = dataset.next_val_batch(BATCH_SIZE)
                loss_value, accuracy_value = sess.run(
                    [loss, accuracy],
                    feed_dict={x: xs, y_: ys, keep_prob: VAL_KEEP_PROB})
                print("[Validation] Step %d: Validation loss is %g and "
                      "Validation accuracy is %g%%."
                      % (step, loss_value, accuracy_value * 100))
            saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                       global_step=global_step)
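For reference, with staircase left at its default of False, tf.train.exponential_decay computes learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step / decay_steps), where decay_steps = train_size / BATCH_SIZE as passed above. A plain-Python sketch of the schedule (the train_size of 1000 is a hypothetical value, not the actual dataset size):

decay_steps = 1000 / 64.0  # hypothetical train_size / BATCH_SIZE
for step in (0, 10, 30, 50):
    print(step, LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (step / decay_steps))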
train()
1.6 - Test
def test(img_path, model_path):
    # start from a clean graph so the imported meta graph does not collide
    # with any training graph built earlier in the same process
    tf.reset_default_graph()
    with tf.Session() as sess:
        # rebuild the graph from the checkpoint's meta file and restore the weights
        saver = tf.train.import_meta_graph(model_path + ".meta")
        saver.restore(sess, model_path)
        graph = tf.get_default_graph()
        x = graph.get_tensor_by_name("x-input:0")
        keep_prob = graph.get_tensor_by_name("keep_prob:0")
        fc8 = graph.get_tensor_by_name("fc8:0")

        img = io.imread(img_path)
        img = transform.resize(img, (INPUT_W, INPUT_H, INPUT_C))
        logits = sess.run(fc8, feed_dict={
            x: np.reshape(img, [-1, INPUT_W, INPUT_H, INPUT_C]),
            keep_prob: TEST_KEEP_PROB})
        probs = sess.run(tf.nn.softmax(logits))
        print("probabilities: ", probs)
        print("predicted label: ", np.argmax(probs, 1))
img_path = os.path.join(DATA_PATH, "cougar", "original", "4400.jpg")
model_path = os.path.join(MODEL_SAVE_PATH, MODEL_NAME + "-2")
test(img_path, model_path)
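If the step suffix of the newest checkpoint is unknown, tf.train.latest_checkpoint can look it up from the checkpoint index that Saver maintains in MODEL_SAVE_PATH:

model_path = tf.train.latest_checkpoint(MODEL_SAVE_PATH)  # e.g. "model/model.ckpt-50"
test(img_path, model_path)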