SSD: Single Shot MultiBox Detector 论文解读,附代码
2018-01-17 21:47 张小贤TT 阅读(1266) 评论(0) 编辑 收藏 举报
一改常规套路,先上图。备注一下,这个图是盗来的。
文笔实在是烂透了, 想看论文解读的可以看这篇博客
我这里就来个代码实现好了,强烈建议代码和论文配合着来看,这是挺难的一篇论文。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import tensorflow as tf
import numpy as np
import time


class SSD(object):
    """Small SSD-style detector built with the TensorFlow 1.x graph API.

    The constructor builds the whole training graph: a convolutional base
    network, five prediction heads at decreasing resolutions, the matching
    set of default boxes, ground-truth placeholders, the combined
    classification + localisation loss and an Adam training op.

    Boxes are represented everywhere in this class as
    ``[x, y, width, height]`` where ``(x, y)`` is the corner with the
    smallest coordinates (``compute_jaccard`` adds width/height to it).
    """

    def __init__(self, sess):
        """Build the graph.

        Args:
            sess: session object; it is only stored on ``self.sess`` here.
        """
        # One scale per prediction head (index-aligned with feature_list).
        self.scales = [0.2, 0.35, 0.50, 0.65, 0.80]
        # Aspect ratios; every feature-map cell gets len(self.ratios) boxes.
        self.ratios = [1., 2., 3., 1. / 2, 1. / 3]
        self.num_of_class = 21
        # Minimum IoU for a default box to count as a positive match.
        self.jaccard_val = 0.6
        # Class index used for background (attribute name kept, typo and
        # all, for backward compatibility).
        self.background_calss_val = 0
        self.sess = sess

        self.base_input = tf.placeholder(dtype=tf.float32,
                                         shape=[None, 300, 300, 3])
        self.feature_list = self.build_base_net(self.base_input)
        self.feature_class, self.feature_location = \
            self.reshape_and_split_feature()
        self.default_boxes = self.generate_default_boxes()
        self.num_of_default_boxes = self.default_boxes.shape[0]

        self.groundtruth_class = tf.placeholder(
            shape=[None, self.num_of_default_boxes, self.num_of_class],
            dtype=tf.int32)
        self.groundtruth_location = tf.placeholder(
            shape=[None, self.num_of_default_boxes, 4], dtype=tf.float32)
        # 0/1 masks selecting the positive / negative default boxes.
        self.groundtruth_positives = tf.placeholder(
            shape=[None, self.num_of_default_boxes], dtype=tf.float32)
        self.groundtruth_negatives = tf.placeholder(
            shape=[None, self.num_of_default_boxes], dtype=tf.float32)
        # Boxes that take part in the classification loss.
        self.groundtruth_count = tf.add(self.groundtruth_positives,
                                        self.groundtruth_negatives)

        self.softmax_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.feature_class,
            labels=self.groundtruth_class)

        # Localisation loss: smooth-L1 summed over the 4 coordinates,
        # masked to positives and averaged over the number of positives.
        # process_ground_truth always yields at least one positive per
        # sample, which keeps the denominator non-zero.
        self.loss_location = tf.div(
            tf.reduce_sum(
                tf.multiply(
                    tf.reduce_sum(
                        self.smooth_L1(tf.subtract(self.groundtruth_location,
                                                   self.feature_location)),
                        reduction_indices=2),
                    self.groundtruth_positives),
                reduction_indices=1),
            tf.reduce_sum(self.groundtruth_positives, reduction_indices=1))

        # Classification loss: cross entropy averaged over the selected
        # positive and negative boxes.
        self.loss_class = tf.div(
            tf.reduce_sum(
                tf.multiply(self.softmax_cross_entropy,
                            self.groundtruth_count),
                reduction_indices=1),
            tf.reduce_sum(self.groundtruth_count, reduction_indices=1))

        self.loss_all = tf.reduce_sum(
            tf.add(self.loss_class, self.loss_location))
        self.optimizer = tf.train.AdamOptimizer(0.001)
        self.train_op = self.optimizer.minimize(self.loss_all)

    def build_base_net(self, base_input):
        """Build the base network and the five prediction heads.

        Args:
            base_input: image tensor, ``[batch, 300, 300, 3]`` as created
                in ``__init__``.

        Returns:
            List of five prediction tensors, highest resolution first.
        """
        with tf.name_scope('net'):
            with tf.name_scope('base_net'):
                base = base_input
                base = tf.layers.conv2d(base, 32, 3, padding='same')
                base = tf.layers.batch_normalization(base, training=True)
                base = tf.nn.relu(base)
                base = tf.layers.max_pooling2d(base, 3, (2, 2))
                # 149,149 (max_pooling2d defaults to padding='valid')
                base = tf.layers.conv2d(base, 64, 3, padding='same')
                base = tf.layers.batch_normalization(base, training=True)
                base = tf.nn.relu(base)
                base = tf.layers.max_pooling2d(base, 3, (2, 2))
                # 74,74
                base = tf.layers.conv2d(base, 128, 3, padding='same')
                base = tf.layers.conv2d(base, 128, 3, padding='same')
                base = tf.layers.batch_normalization(base, training=True)
                base = tf.nn.relu(base)
                base = tf.layers.max_pooling2d(base, 3, (2, 2))
                # 36,36
            predict_1, down_sample_1 = self.down_sample_and_predict(base)
            predict_2, down_sample_2 = self.down_sample_and_predict(down_sample_1)
            predict_3, down_sample_3 = self.down_sample_and_predict(down_sample_2)
            predict_4, down_sample_4 = self.down_sample_and_predict(down_sample_3)
            predict_5 = self.predict_only(down_sample_4)
            feature_list = [predict_1, predict_2, predict_3, predict_4, predict_5]

        return feature_list

    def down_sample_and_predict(self, feature):
        """Attach a prediction head and produce a half-resolution feature map.

        Args:
            feature: 4-D feature tensor (channel count read from axis 3).

        Returns:
            ``(predict, down_sample)``: the prediction map with
            ``len(self.ratios) * (num_of_class + 4)`` channels, and the
            down-sampled feature map with twice the input channels.
        """
        with tf.name_scope('down_and_predict'):
            channels = feature.get_shape().as_list()[3]
            boxes_per_cell = len(self.ratios)
            predict = tf.layers.conv2d(
                feature, boxes_per_cell * (self.num_of_class + 4), 3,
                padding='same')
            down_sample = tf.layers.conv2d(feature, 2 * channels, 3,
                                           padding='same')
            down_sample = tf.nn.relu(down_sample)
            down_sample = tf.layers.max_pooling2d(down_sample, 3,
                                                  strides=(2, 2),
                                                  padding='same')
            print('predictor shape :',predict.get_shape().as_list())
            print('down_sample shape :', down_sample.get_shape().as_list())
            return predict, down_sample

    def predict_only(self, feature):
        """Attach a prediction head without further down-sampling.

        Args:
            feature: 4-D feature tensor.

        Returns:
            Prediction map with ``len(self.ratios) * (num_of_class + 4)``
            channels.
        """
        # Scope name is shared with down_sample_and_predict, as in the
        # original graph.
        with tf.name_scope('down_and_predict'):
            predict = tf.layers.conv2d(
                feature, len(self.ratios) * (self.num_of_class + 4), 3,
                padding='same')
            print('predictor shape :',predict.get_shape().as_list())
            return predict

    def reshape_and_split_feature(self):
        """Flatten all prediction maps into per-box class/location tensors.

        Each map is reshaped to ``[batch, height*width*boxes_per_cell,
        num_of_class + 4]`` and the maps are concatenated along axis 1.

        Returns:
            ``(feature_class, feature_location)``: the first
            ``num_of_class`` channels and the remaining 4 channels.
        """
        boxes_per_cell = len(self.ratios)
        reshape_feature = []
        for feature in self.feature_list:
            _, height, width, _ = feature.get_shape().as_list()
            reshape_feature.append(tf.reshape(
                feature,
                [-1, width * height * boxes_per_cell, self.num_of_class + 4]))
        reshape_feature = tf.concat(reshape_feature, axis=1)
        print('预测得到 %d 个default boxes'%reshape_feature.get_shape().as_list()[1])
        feature_class = reshape_feature[:, :, :self.num_of_class]
        feature_location = reshape_feature[:, :, self.num_of_class:]
        print('feature_class shape:',feature_class.get_shape().as_list())
        print('feature_location shape:', feature_location.get_shape().as_list())
        return feature_class, feature_location

    def generate_default_boxes(self):
        """Create the default boxes for every cell of every prediction map.

        Boxes are emitted in the same order in which
        ``reshape_and_split_feature`` flattens the prediction maps: rows
        (y) outermost, then columns (x), then aspect ratio. ``tf.reshape``
        keeps row-major order, so box ``k`` here describes the same cell
        as prediction ``k``.

        Returns:
            ``float32`` array of shape ``[num_boxes, 4]`` holding
            ``[x, y, width, height]`` in unit coordinates.
        """
        default_boxes = []
        t_start = time.time()
        for index, feature in enumerate(self.feature_list):
            _, height, width, _ = feature.get_shape().as_list()
            scale = self.scales[index]
            for y in range(height):
                top_y = y * 1. / height
                for x in range(width):
                    top_x = x * 1. / width
                    for ratio in self.ratios:
                        box_width = scale * np.sqrt(ratio)
                        box_height = scale / np.sqrt(ratio)
                        default_boxes.append(
                            [top_x, top_y, box_width, box_height])
        t_end = time.time()
        print('generate %d boxes '%len(default_boxes),'takes %f seconds'%(t_end - t_start))
        default_boxes = np.asarray(default_boxes, dtype=np.float32)
        print('default_boxes shape',default_boxes.shape)
        return default_boxes

    def compute_jaccard(self, box_1, box_2):
        """Return the Jaccard index (IoU) of two ``[x, y, w, h]`` boxes.

        Returns 0 when the union area is zero.
        """
        x_len = max(0, min(box_1[0] + box_1[2], box_2[0] + box_2[2])
                    - max(box_1[0], box_2[0]))
        y_len = max(0, min(box_1[1] + box_1[3], box_2[1] + box_2[3])
                    - max(box_1[1], box_2[1]))
        inter = x_len * y_len
        union = box_1[2] * box_1[3] + box_2[2] * box_2[3] - inter
        if union == 0:
            return 0
        return inter / union

    def process_ground_truth(self, actual_input):
        """Match ground-truth objects to default boxes for a batch.

        Args:
            actual_input: one entry per image; each entry is a sequence of
                objects of the form ``[x, y, width, height, label]``.
                Coordinates are presumably normalised to the same unit
                square as the default boxes -- confirm against the caller.

        Returns:
            ``(class, location, positives, negatives)`` arrays shaped to
            feed the corresponding ``groundtruth_*`` placeholders.
        """
        num_of_input = len(actual_input)
        num_boxes = self.num_of_default_boxes
        background = self.background_calss_val

        gt_class = np.zeros(
            shape=[num_of_input, num_boxes, self.num_of_class],
            dtype=np.int32)
        gt_location = np.zeros(
            shape=[num_of_input, num_boxes, 4],
            dtype=np.float32)
        gt_positives = np.zeros(
            shape=[num_of_input, num_boxes],
            dtype=np.float32)
        gt_negatives = np.zeros_like(gt_positives)
        # Best IoU seen so far for every default box (matched or not).
        gt_jaccard = np.zeros_like(gt_positives)

        for index, actual in enumerate(actual_input):
            for actual_in in actual:
                # int() so that labels stored as floats can index arrays.
                label = int(actual_in[-1])
                box_info = actual_in[:-1]
                for box_index in range(num_boxes):
                    jacc = self.compute_jaccard(
                        self.default_boxes[box_index], box_info)
                    if jacc <= gt_jaccard[index][box_index]:
                        # An earlier object overlaps this box at least as
                        # well; keep that assignment.
                        continue
                    gt_jaccard[index][box_index] = jacc
                    if jacc >= self.jaccard_val:
                        # Reset first so the row stays one-hot when a
                        # better-overlapping object replaces an earlier one.
                        gt_class[index][box_index] = 0
                        gt_class[index][box_index][label] = 1
                        gt_location[index][box_index] = box_info
                        gt_positives[index][box_index] = 1
                        gt_negatives[index][box_index] = 0

            if int(np.sum(gt_positives[index])) == 0:
                # No match at all: mark one random box as a background
                # "positive" so the loss denominators are never zero.
                random_index = np.random.randint(0, num_boxes, 1)[0]
                gt_class[index][random_index] = 0
                gt_class[index][random_index][background] = 1
                gt_location[index][random_index] = [0, 0, 0, 0]
                gt_positives[index][random_index] = 1
                gt_negatives[index][random_index] = 0
                # Protects this box from being re-labelled as a negative.
                gt_jaccard[index][random_index] = self.jaccard_val

            num_positives = int(np.sum(gt_positives[index]))
            # Sample up to three negatives per positive.
            negative_count = 3 * num_positives
            if 4 * num_positives > num_boxes:
                negative_count = num_boxes - num_positives
            nega_indexs = np.random.randint(0, num_boxes, negative_count)
            for nega_index in nega_indexs:
                # Only boxes that overlap no object by 0.3 or more are
                # usable as background examples.
                if gt_jaccard[index][nega_index] < 0.3:
                    gt_class[index][nega_index][background] = 1
                    gt_positives[index][nega_index] = 0
                    gt_negatives[index][nega_index] = 1

        return (gt_class, gt_location, gt_positives, gt_negatives)

    def smooth_L1(self, x):
        """Element-wise smooth-L1: ``0.5*x**2`` if ``|x| <= 1`` else ``|x| - 0.5``."""
        return tf.where(tf.less_equal(tf.abs(x), 1.0),
                        tf.multiply(0.5, tf.pow(x, 2.0)),
                        tf.subtract(tf.abs(x), 0.5))


sess = tf.InteractiveSession()
ssd = SSD(sess)
要回去睡觉了,差不多先搞这么多