SSD: Single Shot MultiBox Detector 论文解读，附代码

2018-01-17 21:47 张小贤TT 阅读(1260) 评论(0) 编辑收藏举报
一改常规套路，先上图，备注一下，这个图是盗来的
文笔实在是烂透了，想看论文解读的可以看这篇博客
我这里就来个代码实现好了，强烈建议把代码和论文配合着来看，这是一篇挺难的论文
  1 #!/usr/bin/env python
  2 # -*- coding:utf-8 -*-
  3 import tensorflow as tf
  4 import numpy as np
  5 import time
  6 
  7 class SSD(object):
  8     def __init__(self, sess):
  9         self.scales = [0.2,0.35,0.50,0.65,0.80]
 10         self.ratios = [1.,2.,3.,1./2,1./3]
 11         self.num_of_class = 21
 12         self.jaccard_val = 0.6
 13         self.background_calss_val = 0
 14         self.sess = sess
 15         self.base_input = tf.placeholder(dtype=tf.float32, shape=[None, 300, 300, 3])
 16         self.feature_list = self.build_base_net(self.base_input)
 17         self.feature_class, self.feature_location = self.reshape_and_split_feature()
 18         self.default_boxes = self.generate_default_boxes()
 19         self.num_of_default_boxes = self.default_boxes.shape[0]
 20         self.groundtruth_class = tf.placeholder(shape=[None, self.num_of_default_boxes, self.num_of_class], dtype=tf.int32)
 21         self.groundtruth_location = tf.placeholder(shape=[None, self.num_of_default_boxes, 4], dtype=tf.float32)
 22         self.groundtruth_positives = tf.placeholder(shape=[None, self.num_of_default_boxes], dtype=tf.float32)
 23         self.groundtruth_negatives = tf.placeholder(shape=[None, self.num_of_default_boxes], dtype=tf.float32)
 24         self.groundtruth_count = tf.add(self.groundtruth_positives, self.groundtruth_negatives)
 25         self.softmax_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.feature_class,
 26                                                                                     labels=self.groundtruth_class)
 27         self.loss_location = tf.div(tf.reduce_sum(tf.multiply(
 28             tf.reduce_sum(self.smooth_L1(tf.subtract(self.groundtruth_location, self.feature_location)),
 29                           reduction_indices=2), self.groundtruth_positives), reduction_indices=1),
 30                                     tf.reduce_sum(self.groundtruth_positives, reduction_indices=1))
 31         self.loss_class = tf.div(
 32             tf.reduce_sum(tf.multiply(self.softmax_cross_entropy, self.groundtruth_count), reduction_indices=1),
 33             tf.reduce_sum(self.groundtruth_count, reduction_indices=1))
 34         self.loss_all = tf.reduce_sum(tf.add(self.loss_class, self.loss_location))
 35         self.optimizer = tf.train.AdamOptimizer(0.001)
 36         self.train_op = self.optimizer.minimize(self.loss_all)
 37 
 38     def build_base_net(self,base_input):
 39         with tf.name_scope('net'):
 40             with tf.name_scope('base_net'):
 41                 base = base_input
 42                 base = tf.layers.conv2d(base, 32, 3, padding='same')
 43                 base = tf.layers.batch_normalization(base, training=True)
 44                 base = tf.nn.relu(base)
 45                 base = tf.layers.max_pooling2d(base, 3, (2, 2))
 46                 # 150,150
 47                 base = tf.layers.conv2d(base, 64, 3, padding='same')
 48                 base = tf.layers.batch_normalization(base, training=True)
 49                 base = tf.nn.relu(base)
 50                 base = tf.layers.max_pooling2d(base, 3, (2, 2))
 51                 # 75,75
 52                 base = tf.layers.conv2d(base, 128, 3, padding='same')
 53                 base = tf.layers.conv2d(base, 128, 3, padding='same')
 54                 base = tf.layers.batch_normalization(base, training=True)
 55                 base = tf.nn.relu(base)
 56                 base = tf.layers.max_pooling2d(base, 3, (2, 2))
 57                 # 37,37
 58             predict_1, down_sample_1 = self.down_sample_and_predict(base)
 59             predict_2, down_sample_2 = self.down_sample_and_predict(down_sample_1)
 60             predict_3, down_sample_3 = self.down_sample_and_predict(down_sample_2)
 61             predict_4, down_sample_4 = self.down_sample_and_predict(down_sample_3)
 62             predict_5 = self.predict_only(down_sample_4)
 63             feature_list = [predict_1, predict_2, predict_3, predict_4, predict_5]
 64 
 65         return feature_list
 66 
 67     def down_sample_and_predict(self, feature):
 68         with tf.name_scope('down_and_predict'):
 69             channels = feature.get_shape().as_list()[3]
 70             predict = tf.layers.conv2d(feature, 5*(self.num_of_class + 4), 3, padding='same')
 71             down_sample= tf.layers.conv2d(feature, 2*channels, 3, padding='same')
 72             down_sample = tf.nn.relu(down_sample)
 73             down_sample = tf.layers.max_pooling2d(down_sample, 3, strides=(2,2), padding='same')
 74             print('predictor shape :',predict.get_shape().as_list())
 75             print('down_sample shape :', down_sample.get_shape().as_list())
 76             return predict, down_sample
 77 
 78     def predict_only(self, feature):
 79         with tf.name_scope('down_and_predict'):
 80             predict = tf.layers.conv2d(feature, 5*(self.num_of_class + 4), 3, padding='same')
 81             print('predictor shape :',predict.get_shape().as_list())
 82             return predict
 83 
 84     def reshape_and_split_feature(self):
 85         feature_list = self.feature_list
 86         reshape_feature = []
 87         for feature in feature_list:
 88             width = feature.get_shape().as_list()[2]
 89             height = feature.get_shape().as_list()[1]
 90             reshape_feature.append(tf.reshape(feature, [-1, width*height*5, self.num_of_class+4]))
 91         reshape_feature = tf.concat(reshape_feature, axis=1)
 92         print('预测得到 %d 个default boxes'%reshape_feature.get_shape().as_list()[1])
 93         feature_class = reshape_feature[:,:,:self.num_of_class]
 94         feature_location = reshape_feature[:,:,self.num_of_class:]
 95         print('feature_class shape:',feature_class.get_shape().as_list())
 96         print('feature_location shape:', feature_location.get_shape().as_list())
 97         return feature_class, feature_location
 98 
 99     def generate_default_boxes(self):
100         default_boxes = []
101         t_start = time.time()
102         feature_list = self.feature_list
103         for index, feature in enumerate(feature_list):
104             width = feature.get_shape().as_list()[2]
105             height = feature.get_shape().as_list()[1]
106             scale = self.scales[index]
107             for x in range(width):
108                 for y in range(height):
109                     for i in range(len(self.ratios)):
110                         top_x = x*1. / width
111                         top_y = y*1. / height
112                         box_width = scale * np.sqrt(self.ratios[i])
113                         box_height = scale / np.sqrt(self.ratios[i])
114                         default_boxes.append([top_x, top_y, box_width, box_height])
115         t_end = time.time()
116         print('generate %d boxes '%len(default_boxes),'takes %f seconds'%(t_end - t_start))
117         default_boxes = np.asarray(default_boxes, dtype=np.float32)
118         print('default_boxes shape',default_boxes.shape)
119         return default_boxes
120 
121     def compute_jaccard(self, box_1, box_2):
122         x_len = max(0, min(box_1[0]+box_1[2], box_2[0]+ box_2[2]) - max(box_1[0], box_2[0]))
123         y_len = max(0, min(box_1[1] + box_1[3], box_2[1] + box_2[3]) - max(box_1[1], box_2[1]))
124         inter = x_len* y_len
125         union = box_1[2]*box_1[3] + box_2[2]*box_2[3] - inter
126         if union == 0:
127             return 0
128         else:
129             return inter / union
130 
131     def process_ground_truth(self, actual_input):
132         num_of_input = len(actual_input)
133         process_ground_truth_class = np.zeros(
　　　　　　　　　　　　　　　　　　　　shape=[num_of_input, self.num_of_default_boxes, self.num_of_class], 
　　　　　　　　　　　　　　　　　　　　dtype=np.int32)
134         process_ground_truth_location = np.zeros(
　　　　　　　　　　　　　　　　　　　　shape=[num_of_input, self.num_of_default_boxes, 4], 
　　　　　　　　　　　　　　　　　　　　dtype=np.float32)
135         process_ground_truth_positives = np.zeros(
　　　　　　　　　　　　　　　　　　　　shape=[num_of_input, self.num_of_default_boxes], 
　　　　　　　　　　　　　　　　　　　　dtype=np.float32)
136         process_ground_truth_negatives = np.zeros_like(process_ground_truth_positives)
137         process_ground_truth_jaccard = np.zeros_like(process_ground_truth_positives)
138         for index, actual in enumerate(actual_input):
139             for actual_in in actual:
140                 label = actual_in[-1:][0]
141                 box_info = actual_in[:-1]
142                 for box_index in range(self.num_of_default_boxes):
143                     jacc = self.compute_jaccard(self.default_boxes[box_index], box_info)
144                     if jacc >= self.jaccard_val:
145                         process_ground_truth_class[index][box_index][label] = 1
146                         process_ground_truth_location[index][box_index] = box_info
147                         process_ground_truth_positives[index][box_index] = 1
148                         process_ground_truth_negatives[index][box_index] = 0
149                         process_ground_truth_jaccard[index][box_index] = jacc
150 
151             if int(np.sum(process_ground_truth_positives[index])) == 0:
152                 random_index = np.random.randint(0,self.num_of_default_boxes,1)[0]
153                 process_ground_truth_class[index][random_index][0] = 1
154                 process_ground_truth_location[index][random_index] = [0,0,0,0]
155                 process_ground_truth_positives[index][random_index] = 1
156                 process_ground_truth_negatives[index][random_index] = 0
157                 process_ground_truth_jaccard[index][random_index] = self.jaccard_val
158 
159             negative_count = 3*int(np.sum(process_ground_truth_positives[index]))
160             if 4*int(np.sum(process_ground_truth_positives[index])) > self.num_of_default_boxes:
161                 negative_count = self.num_of_default_boxes - int(np.sum(process_ground_truth_positives[index]))
162             nega_indexs = np.random.randint(0,self.num_of_default_boxes, negative_count)
163             for nega_index in nega_indexs:
164                 if process_ground_truth_jaccard[index][nega_index] < 0.3:
165                     process_ground_truth_class[index][nega_index][0] = 1
166                     process_ground_truth_positives[index][nega_index] = 0
167                     process_ground_truth_negatives[index][nega_index] = 1
168 
169         return process_ground_truth_class, process_ground_truth_location, 
　　　　　　　　　　　process_ground_truth_positives, process_ground_truth_negatives
170 
171     def smooth_L1(self, x):
172         return tf.where(tf.less_equal(tf.abs(x),1.0), tf.multiply(0.5, tf.pow(x, 2.0)), tf.subtract(tf.abs(x), 0.5))
173 
# Build the SSD graph inside an interactive TensorFlow session.
sess = tf.InteractiveSession()
ssd = SSD(sess)
要回去睡觉了，差不多先搞这么多
会员力量，点亮园子希望
刷新页面返回顶部
张小贤TT

SSD: Single Shot MultiBox Detector 论文解读，附代码

About