TensorFlow in Action 8: Implementing ResNet in TensorFlow
# coding: utf-8

import collections
import math
import time
from datetime import datetime

import tensorflow as tf

slim = tf.contrib.slim


class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
    '''A named tuple describing a ResNet block.'''


def subsample(inputs, factor, scope=None):
    '''Subsampling.
    factor: subsampling factor. 1: return the input unchanged;
    otherwise: downsample with slim.max_pool2d.'''
    if factor == 1:
        return inputs
    else:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)


def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    '''Convolution that behaves like 'SAME' padding regardless of stride.'''
    if stride == 1:
        '''For stride 1, slim.conv2d with padding='SAME' is enough.'''
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                           padding='SAME', scope=scope)
    else:
        '''Pad zeros explicitly:
        total padding is kernel_size - 1; pad_beg = pad_total // 2, pad_end is the rest.'''
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        '''tf.pad zero-pads the inputs.'''
        inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                                 [pad_beg, pad_end], [0, 0]])
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                           padding='VALID', scope=scope)


@slim.add_arg_scope
def stack_blocks_dense(net, blocks, outputs_collections=None):
    '''net: input
    blocks: list of Block instances
    outputs_collections: collections used to gather the end_points'''
    for block in blocks:
        '''Two nested for loops stack the Blocks and their Residual Units one by one.'''
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            '''The two variable scopes name each residual unit as block_1/unit_1, etc.'''
            for i, unit in enumerate(block.args):
                with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
                    '''The inner loop takes each args tuple defined in the Block and
                    unpacks it into depth, depth_bottleneck and stride.'''
                    unit_depth, unit_depth_bottleneck, unit_stride = unit

                    '''unit_fn (the residual-unit generator) creates and connects
                    all residual units in order.'''
                    net = block.unit_fn(net,
                                        depth=unit_depth,
                                        depth_bottleneck=unit_depth_bottleneck,
                                        stride=unit_stride)

            '''slim.utils.collect_named_outputs adds the output net to the collection.'''
            net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)

    '''After all Residual Units are stacked, return net as the result of stack_blocks_dense.'''
    return net


def resnet_arg_scope(is_training=True,
                     weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    '''Create the arg_scope shared by all ResNets (it defines default values
    for the arguments of certain functions).'''

    batch_norm_params = {
        'is_training': is_training,
        'decay': batch_norm_decay,      # BN decay rate, default 0.997
        'epsilon': batch_norm_epsilon,  # default 1e-5
        'scale': batch_norm_scale,      # BN scale defaults to True
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }

    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):

        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc


@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    '''Bottleneck residual unit.
    inputs: input
    depth, depth_bottleneck, stride: the args defined in the Block class
    outputs_collections: collection that gathers the end_points
    scope: name of the unit'''
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:

        '''slim.utils.last_dimension gets the last dimension of the input, i.e. the
        number of input channels; min_rank=4 requires at least 4 dimensions.'''
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)

        '''slim.batch_norm applies Batch Normalization to the input, followed by
        a ReLU pre-activation.'''
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu,
                                 scope='preact')

        '''Define the shortcut (the identity branch x).'''
        if depth == depth_in:
            '''If the input and output channel counts of the unit are equal,
            subsample the inputs spatially with the given stride.'''
            shortcut = subsample(inputs, stride, 'shortcut')
        else:
            '''If they differ, use a strided 1x1 convolution to change the channel
            count so that input and output depths match.'''
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')

        '''Define the residual branch:
        step 1: 1x1 convolution, stride 1, depth_bottleneck output channels
        step 2: 3x3 convolution, stride `stride`, depth_bottleneck output channels
        step 3: 1x1 convolution, stride 1, depth output channels'''
        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        residual = conv2d_same(residual, depth_bottleneck, 3, stride,
                               scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual

        '''slim.utils.collect_named_outputs adds the result to outputs_collections
        and returns output as the function result.'''
        return slim.utils.collect_named_outputs(outputs_collections, sc.name, output)


def resnet_v2(inputs,
              blocks,
              num_classes=None,
              global_pool=True,
              include_root_block=True,
              reuse=None,
              scope=None):
    '''Main function that builds ResNet V2.
    inputs: input
    blocks: list of pre-defined Block instances
    num_classes: number of output classes
    global_pool: whether to add a final global average pooling layer
    include_root_block: whether to add the 7x7 convolution and max pooling
                        usually placed at the very front of ResNet
    reuse: whether to reuse variables
    scope: name of the whole network'''

    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'

        '''slim.arg_scope sets the outputs_collections argument of slim.conv2d,
        bottleneck and stack_blocks_dense to end_points_collection by default.'''
        with slim.arg_scope([slim.conv2d, bottleneck,
                             stack_blocks_dense],
                            outputs_collections=end_points_collection):

            net = inputs

            if include_root_block:
                with slim.arg_scope([slim.conv2d], activation_fn=None,
                                    normalizer_fn=None):
                    '''When include_root_block is set, create the 7x7, stride-2
                    convolution with 64 output channels at the front of ResNet.'''
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')

                '''3x3 max pooling with stride 2. After two stride-2 layers the
                spatial size has been reduced to 1/4.'''
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')

            '''stack_blocks_dense builds all the residual units.'''
            net = stack_blocks_dense(net, blocks)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')

            if global_pool:
                '''Add global average pooling when requested; tf.reduce_mean is
                more efficient here than an avg_pool op.'''
                net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)

            if num_classes is not None:
                '''If a class count is given, add a 1x1 convolution with
                num_classes output channels.'''
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='logits')

            '''slim.utils.convert_collection_to_dict turns the collection into a dict.'''
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)

            if num_classes is not None:
                '''Add a softmax layer that outputs the predictions.'''
                end_points['predictions'] = slim.softmax(net, scope='predictions')

            return net, end_points


def resnet_v2_50(inputs,
                 num_classes=None,
                 global_pool=True,
                 reuse=None,
                 scope='resnet_v2_50'):
    '''50-layer ResNet.
    The four blocks have 3, 4, 6 and 3 units, so the total depth is (3+4+6+3)*3+2 = 50.
    The first three blocks each contain a stride-2 layer, so the final spatial size
    is 224/(4*2*2*2) = 7 and the output has 2048 channels.'''
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)


def resnet_v2_101(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_101'):
    '''101-layer ResNet.
    The four blocks have 3, 4, 23 and 3 units, so the total depth is (3+4+23+3)*3+2 = 101.
    The first three blocks each contain a stride-2 layer, so the final spatial size
    is 224/(4*2*2*2) = 7 and the output has 2048 channels.'''
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)


def resnet_v2_152(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_152'):
    '''152-layer ResNet.
    The four blocks have 3, 8, 36 and 3 units, so the total depth is (3+8+36+3)*3+2 = 152.
    The first three blocks each contain a stride-2 layer, so the final spatial size
    is 224/(4*2*2*2) = 7 and the output has 2048 channels.'''
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)


def resnet_v2_200(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_200'):
    '''200-layer ResNet.
    The four blocks have 3, 24, 36 and 3 units, so the total depth is (3+24+36+3)*3+2 = 200.
    The first three blocks each contain a stride-2 layer, so the final spatial size
    is 224/(4*2*2*2) = 7 and the output has 2048 channels.'''
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)


def time_tensorflow_run(session, target, info_string):
    '''Run target for num_batches iterations (after num_steps_burn_in warm-up
    steps) and report the mean and standard deviation of the time per batch.'''
    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time

        if i >= num_steps_burn_in:
            if not i % 10:
                print('%s: step %d, duration = %.3f' %
                      (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration

    mn = total_duration / num_batches
    vr = total_duration_squared / num_batches - mn * mn
    sd = math.sqrt(vr)

    print('%s: %s across %d steps, %.3f +/- %.3f sec/batch' %
          (datetime.now(), info_string, num_batches, mn, sd))


batch_size = 32
height, width = 224, 224
inputs = tf.random_uniform((batch_size, height, width, 3))
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    net, end_points = resnet_v2_152(inputs, 1000)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
num_batches = 100
time_tensorflow_run(sess, net, 'Forward')
2017-12-23 23:51:01.359100: step 0, duration = 0.099
2017-12-23 23:51:02.359100: step 10, duration = 0.100
2017-12-23 23:51:03.358100: step 20, duration = 0.099
2017-12-23 23:51:04.359100: step 30, duration = 0.100
2017-12-23 23:51:05.361100: step 40, duration = 0.100
2017-12-23 23:51:06.363100: step 50, duration = 0.100
2017-12-23 23:51:07.366100: step 60, duration = 0.100
2017-12-23 23:51:08.372100: step 70, duration = 0.100
2017-12-23 23:51:09.388100: step 80, duration = 0.102
2017-12-23 23:51:10.394100: step 90, duration = 0.100
2017-12-23 23:51:11.298100: Forward across 100 steps, 0.010 +/- 0.030 sec/batch
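The docstrings above claim that for a 224x224 input the last block produces a 7x7 feature map with 2048 channels (224/(4*2*2*2) = 7). A small optional sanity check, not part of the original script, can confirm this by inspecting the collected end_points; the key name 'resnet_v2_152/block4' is assumed from the scope names used above and may differ slightly.

# Optional sanity check (assumed key names; not in the original script).
for name, tensor in end_points.items():
    if name.endswith('block4'):
        print(name, tensor.get_shape())  # expected: (32, 7, 7, 2048)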
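Because each depth variant only differs in its list of Block definitions, the same Block/unit_fn/args convention can be reused to assemble other depths. Below is a minimal sketch of a hypothetical 26-layer variant ((2+2+2+2)*3+2 = 26); resnet_v2_26 and its block layout are illustrative only and assume the Block, bottleneck and resnet_v2 definitions above, with each of the first three blocks ending in a stride-2 unit, mirroring resnet_v2_50 through resnet_v2_200.

def resnet_v2_26(inputs,
                 num_classes=None,
                 global_pool=True,
                 reuse=None,
                 scope='resnet_v2_26'):
    '''Hypothetical 26-layer ResNet: two bottleneck units per block.'''
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 2)
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)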