Tensorflow版Faster RCNN源码解析(TFFRCNN) (06) train.py
本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记
---------------个人学习笔记---------------
----------------本文作者疆--------------
------点击此处链接至博客园原文------
_DEBUG默认为False
1.SolverWrapper类
class SolverWrapper(object):
    """A simple wrapper around Caffe's solver.

    This wrapper gives us control over the snapshotting process, which we
    use to unnormalize the learned bounding-box regression weights.
    """
    # Background (author's notes on Caffe): a solver optimizes the model by
    # coordinating the forward pass and the backward gradient pass, and
    # improves the loss by updating the weights.
    # The solver's job is split into supervising the optimization and
    # updating the parameters: it produces the loss and computes gradients.
    # It defines how the whole model runs; training or testing through either
    # the command line or the pycaffe interface needs a solver config file.
类中定义如下函数:
------------------------------------------------------__init__(...)---------------------------------------------------------
def __init__(self,sess,network,imdb,roidb,output_dir,logdir,pretrained_model=None) 构造函数
def __init__(self, sess, network, imdb, roidb, output_dir, logdir, pretrained_model=None):
    """Set up the solver state, the checkpoint saver and the summary writer.

    Args:
        sess: TensorFlow session (accepted for interface symmetry; it is not
            used inside the constructor).
        network: network object, stored as ``self.net``.
        imdb: image database, stored as ``self.imdb``.
        roidb: RoI database, stored as ``self.roidb`` and used to compute the
            bounding-box regression targets.
        output_dir: directory that snapshots are written to.
        logdir: directory that summary event files are written to.
        pretrained_model: optional path of pretrained weights.
    """
    self.net, self.imdb, self.roidb = network, imdb, roidb
    self.output_dir, self.pretrained_model = output_dir, pretrained_model

    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        # Mean and std of the regression targets; reused later to
        # unnormalize the bbox_pred layer.
        target_stats = rdl_roidb.add_bbox_regression_targets(roidb)
        self.bbox_means, self.bbox_stds = target_stats
    print('done')

    # Checkpoint saver: keep up to 100 snapshots, written in the V1 format.
    checkpoint_format = saver_pb2.SaverDef.V1
    self.saver = tf.train.Saver(max_to_keep=100, write_version=checkpoint_format)

    # Event-file writer; also records the default graph.
    default_graph = tf.get_default_graph()
    self.writer = tf.summary.FileWriter(logdir=logdir, graph=default_graph, flush_secs=5)
其中,调用add_bbox_regression_targets(roidb)(定义于roi_data_layer/roidb.py)返回bbox回归目标的均值bbox_means和标准差bbox_stds;这两个值随后在snapshot(...)中用于对bbox_pred层的weights和biases做反规范化(unnormalize)。
def add_bbox_regression_targets(roidb):
    """Add information needed to train bounding-box regressors.

    For each RoI, find the corresponding ground-truth box and compute the
    distance to it, then normalize that distance by subtracting the mean and
    dividing by the standard deviation.
    """
    # NOTE: only the signature and docstring are quoted in this excerpt; the
    # function body lives in roi_data_layer/roidb.py.
tf.train.Saver(...)与模型保存与恢复有关、tf.summary.FileWriter(...)与保存计算图有关
------------------------------------------------------snapshot(...)---------------------------------------------------------
def snapshot(self,sess,iter)
def snapshot(self, sess, iter):
    """Save a checkpoint with unnormalized bounding-box regression weights.

    When bbox regression with normalized targets is enabled and the network
    has a ``bbox_pred`` layer, that layer's weights and biases are scaled by
    ``self.bbox_stds`` and shifted by ``self.bbox_means`` before saving, so
    the saved model can be used directly at test time. The original values
    are always written back afterwards so training continues unchanged.

    Args:
        sess: TensorFlow session holding the variables to save.
        iter: zero-based iteration index; the file name uses ``iter + 1``.
    """
    net = self.net

    # Decide once whether to unnormalize. The original tested three
    # conditions before saving but only two when restoring, which raised a
    # NameError when BBOX_NORMALIZE_TARGETS was disabled.
    unnormalize = (cfg.TRAIN.BBOX_REG
                   and cfg.TRAIN.BBOX_NORMALIZE_TARGETS
                   and 'bbox_pred' in net.layers)

    if unnormalize:
        with tf.variable_scope('bbox_pred', reuse=True):
            weights = tf.get_variable("weights")
            biases = tf.get_variable("biases")

        # Keep the current values so they can be restored after saving.
        orig_0 = weights.eval(session=sess)
        orig_1 = biases.eval(session=sess)

        # Scale and shift with the bbox regression statistics.
        weights_shape = weights.get_shape().as_list()
        sess.run(weights.assign(orig_0 * np.tile(self.bbox_stds, (weights_shape[0], 1))))
        sess.run(biases.assign(orig_1 * self.bbox_stds + self.bbox_means))

    try:
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

        infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                 if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
        filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix +
                    '_iter_{:d}'.format(iter + 1) + '.ckpt')
        filename = os.path.join(self.output_dir, filename)

        self.saver.save(sess, filename)
        print('Wrote snapshot to: {:s}'.format(filename))
    finally:
        # Restore the training-time values even if saving failed, so a failed
        # snapshot cannot leave the net with unnormalized weights.
        if unnormalize:
            sess.run(weights.assign(orig_0))
            sess.run(biases.assign(orig_1))
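该函数负责存储.ckpt训练模型,被train_model(...)调用。保存前先用bbox_stds和bbox_means对bbox_pred层的weights和biases做反规范化(unnormalize),按其docstring所述,这是为了便于测试阶段直接使用保存的模型;保存后再恢复原始值,使训练继续在原有权重上进行。cfg.TRAIN.BBOX_NORMALIZE_TARGETS的具体含义仍待确认(从add_bbox_regression_targets的docstring看,应与回归目标减均值、除以标准差的规范化有关);eval()用于在当前会话中取出变量的当前数值。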
------------------------------------------------------build_image_summary(...)---------------------------------------------------------
def build_image_summary(self)
def build_image_summary(self):
    """Build a small standalone graph that emits a single image summary.

    Returns:
        A ``(summary_op, image_placeholder, name_placeholder)`` tuple: the
        summary op to run, the uint8 ``[None, None, 3]`` placeholder that
        feeds the picture, and the string placeholder that feeds its tag.
    """
    from tensorflow.python.framework import ops as tf_ops
    from tensorflow.python.ops import gen_logging_ops as logging_ops

    image_ph = tf.placeholder(tf.uint8, [None, None, 3])
    name_ph = tf.placeholder(tf.string)

    # The summary op is fed a batch, so add a leading axis to the one image.
    batched = tf.expand_dims(image_ph, 0)
    summary_op = logging_ops._image_summary(name_ph, batched, max_images=1)

    # Register the op in the graph's SUMMARIES collection.
    # (Alternative left by the original author: tf.summary.image(...).)
    tf_ops.add_to_collection(tf_ops.GraphKeys.SUMMARIES, summary_op)
    return summary_op, image_ph, name_ph
生成与图像、图像数据、图像名相关的摘要日志文件,tensorflow相关机制不懂,如gen_logging_ops._image_summary(...)与_ops.add_to_collection(...),被train_model(...)调用
2.SolverWrapper类中train_model(...)函数(训练过程主要代码)代码逻辑
def train_model(self,sess,max_iters,restore=False)
调用get_data_layer(self.roidb, self.imdb.num_classes)实例化RoIDataLayer类(roi_data_layer/layer.py中)对象并返回layer---->
调用build_loss定义各种训练loss(network.py中)--->
利用tf.summary中各类方法保存训练过程,可供tensorboard显示,如tf.summary.scalar记录loss可用于绘制loss曲线--->
# 训练过程中,用到的tf.summary()各类方法,能够保存训练过程以及参数分布图并在tensorboard显示 # scalar summary 生成标量图,可以用于画各类loss图像 tf.summary.scalar('rpn_rgs_loss', rpn_loss_box) tf.summary.scalar('rpn_cls_loss', rpn_cross_entropy) tf.summary.scalar('cls_loss', cross_entropy) tf.summary.scalar('rgs_loss', loss_box) tf.summary.scalar('loss', loss) summary_op = tf.summary.merge_all()
调用类内函数build_image_summary()生成与图像、图像数据、图像名相关的摘要日志文件log_image, log_image_data, log_image_name--->
定义默认优化方式为opt=tf.train.MomentumOptimizer(lr,momentum)--->
# optimizer 定义优化方式 # 默认TRAIN.SOLVER = 'Momentum' TRAIN.LEARNING_RATE = 0.001 TRAIN.MOMENTUM = 0.9 if cfg.TRAIN.SOLVER == 'Adam': opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE) elif cfg.TRAIN.SOLVER == 'RMS': opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE) else: lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False) # lr = tf.Variable(0.0, trainable=False) momentum = cfg.TRAIN.MOMENTUM opt = tf.train.MomentumOptimizer(lr, momentum) # Momentum优化方式,使用了超参数
定义记录全局训练步骤的单值global_step = tf.Variable(0, trainable=False)--->
训练核心代码,tf.clip_by_global_norm(tf.gradients(loss, tvars), 10.0)、opt.apply_gradients(zip(grads, tvars), global_step=global_step),但是反向传播体现在哪?--->
with_clip = True
if with_clip:
    # Collect every trainable variable.
    tvars = tf.trainable_variables()
    # tf.gradients builds the gradients of `loss` with respect to all
    # trainable variables. tf.clip_by_global_norm then rescales them:
    # `grads` are the clipped gradients and `norm` is their global norm
    # before clipping.
    grads, norm = tf.clip_by_global_norm(tf.gradients(loss, tvars), 10.0)
    # Optimizer.minimize is really two steps: compute_gradients followed by
    # apply_gradients. The gradients are already computed above, so only the
    # update step is needed here.
    train_op = opt.apply_gradients(zip(grads, tvars), global_step=global_step)
else:
    train_op = opt.minimize(loss, global_step=global_step)
创建会话权值初始化--->定义restore_iter = 0--->
调用self.net.load(self.pretrained_model, sess, True)函数(network.py中)加载预训练模型--->
# load vgg16 if self.pretrained_model is not None and not restore: try: print ('Loading pretrained model ' 'weights from {:s}').format(self.pretrained_model) self.net.load(self.pretrained_model, sess, True) except: raise 'Check your pretrained model {:s}'.format(self.pretrained_model) # resuming a trainer # 恢复继续训练 if restore: try: ckpt = tf.train.get_checkpoint_state(self.output_dir) print 'Restoring from {}...'.format(ckpt.model_checkpoint_path), self.saver.restore(sess, ckpt.model_checkpoint_path) # basename函数去掉目录路径单独返回文件名、splitext分离文件名和扩展名 stem = os.path.splitext(os.path.basename(ckpt.model_checkpoint_path))[0] restore_iter = int(stem.split('_')[-1]) sess.run(global_step.assign(restore_iter)) # 注意global_step变量 print 'done' except: raise 'Check your pretrained {:s}'.format(ckpt.model_checkpoint_path)
迭代训练(获取blobs blobs = data_layer.forward(),roi_data_layer/layer.py中)--->定义feed_dict和res_fetches(res表示result)--->
通过sess获取loss等输出--->
# Run one step and unpack the fetched values. This is the unpacking used in
# the `_DEBUG` branch of train_model (it also fetches the two RPN reshape
# outputs); the non-debug branch fetches `train_op` instead.
rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, \
summary_str, \
cls_prob, bbox_pred, rois, \
rpn_cls_score_reshape_np, rpn_cls_prob_reshape_np\
= sess.run(fetches=fetch_list, feed_dict=feed_dict)
image summary(记录训练日志、调用_draw_gt_to_image()函数和_draw_dontcare_to_image()函数等绘制gt、hard example、dontcare box等)、调用_process_boxes_scores()函数得到回归后boxes和scores
调用nms_wrapper()函数(nms_wrapper.py中)返回res列表,共有num_classes-1个元素,每个元素对应一类的检测结果、调用_draw_boxes_to_image()将检测结果绘制在图像上--->
# image summary # 默认TRAIN.LOG_IMAGE_ITERS = 100,每100次迭代记录一次log if (iter) % cfg.TRAIN.LOG_IMAGE_ITERS == 0: # plus mean # PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) 对图像加均值操作 # np.squeeze从数组的形状中删除单维度条目,即把shape中为1的维度去掉 # 为何这里要对图像加均值? ori_im = np.squeeze(blobs['data']) + cfg.PIXEL_MEANS ori_im = ori_im.astype(dtype=np.uint8, copy=False) # draw rects ori_im = _draw_gt_to_image(ori_im, blobs['gt_boxes'], blobs['gt_ishard']) ori_im = _draw_dontcare_to_image(ori_im, blobs['dontcare_areas']) # print 'rois:', rois.shape[0] # 默认TRAIN.BBOX_REG = True、TRAIN.BBOX_NORMALIZE_TARGETS = True???含义不明 if cfg.TRAIN.BBOX_REG and cfg.TRAIN.BBOX_NORMALIZE_TARGETS: # 对于网络输出的bbox_pred为何还要乘方差bbox_stds加均值bbox_means以更新bbox_pred,在测试阶段却未做类似做处理 # bbox_pred仅是预测得到的关系,不是最终的box左上、右下坐标 # np.tile()将原矩阵横向、纵向地复制 bbox_pred = bbox_pred * np.tile(self.bbox_stds, (bbox_pred.shape[0], 1)) + \ np.tile(self.bbox_means, (bbox_pred.shape[0], 1)) # 得到回归后的boxes和对应得分 boxes, scores = _process_boxes_scores(cls_prob, bbox_pred, rois, blobs['im_info'][0][2], ori_im.shape) # nms处理等,res为列表,每个列表的元素为1个字典,字典内含box的类别和det(box坐标和得分) # res为列表,列表中每个元素为字典,每个字典含某类标号如(class_1,class_2...)和dets(该类box的坐标和对应score res = nms_wrapper(scores, boxes, threshold=0.7) # nms_wrapper.py中 # 将回归后经过nms处理后的box绘制在图像上,并转换BGR to RGB image = cv2.cvtColor(_draw_boxes_to_image(ori_im, res), cv2.COLOR_BGR2RGB) # 记录image相关日志 log_image_name_str = ('%06d_' % iter ) + blobs['im_name'] log_image_summary_op = \ sess.run(log_image, \ feed_dict={log_image_name: log_image_name_str,\ log_image_data: image}) self.writer.add_summary(log_image_summary_op, global_step=global_step.eval())
为何每TRAIN.LOG_IMAGE_ITERS = 100次需对图像加均值处理;TRAIN.BBOX_NORMALIZE_TARGETS = True;
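对于网络输出的bbox_pred为何还要乘标准差bbox_stds、加均值bbox_means以更新bbox_pred,而在测试阶段却未做类似处理?(可能的原因:snapshot(...)在保存模型前已对bbox_pred层的weights和biases做了同样的反规范化,因此测试阶段加载的模型无需再处理;而训练过程中的权重仍是规范化的,所以这里需要手动还原。)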
网络训练模型snapshot
# Periodically print the losses, the learning rate and the iteration time.
if (iter) % (cfg.TRAIN.DISPLAY) == 0:
    print 'iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f'%\
            (iter, max_iters, rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value ,\
             rpn_loss_cls_value, rpn_loss_box_value,loss_cls_value, loss_box_value, lr.eval())
    print 'speed: {:.3f}s / iter'.format(_diff_time)

# Snapshot and save the whole Faster RCNN network periodically.
# Per the author's notes TRAIN.SNAPSHOT_ITERS defaults to 5000.
if (iter+1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
    last_snapshot_iter = iter
    self.snapshot(sess, iter)

# Take one more snapshot at the end if the last iteration was not saved.
# (In the full train_model listing this check follows the training loop.)
if last_snapshot_iter != iter:
    self.snapshot(sess, iter)
train_model(...)完整代码:
def train_model(self, sess, max_iters, restore=False):
    """Run the network training loop.

    Builds the losses, summaries and optimizer, loads the pretrained model
    or resumes from the latest checkpoint, then iterates over batches from
    the data layer. Summaries, console progress and snapshots are emitted at
    the intervals configured in ``cfg.TRAIN``.

    Args:
        sess: TensorFlow session used for all graph execution.
        max_iters: iteration index (exclusive) at which training stops.
        restore: if True, resume from the latest checkpoint found in
            ``self.output_dir`` instead of loading the pretrained model.

    Raises:
        IOError: if ``restore`` is set but no checkpoint can be found.
    """
    # Data layer that yields one batch of blobs per forward() call.
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

    # Total loss plus its four components.
    loss, cross_entropy, loss_box, rpn_cross_entropy, rpn_loss_box = \
        self.net.build_loss(ohem=cfg.TRAIN.OHEM)

    # Scalar summaries, used to plot each loss curve in TensorBoard.
    tf.summary.scalar('rpn_rgs_loss', rpn_loss_box)
    tf.summary.scalar('rpn_cls_loss', rpn_cross_entropy)
    tf.summary.scalar('cls_loss', cross_entropy)
    tf.summary.scalar('rgs_loss', loss_box)
    tf.summary.scalar('loss', loss)
    summary_op = tf.summary.merge_all()

    # Image summary. Built after merge_all() so that it stays independent of
    # summary_op and is only evaluated when an image is actually logged.
    log_image, log_image_data, log_image_name = self.build_image_summary()

    # The learning rate lives in a variable for every solver. Previously it
    # only existed for Momentum, so Adam/RMS crashed at the first lr.eval().
    lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
    if cfg.TRAIN.SOLVER == 'Adam':
        opt = tf.train.AdamOptimizer(lr)
    elif cfg.TRAIN.SOLVER == 'RMS':
        opt = tf.train.RMSPropOptimizer(lr)
    else:
        opt = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM)

    # Counter of applied training steps.
    global_step = tf.Variable(0, trainable=False)

    with_clip = True
    if with_clip:
        # minimize() is compute_gradients + apply_gradients; the gradients
        # are computed and clipped by global norm here, so only apply them.
        tvars = tf.trainable_variables()
        grads, norm = tf.clip_by_global_norm(tf.gradients(loss, tvars), 10.0)
        train_op = opt.apply_gradients(zip(grads, tvars), global_step=global_step)
    else:
        train_op = opt.minimize(loss, global_step=global_step)

    # Initialize variables.
    sess.run(tf.global_variables_initializer())
    restore_iter = 0

    # Load the pretrained model unless resuming from a checkpoint.
    if self.pretrained_model is not None and not restore:
        try:
            print('Loading pretrained model weights from {:s}'.format(self.pretrained_model))
            self.net.load(self.pretrained_model, sess, True)
        except Exception:
            # Print the hint, then re-raise the real error. The original
            # raised a plain string, which is not a valid exception.
            print('Check your pretrained model {:s}'.format(self.pretrained_model))
            raise

    # Resume training from the latest checkpoint.
    if restore:
        ckpt = tf.train.get_checkpoint_state(self.output_dir)
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise IOError('No checkpoint found in {:s}'.format(self.output_dir))
        try:
            # Trailing comma: Python 2 print statement without a newline, so
            # that 'done' ends up on the same line.
            print('Restoring from {}...'.format(ckpt.model_checkpoint_path)),
            self.saver.restore(sess, ckpt.model_checkpoint_path)
            # The checkpoint stem ends in "_<iteration>".
            stem = os.path.splitext(os.path.basename(ckpt.model_checkpoint_path))[0]
            restore_iter = int(stem.split('_')[-1])
            sess.run(global_step.assign(restore_iter))
            print('done')
        except Exception:
            print('Check your pretrained {:s}'.format(ckpt.model_checkpoint_path))
            raise
        # NOTE(review): `lr` restarts from cfg.TRAIN.LEARNING_RATE here; decay
        # steps applied before the checkpoint are presumably not replayed -
        # confirm whether that is intended.

    last_snapshot_iter = -1
    timer = Timer()
    # Stays None when the loop body never runs (restore_iter >= max_iters).
    step = None
    for step in range(restore_iter, max_iters):
        timer.tic()

        # Decay the learning rate every cfg.TRAIN.STEPSIZE iterations.
        if step != 0 and step % cfg.TRAIN.STEPSIZE == 0:
            sess.run(tf.assign(lr, lr.eval(session=sess) * cfg.TRAIN.GAMMA))

        # Get one batch.
        blobs = data_layer.forward()

        if (step + 1) % cfg.TRAIN.DISPLAY == 0:
            # Trailing comma: Python 2 print statement without a newline.
            print('image: %s' % (blobs['im_name'])),

        feed_dict = {
            self.net.data: blobs['data'],
            self.net.im_info: blobs['im_info'],
            self.net.keep_prob: 0.5,
            self.net.gt_boxes: blobs['gt_boxes'],
            self.net.gt_ishard: blobs['gt_ishard'],
            self.net.dontcare_areas: blobs['dontcare_areas']
        }

        res_fetches = [self.net.get_output('cls_prob'),   # FRCNN class prob output
                       self.net.get_output('bbox_pred'),  # FRCNN rgs output
                       self.net.get_output('rois')]       # RPN rgs output

        if _DEBUG:
            # Debug mode fetches the RPN reshape outputs for inspection.
            # NOTE: train_op is not fetched here, so no update is applied.
            fetch_list = [rpn_cross_entropy,
                          rpn_loss_box,
                          cross_entropy,
                          loss_box,
                          summary_op] + res_fetches
            fetch_list += [self.net.get_output('rpn_cls_score_reshape'),
                           self.net.get_output('rpn_cls_prob_reshape')]

            rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, \
                summary_str, \
                cls_prob, bbox_pred, rois, \
                rpn_cls_score_reshape_np, rpn_cls_prob_reshape_np \
                = sess.run(fetches=fetch_list, feed_dict=feed_dict)
        else:
            fetch_list = [rpn_cross_entropy,
                          rpn_loss_box,
                          cross_entropy,
                          loss_box,
                          summary_op,
                          train_op] + res_fetches

            rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, \
                summary_str, _, \
                cls_prob, bbox_pred, rois = sess.run(fetches=fetch_list, feed_dict=feed_dict)

        self.writer.add_summary(summary=summary_str,
                                global_step=global_step.eval(session=sess))

        _diff_time = timer.toc(average=False)

        # Image summary, every cfg.TRAIN.LOG_IMAGE_ITERS iterations.
        if step % cfg.TRAIN.LOG_IMAGE_ITERS == 0:
            # Add the pixel means back onto the network input.
            # NOTE(review): presumably undoes a mean subtraction done by the
            # data layer - confirm there.
            ori_im = np.squeeze(blobs['data']) + cfg.PIXEL_MEANS
            ori_im = ori_im.astype(dtype=np.uint8, copy=False)

            # Draw ground-truth boxes and dontcare areas.
            ori_im = _draw_gt_to_image(ori_im, blobs['gt_boxes'], blobs['gt_ishard'])
            ori_im = _draw_dontcare_to_image(ori_im, blobs['dontcare_areas'])

            if cfg.TRAIN.BBOX_REG and cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
                # bbox_pred holds offsets, not corner coordinates. Undo the
                # target normalization with the stored stds and means.
                bbox_pred = bbox_pred * np.tile(self.bbox_stds, (bbox_pred.shape[0], 1)) + \
                    np.tile(self.bbox_means, (bbox_pred.shape[0], 1))

            # Boxes after regression, with their scores.
            boxes, scores = _process_boxes_scores(cls_prob, bbox_pred, rois,
                                                  blobs['im_info'][0][2], ori_im.shape)
            # `res` is a list of dicts, each holding a class label and `dets`.
            res = nms_wrapper(scores, boxes, threshold=0.7)
            # Draw the boxes kept by NMS and convert BGR to RGB.
            image = cv2.cvtColor(_draw_boxes_to_image(ori_im, res), cv2.COLOR_BGR2RGB)

            log_image_name_str = ('%06d_' % step) + blobs['im_name']
            log_image_summary_op = sess.run(
                log_image,
                feed_dict={log_image_name: log_image_name_str,
                           log_image_data: image})
            self.writer.add_summary(log_image_summary_op,
                                    global_step=global_step.eval(session=sess))

        if step % cfg.TRAIN.DISPLAY == 0:
            print('iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f' %
                  (step, max_iters,
                   rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value,
                   rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value,
                   lr.eval(session=sess)))
            print('speed: {:.3f}s / iter'.format(_diff_time))

        # Periodic snapshot of the whole network.
        if (step + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = step
            self.snapshot(sess, step)

    # Final snapshot, unless the last iteration was already saved or no
    # iteration ran at all.
    if step is not None and last_snapshot_iter != step:
        self.snapshot(sess, step)
3.其他函数
def get_training_roidb(imdb)
def get_training_roidb(imdb):
    """Prepare and return the RoI database of ``imdb`` for training.

    Optionally appends horizontally flipped images first, then lets the
    matching roidb module prepare the database in place.
    """
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    # Only the RPN + multi-scale combination goes through gdl_roidb; every
    # other configuration uses rdl_roidb.
    multiscale_rpn = cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE
    if multiscale_rpn:
        # TODO: fix multiscale training (single scale is already a good trade-off)
        print('#### warning: multi-scale has not been tested.')
        print('#### warning: using single scale by setting IS_MULTISCALE: False.')
        preparer = gdl_roidb
    else:
        preparer = rdl_roidb
    preparer.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
获得参与训练的roi数据库roidb(与imdb类相关,涉及gt_data_layer/roidb.py和roi_data_layer/roidb.py),在本文件中未见调用
def get_data_layer(roidb,num_classes)
def get_data_layer(roidb, num_classes):
    """Return the data layer that feeds training batches.

    Args:
        roidb: RoI database handed to the layer.
        num_classes: number of classes handed to the layer.

    Returns:
        A ``RoIDataLayer`` built from ``roidb`` and ``num_classes``.

    Raises:
        NotImplementedError: for RPN training with ``cfg.IS_MULTISCALE``
            enabled; that path relied on the obsolete ``GtDataLayer``.
    """
    if cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE:
        # The original raised a plain string here, which is not a valid
        # exception; raise a real exception with the same message.
        raise NotImplementedError("Calling caffe modules...")
    return RoIDataLayer(roidb, num_classes)
实例化RoIDataLayer类对象(该类在roi_data_layer/layer.py中定义,70余行代码),被train_model(...)函数调用,这里roidb含义不明
def _draw_gt_to_image(im,gt_boxes,gt_ishard)
def _draw_gt_to_image(im, gt_boxes, gt_ishard): # 在图像上以不同颜色绘制gt box和hard example image = np.copy(im) for i in range(0, gt_boxes.shape[0]): (x1, y1, x2, y2, score) = gt_boxes[i, :] if gt_ishard[i] == 0: cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (255, 255, 255), 2) else: cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2) return image
在图像上以不同颜色绘制gt box和hard example box,被train_model()函数调用
def _draw_dontcare_to_image(im,dontcare)
def _draw_dontcare_to_image(im, dontcare): #被当前文件调用 image = np.copy(im) for i in range(0, dontcare.shape[0]): (x1, y1, x2, y2) = dontcare[i, :] # Python中是有这种用法的 cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2) return image
在图像上绘制dontcare box,被train_model()函数调用
def _process_boxes_scores(cls_prob,bbox_pred,rois,im_scale,im_shape)
def _process_boxes_scores(cls_prob, bbox_pred, rois, im_scale, im_shape):
    """Turn the network outputs into clipped boxes and their scores.

    Args:
        cls_prob: class probabilities, returned unchanged as the scores.
        bbox_pred: predicted box deltas, one row per RoI.
        rois: RoIs; columns 1..4 are used as the box coordinates.
        im_scale: accepted for interface compatibility; not used here.
        im_shape: image shape that the boxes are clipped to.

    Returns:
        A ``(pred_boxes, scores)`` tuple.
    """
    assert rois.shape[0] == bbox_pred.shape[0], \
        'rois and bbox_pred must have the same shape'

    proposals = rois[:, 1:5]
    if cfg.TEST.BBOX_REG:
        # Apply the predicted deltas to the proposals.
        candidates = bbox_transform_inv(proposals, deltas=bbox_pred)
    else:
        # No regression: use the proposals as they are.
        candidates = proposals
    return clip_boxes(candidates, im_shape), cls_prob
根据网络输出的bbox_pred回归box得到回归后的pred_boxes和box得分scores
def _draw_boxes_to_image(im,res)
def _draw_boxes_to_image(im, res):
    """Return a copy of ``im`` with the detections of every class drawn.

    Args:
        im: image to draw on; it is copied, not modified.
        res: list of dicts, one per class. Each dict has a ``'class'`` label
            and ``'dets'``, which is either None or an array whose rows are
            ``(x1, y1, x2, y2, score)``.

    Returns:
        The annotated copy. Each class gets a colour from a fixed palette,
        which is reused cyclically when there are more classes than colours.
    """
    colors = [(86, 0, 240), (173, 225, 61), (54, 137, 255),
              (151, 0, 255), (243, 223, 48), (0, 117, 255),
              (58, 184, 14), (86, 67, 140), (121, 82, 6),
              (174, 29, 128), (115, 154, 81), (86, 255, 234)]
    font = cv2.FONT_HERSHEY_SIMPLEX
    image = np.copy(im)
    for ind, r in enumerate(res):
        dets = r['dets']
        if dets is None:
            continue
        color = colors[ind % len(colors)]
        for i in range(dets.shape[0]):
            (x1, y1, x2, y2, score) = dets[i, :]
            # Use integer pixel coordinates for both drawing calls. The
            # original passed raw floats to putText, unlike rectangle.
            top_left = (int(x1), int(y1))
            cv2.rectangle(image, top_left, (int(x2), int(y2)), color, 2)
            text = '{:s} {:.2f}'.format(r['class'], score)
            cv2.putText(image, text, top_left, font, 0.6, color, 1)
    return image
在图像上逐类别(class_1、class_2...)画出检测的最终结果box和score等
def train_net(network,imdb,roidb,output_dir,log_dir,pretrained_model = None,max_iters = 40000,restore = False)
def train_net(network, imdb, roidb, output_dir, log_dir, pretrained_model=None, max_iters=40000, restore=False):
    """Create a session, wrap the network in a SolverWrapper and train it.

    Args:
        network: network object to train.
        imdb: image database.
        roidb: RoI database.
        output_dir: directory that snapshots are written to.
        log_dir: directory that summary event files are written to.
        pretrained_model: optional path of pretrained weights.
        max_iters: iteration index at which training stops.
        restore: if True, resume from the latest checkpoint.
    """
    # allow_soft_placement lets TensorFlow choose another device when the
    # requested one does not exist.
    session_config = tf.ConfigProto(allow_soft_placement=True)
    gpu_options = session_config.gpu_options
    gpu_options.allocator_type = 'BFC'
    gpu_options.per_process_gpu_memory_fraction = 0.40

    with tf.Session(config=session_config) as sess:
        solver = SolverWrapper(sess, network, imdb, roidb, output_dir,
                               logdir=log_dir,
                               pretrained_model=pretrained_model)
        print('Solving...')
        solver.train_model(sess, max_iters, restore=restore)
        print('done solving')
被faster_rcnn/train_net.py调用,启动训练过程