Tensorflow版Faster RCNN源码解析(TFFRCNN) (13) gt_data_layer/roidb.py
本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记
---------------个人学习笔记---------------
----------------本文作者疆--------------
------点击此处链接至博客园原文------
定义函数与roi_data_layer/roidb.py类似
1.prepare_roidb(imdb)
给imdb.roidb中每张图像的条目增加'info_boxes'字段,shape为N*18;18列依次为(cx, cy, scale_ind, box(4列), scale_ind_map, box_map(4列), gt_label, gt_sublabel, target(4列)),即2+1+4+1+4+1+1+4=18。该函数未见调用
其中get_boxes_grid(image_height, image_width)为C编译的扩展函数,其具体含义未知
为何imdb.roidb中既有gt roi又有一般的roi???ex_rois和gt_rois???
默认无cfg.TRAIN.SCALE_MAPPING值,将报错,该函数并未被执行
# Add an 'info_boxes' field to every entry of imdb.roidb.
# Each row has 18 columns:
# (cx, cy, scale_ind, box[4], scale_ind_map, box_map[4], gt_label, gt_sublabel, target[4])
def prepare_roidb(imdb):
    """Attach 'image' and 'info_boxes' to each entry of ``imdb.roidb``.

    If ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl`` exists, it is
    unpickled into ``imdb._roidb`` and the function returns immediately.
    Otherwise, for every image and every scale in ``cfg.TRAIN.SCALES``, the
    grid boxes returned by ``get_boxes_grid`` are matched against the scaled
    ground-truth boxes; grid boxes whose best overlap is at least
    ``cfg.TRAIN.FG_THRESH`` with a foreground class are kept as positives.

    Column layout of an ``info_boxes`` row (float32):
        0      cx
        1      cy
        2      scale_ind
        3:7    grid box
        7      scale_ind_map
        8:12   grid box mapped to the scale selected by scale_ind_map
        12     ground-truth label
        13     gt_sublabel (never written here, stays 0)
        14:18  regression targets from ``_compute_targets``

    Returns None; ``imdb`` is modified in place.

    NOTE(review): this function reads the cache file but nothing here writes
    it -- confirm where (or whether) it is produced. The file is unpickled
    as-is, so it must come from a trusted location.
    """
    cache_file = os.path.join(imdb.cache_path,
                              imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file))
        return

    roidb = imdb.roidb
    # Visit every image of the dataset.
    for i in range(len(imdb.image_index)):
        entry = roidb[i]
        entry['image'] = imdb.image_path_at(i)
        boxes = entry['boxes']
        labels = entry['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        # No boxes for this image: store the empty table and move on.
        if boxes.shape[0] == 0:
            entry['info_boxes'] = info_boxes
            continue

        # PIL reports the size as (width, height).
        image_width, image_height = PIL.Image.open(imdb.image_path_at(i)).size
        # NOTE(review): get_boxes_grid is a compiled helper; presumably it
        # returns candidate grid boxes and their centres -- confirm.
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # Overlap of every grid box with every rescaled box; builtin
            # float is the same type the legacy np.float alias referred to.
            overlaps = bbox_overlaps(boxes_grid.astype(float),
                                     boxes_rescaled.astype(float))
            # Best overlap per grid box, and the class of that best match.
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            max_classes = labels[argmax_overlaps]

            # Collect positives class by class (class 0 is skipped), which
            # keeps the resulting rows grouped by class.
            overlap_ok = max_overlaps >= cfg.TRAIN.FG_THRESH
            fg_inds = []
            for k in range(1, imdb.num_classes):
                fg_inds.extend(np.where((max_classes == k) & overlap_ok)[0])

            if not fg_inds:
                continue

            gt_inds = argmax_overlaps[fg_inds]
            fg_boxes = boxes_grid[fg_inds, :]
            # Bounding-box regression targets for the positive grid boxes.
            gt_targets = _compute_targets(fg_boxes, boxes_rescaled[gt_inds, :])

            # NOTE(review): the accompanying notes state that
            # cfg.TRAIN.SCALE_MAPPING has no default value, so this lookup
            # would fail -- confirm against the project's config.
            scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
            scale_map = cfg.TRAIN.SCALES[scale_ind_map]

            # Build the rows for this scale; column 13 is left at zero.
            info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
            info_box[:, 0] = cx[fg_inds]
            info_box[:, 1] = cy[fg_inds]
            info_box[:, 2] = scale_ind
            info_box[:, 3:7] = fg_boxes
            info_box[:, 7] = scale_ind_map
            info_box[:, 8:12] = fg_boxes * scale_map / scale
            info_box[:, 12] = labels[gt_inds]
            info_box[:, 14:] = gt_targets
            info_boxes = np.vstack((info_boxes, info_box))

        entry['info_boxes'] = info_boxes
2.add_bbox_regression_targets(roidb)
规范化目标回归值,并返回目标回归值的均值和标准差供测试阶段使用,未见调用
# Normalize the regression targets and return their per-class mean and
# standard deviation, which are needed again at test time.
def add_bbox_regression_targets(roidb):
    """Normalize the regression targets stored in each entry's 'info_boxes'.

    Columns 14:18 of ``roidb[i]['info_boxes']`` hold the four regression
    targets and column 12 holds the ground-truth class label. For every
    foreground class (1 .. num_classes - 1) the per-column mean and standard
    deviation over all images are computed, and the targets are replaced in
    place by ``(target - mean) / std``.

    Args:
        roidb: non-empty sequence of dicts; each must contain 'info_boxes'
            (as added by ``prepare_roidb``), and the first must contain
            'gt_overlaps', whose second dimension gives the class count.

    Returns:
        (means, stds): two flattened arrays of length ``num_classes * 4``.
        Predictions must be un-normalized with these values.

    Raises:
        AssertionError: if ``roidb`` is empty or 'info_boxes' is missing.
    """
    assert len(roidb) > 0
    assert 'info_boxes' in roidb[0], 'Did you call prepare_roidb first?'

    num_images = len(roidb)
    # Infer number of classes from the number of columns in gt_overlaps
    num_classes = roidb[0]['gt_overlaps'].shape[1]

    # Accumulate what is needed for var(x) = E(x^2) - E(x)^2.
    # cfg.EPS keeps the division defined for classes with no samples.
    class_counts = np.zeros((num_classes, 1)) + cfg.EPS
    sums = np.zeros((num_classes, 4))
    squared_sums = np.zeros((num_classes, 4))
    for im_i in range(num_images):
        targets = roidb[im_i]['info_boxes']
        for cls in range(1, num_classes):
            cls_inds = np.where(targets[:, 12] == cls)[0]
            if cls_inds.size > 0:
                class_counts[cls] += cls_inds.size
                sums[cls, :] += targets[cls_inds, 14:].sum(axis=0)
                squared_sums[cls, :] += (targets[cls_inds, 14:] ** 2).sum(axis=0)

    means = sums / class_counts
    # Rounding can push E(x^2) - E(x)^2 slightly below zero, which would turn
    # into NaN under sqrt; clamp so a degenerate column gets std 0 instead.
    variances = np.maximum(squared_sums / class_counts - means ** 2, 0.0)
    stds = np.sqrt(variances)
    # Divide column by column: a column with zero spread is only centred.
    # Checking just the first column could divide other columns by zero, or
    # skip columns whose (non-zero) std is still returned to the caller.
    divisors = np.where(stds > 0, stds, 1.0)

    # Normalize targets in place.
    for im_i in range(num_images):
        targets = roidb[im_i]['info_boxes']
        for cls in range(1, num_classes):
            cls_inds = np.where(targets[:, 12] == cls)[0]
            if cls_inds.size == 0:
                continue
            targets[cls_inds, 14:] -= means[cls, :]
            targets[cls_inds, 14:] /= divisors[cls, :]

    # These values will be needed for making predictions
    # (the predictions will need to be unnormalized and uncentered).
    return means.ravel(), stds.ravel()
3._compute_targets(ex_rois, gt_rois)
根据ex_rois和gt_rois计算回归目标值,与bbox_transform.py中bbox_transform(ex_rois, gt_rois)函数类似,被prepare_roidb(...)函数调用
# Compute regression targets; similar to bbox_transform in bbox_transform.py.
def _compute_targets(ex_rois, gt_rois):
    """Return the (dx, dy, dw, dh) regression targets from ex_rois to gt_rois.

    Both inputs are indexed as ``[:, 0..3]`` = (x1, y1, x2, y2). Widths and
    heights are the coordinate differences plus ``cfg.EPS``. The centre
    offsets are divided by the ex box size, and the size targets are the log
    of the gt/ex size ratio.

    Returns:
        float32 array of shape (ex_rois.shape[0], 4).
    """
    eps = cfg.EPS

    def _extent_and_center(rois):
        # Width, height and centre coordinates of each box.
        width = rois[:, 2] - rois[:, 0] + eps
        height = rois[:, 3] - rois[:, 1] + eps
        center_x = rois[:, 0] + 0.5 * width
        center_y = rois[:, 1] + 0.5 * height
        return width, height, center_x, center_y

    ex_w, ex_h, ex_cx, ex_cy = _extent_and_center(ex_rois)
    gt_w, gt_h, gt_cx, gt_cy = _extent_and_center(gt_rois)

    targets = np.zeros((ex_rois.shape[0], 4), dtype=np.float32)
    targets[:, 0] = (gt_cx - ex_cx) / ex_w
    targets[:, 1] = (gt_cy - ex_cy) / ex_h
    targets[:, 2] = np.log(gt_w / ex_w)
    targets[:, 3] = np.log(gt_h / ex_h)
    return targets