Tensorflow版Faster RCNN源码解析(TFFRCNN) (20) datasets/pascal_voc.py

本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记

---------------个人学习笔记---------------

----------------本文作者疆--------------

------点击此处链接至博客园原文------

 

定义了pascal_voc类,继承自imdb类,类中定义了18个函数

1.__init__(self,image_set,year,devkit_path=None)构造函数,初始化部分变量

这里面部分变量未在本脚本中被更新如self._num_classes和self._roidb

# pascal_voc类继承自imdb类
class pascal_voc(imdb):
    # image_set为数据集划分名称(如trainval等),数据集全名如voc_2007_trainval
    def __init__(self, image_set, year, devkit_path=None):
        """Set up a PASCAL VOC image database for one split of one year.

        Args:
            image_set: Name of the split, e.g. 'trainval'; it selects the
                file ImageSets/Main/<image_set>.txt.
            year: Dataset year as a string, e.g. '2007'.
            devkit_path: Root directory of the VOC devkit. When None, the
                value returned by self._get_default_path() is used.

        Raises:
            AssertionError: If the devkit directory or the VOC<year> data
                directory does not exist.
        """
        # The base class is given the dataset name, e.g. 'voc_2007_trainval'.
        imdb.__init__(self, 'voc_' + year + '_' + image_set)
        self._year = year
        self._image_set = image_set
        if devkit_path is None:
            devkit_path = self._get_default_path()
        self._devkit_path = devkit_path
        # VOC<year> sub-directory that holds the data for this year.
        self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)
        # Check the directories before anything reads from them, so a wrong
        # devkit location is reported as such instead of surfacing as a
        # missing image-set file inside _load_image_set_index().
        assert os.path.exists(self._devkit_path), \
                'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
                'Path does not exist: {}'.format(self._data_path)
        self._classes = ('__background__', # always index 0
                         'aeroplane', 'bicycle', 'bird', 'boat',
                         'bottle', 'bus', 'car', 'cat', 'chair',
                         'cow', 'diningtable', 'dog', 'horse',
                         'motorbike', 'person', 'pottedplant',
                         'sheep', 'sofa', 'train', 'tvmonitor')
        # A custom single-class dataset would instead use
        # ('__background__', 'craft').
        # Map each class name to its position in the class tuple.
        # NOTE(review): self.classes / self.num_classes are presumably
        # properties of imdb backed by self._classes -- confirm in imdb.py.
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._image_ext = '.jpg'
        # Image identifiers (no extension) listed in the image-set file.
        self._image_index = self._load_image_set_index()
        # Default roidb handler: the bound method itself is stored here, it
        # is not called at this point.
        # (alternative handler: self.selective_search_roidb)
        self._roidb_handler = self.gt_roidb
        # Random per-instance suffix; _get_comp_id() appends it to the
        # competition id when config['use_salt'] is true.
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'
        # PASCAL specific config options
        self.config = {'cleanup'     : True,
                       'use_salt'    : True,
                       'use_diff'    : False,
                       'matlab_eval' : False,
                       'rpn_file'    : None,
                       'min_size'    : 2}

2.image_path_at(self,i)获取数据集第i张图像的绝对路径,未见调用

    # 获取数据集第i张图像的绝对路径
    def image_path_at(self, i):
        """Return the path of the image at position ``i`` of the image set.

        ``i`` indexes ``self._image_index``, the list of image identifiers
        (file names without extension); the identifier is resolved by
        ``image_path_from_index``.
        """
        identifier = self._image_index[i]
        return self.image_path_from_index(identifier)

3.image_path_from_index(self,index)根据图像不含后缀的名称(如000001)获取该图像绝对路径,被image_path_at(...)调用

    # 根据图像不含后缀的名称获取图像绝对路径
    def image_path_from_index(self, index):
        """Build the image path for the identifier ``index``.

        The path is <data_path>/JPEGImages/<index><image_ext>.

        Raises:
            AssertionError: If no file exists at the resulting path.
        """
        file_name = index + self._image_ext
        image_path = os.path.join(self._data_path, 'JPEGImages', file_name)
        assert os.path.exists(image_path), \
                'Path does not exist: {}'.format(image_path)
        return image_path

4._load_image_set_index(self)

获得数据集图像名称构成的列表,如trainval数据集从trainval.txt中取出[000001, 000002, ...]图像名称列表,被__init__(...)调用

    # 获得数据集图像名称构成的列表,如[000001, 000003, ...]
    def _load_image_set_index(self):   
        """
        Load the indexes listed in this dataset's image set file.
        """
        # Example path to image set file:
        # 如E:\TFFRCNN\data\VOCdevkit2007\VOC2007\ImageSets\Main\trainval.txt
        # 该路劲下存储相关图像名称信息,如000001 000002...
        image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',
                                      self._image_set + '.txt')
        assert os.path.exists(image_set_file), \
                'Path does not exist: {}'.format(image_set_file)
        with open(image_set_file) as f:
            # readlines()函数一行一行读取
            # x.strip(rm)函数 删除x字符串中开头结尾处rm删除序列的字符
            image_index = [x.strip() for x in f.readlines()]                                                
        return image_index

5._get_default_path(self)

获取数据集文件夹默认路径,如E:\TFFRCNN\data\VOCdevkit2007,被__init__(...)调用

    # 获取pascal voc数据集文件夹默认路径
    def _get_default_path(self):
        """Return the default location of the VOC devkit for this year.

        The result is the directory 'VOCdevkit<year>' under cfg.DATA_DIR.
        """
        devkit_dir = 'VOCdevkit' + self._year
        return os.path.join(cfg.DATA_DIR, devkit_dir)

6.gt_roidb(self)

从/向cache文件夹中(如E:\TFFRCNN\data\cache\voc_2007_trainval_gt_roidb.pkl)cPickle序列化读/写gt roi相关信息。若缓存文件已存在,表明曾经创建过,则读;否则写入,供下次读取。该函数在__init__(...)中以不加()的方式被引用(self._roidb_handler = self.gt_roidb),即只保存该方法本身的引用而不立即执行,但本脚本中未见通过self._roidb_handler对其进行调用。该函数通过调用_load_pascal_annotation(...)得到由各图像gt roi信息字典组成的列表gt_roidb。实际上它还被selective_search_roidb(...)、rpn_roidb(...)调用,但未使用SS产生roi。

 # 从/向cache文件夹中cPickle序列化读/写groundtruth roi相关信息
    # cache路径存在则读,否则则写
    def gt_roidb(self):
        """Return the ground-truth roidb, using a pickle cache file.

        The cache file is <cache_path>/<name>_gt_roidb.pkl. When it exists its
        content is returned; otherwise one annotation dict per image is built
        with _load_pascal_annotation(), written to the cache and returned.
        """
        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if not os.path.exists(cache_file):
            # First call: parse every annotation, then persist the result so
            # later calls can skip the parsing.
            annotations = [self._load_pascal_annotation(image_id)
                           for image_id in self.image_index]
            with open(cache_file, 'wb') as fid:
                cPickle.dump(annotations, fid, cPickle.HIGHEST_PROTOCOL)
            print('wrote gt roidb to {}'.format(cache_file))
            return annotations
        # NOTE(review): unpickling can execute code; the cache file must come
        # from a trusted location.
        with open(cache_file, 'rb') as fid:
            cached = cPickle.load(fid)
        print('{} gt roidb loaded from {}'.format(self.name, cache_file))
        return cached

7.selective_search_roidb(self)

与SS算法相关,未使用,类似于gt_roidb(...),从/向cache文件夹中(如E:\TFFRCNN\data\cache\voc_2007_trainval_selective_search_roidb.pkl)cPickle序列化读/写SS roi(以及合并进来的gt roi)相关信息,未见调用。

该函数表明(VOC2007数据集上,可见if判断语句)self.roidb既包含了gt roi也包含了(由SS)产生的roi

 # 与SS算法有关,未使用
    def selective_search_roidb(self):
        """Return the selective-search roidb, using a pickle cache file.

        The cache file is <cache_path>/<name>_selective_search_roidb.pkl.
        When it is missing, the roidb is built and cached: for year 2007 or
        any image set other than 'test', the ground-truth roidb is merged
        with the selective-search roidb; otherwise only the selective-search
        boxes are loaded (no ground truth is passed).
        """
        cache_file = os.path.join(self.cache_path,
                                  self.name + '_selective_search_roidb.pkl')
        if os.path.exists(cache_file):
            # NOTE(review): unpickling can execute code; the cache file must
            # come from a trusted location.
            with open(cache_file, 'rb') as fid:
                cached = cPickle.load(fid)
            print('{} ss roidb loaded from {}'.format(self.name, cache_file))
            return cached

        if int(self._year) != 2007 and self._image_set == 'test':
            roidb = self._load_selective_search_roidb(None)
        else:
            ground_truth = self.gt_roidb()
            proposals = self._load_selective_search_roidb(ground_truth)
            roidb = imdb.merge_roidbs(ground_truth, proposals)

        with open(cache_file, 'wb') as fid:
            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote ss roidb to {}'.format(cache_file))
        return roidb

8.rpn_roidb(self)

返回rpn_roidb,从该函数同样看出(int(self._year) == 2007 or self._image_set != 'test')时self.roidb来源包括gt_roidb和rpn_roidb,未见调用(应该在某个地方被调用了!)

    def rpn_roidb(self):
        """Return the roidb built from RPN proposals.

        For year 2007 or any image set other than 'test', the ground-truth
        roidb is merged with the RPN roidb; otherwise only the RPN boxes are
        loaded (no ground truth is passed).
        """
        if int(self._year) != 2007 and self._image_set == 'test':
            return self._load_rpn_roidb(None)
        ground_truth = self.gt_roidb()
        proposals = self._load_rpn_roidb(ground_truth)
        return imdb.merge_roidbs(ground_truth, proposals)

9._load_rpn_roidb(self,gt_roidb)

调用imdb.py中的create_roidb_from_box_list(...)函数,更新由RPN产生的rpn_roidb列表中(各图像rpn_roi信息构成的)字典内容。传入gt_roidb是为了得到‘gt_overlaps’;其他字段中,‘gt_classes’全0且不更新,‘flipped’为False,‘seg_areas’全0且不更新(此处0表明该roi非gt roi,而是由RPN产生的roi,可见create_roidb_from_box_list(...)函数)。该函数被rpn_roidb(...)调用。应注意这里self.config['rpn_file']表示rpn_roidb序列化内容的存储路径,在__init__()构造函数中初值为None,在调用该函数之前self.config应在某处更新

注意:rpn_roidb为各张图像产生roi相关信息构成的字典组成的列表,字典内容见如下create_roidb_from_box_list(...)函数

    def _load_rpn_roidb(self, gt_roidb):
        # __init__()构造函数中该字段初值为None,在本句之前self.config应在某处更新!
        # 该字段为rpn_roidb序列化内容的存储路径
        filename = self.config['rpn_file']   
        print 'loading {}'.format(filename)
        assert os.path.exists(filename), \
               'rpn data not found at: {}'.format(filename)
        with open(filename, 'rb') as f:
            box_list = cPickle.load(f)
        return self.create_roidb_from_box_list(box_list, gt_roidb)

----------------注意以下函数中gt_classes为全0表明:对应的roi不是gt roi,这也解释了test.py中的遗留的问题----------------------

对于各张图像中由RPN产生的roi,与gt_roi计算IoU值,最大值对应的gt_roi作为gt,因此overlaps仅对应类别位置有>0的IoU值,其余位置全0,但是这里并没有更新‘gt_classes’字段为gt_roi对应的类别,而是设置为全0,同时‘seg_areas’也被设置为全0

    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build a roidb from per-image proposal boxes.

        Args:
            box_list: One box array per image, in image order; each array
                has one row per box.
            gt_roidb: Ground-truth roidb aligned with ``box_list``, or None.
                When given, it is only used to fill 'gt_overlaps'.

        Returns:
            A list with one dict per image holding 'boxes', 'gt_classes'
            (all zeros), 'gt_overlaps' (CSR matrix of shape
            (num_boxes, num_classes)), 'flipped' (False) and 'seg_areas'
            (all zeros).

        Raises:
            AssertionError: If len(box_list) differs from self.num_images.
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i, boxes in enumerate(box_list):
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # np.float64 is what the removed alias np.float resolved to.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                # For every proposal keep its best-matching ground-truth box.
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                # Record the best overlap in the column of that box's class;
                # every other entry of the row stays 0.
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            overlaps = scipy.sparse.csr_matrix(overlaps)
            # 'gt_classes' and 'seg_areas' are left at zero here: they are
            # not derived from the matched ground-truth box.
            roidb.append({
                'boxes' : boxes,
                'gt_classes' : np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb

10._load_selective_search_roidb(self,gt_roidb)

类似于_load_rpn_roidb(self,gt_roidb),返回由SS算法得到的roidb数据,由于算法中未使用,不过多解释,被selective_search_roidb(...)调用

    def _load_selective_search_roidb(self, gt_roidb):
        """Load precomputed selective-search boxes and build a roidb.

        The boxes are read from the 'boxes' variable of
        <cfg.DATA_DIR>/selective_search_data/<name>.mat.

        Args:
            gt_roidb: Ground-truth roidb forwarded to
                create_roidb_from_box_list(), or None.

        Raises:
            AssertionError: If the .mat file does not exist.
        """
        mat_path = os.path.join(cfg.DATA_DIR,
                                'selective_search_data',
                                self.name + '.mat')
        filename = os.path.abspath(mat_path)
        assert os.path.exists(filename), \
               'Selective search data not found at: {}'.format(filename)
        # Flatten to one entry per image.
        raw_data = sio.loadmat(filename)['boxes'].ravel()
        box_list = []
        for raw_boxes in raw_data:
            # Swap the column pairs and shift by one.
            # NOTE(review): presumably converts 1-based (y1, x1, y2, x2)
            # boxes to 0-based (x1, y1, x2, y2) -- confirm against the data.
            boxes = raw_boxes[:, (1, 0, 3, 2)] - 1
            boxes = boxes[ds_utils.unique_boxes(boxes), :]
            large_enough = ds_utils.filter_small_boxes(
                boxes, self.config['min_size'])
            box_list.append(boxes[large_enough, :])
        return self.create_roidb_from_box_list(box_list, gt_roidb)

11._load_pascal_annotation(self, index)

根据不含后缀的图像名称(如index为000001)读取相应xml文件,获得该图像gt roi相关信息构成的字典,字典包含'boxes'(shape为(None,4),存储该图像所有gt roi坐标信息)、'gt_classes'(shape为(None,),存储该图像所有gt roi类别索引信息)、'gt_ishard'(shape为(None,),存储该图像所有gt roi是否为难例)、'gt_overlaps'(稀疏矩阵未压缩前shape为(None,21),存储该图像所有gt roi IOU值,对应gt类别位置其值为1.0,其他全0)、'flipped'(为false)、'seg_areas'(shape为(None,),存储该图像所有gt roi面积)字段。数据集全部图像的gt roi信息字典组成的列表为gt_roidb,该函数被gt_roidb(...)调用,可以看到gt_roidb与rpn_roidb在结构上是一致的。其中overlaps = scipy.sparse.csr_matrix(overlaps)将稠密矩阵转换为CSR(按行压缩的稀疏)格式,只保存非零元素,其打印形式见下文的测试脚本

    # 根据不含后缀的图像名称加载图片,读取xml文件获取groundtruth roi相关信息
    def _load_pascal_annotation(self, index):
        """
        Load image and bounding boxes info from XML file in the PASCAL VOC
        format.
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
        tree = ET.parse(filename)
        objs = tree.findall('object')
        # if not self.config['use_diff']:
        #     # Exclude the samples labeled as difficult
        #     non_diff_objs = [
        #         obj for obj in objs if int(obj.find('difficult').text) == 0]
        #     # if len(non_diff_objs) != len(objs):
        #     #     print 'Removed {} difficult objects'.format(
        #     #         len(objs) - len(non_diff_objs))
        #     objs = non_diff_objs
        num_objs = len(objs)
        # 初始化boxes,建立一个shape为(num_objs, 4)的全0数组,4列表示某个object gt bbox坐标
        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        # 初始化gt_classes,建立一个shape为(num_objs)的向量,pascal voc数据集对应值为1--21中的任一个
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        # 初始化overlaps,建立一个shape为(num_objs, self.num_classes)的全0数组,gt roi对应类别所在列为1,其余全0
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        # "Seg" area for pascal is just the box area
        # 存储gt roi面积
        seg_areas = np.zeros((num_objs), dtype=np.float32)
        # 存储是否为难例(0或1,1表示hard )
        ishards = np.zeros((num_objs), dtype=np.int32)

        # Load object bounding boxes into a data frame.
        # 对该图像所有的obj循环处理,存储相应值
        for ix, obj in enumerate(objs):
            bbox = obj.find('bndbox')
            # Make pixel indexes 0-based
            # 记录gt roi位置信息,这里为何要减1
            x1 = float(bbox.find('xmin').text) - 1
            y1 = float(bbox.find('ymin').text) - 1
            x2 = float(bbox.find('xmax').text) - 1
            y2 = float(bbox.find('ymax').text) - 1

            diffc = obj.find('difficult')
            difficult = 0 if diffc == None else int(diffc.text)
            ishards[ix] = difficult
            # self._class_to_ind中存放的是{'__background__':0,'craft':1  ...}key-value 字典
            # 取出类别名对应的index
            cls = self._class_to_ind[obj.find('name').text.lower().strip()]
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            # 生成类似于one-hot编码[[0,0,0,0,1,0,0,0,...][0,0,0,0,1,0,0,0,...]]
            overlaps[ix, cls] = 1.0
            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)
        # 对于那些零元素数目远远多于非零元素数目,并且非零元素的分布没有规律的矩阵称为稀疏矩阵、存储和计算更为高效
        # 将overlaps稀疏矩阵压缩!!!
        # 如(0,0) 1.0 (1,2) 1.0等???未查到相关内容
        overlaps = scipy.sparse.csr_matrix(overlaps)
        # 该图像gt roi信息构成的字典
        return {'boxes' : boxes,               # (None,4)
                'gt_classes': gt_classes,      # (None,1)
                'gt_ishard': ishards,          # (None,1)
                'gt_overlaps' : overlaps,      # 压缩前为(None,21)  压缩后的,形式见上
                'flipped' : False,             # 1
                'seg_areas' : seg_areas}       # (None,1)

xml文件示例

<annotation>
    <folder>VOC2007</folder>
    <filename>000001.jpg</filename>
    <source>
        <database>My Database</database>
        <annotation>VOC2007</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>sunyifeng</name>
    </owner>
    <size>
        <width>1920</width>
        <height>1080</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>craft</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>963</xmin>
            <ymin>696</ymin>
            <xmax>1038</xmax>
            <ymax>739</ymax>
        </bndbox>
    </object>
</annotation>
# -*- coding:utf-8 -*-
# Author: WUJiang
# Demo: show how a dense overlaps matrix prints once converted to CSR form.

import numpy as np
import scipy.sparse

# Two objects, 21 PASCAL VOC classes: one row per object, 1.0 in the column
# of the object's class.
overlaps = np.array([
    [1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
])
# Only the non-zero entries are listed, as "(row, col) value":
#   (0, 0) 1.0
#   (1, 2) 1.0
print(scipy.sparse.csr_matrix(overlaps))
View Code

12._get_comp_id(self)

返回comp_id字符串(use_salt为True时为'comp4_'加上随机生成的uuid,否则为'comp4'),被_get_voc_results_file_template(...)和_do_matlab_eval(...)调用

    def _get_comp_id(self):   
        # 其中self._salt = str(uuid.uuid4())
        # self._comp_id = 'comp4'  use_salt=True
        comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt']  
            else self._comp_id)
        return comp_id

13._get_voc_results_file_template(self)

返回PASCAL VOC各个类别self._image_set(如test 检测)结果存储路径模板,如/TFFRCNN/data/VOCdevkit2007/results/VOC2007/Main/<comp_id>_det_test_bus.txt,被_write_voc_results_file(...)调用,针对各类保存的检测结果,将在evaluate_detections(...)中被删除

    # 返回PASCAL VOC各个类别self._image_set(如test 检测)结果存储路径模板
    def _get_voc_results_file_template(self): 
        # .../results/VOC2007/Main/<comp_id>_det_test_xxxxxxxxx.txt
        filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt'
        filedir = os.path.join(self._devkit_path, 'results', 'VOC' + self._year, 'Main')
        if not os.path.exists(filedir):
            os.makedirs(filedir)
        path = os.path.join(filedir, filename)
        return path

14._write_voc_results_file(self,all_boxes)

为各类检测结果写一个txt文件,如..VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt,其中每行分别为不含后缀的图像名、该图像某box的置信得分、该box的4维坐标,被evaluate_detections(...)调用

    # 为每类检测结果写一个txt文件
    # 注意传入参数all_boxes
    def _write_voc_results_file(self, all_boxes):
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            print 'Writing {} VOC results file'.format(cls)
            # 如VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt
            filename = self._get_voc_results_file_template().format(cls)
            with open(filename, 'wt') as f:
                # self.image_index 不含后缀的图像名组成的列表
                for im_ind, index in enumerate(self.image_index):  
                    # 遍历每一张图像,取出对应图像某类的检测结果
                    dets = all_boxes[cls_ind][im_ind]
                    if dets == []:
                        continue
                    # the VOCdevkit expects 1-based indices  索引
                    # 逐行写入:不含后缀图像名  该图像某box置信得分  该box四维坐标+1
                    for k in xrange(dets.shape[0]):
                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                format(index, dets[k, -1],                    
                                       dets[k, 0] + 1, dets[k, 1] + 1,
                                       dets[k, 2] + 1, dets[k, 3] + 1))

15.362

 

posted @ 2019-09-06 20:54  JiangJ~  阅读(751)  评论(0编辑  收藏  举报