欢迎来到我的博客园,祝大家学有所成,早点实现自己的人生理想。

Python+H5py实现将SVHN样本库转换为FasterRcnn训练样本

一、上代码

import os
import h5py

svhnPath = 'D:\\Project\\AIProject\\SVHNClassifier\\data'


def loadSvhn(path, subdir):
    """Load the bounding-box annotations for every PNG image in one folder.

    The folder ``path/subdir`` is scanned for ``*.png`` files, and the
    matching entries are read from the ``digitStruct.mat`` file stored in
    that same folder.

    Args:
        path: Root directory that contains the sub-folders.
        subdir: Name of the sub-folder to process (e.g. ``'train'``).

    Returns:
        A list with one dict per image::

            {'dir': <folder>, 'file': (<stem>, '.png'),
             'boxes': {'label': [...], 'left': [...], 'top': [...],
                       'width': [...], 'height': [...]}}

        The list is empty when the folder holds no PNG files; in that case
        the annotation file is not opened at all.

    Raises:
        ValueError: If a PNG file name (without extension) is not an integer.
        OSError: If the folder or ``digitStruct.mat`` cannot be opened.
    """
    print('process folder : %s' % subdir)
    # Use the ``path`` argument (the caller's root) instead of a module global.
    folder = os.path.join(path, subdir)
    filenames = [parts
                 for parts in map(os.path.splitext, os.listdir(folder))
                 if parts[1] == '.png']
    datasets = []
    if not filenames:
        # Nothing to annotate, so there is no reason to open the .mat file.
        return datasets

    filecounts = len(filenames)
    # The context manager guarantees the HDF5 handle is released, even on error.
    with h5py.File(name=os.path.join(folder, 'digitStruct.mat'), mode='r') as svhnMat:
        bboxRefs = svhnMat['digitStruct']['bbox']
        for idx, nameParts in enumerate(filenames):
            boxes = {}
            # Image "<k>.png" is looked up at row k - 1 of the bbox table.
            item = bboxRefs[int(nameParts[0]) - 1].item()
            for key in ['label', 'left', 'top', 'width', 'height']:
                attr = svhnMat[item][key]
                # ``dataset[()]`` replaces ``Dataset.value``, which was
                # removed in h5py 3.x; read the dataset once, not per element.
                raw = attr[()]
                if len(attr) > 1:
                    # Several boxes: each row holds a reference to a 1x1 dataset.
                    values = [svhnMat[raw[i].item()][()][0][0]
                              for i in range(len(raw))]
                else:
                    # Single box: the value is stored inline.
                    values = [raw[0][0]]
                boxes[key] = values
            datasets.append({'dir': folder, 'file': nameParts, 'boxes': boxes})
            if idx % 10 == 0:
                print('-- loading %d / %d' % (idx, filecounts))
    return datasets


if __name__ == '__main__':
    # Convert each folder in turn; every image gets a sibling "<stem>.txt"
    # file holding one "label,left,top,width,height" line per box.
    for sub_dir in ['extra', 'train']:
        records = loadSvhn(svhnPath, sub_dir)
        # Shape of one record, for reference:
        #   {'dir': './', 'file': ('01', '.png'),
        #    'boxes': {'label': ['0'], 'left': [12], 'top': [10],
        #              'width': [20], 'height': [30]}}
        print('processing locations to txt file ...')
        for record in records:
            target = os.path.join(record['dir'], record['file'][0] + '.txt')
            box = record['boxes']
            digits = box['label']
            rows = []
            with open(target, mode='w', encoding='utf-8') as out:
                for n, digit in enumerate(digits):
                    left, top = box['left'][n], box['top'][n]
                    width, height = box['width'][n], box['height'][n]
                    # The label is written as an integer; coordinates are
                    # written exactly as loaded.
                    rows.append('%s,%s,%s,%s,%s' % (int(digit), left, top, width, height))
                out.write('\n'.join(rows))
        print('done.')

二、效果

 

posted @ 2017-12-19 11:40  宋兴柱  阅读(1228)  评论(0)  编辑  收藏  举报