win10 caffe python Faster-RCNN训练自己数据集(转)
一、制作数据集
1. 关于训练的图片
不论你是网上找的图片或者你用别人的数据集,记住一点你的图片不能太小,width和height最好不要小于150。需要是jpeg的图片。
2.制作xml文件
1)LabelImg
如果你的数据集比较小的话,你可以考虑用LabelImg手工打框https://github.com/tzutalin/labelImg。关于labelimg的具体使用方法我在这就不详细说明了,大家可以去网上找一下。labelimg生成的xml直接就能给frcnn训练使用。
2)自己制作xml
如果你的数据集比较小的话,你还可以考虑用上面的方法手工打框。如果你的数据集有1w+你就可以考虑自动生成xml文件。网上有些资料基本用的是matlab坐标生成xml。我给出一段python的生成xml的代码
import os
from xml.dom import minidom
from xml.etree.ElementTree import Element, SubElement, tostring


def write_xml(bbox, w, h, iter,
              jpeg_dir='D:\\py-faster-rcnn\\data\\VOCdevkit2007\\VOC2007\\JPEGImages',
              ann_dir='D:/py-faster-rcnn/data/VOCdevkit2007/VOC2007/Annotations'):
    """Write one PASCAL-VOC style annotation XML for image *iter*.

    Args:
        bbox: list of dicts, one per object, with keys
            'cls', 'xmin', 'ymin', 'xmax', 'ymax'.
        w, h: width and height of the image in pixels.
        iter: image identifier -- the stem of the .jpg / .xml file name.
            (Name kept for backward compatibility even though it shadows
            the builtin.)
        jpeg_dir: directory holding the .jpg images (recorded in <path>);
            change the default to your own path.
        ann_dir: directory the .xml annotation is written into.
    """
    root = Element('annotation')
    SubElement(root, 'folder').text = 'JPEGImages'
    SubElement(root, 'filename').text = iter
    SubElement(root, 'path').text = os.path.join(jpeg_dir, iter + '.jpg')
    source = SubElement(root, 'source')
    SubElement(source, 'database').text = 'Unknown'
    size = SubElement(root, 'size')
    SubElement(size, 'width').text = str(w)
    SubElement(size, 'height').text = str(h)
    SubElement(size, 'depth').text = '3'  # VOC images are 3-channel
    SubElement(root, 'segmented').text = '0'
    for obj in bbox:  # 'obj', not 'object': don't shadow the builtin
        node = SubElement(root, 'object')
        SubElement(node, 'name').text = obj['cls']
        SubElement(node, 'pose').text = 'Unspecified'
        SubElement(node, 'truncated').text = '0'
        SubElement(node, 'difficult').text = '0'
        bndbox = SubElement(node, 'bndbox')
        SubElement(bndbox, 'xmin').text = str(obj['xmin'])
        SubElement(bndbox, 'ymin').text = str(obj['ymin'])
        SubElement(bndbox, 'xmax').text = str(obj['xmax'])
        SubElement(bndbox, 'ymax').text = str(obj['ymax'])
    # Pretty-print with the stdlib (the original relied on lxml's
    # tostring(..., pretty_print=True)).
    xml = minidom.parseString(tostring(root)).toprettyxml(indent='  ')
    # 'with' guarantees the handle is closed; the original leaked it.
    with open(os.path.join(ann_dir, iter + '.xml'), 'w') as f:
        f.write(xml)
3.制作训练、测试、验证集
这个网上可以参考的资料比较多,我直接copy一个小咸鱼的用matlab的代码
我建议train和trainval的部分占得比例可以更大一点
%%
% Build the VOC2007 ImageSets/Main split files (trainval.txt, train.txt,
% test.txt, val.txt) from the already-generated annotation XMLs.
% trainval : test = 50% : 50% of the whole set; train : val = 50% : 50%
% of trainval.  Adjust the two percentages below for your own dataset
% (with little data, keep test/val small).
%%
% ---- edit these four values ----
xmlfilepath = 'E:\Annotations';
txtsavepath = 'E:\ImageSets\Main\';
trainval_percent = 0.5;  % share of the whole set that goes to trainval
train_percent = 0.5;     % share of trainval that goes to train
%%
% Match *.xml explicitly.  The original used dir(xmlfilepath) and
% subtracted 2 for '.' and '..', then indexed with (i+2) -- that breaks
% silently if the folder contains anything besides the XMLs.
xmlfile = dir(fullfile(xmlfilepath, '*.xml'));
numOfxml = length(xmlfile);  % total dataset size

trainval = sort(randperm(numOfxml, floor(numOfxml * trainval_percent)));
test = sort(setdiff(1:numOfxml, trainval));

trainvalsize = length(trainval);  % size of trainval
train = sort(trainval(randperm(trainvalsize, floor(trainvalsize * train_percent))));
val = sort(setdiff(trainval, train));

ftrainval = fopen([txtsavepath 'trainval.txt'], 'w');
ftest = fopen([txtsavepath 'test.txt'], 'w');
ftrain = fopen([txtsavepath 'train.txt'], 'w');
fval = fopen([txtsavepath 'val.txt'], 'w');

for i = 1:numOfxml
    name = xmlfile(i).name(1:end-4);  % strip the '.xml' extension
    if ismember(i, trainval)
        fprintf(ftrainval, '%s\n', name);
        if ismember(i, train)
            fprintf(ftrain, '%s\n', name);
        else
            fprintf(fval, '%s\n', name);
        end
    else
        fprintf(ftest, '%s\n', name);
    end
end

fclose(ftrainval);
fclose(ftrain);
fclose(fval);
fclose(ftest);
4.文件保存路径
jpg,txt,xml分别保存到data\VOCdevkit2007\VOC2007\下的JPEGImages、ImageSets\Main、Annotations文件夹
二、根据自己的数据集修改文件
1.模型配置文件
我用end2end的方式训练,这里我用vgg_cnn_m_1024为例说明。所以我们先打开models\pascal_voc\VGG_CNN_M_1024\faster_rcnn_end2end\train.prototxt,有4处需要修改
# train.prototxt, change 1 of 4: the RoIDataLayer input layer.
layer {
  name: 'input-data'
  type: 'Python'
  top: 'data'
  top: 'im_info'
  top: 'gt_boxes'
  python_param {
    module: 'roi_data_layer.layer'
    layer: 'RoIDataLayer'
    param_str: "'num_classes': 3"  # set to your number of classes + 1 (background)
  }
}
# train.prototxt, change 2 of 4: the ProposalTargetLayer.
layer {
  name: 'roi-data'
  type: 'Python'
  bottom: 'rpn_rois'
  bottom: 'gt_boxes'
  top: 'rois'
  top: 'labels'
  top: 'bbox_targets'
  top: 'bbox_inside_weights'
  top: 'bbox_outside_weights'
  python_param {
    module: 'rpn.proposal_target_layer'
    layer: 'ProposalTargetLayer'
    param_str: "'num_classes': 3"  # set to your number of classes + 1 (background)
  }
}
# train.prototxt, changes 3 and 4 of 4: the two output heads.
layer {
  name: "cls_score"
  type: "InnerProduct"
  bottom: "fc7"
  top: "cls_score"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 3  # set to your number of classes + 1 (background)
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "bbox_pred"
  type: "InnerProduct"
  bottom: "fc7"
  top: "bbox_pred"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 12  # set to (number of classes + 1) * 4
    weight_filler {
      type: "gaussian"
      std: 0.001
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
然后我们修改 models\pascal_voc\VGG_CNN_M_1024\faster_rcnn_end2end\test.prototxt。
# test.prototxt: the same two output heads must be edited as in train.prototxt.
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "cls_score"
  type: "InnerProduct"
  bottom: "fc7"
  top: "cls_score"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 3  # set to your number of classes + 1 (background)
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "bbox_pred"
  type: "InnerProduct"
  bottom: "fc7"
  top: "bbox_pred"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 12  # set to (number of classes + 1) * 4
    weight_filler {
      type: "gaussian"
      std: 0.001
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
另外在 solver里可以调训练的学习率等参数,在这篇文章里不做说明
==================以下修改lib中的文件==================
2.修改imdb.py
- <span style="font-size:14px;"> def append_flipped_images(self):
- num_images = self.num_images
- widths = [PIL.Image.open(self.image_path_at(i)).size[0]
- for i in xrange(num_images)]
- for i in xrange(num_images):
- boxes = self.roidb[i]['boxes'].copy()
- oldx1 = boxes[:, 0].copy()
- oldx2 = boxes[:, 2].copy()
- boxes[:, 0] = widths[i] - oldx2 - 1
- boxes[:, 2] = widths[i] - oldx1 - 1
- for b in range(len(boxes)):
- if boxes[b][2]< boxes[b][0]:
- boxes[b][0] = 0
- assert (boxes[:, 2] >= boxes[:, 0]).all()
- entry = {'boxes' : boxes,
- 'gt_overlaps' : self.roidb[i]['gt_overlaps'],
- 'gt_classes' : self.roidb[i]['gt_classes'],
- 'flipped' : True}
- self.roidb.append(entry)
- self._image_index = self._image_index * 2 </span>
def append_flipped_images(self): num_images = self.num_images widths = [PIL.Image.open(self.image_path_at(i)).size[0] for i in xrange(num_images)] for i in xrange(num_images): boxes = self.roidb[i]['boxes'].copy() oldx1 = boxes[:, 0].copy() oldx2 = boxes[:, 2].copy() boxes[:, 0] = widths[i] - oldx2 - 1 boxes[:, 2] = widths[i] - oldx1 - 1 for b in range(len(boxes)): if boxes[b][2]< boxes[b][0]: boxes[b][0] = 0 assert (boxes[:, 2] >= boxes[:, 0]).all() entry = {'boxes' : boxes, 'gt_overlaps' : self.roidb[i]['gt_overlaps'], 'gt_classes' : self.roidb[i]['gt_classes'], 'flipped' : True} self.roidb.append(entry) self._image_index = self._image_index * 2找到这个函数,并修改为如上
3、修改rpn层的5个文件
在 lib/rpn 目录下,将其中 5 个 python 文件里的 param_str_ 全部改为 param_str
4、修改config.py
将训练和测试的proposals改为gt
# Train using these proposals
__C.TRAIN.PROPOSAL_METHOD = 'gt'
# Test using these proposals
# (the original snippet was missing the closing quote below and would be
# a SyntaxError if pasted verbatim)
__C.TEST.PROPOSAL_METHOD = 'gt'
5、修改pascal_voc.py
因为我们使用VOC来训练,所以这个是我们主要修改的训练的文件。
def __init__(self, image_set, year, devkit_path=None):
    """Initialise the custom pascal_voc imdb.

    Only self._classes differs from the stock file: keep
    '__background__' at index 0 and list your own labels after it.
    """
    imdb.__init__(self, 'voc_' + year + '_' + image_set)
    self._year = year
    self._image_set = image_set
    self._devkit_path = self._get_default_path() if devkit_path is None \
        else devkit_path
    self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)
    self._classes = ('__background__',  # always index 0
                     'cn-character', 'seal')
    self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
    self._image_ext = '.jpg'
    self._image_index = self._load_image_set_index()
    # Default to roidb handler
    self._roidb_handler = self.selective_search_roidb
    self._salt = str(uuid.uuid4())
    self._comp_id = 'comp4'
在self.classes这里,'__background__'是我们的背景类,不要动它。下面的改为你自己标签的内容。
修改以下2段内容。否则你的test部分一定会出问题。
- def _get_voc_results_file_template(self):
- # VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt
- filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt'
- path = os.path.join(
- self._devkit_path,
- 'VOC' + self._year,
- ImageSets,
- 'Main',
- '{}' + '_test.txt')
- return path
def _get_voc_results_file_template(self): # VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt' path = os.path.join( self._devkit_path, 'VOC' + self._year, ImageSets, 'Main', '{}' + '_test.txt') return path
- def _write_voc_results_file(self, all_boxes):
- for cls_ind, cls in enumerate(self.classes):
- if cls == '__background__':
- continue
- print 'Writing {} VOC results file'.format(cls)
- filename = self._get_voc_results_file_template().format(cls)
- with open(filename, 'w+') as f:
- for im_ind, index in enumerate(self.image_index):
- dets = all_boxes[cls_ind][im_ind]
- if dets == []:
- continue
- # the VOCdevkit expects 1-based indices
- for k in xrange(dets.shape[0]):
- f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
- format(index, dets[k, -1],
- dets[k, 0] + 1, dets[k, 1] + 1,
- dets[k, 2] + 1, dets[k, 3] + 1))
def _write_voc_results_file(self, all_boxes): for cls_ind, cls in enumerate(self.classes): if cls == '__background__': continue print 'Writing {} VOC results file'.format(cls) filename = self._get_voc_results_file_template().format(cls) with open(filename, 'w+') as f: for im_ind, index in enumerate(self.image_index): dets = all_boxes[cls_ind][im_ind] if dets == []: continue # the VOCdevkit expects 1-based indices for k in xrange(dets.shape[0]): f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. format(index, dets[k, -1], dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1))
三、end2end训练
1、删除缓存文件
每次训练前将data\cache 和 data\VOCdevkit2007\annotations_cache中的文件删除。
2、开始训练
在py-faster-rcnn的根目录下打开git bash输入
- <span style="font-size:18px;">./experiments/scripts/faster_rcnn_end2end.sh 0 VGG_CNN_M_1024 pascal_voc</span>
./experiments/scripts/faster_rcnn_end2end.sh 0 VGG_CNN_M_1024 pascal_voc
当然你可以去experiments\scripts\faster_rcnn_end2end.sh中调自己的训练的一些参数,也可以用VGG16、ZF模型去训练。我这里就用默认给的参数说明。
出现了这种东西的话,那就是训练成功了。用vgg1024的话还是很快的,还是要看你的配置,我用1080ti的话也就85min左右。我就没有让他训练结束了。
四、测试
训练完成之后,将output中的最终模型拷贝到data/faster_rcnn_models,修改tools下的demo.py,我是使用VGG_CNN_M_1024这个中型网络,不是默认的ZF,所以要改的地方挺多
1. 修改class
CLASSES = ('__background__', 'cn-character', 'seal')  # '__background__'保留在第一位,后面改成你自己训练的类别
2. 增加你自己训练的模型
NETS = {'vgg16': ('VGG16', 'VGG16_faster_rcnn_final.caffemodel'),
        'myvgg1024': ('VGG_CNN_M_1024', '你训练得到的模型文件名.caffemodel')}  # 增加你自己训练的模型
3. 修改prototxt,如果你用的是ZF,就不用改了
prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0],
                        'faster_rcnn_end2end', 'test.prototxt')  # 改为 end2end 的 test.prototxt 路径
if __name__ == '__main__':
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals
    args = parse_args()

    prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0],
                            'faster_rcnn_end2end', 'test.prototxt')
    caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
                              NETS[args.demo_net][1])

    if not os.path.isfile(caffemodel):
        raise IOError(('{:s} not found.\nDid you run ./data/script/'
                       'fetch_faster_rcnn_models.sh?').format(caffemodel))

    if args.cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        cfg.GPU_ID = args.gpu_id
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    print('\n\nLoaded network {:s}'.format(caffemodel))

    # Warm up on a dummy image so the first real detection isn't slow.
    im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
    for i in range(2):
        _, _ = im_detect(net, im)

    # List the images you want to test; they must live in data/demo.
    im_names = ['f1.jpg', 'f8.jpg', 'f7.jpg', 'f6.jpg',
                'f5.jpg', 'f4.jpg', 'f3.jpg', 'f2.jpg']
    for im_name in im_names:
        print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
        print('Demo for data/demo/{}'.format(im_name))
        demo(net, im_name)

    plt.show()
在这个部分,将你要测试的图片写在im_names里,并把图片放在data\demo这个文件夹下。
4. 开始检测
执行 ./tools/demo.py --net myvgg1024
假如不想那么麻烦输入参数,可以在demo的parse_args()里修改默认参数
parser.add_argument('--net', dest='demo_net', help='Network to use [myvgg1024]',
                    choices=NETS.keys(), default='myvgg1024')
这样只需要输入 ./tools/demo.py 就可以了
本文来自博客园,作者:NLazyo,转载请注明原文链接:https://www.cnblogs.com/bile/p/9110954.html