【python / mxnet / gluoncv / jupyter notebook】变换场景的同一行人多重识别
程序环境为高性能集群:
CPU:Intel Xeon Gold 6140 Processor * 2(共36核心)
内存:512GB RAM
GPU:Tesla P100-PCIE-16GB * 2
数据集和源代码可以在此处获得:
tutorials:https://github.com/wnm1503303791/pycode/tree/master/gluoncv/re-id/baseline
In [ ]:
#market1501.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function, division
import json, os
from os import path as osp
from zipfile import ZipFile
from gluoncv.utils import download
def extract(fpath, exdir):
    """Unpack the zip archive at *fpath* into the directory *exdir*."""
    print("Extracting zip file")
    archive = ZipFile(fpath)
    try:
        archive.extractall(path=exdir)
    finally:
        archive.close()
    print("Extracting Done")
def make_list(exdir):
    """Build the 'train.txt' label list for the Market-1501 training set.

    Scans ``<exdir>/bounding_box_train`` for images named like
    ``0002_c1s1_000451_03.jpg`` (``<pid>_c<cam>...``), groups them by person
    id, and writes one line per image to ``<exdir>/train.txt`` in the form
    ``<filename> <label> <camera>``, where <label> is a dense 0-based index
    assigned per person id in sorted-filename order.
    """
    train_dir = osp.join(exdir, "bounding_box_train")
    train_list = {}
    for _, _, files in os.walk(train_dir, topdown=False):
        # Sort filenames so label assignment is deterministic; os.walk's
        # file order is filesystem-dependent.
        for name in sorted(files):
            # endswith avoids matching stray names such as 'foo.jpg.bak'.
            if not name.endswith('.jpg'):
                continue
            name_split = name.split('_')
            pid = name_split[0]
            pcam = name_split[1][1]  # camera digit from the 'c<N>...' token
            train_list.setdefault(pid, []).append(
                {"name": name, "pid": pid, "pcam": pcam})
    with open(osp.join(exdir, 'train.txt'), 'w') as f:
        # enumerate() over insertion order gives each person id a dense label
        for i, key in enumerate(train_list):
            for item in train_list[key]:
                f.write(item['name'] + " " + str(i) + " " + item["pcam"] + "\n")
    print("Make Label List Done")
def main():
    """Download (if needed), extract, and index the Market-1501 dataset.

    Places everything under ``~/.mxnet/datasets``; skips any step whose
    output already exists.
    """
    name = "Market-1501-v15.09.15"
    url = "http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/"+name+".zip"
    root = osp.expanduser("~/.mxnet/datasets")
    if not os.path.exists(root):
        # makedirs also creates the missing parent (~/.mxnet);
        # os.mkdir would raise FileNotFoundError on a fresh machine.
        os.makedirs(root)
    fpath = osp.join(root, name + '.zip')
    exdir = osp.join(root, name)
    if not os.path.exists(fpath):
        # third argument False -> do not overwrite an existing file
        download(url, fpath, False)
    if not osp.isdir(exdir):
        # only extract and index once; an existing exdir is left untouched
        extract(fpath, root)
        make_list(exdir)

if __name__ == '__main__':
    main()
In [5]:
! python market1501.py
In [ ]:
#train.py
from __future__ import division
import argparse, datetime, os
import logging
logging.basicConfig(level=logging.INFO)
import mxnet as mx
from mxnet import gluon, nd
from mxnet.gluon.model_zoo import vision as models
from mxnet.gluon.data.vision import transforms
from mxnet import autograd
from networks import resnet18, resnet34, resnet50
from gluoncv.data.market1501.data_read import ImageTxtDataset
from gluoncv.data.market1501.label_read import LabelList
from gluoncv.data.transforms.block import RandomCrop
# CLI
def _str2bool(v):
    """Parse a boolean CLI value; plain ``type=bool`` treats 'False' as True."""
    return str(v).lower() in ('1', 'true', 'yes', 'y', 't')

parser = argparse.ArgumentParser(description='Train a model for image classification.')
parser.add_argument('--img-height', type=int, default=384,
                    help='the height of image for input')
parser.add_argument('--img-width', type=int, default=128,
                    help='the width of image for input')
parser.add_argument('--batch-size', type=int, default=32,
                    help='training batch size per device (CPU/GPU).')
parser.add_argument('--num-workers', type=int, default=8,
                    help='the number of workers for data loader')
parser.add_argument('--dataset-root', type=str, default="~/.mxnet/datasets",
                    help='root directory that holds the datasets')
parser.add_argument('--dataset', type=str, default="market1501",
                    help='name of the dataset to train on')
parser.add_argument('--num-gpus', type=int, default=1,
                    help='number of gpus to use.')
parser.add_argument('--warmup', type=_str2bool, default=True,
                    help='whether to linearly warm up the learning rate')
parser.add_argument('--epochs', type=str, default="5,25,50,75",
                    help='comma-separated schedule: warmup end, decay steps, total epochs')
parser.add_argument('--ratio', type=float, default=1.,
                    help="ratio of training set to all set")
parser.add_argument('--pad', type=int, default=10,
                    help='padding (pixels) used by the random-crop augmentation')
parser.add_argument('--lr', type=float, default=3.5e-4,
                    help='learning rate. default is 3.5e-4.')
# '--momentum' is the conventional spelling; the original single-dash
# '-momentum' is kept as an alias for backward compatibility.
parser.add_argument('--momentum', '-momentum', type=float, default=0.9,
                    help='momentum value for optimizer, default is 0.9.')
parser.add_argument('--wd', type=float, default=5e-4,
                    help='weight decay rate. default is 5e-4.')
parser.add_argument('--seed', type=int, default=613,
                    help='random seed to use. Default=613.')
# must be float: the decay factor is fractional (int('0.1') would raise
# when the value is passed on the command line).
parser.add_argument('--lr-decay', type=float, default=0.1,
                    help='multiplicative learning-rate decay factor')
parser.add_argument('--hybridize', type=_str2bool, default=True,
                    help='whether to hybridize the network for speed')
def get_data_iters(batch_size):
    """Create the training DataLoader, plus a validation one when opt.ratio < 1."""
    train_set, val_set = LabelList(ratio=opt.ratio, root=opt.dataset_root, name=opt.dataset)
    normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
    size = (opt.img_width, opt.img_height)

    # Training pipeline: resize, random flip + crop augmentation, normalize.
    train_transform = transforms.Compose([
        transforms.Resize(size=size, interpolation=1),
        transforms.RandomFlipLeftRight(),
        RandomCrop(size=size, pad=opt.pad),
        transforms.ToTensor(),
        normalizer,
    ])
    train_data = gluon.data.DataLoader(
        ImageTxtDataset(train_set, transform=train_transform),
        batch_size, shuffle=True, last_batch='discard',
        num_workers=opt.num_workers)

    val_data = None
    if opt.ratio < 1:
        # Validation pipeline: no augmentation, only resize + normalize.
        val_transform = transforms.Compose([
            transforms.Resize(size=size, interpolation=1),
            transforms.ToTensor(),
            normalizer,
        ])
        val_data = gluon.data.DataLoader(
            ImageTxtDataset(val_set, transform=val_transform),
            batch_size, shuffle=True, last_batch='discard',
            num_workers=opt.num_workers)
    return train_data, val_data
def validate(val_data, net, criterion, ctx):
    """Run one pass over *val_data* and return (mean loss, mean accuracy).

    Both metrics are averaged first across devices within a batch and then
    across batches.
    """
    total_loss = 0.0
    total_acc = 0.0
    for data, label in val_data:
        data_list = gluon.utils.split_and_load(data, ctx)
        label_list = gluon.utils.split_and_load(label, ctx)
        with autograd.predict_mode():
            outputs = [net(X) for X in data_list]
            losses = [criterion(X, y) for X, y in zip(outputs, label_list)]
        # .mean() must be *called*; the original '.mean.asscalar()' raised
        # AttributeError at runtime.
        accs = [(X.argmax(axis=1) == y.astype('float32')).mean().asscalar()
                for X, y in zip(outputs, label_list)]
        loss_vals = [l.mean().asscalar() for l in losses]
        total_loss += sum(loss_vals) / len(loss_vals)
        # Accumulate accuracy over every batch — the original overwrote it,
        # so only the last batch contributed to the returned value.
        total_acc += sum(accs) / len(accs)
    return total_loss / len(val_data), total_acc / len(val_data)
def main(net, batch_size, epochs, opt, ctx):
    """Train *net* with Adam, optional linear LR warm-up, and step decay.

    Parameters
    ----------
    net : network to train (classifier over person identities).
    batch_size : total batch size summed over all devices.
    epochs : list of ints, e.g. [5, 25, 50, 75] — warm-up runs for the
        first epochs[0] epochs, the LR is multiplied by opt.lr_decay at each
        epoch in epochs[1:], and training stops after epochs[-1] epochs.
    opt : parsed command-line options.
    ctx : list of mxnet contexts (devices) to train on.
    """
    train_data, val_data = get_data_iters(batch_size)
    if opt.hybridize:
        net.hybridize()
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': opt.lr, 'wd': opt.wd})
    criterion = gluon.loss.SoftmaxCrossEntropyLoss()
    lr = opt.lr
    if opt.warmup:
        # Linear warm-up: ramp from 1% of the target LR to the full LR
        # over the first epochs[0] epochs.
        minlr = lr*0.01
        dlr = (lr-minlr)/(epochs[0]-1)
    prev_time = datetime.datetime.now()
    for epoch in range(epochs[-1]):
        _loss = 0.
        if opt.warmup:
            if epoch<epochs[0]:
                lr = minlr + dlr*epoch
        if epoch in epochs[1:]:
            # multiplicative step decay at each scheduled epoch
            lr = lr * opt.lr_decay
        trainer.set_learning_rate(lr)
        for data, label in train_data:
            # shard the batch across the available devices
            data_list = gluon.utils.split_and_load(data, ctx)
            label_list = gluon.utils.split_and_load(label, ctx)
            with autograd.record():
                output = [net(X) for X in data_list]
                losses = [criterion(X, y) for X, y in zip(output, label_list)]
            for l in losses:
                l.backward()
            # normalize accumulated gradients by the global batch size
            trainer.step(batch_size)
            _loss_list = [l.mean().asscalar() for l in losses]
            _loss += sum(_loss_list) / len(_loss_list)
        cur_time = datetime.datetime.now()
        # wall-clock time for this epoch, formatted as HH:MM:SS
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        __loss = _loss/len(train_data)
        if val_data is not None:
            val_loss, val_accuracy = validate(val_data, net, criterion, ctx)
            epoch_str = ("Epoch %d. Train loss: %f, Val loss %f, Val accuracy %f, " % (epoch, __loss , val_loss, val_accuracy))
        else:
            epoch_str = ("Epoch %d. Train loss: %f, " % (epoch, __loss))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))
    # persist the trained weights once training has finished
    if not os.path.exists("params"):
        os.mkdir("params")
    net.save_parameters("params/resnet50.params")
if __name__ == '__main__':
    opt = parser.parse_args()
    logging.info(opt)
    mx.random.seed(opt.seed)
    batch_size = opt.batch_size
    num_gpus = opt.num_gpus
    # epochs schedule, e.g. "5,25,50,75" -> [5, 25, 50, 75]
    epochs = [int(i) for i in opt.epochs.split(',')]
    # scale the per-device batch size by the number of devices
    batch_size *= max(1, num_gpus)
    # Fall back to CPU when no GPU is requested: an empty context list
    # would crash gluon.utils.split_and_load during training.
    if num_gpus > 0:
        context = [mx.gpu(i) for i in range(num_gpus)]
    else:
        context = [mx.cpu()]
    net = resnet50(ctx=context, num_classes=751)
    main(net, batch_size, epochs, opt, context)
In [7]:
!pwd
In [8]:
!nvidia-smi -L
In [9]:
!nvidia-smi
In [10]:
!CUDA_VISIBLE_DEVICES=1 python baseline/train.py
其实我早就训练好了...
所以就省略gpu跑训练代码的输出过程
下面直接上测试代码吧
In [ ]:
#test.py
# -*- coding: utf-8 -*-
from __future__ import print_function, division
import mxnet as mx
import numpy as np
from mxnet import gluon, nd
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms
from networks import resnet18, resnet34, resnet50
from gluoncv.data.market1501.data_read import ImageTxtDataset
import time, os, sys
import scipy.io as sio
from os import path as osp
def get_data(batch_size, test_set, query_set):
    """Build gallery ('test') and query DataLoaders with eval-time transforms."""
    # Evaluation pipeline: fixed resize + ImageNet normalization, no augmentation.
    eval_transform = transforms.Compose([
        transforms.Resize(size=(128, 384), interpolation=1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    loaders = []
    for image_set in (test_set, query_set):
        dataset = ImageTxtDataset(image_set, transform=eval_transform)
        loaders.append(gluon.data.DataLoader(
            dataset, batch_size, shuffle=False, last_batch='keep', num_workers=8))
    return loaders[0], loaders[1]
def load_network(network, ctx):
    """Load the trained baseline weights into *network* on *ctx*; return it.

    allow_missing/ignore_extra tolerate parameter-name mismatches between
    the saved file and the network (e.g. a head absent at test time).
    """
    network.load_parameters('params/resnet50.params', ctx=ctx,
                            allow_missing=True, ignore_extra=True)
    return network
def fliplr(img):
    """Return *img* mirrored horizontally (flipped along axis 3, the width)."""
    return nd.flip(img, axis=3)
def extract_feature(model, dataloaders, ctx):
    """Extract L2-normalized features for every image yielded by *dataloaders*.

    Each image is passed through *model* twice — as-is and horizontally
    flipped — and the two feature vectors are summed (a standard re-ID
    test-time augmentation). Rows of the returned numpy array have unit
    L2 norm.
    """
    count = 0
    features = []
    for img, _ in dataloaders:
        n = img.shape[0]
        count += n
        print(count)  # running progress counter
        ff = None
        for i in range(2):
            if i == 1:
                img = fliplr(img)
            f = model(img.as_in_context(ctx)).as_in_context(mx.cpu()).asnumpy()
            if ff is None:
                # Infer the feature dimension from the model output instead
                # of hard-coding 2048, so backbones other than ResNet-50 work.
                ff = np.zeros((n, f.shape[1]))
            ff = ff + f
        features.append(ff)
    features = np.concatenate(features)
    return features / np.linalg.norm(features, axis=1, keepdims=True)
def get_id(img_path):
    """Extract camera ids and labels from a list of (path, label) pairs.

    The camera id is the digit right after 'c' in Market-1501 filenames
    such as '0002_c1s1_000451_03.jpg'.
    """
    cameras = [int(p.split('/')[-1].split('_')[1][1]) for p, _ in img_path]
    labels = [lab for _, lab in img_path]
    return np.array(cameras), np.array(labels)
def compute_mAP(index, good_index, junk_index):
    """Average precision and CMC curve for one re-ID query.

    Parameters
    ----------
    index : ranked gallery indices, best match first.
    good_index : gallery indices that are correct matches.
    junk_index : gallery indices to ignore (same camera / distractors).

    Returns
    -------
    (ap, cmc) : float average precision and a CMC array the same length as
    *index* (1 from the first correct match onward; cmc[0] == -1 signals a
    query with no valid ground truth).
    """
    ap = 0
    cmc = np.zeros(len(index))
    if good_index.size == 0:  # no valid ground truth for this query
        cmc[0] = -1
        return ap, cmc
    # Drop junk entries from the ranking before scoring.
    # np.isin replaces the deprecated np.in1d.
    mask = np.isin(index, junk_index, invert=True)
    index = index[mask]
    # 0-based ranks at which the good matches appear
    ngood = len(good_index)
    rows_good = np.argwhere(np.isin(index, good_index)).flatten()
    # CMC is 1 from the first correct match onwards
    cmc[rows_good[0]:] = 1
    for i in range(ngood):
        # trapezoidal interpolation between consecutive recall points
        d_recall = 1.0 / ngood
        precision = (i + 1) * 1.0 / (rows_good[i] + 1)
        if rows_good[i] != 0:
            old_precision = i * 1.0 / rows_good[i]
        else:
            old_precision = 1.0
        ap = ap + d_recall * (old_precision + precision) / 2
    return ap, cmc
if __name__ == '__main__':
    # Evaluate the trained baseline on Market-1501: extract features for the
    # gallery ('bounding_box_test') and query sets, rank by cosine
    # similarity, and report CMC top-k and mAP.
    batch_size = 256
    data_dir = osp.expanduser("~/.mxnet/datasets/Market-1501-v15.09.15/")
    gpu_ids = [0]
    # set gpu ids
    # NOTE(review): if gpu_ids were ever empty, `context` would be undefined
    # (NameError below) — currently unreachable since gpu_ids is hard-coded.
    if len(gpu_ids)>0:
        context = mx.gpu()
    # Gallery entries are (absolute path, person id). The "-1" check drops
    # distractor images labelled with pid -1; NOTE(review): it also drops any
    # filename merely containing the substring "-1".
    test_set = [(osp.join(data_dir,'bounding_box_test',line), int(line.split('_')[0])) for line in os.listdir(data_dir+'bounding_box_test') if "jpg" in line and "-1" not in line]
    query_set = [(osp.join(data_dir,'query',line), int(line.split('_')[0])) for line in os.listdir(data_dir+'query') if "jpg" in line]
    test_cam, test_label = get_id(test_set)
    query_cam, query_label = get_id(query_set)
    ######################################################################
    # Load Collected data Trained model
    model_structure = resnet50(ctx=context, pretrained=False)
    model = load_network(model_structure, context)
    # Extract feature
    test_loader, query_loader = get_data(batch_size, test_set, query_set)
    print('start test')
    test_feature = extract_feature(model, test_loader, context)
    print('start query')
    query_feature = extract_feature(model, query_loader, context)
    # Move both feature matrices to GPU 0 for the similarity computation.
    query_feature = nd.array(query_feature).as_in_context(mx.gpu(0))
    test_feature = nd.array(test_feature).as_in_context(mx.gpu(0))
    num = query_label.size
    # extract_feature L2-normalizes rows, so this query x gallery inner
    # product is the cosine similarity matrix.
    dist_all = nd.linalg.gemm2(query_feature, test_feature, transpose_b=True)
    CMC = np.zeros(test_label.size)
    ap = 0.0
    for i in range(num):
        cam = query_cam[i]
        label = query_label[i]
        # rank the gallery by descending similarity to query i
        index = dist_all[i].argsort(is_ascend=False).as_in_context(mx.cpu()).asnumpy().astype("int32")
        query_index = np.argwhere(test_label==label)
        camera_index = np.argwhere(test_cam==cam)
        # good: same identity seen from a different camera;
        # junk: same identity from the same camera (excluded by protocol)
        good_index = np.setdiff1d(query_index, camera_index, assume_unique=True)
        junk_index = np.intersect1d(query_index, camera_index)
        ap_tmp, CMC_tmp = compute_mAP(index, good_index, junk_index)
        CMC = CMC + CMC_tmp
        ap += ap_tmp
    CMC = CMC/num #average CMC
    print('top1:%f top5:%f top10:%f mAP:%f'%(CMC[0],CMC[4],CMC[9],ap/num))
In [15]:
!CUDA_VISIBLE_DEVICES=1 python baseline/test.py
In [ ]:
tz@croplab,HZAU