MSAFF复现总结
MSAFF代码对于Gait3D的复现过程总体还是很顺利的,需要注意的几点如下:
- 直接使用 Gait3D-smpls-pkl 与 Gait3D-sils-64-44-pkl 无法复现,这两份处理后的数据集有问题,需要使用自己预处理的数据集。在预处理过程中,3493 对应的数据因不存在而报错,这可能是测试结果偏低的原因。
- 测试结束后控制台没有看到测试结果。日志文件存储在"/home/deng1/gao/MSAFF/output/Gait3D-Multimodal/MsaffGait6L/MsaffGait6L/summary/events.out.tfevents.1728994200.tyut-PowerEdge-R750.247887.0",该日志文件需要启动 TensorBoard 后在浏览器中打开查看,具体步骤:
pip install tensorboard #安装 TensorBoard
tensorboard --logdir=/home/deng1/gao/MSAFF/output/Gait3D-Multimodal/MsaffGait6L/MsaffGait6L/summary/ # 启动 TensorBoard
在浏览器中打开 http://localhost:6006/ 查看。
如果 TensorBoard 运行在远程服务器上,需要先在本地机器上执行以下命令创建 SSH 隧道,再在本地浏览器中打开上述地址:
ssh -L 6006:localhost:6006 username@remote_server_ip
3. 上述在本地查看日志的方法我没有成功,目前仍不清楚失败的原因,于是我选择直接将测试结果打印到控制台:
lib/main.py
def run_model(cfgs, training):
    """Build the configured model and run either training or testing.

    In test mode the evaluation metrics returned by ``Model.run_test`` are
    appended to the log file and echoed to the console.

    Args:
        cfgs: Full configuration mapping; ``cfgs['model_cfg']['model']`` names
            the model class looked up on ``models``.
        training: When true, run the training loop; otherwise run the test
            loop and report its metrics.
    """
    msg_mgr = get_msg_mgr()
    model_cfg = cfgs['model_cfg']
    # Log model configuration
    msg_mgr.log_info(model_cfg)
    Model = getattr(models, model_cfg['model'])
    model = Model(cfgs, training)

    if training:
        Model.run_train(model)
        return

    result = Model.run_test(model)
    print(f"Result from run_test: {result}")  # debug output

    # In a multi-process launch only one rank performs the evaluation; the
    # others get no result back, and `'Rank-1' in None` would raise TypeError.
    if result is None:
        return

    if 'Rank-1' not in result:
        print("Result does not contain 'Rank-1'. Check the evaluation function.")
        return

    metric_names = ('Rank-1', 'Rank-5', 'Rank-10', 'mAP', 'mINP')
    report_lines = [
        "{}: {:.2f}%".format(name, result[name]) for name in metric_names
    ]

    # NOTE(review): `log_file_path` is not defined in this excerpt; it is
    # assumed to exist at module level in lib/main.py -- confirm.
    with open(log_file_path, 'a') as log_file:
        log_file.writelines(line + "\n" for line in report_lines)

    for line in report_lines:
        print(line)
lib/utils/evaluation.py
def evaluation_Gait3D(data, conf, probe_num, metric='euc'):
    """Compute retrieval metrics for a probe/gallery split of the test set.

    The first ``probe_num`` entries of the embeddings and labels are used as
    probes and the remaining entries as the gallery.

    Args:
        data: Mapping with the keys 'embeddings', 'labels', 'types' and
            'views'.
        conf: Configuration mapping; ``conf["evaluator_cfg"]["restore_hint"]``
            is stored in the result under 'iter'.
        probe_num: Number of leading samples that form the probe set.
        metric: Distance metric name forwarded to ``cuda_dist``.

    Returns:
        An ``OrderedDict`` with 'iter', 'Rank-1', 'Rank-5', 'Rank-10', 'mAP'
        and 'mINP'; every metric value is scaled by 100.
    """
    logger = get_msg_mgr()

    embeddings = data['embeddings']
    identities = data['labels']
    # 'types' and 'views' are looked up (a missing key still raises KeyError)
    # but are not used in the metric computation below.
    _cams = data['types']
    _time_seqs = data['views']

    query_feats = embeddings[:probe_num]
    gallery_feats = embeddings[probe_num:]
    query_ids = np.asarray(identities[:probe_num])
    gallery_ids = np.asarray(identities[probe_num:])

    results = OrderedDict()
    results['iter'] = conf["evaluator_cfg"]["restore_hint"]

    logger.log_info(f"The test metric you choose is {metric}.")
    distance = cuda_dist(query_feats, gallery_feats, metric)
    dist = distance.cpu().numpy()
    cmc, all_AP, all_INP = evaluate_rank(dist, query_ids, gallery_ids)

    mean_ap = np.mean(all_AP)
    mean_inp = np.mean(all_INP)

    # cmc is indexed by rank - 1, so Rank-k reads cmc[k - 1].
    for k in (1, 5, 10):
        results['Rank-{}'.format(k)] = cmc[k - 1] * 100
    results['mAP'] = mean_ap * 100
    results['mINP'] = mean_inp * 100
    return results
主要修改还是 MSAFF/lib/modeling/base_model.py 中的 run_test 函数:
@staticmethod
def run_test(model):
    """Run the test loop on the given model instance and return the metrics.

    Every rank runs inference, but only rank 0 evaluates the result.

    Args:
        model: The model instance; it provides ``inference``, ``test_loader``,
            ``cfgs`` and ``probe_seqs_num``.

    Returns:
        The value returned by the evaluation function on rank 0, and ``None``
        on every other rank.
    """
    rank = torch.distributed.get_rank()
    with torch.no_grad():
        info_dict = model.inference(rank)

    # Only rank 0 evaluates. Returning explicitly here keeps other ranks from
    # reaching a `return result` whose variable was never assigned.
    if rank != 0:
        return None

    dataset = model.test_loader.dataset
    info_dict.update({
        'labels': dataset.label_list,
        'types': dataset.types_list,
        'views': dataset.views_list
    })

    evaluator_cfg = model.cfgs["evaluator_cfg"]
    # Fall back to the Gait3D evaluation when the config names no function.
    eval_func_name = evaluator_cfg.get("eval_func", 'evaluation_Gait3D')
    eval_func = getattr(eval_functions, eval_func_name)
    valid_args = get_valid_args(eval_func, evaluator_cfg, ['metric'])

    # Call the evaluation function and return the results
    return eval_func(info_dict, model.cfgs, model.probe_seqs_num, **valid_args)
运行结果展示:
```bash
nohup bash -c "CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node=2 lib/main.py --cfgs ./config/MsaffGait_CasiaB.yaml --phase train --iter 49600" > output.log 2>&1 &
```