MSAFF复现总结

MSAFF代码对于Gait3D的复现过程总体还是很顺利的,需要注意的几点如下:

  1. 使用Gait3D-smpls-pkl与Gait3D-sils-64-44-pkl无法复现,这两个处理后的数据集有问题,需要改用自己预处理的数据集。在预处理过程中,出现了3493因不存在而报错的情况,这可能是测试结果偏低的原因。
  2. 测试结束后控制台没有看到测试结果。日志文件存储在"/home/deng1/gao/MSAFF/output/Gait3D-Multimodal/MsaffGait6L/MsaffGait6L/summary/events.out.tfevents.1728994200.tyut-PowerEdge-R750.247887.0",该日志文件需要启动 TensorBoard打开浏览器查看,具体步骤:
pip install tensorboard  #安装 TensorBoard
tensorboard --logdir=/home/deng1/gao/MSAFF/output/Gait3D-Multimodal/MsaffGait6L/MsaffGait6L/summary/  # 启动 TensorBoard

在浏览器中打开http://localhost:6006/查看
如果 TensorBoard 运行在远程服务器上,需要先在本地机器上执行以下命令创建 SSH 隧道,然后再在本地浏览器中打开上述地址:
ssh -L 6006:localhost:6006 username@remote_server_ip
  3. 上述在本地查看日志的方法我没有成功,目前仍不清楚失败的原因,于是我选择直接将输出结果打印到控制台:

lib/main.py

def run_model(cfgs, training):
    """Build the configured model and run either the training or the test loop.

    Args:
        cfgs: Configuration mapping. ``cfgs['model_cfg']['model']`` names the
            class that is looked up in ``models``.
        training: If truthy, ``Model.run_train`` is called; otherwise
            ``Model.run_test`` is called and its metrics are reported.

    In the test phase the metrics ``Rank-1``, ``Rank-5``, ``Rank-10``, ``mAP``
    and ``mINP`` are appended to ``log_file_path`` and printed to the console.
    """
    msg_mgr = get_msg_mgr()
    model_cfg = cfgs['model_cfg']

    # Log model configuration
    msg_mgr.log_info(model_cfg)
    Model = getattr(models, model_cfg['model'])
    model = Model(cfgs, training)

    if training:
        Model.run_train(model)
        return

    result = Model.run_test(model)
    print(f"Result from run_test: {result}")  # debug output

    # run_test only returns the metrics on rank 0; every other process gets
    # None. Without this guard `'Rank-1' in result` raises TypeError on those
    # processes when launched with more than one process.
    if result is None:
        return

    if 'Rank-1' not in result:
        print("Result does not contain 'Rank-1'. Check the evaluation function.")
        return

    metric_names = ('Rank-1', 'Rank-5', 'Rank-10', 'mAP', 'mINP')
    # Format once so the log file and the console show exactly the same text.
    report_lines = [
        "{}: {:.2f}%".format(name, result[name]) for name in metric_names
    ]

    # NOTE(review): log_file_path is not defined inside this function; it is
    # assumed to be a module-level name in lib/main.py -- confirm it exists.
    with open(log_file_path, 'a') as log_file:
        log_file.writelines(line + "\n" for line in report_lines)

    for line in report_lines:
        print(line)

lib/utils/evaluation.py

def evaluation_Gait3D(data, conf, probe_num, metric='euc'):
    """Evaluate retrieval quality of probe embeddings against the gallery.

    The first ``probe_num`` entries of ``data['embeddings']`` / ``data['labels']``
    are treated as probes, the remaining entries as the gallery.

    Args:
        data: Mapping with the keys ``'embeddings'``, ``'labels'``, ``'types'``
            and ``'views'``.
        conf: Configuration mapping; ``conf["evaluator_cfg"]["restore_hint"]``
            is stored in the result under ``'iter'``.
        probe_num: Number of leading entries that form the probe set.
        metric: Distance metric name forwarded to ``cuda_dist``.

    Returns:
        An ``OrderedDict`` with the keys ``'iter'``, ``'Rank-1'``, ``'Rank-5'``,
        ``'Rank-10'``, ``'mAP'`` and ``'mINP'``; all metric values are scaled
        by 100.
    """
    logger = get_msg_mgr()

    embeddings = data['embeddings']
    all_labels = data['labels']
    # 'types' and 'views' are looked up but not used below; the lookups are
    # kept so that a missing key still raises KeyError as before.
    _types = data['types']
    _views = data['views']

    query_feats, gallery_feats = embeddings[:probe_num], embeddings[probe_num:]
    query_ids = np.asarray(all_labels[:probe_num])
    gallery_ids = np.asarray(all_labels[probe_num:])

    results = OrderedDict()
    results['iter'] = conf["evaluator_cfg"]["restore_hint"]
    logger.log_info(f"The test metric you choose is {metric}.")

    # Pairwise probe-vs-gallery distances, moved to host memory as a NumPy array.
    distances = cuda_dist(query_feats, gallery_feats, metric).cpu().numpy()
    cmc, all_AP, all_INP = evaluate_rank(distances, query_ids, gallery_ids)

    mean_ap = np.mean(all_AP)
    mean_inp = np.mean(all_INP)
    # cmc is indexed from 0, so Rank-k is read from position k - 1.
    results.update((f'Rank-{k}', cmc[k - 1] * 100) for k in (1, 5, 10))
    results['mAP'] = mean_ap * 100
    results['mINP'] = mean_inp * 100

    return results

主要修改还是MSAFF/lib/modeling/base_model.py中的run_test函数

    @staticmethod
    def run_test(model):
        """Run inference on every rank and evaluate the result on rank 0.

        Args:
            model: The model instance. It provides ``inference``,
                ``test_loader``, ``cfgs`` and ``probe_seqs_num``.

        Returns:
            On rank 0, whatever the selected evaluation function returns.
            On every other rank, ``None``.
        """
        rank = torch.distributed.get_rank()
        with torch.no_grad():
            info_dict = model.inference(rank)

        # Only the rank-0 process evaluates; the others are done after inference.
        if rank != 0:
            return None

        dataset = model.test_loader.dataset
        info_dict.update({
            'labels': dataset.label_list,
            'types': dataset.types_list,
            'views': dataset.views_list,
        })

        # Use 'evaluation_Gait3D' when the config does not name an evaluation function.
        eval_name = model.cfgs["evaluator_cfg"].get("eval_func", 'evaluation_Gait3D')
        eval_func = getattr(eval_functions, eval_name)
        valid_args = get_valid_args(
            eval_func, model.cfgs["evaluator_cfg"], ['metric'])

        # The dataset name is looked up but not used afterwards; the lookup is
        # kept so a config without 'dataset_name' still raises KeyError here.
        data_cfg = model.cfgs['data_cfg']
        _dataset_name = data_cfg.get('test_dataset_name', data_cfg['dataset_name'])

        # Call the evaluation function and hand its result back to the caller.
        return eval_func(info_dict, model.cfgs, model.probe_seqs_num, **valid_args)

运行结果展示:

```bash
nohup bash -c "CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node=2 lib/main.py --cfgs ./config/MsaffGait_CasiaB.yaml --phase train --iter 49600" > output.log 2>&1 &
```

posted @ 2024-10-16 11:27  珍惜时光,辉煌拔尖  阅读(8)  评论(0)  编辑  收藏  举报