Loading

wandb 多GPU日志记录

https://docs.wandb.ai/guides/track/advanced/distributed-training

def is_dist_avail_and_initialized():
    """Return True iff torch.distributed is both available and initialized.

    Safe to call in single-process runs: short-circuits to False when the
    distributed package is unavailable or no process group has been set up.
    """
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the total number of distributed processes (1 when not distributed)."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return this process's global rank (0 when not distributed)."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def get_local_size():
    """Return the number of processes on this node (1 when not distributed).

    NOTE(review): reads the LOCAL_SIZE env var, which is set by DETR-style
    launch scripts but not by torchrun (which sets LOCAL_WORLD_SIZE) —
    confirm the launcher exports it, otherwise this raises KeyError.
    """
    if is_dist_avail_and_initialized():
        return int(os.environ['LOCAL_SIZE'])
    return 1


def get_local_rank():
    """Return this process's rank within its node (0 when not distributed).

    Reads the LOCAL_RANK env var, which torchrun and most launchers export.
    """
    if is_dist_avail_and_initialized():
        return int(os.environ['LOCAL_RANK'])
    return 0


def is_main_process():
    """Return True only on the global rank-0 (main) process."""
    rank = get_rank()
    return rank == 0

通常只在主进程当中记录日志即可

# Initialize the wandb run on the main process only, so a multi-GPU job
# produces a single run instead of one per worker.
if utils.is_main_process():
    wandb.init(project=args.exp_name)

# ... after collecting and synchronizing metrics across processes,
# log them from the main process only.
if utils.is_main_process():
    wandb.log(loss_dict)

posted @ 2022-07-18 11:45  ZXYFrank  阅读(218)  评论(0编辑  收藏  举报