Multi-GPU Logging with wandb
https://docs.wandb.ai/guides/track/advanced/distributed-training
The following helpers (a pattern common in distributed PyTorch codebases) detect whether distributed training is active and report each process's rank:

import os

import torch.distributed as dist


def is_dist_avail_and_initialized():
    # Distributed helpers only apply once the process group is up.
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True

def get_world_size():
    # Total number of processes; 1 when running single-GPU/CPU.
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()

def get_rank():
    # Global rank of this process; 0 when not distributed.
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()

def get_local_size():
    # Processes on this node; LOCAL_SIZE must be set by the launch script.
    if not is_dist_avail_and_initialized():
        return 1
    return int(os.environ['LOCAL_SIZE'])

def get_local_rank():
    # Rank of this process within its node, set by the launcher.
    if not is_dist_avail_and_initialized():
        return 0
    return int(os.environ['LOCAL_RANK'])

def is_main_process():
    return get_rank() == 0
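These helpers assume the default process group has already been initialized. A minimal setup sketch, assuming launch via torchrun, which exports RANK, WORLD_SIZE, and LOCAL_RANK for each process; LOCAL_SIZE is not a standard variable, so it is set by hand here:

import os

import torch
import torch.distributed as dist

# Assumed launch: torchrun --nproc_per_node=<gpus> train.py
# torchrun sets RANK/WORLD_SIZE/LOCAL_RANK; LOCAL_SIZE is a custom convention,
# so export it explicitly for get_local_size() above.
os.environ.setdefault('LOCAL_SIZE', str(torch.cuda.device_count()))

dist.init_process_group(backend='nccl')               # reads RANK/WORLD_SIZE from env
torch.cuda.set_device(int(os.environ['LOCAL_RANK']))  # bind one GPU per process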
Usually it suffices to log only from the main process:
if utils.is_main_process():
    wandb.init(project=args.exp_name)

# ... after collecting and synchronizing across processes
if utils.is_main_process():
    wandb.log(loss_dict)
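The "collecting and synchronizing" step usually means averaging each process's losses before rank 0 logs them, so the logged value reflects the whole batch rather than rank 0's shard alone. A sketch under that assumption (reduce_dict and the 'loss' key are illustrative names, reusing get_world_size() from the helpers above):

import torch
import torch.distributed as dist

def reduce_dict(input_dict, average=True):
    # Average scalar loss tensors across all processes so that what
    # rank 0 logs covers every shard of the batch.
    world_size = get_world_size()                    # helper defined above
    if world_size < 2:
        return input_dict
    keys = sorted(input_dict.keys())                 # identical order on every rank
    values = torch.stack([input_dict[k] for k in keys])
    dist.all_reduce(values)                          # sums across ranks
    if average:
        values /= world_size
    return {k: v for k, v in zip(keys, values)}

loss_dict = reduce_dict({'loss': loss.detach()})     # 'loss' is a placeholder CUDA tensor
if utils.is_main_process():
    wandb.log({k: v.item() for k, v in loss_dict.items()})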