Visualization with tensorboardX in PyTorch
Environment requirements:
PyTorch 0.4 or later
tensorboardX: pip install tensorboardX and pip install tensorflow (installing TensorFlow provides the tensorboard command used for viewing)
Add tensorboardX logging calls to your project code; they write event files whose contents can then be displayed in the browser.
Official example:
By default, a runs folder is created in the current working directory; it stores the summary data.
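A minimal sketch of that default behavior (the loss values below are made up purely for illustration):

from tensorboardX import SummaryWriter

writer = SummaryWriter()  # no logdir given, so events go to ./runs/<datetime>_<hostname>
for step in range(100):
    fake_loss = 1.0 / (step + 1)  # placeholder standing in for a real training loss
    writer.add_scalar("train/loss", fake_loss, step)  # tag, scalar value, global step
writer.close()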
Then, from a command line in the directory that contains runs, run:
tensorboard --logdir runs    (the command is tensorboard, not tensorboardX)
This prints a URL; open it in a browser to visualize how quantities such as loss, accuracy, and learning rate change over training.
The example below shows how to set up a summary in PyTorch: instantiate a SummaryWriter object, then call add_scalar on it for each metric you want to monitor.
It also shows how to save a log file with the logging package; see this post for details: https://www.cnblogs.com/ywheunji/p/14125085.html
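The Logger class used in the training code below is not shown in this excerpt; a minimal stand-in built on the standard logging package could look like the following (an illustrative sketch with the same constructor arguments and get_log() method, not the exact implementation from the link):

import logging

class Logger(object):
    """Minimal file + console logger with the interface used by the training script below."""

    def __init__(self, log_file_name, log_level, logger_name):
        self.__logger = logging.getLogger(logger_name)
        self.__logger.setLevel(log_level)
        formatter = logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s")
        file_handler = logging.FileHandler(log_file_name)   # persist messages to disk
        file_handler.setFormatter(formatter)
        console_handler = logging.StreamHandler()            # mirror messages on the console
        console_handler.setFormatter(formatter)
        self.__logger.addHandler(file_handler)
        self.__logger.addHandler(console_handler)

    def get_log(self):
        return self.__logger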
# Excerpt from a YOLOv4 training script. Standard-library / third-party imports are added here;
# project-specific names (cfg, opt, init_seeds, Build_Model, Evaluator, Logger) are assumed
# to come from the surrounding project.
import logging
import time

import torch
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from apex import amp  # only needed when fp_16 mixed-precision training is enabled


class Trainer(object):
    def __init__(self, weight_path, resume, gpu_id, accumulate, fp_16):
        init_seeds(0)
        # (dataset, device, optimizer, criterion, epoch bookkeeping, etc. are set up
        #  elsewhere in the full __init__; only the parts relevant to logging are shown)
        self.train_dataloader = DataLoader(
            self.train_dataset,
            batch_size=cfg.TRAIN["BATCH_SIZE"],
            num_workers=cfg.TRAIN["NUMBER_WORKERS"],
            shuffle=True,
            pin_memory=True,
        )
        self.yolov4 = Build_Model(weight_path=weight_path, resume=resume).to(
            self.device
        )

    def train(self):
        global writer
        logger.info(
            "Training start, img size is: {:d}, batch size is: {:d}, worker number is {:d}".format(
                cfg.TRAIN["TRAIN_IMG_SIZE"],
                cfg.TRAIN["BATCH_SIZE"],
                cfg.TRAIN["NUMBER_WORKERS"],
            )
        )
        logger.info(self.yolov4)
        logger.info("Train dataset size is: {}".format(len(self.train_dataset)))

        if self.fp_16:
            self.yolov4, self.optimizer = amp.initialize(
                self.yolov4, self.optimizer, opt_level="O1", verbosity=0
            )
        logger.info(" ======= start training ====== ")
        for epoch in range(self.start_epoch, self.epochs):
            start = time.time()
            self.yolov4.train()

            mloss = torch.zeros(4)  # running mean of [ciou, conf, cls, total] losses
            logger.info("===Epoch:[{}/{}]===".format(epoch, self.epochs))
            for i, (imgs, label_sbbox) in enumerate(self.train_dataloader):
                # The forward pass that produces p and p_d, the running-mean update of
                # mloss, and the optimizer step are omitted in this excerpt.
                loss, loss_ciou, loss_conf, loss_cls = self.criterion(
                    p, p_d, label_sbbox
                )
                loss.backward()

                # Print batch results and log scalars every 10 steps
                if i % 10 == 0:
                    logger.info(
                        " === Epoch:[{:3}/{}],step:[{:3}/{}],img_size:[{:3}],"
                        "total_loss:{:.4f}|loss_ciou:{:.4f}|loss_conf:{:.4f}|"
                        "loss_cls:{:.4f}|lr:{:.4f}".format(
                            epoch,
                            self.epochs,
                            i,
                            len(self.train_dataloader) - 1,
                            self.train_dataset.img_size,
                            mloss[3],
                            mloss[0],
                            mloss[1],
                            mloss[2],
                            self.optimizer.param_groups[0]["lr"],
                        )
                    )
                    # global step = batches per epoch * epoch + batch index
                    writer.add_scalar(
                        "loss_ciou", mloss[0], len(self.train_dataloader) * epoch + i
                    )
                    writer.add_scalar(
                        "train_loss", mloss[3], len(self.train_dataloader) * epoch + i
                    )

            # eval
            logger.info("===== Validate =====")
            logger.info("val img size is {}".format(cfg.VAL["TEST_IMG_SIZE"]))
            with torch.no_grad():
                APs, inference_time = Evaluator(self.yolov4, showatt=False).APs_voc()
                mAP = 0.0
                for i in APs:
                    logger.info("{} --> mAP : {}".format(i, APs[i]))
                    mAP += APs[i]
                mAP = mAP / self.train_dataset.num_classes
                logger.info("mAP : {}".format(mAP))
                logger.info("inference time: {:.2f} ms".format(inference_time))
                writer.add_scalar("mAP", mAP, epoch)  # one validation point per epoch
                self.__save_model_weights(epoch, mAP)
                logger.info("save weights done")
            logger.info(" ===test mAP:{:.3f}".format(mAP))


if __name__ == "__main__":
    global logger, writer
    # TensorBoard events go to <log_path>/event; the text log goes to <log_path>/log.txt
    writer = SummaryWriter(logdir=opt.log_path + "/event")
    logger = Logger(
        log_file_name=opt.log_path + "/log.txt",
        log_level=logging.DEBUG,
        logger_name="YOLOv4",
    ).get_log()

    Trainer(
        weight_path=opt.weight_path,
        resume=opt.resume,
        gpu_id=opt.gpu_id,
        accumulate=opt.accumulate,
        fp_16=opt.fp_16,
    ).train()
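Because this script passes an explicit logdir (opt.log_path + "/event") instead of relying on the default runs folder, the curves are viewed by pointing tensorboard --logdir at that event directory and opening the printed URL in a browser.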