| Using cuda device |
| Wrapping the env with a `Monitor` wrapper |
| Wrapping the env in a DummyVecEnv. |
| Wrapping the env in a VecTransposeImage. |
| Logging to ./tensorboard_log/PPO_1 |
| |
| |
| D:\software\e_anaconda\envs\pytorch\lib\site-packages\gym_super_mario_bros\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars |
| return (self.ram[0x86] - self.ram[0x071c]) % 256 |
| |
| |
| ----------------------------- |
| | time/ | | |
| | fps | 116 | |
| | iterations | 1 | |
| | time_elapsed | 17 | |
| | total_timesteps | 2048 | |
| ----------------------------- |
| ----------------------------------------- |
| | time/ | | |
| | fps | 81 | |
| | iterations | 2 | |
| | time_elapsed | 50 | |
| | total_timesteps | 4096 | |
| | train/ | | |
| | approx_kl | 0.025405666 | |
| | clip_fraction | 0.274 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.92 | |
| | explained_variance | 0.00504 | |
| | learning_rate | 0.0003 | |
| | loss | 0.621 | |
| | n_updates | 10 | |
| | policy_gradient_loss | 0.0109 | |
| | value_loss | 17.4 | |
| ----------------------------------------- |
| ----------------------------------------- |
| | time/ | | |
| | fps | 73 | |
| | iterations | 3 | |
| | time_elapsed | 83 | |
| | total_timesteps | 6144 | |
| | train/ | | |
| | approx_kl | 0.010906073 | |
| | clip_fraction | 0.109 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.92 | |
| | explained_variance | 0.0211 | |
| | learning_rate | 0.0003 | |
| | loss | 0.101 | |
| | n_updates | 20 | |
| | policy_gradient_loss | -0.00392 | |
| | value_loss | 0.187 | |
| ----------------------------------------- |
| ----------------------------------------- |
| | time/ | | |
| | fps | 69 | |
| | iterations | 4 | |
| | time_elapsed | 117 | |
| | total_timesteps | 8192 | |
| | train/ | | |
| | approx_kl | 0.009882288 | |
| | clip_fraction | 0.0681 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.9 | |
| | explained_variance | 0.101 | |
| | learning_rate | 0.0003 | |
| | loss | 0.0738 | |
| | n_updates | 30 | |
| | policy_gradient_loss | -0.00502 | |
| | value_loss | 0.13 | |
| ----------------------------------------- |
| ----------------------------------------- |
| | rollout/ | | |
| | ep_len_mean | 1.01e+04 | |
| | ep_rew_mean | 891 | |
| | time/ | | |
| | fps | 65 | |
| | iterations | 5 | |
| | time_elapsed | 156 | |
| | total_timesteps | 10240 | |
| | train/ | | |
| | approx_kl | 0.008186281 | |
| | clip_fraction | 0.105 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.87 | |
| | explained_variance | 0.0161 | |
| | learning_rate | 0.0003 | |
| | loss | 0.28 | |
| | n_updates | 40 | |
| | policy_gradient_loss | -0.00649 | |
| | value_loss | 0.811 | |
| ----------------------------------------- |
| ----------------------------------------- |
| | rollout/ | | |
| | ep_len_mean | 1.01e+04 | |
| | ep_rew_mean | 891 | |
| | time/ | | |
| | fps | 64 | |
| | iterations | 6 | |
| | time_elapsed | 190 | |
| | total_timesteps | 12288 | |
| | train/ | | |
| | approx_kl | 0.024062362 | |
| | clip_fraction | 0.246 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.9 | |
| | explained_variance | 0.269 | |
| | learning_rate | 0.0003 | |
| | loss | 0.54 | |
| | n_updates | 50 | |
| | policy_gradient_loss | 0.0362 | |
| | value_loss | 10.8 | |
| ----------------------------------------- |
| ----------------------------------------- |
| | rollout/ | | |
| | ep_len_mean | 1.01e+04 | |
| | ep_rew_mean | 891 | |
| | time/ | | |
| | fps | 63 | |
| | iterations | 7 | |
| | time_elapsed | 225 | |
| | total_timesteps | 14336 | |
| | train/ | | |
| | approx_kl | 0.024466533 | |
| | clip_fraction | 0.211 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.89 | |
| | explained_variance | 0.839 | |
| | learning_rate | 0.0003 | |
| | loss | 0.435 | |
| | n_updates | 60 | |
| | policy_gradient_loss | 0.023 | |
| | value_loss | 3.06 | |
| ----------------------------------------- |
| ---------------------------------------- |
| | rollout/ | | |
| | ep_len_mean | 1.01e+04 | |
| | ep_rew_mean | 891 | |
| | time/ | | |
| | fps | 63 | |
| | iterations | 8 | |
| | time_elapsed | 259 | |
| | total_timesteps | 16384 | |
| | train/ | | |
| | approx_kl | 0.01970315 | |
| | clip_fraction | 0.242 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.9 | |
| | explained_variance | 0.486 | |
| | learning_rate | 0.0003 | |
| | loss | 0.526 | |
| | n_updates | 70 | |
| | policy_gradient_loss | 0.00486 | |
| | value_loss | 1.57 | |
| ---------------------------------------- |
| ----------------------------------------- |
| | rollout/ | | |
| | ep_len_mean | 1.01e+04 | |
| | ep_rew_mean | 891 | |
| | time/ | | |
| | fps | 62 | |
| | iterations | 9 | |
| | time_elapsed | 293 | |
| | total_timesteps | 18432 | |
| | train/ | | |
| | approx_kl | 0.012460884 | |
| | clip_fraction | 0.217 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.87 | |
| | explained_variance | 0.74 | |
| | learning_rate | 0.0003 | |
| | loss | 0.139 | |
| | n_updates | 80 | |
| | policy_gradient_loss | -0.000311 | |
| | value_loss | 0.734 | |
| ----------------------------------------- |
| ---------------------------------------- |
| | rollout/ | | |
| | ep_len_mean | 1.01e+04 | |
| | ep_rew_mean | 891 | |
| | time/ | | |
| | fps | 62 | |
| | iterations | 10 | |
| | time_elapsed | 327 | |
| | total_timesteps | 20480 | |
| | train/ | | |
| | approx_kl | 0.02535792 | |
| | clip_fraction | 0.298 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.88 | |
| | explained_variance | 0.405 | |
| | learning_rate | 0.0003 | |
| | loss | 1.17 | |
| | n_updates | 90 | |
| | policy_gradient_loss | 0.0205 | |
| | value_loss | 6.6 | |
| ---------------------------------------- |
| ----------------------------------------- |
| | rollout/ | | |
| | ep_len_mean | 1.01e+04 | |
| | ep_rew_mean | 891 | |
| | time/ | | |
| | fps | 62 | |
| | iterations | 11 | |
| | time_elapsed | 361 | |
| | total_timesteps | 22528 | |
| | train/ | | |
| | approx_kl | 0.019694094 | |
| | clip_fraction | 0.243 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.91 | |
| | explained_variance | 0.952 | |
| | learning_rate | 0.0003 | |
| | loss | 0.39 | |
| | n_updates | 100 | |
| | policy_gradient_loss | -0.00434 | |
| | value_loss | 1.31 | |
| ----------------------------------------- |
| ----------------------------------------- |
| | rollout/ | | |
| | ep_len_mean | 1.19e+04 | |
| | ep_rew_mean | 884 | |
| | time/ | | |
| | fps | 61 | |
| | iterations | 12 | |
| | time_elapsed | 398 | |
| | total_timesteps | 24576 | |
| | train/ | | |
| | approx_kl | 0.013096321 | |
| | clip_fraction | 0.227 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.91 | |
| | explained_variance | 0.0132 | |
| | learning_rate | 0.0003 | |
| | loss | 0.669 | |
| | n_updates | 110 | |
| | policy_gradient_loss | -0.000837 | |
| | value_loss | 1.42 | |
| ----------------------------------------- |
| ----------------------------------------- |
| | rollout/ | | |
| | ep_len_mean | 1.19e+04 | |
| | ep_rew_mean | 884 | |
| | time/ | | |
| | fps | 61 | |
| | iterations | 13 | |
| | time_elapsed | 432 | |
| | total_timesteps | 26624 | |
| | train/ | | |
| | approx_kl | 0.014833134 | |
| | clip_fraction | 0.239 | |
| | clip_range | 0.2 | |
| | entropy_loss | -1.9 | |
| | explained_variance | 0.452 | |
| | learning_rate | 0.0003 | |
| | loss | 18.1 | |
| | n_updates | 120 | |
| | policy_gradient_loss | -7.3e-05 | |
| | value_loss | 26.3 | |
| ----------------------------------------- |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步