From 9be07d91867d1047b7506c3bba0c42613eb0797b Mon Sep 17 00:00:00 2001 From: Victor Mylle Date: Mon, 9 Mar 2026 21:33:42 +0100 Subject: [PATCH] :sparkles: add new ppo mjx config --- configs/training/ppo_mjx.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 configs/training/ppo_mjx.yaml diff --git a/configs/training/ppo_mjx.yaml b/configs/training/ppo_mjx.yaml new file mode 100644 index 0000000..6f098ab --- /dev/null +++ b/configs/training/ppo_mjx.yaml @@ -0,0 +1,22 @@ +# PPO tuned for MJX (1024+ parallel envs on GPU). +# With 1024 envs, each timestep collects 1024 samples, so total_timesteps +# can be much lower than the CPU config. + +hidden_sizes: [128, 128] +total_timesteps: 300000 # 300K × 1024 envs ≈ 307M env steps +rollout_steps: 1024 # PPO batch = 1024 envs × 1024 steps = 1M samples +learning_epochs: 4 +mini_batches: 32 # keep mini-batch size similar to CPU config (~32K) +discount_factor: 0.99 +gae_lambda: 0.95 +learning_rate: 0.001 # LR scaled up for 16x larger batch (sqrt scaling suggests ~4x; ~3.3x used) +clip_ratio: 0.2 +value_loss_scale: 0.5 +entropy_loss_scale: 0.05 +log_interval: 100 # log more often (shorter run) +checkpoint_interval: 10000 + +record_video_every: 10000 + +# ClearML remote execution (GPU worker) +remote: false