From 9be07d91867d1047b7506c3bba0c42613eb0797b Mon Sep 17 00:00:00 2001 From: Victor Mylle Date: Mon, 9 Mar 2026 21:33:42 +0100 Subject: [PATCH] :sparkles: add new ppo mjx config --- configs/training/ppo_mjx.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 configs/training/ppo_mjx.yaml diff --git a/configs/training/ppo_mjx.yaml b/configs/training/ppo_mjx.yaml new file mode 100644 index 0000000..6f098ab --- /dev/null +++ b/configs/training/ppo_mjx.yaml @@ -0,0 +1,22 @@ +# PPO tuned for MJX (1024+ parallel envs on GPU). +# With 1024 envs, each timestep collects 1024 samples, so total_timesteps +# can be much lower than the CPU config. + +hidden_sizes: [128, 128] +total_timesteps: 300000 # 300K × 1024 envs ≈ 307M env steps +rollout_steps: 1024 # PPO batch = 1024 envs × 1024 steps = 1M samples +learning_epochs: 4 +mini_batches: 32 # keep mini-batch size similar to CPU config (~32K) +discount_factor: 0.99 +gae_lambda: 0.95 +learning_rate: 0.001 # LR scaled up for 16x larger batch (sqrt scaling suggests ~4x; ~3.3x used) +clip_ratio: 0.2 +value_loss_scale: 0.5 +entropy_loss_scale: 0.05 +log_interval: 100 # log more often (shorter run) +checkpoint_interval: 10000 + +record_video_every: 10000 + +# ClearML remote execution (GPU worker) +remote: false