# PPO tuned for MJX (1024+ parallel envs on GPU).
# Inherits defaults + HPO ranges from ppo.yaml.
# With 1024 envs, each timestep collects 1024 samples, so total_timesteps
# can be much lower than the CPU config.
defaults:
  - ppo
  - _self_

total_timesteps: 300000  # 300K × 1024 envs ≈ 307M env steps
mini_batches: 32  # keep mini-batch size similar (~32K)
learning_rate: 0.001  # ~3x higher LR for 16x larger batch (sqrt scaling)
log_interval: 100
checkpoint_interval: 10000
record_video_every: 10000
remote: false