# PPO sized for MJX (1024+ parallel envs on GPU).
# Inherits defaults + HPO ranges from ppo.yaml.
#
# Short rollouts × many envs is the GPU-PPO sweet spot:
#   24 steps × 1024 envs ≈ 25K samples per update (~6K per mini-batch).
# (The old rollout_steps=2048 inherited from the CPU config meant a
# 2M-sample memory per update — GBs of VRAM and glacial updates.)

# NOTE(review): assuming this is a Hydra defaults list — `_self_` placed
# after `ppo` means the keys in this file override the inherited ones.
defaults:
  - ppo
  - _self_

rollout_steps: 24
mini_batches: 4
learning_epochs: 5
learning_rate: 0.0003  # KL-adaptive scheduler handles the rest

total_timesteps: 100000  # × 1024 envs ≈ 100M env steps
log_interval: 100
checkpoint_interval: 10000
record_video_every: 10000
remote: false