# RL-Sim-Framework/configs/training/ppo_mjx.yaml

# PPO sized for MJX (1024+ parallel envs on GPU).
# Inherits defaults + HPO ranges from ppo.yaml.
#
# Short rollouts × many envs is the GPU-PPO sweet spot:
# 24 steps × 1024 envs ≈ 25K samples per update (~6K per mini-batch).
# (The old rollout_steps=2048 inherited from the CPU config meant a
#  2M-sample memory per update — GBs of VRAM and glacial updates.)

# Hydra-style defaults list, composed in order: `ppo` is loaded first, then
# `_self_` (this file), so the keys set below override the inherited values.
defaults:
  - ppo
  - _self_

# Overrides applied on top of the inherited `ppo` config.
# The per-update sample math below assumes the 1024 parallel envs quoted in
# this file's header; the env count itself is not set in this file.

# 24 steps × 1024 envs ≈ 25K samples collected per update.
rollout_steps: 24
# ≈ 25K samples split 4 ways ≈ 6K samples per mini-batch.
mini_batches: 4
# NOTE(review): presumably the number of optimization passes over each
# rollout — confirm against the trainer.
learning_epochs: 5
learning_rate: 0.0003           # KL-adaptive scheduler handles the rest
total_timesteps: 100000         # × 1024 envs ≈ 100M env steps
# NOTE(review): the units of the three intervals below are not visible here;
# presumably the same step counter as total_timesteps — confirm.
log_interval: 100
checkpoint_interval: 10000

record_video_every: 10000

# NOTE(review): the meaning of `remote` is defined by whatever consumes this
# config, not by this file; it is disabled here.
remote: false