# RL-Sim-Framework/configs/training/ppo_mjx.yaml

# PPO tuned for MJX (1024+ parallel envs on GPU).
# Inherits defaults + HPO ranges from ppo.yaml.
# With 1024 envs, each timestep collects 1024 samples, so total_timesteps
# can be much lower than the CPU config.

# Composition order (Hydra-style defaults list, assuming Hydra semantics):
# `ppo` is loaded first and this file (`_self_`) last, so every key set
# below overrides the value inherited from ppo.
defaults:
  - ppo
  - _self_

# Budget is expressed in vectorized steps: one step advances all 1024 envs.
total_timesteps: 300000         # 300K × 1024 envs ≈ 307M env steps
# Number of mini-batches each collected batch is split into.
# NOTE(review): "similar" presumably means similar to the CPU ppo config;
# the ~32K figure depends on the rollout length, which is not set here.
mini_batches: 32                # keep mini-batch size similar (~32K)
# NOTE(review): strict sqrt scaling for a 16x larger batch would be 4x, not
# ~3x; presumably rounded from the base LR in ppo.yaml — confirm.
learning_rate: 0.001            # ~3x higher LR for 16x larger batch (sqrt scaling)
# Logging / checkpoint cadence. Units are not shown in this file
# (presumably the same step unit as total_timesteps) — TODO confirm.
log_interval: 100
checkpoint_interval: 10000

# Video recording cadence; same value as checkpoint_interval above.
# Units are defined by the consumer — TODO confirm.
record_video_every: 10000

# NOTE(review): semantics of `remote` are defined by the consumer of this
# config; presumably false means run locally — confirm.
remote: false