# PPO sized for MJX (1024+ parallel envs on GPU).
# Inherits defaults + HPO ranges from ppo.yaml.
#
# Short rollouts × many envs is the GPU-PPO sweet spot:
# 24 steps × 1024 envs ≈ 25K samples per update (~6K per mini-batch).
# (The old rollout_steps=2048 inherited from the CPU config meant a
# 2M-sample memory per update — GBs of VRAM and glacial updates.)

# `_self_` is listed after `ppo` so the keys in this file override the
# values pulled in from ppo.yaml.
defaults:
  - ppo
  - _self_

rollout_steps: 24
mini_batches: 4
learning_epochs: 5
learning_rate: 0.0003  # KL-adaptive scheduler handles the rest
total_timesteps: 100000  # × 1024 envs ≈ 100M env steps
log_interval: 100
checkpoint_interval: 10000

record_video_every: 10000

remote: false