# Viewer metadata from the original file listing (not config data): 19 lines, 556 B, YAML.
# PPO tuned for MJX (1024+ parallel envs on GPU).
# Inherits defaults + HPO ranges from ppo.yaml.
# With 1024 envs, each timestep collects 1024 samples, so total_timesteps
# can be much lower than the CPU config.

# Composition order: load `ppo` first, then apply this file (`_self_`) on top,
# so every key set below overrides the inherited value.
defaults:
  - ppo
  - _self_

total_timesteps: 300000  # 300K × 1024 envs ≈ 307M env steps
mini_batches: 32  # keep mini-batch size similar (~32K)
# NOTE(review): pure sqrt scaling for a 16x larger batch would be 4x; "~3x" is
# presumably relative to the base LR in ppo.yaml — confirm against that file.
learning_rate: 0.001  # ~3x higher LR for 16x larger batch (sqrt scaling)
log_interval: 100
checkpoint_interval: 10000

record_video_every: 10000

remote: false