---
# PPO tuned for single-env simulation — mimics real hardware training.
# Inherits defaults + HPO ranges from ppo.yaml.
# Same 50 Hz control (runner=mujoco_single), 1 env, conservative hypers.
# Sim runs ~100× faster than real time, so we can afford more timesteps.

# Hydra composition: base ppo config first, then this file's overrides.
defaults:
  - ppo
  - _self_

# Network / optimization overrides (presumably consumed by the PPO
# trainer defined alongside ppo.yaml — confirm key names there).
hidden_sizes: [256, 256]
total_timesteps: 500000
learning_epochs: 5
learning_rate: 0.001
entropy_loss_scale: 0.0001
log_interval: 1024
checkpoint_interval: 10000
# Bounds for the Gaussian policy's log standard deviation.
initial_log_std: -0.5
min_log_std: -4.0
max_log_std: 0.0

record_video_every: 50000

# Run locally rather than on remote hardware.
remote: false