# PPO tuned for single-env simulation — mimics real hardware training.
# Inherits defaults + HPO ranges from ppo.yaml.
# Same 50 Hz control (runner=mujoco_single), 1 env, conservative hypers.
# Sim runs ~100× faster than real time, so we can afford more timesteps.

defaults:
  - ppo
  - _self_

# Network and optimization hyperparameters.
hidden_sizes: [256, 256]
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0003
entropy_loss_scale: 0.01
rollout_steps: 2048
mini_batches: 8

# Logging / checkpointing cadence (in environment steps).
log_interval: 2048
checkpoint_interval: 10000

# Gaussian policy exploration: initial log-std and clamp bounds.
initial_log_std: -0.5
min_log_std: -4.0
max_log_std: 2.0

record_video_every: 50000

# Run locally (sim), not on the remote/hardware runner.
remote: false