Files
RL-Sim-Framework/configs/training/ppo_real.yaml
2026-03-22 15:49:13 +01:00

30 lines
912 B
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# PPO tuned for single-env real-time training on real hardware.
# Inherits defaults + HPO ranges from ppo.yaml.
# ~50 Hz control × 1 env = ~50 timesteps/s.
# 100k timesteps ≈ 33 minutes of wall-clock training; the 2,000,000
# total_timesteps set below ≈ 11 hours at that rate.
# Defaults list (Hydra-style composition): `ppo` is loaded first and `_self_`
# comes last, so the keys in this file take precedence over the base values.
defaults:
  - ppo
  - _self_
# Scalar PPO settings for the single-env real-hardware run.
hidden_sizes: [256, 256]
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0005  # conservative — can't undo real-world damage
# NOTE(review): 0.01 lies outside the hpo.entropy_loss_scale range declared in
# this file (max 0.001) — confirm which of the two is intended.
entropy_loss_scale: 0.01
rollout_steps: 2048
mini_batches: 8
log_interval: 2048
checkpoint_interval: 5000  # frequent saves — can't rewind real hardware
# Log-std bounds below are read as natural logs of the policy std (sigma).
initial_log_std: -0.5  # moderate initial exploration: sigma = exp(-0.5) ~ 0.61
min_log_std: -4.0  # sigma floor: exp(-4.0) ~ 0.018
# Sigma ceiling is exp(2.0) ~ 7.39, not 1.0.
# NOTE(review): if the intent is to cap sigma at 1.0, this value should be 0.0
# — confirm before the next hardware run.
max_log_std: 2.0
# Never run real-hardware training remotely
remote: false
# Tighter HPO ranges for real hardware (override base ppo.yaml ranges).
# The ranges must be nested under `hpo`: written at column 0 they become
# duplicate top-level keys that replace the scalar `entropy_loss_scale` and
# `learning_rate` settings and leave `hpo` itself null.
hpo:
  entropy_loss_scale: {min: 0.00005, max: 0.001}
  learning_rate: {min: 0.0003, max: 0.003}