Files
RL-Sim-Framework/configs/training/ppo.yaml
2026-03-09 20:39:02 +01:00

17 lines
330 B
YAML

# Training configuration for PPO (per the file path configs/training/ppo.yaml).
# NOTE(review): the per-key notes below are inferred from the key names only;
# the code that consumes this file is not visible here — confirm before relying on them.

# Two entries of 128 — presumably the widths of the network's hidden layers.
hidden_sizes: [128, 128]
# Presumably the total number of environment steps for the whole training run (5,000,000).
total_timesteps: 5000000
# Presumably the number of steps collected per rollout before each update — TODO confirm
# whether this is per environment or across all environments.
rollout_steps: 1024
# Presumably the number of optimisation passes over each collected rollout.
learning_epochs: 4
# Presumably the number of mini-batches each rollout is split into per epoch.
mini_batches: 4
# Presumably the reward discount (gamma).
discount_factor: 0.99
# Presumably the lambda parameter of Generalized Advantage Estimation.
gae_lambda: 0.95
# Optimiser learning rate (3e-4). Kept in plain decimal form: some YAML 1.1
# loaders read exponent forms without a decimal point (e.g. "3e-4") as strings.
learning_rate: 0.0003
# Presumably the PPO surrogate-objective clipping range (epsilon).
clip_ratio: 0.2
# Presumably the weight of the value-function loss in the total loss.
value_loss_scale: 0.5
# Presumably the weight of the entropy bonus in the total loss.
entropy_loss_scale: 0.05
# Presumably how often metrics are logged — units (timesteps vs. updates) not shown here; verify.
log_interval: 1000
# Presumably how often a checkpoint is saved — units (timesteps vs. updates) not shown here; verify.
checkpoint_interval: 50000
# ClearML remote execution (GPU worker).
# Disabled here; presumably setting this to true hands the run to a remote ClearML worker — confirm.
remote: false