clean up lot of stuff

This commit is contained in:
2026-03-22 15:49:13 +01:00
parent d3ed1c25ad
commit ca0e7b8b03
37 changed files with 3613 additions and 1223 deletions

View File

@@ -8,15 +8,17 @@ defaults:
- _self_
hidden_sizes: [256, 256]
total_timesteps: 100000
learning_epochs: 5
learning_rate: 0.001 # conservative — can't undo real-world damage
entropy_loss_scale: 0.0001
log_interval: 1024
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0005 # conservative — can't undo real-world damage
entropy_loss_scale: 0.01
rollout_steps: 2048
mini_batches: 8
log_interval: 2048
checkpoint_interval: 5000 # frequent saves — can't rewind real hardware
initial_log_std: -0.5 # moderate initial exploration
min_log_std: -4.0
max_log_std: 0.0 # cap σ at 1.0
max_log_std: 2.0 # cap σ at 1.0
# Never run real-hardware training remotely
remote: false

View File

@@ -8,7 +8,7 @@ defaults:
- _self_
hidden_sizes: [256, 256]
total_timesteps: 1000000
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0003
entropy_loss_scale: 0.01