✨ clean up a lot of stuff
This commit is contained in:
@@ -8,15 +8,17 @@ defaults:
|
||||
- _self_
|
||||
|
||||
hidden_sizes: [256, 256]
|
||||
total_timesteps: 100000
|
||||
learning_epochs: 5
|
||||
learning_rate: 0.001 # conservative — can't undo real-world damage
|
||||
entropy_loss_scale: 0.0001
|
||||
log_interval: 1024
|
||||
total_timesteps: 2000000
|
||||
learning_epochs: 10
|
||||
learning_rate: 0.0005 # conservative — can't undo real-world damage
|
||||
entropy_loss_scale: 0.01
|
||||
rollout_steps: 2048
|
||||
mini_batches: 8
|
||||
log_interval: 2048
|
||||
checkpoint_interval: 5000 # frequent saves — can't rewind real hardware
|
||||
initial_log_std: -0.5 # moderate initial exploration
|
||||
min_log_std: -4.0
|
||||
max_log_std: 0.0 # cap σ at 1.0
|
||||
max_log_std: 2.0 # cap σ at e^2 ≈ 7.39
|
||||
|
||||
# Never run real-hardware training remotely
|
||||
remote: false
|
||||
|
||||
@@ -8,7 +8,7 @@ defaults:
|
||||
- _self_
|
||||
|
||||
hidden_sizes: [256, 256]
|
||||
total_timesteps: 1000000
|
||||
total_timesteps: 2000000
|
||||
learning_epochs: 10
|
||||
learning_rate: 0.0003
|
||||
entropy_loss_scale: 0.01
|
||||
|
||||
Reference in New Issue
Block a user