♻️ full agent refactor

2026-06-10 21:15:34 +02:00
parent a98e86ef66
commit 1e0836e1bc
49 changed files with 1309 additions and 829 deletions
--- a/configs/env/cartpole.yaml
+++ b/configs/env/cartpole.yaml
@@ -1,7 +0,0 @@
-max_steps: 500
-robot_path: assets/cartpole
-angle_threshold: 0.418
-cart_limit: 2.4
-reward_alive: 1.0
-reward_pole_upright_scale: 1.0
-reward_action_penalty_scale: 0.01
--- a/configs/env/rotary_cartpole.yaml
+++ b/configs/env/rotary_cartpole.yaml
@@ -9,6 +9,7 @@ balance_vel_scale: 0.5           # how fast the balance bonus decays with pendul
 motor_vel_penalty: 0.01          # penalise high motor angular velocity
 motor_angle_penalty: 0.05        # penalise deviation from centre
 action_penalty: 0.05             # penalise large actions (energy cost)
+action_rate_penalty: 0.01        # penalise the rate of change of the action (real-motor smoothness)

 # ── Initial state randomisation ──────────────────────────────────────
 pendulum_init_range_deg: 180.0   # pendulum starts in [-180°, +180°]
@@ -22,5 +23,6 @@ hpo:
  motor_vel_penalty: {min: 0.001, max: 0.1}
  motor_angle_penalty: {min: 0.01, max: 0.2}
  action_penalty: {min: 0.01, max: 0.2}
+  action_rate_penalty: {min: 0.001, max: 0.1}
  pendulum_init_range_deg: {min: 30.0, max: 180.0}
  max_steps: {values: [500, 1000, 2000]}