♻️ full agent refactor

This commit is contained in:
2026-06-10 21:15:34 +02:00
parent a98e86ef66
commit 1e0836e1bc
49 changed files with 1309 additions and 829 deletions

View File

@@ -1,7 +0,0 @@
max_steps: 500
robot_path: assets/cartpole
angle_threshold: 0.418
cart_limit: 2.4
reward_alive: 1.0
reward_pole_upright_scale: 1.0
reward_action_penalty_scale: 0.01

View File

@@ -9,6 +9,7 @@ balance_vel_scale: 0.5 # how fast the balance bonus decays with pendul
motor_vel_penalty: 0.01 # penalise high motor angular velocity
motor_angle_penalty: 0.05 # penalise deviation from centre
action_penalty: 0.05 # penalise large actions (energy cost)
action_rate_penalty: 0.01 # penalise action changes (real-motor smoothness)
# ── Initial state randomisation ──────────────────────────────────────
pendulum_init_range_deg: 180.0 # pendulum starts in [-180°, +180°]
@@ -22,5 +23,6 @@ hpo:
motor_vel_penalty: {min: 0.001, max: 0.1}
motor_angle_penalty: {min: 0.01, max: 0.2}
action_penalty: {min: 0.01, max: 0.2}
action_rate_penalty: {min: 0.001, max: 0.1}
pendulum_init_range_deg: {min: 30.0, max: 180.0}
max_steps: {values: [500, 1000, 2000]}