# Environment configuration: episode settings, reward shaping terms,
# regularisation penalties, initial-state randomisation, a software safety
# limit, and the hyper-parameter optimisation (HPO) search ranges.

# ── Episode and assets ───────────────────────────────────────────────
max_steps: 1000
robot_path: assets/rotary_cartpole

# ── Reward shaping ───────────────────────────────────────────────────
reward_upright_scale: 1.0
alive_bonus: 0.25  # per-step survival bonus (living must beat dying)
balance_bonus: 2.0  # extra reward for upright AND still (beats spinning)
balance_vel_scale: 0.5  # how fast the balance bonus decays with pendulum speed

# ── Regularisation penalties (prevent fast spinning) ─────────────────
motor_vel_penalty: 0.01  # penalise high motor angular velocity
motor_angle_penalty: 0.05  # penalise deviation from centre
action_penalty: 0.05  # penalise large actions (energy cost)

# ── Initial state randomisation ──────────────────────────────────────
pendulum_init_range_deg: 180.0  # pendulum starts in [-180°, +180°]

# ── Software safety limit (env-level, always applied) ────────────────
motor_angle_limit_deg: 90.0  # terminate episode if motor exceeds ±90°

# ── HPO search ranges ────────────────────────────────────────────────
# Each entry is keyed by the top-level setting of the same name and gives
# either a {min, max} range or an explicit list of candidate values.
hpo:
  reward_upright_scale: {min: 0.5, max: 5.0}
  motor_vel_penalty: {min: 0.001, max: 0.1}
  motor_angle_penalty: {min: 0.01, max: 0.2}
  action_penalty: {min: 0.01, max: 0.2}
  pendulum_init_range_deg: {min: 30.0, max: 180.0}
  max_steps: {values: [500, 1000, 2000]}