---
# Environment configuration for the robot at assets/rotary_cartpole.
# Top-level keys are the default values; the `hpo` mapping at the bottom
# lists search ranges for a subset of those same keys.

max_steps: 1000
robot_path: assets/rotary_cartpole

# ── Reward terms ─────────────────────────────────────────────────────
reward_upright_scale: 1.0
alive_bonus: 0.25        # per-step survival bonus (living must beat dying)
balance_bonus: 2.0       # extra reward for upright AND still (beats spinning)
balance_vel_scale: 0.5   # how fast the balance bonus decays with pendulum speed

# ── Regularisation penalties (prevent fast spinning) ─────────────────
motor_vel_penalty: 0.01    # penalise high motor angular velocity
motor_angle_penalty: 0.05  # penalise deviation from centre
action_penalty: 0.05       # penalise large actions (energy cost)
action_rate_penalty: 0.01  # penalise action changes (real-motor smoothness)

# ── Initial state randomisation ──────────────────────────────────────
pendulum_init_range_deg: 180.0  # pendulum starts in [-180°, +180°]

# ── Software safety limit (env-level, always applied) ────────────────
motor_angle_limit_deg: 90.0  # terminate episode if motor exceeds ±90°

# ── HPO search ranges ────────────────────────────────────────────────
# Keys here mirror the top-level parameter names above and MUST stay nested
# under `hpo:`; at column 0 they would collide with (and, in last-wins
# parsers, silently replace) the scalar defaults.
# NOTE(review): presumably {min, max} is a continuous range and {values: [...]}
# a discrete choice set — confirm against the HPO loader.
hpo:
  reward_upright_scale: {min: 0.5, max: 5.0}
  motor_vel_penalty: {min: 0.001, max: 0.1}
  motor_angle_penalty: {min: 0.01, max: 0.2}
  action_penalty: {min: 0.01, max: 0.2}
  action_rate_penalty: {min: 0.001, max: 0.1}
  pendulum_init_range_deg: {min: 30.0, max: 180.0}
  max_steps: {values: [500, 1000, 2000]}