# RL-Sim-Framework/configs/env/rotary_cartpole.yaml

# Step budget (presumably the per-episode horizon; confirm against the env).
max_steps: 1000

# Location of the robot assets (confirm how the loader resolves this path).
robot_path: assets/rotary_cartpole

# Scale factor for the upright reward term (per its name).
reward_upright_scale: 1.0

# Per-step survival bonus: staying alive has to be worth more than dying.
alive_bonus: 0.25

# Additional reward for being upright AND still, so balancing beats spinning.
balance_bonus: 2.0

# Controls how fast the balance bonus decays with pendulum speed.
balance_vel_scale: 0.5

# ── Regularisation penalties: discourage fast spinning ───────────────
# Cost on high motor angular velocity.
motor_vel_penalty: 0.01

# Cost on the motor deviating from centre.
motor_angle_penalty: 0.05

# Cost on large actions (energy cost).
action_penalty: 0.05

# Cost on action changes between steps (smoothness for a real motor).
action_rate_penalty: 0.01

# ── Initial state randomisation ──────────────────────────────────────
# Half-width in degrees of the pendulum start range: 180.0 means the
# pendulum starts in [-180°, +180°].
pendulum_init_range_deg: 180.0

# ── Software safety limit (env-level, always applied) ────────────────
# The episode is terminated once the motor goes beyond ±90°.
motor_angle_limit_deg: 90.0

# ── HPO search ranges ────────────────────────────────────────────────
# Each entry names a top-level key of this file and gives either a
# min/max range or an explicit list of candidate values.
# NOTE(review): several entries rescale the reward or change the task
# (init range, episode length); confirm the HPO objective stays comparable
# across trials.
hpo:
  reward_upright_scale:
    min: 0.5
    max: 5.0
  motor_vel_penalty:
    min: 0.001
    max: 0.1
  motor_angle_penalty:
    min: 0.01
    max: 0.2
  action_penalty:
    min: 0.01
    max: 0.2
  action_rate_penalty:
    min: 0.001
    max: 0.1
  pendulum_init_range_deg:
    min: 30.0
    max: 180.0
  max_steps:
    values: [500, 1000, 2000]