✨ clean up lot of stuff

2026-03-22 15:49:13 +01:00
parent d3ed1c25ad
commit ca0e7b8b03
37 changed files with 3613 additions and 1223 deletions
--- a/configs/env/rotary_cartpole.yaml
+++ b/configs/env/rotary_cartpole.yaml
@@ -1,10 +1,19 @@
 max_steps: 1000
 robot_path: assets/rotary_cartpole
 reward_upright_scale: 1.0
-speed_penalty_scale: 0.1
+
+# ── Regularisation penalties (prevent fast spinning) ─────────────────
+motor_vel_penalty: 0.01          # penalise high motor angular velocity
+motor_angle_penalty: 0.05        # penalise deviation from centre
+action_penalty: 0.05             # penalise large actions (energy cost)
+
+# ── Software safety limit (env-level, always applied) ────────────────
+motor_angle_limit_deg: 90.0      # terminate episode if motor exceeds ±90°

 # ── HPO search ranges ────────────────────────────────────────────────
 hpo:
  reward_upright_scale: {min: 0.5, max: 5.0}
-  speed_penalty_scale: {min: 0.01, max: 1.0}
+  motor_vel_penalty: {min: 0.001, max: 0.1}
+  motor_angle_penalty: {min: 0.01, max: 0.2}
+  action_penalty: {min: 0.01, max: 0.2}
  max_steps: {values: [500, 1000, 2000]}
--- a/configs/runner/mjx.yaml
+++ b/configs/runner/mjx.yaml
@@ -1,4 +1,4 @@
 num_envs: 1024       # MJX shines with many parallel envs
 device: auto         # auto = cuda if available, else cpu
 dt: 0.002
-substeps: 20
+substeps: 10
--- a/configs/runner/mujoco.yaml
+++ b/configs/runner/mujoco.yaml
@@ -1,4 +1,4 @@
 num_envs: 64
 device: auto  # auto = cuda if available, else cpu
 dt: 0.002
-substeps: 20
+substeps: 10
--- a/configs/runner/serial.yaml
+++ b/configs/runner/serial.yaml
@@ -6,6 +6,5 @@ num_envs: 1
 device: cpu
 port: /dev/cu.usbserial-0001
 baud: 115200
-dt: 0.02                    # control loop period (50 Hz)
+dt: 0.02                    # control loop period (50 Hz, matches training)
 no_data_timeout: 2.0        # seconds of silence before declaring disconnect
-encoder_jump_threshold: 200  # encoder tick jump → reboot detection
--- a/configs/sysid.yaml
+++ b/configs/sysid.yaml
@@ -1,5 +1,5 @@
 # System identification defaults.
-# Override via CLI: python -m src.sysid.optimize sysid.max_generations=50
+# Override via CLI: python scripts/sysid.py optimize --max-generations 50
 #
 # These are NOT Hydra config groups — the sysid scripts use argparse.
 # This file serves as documentation and can be loaded by custom wrappers.
@@ -8,18 +8,25 @@ capture:
  port: /dev/cu.usbserial-0001
  baud: 115200
  duration: 20.0          # seconds
-  amplitude: 180           # max PWM magnitude (0–255)
+  amplitude: 150           # max PWM magnitude — must match firmware MAX_MOTOR_SPEED
  hold_min_ms: 50          # PRBS min hold time
  hold_max_ms: 300         # PRBS max hold time
  dt: 0.02                 # sample period (50 Hz)

 optimize:
  sigma0: 0.3              # CMA-ES initial step size (in [0,1] normalised space)
-  population_size: 20      # candidates per generation
-  max_generations: 200     # total generations (~4000 evaluations)
+  population_size: 50      # candidates per generation
+  max_generations: 1000     # total generations (~50,000 evaluations at population_size 50)
  sim_dt: 0.002            # MuJoCo physics timestep
  substeps: 10             # physics substeps per control step (ctrl_dt = 0.02s)
  pos_weight: 1.0          # MSE weight for angle errors
  vel_weight: 0.1          # MSE weight for velocity errors
  window_duration: 0.5     # multiple-shooting window length (s); 0 = open-loop
  seed: 42
+
+# Tunable hardware-realism params (added to ROTARY_CARTPOLE_PARAMS):
+#   ctrl_limit      — effective motor range → exported as ctrl_range in robot.yaml
+#   motor_deadzone  — L298N minimum |action| for torque → exported as deadzone in robot.yaml
+# Firmware sends raw (unfiltered) sensor data; EMA filtering is
+# handled on the Python side (env transforms) and is NOT part of
+# the sysid parameter search.
--- a/configs/training/ppo_real.yaml
+++ b/configs/training/ppo_real.yaml
@@ -8,15 +8,17 @@ defaults:
  - _self_

 hidden_sizes: [256, 256]
-total_timesteps: 100000
-learning_epochs: 5
-learning_rate: 0.001            # conservative — can't undo real-world damage
-entropy_loss_scale: 0.0001
-log_interval: 1024
+total_timesteps: 2000000
+learning_epochs: 10
+learning_rate: 0.0005            # conservative — can't undo real-world damage
+entropy_loss_scale: 0.01
+rollout_steps: 2048
+mini_batches: 8
+log_interval: 2048
 checkpoint_interval: 5000       # frequent saves — can't rewind real hardware
 initial_log_std: -0.5           # moderate initial exploration
 min_log_std: -4.0
-max_log_std: 0.0                # cap σ at 1.0
+max_log_std: 2.0                # cap σ at e^2 ≈ 7.39

 # Never run real-hardware training remotely
 remote: false
--- a/configs/training/ppo_single.yaml
+++ b/configs/training/ppo_single.yaml
@@ -8,7 +8,7 @@ defaults:
  - _self_

 hidden_sizes: [256, 256]
-total_timesteps: 1000000
+total_timesteps: 2000000
 learning_epochs: 10
 learning_rate: 0.0003
 entropy_loss_scale: 0.01