clean up a lot of stuff

This commit is contained in:
2026-03-22 15:49:13 +01:00
parent d3ed1c25ad
commit ca0e7b8b03
37 changed files with 3613 additions and 1223 deletions

View File

@@ -1,10 +1,19 @@
# Environment settings: step budget, robot asset path, reward and penalty
# weights, a motor-angle safety limit, and the ranges used for the HPO search.
max_steps: 1000
robot_path: assets/rotary_cartpole
reward_upright_scale: 1.0
speed_penalty_scale: 0.1
# ── Regularisation penalties (prevent fast spinning) ─────────────────
motor_vel_penalty: 0.01 # penalise high motor angular velocity
motor_angle_penalty: 0.05 # penalise deviation from centre
action_penalty: 0.05 # penalise large actions (energy cost)
# ── Software safety limit (env-level, always applied) ────────────────
motor_angle_limit_deg: 90.0 # terminate episode if motor exceeds ±90°
# ── HPO search ranges ────────────────────────────────────────────────
hpo:
# NOTE(review): the keys below repeat the top-level parameter names and are
# presumably nested under `hpo:`; indentation is not visible in this view —
# confirm against the real file. Ranges are given as {min, max} bounds, except
# max_steps, which lists discrete candidate values.
reward_upright_scale: {min: 0.5, max: 5.0}
speed_penalty_scale: {min: 0.01, max: 1.0}
motor_vel_penalty: {min: 0.001, max: 0.1}
motor_angle_penalty: {min: 0.01, max: 0.2}
action_penalty: {min: 0.01, max: 0.2}
max_steps: {values: [500, 1000, 2000]}

View File

@@ -1,4 +1,4 @@
num_envs: 1024 # MJX shines with many parallel envs
device: auto # auto = cuda if available, else cpu
dt: 0.002
# NOTE(review): `substeps` is listed twice because this view shows the line
# before and after the change; presumably 20 is the old value and 10 the new
# one — confirm. If substeps means physics steps per control step (as the
# sysid config describes it), dt 0.002 x 10 gives a 0.02 s control period.
substeps: 20
substeps: 10

View File

@@ -1,4 +1,4 @@
num_envs: 64
device: auto # auto = cuda if available, else cpu
dt: 0.002
# NOTE(review): `substeps` is listed twice because this view shows the line
# before and after the change; presumably 20 is the old value and 10 the new
# one — confirm. If substeps means physics steps per control step (as the
# sysid config describes it), dt 0.002 x 10 gives a 0.02 s control period.
substeps: 20
substeps: 10

View File

@@ -6,6 +6,5 @@ num_envs: 1
# Serial connection and control-loop timing for the runner that talks to the
# device on `port`.
device: cpu
port: /dev/cu.usbserial-0001
baud: 115200
# NOTE(review): `dt` is listed twice because this view shows the line before
# and after the change; the value (0.02) is the same, only the comment differs.
dt: 0.02 # control loop period (50 Hz)
dt: 0.02 # control loop period (50 Hz, matches training)
no_data_timeout: 2.0 # seconds of silence before declaring disconnect
encoder_jump_threshold: 200 # encoder tick jump → reboot detection

View File

@@ -1,5 +1,5 @@
# System identification defaults.
# Override via CLI: python -m src.sysid.optimize sysid.max_generations=50
# Override via CLI: python scripts/sysid.py optimize --max-generations 50
#
# These are NOT Hydra config groups — the sysid scripts use argparse.
# This file serves as documentation and can be loaded by custom wrappers.
@@ -8,18 +8,25 @@ capture:
port: /dev/cu.usbserial-0001
baud: 115200
duration: 20.0 # seconds
amplitude: 180 # max PWM magnitude (0–255)
amplitude: 150 # max PWM magnitude — must match firmware MAX_MOTOR_SPEED
hold_min_ms: 50 # PRBS min hold time
hold_max_ms: 300 # PRBS max hold time
dt: 0.02 # sample period (50 Hz)
optimize:
sigma0: 0.3 # CMA-ES initial step size (in [0,1] normalised space)
population_size: 20 # candidates per generation
max_generations: 200 # total generations (~4000 evaluations)
population_size: 50 # candidates per generation
max_generations: 1000 # total generations (~50000 evaluations at population_size 50)
sim_dt: 0.002 # MuJoCo physics timestep
substeps: 10 # physics substeps per control step (ctrl_dt = 0.02s)
pos_weight: 1.0 # MSE weight for angle errors
vel_weight: 0.1 # MSE weight for velocity errors
window_duration: 0.5 # multiple-shooting window length (s); 0 = open-loop
seed: 42
# Tunable hardware-realism params (added to ROTARY_CARTPOLE_PARAMS):
# ctrl_limit — effective motor range → exported as ctrl_range in robot.yaml
# motor_deadzone — L298N minimum |action| for torque → exported as deadzone in robot.yaml
# Firmware sends raw (unfiltered) sensor data; EMA filtering is
# handled on the Python side (env transforms) and is NOT part of
# the sysid parameter search.

View File

@@ -8,15 +8,17 @@ defaults:
- _self_
# NOTE(review): several keys below appear twice because this view shows the
# lines before and after the change; presumably the first occurrence of each
# is the old value and the second the new one — confirm.
hidden_sizes: [256, 256]
total_timesteps: 100000
learning_epochs: 5
learning_rate: 0.001 # conservative — can't undo real-world damage
entropy_loss_scale: 0.0001
log_interval: 1024
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0005 # conservative — can't undo real-world damage
entropy_loss_scale: 0.01
rollout_steps: 2048
mini_batches: 8
log_interval: 2048
checkpoint_interval: 5000 # frequent saves — can't rewind real hardware
initial_log_std: -0.5 # moderate initial exploration
min_log_std: -4.0
max_log_std: 0.0 # cap σ at 1.0
max_log_std: 2.0 # cap σ at exp(2.0) ≈ 7.4 (natural log assumed; "σ at 1.0" held only for 0.0)
# Never run real-hardware training remotely
remote: false

View File

@@ -8,7 +8,7 @@ defaults:
- _self_
hidden_sizes: [256, 256]
# NOTE(review): `total_timesteps` is listed twice because this view shows the
# line before and after the change; presumably 1000000 is the old value and
# 2000000 the new one — confirm.
total_timesteps: 1000000
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0003
entropy_loss_scale: 0.01