✨ clean up a lot of stuff
This commit is contained in:
13
configs/env/rotary_cartpole.yaml
vendored
13
configs/env/rotary_cartpole.yaml
vendored
@@ -1,10 +1,19 @@
|
||||
max_steps: 1000
|
||||
robot_path: assets/rotary_cartpole
|
||||
reward_upright_scale: 1.0
|
||||
speed_penalty_scale: 0.1
|
||||
|
||||
# ── Regularisation penalties (prevent fast spinning) ─────────────────
|
||||
motor_vel_penalty: 0.01 # penalise high motor angular velocity
|
||||
motor_angle_penalty: 0.05 # penalise deviation from centre
|
||||
action_penalty: 0.05 # penalise large actions (energy cost)
|
||||
|
||||
# ── Software safety limit (env-level, always applied) ────────────────
|
||||
motor_angle_limit_deg: 90.0 # terminate episode if motor exceeds ±90°
|
||||
|
||||
# ── HPO search ranges ────────────────────────────────────────────────
|
||||
hpo:
|
||||
reward_upright_scale: {min: 0.5, max: 5.0}
|
||||
speed_penalty_scale: {min: 0.01, max: 1.0}
|
||||
motor_vel_penalty: {min: 0.001, max: 0.1}
|
||||
motor_angle_penalty: {min: 0.01, max: 0.2}
|
||||
action_penalty: {min: 0.01, max: 0.2}
|
||||
max_steps: {values: [500, 1000, 2000]}
|
||||
@@ -1,4 +1,4 @@
|
||||
num_envs: 1024 # MJX shines with many parallel envs
|
||||
device: auto # auto = cuda if available, else cpu
|
||||
dt: 0.002
|
||||
substeps: 20
|
||||
substeps: 10
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
num_envs: 64
|
||||
device: auto # auto = cuda if available, else cpu
|
||||
dt: 0.002
|
||||
substeps: 20
|
||||
substeps: 10
|
||||
|
||||
@@ -6,6 +6,5 @@ num_envs: 1
|
||||
device: cpu
|
||||
port: /dev/cu.usbserial-0001
|
||||
baud: 115200
|
||||
dt: 0.02 # control loop period (50 Hz)
|
||||
dt: 0.02 # control loop period (50 Hz, matches training)
|
||||
no_data_timeout: 2.0 # seconds of silence before declaring disconnect
|
||||
encoder_jump_threshold: 200 # encoder tick jump → reboot detection
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# System identification defaults.
|
||||
# Override via CLI: python -m src.sysid.optimize sysid.max_generations=50
|
||||
# Override via CLI: python scripts/sysid.py optimize --max-generations 50
|
||||
#
|
||||
# These are NOT Hydra config groups — the sysid scripts use argparse.
|
||||
# This file serves as documentation and can be loaded by custom wrappers.
|
||||
@@ -8,18 +8,25 @@ capture:
|
||||
port: /dev/cu.usbserial-0001
|
||||
baud: 115200
|
||||
duration: 20.0 # seconds
|
||||
amplitude: 180 # max PWM magnitude (0–255)
|
||||
amplitude: 150 # max PWM magnitude — must match firmware MAX_MOTOR_SPEED
|
||||
hold_min_ms: 50 # PRBS min hold time
|
||||
hold_max_ms: 300 # PRBS max hold time
|
||||
dt: 0.02 # sample period (50 Hz)
|
||||
|
||||
optimize:
|
||||
sigma0: 0.3 # CMA-ES initial step size (in [0,1] normalised space)
|
||||
population_size: 20 # candidates per generation
|
||||
max_generations: 200 # total generations (~4000 evaluations)
|
||||
population_size: 50 # candidates per generation
|
||||
max_generations: 1000 # total generations (~50000 evaluations at population_size 50)
|
||||
sim_dt: 0.002 # MuJoCo physics timestep
|
||||
substeps: 10 # physics substeps per control step (ctrl_dt = 0.02s)
|
||||
pos_weight: 1.0 # MSE weight for angle errors
|
||||
vel_weight: 0.1 # MSE weight for velocity errors
|
||||
window_duration: 0.5 # multiple-shooting window length (s); 0 = open-loop
|
||||
seed: 42
|
||||
|
||||
# Tunable hardware-realism params (added to ROTARY_CARTPOLE_PARAMS):
|
||||
# ctrl_limit — effective motor range → exported as ctrl_range in robot.yaml
|
||||
# motor_deadzone — L298N minimum |action| for torque → exported as deadzone in robot.yaml
|
||||
# Firmware sends raw (unfiltered) sensor data; EMA filtering is
|
||||
# handled on the Python side (env transforms) and is NOT part of
|
||||
# the sysid parameter search.
|
||||
|
||||
@@ -8,15 +8,17 @@ defaults:
|
||||
- _self_
|
||||
|
||||
hidden_sizes: [256, 256]
|
||||
total_timesteps: 100000
|
||||
learning_epochs: 5
|
||||
learning_rate: 0.001 # conservative — can't undo real-world damage
|
||||
entropy_loss_scale: 0.0001
|
||||
log_interval: 1024
|
||||
total_timesteps: 2000000
|
||||
learning_epochs: 10
|
||||
learning_rate: 0.0005 # conservative — can't undo real-world damage
|
||||
entropy_loss_scale: 0.01
|
||||
rollout_steps: 2048
|
||||
mini_batches: 8
|
||||
log_interval: 2048
|
||||
checkpoint_interval: 5000 # frequent saves — can't rewind real hardware
|
||||
initial_log_std: -0.5 # moderate initial exploration
|
||||
min_log_std: -4.0
|
||||
max_log_std: 0.0 # cap σ at 1.0
|
||||
max_log_std: 2.0 # cap σ at e^2 ≈ 7.4
|
||||
|
||||
# Never run real-hardware training remotely
|
||||
remote: false
|
||||
|
||||
@@ -8,7 +8,7 @@ defaults:
|
||||
- _self_
|
||||
|
||||
hidden_sizes: [256, 256]
|
||||
total_timesteps: 1000000
|
||||
total_timesteps: 2000000
|
||||
learning_epochs: 10
|
||||
learning_rate: 0.0003
|
||||
entropy_loss_scale: 0.01
|
||||
|
||||
Reference in New Issue
Block a user