feat: sim2real domain randomization + reward fixes for rotary cartpole
Close the sim2real gap for the Furuta pendulum (swings up but can't balance on hardware). Root causes were (a) no domain randomization, so the policy overfit one deterministic sim instance, and (b) reward design flaws that produced degenerate policies. Domain randomization (runner-level, backend-agnostic): - BaseRunner: domain_rand config; per-env action-delay buffer (latency), Gaussian qpos/qvel sensor noise, per-env dynamics-scale sampling (friction/damping/torque), resampled per episode. Sensor noise per step. - privileged_obs/privileged_dim expose normalized DR factors (mu) for RMA. - step() now uses clean state for reward/termination, noisy state for the observation the policy sees. - MuJoCoRunner: applies per-env friction/damping/torque scales. - robot.py: compute_motor_force gains friction/damping scale args. - Configs: DR blocks for mujoco (full) and mjx (delay+noise); clean defaults for mujoco_single/serial; noise/delay anchored to recordings. Reward fixes (rotary_cartpole): - Shift upright reward to [0,1] (was [-1,1]) + alive_bonus, so surviving always beats ending early (kills the "suicide into the limit" policy). - Add balance_bonus * upright * stillness so reward requires upright AND near-zero pendulum velocity (kills the "spin in full loops" policy). Deploy: - eval.py load_policy reconstructs the history/adaptation encoder (auto-detects its dim from the checkpoint) so DR+embedding policies load. Fixes: - MuJoCoRunner._sim_reset referenced self._env (typo) -> self.env, which was breaking every rotary-cartpole reset. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,14 +4,21 @@ Minimises the trajectory-matching cost between a MuJoCo rollout and a
|
||||
recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python
|
||||
CMA-ES with native box-constraint support).
|
||||
|
||||
All 28 parameters (motor + pendulum/arm) are optimised jointly from a
|
||||
single full-system recording. Velocities are optionally preprocessed
|
||||
with Savitzky-Golay differentiation for cleaner targets.
|
||||
Motor parameters are **locked** from the motor-only sysid — only
|
||||
pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
|
||||
optimised. Velocities are optionally preprocessed with Savitzky-Golay
|
||||
differentiation for cleaner targets.
|
||||
|
||||
Usage:
|
||||
python -m src.sysid.optimize \
|
||||
--robot-path assets/rotary_cartpole \
|
||||
--recording assets/rotary_cartpole/recordings/capture_....npz
|
||||
--recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz
|
||||
|
||||
# Shorter run for testing:
|
||||
python -m src.sysid.optimize \
|
||||
--robot-path assets/rotary_cartpole \
|
||||
--recording <file>.npz \
|
||||
--max-generations 10 --population-size 8
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -26,17 +33,13 @@ import numpy as np
|
||||
import structlog
|
||||
|
||||
from src.sysid.rollout import (
|
||||
LOCKED_MOTOR_PARAMS,
|
||||
PARAM_SETS,
|
||||
ROTARY_CARTPOLE_PARAMS,
|
||||
ParamSpec,
|
||||
_make_actuator,
|
||||
_resolve_params,
|
||||
bounds_arrays,
|
||||
build_base_model,
|
||||
defaults_dict,
|
||||
defaults_vector,
|
||||
params_to_dict,
|
||||
patch_model,
|
||||
rollout,
|
||||
windowed_rollout,
|
||||
)
|
||||
@@ -45,8 +48,62 @@ log = structlog.get_logger()
|
||||
|
||||
|
||||
# ── Velocity preprocessing ───────────────────────────────────────────
|
||||
# Shared implementation in src.sysid.preprocess.
|
||||
from src.sysid.preprocess import preprocess_recording as _preprocess_recording
|
||||
|
||||
|
||||
def _preprocess_recording(
|
||||
recording: dict[str, np.ndarray],
|
||||
preprocess_vel: bool = True,
|
||||
sg_window: int = 7,
|
||||
sg_polyorder: int = 3,
|
||||
) -> dict[str, np.ndarray]:
|
||||
"""Optionally recompute velocities using Savitzky-Golay differentiation.
|
||||
|
||||
Applies SG filtering to both motor_vel and pendulum_vel, replacing
|
||||
the noisy firmware finite-difference velocities with smooth
|
||||
analytical derivatives of the (clean) angle signals.
|
||||
"""
|
||||
if not preprocess_vel:
|
||||
return recording
|
||||
|
||||
from scipy.signal import savgol_filter
|
||||
|
||||
rec = dict(recording)
|
||||
times = rec["time"]
|
||||
dt = float(np.mean(np.diff(times)))
|
||||
|
||||
# Motor velocity.
|
||||
rec["motor_vel_raw"] = rec["motor_vel"].copy()
|
||||
rec["motor_vel"] = savgol_filter(
|
||||
rec["motor_angle"],
|
||||
window_length=sg_window,
|
||||
polyorder=sg_polyorder,
|
||||
deriv=1,
|
||||
delta=dt,
|
||||
)
|
||||
|
||||
# Pendulum velocity.
|
||||
rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
|
||||
rec["pendulum_vel"] = savgol_filter(
|
||||
rec["pendulum_angle"],
|
||||
window_length=sg_window,
|
||||
polyorder=sg_polyorder,
|
||||
deriv=1,
|
||||
delta=dt,
|
||||
)
|
||||
|
||||
motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
|
||||
pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
|
||||
log.info(
|
||||
"velocity_preprocessed",
|
||||
method="savgol",
|
||||
sg_window=sg_window,
|
||||
sg_polyorder=sg_polyorder,
|
||||
dt_ms=f"{dt*1000:.1f}",
|
||||
motor_noise_std=f"{motor_noise:.3f} rad/s",
|
||||
pend_noise_std=f"{pend_noise:.3f} rad/s",
|
||||
)
|
||||
|
||||
return rec
|
||||
|
||||
|
||||
# ── Cost function ────────────────────────────────────────────────────
|
||||
@@ -115,6 +172,7 @@ def cost_function(
|
||||
vel_weight: float = 0.1,
|
||||
pendulum_scale: float = 3.0,
|
||||
window_duration: float = 0.5,
|
||||
motor_params: dict[str, float] | None = None,
|
||||
) -> float:
|
||||
"""Compute trajectory-matching cost for a candidate parameter vector.
|
||||
|
||||
@@ -140,6 +198,7 @@ def cost_function(
|
||||
window_duration=window_duration,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
motor_params=motor_params,
|
||||
)
|
||||
else:
|
||||
sim = rollout(
|
||||
@@ -148,6 +207,7 @@ def cost_function(
|
||||
actions=recording["action"],
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
motor_params=motor_params,
|
||||
)
|
||||
except Exception as exc:
|
||||
log.warning("rollout_failed", error=str(exc))
|
||||
@@ -161,122 +221,6 @@ def cost_function(
|
||||
return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
|
||||
|
||||
|
||||
# ── Fast cost (model-cached) ────────────────────────────────────────
|
||||
|
||||
|
||||
def _fast_cost(
|
||||
params_vec: np.ndarray,
|
||||
recording: dict[str, np.ndarray],
|
||||
model, # mujoco.MjModel (cached, mutated in-place)
|
||||
body_ids: dict[str, int],
|
||||
dof_ids: dict[str, int],
|
||||
specs: list[ParamSpec],
|
||||
sim_dt: float = 0.002,
|
||||
substeps: int = 10,
|
||||
pos_weight: float = 1.0,
|
||||
vel_weight: float = 0.1,
|
||||
pendulum_scale: float = 3.0,
|
||||
window_duration: float = 0.5,
|
||||
) -> float:
|
||||
"""Like cost_function but patches a cached model instead of rebuilding."""
|
||||
import mujoco as _mj
|
||||
from src.runners.mujoco import ActuatorLimits
|
||||
|
||||
params = params_to_dict(params_vec, specs)
|
||||
if not _check_inertia_valid(params):
|
||||
return 1e6
|
||||
|
||||
# Patch the cached model in-place — no XML, no temp files.
|
||||
patch_model(model, params, body_ids, dof_ids)
|
||||
|
||||
# Rebuild actuator from params (motor params change per candidate).
|
||||
actuator = _make_actuator(params)
|
||||
|
||||
try:
|
||||
data = _mj.MjData(model)
|
||||
limits = ActuatorLimits(model)
|
||||
ctrl_limit = params.get("ctrl_limit", 0.588)
|
||||
|
||||
times = recording["time"]
|
||||
actions = recording["action"]
|
||||
n = len(actions)
|
||||
|
||||
sim_motor_angle = np.zeros(n, dtype=np.float64)
|
||||
sim_motor_vel = np.zeros(n, dtype=np.float64)
|
||||
sim_pend_angle = np.zeros(n, dtype=np.float64)
|
||||
sim_pend_vel = np.zeros(n, dtype=np.float64)
|
||||
|
||||
if window_duration > 0:
|
||||
# Windowed (multiple shooting).
|
||||
real_motor = recording["motor_angle"]
|
||||
real_motor_vel = recording["motor_vel"]
|
||||
real_pend = recording["pendulum_angle"]
|
||||
real_pend_vel = recording["pendulum_vel"]
|
||||
|
||||
t0, t_end = times[0], times[-1]
|
||||
window_starts: list[int] = []
|
||||
current_t = t0
|
||||
while current_t < t_end:
|
||||
idx = int(np.searchsorted(times, current_t))
|
||||
idx = min(idx, n - 1)
|
||||
window_starts.append(idx)
|
||||
current_t += window_duration
|
||||
n_windows = len(window_starts)
|
||||
|
||||
for w, w_start in enumerate(window_starts):
|
||||
w_end = window_starts[w + 1] if w + 1 < n_windows else n
|
||||
_mj.mj_resetData(model, data)
|
||||
data.qpos[0] = real_motor[w_start]
|
||||
data.qpos[1] = real_pend[w_start]
|
||||
data.qvel[0] = real_motor_vel[w_start]
|
||||
data.qvel[1] = real_pend_vel[w_start]
|
||||
data.ctrl[:] = 0.0
|
||||
_mj.mj_forward(model, data)
|
||||
|
||||
for i in range(w_start, w_end):
|
||||
action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
|
||||
ctrl = actuator.transform_ctrl(action)
|
||||
data.ctrl[0] = ctrl
|
||||
for _ in range(substeps):
|
||||
limits.enforce(model, data)
|
||||
data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
|
||||
_mj.mj_step(model, data)
|
||||
sim_motor_angle[i] = data.qpos[0]
|
||||
sim_pend_angle[i] = data.qpos[1]
|
||||
sim_motor_vel[i] = data.qvel[0]
|
||||
sim_pend_vel[i] = data.qvel[1]
|
||||
else:
|
||||
# Open-loop single-shot.
|
||||
_mj.mj_resetData(model, data)
|
||||
for i in range(n):
|
||||
action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
|
||||
ctrl = actuator.transform_ctrl(action)
|
||||
data.ctrl[0] = ctrl
|
||||
for _ in range(substeps):
|
||||
limits.enforce(model, data)
|
||||
data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
|
||||
_mj.mj_step(model, data)
|
||||
sim_motor_angle[i] = data.qpos[0]
|
||||
sim_pend_angle[i] = data.qpos[1]
|
||||
sim_motor_vel[i] = data.qvel[0]
|
||||
sim_pend_vel[i] = data.qvel[1]
|
||||
|
||||
sim = {
|
||||
"motor_angle": sim_motor_angle,
|
||||
"motor_vel": sim_motor_vel,
|
||||
"pendulum_angle": sim_pend_angle,
|
||||
"pendulum_vel": sim_pend_vel,
|
||||
}
|
||||
except Exception:
|
||||
return 1e6
|
||||
|
||||
for key in sim:
|
||||
if np.any(~np.isfinite(sim[key])):
|
||||
return 1e6
|
||||
|
||||
return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
|
||||
|
||||
|
||||
# ── Parallel evaluation helper (module-level for pickling) ───────────
|
||||
|
||||
# Shared state set by the parent process before spawning workers.
|
||||
@@ -285,34 +229,22 @@ _par_robot_path: Path = Path(".")
|
||||
_par_specs: list[ParamSpec] = []
|
||||
_par_kwargs: dict = {}
|
||||
|
||||
# Cached model (built once per worker, patched per candidate).
|
||||
_par_model: object = None # mujoco.MjModel
|
||||
_par_body_ids: dict[str, int] = {}
|
||||
_par_dof_ids: dict[str, int] = {}
|
||||
|
||||
|
||||
def _init_worker(recording, robot_path, specs, kwargs):
|
||||
"""Initialiser run once per worker process."""
|
||||
global _par_recording, _par_robot_path, _par_specs, _par_kwargs
|
||||
global _par_model, _par_body_ids, _par_dof_ids
|
||||
_par_recording = recording
|
||||
_par_robot_path = robot_path
|
||||
_par_specs = specs
|
||||
_par_kwargs = kwargs
|
||||
|
||||
# Build base model once — reused for all candidates in this worker.
|
||||
_par_model, _, _par_body_ids, _par_dof_ids = build_base_model(robot_path)
|
||||
_par_model.opt.timestep = kwargs.get("sim_dt", 0.002)
|
||||
|
||||
|
||||
def _eval_candidate(x_natural: np.ndarray) -> float:
|
||||
"""Evaluate a single candidate — called in worker processes."""
|
||||
return _fast_cost(
|
||||
return cost_function(
|
||||
x_natural,
|
||||
_par_recording,
|
||||
_par_model,
|
||||
_par_body_ids,
|
||||
_par_dof_ids,
|
||||
_par_robot_path,
|
||||
_par_specs,
|
||||
**_par_kwargs,
|
||||
)
|
||||
@@ -336,11 +268,13 @@ def optimize(
|
||||
window_duration: float = 0.5,
|
||||
seed: int = 42,
|
||||
preprocess_vel: bool = True,
|
||||
sg_window: int = 7,
|
||||
sg_polyorder: int = 3,
|
||||
) -> dict:
|
||||
"""Run CMA-ES optimisation and return results.
|
||||
|
||||
All parameters (motor + pendulum/arm) are optimised jointly from a
|
||||
single full-system recording.
|
||||
Motor parameters are locked from the motor-only sysid.
|
||||
Only pendulum/arm parameters are optimised.
|
||||
|
||||
Returns a dict with:
|
||||
best_params: dict[str, float]
|
||||
@@ -348,6 +282,7 @@ def optimize(
|
||||
history: list of (generation, best_cost) tuples
|
||||
recording: str (path used)
|
||||
specs: list of param names
|
||||
motor_params: dict of locked motor params
|
||||
"""
|
||||
from cmaes import CMA
|
||||
|
||||
@@ -357,11 +292,23 @@ def optimize(
|
||||
if specs is None:
|
||||
specs = ROTARY_CARTPOLE_PARAMS
|
||||
|
||||
motor_params = dict(LOCKED_MOTOR_PARAMS)
|
||||
log.info(
|
||||
"motor_params_locked",
|
||||
n_params=len(motor_params),
|
||||
gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
|
||||
)
|
||||
|
||||
# Load recording.
|
||||
recording = dict(np.load(recording_path))
|
||||
|
||||
# Preprocessing: SG velocity recomputation.
|
||||
recording = _preprocess_recording(recording, preprocess_vel=preprocess_vel)
|
||||
recording = _preprocess_recording(
|
||||
recording,
|
||||
preprocess_vel=preprocess_vel,
|
||||
sg_window=sg_window,
|
||||
sg_polyorder=sg_polyorder,
|
||||
)
|
||||
|
||||
n_samples = len(recording["time"])
|
||||
duration = recording["time"][-1] - recording["time"][0]
|
||||
@@ -429,6 +376,7 @@ def optimize(
|
||||
vel_weight=vel_weight,
|
||||
pendulum_scale=pendulum_scale,
|
||||
window_duration=window_duration,
|
||||
motor_params=motor_params,
|
||||
)
|
||||
|
||||
log.info("parallel_workers", n_workers=n_workers)
|
||||
@@ -480,19 +428,6 @@ def optimize(
|
||||
gen_best=f"{min(c for _, c in solutions):.6f}",
|
||||
)
|
||||
|
||||
# Early stopping: no improvement in last 50 generations.
|
||||
patience = 50
|
||||
if len(history) > patience:
|
||||
old_cost = history[-patience][1]
|
||||
if old_cost - best_cost < old_cost * 0.001:
|
||||
log.info(
|
||||
"early_stopping",
|
||||
gen=gen,
|
||||
best_cost=f"{best_cost:.6f}",
|
||||
stall_gens=patience,
|
||||
)
|
||||
break
|
||||
|
||||
total_time = time.monotonic() - t0
|
||||
|
||||
# Clean up process pool.
|
||||
@@ -506,8 +441,7 @@ def optimize(
|
||||
"cmaes_finished",
|
||||
best_cost=f"{best_cost:.6f}",
|
||||
total_time=f"{total_time:.1f}s",
|
||||
generations=len(history),
|
||||
evaluations=len(history) * population_size,
|
||||
evaluations=max_generations * population_size,
|
||||
)
|
||||
|
||||
# Log parameter comparison.
|
||||
@@ -531,6 +465,7 @@ def optimize(
|
||||
"recording": str(recording_path),
|
||||
"param_names": [s.name for s in specs],
|
||||
"defaults": {s.name: s.default for s in specs},
|
||||
"motor_params": motor_params,
|
||||
"preprocess_vel": preprocess_vel,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
}
|
||||
@@ -581,12 +516,16 @@ def main() -> None:
|
||||
action="store_true",
|
||||
help="Skip Savitzky-Golay velocity preprocessing",
|
||||
)
|
||||
parser.add_argument("--sg-window", type=int, default=7,
|
||||
help="Savitzky-Golay window length (odd, default 7)")
|
||||
parser.add_argument("--sg-polyorder", type=int, default=3,
|
||||
help="Savitzky-Golay polynomial order (default 3)")
|
||||
parser.add_argument(
|
||||
"--param-set",
|
||||
type=str,
|
||||
default="full",
|
||||
choices=list(PARAM_SETS.keys()),
|
||||
help="Parameter set to optimize: 'full' (28), 'motor' (13), or 'pendulum' (15)",
|
||||
help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -607,6 +546,8 @@ def main() -> None:
|
||||
window_duration=args.window_duration,
|
||||
seed=args.seed,
|
||||
preprocess_vel=not args.no_preprocess_vel,
|
||||
sg_window=args.sg_window,
|
||||
sg_polyorder=args.sg_polyorder,
|
||||
)
|
||||
|
||||
# Save results JSON.
|
||||
@@ -631,6 +572,7 @@ def main() -> None:
|
||||
export_tuned_files(
|
||||
robot_path=args.robot_path,
|
||||
params=result["best_params"],
|
||||
motor_params=result.get("motor_params"),
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user