Close the sim2real gap for the Furuta pendulum (swings up but can't balance on hardware). Root causes were (a) no domain randomization, so the policy overfit one deterministic sim instance, and (b) reward design flaws that produced degenerate policies. Domain randomization (runner-level, backend-agnostic): - BaseRunner: domain_rand config; per-env action-delay buffer (latency), Gaussian qpos/qvel sensor noise, per-env dynamics-scale sampling (friction/damping/torque), resampled per episode. Sensor noise per step. - privileged_obs/privileged_dim expose normalized DR factors (mu) for RMA. - step() now uses clean state for reward/termination, noisy state for the observation the policy sees. - MuJoCoRunner: applies per-env friction/damping/torque scales. - robot.py: compute_motor_force gains friction/damping scale args. - Configs: DR blocks for mujoco (full) and mjx (delay+noise); clean defaults for mujoco_single/serial; noise/delay anchored to recordings. Reward fixes (rotary_cartpole): - Shift upright reward to [0,1] (was [-1,1]) + alive_bonus, so surviving always beats ending early (kills the "suicide into the limit" policy). - Add balance_bonus * upright * stillness so reward requires upright AND near-zero pendulum velocity (kills the "spin in full loops" policy). Deploy: - eval.py load_policy reconstructs the history/adaptation encoder (auto-detects its dim from the checkpoint) so DR+embedding policies load. Fixes: - MuJoCoRunner._sim_reset referenced self._env (typo) -> self.env, which was breaking every rotary-cartpole reset. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
581 lines
18 KiB
Python
581 lines
18 KiB
Python
"""CMA-ES optimiser — fit simulation parameters to a real-robot recording.
|
||
|
||
Minimises the trajectory-matching cost between a MuJoCo rollout and a
|
||
recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python
|
||
CMA-ES with native box-constraint support).
|
||
|
||
Motor parameters are **locked** from the motor-only sysid — only
|
||
pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
|
||
optimised. Velocities are optionally preprocessed with Savitzky-Golay
|
||
differentiation for cleaner targets.
|
||
|
||
Usage:
|
||
python -m src.sysid.optimize \
|
||
--robot-path assets/rotary_cartpole \
|
||
--recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz
|
||
|
||
# Shorter run for testing:
|
||
python -m src.sysid.optimize \
|
||
--robot-path assets/rotary_cartpole \
|
||
--recording <file>.npz \
|
||
--max-generations 10 --population-size 8
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import time
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
import numpy as np
|
||
import structlog
|
||
|
||
from src.sysid.rollout import (
|
||
LOCKED_MOTOR_PARAMS,
|
||
PARAM_SETS,
|
||
ROTARY_CARTPOLE_PARAMS,
|
||
ParamSpec,
|
||
bounds_arrays,
|
||
defaults_vector,
|
||
params_to_dict,
|
||
rollout,
|
||
windowed_rollout,
|
||
)
|
||
|
||
log = structlog.get_logger()
|
||
|
||
|
||
# ── Velocity preprocessing ───────────────────────────────────────────
|
||
|
||
|
||
def _preprocess_recording(
|
||
recording: dict[str, np.ndarray],
|
||
preprocess_vel: bool = True,
|
||
sg_window: int = 7,
|
||
sg_polyorder: int = 3,
|
||
) -> dict[str, np.ndarray]:
|
||
"""Optionally recompute velocities using Savitzky-Golay differentiation.
|
||
|
||
Applies SG filtering to both motor_vel and pendulum_vel, replacing
|
||
the noisy firmware finite-difference velocities with smooth
|
||
analytical derivatives of the (clean) angle signals.
|
||
"""
|
||
if not preprocess_vel:
|
||
return recording
|
||
|
||
from scipy.signal import savgol_filter
|
||
|
||
rec = dict(recording)
|
||
times = rec["time"]
|
||
dt = float(np.mean(np.diff(times)))
|
||
|
||
# Motor velocity.
|
||
rec["motor_vel_raw"] = rec["motor_vel"].copy()
|
||
rec["motor_vel"] = savgol_filter(
|
||
rec["motor_angle"],
|
||
window_length=sg_window,
|
||
polyorder=sg_polyorder,
|
||
deriv=1,
|
||
delta=dt,
|
||
)
|
||
|
||
# Pendulum velocity.
|
||
rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
|
||
rec["pendulum_vel"] = savgol_filter(
|
||
rec["pendulum_angle"],
|
||
window_length=sg_window,
|
||
polyorder=sg_polyorder,
|
||
deriv=1,
|
||
delta=dt,
|
||
)
|
||
|
||
motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
|
||
pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
|
||
log.info(
|
||
"velocity_preprocessed",
|
||
method="savgol",
|
||
sg_window=sg_window,
|
||
sg_polyorder=sg_polyorder,
|
||
dt_ms=f"{dt*1000:.1f}",
|
||
motor_noise_std=f"{motor_noise:.3f} rad/s",
|
||
pend_noise_std=f"{pend_noise:.3f} rad/s",
|
||
)
|
||
|
||
return rec
|
||
|
||
|
||
# ── Cost function ────────────────────────────────────────────────────
|
||
|
||
|
||
def _angle_diff(a: np.ndarray, b: np.ndarray) -> np.ndarray:
|
||
"""Shortest signed angle difference, handling wrapping."""
|
||
return np.arctan2(np.sin(a - b), np.cos(a - b))
|
||
|
||
|
||
def _check_inertia_valid(params: dict[str, float]) -> bool:
|
||
"""Quick reject: pendulum inertia tensor must be positive-definite."""
|
||
ixx = params.get("pendulum_ixx", 6.16e-06)
|
||
iyy = params.get("pendulum_iyy", 6.16e-06)
|
||
izz = params.get("pendulum_izz", 1.23e-05)
|
||
ixy = params.get("pendulum_ixy", 6.10e-06)
|
||
det_xy = ixx * iyy - ixy * ixy
|
||
return det_xy > 0 and ixx > 0 and iyy > 0 and izz > 0
|
||
|
||
|
||
def _compute_trajectory_cost(
|
||
sim: dict[str, np.ndarray],
|
||
recording: dict[str, np.ndarray],
|
||
pos_weight: float = 1.0,
|
||
vel_weight: float = 0.1,
|
||
pendulum_scale: float = 3.0,
|
||
vel_outlier_threshold: float = 20.0,
|
||
) -> float:
|
||
"""Weighted MSE between sim and real trajectories.
|
||
|
||
pendulum_scale multiplies the pendulum terms relative to motor terms.
|
||
|
||
Samples where the *real* pendulum velocity exceeds
|
||
``vel_outlier_threshold`` (rad/s) are excluded from the velocity
|
||
terms. These are encoder-wrap artefacts (pendulum swinging past
|
||
vertical) that no simulator can reproduce.
|
||
"""
|
||
motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"])
|
||
pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"])
|
||
motor_vel_err = sim["motor_vel"] - recording["motor_vel"]
|
||
pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"]
|
||
|
||
# Mask out encoder-wrap velocity spikes so the optimizer doesn't
|
||
# waste capacity fitting artefacts.
|
||
valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold
|
||
if valid.sum() < len(valid):
|
||
motor_vel_err = motor_vel_err[valid]
|
||
pend_vel_err = pend_vel_err[valid]
|
||
|
||
return float(
|
||
pos_weight * np.mean(motor_err**2)
|
||
+ pos_weight * pendulum_scale * np.mean(pend_err**2)
|
||
+ vel_weight * np.mean(motor_vel_err**2)
|
||
+ vel_weight * pendulum_scale * np.mean(pend_vel_err**2)
|
||
)
|
||
|
||
|
||
def cost_function(
|
||
params_vec: np.ndarray,
|
||
recording: dict[str, np.ndarray],
|
||
robot_path: Path,
|
||
specs: list[ParamSpec],
|
||
sim_dt: float = 0.002,
|
||
substeps: int = 10,
|
||
pos_weight: float = 1.0,
|
||
vel_weight: float = 0.1,
|
||
pendulum_scale: float = 3.0,
|
||
window_duration: float = 0.5,
|
||
motor_params: dict[str, float] | None = None,
|
||
) -> float:
|
||
"""Compute trajectory-matching cost for a candidate parameter vector.
|
||
|
||
Uses **multiple-shooting** (windowed rollout): the recording is split
|
||
into short windows (default 0.5 s). Each window is initialised from
|
||
the real qpos/qvel, so early errors don’t compound across the full
|
||
trajectory. This gives a much smoother cost landscape for CMA-ES.
|
||
|
||
Set ``window_duration=0`` to fall back to the original open-loop
|
||
single-shot rollout (not recommended).
|
||
"""
|
||
params = params_to_dict(params_vec, specs)
|
||
|
||
if not _check_inertia_valid(params):
|
||
return 1e6
|
||
|
||
try:
|
||
if window_duration > 0:
|
||
sim = windowed_rollout(
|
||
robot_path=robot_path,
|
||
params=params,
|
||
recording=recording,
|
||
window_duration=window_duration,
|
||
sim_dt=sim_dt,
|
||
substeps=substeps,
|
||
motor_params=motor_params,
|
||
)
|
||
else:
|
||
sim = rollout(
|
||
robot_path=robot_path,
|
||
params=params,
|
||
actions=recording["action"],
|
||
sim_dt=sim_dt,
|
||
substeps=substeps,
|
||
motor_params=motor_params,
|
||
)
|
||
except Exception as exc:
|
||
log.warning("rollout_failed", error=str(exc))
|
||
return 1e6
|
||
|
||
# Check for NaN in sim output.
|
||
for key in ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel"):
|
||
if np.any(~np.isfinite(sim[key])):
|
||
return 1e6
|
||
|
||
return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
|
||
|
||
|
||
# ── Parallel evaluation helper (module-level for pickling) ───────────
|
||
|
||
# Shared state set by the parent process before spawning workers.
|
||
_par_recording: dict[str, np.ndarray] = {}
|
||
_par_robot_path: Path = Path(".")
|
||
_par_specs: list[ParamSpec] = []
|
||
_par_kwargs: dict = {}
|
||
|
||
|
||
def _init_worker(recording, robot_path, specs, kwargs):
|
||
"""Initialiser run once per worker process."""
|
||
global _par_recording, _par_robot_path, _par_specs, _par_kwargs
|
||
_par_recording = recording
|
||
_par_robot_path = robot_path
|
||
_par_specs = specs
|
||
_par_kwargs = kwargs
|
||
|
||
|
||
def _eval_candidate(x_natural: np.ndarray) -> float:
|
||
"""Evaluate a single candidate — called in worker processes."""
|
||
return cost_function(
|
||
x_natural,
|
||
_par_recording,
|
||
_par_robot_path,
|
||
_par_specs,
|
||
**_par_kwargs,
|
||
)
|
||
|
||
|
||
# ── CMA-ES optimisation loop ────────────────────────────────────────
|
||
|
||
|
||
def optimize(
|
||
robot_path: str | Path,
|
||
recording_path: str | Path,
|
||
specs: list[ParamSpec] | None = None,
|
||
sigma0: float = 0.3,
|
||
population_size: int = 20,
|
||
max_generations: int = 1000,
|
||
sim_dt: float = 0.002,
|
||
substeps: int = 10,
|
||
pos_weight: float = 1.0,
|
||
vel_weight: float = 0.1,
|
||
pendulum_scale: float = 3.0,
|
||
window_duration: float = 0.5,
|
||
seed: int = 42,
|
||
preprocess_vel: bool = True,
|
||
sg_window: int = 7,
|
||
sg_polyorder: int = 3,
|
||
) -> dict:
|
||
"""Run CMA-ES optimisation and return results.
|
||
|
||
Motor parameters are locked from the motor-only sysid.
|
||
Only pendulum/arm parameters are optimised.
|
||
|
||
Returns a dict with:
|
||
best_params: dict[str, float]
|
||
best_cost: float
|
||
history: list of (generation, best_cost) tuples
|
||
recording: str (path used)
|
||
specs: list of param names
|
||
motor_params: dict of locked motor params
|
||
"""
|
||
from cmaes import CMA
|
||
|
||
robot_path = Path(robot_path).resolve()
|
||
recording_path = Path(recording_path).resolve()
|
||
|
||
if specs is None:
|
||
specs = ROTARY_CARTPOLE_PARAMS
|
||
|
||
motor_params = dict(LOCKED_MOTOR_PARAMS)
|
||
log.info(
|
||
"motor_params_locked",
|
||
n_params=len(motor_params),
|
||
gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
|
||
)
|
||
|
||
# Load recording.
|
||
recording = dict(np.load(recording_path))
|
||
|
||
# Preprocessing: SG velocity recomputation.
|
||
recording = _preprocess_recording(
|
||
recording,
|
||
preprocess_vel=preprocess_vel,
|
||
sg_window=sg_window,
|
||
sg_polyorder=sg_polyorder,
|
||
)
|
||
|
||
n_samples = len(recording["time"])
|
||
duration = recording["time"][-1] - recording["time"][0]
|
||
n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
|
||
log.info(
|
||
"recording_loaded",
|
||
path=str(recording_path),
|
||
samples=n_samples,
|
||
duration=f"{duration:.1f}s",
|
||
window_duration=f"{window_duration}s",
|
||
n_windows=n_windows,
|
||
)
|
||
|
||
# Initial point (defaults) — normalised to [0, 1] for CMA-ES.
|
||
lo, hi = bounds_arrays(specs)
|
||
x0 = defaults_vector(specs)
|
||
|
||
# Normalise to [0, 1] for the optimizer (better conditioned).
|
||
span = hi - lo
|
||
span[span == 0] = 1.0 # avoid division by zero
|
||
|
||
def to_normed(x: np.ndarray) -> np.ndarray:
|
||
return (x - lo) / span
|
||
|
||
def from_normed(x_n: np.ndarray) -> np.ndarray:
|
||
return x_n * span + lo
|
||
|
||
x0_normed = to_normed(x0)
|
||
bounds_normed = np.column_stack(
|
||
[np.zeros(len(specs)), np.ones(len(specs))]
|
||
)
|
||
|
||
optimizer = CMA(
|
||
mean=x0_normed,
|
||
sigma=sigma0,
|
||
bounds=bounds_normed,
|
||
population_size=population_size,
|
||
seed=seed,
|
||
)
|
||
|
||
best_cost = float("inf")
|
||
best_params_vec = x0.copy()
|
||
history: list[tuple[int, float]] = []
|
||
|
||
log.info(
|
||
"cmaes_starting",
|
||
n_params=len(specs),
|
||
population=population_size,
|
||
max_gens=max_generations,
|
||
sigma0=sigma0,
|
||
)
|
||
|
||
t0 = time.monotonic()
|
||
|
||
# ── Parallel evaluation setup ────────────────────────────────
|
||
# Each candidate is independent — evaluate them in parallel using
|
||
# a process pool. Falls back to sequential if n_workers=1.
|
||
import multiprocessing as mp
|
||
n_workers = max(1, mp.cpu_count() - 1) # leave 1 core free
|
||
|
||
eval_kwargs = dict(
|
||
sim_dt=sim_dt,
|
||
substeps=substeps,
|
||
pos_weight=pos_weight,
|
||
vel_weight=vel_weight,
|
||
pendulum_scale=pendulum_scale,
|
||
window_duration=window_duration,
|
||
motor_params=motor_params,
|
||
)
|
||
|
||
log.info("parallel_workers", n_workers=n_workers)
|
||
|
||
# Create a persistent pool (avoids per-generation fork overhead).
|
||
pool = None
|
||
if n_workers > 1:
|
||
pool = mp.Pool(
|
||
n_workers,
|
||
initializer=_init_worker,
|
||
initargs=(recording, robot_path, specs, eval_kwargs),
|
||
)
|
||
|
||
for gen in range(max_generations):
|
||
# Ask all candidates first.
|
||
candidates_normed = []
|
||
candidates_natural = []
|
||
for _ in range(optimizer.population_size):
|
||
x_normed = optimizer.ask()
|
||
x_natural = from_normed(x_normed)
|
||
x_natural = np.clip(x_natural, lo, hi)
|
||
candidates_normed.append(x_normed)
|
||
candidates_natural.append(x_natural)
|
||
|
||
# Evaluate in parallel.
|
||
if pool is not None:
|
||
costs = pool.map(_eval_candidate, candidates_natural)
|
||
else:
|
||
costs = [cost_function(
|
||
x, recording, robot_path, specs, **eval_kwargs
|
||
) for x in candidates_natural]
|
||
|
||
solutions = list(zip(candidates_normed, costs))
|
||
for x_natural, c in zip(candidates_natural, costs):
|
||
if c < best_cost:
|
||
best_cost = c
|
||
best_params_vec = x_natural.copy()
|
||
|
||
optimizer.tell(solutions)
|
||
history.append((gen, best_cost))
|
||
|
||
elapsed = time.monotonic() - t0
|
||
if gen % 5 == 0 or gen == max_generations - 1:
|
||
log.info(
|
||
"cmaes_generation",
|
||
gen=gen,
|
||
best_cost=f"{best_cost:.6f}",
|
||
elapsed=f"{elapsed:.1f}s",
|
||
gen_best=f"{min(c for _, c in solutions):.6f}",
|
||
)
|
||
|
||
total_time = time.monotonic() - t0
|
||
|
||
# Clean up process pool.
|
||
if pool is not None:
|
||
pool.close()
|
||
pool.join()
|
||
|
||
best_params = params_to_dict(best_params_vec, specs)
|
||
|
||
log.info(
|
||
"cmaes_finished",
|
||
best_cost=f"{best_cost:.6f}",
|
||
total_time=f"{total_time:.1f}s",
|
||
evaluations=max_generations * population_size,
|
||
)
|
||
|
||
# Log parameter comparison.
|
||
defaults = params_to_dict(defaults_vector(specs), specs)
|
||
for name in best_params:
|
||
d = defaults[name]
|
||
b = best_params[name]
|
||
change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
|
||
log.info(
|
||
"param_result",
|
||
name=name,
|
||
default=f"{d:.6g}",
|
||
tuned=f"{b:.6g}",
|
||
change=f"{change_pct:+.1f}%",
|
||
)
|
||
|
||
return {
|
||
"best_params": best_params,
|
||
"best_cost": best_cost,
|
||
"history": history,
|
||
"recording": str(recording_path),
|
||
"param_names": [s.name for s in specs],
|
||
"defaults": {s.name: s.default for s in specs},
|
||
"motor_params": motor_params,
|
||
"preprocess_vel": preprocess_vel,
|
||
"timestamp": datetime.now().isoformat(),
|
||
}
|
||
|
||
|
||
# ── CLI entry point ──────────────────────────────────────────────────
|
||
|
||
|
||
def main() -> None:
|
||
parser = argparse.ArgumentParser(
|
||
description="Fit simulation parameters to a real-robot recording (CMA-ES)."
|
||
)
|
||
parser.add_argument(
|
||
"--robot-path",
|
||
type=str,
|
||
default="assets/rotary_cartpole",
|
||
help="Path to robot asset directory",
|
||
)
|
||
parser.add_argument(
|
||
"--recording",
|
||
type=str,
|
||
required=True,
|
||
help="Path to .npz recording file",
|
||
)
|
||
parser.add_argument("--sigma0", type=float, default=0.3)
|
||
parser.add_argument("--population-size", type=int, default=20)
|
||
parser.add_argument("--max-generations", type=int, default=200)
|
||
parser.add_argument("--sim-dt", type=float, default=0.002)
|
||
parser.add_argument("--substeps", type=int, default=10)
|
||
parser.add_argument("--pos-weight", type=float, default=1.0)
|
||
parser.add_argument("--vel-weight", type=float, default=0.1)
|
||
parser.add_argument("--pendulum-scale", type=float, default=3.0,
|
||
help="Multiplier for pendulum terms relative to motor (default 3.0)")
|
||
parser.add_argument(
|
||
"--window-duration",
|
||
type=float,
|
||
default=0.5,
|
||
help="Shooting window length in seconds (0 = open-loop, default 0.5)",
|
||
)
|
||
parser.add_argument("--seed", type=int, default=42)
|
||
parser.add_argument(
|
||
"--no-export",
|
||
action="store_true",
|
||
help="Skip exporting tuned files (results JSON only)",
|
||
)
|
||
parser.add_argument(
|
||
"--no-preprocess-vel",
|
||
action="store_true",
|
||
help="Skip Savitzky-Golay velocity preprocessing",
|
||
)
|
||
parser.add_argument("--sg-window", type=int, default=7,
|
||
help="Savitzky-Golay window length (odd, default 7)")
|
||
parser.add_argument("--sg-polyorder", type=int, default=3,
|
||
help="Savitzky-Golay polynomial order (default 3)")
|
||
parser.add_argument(
|
||
"--param-set",
|
||
type=str,
|
||
default="full",
|
||
choices=list(PARAM_SETS.keys()),
|
||
help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
|
||
)
|
||
args = parser.parse_args()
|
||
|
||
specs = PARAM_SETS[args.param_set]
|
||
|
||
result = optimize(
|
||
robot_path=args.robot_path,
|
||
recording_path=args.recording,
|
||
specs=specs,
|
||
sigma0=args.sigma0,
|
||
population_size=args.population_size,
|
||
max_generations=args.max_generations,
|
||
sim_dt=args.sim_dt,
|
||
substeps=args.substeps,
|
||
pos_weight=args.pos_weight,
|
||
vel_weight=args.vel_weight,
|
||
pendulum_scale=args.pendulum_scale,
|
||
window_duration=args.window_duration,
|
||
seed=args.seed,
|
||
preprocess_vel=not args.no_preprocess_vel,
|
||
sg_window=args.sg_window,
|
||
sg_polyorder=args.sg_polyorder,
|
||
)
|
||
|
||
# Save results JSON.
|
||
robot_path = Path(args.robot_path).resolve()
|
||
result_path = robot_path / "sysid_result.json"
|
||
# Convert numpy types for JSON serialisation.
|
||
result_json = {
|
||
k: v for k, v in result.items() if k != "history"
|
||
}
|
||
result_json["history_summary"] = {
|
||
"first_cost": result["history"][0][1] if result["history"] else None,
|
||
"final_cost": result["history"][-1][1] if result["history"] else None,
|
||
"generations": len(result["history"]),
|
||
}
|
||
result_path.write_text(json.dumps(result_json, indent=2, default=str))
|
||
log.info("results_saved", path=str(result_path))
|
||
|
||
# Export tuned files unless --no-export.
|
||
if not args.no_export:
|
||
from src.sysid.export import export_tuned_files
|
||
|
||
export_tuned_files(
|
||
robot_path=args.robot_path,
|
||
params=result["best_params"],
|
||
motor_params=result.get("motor_params"),
|
||
)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|