♻️ crazy refactor

2026-03-11 22:52:01 +01:00
parent 35223b3560
commit 4115447022
34 changed files with 4255 additions and 102 deletions
--- a/src/sysid/rollout.py
+++ b/src/sysid/rollout.py
@@ -0,0 +1,477 @@
+"""Deterministic simulation replay — roll out recorded actions in MuJoCo.
+
+Given a parameter vector and a recorded action sequence, builds a MuJoCo
+model with overridden physics parameters, replays the actions, and returns
+the simulated trajectory for comparison with the real recording.
+
+This module is the inner loop of the CMA-ES optimizer: it is called once
+per candidate parameter vector per generation.
+"""
+
+from __future__ import annotations
+
+import copy
+import dataclasses
+import math
+import tempfile
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Any
+
+import mujoco
+import numpy as np
+import yaml
+
+
+# ── Tunable parameter specification ──────────────────────────────────
+
+
+@dataclasses.dataclass
+class ParamSpec:
+    """Specification for a single tunable parameter."""
+
+    name: str
+    default: float
+    lower: float
+    upper: float
+    log_scale: bool = False  # optimise in log-space (masses, inertias)
+
+
+# Default parameter specs for the rotary cartpole.
+# Order matters: the optimizer maps a flat vector to these specs.
+ROTARY_CARTPOLE_PARAMS: list[ParamSpec] = [
+    # ── Arm link (URDF) ──────────────────────────────────────────
+    ParamSpec("arm_mass", 0.010, 0.003, 0.05, log_scale=True),
+    ParamSpec("arm_com_x", 0.00005, -0.02, 0.02),
+    ParamSpec("arm_com_y", 0.0065, -0.01, 0.02),
+    ParamSpec("arm_com_z", 0.00563, -0.01, 0.02),
+    # ── Pendulum link (URDF) ─────────────────────────────────────
+    ParamSpec("pendulum_mass", 0.015, 0.005, 0.05, log_scale=True),
+    ParamSpec("pendulum_com_x", 0.1583, 0.05, 0.25),
+    ParamSpec("pendulum_com_y", -0.0983, -0.20, 0.0),
+    ParamSpec("pendulum_com_z", 0.0, -0.05, 0.05),
+    ParamSpec("pendulum_ixx", 6.16e-06, 1e-07, 1e-04, log_scale=True),
+    ParamSpec("pendulum_iyy", 6.16e-06, 1e-07, 1e-04, log_scale=True),
+    ParamSpec("pendulum_izz", 1.23e-05, 1e-07, 1e-04, log_scale=True),
+    ParamSpec("pendulum_ixy", 6.10e-06, -1e-04, 1e-04),
+    # ── Actuator / joint dynamics (robot.yaml) ───────────────────
+    ParamSpec("actuator_gear", 0.064, 0.01, 0.2, log_scale=True),
+    ParamSpec("actuator_filter_tau", 0.03, 0.005, 0.15),
+    ParamSpec("motor_damping", 0.003, 1e-4, 0.05, log_scale=True),
+    ParamSpec("pendulum_damping", 0.0001, 1e-5, 0.01, log_scale=True),
+    ParamSpec("motor_armature", 0.0001, 1e-5, 0.01, log_scale=True),
+    ParamSpec("motor_frictionloss", 0.03, 0.001, 0.1, log_scale=True),
+]
+
+
+def params_to_dict(
+    values: np.ndarray, specs: list[ParamSpec] | None = None
+) -> dict[str, float]:
+    """Convert a flat parameter vector to a named dict."""
+    if specs is None:
+        specs = ROTARY_CARTPOLE_PARAMS
+    return {s.name: float(values[i]) for i, s in enumerate(specs)}
+
+
+def defaults_vector(specs: list[ParamSpec] | None = None) -> np.ndarray:
+    """Return the default parameter vector (in natural space)."""
+    if specs is None:
+        specs = ROTARY_CARTPOLE_PARAMS
+    return np.array([s.default for s in specs], dtype=np.float64)
+
+
+def bounds_arrays(
+    specs: list[ParamSpec] | None = None,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Return (lower, upper) bound arrays."""
+    if specs is None:
+        specs = ROTARY_CARTPOLE_PARAMS
+    lo = np.array([s.lower for s in specs], dtype=np.float64)
+    hi = np.array([s.upper for s in specs], dtype=np.float64)
+    return lo, hi
+
+
+# ── MuJoCo model building with parameter overrides ──────────────────
+
+
+def _build_model(
+    robot_path: Path,
+    params: dict[str, float],
+) -> mujoco.MjModel:
+    """Build a MuJoCo model from URDF + robot.yaml with parameter overrides.
+
+    Follows the same two-step approach as ``MuJoCoRunner._load_model()``:
+    1. Parse URDF, inject meshdir, load into MuJoCo
+    2. Export MJCF, inject actuators + joint overrides + param overrides, reload
+    """
+    robot_path = Path(robot_path).resolve()
+    robot_yaml = yaml.safe_load((robot_path / "robot.yaml").read_text())
+    urdf_path = robot_path / robot_yaml["urdf"]
+
+    # ── Step 1: Load URDF ────────────────────────────────────────
+    tree = ET.parse(urdf_path)
+    root = tree.getroot()
+
+    # Inject meshdir compiler directive.
+    meshdir = None
+    for mesh_el in root.iter("mesh"):
+        fn = mesh_el.get("filename", "")
+        parent = str(Path(fn).parent)
+        if parent and parent != ".":
+            meshdir = parent
+            break
+    if meshdir:
+        mj_ext = ET.SubElement(root, "mujoco")
+        ET.SubElement(
+            mj_ext, "compiler", attrib={"meshdir": meshdir, "balanceinertia": "true"}
+        )
+
+    # Override URDF inertial parameters BEFORE MuJoCo loading.
+    for link in root.iter("link"):
+        link_name = link.get("name", "")
+        inertial = link.find("inertial")
+        if inertial is None:
+            continue
+
+        if link_name == "arm":
+            _set_mass(inertial, params.get("arm_mass"))
+            _set_com(
+                inertial,
+                params.get("arm_com_x"),
+                params.get("arm_com_y"),
+                params.get("arm_com_z"),
+            )
+
+        elif link_name == "pendulum":
+            _set_mass(inertial, params.get("pendulum_mass"))
+            _set_com(
+                inertial,
+                params.get("pendulum_com_x"),
+                params.get("pendulum_com_y"),
+                params.get("pendulum_com_z"),
+            )
+            _set_inertia(
+                inertial,
+                ixx=params.get("pendulum_ixx"),
+                iyy=params.get("pendulum_iyy"),
+                izz=params.get("pendulum_izz"),
+                ixy=params.get("pendulum_ixy"),
+            )
+
+    # Write temp URDF and load.
+    tmp_urdf = robot_path / "_tmp_sysid_load.urdf"
+    tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
+    try:
+        model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
+    finally:
+        tmp_urdf.unlink(missing_ok=True)
+
+    # ── Step 2: Export MJCF, inject actuators + overrides ────────
+    tmp_mjcf = robot_path / "_tmp_sysid_inject.xml"
+    try:
+        mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
+        mjcf_root = ET.fromstring(tmp_mjcf.read_text())
+
+        # Actuator.
+        gear = params.get("actuator_gear", robot_yaml["actuators"][0].get("gear", 0.064))
+        filter_tau = params.get(
+            "actuator_filter_tau",
+            robot_yaml["actuators"][0].get("filter_tau", 0.03),
+        )
+        act_cfg = robot_yaml["actuators"][0]
+        ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
+
+        act_elem = ET.SubElement(mjcf_root, "actuator")
+        attribs: dict[str, str] = {
+            "name": f"{act_cfg['joint']}_motor",
+            "joint": act_cfg["joint"],
+            "gear": str(gear),
+            "ctrlrange": f"{ctrl_lo} {ctrl_hi}",
+        }
+        if filter_tau > 0:
+            attribs["dyntype"] = "filter"
+            attribs["dynprm"] = str(filter_tau)
+            attribs["gaintype"] = "fixed"
+            attribs["biastype"] = "none"
+            ET.SubElement(act_elem, "general", attrib=attribs)
+        else:
+            ET.SubElement(act_elem, "motor", attrib=attribs)
+
+        # Joint overrides.
+        motor_damping = params.get("motor_damping", 0.003)
+        pend_damping = params.get("pendulum_damping", 0.0001)
+        motor_armature = params.get("motor_armature", 0.0001)
+        motor_frictionloss = params.get("motor_frictionloss", 0.03)
+
+        for body in mjcf_root.iter("body"):
+            for jnt in body.findall("joint"):
+                name = jnt.get("name")
+                if name == "motor_joint":
+                    jnt.set("damping", str(motor_damping))
+                    jnt.set("armature", str(motor_armature))
+                    jnt.set("frictionloss", str(motor_frictionloss))
+                elif name == "pendulum_joint":
+                    jnt.set("damping", str(pend_damping))
+
+        # Disable self-collision.
+        for geom in mjcf_root.iter("geom"):
+            geom.set("contype", "0")
+            geom.set("conaffinity", "0")
+
+        modified_xml = ET.tostring(mjcf_root, encoding="unicode")
+        tmp_mjcf.write_text(modified_xml)
+        model = mujoco.MjModel.from_xml_path(str(tmp_mjcf))
+    finally:
+        tmp_mjcf.unlink(missing_ok=True)
+
+    return model
+
+
+def _set_mass(inertial: ET.Element, mass: float | None) -> None:
+    if mass is None:
+        return
+    mass_el = inertial.find("mass")
+    if mass_el is not None:
+        mass_el.set("value", str(mass))
+
+
+def _set_com(
+    inertial: ET.Element,
+    x: float | None,
+    y: float | None,
+    z: float | None,
+) -> None:
+    origin = inertial.find("origin")
+    if origin is None:
+        return
+    xyz = origin.get("xyz", "0 0 0").split()
+    if x is not None:
+        xyz[0] = str(x)
+    if y is not None:
+        xyz[1] = str(y)
+    if z is not None:
+        xyz[2] = str(z)
+    origin.set("xyz", " ".join(xyz))
+
+
+def _set_inertia(
+    inertial: ET.Element,
+    ixx: float | None = None,
+    iyy: float | None = None,
+    izz: float | None = None,
+    ixy: float | None = None,
+    iyz: float | None = None,
+    ixz: float | None = None,
+) -> None:
+    ine = inertial.find("inertia")
+    if ine is None:
+        return
+    for attr, val in [
+        ("ixx", ixx), ("iyy", iyy), ("izz", izz),
+        ("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
+    ]:
+        if val is not None:
+            ine.set(attr, str(val))
+
+
+# ── Simulation rollout ───────────────────────────────────────────────
+
+
+def rollout(
+    robot_path: str | Path,
+    params: dict[str, float],
+    actions: np.ndarray,
+    timesteps: np.ndarray,
+    sim_dt: float = 0.002,
+    substeps: int = 10,
+) -> dict[str, np.ndarray]:
+    """Replay recorded actions in MuJoCo with overridden parameters.
+
+    Parameters
+    ----------
+    robot_path : asset directory
+    params : named parameter overrides
+    actions : (N,) normalised actions [-1, 1] from the recording
+    timesteps : (N,) wall-clock times (seconds) from the recording
+    sim_dt : MuJoCo physics timestep
+    substeps : physics substeps per control step
+
+    Returns
+    -------
+    dict with keys: motor_angle, motor_vel, pendulum_angle, pendulum_vel
+    Each is an (N,) numpy array of simulated values.
+    """
+    robot_path = Path(robot_path).resolve()
+    model = _build_model(robot_path, params)
+    model.opt.timestep = sim_dt
+    data = mujoco.MjData(model)
+
+    # Start from pendulum hanging down (qpos=0 is down per URDF convention).
+    mujoco.mj_resetData(model, data)
+
+    # Control dt derived from actual recording sample rate.
+    n = len(actions)
+    ctrl_dt = sim_dt * substeps
+
+    # Pre-allocate output.
+    sim_motor_angle = np.zeros(n, dtype=np.float64)
+    sim_motor_vel = np.zeros(n, dtype=np.float64)
+    sim_pend_angle = np.zeros(n, dtype=np.float64)
+    sim_pend_vel = np.zeros(n, dtype=np.float64)
+
+    # Extract actuator limit info for software limit switch.
+    nu = model.nu
+    if nu > 0:
+        jnt_id = model.actuator_trnid[0, 0]
+        jnt_limited = bool(model.jnt_limited[jnt_id])
+        jnt_lo = model.jnt_range[jnt_id, 0]
+        jnt_hi = model.jnt_range[jnt_id, 1]
+        gear_sign = float(np.sign(model.actuator_gear[0, 0]))
+    else:
+        jnt_limited = False
+        jnt_lo = jnt_hi = gear_sign = 0.0
+
+    for i in range(n):
+        data.ctrl[0] = actions[i]
+
+        for _ in range(substeps):
+            # Software limit switch (mirrors MuJoCoRunner).
+            if jnt_limited and nu > 0:
+                pos = data.qpos[jnt_id]
+                if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
+                    data.ctrl[0] = 0.0
+                elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
+                    data.ctrl[0] = 0.0
+            mujoco.mj_step(model, data)
+
+        sim_motor_angle[i] = data.qpos[0]
+        sim_motor_vel[i] = data.qvel[0]
+        sim_pend_angle[i] = data.qpos[1]
+        sim_pend_vel[i] = data.qvel[1]
+
+    return {
+        "motor_angle": sim_motor_angle,
+        "motor_vel": sim_motor_vel,
+        "pendulum_angle": sim_pend_angle,
+        "pendulum_vel": sim_pend_vel,
+    }
+
+
+def windowed_rollout(
+    robot_path: str | Path,
+    params: dict[str, float],
+    recording: dict[str, np.ndarray],
+    window_duration: float = 0.5,
+    sim_dt: float = 0.002,
+    substeps: int = 10,
+) -> dict[str, np.ndarray | float]:
+    """Multiple-shooting rollout — split recording into short windows.
+
+    For each window:
+      1. Initialize MuJoCo state from the real qpos/qvel at the window start.
+      2. Replay the recorded actions within the window.
+      3. Record the simulated output.
+
+    This prevents error accumulation across the full trajectory, giving
+    a much smoother cost landscape for the optimizer.
+
+    Parameters
+    ----------
+    robot_path : asset directory
+    params : named parameter overrides
+    recording : dict with keys time, action, motor_angle, motor_vel,
+                pendulum_angle, pendulum_vel (all 1D arrays of length N)
+    window_duration : length of each shooting window in seconds
+    sim_dt : MuJoCo physics timestep
+    substeps : physics substeps per control step
+
+    Returns
+    -------
+    dict with:
+        motor_angle, motor_vel, pendulum_angle, pendulum_vel — (N,) arrays
+            (stitched from per-window simulations)
+        n_windows — number of windows used
+    """
+    robot_path = Path(robot_path).resolve()
+    model = _build_model(robot_path, params)
+    model.opt.timestep = sim_dt
+    data = mujoco.MjData(model)
+
+    times = recording["time"]
+    actions = recording["action"]
+    real_motor = recording["motor_angle"]
+    real_motor_vel = recording["motor_vel"]
+    real_pend = recording["pendulum_angle"]
+    real_pend_vel = recording["pendulum_vel"]
+    n = len(actions)
+
+    # Pre-allocate output (stitched from all windows).
+    sim_motor_angle = np.zeros(n, dtype=np.float64)
+    sim_motor_vel = np.zeros(n, dtype=np.float64)
+    sim_pend_angle = np.zeros(n, dtype=np.float64)
+    sim_pend_vel = np.zeros(n, dtype=np.float64)
+
+    # Extract actuator limit info.
+    nu = model.nu
+    if nu > 0:
+        jnt_id = model.actuator_trnid[0, 0]
+        jnt_limited = bool(model.jnt_limited[jnt_id])
+        jnt_lo = model.jnt_range[jnt_id, 0]
+        jnt_hi = model.jnt_range[jnt_id, 1]
+        gear_sign = float(np.sign(model.actuator_gear[0, 0]))
+    else:
+        jnt_limited = False
+        jnt_lo = jnt_hi = gear_sign = 0.0
+
+    # Compute window boundaries from recording timestamps.
+    t0 = times[0]
+    t_end = times[-1]
+    window_starts: list[int] = []  # indices into the recording
+    current_t = t0
+    while current_t < t_end:
+        # Find the index closest to current_t.
+        idx = int(np.searchsorted(times, current_t))
+        idx = min(idx, n - 1)
+        window_starts.append(idx)
+        current_t += window_duration
+
+    n_windows = len(window_starts)
+
+    for w, w_start in enumerate(window_starts):
+        # Window end: next window start, or end of recording.
+        w_end = window_starts[w + 1] if w + 1 < n_windows else n
+
+        # Initialize MuJoCo state from real data at window start.
+        mujoco.mj_resetData(model, data)
+        data.qpos[0] = real_motor[w_start]
+        data.qpos[1] = real_pend[w_start]
+        data.qvel[0] = real_motor_vel[w_start]
+        data.qvel[1] = real_pend_vel[w_start]
+        data.ctrl[:] = 0.0
+        # Forward kinematics to make state consistent.
+        mujoco.mj_forward(model, data)
+
+        for i in range(w_start, w_end):
+            data.ctrl[0] = actions[i]
+
+            for _ in range(substeps):
+                if jnt_limited and nu > 0:
+                    pos = data.qpos[jnt_id]
+                    if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
+                        data.ctrl[0] = 0.0
+                    elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
+                        data.ctrl[0] = 0.0
+                mujoco.mj_step(model, data)
+
+            sim_motor_angle[i] = data.qpos[0]
+            sim_motor_vel[i] = data.qvel[0]
+            sim_pend_angle[i] = data.qpos[1]
+            sim_pend_vel[i] = data.qvel[1]
+
+    return {
+        "motor_angle": sim_motor_angle,
+        "motor_vel": sim_motor_vel,
+        "pendulum_angle": sim_pend_angle,
+        "pendulum_vel": sim_pend_vel,
+        "n_windows": n_windows,
+    }