♻️ crazy refactor

This commit is contained in:
2026-03-11 22:52:01 +01:00
parent 35223b3560
commit 4115447022
34 changed files with 4255 additions and 102 deletions

477
src/sysid/rollout.py Normal file
View File

@@ -0,0 +1,477 @@
"""Deterministic simulation replay — roll out recorded actions in MuJoCo.
Given a parameter vector and a recorded action sequence, builds a MuJoCo
model with overridden physics parameters, replays the actions, and returns
the simulated trajectory for comparison with the real recording.
This module is the inner loop of the CMA-ES optimizer: it is called once
per candidate parameter vector per generation.
"""
from __future__ import annotations
import copy
import dataclasses
import math
import tempfile
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Any
import mujoco
import numpy as np
import yaml
# ── Tunable parameter specification ──────────────────────────────────
@dataclasses.dataclass
class ParamSpec:
"""Specification for a single tunable parameter."""
name: str
default: float
lower: float
upper: float
log_scale: bool = False # optimise in log-space (masses, inertias)
# Default parameter specs for the rotary cartpole.
# Order matters: the optimizer maps a flat vector to these specs.
ROTARY_CARTPOLE_PARAMS: list[ParamSpec] = [
# ── Arm link (URDF) ──────────────────────────────────────────
ParamSpec("arm_mass", 0.010, 0.003, 0.05, log_scale=True),
ParamSpec("arm_com_x", 0.00005, -0.02, 0.02),
ParamSpec("arm_com_y", 0.0065, -0.01, 0.02),
ParamSpec("arm_com_z", 0.00563, -0.01, 0.02),
# ── Pendulum link (URDF) ─────────────────────────────────────
ParamSpec("pendulum_mass", 0.015, 0.005, 0.05, log_scale=True),
ParamSpec("pendulum_com_x", 0.1583, 0.05, 0.25),
ParamSpec("pendulum_com_y", -0.0983, -0.20, 0.0),
ParamSpec("pendulum_com_z", 0.0, -0.05, 0.05),
ParamSpec("pendulum_ixx", 6.16e-06, 1e-07, 1e-04, log_scale=True),
ParamSpec("pendulum_iyy", 6.16e-06, 1e-07, 1e-04, log_scale=True),
ParamSpec("pendulum_izz", 1.23e-05, 1e-07, 1e-04, log_scale=True),
ParamSpec("pendulum_ixy", 6.10e-06, -1e-04, 1e-04),
# ── Actuator / joint dynamics (robot.yaml) ───────────────────
ParamSpec("actuator_gear", 0.064, 0.01, 0.2, log_scale=True),
ParamSpec("actuator_filter_tau", 0.03, 0.005, 0.15),
ParamSpec("motor_damping", 0.003, 1e-4, 0.05, log_scale=True),
ParamSpec("pendulum_damping", 0.0001, 1e-5, 0.01, log_scale=True),
ParamSpec("motor_armature", 0.0001, 1e-5, 0.01, log_scale=True),
ParamSpec("motor_frictionloss", 0.03, 0.001, 0.1, log_scale=True),
]
def params_to_dict(
values: np.ndarray, specs: list[ParamSpec] | None = None
) -> dict[str, float]:
"""Convert a flat parameter vector to a named dict."""
if specs is None:
specs = ROTARY_CARTPOLE_PARAMS
return {s.name: float(values[i]) for i, s in enumerate(specs)}
def defaults_vector(specs: list[ParamSpec] | None = None) -> np.ndarray:
"""Return the default parameter vector (in natural space)."""
if specs is None:
specs = ROTARY_CARTPOLE_PARAMS
return np.array([s.default for s in specs], dtype=np.float64)
def bounds_arrays(
specs: list[ParamSpec] | None = None,
) -> tuple[np.ndarray, np.ndarray]:
"""Return (lower, upper) bound arrays."""
if specs is None:
specs = ROTARY_CARTPOLE_PARAMS
lo = np.array([s.lower for s in specs], dtype=np.float64)
hi = np.array([s.upper for s in specs], dtype=np.float64)
return lo, hi
# ── MuJoCo model building with parameter overrides ──────────────────
def _build_model(
robot_path: Path,
params: dict[str, float],
) -> mujoco.MjModel:
"""Build a MuJoCo model from URDF + robot.yaml with parameter overrides.
Follows the same two-step approach as ``MuJoCoRunner._load_model()``:
1. Parse URDF, inject meshdir, load into MuJoCo
2. Export MJCF, inject actuators + joint overrides + param overrides, reload
"""
robot_path = Path(robot_path).resolve()
robot_yaml = yaml.safe_load((robot_path / "robot.yaml").read_text())
urdf_path = robot_path / robot_yaml["urdf"]
# ── Step 1: Load URDF ────────────────────────────────────────
tree = ET.parse(urdf_path)
root = tree.getroot()
# Inject meshdir compiler directive.
meshdir = None
for mesh_el in root.iter("mesh"):
fn = mesh_el.get("filename", "")
parent = str(Path(fn).parent)
if parent and parent != ".":
meshdir = parent
break
if meshdir:
mj_ext = ET.SubElement(root, "mujoco")
ET.SubElement(
mj_ext, "compiler", attrib={"meshdir": meshdir, "balanceinertia": "true"}
)
# Override URDF inertial parameters BEFORE MuJoCo loading.
for link in root.iter("link"):
link_name = link.get("name", "")
inertial = link.find("inertial")
if inertial is None:
continue
if link_name == "arm":
_set_mass(inertial, params.get("arm_mass"))
_set_com(
inertial,
params.get("arm_com_x"),
params.get("arm_com_y"),
params.get("arm_com_z"),
)
elif link_name == "pendulum":
_set_mass(inertial, params.get("pendulum_mass"))
_set_com(
inertial,
params.get("pendulum_com_x"),
params.get("pendulum_com_y"),
params.get("pendulum_com_z"),
)
_set_inertia(
inertial,
ixx=params.get("pendulum_ixx"),
iyy=params.get("pendulum_iyy"),
izz=params.get("pendulum_izz"),
ixy=params.get("pendulum_ixy"),
)
# Write temp URDF and load.
tmp_urdf = robot_path / "_tmp_sysid_load.urdf"
tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
try:
model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
finally:
tmp_urdf.unlink(missing_ok=True)
# ── Step 2: Export MJCF, inject actuators + overrides ────────
tmp_mjcf = robot_path / "_tmp_sysid_inject.xml"
try:
mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
mjcf_root = ET.fromstring(tmp_mjcf.read_text())
# Actuator.
gear = params.get("actuator_gear", robot_yaml["actuators"][0].get("gear", 0.064))
filter_tau = params.get(
"actuator_filter_tau",
robot_yaml["actuators"][0].get("filter_tau", 0.03),
)
act_cfg = robot_yaml["actuators"][0]
ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
act_elem = ET.SubElement(mjcf_root, "actuator")
attribs: dict[str, str] = {
"name": f"{act_cfg['joint']}_motor",
"joint": act_cfg["joint"],
"gear": str(gear),
"ctrlrange": f"{ctrl_lo} {ctrl_hi}",
}
if filter_tau > 0:
attribs["dyntype"] = "filter"
attribs["dynprm"] = str(filter_tau)
attribs["gaintype"] = "fixed"
attribs["biastype"] = "none"
ET.SubElement(act_elem, "general", attrib=attribs)
else:
ET.SubElement(act_elem, "motor", attrib=attribs)
# Joint overrides.
motor_damping = params.get("motor_damping", 0.003)
pend_damping = params.get("pendulum_damping", 0.0001)
motor_armature = params.get("motor_armature", 0.0001)
motor_frictionloss = params.get("motor_frictionloss", 0.03)
for body in mjcf_root.iter("body"):
for jnt in body.findall("joint"):
name = jnt.get("name")
if name == "motor_joint":
jnt.set("damping", str(motor_damping))
jnt.set("armature", str(motor_armature))
jnt.set("frictionloss", str(motor_frictionloss))
elif name == "pendulum_joint":
jnt.set("damping", str(pend_damping))
# Disable self-collision.
for geom in mjcf_root.iter("geom"):
geom.set("contype", "0")
geom.set("conaffinity", "0")
modified_xml = ET.tostring(mjcf_root, encoding="unicode")
tmp_mjcf.write_text(modified_xml)
model = mujoco.MjModel.from_xml_path(str(tmp_mjcf))
finally:
tmp_mjcf.unlink(missing_ok=True)
return model
def _set_mass(inertial: ET.Element, mass: float | None) -> None:
if mass is None:
return
mass_el = inertial.find("mass")
if mass_el is not None:
mass_el.set("value", str(mass))
def _set_com(
inertial: ET.Element,
x: float | None,
y: float | None,
z: float | None,
) -> None:
origin = inertial.find("origin")
if origin is None:
return
xyz = origin.get("xyz", "0 0 0").split()
if x is not None:
xyz[0] = str(x)
if y is not None:
xyz[1] = str(y)
if z is not None:
xyz[2] = str(z)
origin.set("xyz", " ".join(xyz))
def _set_inertia(
inertial: ET.Element,
ixx: float | None = None,
iyy: float | None = None,
izz: float | None = None,
ixy: float | None = None,
iyz: float | None = None,
ixz: float | None = None,
) -> None:
ine = inertial.find("inertia")
if ine is None:
return
for attr, val in [
("ixx", ixx), ("iyy", iyy), ("izz", izz),
("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
]:
if val is not None:
ine.set(attr, str(val))
# ── Simulation rollout ───────────────────────────────────────────────
def rollout(
robot_path: str | Path,
params: dict[str, float],
actions: np.ndarray,
timesteps: np.ndarray,
sim_dt: float = 0.002,
substeps: int = 10,
) -> dict[str, np.ndarray]:
"""Replay recorded actions in MuJoCo with overridden parameters.
Parameters
----------
robot_path : asset directory
params : named parameter overrides
actions : (N,) normalised actions [-1, 1] from the recording
timesteps : (N,) wall-clock times (seconds) from the recording
sim_dt : MuJoCo physics timestep
substeps : physics substeps per control step
Returns
-------
dict with keys: motor_angle, motor_vel, pendulum_angle, pendulum_vel
Each is an (N,) numpy array of simulated values.
"""
robot_path = Path(robot_path).resolve()
model = _build_model(robot_path, params)
model.opt.timestep = sim_dt
data = mujoco.MjData(model)
# Start from pendulum hanging down (qpos=0 is down per URDF convention).
mujoco.mj_resetData(model, data)
# Control dt derived from actual recording sample rate.
n = len(actions)
ctrl_dt = sim_dt * substeps
# Pre-allocate output.
sim_motor_angle = np.zeros(n, dtype=np.float64)
sim_motor_vel = np.zeros(n, dtype=np.float64)
sim_pend_angle = np.zeros(n, dtype=np.float64)
sim_pend_vel = np.zeros(n, dtype=np.float64)
# Extract actuator limit info for software limit switch.
nu = model.nu
if nu > 0:
jnt_id = model.actuator_trnid[0, 0]
jnt_limited = bool(model.jnt_limited[jnt_id])
jnt_lo = model.jnt_range[jnt_id, 0]
jnt_hi = model.jnt_range[jnt_id, 1]
gear_sign = float(np.sign(model.actuator_gear[0, 0]))
else:
jnt_limited = False
jnt_lo = jnt_hi = gear_sign = 0.0
for i in range(n):
data.ctrl[0] = actions[i]
for _ in range(substeps):
# Software limit switch (mirrors MuJoCoRunner).
if jnt_limited and nu > 0:
pos = data.qpos[jnt_id]
if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
data.ctrl[0] = 0.0
elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
data.ctrl[0] = 0.0
mujoco.mj_step(model, data)
sim_motor_angle[i] = data.qpos[0]
sim_motor_vel[i] = data.qvel[0]
sim_pend_angle[i] = data.qpos[1]
sim_pend_vel[i] = data.qvel[1]
return {
"motor_angle": sim_motor_angle,
"motor_vel": sim_motor_vel,
"pendulum_angle": sim_pend_angle,
"pendulum_vel": sim_pend_vel,
}
def windowed_rollout(
robot_path: str | Path,
params: dict[str, float],
recording: dict[str, np.ndarray],
window_duration: float = 0.5,
sim_dt: float = 0.002,
substeps: int = 10,
) -> dict[str, np.ndarray | float]:
"""Multiple-shooting rollout — split recording into short windows.
For each window:
1. Initialize MuJoCo state from the real qpos/qvel at the window start.
2. Replay the recorded actions within the window.
3. Record the simulated output.
This prevents error accumulation across the full trajectory, giving
a much smoother cost landscape for the optimizer.
Parameters
----------
robot_path : asset directory
params : named parameter overrides
recording : dict with keys time, action, motor_angle, motor_vel,
pendulum_angle, pendulum_vel (all 1D arrays of length N)
window_duration : length of each shooting window in seconds
sim_dt : MuJoCo physics timestep
substeps : physics substeps per control step
Returns
-------
dict with:
motor_angle, motor_vel, pendulum_angle, pendulum_vel — (N,) arrays
(stitched from per-window simulations)
n_windows — number of windows used
"""
robot_path = Path(robot_path).resolve()
model = _build_model(robot_path, params)
model.opt.timestep = sim_dt
data = mujoco.MjData(model)
times = recording["time"]
actions = recording["action"]
real_motor = recording["motor_angle"]
real_motor_vel = recording["motor_vel"]
real_pend = recording["pendulum_angle"]
real_pend_vel = recording["pendulum_vel"]
n = len(actions)
# Pre-allocate output (stitched from all windows).
sim_motor_angle = np.zeros(n, dtype=np.float64)
sim_motor_vel = np.zeros(n, dtype=np.float64)
sim_pend_angle = np.zeros(n, dtype=np.float64)
sim_pend_vel = np.zeros(n, dtype=np.float64)
# Extract actuator limit info.
nu = model.nu
if nu > 0:
jnt_id = model.actuator_trnid[0, 0]
jnt_limited = bool(model.jnt_limited[jnt_id])
jnt_lo = model.jnt_range[jnt_id, 0]
jnt_hi = model.jnt_range[jnt_id, 1]
gear_sign = float(np.sign(model.actuator_gear[0, 0]))
else:
jnt_limited = False
jnt_lo = jnt_hi = gear_sign = 0.0
# Compute window boundaries from recording timestamps.
t0 = times[0]
t_end = times[-1]
window_starts: list[int] = [] # indices into the recording
current_t = t0
while current_t < t_end:
# Find the index closest to current_t.
idx = int(np.searchsorted(times, current_t))
idx = min(idx, n - 1)
window_starts.append(idx)
current_t += window_duration
n_windows = len(window_starts)
for w, w_start in enumerate(window_starts):
# Window end: next window start, or end of recording.
w_end = window_starts[w + 1] if w + 1 < n_windows else n
# Initialize MuJoCo state from real data at window start.
mujoco.mj_resetData(model, data)
data.qpos[0] = real_motor[w_start]
data.qpos[1] = real_pend[w_start]
data.qvel[0] = real_motor_vel[w_start]
data.qvel[1] = real_pend_vel[w_start]
data.ctrl[:] = 0.0
# Forward kinematics to make state consistent.
mujoco.mj_forward(model, data)
for i in range(w_start, w_end):
data.ctrl[0] = actions[i]
for _ in range(substeps):
if jnt_limited and nu > 0:
pos = data.qpos[jnt_id]
if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
data.ctrl[0] = 0.0
elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
data.ctrl[0] = 0.0
mujoco.mj_step(model, data)
sim_motor_angle[i] = data.qpos[0]
sim_motor_vel[i] = data.qvel[0]
sim_pend_angle[i] = data.qpos[1]
sim_pend_vel[i] = data.qvel[1]
return {
"motor_angle": sim_motor_angle,
"motor_vel": sim_motor_vel,
"pendulum_angle": sim_pend_angle,
"pendulum_vel": sim_pend_vel,
"n_windows": n_windows,
}