RL-Sim-Framework/src/sysid/optimize.py

"""CMA-ES optimiser — fit simulation parameters to a real-robot recording.

Minimises the trajectory-matching cost between a MuJoCo rollout and a
recorded real-robot sequence.  Uses the ``cmaes`` package (pure-Python
CMA-ES with native box-constraint support).

Motor parameters are **locked** from the motor-only sysid — only
pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
optimised.  Velocities are optionally preprocessed with Savitzky-Golay
differentiation for cleaner targets.

Usage:
    python -m src.sysid.optimize \
        --robot-path assets/rotary_cartpole \
        --recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz

    # Shorter run for testing:
    python -m src.sysid.optimize \
        --robot-path assets/rotary_cartpole \
        --recording <file>.npz \
        --max-generations 10 --population-size 8
"""

from __future__ import annotations

import argparse
import json
import time
from datetime import datetime
from pathlib import Path

import numpy as np
import structlog

from src.sysid.rollout import (
    LOCKED_MOTOR_PARAMS,
    PARAM_SETS,
    ROTARY_CARTPOLE_PARAMS,
    ParamSpec,
    bounds_arrays,
    defaults_vector,
    params_to_dict,
    rollout,
    windowed_rollout,
)

log = structlog.get_logger()


# ── Velocity preprocessing ───────────────────────────────────────────


def _preprocess_recording(
    recording: dict[str, np.ndarray],
    preprocess_vel: bool = True,
    sg_window: int = 7,
    sg_polyorder: int = 3,
) -> dict[str, np.ndarray]:
    """Optionally recompute velocities using Savitzky-Golay differentiation.

    Applies SG filtering to both motor_vel and pendulum_vel, replacing
    the noisy firmware finite-difference velocities with smooth
    analytical derivatives of the (clean) angle signals.
    """
    if not preprocess_vel:
        return recording

    from scipy.signal import savgol_filter

    rec = dict(recording)
    times = rec["time"]
    dt = float(np.mean(np.diff(times)))

    # Motor velocity.
    rec["motor_vel_raw"] = rec["motor_vel"].copy()
    rec["motor_vel"] = savgol_filter(
        rec["motor_angle"],
        window_length=sg_window,
        polyorder=sg_polyorder,
        deriv=1,
        delta=dt,
    )

    # Pendulum velocity.
    rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
    rec["pendulum_vel"] = savgol_filter(
        rec["pendulum_angle"],
        window_length=sg_window,
        polyorder=sg_polyorder,
        deriv=1,
        delta=dt,
    )

    motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
    pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
    log.info(
        "velocity_preprocessed",
        method="savgol",
        sg_window=sg_window,
        sg_polyorder=sg_polyorder,
        dt_ms=f"{dt*1000:.1f}",
        motor_noise_std=f"{motor_noise:.3f} rad/s",
        pend_noise_std=f"{pend_noise:.3f} rad/s",
    )

    return rec


# ── Cost function ────────────────────────────────────────────────────


def _angle_diff(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Shortest signed angle difference, handling wrapping."""
    return np.arctan2(np.sin(a - b), np.cos(a - b))


def _check_inertia_valid(params: dict[str, float]) -> bool:
    """Quick reject: pendulum inertia tensor must be positive-definite."""
    ixx = params.get("pendulum_ixx", 6.16e-06)
    iyy = params.get("pendulum_iyy", 6.16e-06)
    izz = params.get("pendulum_izz", 1.23e-05)
    ixy = params.get("pendulum_ixy", 6.10e-06)
    det_xy = ixx * iyy - ixy * ixy
    return det_xy > 0 and ixx > 0 and iyy > 0 and izz > 0


def _compute_trajectory_cost(
    sim: dict[str, np.ndarray],
    recording: dict[str, np.ndarray],
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    vel_outlier_threshold: float = 20.0,
) -> float:
    """Weighted MSE between sim and real trajectories.

    pendulum_scale multiplies the pendulum terms relative to motor terms.

    Samples where the *real* pendulum velocity exceeds
    ``vel_outlier_threshold`` (rad/s) are excluded from the velocity
    terms.  These are encoder-wrap artefacts (pendulum swinging past
    vertical) that no simulator can reproduce.
    """
    motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"])
    pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"])
    motor_vel_err = sim["motor_vel"] - recording["motor_vel"]
    pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"]

    # Mask out encoder-wrap velocity spikes so the optimizer doesn't
    # waste capacity fitting artefacts.
    valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold
    if valid.sum() < len(valid):
        motor_vel_err = motor_vel_err[valid]
        pend_vel_err = pend_vel_err[valid]

    return float(
        pos_weight * np.mean(motor_err**2)
        + pos_weight * pendulum_scale * np.mean(pend_err**2)
        + vel_weight * np.mean(motor_vel_err**2)
        + vel_weight * pendulum_scale * np.mean(pend_vel_err**2)
    )


def cost_function(
    params_vec: np.ndarray,
    recording: dict[str, np.ndarray],
    robot_path: Path,
    specs: list[ParamSpec],
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
    motor_params: dict[str, float] | None = None,
) -> float:
    """Compute trajectory-matching cost for a candidate parameter vector.

    Uses **multiple-shooting** (windowed rollout): the recording is split
    into short windows (default 0.5 s).  Each window is initialised from
    the real qpos/qvel, so early errors don’t compound across the full
    trajectory.  This gives a much smoother cost landscape for CMA-ES.

    Set ``window_duration=0`` to fall back to the original open-loop
    single-shot rollout (not recommended).
    """
    params = params_to_dict(params_vec, specs)

    if not _check_inertia_valid(params):
        return 1e6

    try:
        if window_duration > 0:
            sim = windowed_rollout(
                robot_path=robot_path,
                params=params,
                recording=recording,
                window_duration=window_duration,
                sim_dt=sim_dt,
                substeps=substeps,
                motor_params=motor_params,
            )
        else:
            sim = rollout(
                robot_path=robot_path,
                params=params,
                actions=recording["action"],
                sim_dt=sim_dt,
                substeps=substeps,
                motor_params=motor_params,
            )
    except Exception as exc:
        log.warning("rollout_failed", error=str(exc))
        return 1e6

    # Check for NaN in sim output.
    for key in ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel"):
        if np.any(~np.isfinite(sim[key])):
            return 1e6

    return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)


# ── Parallel evaluation helper (module-level for pickling) ───────────

# Shared state set by the parent process before spawning workers.
_par_recording: dict[str, np.ndarray] = {}
_par_robot_path: Path = Path(".")
_par_specs: list[ParamSpec] = []
_par_kwargs: dict = {}


def _init_worker(recording, robot_path, specs, kwargs):
    """Initialiser run once per worker process."""
    global _par_recording, _par_robot_path, _par_specs, _par_kwargs
    _par_recording = recording
    _par_robot_path = robot_path
    _par_specs = specs
    _par_kwargs = kwargs


def _eval_candidate(x_natural: np.ndarray) -> float:
    """Evaluate a single candidate — called in worker processes."""
    return cost_function(
        x_natural,
        _par_recording,
        _par_robot_path,
        _par_specs,
        **_par_kwargs,
    )


# ── CMA-ES optimisation loop ────────────────────────────────────────


def optimize(
    robot_path: str | Path,
    recording_path: str | Path,
    specs: list[ParamSpec] | None = None,
    sigma0: float = 0.3,
    population_size: int = 20,
    max_generations: int = 1000,
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
    seed: int = 42,
    preprocess_vel: bool = True,
    sg_window: int = 7,
    sg_polyorder: int = 3,
) -> dict:
    """Run CMA-ES optimisation and return results.

    Motor parameters are locked from the motor-only sysid.
    Only pendulum/arm parameters are optimised.

    Returns a dict with:
        best_params: dict[str, float]
        best_cost: float
        history: list of (generation, best_cost) tuples
        recording: str (path used)
        specs: list of param names
        motor_params: dict of locked motor params
    """
    from cmaes import CMA

    robot_path = Path(robot_path).resolve()
    recording_path = Path(recording_path).resolve()

    if specs is None:
        specs = ROTARY_CARTPOLE_PARAMS

    motor_params = dict(LOCKED_MOTOR_PARAMS)
    log.info(
        "motor_params_locked",
        n_params=len(motor_params),
        gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
    )

    # Load recording.
    recording = dict(np.load(recording_path))

    # Preprocessing: SG velocity recomputation.
    recording = _preprocess_recording(
        recording,
        preprocess_vel=preprocess_vel,
        sg_window=sg_window,
        sg_polyorder=sg_polyorder,
    )

    n_samples = len(recording["time"])
    duration = recording["time"][-1] - recording["time"][0]
    n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
    log.info(
        "recording_loaded",
        path=str(recording_path),
        samples=n_samples,
        duration=f"{duration:.1f}s",
        window_duration=f"{window_duration}s",
        n_windows=n_windows,
    )

    # Initial point (defaults) — normalised to [0, 1] for CMA-ES.
    lo, hi = bounds_arrays(specs)
    x0 = defaults_vector(specs)

    # Normalise to [0, 1] for the optimizer (better conditioned).
    span = hi - lo
    span[span == 0] = 1.0  # avoid division by zero

    def to_normed(x: np.ndarray) -> np.ndarray:
        return (x - lo) / span

    def from_normed(x_n: np.ndarray) -> np.ndarray:
        return x_n * span + lo

    x0_normed = to_normed(x0)
    bounds_normed = np.column_stack(
        [np.zeros(len(specs)), np.ones(len(specs))]
    )

    optimizer = CMA(
        mean=x0_normed,
        sigma=sigma0,
        bounds=bounds_normed,
        population_size=population_size,
        seed=seed,
    )

    best_cost = float("inf")
    best_params_vec = x0.copy()
    history: list[tuple[int, float]] = []

    log.info(
        "cmaes_starting",
        n_params=len(specs),
        population=population_size,
        max_gens=max_generations,
        sigma0=sigma0,
    )

    t0 = time.monotonic()

    # ── Parallel evaluation setup ────────────────────────────────
    # Each candidate is independent — evaluate them in parallel using
    # a process pool.  Falls back to sequential if n_workers=1.
    import multiprocessing as mp
    n_workers = max(1, mp.cpu_count() - 1)  # leave 1 core free

    eval_kwargs = dict(
        sim_dt=sim_dt,
        substeps=substeps,
        pos_weight=pos_weight,
        vel_weight=vel_weight,
        pendulum_scale=pendulum_scale,
        window_duration=window_duration,
        motor_params=motor_params,
    )

    log.info("parallel_workers", n_workers=n_workers)

    # Create a persistent pool (avoids per-generation fork overhead).
    pool = None
    if n_workers > 1:
        pool = mp.Pool(
            n_workers,
            initializer=_init_worker,
            initargs=(recording, robot_path, specs, eval_kwargs),
        )

    for gen in range(max_generations):
        # Ask all candidates first.
        candidates_normed = []
        candidates_natural = []
        for _ in range(optimizer.population_size):
            x_normed = optimizer.ask()
            x_natural = from_normed(x_normed)
            x_natural = np.clip(x_natural, lo, hi)
            candidates_normed.append(x_normed)
            candidates_natural.append(x_natural)

        # Evaluate in parallel.
        if pool is not None:
            costs = pool.map(_eval_candidate, candidates_natural)
        else:
            costs = [cost_function(
                x, recording, robot_path, specs, **eval_kwargs
            ) for x in candidates_natural]

        solutions = list(zip(candidates_normed, costs))
        for x_natural, c in zip(candidates_natural, costs):
            if c < best_cost:
                best_cost = c
                best_params_vec = x_natural.copy()

        optimizer.tell(solutions)
        history.append((gen, best_cost))

        elapsed = time.monotonic() - t0
        if gen % 5 == 0 or gen == max_generations - 1:
            log.info(
                "cmaes_generation",
                gen=gen,
                best_cost=f"{best_cost:.6f}",
                elapsed=f"{elapsed:.1f}s",
                gen_best=f"{min(c for _, c in solutions):.6f}",
            )

    total_time = time.monotonic() - t0

    # Clean up process pool.
    if pool is not None:
        pool.close()
        pool.join()

    best_params = params_to_dict(best_params_vec, specs)

    log.info(
        "cmaes_finished",
        best_cost=f"{best_cost:.6f}",
        total_time=f"{total_time:.1f}s",
        evaluations=max_generations * population_size,
    )

    # Log parameter comparison.
    defaults = params_to_dict(defaults_vector(specs), specs)
    for name in best_params:
        d = defaults[name]
        b = best_params[name]
        change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
        log.info(
            "param_result",
            name=name,
            default=f"{d:.6g}",
            tuned=f"{b:.6g}",
            change=f"{change_pct:+.1f}%",
        )

    return {
        "best_params": best_params,
        "best_cost": best_cost,
        "history": history,
        "recording": str(recording_path),
        "param_names": [s.name for s in specs],
        "defaults": {s.name: s.default for s in specs},
        "motor_params": motor_params,
        "preprocess_vel": preprocess_vel,
        "timestamp": datetime.now().isoformat(),
    }


# ── CLI entry point ──────────────────────────────────────────────────


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Fit simulation parameters to a real-robot recording (CMA-ES)."
    )
    parser.add_argument(
        "--robot-path",
        type=str,
        default="assets/rotary_cartpole",
        help="Path to robot asset directory",
    )
    parser.add_argument(
        "--recording",
        type=str,
        required=True,
        help="Path to .npz recording file",
    )
    parser.add_argument("--sigma0", type=float, default=0.3)
    parser.add_argument("--population-size", type=int, default=20)
    parser.add_argument("--max-generations", type=int, default=200)
    parser.add_argument("--sim-dt", type=float, default=0.002)
    parser.add_argument("--substeps", type=int, default=10)
    parser.add_argument("--pos-weight", type=float, default=1.0)
    parser.add_argument("--vel-weight", type=float, default=0.1)
    parser.add_argument("--pendulum-scale", type=float, default=3.0,
                        help="Multiplier for pendulum terms relative to motor (default 3.0)")
    parser.add_argument(
        "--window-duration",
        type=float,
        default=0.5,
        help="Shooting window length in seconds (0 = open-loop, default 0.5)",
    )
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument(
        "--no-export",
        action="store_true",
        help="Skip exporting tuned files (results JSON only)",
    )
    parser.add_argument(
        "--no-preprocess-vel",
        action="store_true",
        help="Skip Savitzky-Golay velocity preprocessing",
    )
    parser.add_argument("--sg-window", type=int, default=7,
                        help="Savitzky-Golay window length (odd, default 7)")
    parser.add_argument("--sg-polyorder", type=int, default=3,
                        help="Savitzky-Golay polynomial order (default 3)")
    parser.add_argument(
        "--param-set",
        type=str,
        default="full",
        choices=list(PARAM_SETS.keys()),
        help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
    )
    args = parser.parse_args()

    specs = PARAM_SETS[args.param_set]

    result = optimize(
        robot_path=args.robot_path,
        recording_path=args.recording,
        specs=specs,
        sigma0=args.sigma0,
        population_size=args.population_size,
        max_generations=args.max_generations,
        sim_dt=args.sim_dt,
        substeps=args.substeps,
        pos_weight=args.pos_weight,
        vel_weight=args.vel_weight,
        pendulum_scale=args.pendulum_scale,
        window_duration=args.window_duration,
        seed=args.seed,
        preprocess_vel=not args.no_preprocess_vel,
        sg_window=args.sg_window,
        sg_polyorder=args.sg_polyorder,
    )

    # Save results JSON.
    robot_path = Path(args.robot_path).resolve()
    result_path = robot_path / "sysid_result.json"
    # Convert numpy types for JSON serialisation.
    result_json = {
        k: v for k, v in result.items() if k != "history"
    }
    result_json["history_summary"] = {
        "first_cost": result["history"][0][1] if result["history"] else None,
        "final_cost": result["history"][-1][1] if result["history"] else None,
        "generations": len(result["history"]),
    }
    result_path.write_text(json.dumps(result_json, indent=2, default=str))
    log.info("results_saved", path=str(result_path))

    # Export tuned files unless --no-export.
    if not args.no_export:
        from src.sysid.export import export_tuned_files

        export_tuned_files(
            robot_path=args.robot_path,
            params=result["best_params"],
            motor_params=result.get("motor_params"),
        )


if __name__ == "__main__":
    main()