"""CMA-ES optimiser — fit simulation parameters to a real-robot recording. Minimises the trajectory-matching cost between a MuJoCo rollout and a recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python CMA-ES with native box-constraint support). Motor parameters are **locked** from the motor-only sysid — only pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are optimised. Velocities are optionally preprocessed with Savitzky-Golay differentiation for cleaner targets. Usage: python -m src.sysid.optimize \ --robot-path assets/rotary_cartpole \ --recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz # Shorter run for testing: python -m src.sysid.optimize \ --robot-path assets/rotary_cartpole \ --recording .npz \ --max-generations 10 --population-size 8 """ from __future__ import annotations import argparse import json import time from datetime import datetime from pathlib import Path import numpy as np import structlog from src.sysid.rollout import ( LOCKED_MOTOR_PARAMS, PARAM_SETS, ROTARY_CARTPOLE_PARAMS, ParamSpec, bounds_arrays, defaults_vector, params_to_dict, rollout, windowed_rollout, ) log = structlog.get_logger() # ── Velocity preprocessing ─────────────────────────────────────────── def _preprocess_recording( recording: dict[str, np.ndarray], preprocess_vel: bool = True, sg_window: int = 7, sg_polyorder: int = 3, ) -> dict[str, np.ndarray]: """Optionally recompute velocities using Savitzky-Golay differentiation. Applies SG filtering to both motor_vel and pendulum_vel, replacing the noisy firmware finite-difference velocities with smooth analytical derivatives of the (clean) angle signals. """ if not preprocess_vel: return recording from scipy.signal import savgol_filter rec = dict(recording) times = rec["time"] dt = float(np.mean(np.diff(times))) # Motor velocity. rec["motor_vel_raw"] = rec["motor_vel"].copy() rec["motor_vel"] = savgol_filter( rec["motor_angle"], window_length=sg_window, polyorder=sg_polyorder, deriv=1, delta=dt, ) # Pendulum velocity. rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy() rec["pendulum_vel"] = savgol_filter( rec["pendulum_angle"], window_length=sg_window, polyorder=sg_polyorder, deriv=1, delta=dt, ) motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"]) pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"]) log.info( "velocity_preprocessed", method="savgol", sg_window=sg_window, sg_polyorder=sg_polyorder, dt_ms=f"{dt*1000:.1f}", motor_noise_std=f"{motor_noise:.3f} rad/s", pend_noise_std=f"{pend_noise:.3f} rad/s", ) return rec # ── Cost function ──────────────────────────────────────────────────── def _angle_diff(a: np.ndarray, b: np.ndarray) -> np.ndarray: """Shortest signed angle difference, handling wrapping.""" return np.arctan2(np.sin(a - b), np.cos(a - b)) def _check_inertia_valid(params: dict[str, float]) -> bool: """Quick reject: pendulum inertia tensor must be positive-definite.""" ixx = params.get("pendulum_ixx", 6.16e-06) iyy = params.get("pendulum_iyy", 6.16e-06) izz = params.get("pendulum_izz", 1.23e-05) ixy = params.get("pendulum_ixy", 6.10e-06) det_xy = ixx * iyy - ixy * ixy return det_xy > 0 and ixx > 0 and iyy > 0 and izz > 0 def _compute_trajectory_cost( sim: dict[str, np.ndarray], recording: dict[str, np.ndarray], pos_weight: float = 1.0, vel_weight: float = 0.1, pendulum_scale: float = 3.0, vel_outlier_threshold: float = 20.0, ) -> float: """Weighted MSE between sim and real trajectories. pendulum_scale multiplies the pendulum terms relative to motor terms. Samples where the *real* pendulum velocity exceeds ``vel_outlier_threshold`` (rad/s) are excluded from the velocity terms. These are encoder-wrap artefacts (pendulum swinging past vertical) that no simulator can reproduce. """ motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"]) pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"]) motor_vel_err = sim["motor_vel"] - recording["motor_vel"] pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"] # Mask out encoder-wrap velocity spikes so the optimizer doesn't # waste capacity fitting artefacts. valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold if valid.sum() < len(valid): motor_vel_err = motor_vel_err[valid] pend_vel_err = pend_vel_err[valid] return float( pos_weight * np.mean(motor_err**2) + pos_weight * pendulum_scale * np.mean(pend_err**2) + vel_weight * np.mean(motor_vel_err**2) + vel_weight * pendulum_scale * np.mean(pend_vel_err**2) ) def cost_function( params_vec: np.ndarray, recording: dict[str, np.ndarray], robot_path: Path, specs: list[ParamSpec], sim_dt: float = 0.002, substeps: int = 10, pos_weight: float = 1.0, vel_weight: float = 0.1, pendulum_scale: float = 3.0, window_duration: float = 0.5, motor_params: dict[str, float] | None = None, ) -> float: """Compute trajectory-matching cost for a candidate parameter vector. Uses **multiple-shooting** (windowed rollout): the recording is split into short windows (default 0.5 s). Each window is initialised from the real qpos/qvel, so early errors don’t compound across the full trajectory. This gives a much smoother cost landscape for CMA-ES. Set ``window_duration=0`` to fall back to the original open-loop single-shot rollout (not recommended). """ params = params_to_dict(params_vec, specs) if not _check_inertia_valid(params): return 1e6 try: if window_duration > 0: sim = windowed_rollout( robot_path=robot_path, params=params, recording=recording, window_duration=window_duration, sim_dt=sim_dt, substeps=substeps, motor_params=motor_params, ) else: sim = rollout( robot_path=robot_path, params=params, actions=recording["action"], sim_dt=sim_dt, substeps=substeps, motor_params=motor_params, ) except Exception as exc: log.warning("rollout_failed", error=str(exc)) return 1e6 # Check for NaN in sim output. for key in ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel"): if np.any(~np.isfinite(sim[key])): return 1e6 return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale) # ── Parallel evaluation helper (module-level for pickling) ─────────── # Shared state set by the parent process before spawning workers. _par_recording: dict[str, np.ndarray] = {} _par_robot_path: Path = Path(".") _par_specs: list[ParamSpec] = [] _par_kwargs: dict = {} def _init_worker(recording, robot_path, specs, kwargs): """Initialiser run once per worker process.""" global _par_recording, _par_robot_path, _par_specs, _par_kwargs _par_recording = recording _par_robot_path = robot_path _par_specs = specs _par_kwargs = kwargs def _eval_candidate(x_natural: np.ndarray) -> float: """Evaluate a single candidate — called in worker processes.""" return cost_function( x_natural, _par_recording, _par_robot_path, _par_specs, **_par_kwargs, ) # ── CMA-ES optimisation loop ──────────────────────────────────────── def optimize( robot_path: str | Path, recording_path: str | Path, specs: list[ParamSpec] | None = None, sigma0: float = 0.3, population_size: int = 20, max_generations: int = 1000, sim_dt: float = 0.002, substeps: int = 10, pos_weight: float = 1.0, vel_weight: float = 0.1, pendulum_scale: float = 3.0, window_duration: float = 0.5, seed: int = 42, preprocess_vel: bool = True, sg_window: int = 7, sg_polyorder: int = 3, ) -> dict: """Run CMA-ES optimisation and return results. Motor parameters are locked from the motor-only sysid. Only pendulum/arm parameters are optimised. Returns a dict with: best_params: dict[str, float] best_cost: float history: list of (generation, best_cost) tuples recording: str (path used) specs: list of param names motor_params: dict of locked motor params """ from cmaes import CMA robot_path = Path(robot_path).resolve() recording_path = Path(recording_path).resolve() if specs is None: specs = ROTARY_CARTPOLE_PARAMS motor_params = dict(LOCKED_MOTOR_PARAMS) log.info( "motor_params_locked", n_params=len(motor_params), gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}", ) # Load recording. recording = dict(np.load(recording_path)) # Preprocessing: SG velocity recomputation. recording = _preprocess_recording( recording, preprocess_vel=preprocess_vel, sg_window=sg_window, sg_polyorder=sg_polyorder, ) n_samples = len(recording["time"]) duration = recording["time"][-1] - recording["time"][0] n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1 log.info( "recording_loaded", path=str(recording_path), samples=n_samples, duration=f"{duration:.1f}s", window_duration=f"{window_duration}s", n_windows=n_windows, ) # Initial point (defaults) — normalised to [0, 1] for CMA-ES. lo, hi = bounds_arrays(specs) x0 = defaults_vector(specs) # Normalise to [0, 1] for the optimizer (better conditioned). span = hi - lo span[span == 0] = 1.0 # avoid division by zero def to_normed(x: np.ndarray) -> np.ndarray: return (x - lo) / span def from_normed(x_n: np.ndarray) -> np.ndarray: return x_n * span + lo x0_normed = to_normed(x0) bounds_normed = np.column_stack( [np.zeros(len(specs)), np.ones(len(specs))] ) optimizer = CMA( mean=x0_normed, sigma=sigma0, bounds=bounds_normed, population_size=population_size, seed=seed, ) best_cost = float("inf") best_params_vec = x0.copy() history: list[tuple[int, float]] = [] log.info( "cmaes_starting", n_params=len(specs), population=population_size, max_gens=max_generations, sigma0=sigma0, ) t0 = time.monotonic() # ── Parallel evaluation setup ──────────────────────────────── # Each candidate is independent — evaluate them in parallel using # a process pool. Falls back to sequential if n_workers=1. import multiprocessing as mp n_workers = max(1, mp.cpu_count() - 1) # leave 1 core free eval_kwargs = dict( sim_dt=sim_dt, substeps=substeps, pos_weight=pos_weight, vel_weight=vel_weight, pendulum_scale=pendulum_scale, window_duration=window_duration, motor_params=motor_params, ) log.info("parallel_workers", n_workers=n_workers) # Create a persistent pool (avoids per-generation fork overhead). pool = None if n_workers > 1: pool = mp.Pool( n_workers, initializer=_init_worker, initargs=(recording, robot_path, specs, eval_kwargs), ) for gen in range(max_generations): # Ask all candidates first. candidates_normed = [] candidates_natural = [] for _ in range(optimizer.population_size): x_normed = optimizer.ask() x_natural = from_normed(x_normed) x_natural = np.clip(x_natural, lo, hi) candidates_normed.append(x_normed) candidates_natural.append(x_natural) # Evaluate in parallel. if pool is not None: costs = pool.map(_eval_candidate, candidates_natural) else: costs = [cost_function( x, recording, robot_path, specs, **eval_kwargs ) for x in candidates_natural] solutions = list(zip(candidates_normed, costs)) for x_natural, c in zip(candidates_natural, costs): if c < best_cost: best_cost = c best_params_vec = x_natural.copy() optimizer.tell(solutions) history.append((gen, best_cost)) elapsed = time.monotonic() - t0 if gen % 5 == 0 or gen == max_generations - 1: log.info( "cmaes_generation", gen=gen, best_cost=f"{best_cost:.6f}", elapsed=f"{elapsed:.1f}s", gen_best=f"{min(c for _, c in solutions):.6f}", ) total_time = time.monotonic() - t0 # Clean up process pool. if pool is not None: pool.close() pool.join() best_params = params_to_dict(best_params_vec, specs) log.info( "cmaes_finished", best_cost=f"{best_cost:.6f}", total_time=f"{total_time:.1f}s", evaluations=max_generations * population_size, ) # Log parameter comparison. defaults = params_to_dict(defaults_vector(specs), specs) for name in best_params: d = defaults[name] b = best_params[name] change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0 log.info( "param_result", name=name, default=f"{d:.6g}", tuned=f"{b:.6g}", change=f"{change_pct:+.1f}%", ) return { "best_params": best_params, "best_cost": best_cost, "history": history, "recording": str(recording_path), "param_names": [s.name for s in specs], "defaults": {s.name: s.default for s in specs}, "motor_params": motor_params, "preprocess_vel": preprocess_vel, "timestamp": datetime.now().isoformat(), } # ── CLI entry point ────────────────────────────────────────────────── def main() -> None: parser = argparse.ArgumentParser( description="Fit simulation parameters to a real-robot recording (CMA-ES)." ) parser.add_argument( "--robot-path", type=str, default="assets/rotary_cartpole", help="Path to robot asset directory", ) parser.add_argument( "--recording", type=str, required=True, help="Path to .npz recording file", ) parser.add_argument("--sigma0", type=float, default=0.3) parser.add_argument("--population-size", type=int, default=20) parser.add_argument("--max-generations", type=int, default=200) parser.add_argument("--sim-dt", type=float, default=0.002) parser.add_argument("--substeps", type=int, default=10) parser.add_argument("--pos-weight", type=float, default=1.0) parser.add_argument("--vel-weight", type=float, default=0.1) parser.add_argument("--pendulum-scale", type=float, default=3.0, help="Multiplier for pendulum terms relative to motor (default 3.0)") parser.add_argument( "--window-duration", type=float, default=0.5, help="Shooting window length in seconds (0 = open-loop, default 0.5)", ) parser.add_argument("--seed", type=int, default=42) parser.add_argument( "--no-export", action="store_true", help="Skip exporting tuned files (results JSON only)", ) parser.add_argument( "--no-preprocess-vel", action="store_true", help="Skip Savitzky-Golay velocity preprocessing", ) parser.add_argument("--sg-window", type=int, default=7, help="Savitzky-Golay window length (odd, default 7)") parser.add_argument("--sg-polyorder", type=int, default=3, help="Savitzky-Golay polynomial order (default 3)") parser.add_argument( "--param-set", type=str, default="full", choices=list(PARAM_SETS.keys()), help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)", ) args = parser.parse_args() specs = PARAM_SETS[args.param_set] result = optimize( robot_path=args.robot_path, recording_path=args.recording, specs=specs, sigma0=args.sigma0, population_size=args.population_size, max_generations=args.max_generations, sim_dt=args.sim_dt, substeps=args.substeps, pos_weight=args.pos_weight, vel_weight=args.vel_weight, pendulum_scale=args.pendulum_scale, window_duration=args.window_duration, seed=args.seed, preprocess_vel=not args.no_preprocess_vel, sg_window=args.sg_window, sg_polyorder=args.sg_polyorder, ) # Save results JSON. robot_path = Path(args.robot_path).resolve() result_path = robot_path / "sysid_result.json" # Convert numpy types for JSON serialisation. result_json = { k: v for k, v in result.items() if k != "history" } result_json["history_summary"] = { "first_cost": result["history"][0][1] if result["history"] else None, "final_cost": result["history"][-1][1] if result["history"] else None, "generations": len(result["history"]), } result_path.write_text(json.dumps(result_json, indent=2, default=str)) log.info("results_saved", path=str(result_path)) # Export tuned files unless --no-export. if not args.no_export: from src.sysid.export import export_tuned_files export_tuned_files( robot_path=args.robot_path, params=result["best_params"], motor_params=result.get("motor_params"), ) if __name__ == "__main__": main()