581 lines
18 KiB
Python
581 lines
18 KiB
Python
"""CMA-ES optimiser — fit simulation parameters to a real-robot recording.
|
||
|
||
Minimises the trajectory-matching cost between a MuJoCo rollout and a
|
||
recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python
|
||
CMA-ES with native box-constraint support).
|
||
|
||
Motor parameters are **locked** from the motor-only sysid — only
|
||
pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
|
||
optimised. Velocities are optionally preprocessed with Savitzky-Golay
|
||
differentiation for cleaner targets.
|
||
|
||
Usage:
|
||
python -m src.sysid.optimize \
|
||
--robot-path assets/rotary_cartpole \
|
||
--recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz
|
||
|
||
# Shorter run for testing:
|
||
python -m src.sysid.optimize \
|
||
--robot-path assets/rotary_cartpole \
|
||
--recording <file>.npz \
|
||
--max-generations 10 --population-size 8
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import time
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
import numpy as np
|
||
import structlog
|
||
|
||
from src.sysid.rollout import (
|
||
LOCKED_MOTOR_PARAMS,
|
||
PARAM_SETS,
|
||
ROTARY_CARTPOLE_PARAMS,
|
||
ParamSpec,
|
||
bounds_arrays,
|
||
defaults_vector,
|
||
params_to_dict,
|
||
rollout,
|
||
windowed_rollout,
|
||
)
|
||
|
||
# Module-level structured logger; all progress/result reporting goes through it.
log = structlog.get_logger()
|
||
|
||
|
||
# ── Velocity preprocessing ───────────────────────────────────────────
|
||
|
||
|
||
def _preprocess_recording(
|
||
recording: dict[str, np.ndarray],
|
||
preprocess_vel: bool = True,
|
||
sg_window: int = 7,
|
||
sg_polyorder: int = 3,
|
||
) -> dict[str, np.ndarray]:
|
||
"""Optionally recompute velocities using Savitzky-Golay differentiation.
|
||
|
||
Applies SG filtering to both motor_vel and pendulum_vel, replacing
|
||
the noisy firmware finite-difference velocities with smooth
|
||
analytical derivatives of the (clean) angle signals.
|
||
"""
|
||
if not preprocess_vel:
|
||
return recording
|
||
|
||
from scipy.signal import savgol_filter
|
||
|
||
rec = dict(recording)
|
||
times = rec["time"]
|
||
dt = float(np.mean(np.diff(times)))
|
||
|
||
# Motor velocity.
|
||
rec["motor_vel_raw"] = rec["motor_vel"].copy()
|
||
rec["motor_vel"] = savgol_filter(
|
||
rec["motor_angle"],
|
||
window_length=sg_window,
|
||
polyorder=sg_polyorder,
|
||
deriv=1,
|
||
delta=dt,
|
||
)
|
||
|
||
# Pendulum velocity.
|
||
rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
|
||
rec["pendulum_vel"] = savgol_filter(
|
||
rec["pendulum_angle"],
|
||
window_length=sg_window,
|
||
polyorder=sg_polyorder,
|
||
deriv=1,
|
||
delta=dt,
|
||
)
|
||
|
||
motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
|
||
pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
|
||
log.info(
|
||
"velocity_preprocessed",
|
||
method="savgol",
|
||
sg_window=sg_window,
|
||
sg_polyorder=sg_polyorder,
|
||
dt_ms=f"{dt*1000:.1f}",
|
||
motor_noise_std=f"{motor_noise:.3f} rad/s",
|
||
pend_noise_std=f"{pend_noise:.3f} rad/s",
|
||
)
|
||
|
||
return rec
|
||
|
||
|
||
# ── Cost function ────────────────────────────────────────────────────
|
||
|
||
|
||
def _angle_diff(a: np.ndarray, b: np.ndarray) -> np.ndarray:
|
||
"""Shortest signed angle difference, handling wrapping."""
|
||
return np.arctan2(np.sin(a - b), np.cos(a - b))
|
||
|
||
|
||
def _check_inertia_valid(params: dict[str, float]) -> bool:
|
||
"""Quick reject: pendulum inertia tensor must be positive-definite."""
|
||
ixx = params.get("pendulum_ixx", 6.16e-06)
|
||
iyy = params.get("pendulum_iyy", 6.16e-06)
|
||
izz = params.get("pendulum_izz", 1.23e-05)
|
||
ixy = params.get("pendulum_ixy", 6.10e-06)
|
||
det_xy = ixx * iyy - ixy * ixy
|
||
return det_xy > 0 and ixx > 0 and iyy > 0 and izz > 0
|
||
|
||
|
||
def _compute_trajectory_cost(
    sim: dict[str, np.ndarray],
    recording: dict[str, np.ndarray],
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    vel_outlier_threshold: float = 20.0,
) -> float:
    """Weighted MSE between sim and real trajectories.

    Angle errors are computed wrap-aware via :func:`_angle_diff`.
    ``pendulum_scale`` multiplies the pendulum terms relative to motor terms.

    Samples where the *real* pendulum velocity exceeds
    ``vel_outlier_threshold`` (rad/s) are excluded from the velocity
    terms.  These are encoder-wrap artefacts (pendulum swinging past
    vertical) that no simulator can reproduce.  If *every* sample is an
    outlier, the velocity terms are dropped entirely — previously this
    produced ``np.mean`` of empty arrays, i.e. a NaN cost (plus a
    RuntimeWarning) that silently poisoned the CMA-ES ranking.
    """
    motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"])
    pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"])
    motor_vel_err = sim["motor_vel"] - recording["motor_vel"]
    pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"]

    # Mask out encoder-wrap velocity spikes so the optimizer doesn't
    # waste capacity fitting artefacts.
    valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold
    if valid.sum() < len(valid):
        motor_vel_err = motor_vel_err[valid]
        pend_vel_err = pend_vel_err[valid]

    cost = (
        pos_weight * float(np.mean(motor_err**2))
        + pos_weight * pendulum_scale * float(np.mean(pend_err**2))
    )
    # Guard: with zero valid samples the velocity means would be NaN.
    if motor_vel_err.size > 0:
        cost += vel_weight * float(np.mean(motor_vel_err**2))
        cost += vel_weight * pendulum_scale * float(np.mean(pend_vel_err**2))
    return float(cost)
|
||
|
||
|
||
def cost_function(
    params_vec: np.ndarray,
    recording: dict[str, np.ndarray],
    robot_path: Path,
    specs: list[ParamSpec],
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
    motor_params: dict[str, float] | None = None,
) -> float:
    """Trajectory-matching cost for one candidate parameter vector.

    Uses **multiple-shooting** (windowed rollout) by default: the
    recording is split into short windows (default 0.5 s), each
    re-initialised from the real qpos/qvel, so early errors do not
    compound across the full trajectory.  This keeps the cost landscape
    smooth for CMA-ES.  Pass ``window_duration=0`` to fall back to the
    original open-loop single-shot rollout (not recommended).

    Invalid inertia tensors, rollout failures, and non-finite sim output
    all map to a large penalty cost.
    """
    penalty = 1e6
    params = params_to_dict(params_vec, specs)

    # Reject physically impossible inertia candidates before simulating.
    if not _check_inertia_valid(params):
        return penalty

    # Arguments shared by both rollout flavours.
    common = dict(
        robot_path=robot_path,
        params=params,
        sim_dt=sim_dt,
        substeps=substeps,
        motor_params=motor_params,
    )
    try:
        if window_duration > 0:
            sim = windowed_rollout(
                recording=recording,
                window_duration=window_duration,
                **common,
            )
        else:
            sim = rollout(actions=recording["action"], **common)
    except Exception as exc:
        log.warning("rollout_failed", error=str(exc))
        return penalty

    # Diverged simulations produce NaN/inf traces — penalise them.
    signals = ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel")
    if any(not np.all(np.isfinite(sim[key])) for key in signals):
        return penalty

    return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
|
||
|
||
|
||
# ── Parallel evaluation helper (module-level for pickling) ───────────
|
||
|
||
# Shared state set by the parent process before spawning workers.
# ``_eval_candidate`` must be a picklable top-level function taking only
# the candidate vector, so the (large) evaluation context lives in these
# module globals; ``_init_worker`` overwrites the placeholders once per
# worker process.
_par_recording: dict[str, np.ndarray] = {}  # preprocessed recording arrays
_par_robot_path: Path = Path(".")  # robot asset directory
_par_specs: list[ParamSpec] = []  # parameter specs being optimised
_par_kwargs: dict = {}  # extra keyword args forwarded to cost_function
|
||
|
||
|
||
def _init_worker(recording, robot_path, specs, kwargs):
|
||
"""Initialiser run once per worker process."""
|
||
global _par_recording, _par_robot_path, _par_specs, _par_kwargs
|
||
_par_recording = recording
|
||
_par_robot_path = robot_path
|
||
_par_specs = specs
|
||
_par_kwargs = kwargs
|
||
|
||
|
||
def _eval_candidate(x_natural: np.ndarray) -> float:
    """Evaluate a single candidate — called in worker processes.

    Reads the shared context installed by ``_init_worker`` (recording,
    robot path, specs, cost kwargs), so only the candidate vector —
    already in natural, de-normalised units — needs to cross the
    process boundary per call.
    """
    return cost_function(
        x_natural,
        _par_recording,
        _par_robot_path,
        _par_specs,
        **_par_kwargs,
    )
|
||
|
||
|
||
# ── CMA-ES optimisation loop ────────────────────────────────────────
|
||
|
||
|
||
def optimize(
    robot_path: str | Path,
    recording_path: str | Path,
    specs: list[ParamSpec] | None = None,
    sigma0: float = 0.3,
    population_size: int = 20,
    max_generations: int = 1000,
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
    seed: int = 42,
    preprocess_vel: bool = True,
    sg_window: int = 7,
    sg_polyorder: int = 3,
) -> dict:
    """Run CMA-ES optimisation and return results.

    Motor parameters are locked from the motor-only sysid; only
    pendulum/arm parameters are optimised.  Candidates are evaluated in
    a persistent worker pool; the pool is now torn down in a
    ``try``/``finally`` so worker processes no longer leak if a
    generation raises or the run is interrupted.

    Args:
        robot_path: Robot asset directory.
        recording_path: ``.npz`` real-robot recording.
        specs: Parameter specs to optimise (defaults to
            ``ROTARY_CARTPOLE_PARAMS``).
        sigma0: Initial CMA-ES step size in normalised [0, 1] space.
        population_size / max_generations: CMA-ES budget.
        sim_dt, substeps: Simulation timing forwarded to the rollout.
        pos_weight, vel_weight, pendulum_scale: Cost-term weights.
        window_duration: Multiple-shooting window (0 = open-loop).
        seed: CMA-ES RNG seed.
        preprocess_vel, sg_window, sg_polyorder: Savitzky-Golay velocity
            preprocessing options.

    Returns a dict with:
        best_params: dict[str, float]
        best_cost: float
        history: list of (generation, best_cost) tuples
        recording: str (path used)
        param_names / defaults: spec metadata
        motor_params: dict of locked motor params
    """
    from cmaes import CMA

    robot_path = Path(robot_path).resolve()
    recording_path = Path(recording_path).resolve()

    if specs is None:
        specs = ROTARY_CARTPOLE_PARAMS

    # Motor parameters come from the earlier motor-only sysid and are
    # held fixed throughout this optimisation.
    motor_params = dict(LOCKED_MOTOR_PARAMS)
    log.info(
        "motor_params_locked",
        n_params=len(motor_params),
        gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
    )

    # Load recording.
    recording = dict(np.load(recording_path))

    # Preprocessing: SG velocity recomputation.
    recording = _preprocess_recording(
        recording,
        preprocess_vel=preprocess_vel,
        sg_window=sg_window,
        sg_polyorder=sg_polyorder,
    )

    n_samples = len(recording["time"])
    duration = recording["time"][-1] - recording["time"][0]
    n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
    log.info(
        "recording_loaded",
        path=str(recording_path),
        samples=n_samples,
        duration=f"{duration:.1f}s",
        window_duration=f"{window_duration}s",
        n_windows=n_windows,
    )

    # Initial point (defaults) — normalised to [0, 1] for CMA-ES.
    lo, hi = bounds_arrays(specs)
    x0 = defaults_vector(specs)

    # Normalise to [0, 1] for the optimizer (better conditioned).
    span = hi - lo
    span[span == 0] = 1.0  # avoid division by zero

    def to_normed(x: np.ndarray) -> np.ndarray:
        return (x - lo) / span

    def from_normed(x_n: np.ndarray) -> np.ndarray:
        return x_n * span + lo

    x0_normed = to_normed(x0)
    bounds_normed = np.column_stack(
        [np.zeros(len(specs)), np.ones(len(specs))]
    )

    optimizer = CMA(
        mean=x0_normed,
        sigma=sigma0,
        bounds=bounds_normed,
        population_size=population_size,
        seed=seed,
    )

    best_cost = float("inf")
    best_params_vec = x0.copy()
    history: list[tuple[int, float]] = []

    log.info(
        "cmaes_starting",
        n_params=len(specs),
        population=population_size,
        max_gens=max_generations,
        sigma0=sigma0,
    )

    t0 = time.monotonic()

    # ── Parallel evaluation setup ────────────────────────────────
    # Each candidate is independent — evaluate them in parallel using
    # a process pool. Falls back to sequential if n_workers=1.
    import multiprocessing as mp
    n_workers = max(1, mp.cpu_count() - 1)  # leave 1 core free

    eval_kwargs = dict(
        sim_dt=sim_dt,
        substeps=substeps,
        pos_weight=pos_weight,
        vel_weight=vel_weight,
        pendulum_scale=pendulum_scale,
        window_duration=window_duration,
        motor_params=motor_params,
    )

    log.info("parallel_workers", n_workers=n_workers)

    # Create a persistent pool (avoids per-generation fork overhead).
    pool = None
    if n_workers > 1:
        pool = mp.Pool(
            n_workers,
            initializer=_init_worker,
            initargs=(recording, robot_path, specs, eval_kwargs),
        )

    try:
        for gen in range(max_generations):
            # Ask all candidates first, de-normalising each into natural
            # parameter units (clipped to the bounds for safety).
            candidates_normed = []
            candidates_natural = []
            for _ in range(optimizer.population_size):
                x_normed = optimizer.ask()
                x_natural = from_normed(x_normed)
                x_natural = np.clip(x_natural, lo, hi)
                candidates_normed.append(x_normed)
                candidates_natural.append(x_natural)

            # Evaluate in parallel (or inline when no pool was created).
            if pool is not None:
                costs = pool.map(_eval_candidate, candidates_natural)
            else:
                costs = [cost_function(
                    x, recording, robot_path, specs, **eval_kwargs
                ) for x in candidates_natural]

            # CMA-ES is told costs in normalised space; the incumbent is
            # tracked in natural units.
            solutions = list(zip(candidates_normed, costs))
            for x_natural, c in zip(candidates_natural, costs):
                if c < best_cost:
                    best_cost = c
                    best_params_vec = x_natural.copy()

            optimizer.tell(solutions)
            history.append((gen, best_cost))

            elapsed = time.monotonic() - t0
            if gen % 5 == 0 or gen == max_generations - 1:
                log.info(
                    "cmaes_generation",
                    gen=gen,
                    best_cost=f"{best_cost:.6f}",
                    elapsed=f"{elapsed:.1f}s",
                    gen_best=f"{min(c for _, c in solutions):.6f}",
                )

        total_time = time.monotonic() - t0
    finally:
        # Always reap the workers — without this, an exception (or
        # Ctrl-C) mid-run leaked the child processes.
        if pool is not None:
            pool.close()
            pool.join()

    best_params = params_to_dict(best_params_vec, specs)

    log.info(
        "cmaes_finished",
        best_cost=f"{best_cost:.6f}",
        total_time=f"{total_time:.1f}s",
        evaluations=max_generations * population_size,
    )

    # Log parameter comparison.
    defaults = params_to_dict(defaults_vector(specs), specs)
    for name in best_params:
        d = defaults[name]
        b = best_params[name]
        change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
        log.info(
            "param_result",
            name=name,
            default=f"{d:.6g}",
            tuned=f"{b:.6g}",
            change=f"{change_pct:+.1f}%",
        )

    return {
        "best_params": best_params,
        "best_cost": best_cost,
        "history": history,
        "recording": str(recording_path),
        "param_names": [s.name for s in specs],
        "defaults": {s.name: s.default for s in specs},
        "motor_params": motor_params,
        "preprocess_vel": preprocess_vel,
        "timestamp": datetime.now().isoformat(),
    }
|
||
|
||
|
||
# ── CLI entry point ──────────────────────────────────────────────────
|
||
|
||
|
||
def main() -> None:
    """CLI entry point: parse args, run CMA-ES sysid, save/export results.

    Writes ``sysid_result.json`` into the robot asset directory and, unless
    ``--no-export`` is given, exports tuned model files via
    ``src.sysid.export.export_tuned_files``.
    """
    parser = argparse.ArgumentParser(
        description="Fit simulation parameters to a real-robot recording (CMA-ES)."
    )
    # Input locations.
    parser.add_argument(
        "--robot-path",
        type=str,
        default="assets/rotary_cartpole",
        help="Path to robot asset directory",
    )
    parser.add_argument(
        "--recording",
        type=str,
        required=True,
        help="Path to .npz recording file",
    )
    # CMA-ES budget and step size.
    parser.add_argument("--sigma0", type=float, default=0.3)
    parser.add_argument("--population-size", type=int, default=20)
    parser.add_argument("--max-generations", type=int, default=200)
    # Simulation timing.
    parser.add_argument("--sim-dt", type=float, default=0.002)
    parser.add_argument("--substeps", type=int, default=10)
    # Cost-function weights.
    parser.add_argument("--pos-weight", type=float, default=1.0)
    parser.add_argument("--vel-weight", type=float, default=0.1)
    parser.add_argument("--pendulum-scale", type=float, default=3.0,
                        help="Multiplier for pendulum terms relative to motor (default 3.0)")
    parser.add_argument(
        "--window-duration",
        type=float,
        default=0.5,
        help="Shooting window length in seconds (0 = open-loop, default 0.5)",
    )
    parser.add_argument("--seed", type=int, default=42)
    # Output / preprocessing toggles.
    parser.add_argument(
        "--no-export",
        action="store_true",
        help="Skip exporting tuned files (results JSON only)",
    )
    parser.add_argument(
        "--no-preprocess-vel",
        action="store_true",
        help="Skip Savitzky-Golay velocity preprocessing",
    )
    parser.add_argument("--sg-window", type=int, default=7,
                        help="Savitzky-Golay window length (odd, default 7)")
    parser.add_argument("--sg-polyorder", type=int, default=3,
                        help="Savitzky-Golay polynomial order (default 3)")
    parser.add_argument(
        "--param-set",
        type=str,
        default="full",
        choices=list(PARAM_SETS.keys()),
        help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
    )
    args = parser.parse_args()

    specs = PARAM_SETS[args.param_set]

    result = optimize(
        robot_path=args.robot_path,
        recording_path=args.recording,
        specs=specs,
        sigma0=args.sigma0,
        population_size=args.population_size,
        max_generations=args.max_generations,
        sim_dt=args.sim_dt,
        substeps=args.substeps,
        pos_weight=args.pos_weight,
        vel_weight=args.vel_weight,
        pendulum_scale=args.pendulum_scale,
        window_duration=args.window_duration,
        seed=args.seed,
        preprocess_vel=not args.no_preprocess_vel,
        sg_window=args.sg_window,
        sg_polyorder=args.sg_polyorder,
    )

    # Save results JSON.
    robot_path = Path(args.robot_path).resolve()
    result_path = robot_path / "sysid_result.json"
    # Convert numpy types for JSON serialisation.
    # The full per-generation history is dropped from the JSON (it can be
    # long); only a compact summary is kept below.
    result_json = {
        k: v for k, v in result.items() if k != "history"
    }
    result_json["history_summary"] = {
        "first_cost": result["history"][0][1] if result["history"] else None,
        "final_cost": result["history"][-1][1] if result["history"] else None,
        "generations": len(result["history"]),
    }
    # default=str stringifies remaining non-JSON types (Path, numpy scalars).
    result_path.write_text(json.dumps(result_json, indent=2, default=str))
    log.info("results_saved", path=str(result_path))

    # Export tuned files unless --no-export.
    if not args.no_export:
        # Imported lazily so a results-only run doesn't need the exporter.
        from src.sysid.export import export_tuned_files

        export_tuned_files(
            robot_path=args.robot_path,
            params=result["best_params"],
            motor_params=result.get("motor_params"),
        )
|
||
|
||
|
||
# Script entry point: ``python -m src.sysid.optimize ...``.
if __name__ == "__main__":
    main()
|