Files
RL-Sim-Framework/src/sysid/optimize.py
2026-03-22 15:49:13 +01:00

581 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""CMA-ES optimiser — fit simulation parameters to a real-robot recording.
Minimises the trajectory-matching cost between a MuJoCo rollout and a
recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python
CMA-ES with native box-constraint support).
Motor parameters are **locked** from the motor-only sysid — only
pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
optimised. Velocities are optionally preprocessed with Savitzky-Golay
differentiation for cleaner targets.
Usage:
python -m src.sysid.optimize \
--robot-path assets/rotary_cartpole \
--recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz
# Shorter run for testing:
python -m src.sysid.optimize \
--robot-path assets/rotary_cartpole \
--recording <file>.npz \
--max-generations 10 --population-size 8
"""
from __future__ import annotations
import argparse
import json
import time
from datetime import datetime
from pathlib import Path
import numpy as np
import structlog
from src.sysid.rollout import (
LOCKED_MOTOR_PARAMS,
PARAM_SETS,
ROTARY_CARTPOLE_PARAMS,
ParamSpec,
bounds_arrays,
defaults_vector,
params_to_dict,
rollout,
windowed_rollout,
)
log = structlog.get_logger()
# ── Velocity preprocessing ───────────────────────────────────────────
def _preprocess_recording(
recording: dict[str, np.ndarray],
preprocess_vel: bool = True,
sg_window: int = 7,
sg_polyorder: int = 3,
) -> dict[str, np.ndarray]:
"""Optionally recompute velocities using Savitzky-Golay differentiation.
Applies SG filtering to both motor_vel and pendulum_vel, replacing
the noisy firmware finite-difference velocities with smooth
analytical derivatives of the (clean) angle signals.
"""
if not preprocess_vel:
return recording
from scipy.signal import savgol_filter
rec = dict(recording)
times = rec["time"]
dt = float(np.mean(np.diff(times)))
# Motor velocity.
rec["motor_vel_raw"] = rec["motor_vel"].copy()
rec["motor_vel"] = savgol_filter(
rec["motor_angle"],
window_length=sg_window,
polyorder=sg_polyorder,
deriv=1,
delta=dt,
)
# Pendulum velocity.
rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
rec["pendulum_vel"] = savgol_filter(
rec["pendulum_angle"],
window_length=sg_window,
polyorder=sg_polyorder,
deriv=1,
delta=dt,
)
motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
log.info(
"velocity_preprocessed",
method="savgol",
sg_window=sg_window,
sg_polyorder=sg_polyorder,
dt_ms=f"{dt*1000:.1f}",
motor_noise_std=f"{motor_noise:.3f} rad/s",
pend_noise_std=f"{pend_noise:.3f} rad/s",
)
return rec
# ── Cost function ────────────────────────────────────────────────────
def _angle_diff(a: np.ndarray, b: np.ndarray) -> np.ndarray:
"""Shortest signed angle difference, handling wrapping."""
return np.arctan2(np.sin(a - b), np.cos(a - b))
def _check_inertia_valid(params: dict[str, float]) -> bool:
"""Quick reject: pendulum inertia tensor must be positive-definite."""
ixx = params.get("pendulum_ixx", 6.16e-06)
iyy = params.get("pendulum_iyy", 6.16e-06)
izz = params.get("pendulum_izz", 1.23e-05)
ixy = params.get("pendulum_ixy", 6.10e-06)
det_xy = ixx * iyy - ixy * ixy
return det_xy > 0 and ixx > 0 and iyy > 0 and izz > 0
def _compute_trajectory_cost(
sim: dict[str, np.ndarray],
recording: dict[str, np.ndarray],
pos_weight: float = 1.0,
vel_weight: float = 0.1,
pendulum_scale: float = 3.0,
vel_outlier_threshold: float = 20.0,
) -> float:
"""Weighted MSE between sim and real trajectories.
pendulum_scale multiplies the pendulum terms relative to motor terms.
Samples where the *real* pendulum velocity exceeds
``vel_outlier_threshold`` (rad/s) are excluded from the velocity
terms. These are encoder-wrap artefacts (pendulum swinging past
vertical) that no simulator can reproduce.
"""
motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"])
pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"])
motor_vel_err = sim["motor_vel"] - recording["motor_vel"]
pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"]
# Mask out encoder-wrap velocity spikes so the optimizer doesn't
# waste capacity fitting artefacts.
valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold
if valid.sum() < len(valid):
motor_vel_err = motor_vel_err[valid]
pend_vel_err = pend_vel_err[valid]
return float(
pos_weight * np.mean(motor_err**2)
+ pos_weight * pendulum_scale * np.mean(pend_err**2)
+ vel_weight * np.mean(motor_vel_err**2)
+ vel_weight * pendulum_scale * np.mean(pend_vel_err**2)
)
def cost_function(
    params_vec: np.ndarray,
    recording: dict[str, np.ndarray],
    robot_path: Path,
    specs: list[ParamSpec],
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
    motor_params: dict[str, float] | None = None,
) -> float:
    """Trajectory-matching cost for one candidate parameter vector.

    With ``window_duration > 0`` (the default) the recording is scored
    via **multiple-shooting**: it is chopped into short windows, each
    re-initialised from the real qpos/qvel, so early errors don't
    compound over the full trajectory — this gives a much smoother cost
    landscape for CMA-ES. ``window_duration = 0`` falls back to the
    original open-loop single-shot rollout (not recommended).

    Returns a 1e6 penalty when the candidate inertia is infeasible, the
    rollout raises, or the simulated trajectory contains non-finite
    values.
    """
    params = params_to_dict(params_vec, specs)
    if not _check_inertia_valid(params):
        return 1e6
    try:
        if window_duration > 0:
            sim = windowed_rollout(
                robot_path=robot_path,
                params=params,
                recording=recording,
                window_duration=window_duration,
                sim_dt=sim_dt,
                substeps=substeps,
                motor_params=motor_params,
            )
        else:
            sim = rollout(
                robot_path=robot_path,
                params=params,
                actions=recording["action"],
                sim_dt=sim_dt,
                substeps=substeps,
                motor_params=motor_params,
            )
    except Exception as exc:
        # Best-effort: a failed rollout is simply a very bad candidate.
        log.warning("rollout_failed", error=str(exc))
        return 1e6
    # Reject candidates whose simulation blew up (NaN/inf anywhere).
    signals = ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel")
    if any(np.any(~np.isfinite(sim[key])) for key in signals):
        return 1e6
    return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
# ── Parallel evaluation helper (module-level for pickling) ───────────
# Shared state set by the parent process before spawning workers.
# Recording arrays shared with workers (populated once via _init_worker).
_par_recording: dict[str, np.ndarray] = {}
# Robot asset directory forwarded to the rollout calls.
_par_robot_path: Path = Path(".")
# Parameter specs defining the layout of the optimisation vector.
_par_specs: list[ParamSpec] = []
# Extra keyword arguments forwarded to cost_function().
_par_kwargs: dict = {}
def _init_worker(recording, robot_path, specs, kwargs):
"""Initialiser run once per worker process."""
global _par_recording, _par_robot_path, _par_specs, _par_kwargs
_par_recording = recording
_par_robot_path = robot_path
_par_specs = specs
_par_kwargs = kwargs
def _eval_candidate(x_natural: np.ndarray) -> float:
    """Score a single candidate in a worker, using the pre-set shared state."""
    shared = (_par_recording, _par_robot_path, _par_specs)
    return cost_function(x_natural, *shared, **_par_kwargs)
# ── CMA-ES optimisation loop ────────────────────────────────────────
def optimize(
    robot_path: str | Path,
    recording_path: str | Path,
    specs: list[ParamSpec] | None = None,
    sigma0: float = 0.3,
    population_size: int = 20,
    max_generations: int = 1000,
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
    seed: int = 42,
    preprocess_vel: bool = True,
    sg_window: int = 7,
    sg_polyorder: int = 3,
) -> dict:
    """Run CMA-ES optimisation and return results.

    Motor parameters are locked from the motor-only sysid; only
    pendulum/arm parameters are optimised. The search runs in a
    [0, 1]-normalised parameter space (better conditioned for CMA-ES)
    and candidates are evaluated in parallel by a persistent
    multiprocessing pool.

    Fix: the pool is now torn down in a ``try/finally`` so worker
    processes are reclaimed even if the generation loop raises
    (previously they leaked on any error or KeyboardInterrupt).

    Args:
        robot_path: Robot asset directory.
        recording_path: ``.npz`` real-robot recording to fit against.
        specs: Parameter specs to optimise (None -> ROTARY_CARTPOLE_PARAMS).
        sigma0: Initial CMA-ES step size in normalised space.
        population_size / max_generations: CMA-ES budget.
        sim_dt, substeps: Simulator timing forwarded to the rollouts.
        pos_weight, vel_weight, pendulum_scale: Cost weights.
        window_duration: Multiple-shooting window length (0 = open loop).
        seed: CMA-ES RNG seed.
        preprocess_vel, sg_window, sg_polyorder: Savitzky-Golay velocity
            preprocessing options.

    Returns a dict with:
        best_params: dict[str, float]
        best_cost: float
        history: list of (generation, best_cost) tuples
        recording: str (path used)
        param_names / defaults: spec names and default values
        motor_params: dict of locked motor params
        preprocess_vel, timestamp: run metadata
    """
    from cmaes import CMA

    robot_path = Path(robot_path).resolve()
    recording_path = Path(recording_path).resolve()
    if specs is None:
        specs = ROTARY_CARTPOLE_PARAMS

    # Motor parameters come from the dedicated motor-only sysid and are
    # held fixed throughout this optimisation.
    motor_params = dict(LOCKED_MOTOR_PARAMS)
    log.info(
        "motor_params_locked",
        n_params=len(motor_params),
        gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
    )

    # Load recording and optionally recompute velocities via SG filtering.
    recording = dict(np.load(recording_path))
    recording = _preprocess_recording(
        recording,
        preprocess_vel=preprocess_vel,
        sg_window=sg_window,
        sg_polyorder=sg_polyorder,
    )
    n_samples = len(recording["time"])
    duration = recording["time"][-1] - recording["time"][0]
    n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
    log.info(
        "recording_loaded",
        path=str(recording_path),
        samples=n_samples,
        duration=f"{duration:.1f}s",
        window_duration=f"{window_duration}s",
        n_windows=n_windows,
    )

    # Initial point (defaults) — normalised to [0, 1] for CMA-ES
    # (better conditioned than the heterogeneous natural units).
    lo, hi = bounds_arrays(specs)
    x0 = defaults_vector(specs)
    span = hi - lo
    span[span == 0] = 1.0  # avoid division by zero on degenerate bounds

    def to_normed(x: np.ndarray) -> np.ndarray:
        return (x - lo) / span

    def from_normed(x_n: np.ndarray) -> np.ndarray:
        return x_n * span + lo

    x0_normed = to_normed(x0)
    bounds_normed = np.column_stack(
        [np.zeros(len(specs)), np.ones(len(specs))]
    )
    optimizer = CMA(
        mean=x0_normed,
        sigma=sigma0,
        bounds=bounds_normed,
        population_size=population_size,
        seed=seed,
    )
    best_cost = float("inf")
    best_params_vec = x0.copy()
    history: list[tuple[int, float]] = []
    log.info(
        "cmaes_starting",
        n_params=len(specs),
        population=population_size,
        max_gens=max_generations,
        sigma0=sigma0,
    )
    t0 = time.monotonic()

    # ── Parallel evaluation setup ────────────────────────────────
    # Each candidate is independent — evaluate them in parallel using
    # a process pool. Falls back to sequential if n_workers=1.
    import multiprocessing as mp

    n_workers = max(1, mp.cpu_count() - 1)  # leave 1 core free
    eval_kwargs = dict(
        sim_dt=sim_dt,
        substeps=substeps,
        pos_weight=pos_weight,
        vel_weight=vel_weight,
        pendulum_scale=pendulum_scale,
        window_duration=window_duration,
        motor_params=motor_params,
    )
    log.info("parallel_workers", n_workers=n_workers)

    # Create a persistent pool (avoids per-generation fork overhead).
    pool = None
    if n_workers > 1:
        pool = mp.Pool(
            n_workers,
            initializer=_init_worker,
            initargs=(recording, robot_path, specs, eval_kwargs),
        )
    # try/finally guarantees the workers are reclaimed even when the
    # loop raises (worker crash, cmaes error, KeyboardInterrupt, ...).
    try:
        for gen in range(max_generations):
            # Ask all candidates first, then evaluate them as a batch.
            candidates_normed = []
            candidates_natural = []
            for _ in range(optimizer.population_size):
                x_normed = optimizer.ask()
                x_natural = from_normed(x_normed)
                x_natural = np.clip(x_natural, lo, hi)
                candidates_normed.append(x_normed)
                candidates_natural.append(x_natural)
            # Evaluate in parallel (or sequentially with no pool).
            if pool is not None:
                costs = pool.map(_eval_candidate, candidates_natural)
            else:
                costs = [cost_function(
                    x, recording, robot_path, specs, **eval_kwargs
                ) for x in candidates_natural]
            solutions = list(zip(candidates_normed, costs))
            # Track the best candidate in *natural* units.
            for x_natural, c in zip(candidates_natural, costs):
                if c < best_cost:
                    best_cost = c
                    best_params_vec = x_natural.copy()
            optimizer.tell(solutions)
            history.append((gen, best_cost))
            elapsed = time.monotonic() - t0
            if gen % 5 == 0 or gen == max_generations - 1:
                log.info(
                    "cmaes_generation",
                    gen=gen,
                    best_cost=f"{best_cost:.6f}",
                    elapsed=f"{elapsed:.1f}s",
                    gen_best=f"{min(c for _, c in solutions):.6f}",
                )
        total_time = time.monotonic() - t0
    finally:
        # Clean up process pool unconditionally.
        if pool is not None:
            pool.close()
            pool.join()

    best_params = params_to_dict(best_params_vec, specs)
    log.info(
        "cmaes_finished",
        best_cost=f"{best_cost:.6f}",
        total_time=f"{total_time:.1f}s",
        evaluations=max_generations * population_size,
    )
    # Log how far each tuned parameter moved from its default.
    defaults = params_to_dict(defaults_vector(specs), specs)
    for name in best_params:
        d = defaults[name]
        b = best_params[name]
        change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
        log.info(
            "param_result",
            name=name,
            default=f"{d:.6g}",
            tuned=f"{b:.6g}",
            change=f"{change_pct:+.1f}%",
        )
    return {
        "best_params": best_params,
        "best_cost": best_cost,
        "history": history,
        "recording": str(recording_path),
        "param_names": [s.name for s in specs],
        "defaults": {s.name: s.default for s in specs},
        "motor_params": motor_params,
        "preprocess_vel": preprocess_vel,
        "timestamp": datetime.now().isoformat(),
    }
# ── CLI entry point ──────────────────────────────────────────────────
def main() -> None:
    """CLI entry point: parse arguments, run the optimisation, persist results."""
    parser = argparse.ArgumentParser(
        description="Fit simulation parameters to a real-robot recording (CMA-ES)."
    )
    parser.add_argument(
        "--robot-path",
        type=str,
        default="assets/rotary_cartpole",
        help="Path to robot asset directory",
    )
    parser.add_argument(
        "--recording",
        type=str,
        required=True,
        help="Path to .npz recording file",
    )
    # Plain numeric knobs (no help text) — registered in bulk, in the
    # same order as before so --help output is unchanged.
    for flag, arg_type, arg_default in (
        ("--sigma0", float, 0.3),
        ("--population-size", int, 20),
        ("--max-generations", int, 200),
        ("--sim-dt", float, 0.002),
        ("--substeps", int, 10),
        ("--pos-weight", float, 1.0),
        ("--vel-weight", float, 0.1),
    ):
        parser.add_argument(flag, type=arg_type, default=arg_default)
    parser.add_argument(
        "--pendulum-scale",
        type=float,
        default=3.0,
        help="Multiplier for pendulum terms relative to motor (default 3.0)",
    )
    parser.add_argument(
        "--window-duration",
        type=float,
        default=0.5,
        help="Shooting window length in seconds (0 = open-loop, default 0.5)",
    )
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument(
        "--no-export",
        action="store_true",
        help="Skip exporting tuned files (results JSON only)",
    )
    parser.add_argument(
        "--no-preprocess-vel",
        action="store_true",
        help="Skip Savitzky-Golay velocity preprocessing",
    )
    parser.add_argument(
        "--sg-window",
        type=int,
        default=7,
        help="Savitzky-Golay window length (odd, default 7)",
    )
    parser.add_argument(
        "--sg-polyorder",
        type=int,
        default=3,
        help="Savitzky-Golay polynomial order (default 3)",
    )
    parser.add_argument(
        "--param-set",
        type=str,
        default="full",
        choices=list(PARAM_SETS.keys()),
        help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
    )
    args = parser.parse_args()

    result = optimize(
        robot_path=args.robot_path,
        recording_path=args.recording,
        specs=PARAM_SETS[args.param_set],
        sigma0=args.sigma0,
        population_size=args.population_size,
        max_generations=args.max_generations,
        sim_dt=args.sim_dt,
        substeps=args.substeps,
        pos_weight=args.pos_weight,
        vel_weight=args.vel_weight,
        pendulum_scale=args.pendulum_scale,
        window_duration=args.window_duration,
        seed=args.seed,
        preprocess_vel=not args.no_preprocess_vel,
        sg_window=args.sg_window,
        sg_polyorder=args.sg_polyorder,
    )

    # Persist a JSON summary next to the robot assets; the per-generation
    # history is collapsed to first/final cost to keep the file compact.
    history = result["history"]
    payload = {k: v for k, v in result.items() if k != "history"}
    payload["history_summary"] = {
        "first_cost": history[0][1] if history else None,
        "final_cost": history[-1][1] if history else None,
        "generations": len(history),
    }
    result_path = Path(args.robot_path).resolve() / "sysid_result.json"
    # default=str catches any stray numpy scalars during serialisation.
    result_path.write_text(json.dumps(payload, indent=2, default=str))
    log.info("results_saved", path=str(result_path))

    # Export tuned model/config files unless explicitly disabled.
    if not args.no_export:
        from src.sysid.export import export_tuned_files

        export_tuned_files(
            robot_path=args.robot_path,
            params=result["best_params"],
            motor_params=result.get("motor_params"),
        )
if __name__ == "__main__":
main()