✨ clean up lot of stuff

2026-03-22 15:49:13 +01:00
parent d3ed1c25ad
commit ca0e7b8b03
37 changed files with 3613 additions and 1223 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,23 @@
-outputs/
+# IDE / editor
 .vscode/
-runs/
+
+# Training & HPO outputs
+outputs/
+runs/
+smac3_output/
+
+# MuJoCo
+MUJOCO_LOG.TXT
+
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+.eggs/
+dist/
+build/
+
+# Temp files
+*.stl
+*.scad
--- a/assets/rotary_cartpole/hardware.yaml
+++ b/assets/rotary_cartpole/hardware.yaml
@@ -10,7 +10,7 @@ encoder:
  # counts_per_rev = ppr × gear_ratio × 4 (quadrature) = 1320

 safety:
-  max_motor_angle_deg: 90.0   # hard termination limit (0 = disabled)
+  max_motor_angle_deg: 90.0    # hard termination limit (physical endstop ~70-80°)
  soft_limit_deg: 40.0        # progressive penalty ramp starts here

 reset:
--- a/assets/rotary_cartpole/recordings/capture_20260311_215608.npz
+++ b/assets/rotary_cartpole/recordings/capture_20260311_215608.npz
--- a/assets/rotary_cartpole/robot.yaml
+++ b/assets/rotary_cartpole/robot.yaml
@@ -1,20 +1,21 @@
 # Tuned robot config — generated by src.sysid.optimize
-# Original: robot.yaml
-# Run `python -m src.sysid.visualize` to compare real vs sim.

 urdf: rotary_cartpole.urdf
 actuators:
 - joint: motor_joint
  type: motor
-  gear: 0.176692
-  ctrl_range:
-  - -1.0
-  - 1.0
-  damping: 0.009505
-  filter_tau: 0.040906
+  gear: [0.424182, 0.425031]          # torque constant [pos, neg] (motor sysid)
+  ctrl_range: [-0.592, 0.592]         # effective control bound (sysid-tuned)
+  deadzone: [0.141291, 0.078015]      # L298N min |ctrl| for torque [pos, neg]
+  damping: [0.002027, 0.014665]       # viscous damping [pos, neg]
+  frictionloss: [0.057328, 0.053355]  # Coulomb friction [pos, neg]
+  filter_tau: 0.005035                # 1st-order actuator filter (motor sysid)
+  viscous_quadratic: 0.000285         # velocity² drag
+  back_emf_gain: 0.006758             # back-EMF torque reduction
 joints:
  motor_joint:
-    armature: 0.001389
-    frictionloss: 0.002179
+    armature: 0.002773                # reflected rotor inertia (motor sysid)
+    frictionloss: 0.0                 # handled by motor model via qfrc_applied
  pendulum_joint:
-    damping: 6.1e-05
+    damping: 0.000119
+    frictionloss: 1.0e-05
--- a/assets/rotary_cartpole/rotary_cartpole.urdf
+++ b/assets/rotary_cartpole/rotary_cartpole.urdf
@@ -26,8 +26,8 @@
  </joint>
  <link name="arm">
    <inertial>
-      <origin xyz="0.014950488360794875 0.006089886527968399 0.004470745447817278" rpy="0 0 0" />
-      <mass value="0.012391951282440451" />
+      <origin xyz="-0.0071030505291264975 0.0008511826488989179 0.007952020186701035" rpy="0 0 0" />
+      <mass value="0.02110029934220782" />
      <inertia ixx="2.70e-06" iyy="7.80e-07" izz="2.44e-06" ixy="0.0" iyz="7.20e-08" ixz="0.0" />
    </inertial>
    <visual>
@@ -53,9 +53,9 @@
  </joint>
  <link name="pendulum">
    <inertial>
-      <origin xyz="0.06432778588634695 -0.05999895841669392 0.0008769789937631209" rpy="0 0 0" />
-      <mass value="0.035508993892747365" />
-      <inertia ixx="3.139576982078822e-05" iyy="9.431951659638859e-06" izz="4.07315891863556e-05" ixy="-1.8892943833253423e-06" iyz="0.0" ixz="0.0" />
+      <origin xyz="0.060245187591695615 -0.07601707109312682 -0.0034636702158137786" rpy="0 0 0" />
+      <mass value="0.03936742845036306" />
+      <inertia ixx="6.202768755990066e-05" iyy="3.70078470430685e-05" izz="7.827356811788924e-05" ixy="-6.925117819616428e-06" iyz="0.0" ixz="0.0" />
    </inertial>
    <visual>
      <origin xyz="0.006895 -0.023224 -0.162953" rpy="0 0 0" />
--- a/assets/rotary_cartpole/sysid_result.json
+++ b/assets/rotary_cartpole/sysid_result.json
@@ -1,70 +0,0 @@
-{
-  "best_params": {
-    "arm_mass": 0.012391951282440451,
-    "arm_com_x": 0.014950488360794875,
-    "arm_com_y": 0.006089886527968399,
-    "arm_com_z": 0.004470745447817278,
-    "pendulum_mass": 0.035508993892747365,
-    "pendulum_com_x": 0.06432778588634695,
-    "pendulum_com_y": -0.05999895841669392,
-    "pendulum_com_z": 0.0008769789937631209,
-    "pendulum_ixx": 3.139576982078822e-05,
-    "pendulum_iyy": 9.431951659638859e-06,
-    "pendulum_izz": 4.07315891863556e-05,
-    "pendulum_ixy": -1.8892943833253423e-06,
-    "actuator_gear": 0.17669161390939517,
-    "actuator_filter_tau": 0.040905643692382504,
-    "motor_damping": 0.009504542103348917,
-    "pendulum_damping": 6.128535042404019e-05,
-    "motor_armature": 0.0013894759540138252,
-    "motor_frictionloss": 0.002179448047511452
-  },
-  "best_cost": 0.7471380533090072,
-  "recording": "/Users/victormylle/Library/CloudStorage/SeaDrive-VictorMylle(cloud.optimize-it.be)/My Libraries/Projects/AI/RL-Framework/assets/rotary_cartpole/recordings/capture_20260311_215608.npz",
-  "param_names": [
-    "arm_mass",
-    "arm_com_x",
-    "arm_com_y",
-    "arm_com_z",
-    "pendulum_mass",
-    "pendulum_com_x",
-    "pendulum_com_y",
-    "pendulum_com_z",
-    "pendulum_ixx",
-    "pendulum_iyy",
-    "pendulum_izz",
-    "pendulum_ixy",
-    "actuator_gear",
-    "actuator_filter_tau",
-    "motor_damping",
-    "pendulum_damping",
-    "motor_armature",
-    "motor_frictionloss"
-  ],
-  "defaults": {
-    "arm_mass": 0.01,
-    "arm_com_x": 5e-05,
-    "arm_com_y": 0.0065,
-    "arm_com_z": 0.00563,
-    "pendulum_mass": 0.015,
-    "pendulum_com_x": 0.1583,
-    "pendulum_com_y": -0.0983,
-    "pendulum_com_z": 0.0,
-    "pendulum_ixx": 6.16e-06,
-    "pendulum_iyy": 6.16e-06,
-    "pendulum_izz": 1.23e-05,
-    "pendulum_ixy": 6.1e-06,
-    "actuator_gear": 0.064,
-    "actuator_filter_tau": 0.03,
-    "motor_damping": 0.003,
-    "pendulum_damping": 0.0001,
-    "motor_armature": 0.0001,
-    "motor_frictionloss": 0.03
-  },
-  "timestamp": "2026-03-11T22:08:04.782736",
-  "history_summary": {
-    "first_cost": 3.909456214944022,
-    "final_cost": 0.7471380533090072,
-    "generations": 200
-  }
-}
--- a/configs/env/rotary_cartpole.yaml
+++ b/configs/env/rotary_cartpole.yaml
@@ -1,10 +1,19 @@
 max_steps: 1000
 robot_path: assets/rotary_cartpole
 reward_upright_scale: 1.0
-speed_penalty_scale: 0.1
+
+# ── Regularisation penalties (prevent fast spinning) ─────────────────
+motor_vel_penalty: 0.01          # penalise high motor angular velocity
+motor_angle_penalty: 0.05        # penalise deviation from centre
+action_penalty: 0.05             # penalise large actions (energy cost)
+
+# ── Software safety limit (env-level, always applied) ────────────────
+motor_angle_limit_deg: 90.0      # terminate episode if motor exceeds ±90°

 # ── HPO search ranges ────────────────────────────────────────────────
 hpo:
  reward_upright_scale: {min: 0.5, max: 5.0}
-  speed_penalty_scale: {min: 0.01, max: 1.0}
+  motor_vel_penalty: {min: 0.001, max: 0.1}
+  motor_angle_penalty: {min: 0.01, max: 0.2}
+  action_penalty: {min: 0.01, max: 0.2}
  max_steps: {values: [500, 1000, 2000]}
--- a/configs/runner/mjx.yaml
+++ b/configs/runner/mjx.yaml
@@ -1,4 +1,4 @@
 num_envs: 1024       # MJX shines with many parallel envs
 device: auto         # auto = cuda if available, else cpu
 dt: 0.002
-substeps: 20
+substeps: 10
--- a/configs/runner/mujoco.yaml
+++ b/configs/runner/mujoco.yaml
@@ -1,4 +1,4 @@
 num_envs: 64
 device: auto  # auto = cuda if available, else cpu
 dt: 0.002
-substeps: 20
+substeps: 10
--- a/configs/runner/serial.yaml
+++ b/configs/runner/serial.yaml
@@ -6,6 +6,5 @@ num_envs: 1
 device: cpu
 port: /dev/cu.usbserial-0001
 baud: 115200
-dt: 0.02                    # control loop period (50 Hz)
+dt: 0.02                    # control loop period (50 Hz, matches training)
 no_data_timeout: 2.0        # seconds of silence before declaring disconnect
-encoder_jump_threshold: 200  # encoder tick jump → reboot detection
--- a/configs/sysid.yaml
+++ b/configs/sysid.yaml
@@ -1,5 +1,5 @@
 # System identification defaults.
-# Override via CLI: python -m src.sysid.optimize sysid.max_generations=50
+# Override via CLI: python scripts/sysid.py optimize --max-generations 50
 #
 # These are NOT Hydra config groups — the sysid scripts use argparse.
 # This file serves as documentation and can be loaded by custom wrappers.
@@ -8,18 +8,25 @@ capture:
  port: /dev/cu.usbserial-0001
  baud: 115200
  duration: 20.0          # seconds
-  amplitude: 180           # max PWM magnitude (0–255)
+  amplitude: 150           # max PWM magnitude — must match firmware MAX_MOTOR_SPEED
  hold_min_ms: 50          # PRBS min hold time
  hold_max_ms: 300         # PRBS max hold time
  dt: 0.02                 # sample period (50 Hz)

 optimize:
  sigma0: 0.3              # CMA-ES initial step size (in [0,1] normalised space)
-  population_size: 20      # candidates per generation
-  max_generations: 200     # total generations (~4000 evaluations)
+  population_size: 50      # candidates per generation
+  max_generations: 1000     # total generations (~4000 evaluations)
  sim_dt: 0.002            # MuJoCo physics timestep
  substeps: 10             # physics substeps per control step (ctrl_dt = 0.02s)
  pos_weight: 1.0          # MSE weight for angle errors
  vel_weight: 0.1          # MSE weight for velocity errors
  window_duration: 0.5     # multiple-shooting window length (s); 0 = open-loop
  seed: 42
+
+# Tunable hardware-realism params (added to ROTARY_CARTPOLE_PARAMS):
+#   ctrl_limit      — effective motor range → exported as ctrl_range in robot.yaml
+#   motor_deadzone  — L298N minimum |action| for torque → exported as deadzone in robot.yaml
+# Firmware sends raw (unfiltered) sensor data; EMA filtering is
+# handled on the Python side (env transforms) and is NOT part of
+# the sysid parameter search.
--- a/configs/training/ppo_real.yaml
+++ b/configs/training/ppo_real.yaml
@@ -8,15 +8,17 @@ defaults:
  - _self_

 hidden_sizes: [256, 256]
-total_timesteps: 100000
-learning_epochs: 5
-learning_rate: 0.001            # conservative — can't undo real-world damage
-entropy_loss_scale: 0.0001
-log_interval: 1024
+total_timesteps: 2000000
+learning_epochs: 10
+learning_rate: 0.0005            # conservative — can't undo real-world damage
+entropy_loss_scale: 0.01
+rollout_steps: 2048
+mini_batches: 8
+log_interval: 2048
 checkpoint_interval: 5000       # frequent saves — can't rewind real hardware
 initial_log_std: -0.5           # moderate initial exploration
 min_log_std: -4.0
-max_log_std: 0.0                # cap σ at 1.0
+max_log_std: 2.0                # cap σ at 1.0

 # Never run real-hardware training remotely
 remote: false
--- a/configs/training/ppo_single.yaml
+++ b/configs/training/ppo_single.yaml
@@ -8,7 +8,7 @@ defaults:
  - _self_

 hidden_sizes: [256, 256]
-total_timesteps: 1000000
+total_timesteps: 2000000
 learning_epochs: 10
 learning_rate: 0.0003
 entropy_loss_scale: 0.01
--- a/scripts/eval.py
+++ b/scripts/eval.py
@@ -0,0 +1,379 @@
+"""Evaluate a trained policy on real hardware (or in simulation).
+
+Loads a checkpoint and runs the policy in a closed loop.  For real
+hardware the serial runner talks to the ESP32; for sim it uses the
+MuJoCo runner.  A digital-twin MuJoCo viewer mirrors the robot state
+in both modes.
+
+Usage (real hardware):
+    mjpython scripts/eval.py env=rotary_cartpole runner=serial \
+        checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
+
+Usage (simulation):
+    mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \
+        checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
+
+Controls:
+    Space   — pause / resume policy (motor stops while paused)
+    R       — reset environment
+    Esc     — quit
+"""
+
+import math
+import sys
+import time
+from pathlib import Path
+
+# Ensure project root is on sys.path
+_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
+if _PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, _PROJECT_ROOT)
+
+import hydra
+import mujoco
+import mujoco.viewer
+import numpy as np
+import structlog
+import torch
+from gymnasium import spaces
+from hydra.core.hydra_config import HydraConfig
+from omegaconf import DictConfig, OmegaConf
+from skrl.resources.preprocessors.torch import RunningStandardScaler
+
+from src.core.registry import build_env
+from src.models.mlp import SharedMLP
+
+logger = structlog.get_logger()
+
+
+# ── keyboard state ───────────────────────────────────────────────────
+_reset_flag = [False]
+_paused = [False]
+_quit_flag = [False]
+
+
+def _key_callback(keycode: int) -> None:
+    """Called by MuJoCo viewer on key press."""
+    if keycode == 32:         # GLFW_KEY_SPACE
+        _paused[0] = not _paused[0]
+    elif keycode == 82:       # GLFW_KEY_R
+        _reset_flag[0] = True
+    elif keycode == 256:      # GLFW_KEY_ESCAPE
+        _quit_flag[0] = True
+
+
+# ── checkpoint loading ───────────────────────────────────────────────
+
+def _infer_hidden_sizes(state_dict: dict[str, torch.Tensor]) -> tuple[int, ...]:
+    """Infer hidden layer sizes from a SharedMLP state dict."""
+    sizes = []
+    i = 0
+    while f"net.{i}.weight" in state_dict:
+        sizes.append(state_dict[f"net.{i}.weight"].shape[0])
+        i += 2  # skip activation layers (ELU)
+    return tuple(sizes)
+
+
+def load_policy(
+    checkpoint_path: str,
+    observation_space: spaces.Space,
+    action_space: spaces.Space,
+    device: torch.device = torch.device("cpu"),
+) -> tuple[SharedMLP, RunningStandardScaler]:
+    """Load a trained SharedMLP + observation normalizer from a checkpoint.
+
+    Returns:
+        (model, state_preprocessor) ready for inference.
+    """
+    ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
+
+    # Infer architecture from saved weights.
+    hidden_sizes = _infer_hidden_sizes(ckpt["policy"])
+
+    # Reconstruct model.
+    model = SharedMLP(
+        observation_space=observation_space,
+        action_space=action_space,
+        device=device,
+        hidden_sizes=hidden_sizes,
+    )
+    model.load_state_dict(ckpt["policy"])
+    model.eval()
+
+    # Reconstruct observation normalizer.
+    state_preprocessor = RunningStandardScaler(size=observation_space, device=device)
+    state_preprocessor.running_mean = ckpt["state_preprocessor"]["running_mean"].to(device)
+    state_preprocessor.running_variance = ckpt["state_preprocessor"]["running_variance"].to(device)
+    state_preprocessor.current_count = ckpt["state_preprocessor"]["current_count"]
+    # Freeze the normalizer — don't update stats during eval.
+    state_preprocessor.training = False
+
+    logger.info(
+        "checkpoint_loaded",
+        path=checkpoint_path,
+        hidden_sizes=hidden_sizes,
+        obs_mean=[round(x, 3) for x in state_preprocessor.running_mean.tolist()],
+        obs_std=[round(x, 3) for x in state_preprocessor.running_variance.sqrt().tolist()],
+    )
+    return model, state_preprocessor
+
+
+# ── action arrow overlay ─────────────────────────────────────────────
+
+def _add_action_arrow(viewer, model, data, action_val: float) -> None:
+    """Draw an arrow showing applied torque direction."""
+    if abs(action_val) < 0.01 or model.nu == 0:
+        return
+
+    jnt_id = model.actuator_trnid[0, 0]
+    body_id = model.jnt_bodyid[jnt_id]
+    pos = data.xpos[body_id].copy()
+    pos[2] += 0.02
+
+    axis = data.xmat[body_id].reshape(3, 3) @ model.jnt_axis[jnt_id]
+    arrow_len = 0.08 * action_val
+    direction = axis * np.sign(arrow_len)
+
+    z = direction / (np.linalg.norm(direction) + 1e-8)
+    up = np.array([0, 0, 1]) if abs(z[2]) < 0.99 else np.array([0, 1, 0])
+    x = np.cross(up, z)
+    x /= np.linalg.norm(x) + 1e-8
+    y = np.cross(z, x)
+    mat = np.column_stack([x, y, z]).flatten()
+
+    rgba = np.array(
+        [0.2, 0.8, 0.2, 0.8] if action_val > 0 else [0.8, 0.2, 0.2, 0.8],
+        dtype=np.float32,
+    )
+
+    geom = viewer.user_scn.geoms[viewer.user_scn.ngeom]
+    mujoco.mjv_initGeom(
+        geom,
+        type=mujoco.mjtGeom.mjGEOM_ARROW,
+        size=np.array([0.008, 0.008, abs(arrow_len)]),
+        pos=pos,
+        mat=mat,
+        rgba=rgba,
+    )
+    viewer.user_scn.ngeom += 1
+
+
+# ── main loops ───────────────────────────────────────────────────────
+
+@hydra.main(version_base=None, config_path="../configs", config_name="config")
+def main(cfg: DictConfig) -> None:
+    choices = HydraConfig.get().runtime.choices
+    env_name = choices.get("env", "cartpole")
+    runner_name = choices.get("runner", "mujoco_single")
+
+    checkpoint_path = cfg.get("checkpoint", None)
+    if checkpoint_path is None:
+        logger.error("No checkpoint specified. Use: +checkpoint=path/to/agent.pt")
+        sys.exit(1)
+
+    # Resolve relative paths against original working directory.
+    checkpoint_path = str(Path(hydra.utils.get_original_cwd()) / checkpoint_path)
+    if not Path(checkpoint_path).exists():
+        logger.error("checkpoint_not_found", path=checkpoint_path)
+        sys.exit(1)
+
+    if runner_name == "serial":
+        _eval_serial(cfg, env_name, checkpoint_path)
+    else:
+        _eval_sim(cfg, env_name, checkpoint_path)
+
+
+def _eval_sim(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
+    """Evaluate policy in MuJoCo simulation with viewer."""
+    from src.runners.mujoco import MuJoCoRunner, MuJoCoRunnerConfig
+
+    env = build_env(env_name, cfg)
+    runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
+    runner_dict["num_envs"] = 1
+    runner = MuJoCoRunner(env=env, config=MuJoCoRunnerConfig(**runner_dict))
+
+    device = runner.device
+    model, preprocessor = load_policy(
+        checkpoint_path, runner.observation_space, runner.action_space, device
+    )
+
+    mj_model = runner._model
+    mj_data = runner._data[0]
+    dt_ctrl = runner.config.dt * runner.config.substeps
+
+    with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
+        obs, _ = runner.reset()
+        step = 0
+        episode = 0
+        episode_reward = 0.0
+
+        logger.info(
+            "eval_started",
+            env=env_name,
+            mode="simulation",
+            checkpoint=Path(checkpoint_path).name,
+            controls="Space=pause, R=reset, Esc=quit",
+        )
+
+        while viewer.is_running() and not _quit_flag[0]:
+            if _reset_flag[0]:
+                _reset_flag[0] = False
+                obs, _ = runner.reset()
+                step = 0
+                episode += 1
+                episode_reward = 0.0
+                logger.info("reset", episode=episode)
+
+            if _paused[0]:
+                viewer.sync()
+                time.sleep(0.05)
+                continue
+
+            # Policy inference
+            with torch.no_grad():
+                normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
+                action = model.act({"states": normalized_obs}, role="policy")[0]
+                action = action.clamp(-1.0, 1.0)
+
+            obs, reward, terminated, truncated, info = runner.step(action)
+            episode_reward += reward.item()
+            step += 1
+
+            # Sync viewer
+            mujoco.mj_forward(mj_model, mj_data)
+            viewer.user_scn.ngeom = 0
+            _add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
+            viewer.sync()
+
+            if step % 50 == 0:
+                joints = {mj_model.jnt(i).name: round(math.degrees(mj_data.qpos[i]), 1)
+                          for i in range(mj_model.njnt)}
+                logger.debug(
+                    "step", n=step, reward=round(reward.item(), 3),
+                    action=round(action[0, 0].item(), 2),
+                    ep_reward=round(episode_reward, 1), **joints,
+                )
+
+            if terminated.any() or truncated.any():
+                logger.info(
+                    "episode_done", episode=episode, steps=step,
+                    total_reward=round(episode_reward, 2),
+                    reason="terminated" if terminated.any() else "truncated",
+                )
+                obs, _ = runner.reset()
+                step = 0
+                episode += 1
+                episode_reward = 0.0
+
+            time.sleep(dt_ctrl)
+
+    runner.close()
+
+
+def _eval_serial(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
+    """Evaluate policy on real hardware via serial, with digital-twin viewer."""
+    from src.runners.serial import SerialRunner, SerialRunnerConfig
+
+    env = build_env(env_name, cfg)
+    runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
+    serial_runner = SerialRunner(env=env, config=SerialRunnerConfig(**runner_dict))
+
+    device = serial_runner.device
+    model, preprocessor = load_policy(
+        checkpoint_path, serial_runner.observation_space, serial_runner.action_space, device
+    )
+
+    # Set up digital-twin MuJoCo model for visualization.
+    serial_runner._ensure_viz_model()
+    mj_model = serial_runner._viz_model
+    mj_data = serial_runner._viz_data
+
+    with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
+        obs, _ = serial_runner.reset()
+        step = 0
+        episode = 0
+        episode_reward = 0.0
+
+        logger.info(
+            "eval_started",
+            env=env_name,
+            mode="real hardware (serial)",
+            port=serial_runner.config.port,
+            checkpoint=Path(checkpoint_path).name,
+            controls="Space=pause, R=reset, Esc=quit",
+        )
+
+        while viewer.is_running() and not _quit_flag[0]:
+            if _reset_flag[0]:
+                _reset_flag[0] = False
+                serial_runner._send("M0")
+                serial_runner._drive_to_center()
+                serial_runner._wait_for_pendulum_still()
+                obs, _ = serial_runner.reset()
+                step = 0
+                episode += 1
+                episode_reward = 0.0
+                logger.info("reset", episode=episode)
+
+            if _paused[0]:
+                serial_runner._send("M0")  # safety: stop motor while paused
+                serial_runner._sync_viz()
+                viewer.sync()
+                time.sleep(0.05)
+                continue
+
+            # Policy inference
+            with torch.no_grad():
+                normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
+                action = model.act({"states": normalized_obs}, role="policy")[0]
+                action = action.clamp(-1.0, 1.0)
+
+            obs, reward, terminated, truncated, info = serial_runner.step(action)
+            episode_reward += reward.item()
+            step += 1
+
+            # Sync digital twin with real sensor data.
+            serial_runner._sync_viz()
+            viewer.user_scn.ngeom = 0
+            _add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
+            viewer.sync()
+
+            if step % 25 == 0:
+                state = serial_runner._read_state()
+                logger.debug(
+                    "step", n=step, reward=round(reward.item(), 3),
+                    action=round(action[0, 0].item(), 2),
+                    ep_reward=round(episode_reward, 1),
+                    motor_enc=state["encoder_count"],
+                    pend_deg=round(state["pendulum_angle"], 1),
+                )
+
+            # Check for safety / disconnection.
+            if info.get("reboot_detected") or info.get("motor_limit_exceeded"):
+                logger.error(
+                    "safety_stop",
+                    reboot=info.get("reboot_detected", False),
+                    motor_limit=info.get("motor_limit_exceeded", False),
+                )
+                serial_runner._send("M0")
+                break
+
+            if terminated.any() or truncated.any():
+                logger.info(
+                    "episode_done", episode=episode, steps=step,
+                    total_reward=round(episode_reward, 2),
+                    reason="terminated" if terminated.any() else "truncated",
+                )
+                # Auto-reset for next episode.
+                obs, _ = serial_runner.reset()
+                step = 0
+                episode += 1
+                episode_reward = 0.0
+
+            # Real-time pacing is handled by serial_runner.step() (dt sleep).
+
+    serial_runner.close()
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/motor_sysid.py
+++ b/scripts/motor_sysid.py
@@ -0,0 +1,64 @@
+"""Unified CLI for motor-only system identification.
+
+Usage:
+    python scripts/motor_sysid.py capture   --duration 20
+    python scripts/motor_sysid.py optimize  --recording assets/motor/recordings/<file>.npz
+    python scripts/motor_sysid.py visualize --recording assets/motor/recordings/<file>.npz
+    python scripts/motor_sysid.py export    --result assets/motor/motor_sysid_result.json
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+# Ensure project root is on sys.path
+_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
+if _PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, _PROJECT_ROOT)
+
+
+def main() -> None:
+    if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
+        print(
+            "Motor System Identification\n"
+            "===========================\n"
+            "Usage: python scripts/motor_sysid.py <command> [options]\n"
+            "\n"
+            "Commands:\n"
+            "  capture    Record motor trajectory under PRBS excitation\n"
+            "  optimize   Run CMA-ES to fit motor parameters\n"
+            "  visualize  Plot real vs simulated motor response\n"
+            "  export     Write tuned MJCF + robot.yaml files\n"
+            "\n"
+            "Workflow:\n"
+            "  1. Flash sysid firmware to ESP32 (motor-only, no limits)\n"
+            "  2. python scripts/motor_sysid.py capture --duration 20\n"
+            "  3. python scripts/motor_sysid.py optimize --recording <file>.npz\n"
+            "  4. python scripts/motor_sysid.py visualize --recording <file>.npz\n"
+            "\n"
+            "Run '<command> --help' for command-specific options."
+        )
+        sys.exit(0)
+
+    command = sys.argv[1]
+    sys.argv = [f"motor_sysid {command}"] + sys.argv[2:]
+
+    if command == "capture":
+        from src.sysid.motor.capture import main as cmd_main
+    elif command == "optimize":
+        from src.sysid.motor.optimize import main as cmd_main
+    elif command == "visualize":
+        from src.sysid.motor.visualize import main as cmd_main
+    elif command == "export":
+        from src.sysid.motor.export import main as cmd_main
+    else:
+        print(f"Unknown command: {command}")
+        print("Available commands: capture, optimize, visualize, export")
+        sys.exit(1)
+
+    cmd_main()
+
+
+if __name__ == "__main__":
+    main()
--- a/src/core/env.py
+++ b/src/core/env.py
@@ -48,9 +48,10 @@ class BaseEnv(abc.ABC, Generic[T]):
    def compute_truncations(self, step_counts: torch.Tensor) -> torch.Tensor:
        return step_counts >= self.config.max_steps

-    def get_default_qpos(self, nq: int) -> list[float] | None:
-        """Return the default joint positions for reset.
-        Override in subclass if the URDF zero pose doesn't match
-        the desired initial state (e.g. pendulum hanging down).
-        Returns None to use the URDF default (all zeros)."""
-        return None
+    def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
+        """Check whether the physical robot has settled enough to start an episode.
+
+        Used by the SerialRunner after driving to center and waiting for the
+        pendulum.  Default: always ready (sim doesn't need settling).
+        """
+        return True
--- a/src/core/hardware.py
+++ b/src/core/hardware.py
@@ -1,91 +0,0 @@
-"""Real-hardware configuration — loaded from hardware.yaml next to robot.yaml.
-
-Provides robot-specific constants for the SerialRunner: encoder specs,
-safety limits, and reset behaviour.  Simulation-only robots simply don't
-have a hardware.yaml (the loader returns None).
-
-Usage:
-    hw = load_hardware_config("assets/rotary_cartpole")
-    if hw is not None:
-        counts_per_rev = hw.encoder.ppr * hw.encoder.gear_ratio * 4.0
-"""
-
-import dataclasses
-from pathlib import Path
-
-import structlog
-import yaml
-
-log = structlog.get_logger()
-
-
-@dataclasses.dataclass
-class EncoderConfig:
-    """Rotary encoder parameters."""
-
-    ppr: int = 11  # pulses per revolution (before quadrature)
-    gear_ratio: float = 30.0  # gearbox ratio
-
-    @property
-    def counts_per_rev(self) -> float:
-        """Total encoder counts per output-shaft revolution (quadrature)."""
-        return self.ppr * self.gear_ratio * 4.0
-
-
-@dataclasses.dataclass
-class SafetyConfig:
-    """Safety limits enforced by the runner (not the env)."""
-
-    max_motor_angle_deg: float = 90.0  # hard termination (0 = disabled)
-    soft_limit_deg: float = 40.0  # progressive penalty ramp start
-
-
-@dataclasses.dataclass
-class ResetConfig:
-    """Parameters for the physical reset procedure."""
-
-    drive_speed: int = 80  # PWM for bang-bang drive-to-center
-    deadband: int = 15  # encoder count threshold for "centered"
-    drive_timeout: float = 3.0  # seconds
-
-    settle_angle_deg: float = 2.0  # pendulum angle threshold (degrees)
-    settle_vel_dps: float = 5.0  # pendulum velocity threshold (deg/s)
-    settle_duration: float = 0.5  # seconds the pendulum must stay still
-    settle_timeout: float = 30.0  # give up after this many seconds
-
-
-@dataclasses.dataclass
-class HardwareConfig:
-    """Complete real-hardware description for a robot."""
-
-    encoder: EncoderConfig = dataclasses.field(default_factory=EncoderConfig)
-    safety: SafetyConfig = dataclasses.field(default_factory=SafetyConfig)
-    reset: ResetConfig = dataclasses.field(default_factory=ResetConfig)
-
-
-def load_hardware_config(robot_dir: str | Path) -> HardwareConfig | None:
-    """Load hardware.yaml from a directory.
-
-    Returns None if the file doesn't exist (simulation-only robot).
-    """
-    robot_dir = Path(robot_dir).resolve()
-    yaml_path = robot_dir / "hardware.yaml"
-
-    if not yaml_path.exists():
-        return None
-
-    raw = yaml.safe_load(yaml_path.read_text()) or {}
-
-    encoder = EncoderConfig(**raw.get("encoder", {}))
-    safety = SafetyConfig(**raw.get("safety", {}))
-    reset = ResetConfig(**raw.get("reset", {}))
-
-    config = HardwareConfig(encoder=encoder, safety=safety, reset=reset)
-
-    log.debug(
-        "hardware_config_loaded",
-        robot_dir=str(robot_dir),
-        counts_per_rev=encoder.counts_per_rev,
-        max_motor_angle_deg=safety.max_motor_angle_deg,
-    )
-    return config
--- a/src/core/robot.py
+++ b/src/core/robot.py
@@ -11,6 +11,7 @@ Usage:
 """

 import dataclasses
+import math
 from pathlib import Path

 import structlog
@@ -19,10 +20,20 @@ import yaml
 log = structlog.get_logger()


+def _as_pair(val) -> tuple[float, float]:
+    """Convert scalar or [pos, neg] list to (pos, neg) tuple."""
+    if isinstance(val, (list, tuple)) and len(val) == 2:
+        return (float(val[0]), float(val[1]))
+    return (float(val), float(val))
+
+
@dataclasses.dataclass
 class ActuatorConfig:
    """Motor/actuator attached to a joint.

+    Asymmetric fields use (positive_dir, negative_dir) tuples.
+    A scalar in YAML is expanded to a symmetric pair.
+
    type:
        motor    — direct torque control (ctrl = normalised torque)
        position — PD position servo  (ctrl = target angle, needs kp)
@@ -30,12 +41,91 @@ class ActuatorConfig:
    """
    joint: str = ""
    type: str = "motor"
-    gear: float = 1.0
-    ctrl_range: tuple[float, float] = (-1.0, 1.0)
-    damping: float = 0.05
-    kp: float = 0.0   # proportional gain (position / velocity actuators)
-    kv: float = 0.0   # derivative gain  (position actuators)
-    filter_tau: float = 0.0  # 1st-order filter time constant (s); 0 = no filter
+    gear: tuple[float, float] = (1.0, 1.0)            # torque constant (pos, neg)
+    ctrl_range: tuple[float, float] = (-1.0, 1.0)     # (lower, upper) control bounds
+    deadzone: tuple[float, float] = (0.0, 0.0)        # min |ctrl| for torque (pos, neg)
+    damping: tuple[float, float] = (0.0, 0.0)         # viscous damping (pos, neg)
+    frictionloss: tuple[float, float] = (0.0, 0.0)    # Coulomb friction (pos, neg)
+    kp: float = 0.0            # proportional gain (position / velocity actuators)
+    kv: float = 0.0            # derivative gain  (position actuators)
+    filter_tau: float = 0.0    # 1st-order filter time constant (s); 0 = no filter
+    viscous_quadratic: float = 0.0   # velocity² drag coefficient
+    back_emf_gain: float = 0.0      # back-EMF torque reduction
+
+    @property
+    def gear_avg(self) -> float:
+        return (self.gear[0] + self.gear[1]) / 2.0
+
+    @property
+    def has_motor_model(self) -> bool:
+        """True if this actuator needs the runtime motor model."""
+        return (
+            self.gear[0] != self.gear[1]
+            or self.deadzone != (0.0, 0.0)
+            or self.damping != (0.0, 0.0)
+            or self.frictionloss != (0.0, 0.0)
+            or self.viscous_quadratic > 0
+            or self.back_emf_gain > 0
+        )
+
+    def transform_ctrl(self, ctrl: float) -> float:
+        """Apply asymmetric deadzone and gear compensation to a scalar ctrl."""
+        # Deadzone
+        dz_pos, dz_neg = self.deadzone
+        if ctrl >= 0 and ctrl < dz_pos:
+            return 0.0
+        if ctrl < 0 and ctrl > -dz_neg:
+            return 0.0
+
+        # Gear compensation: rescale so ctrl × gear_avg ≈ action × gear_dir
+        gear_avg = self.gear_avg
+        if gear_avg > 1e-8:
+            gear_dir = self.gear[0] if ctrl >= 0 else self.gear[1]
+            ctrl *= gear_dir / gear_avg
+
+        return ctrl
+
+    def compute_motor_force(self, vel: float, ctrl: float) -> float:
+        """Asymmetric friction, damping, drag, back-EMF → applied torque."""
+        torque = 0.0
+
+        # Coulomb friction (direction-dependent)
+        fl_pos, fl_neg = self.frictionloss
+        if abs(vel) > 1e-6:
+            fl = fl_pos if vel > 0 else fl_neg
+            torque -= math.copysign(fl, vel)
+
+        # Viscous damping (direction-dependent)
+        damp = self.damping[0] if vel > 0 else self.damping[1]
+        torque -= damp * vel
+
+        # Quadratic velocity drag
+        if self.viscous_quadratic > 0:
+            torque -= self.viscous_quadratic * vel * abs(vel)
+
+        # Back-EMF torque reduction
+        if self.back_emf_gain > 0 and abs(ctrl) > 1e-6:
+            torque -= self.back_emf_gain * vel * math.copysign(1.0, ctrl)
+
+        return max(-10.0, min(10.0, torque))
+
+    def transform_action(self, action):
+        """Vectorised deadzone + gear compensation for a torch batch."""
+        dz_pos, dz_neg = self.deadzone
+        if dz_pos > 0 or dz_neg > 0:
+            action = action.clone()
+            pos_dead = (action >= 0) & (action < dz_pos)
+            neg_dead = (action < 0) & (action > -dz_neg)
+            action[pos_dead | neg_dead] = 0.0
+
+        gear_avg = self.gear_avg
+        if gear_avg > 1e-8 and self.gear[0] != self.gear[1]:
+            action = action.clone() if dz_pos == 0 and dz_neg == 0 else action
+            pos = action >= 0
+            action[pos] *= self.gear[0] / gear_avg
+            action[~pos] *= self.gear[1] / gear_avg
+
+        return action


@dataclasses.dataclass
@@ -84,6 +174,9 @@ def load_robot_config(robot_dir: str | Path) -> RobotConfig:
    for a in raw.get("actuators", []):
        if "ctrl_range" in a:
            a["ctrl_range"] = tuple(a["ctrl_range"])
+        for key in ("gear", "deadzone", "damping", "frictionloss"):
+            if key in a:
+                a[key] = _as_pair(a[key])
        actuators.append(ActuatorConfig(**a))

    # Parse joint overrides
--- a/src/core/runner.py
+++ b/src/core/runner.py
@@ -1,9 +1,12 @@
 import dataclasses
 import abc
 from typing import Any, Generic, TypeVar
-from src.core.env import BaseEnv
+
+import numpy as np
 import torch

+from src.core.env import BaseEnv
+

 T = TypeVar("T")

@@ -21,6 +24,8 @@ class BaseRunner(abc.ABC, Generic[T]):
        if getattr(self.config, "device", None) == "auto":
            self.config.device = "cuda" if torch.cuda.is_available() else "cpu"

+        self._last_actions: torch.Tensor | None = None
+
        self._sim_initialize(config)

        self.observation_space = self.env.observation_space
@@ -68,6 +73,7 @@ class BaseRunner(abc.ABC, Generic[T]):
        return obs, {}
    
    def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict[str, Any]]:
+        self._last_actions = actions
        qpos, qvel = self._sim_step(actions)
        self.step_counts += 1

@@ -95,9 +101,50 @@ class BaseRunner(abc.ABC, Generic[T]):
        # skrl expects (num_envs, 1) for rewards/terminated/truncated
        return obs, rewards.unsqueeze(-1), terminated.unsqueeze(-1), truncated.unsqueeze(-1), info
        
-    def render(self, env_idx: int = 0):
-        """Offscreen render → RGB numpy array. Override in subclass."""
+    def _render_frame(self, env_idx: int = 0) -> np.ndarray:
+        """Return a raw RGB frame. Override in subclass."""
        raise NotImplementedError("Render not implemented for this runner.")
-    
+
+    def render(self, env_idx: int = 0) -> np.ndarray:
+        """Render frame with action overlay."""
+        frame = self._render_frame(env_idx)
+        if self._last_actions is not None:
+            ctrl = float(self._last_actions[env_idx, 0].clamp(-1.0, 1.0))
+            _draw_action_overlay(frame, ctrl)
+        return frame
+
    def close(self) -> None:
-        self._sim_close()
+        self._sim_close()
+
+
+def _draw_action_overlay(frame: np.ndarray, action: float) -> None:
+    """Draw an action bar on a rendered frame (no OpenCV needed).
+
+    Bar is centered horizontally: green to the right (+), red to the left (-).
+    """
+    h, w = frame.shape[:2]
+
+    bar_y = h - 30
+    bar_h = 16
+    bar_x_center = w // 2
+    bar_half_w = w // 4
+    bar_x_left = bar_x_center - bar_half_w
+    bar_x_right = bar_x_center + bar_half_w
+
+    # Background (dark grey)
+    frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40]
+
+    # Filled bar
+    fill_len = int(abs(action) * bar_half_w)
+    if action > 0:
+        color = [60, 200, 60]  # green
+        x0 = bar_x_center
+        x1 = min(bar_x_center + fill_len, bar_x_right)
+    else:
+        color = [200, 60, 60]  # red
+        x1 = bar_x_center
+        x0 = max(bar_x_center - fill_len, bar_x_left)
+    frame[bar_y:bar_y + bar_h, x0:x1] = color
+
+    # Center tick mark (white)
+    frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255]
--- a/src/envs/rotary_cartpole.py
+++ b/src/envs/rotary_cartpole.py
@@ -1,4 +1,5 @@
 import dataclasses
+import math
 import torch
 from src.core.env import BaseEnv, BaseEnvConfig
 from gymnasium import spaces
@@ -21,7 +22,12 @@ class RotaryCartPoleConfig(BaseEnvConfig):
    """
    # Reward shaping
    reward_upright_scale: float = 1.0      # cos(pendulum) when upright = +1
-    speed_penalty_scale: float = 0.1       # penalty for high pendulum velocity near top
+    motor_vel_penalty: float = 0.01        # penalise high motor angular velocity
+    motor_angle_penalty: float = 0.05      # penalise deviation from centre
+    action_penalty: float = 0.05           # penalise large actions (energy cost)
+
+    # ── Software safety limit (env-level, on top of URDF + hardware) ──
+    motor_angle_limit_deg: float = 90.0    # terminate episode if exceeded


 class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
@@ -34,12 +40,14 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
    Using sin/cos avoids discontinuities at ±π for continuous joints.

    Actions (1):
-        Torque applied to the motor_joint.
+        Torque applied to the motor_joint (normalised to [-1, 1]).
    """

    def __init__(self, config: RotaryCartPoleConfig):
        super().__init__(config)

+    # ── Spaces ───────────────────────────────────────────────────
+
    @property
    def observation_space(self) -> spaces.Space:
        return spaces.Box(low=-torch.inf, high=torch.inf, shape=(6,))
@@ -48,6 +56,8 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
    def action_space(self) -> spaces.Space:
        return spaces.Box(low=-1.0, high=1.0, shape=(1,))

+    # ── State building ───────────────────────────────────────────
+
    def build_state(self, qpos: torch.Tensor, qvel: torch.Tensor) -> RotaryCartPoleState:
        return RotaryCartPoleState(
            motor_angle=qpos[:, 0],
@@ -56,32 +66,64 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
            pendulum_vel=qvel[:, 1],
        )

+    # ── Observations ─────────────────────────────────────────────
+
    def compute_observations(self, state: RotaryCartPoleState) -> torch.Tensor:
-        return torch.stack([
+        obs = [
            torch.sin(state.motor_angle),
            torch.cos(state.motor_angle),
            torch.sin(state.pendulum_angle),
            torch.cos(state.pendulum_angle),
            state.motor_vel,
            state.pendulum_vel,
-        ], dim=-1)
+        ]
+        return torch.stack(obs, dim=-1)
+
+    # ── Rewards ──────────────────────────────────────────────────

    def compute_rewards(self, state: RotaryCartPoleState, actions: torch.Tensor) -> torch.Tensor:
        # Upright reward: -cos(θ) ∈ [-1, +1]
-        upright = -torch.cos(state.pendulum_angle)
+        reward = -torch.cos(state.pendulum_angle) * self.config.reward_upright_scale

-        # Penalise high pendulum velocity when near the top (upright).
-        # "nearness" is weighted by how upright the pendulum is (0 at bottom, 1 at top).
-        near_top = torch.clamp(-torch.cos(state.pendulum_angle), min=0.0)  # 0‥1
-        speed_penalty = self.config.speed_penalty_scale * near_top * state.pendulum_vel.abs()
+        # Penalise fast motor spinning (discourages violent oscillation)
+        reward = reward - self.config.motor_vel_penalty * state.motor_vel.pow(2)

-        return upright * self.config.reward_upright_scale - speed_penalty
+        # Penalise motor deviation from centre (keep arm near zero)
+        reward = reward - self.config.motor_angle_penalty * state.motor_angle.pow(2)
+
+        # Penalise large actions (energy efficiency / smoother control)
+        reward = reward - self.config.action_penalty * actions.squeeze(-1).pow(2)
+
+        # Penalty for exceeding motor angle limit (episode also terminates)
+        limit_rad = math.radians(self.config.motor_angle_limit_deg)
+        exceeded = state.motor_angle.abs() >= limit_rad
+        reward = torch.where(exceeded, torch.tensor(-1000.0, device=reward.device), reward)
+
+        return reward
+
+    # ── Terminations ─────────────────────────────────────────────

    def compute_terminations(self, state: RotaryCartPoleState) -> torch.Tensor:
-        # No early termination — episode runs for max_steps (truncation only).
-        # The agent must learn to swing up AND balance continuously.
-        return torch.zeros_like(state.motor_angle, dtype=torch.bool)
+        # Software safety: terminate if motor angle exceeds limit.
+        limit_rad = math.radians(self.config.motor_angle_limit_deg)
+        exceeded = state.motor_angle.abs() >= limit_rad
+        return exceeded
+
+    # ── Reset readiness (for SerialRunner) ───────────────────────
+
+    def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
+        """Pendulum must be hanging still and motor near center."""
+        motor_angle = float(qpos[0, 0])
+        pend_angle = float(qpos[0, 1])
+        motor_vel = float(qvel[0, 0])
+        pend_vel = float(qvel[0, 1])
+
+        # Pendulum near hanging-down (angle ~0) and slow
+        angle_ok = abs(pend_angle) < math.radians(2.0)
+        vel_ok = abs(pend_vel) < math.radians(5.0)
+        # Motor near center and slow
+        motor_ok = abs(motor_angle) < math.radians(5.0)
+        motor_vel_ok = abs(motor_vel) < math.radians(10.0)
+        return angle_ok and vel_ok and motor_ok and motor_vel_ok
+

-    def get_default_qpos(self, nq: int) -> list[float] | None:
-        # qpos=0 = pendulum hanging down (joint frame rotated in URDF).
-        return None
--- a/src/runners/mjx.py
+++ b/src/runners/mjx.py
@@ -29,7 +29,10 @@ import numpy as np

 from src.core.env import BaseEnv
 from src.core.runner import BaseRunner, BaseRunnerConfig
-from src.runners.mujoco import MuJoCoRunner  # reuse _load_model
+from src.runners.mujoco import (
+    ActuatorLimits,
+    load_mujoco_model,
+)

 log = structlog.get_logger()

@@ -39,7 +42,7 @@ class MJXRunnerConfig(BaseRunnerConfig):
    num_envs: int = 1024
    device: str = "cuda"
    dt: float = 0.002
-    substeps: int = 20
+    substeps: int = 10


 class MJXRunner(BaseRunner[MJXRunnerConfig]):
@@ -64,7 +67,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):

    def _sim_initialize(self, config: MJXRunnerConfig) -> None:
        # Step 1: Load CPU model (reuses URDF → MJCF → actuator injection)
-        self._mj_model = MuJoCoRunner._load_model(self.env.robot)
+        self._mj_model = load_mujoco_model(self.env.robot)
        self._mj_model.opt.timestep = config.dt
        self._nq = self._mj_model.nq
        self._nv = self._mj_model.nv
@@ -75,9 +78,6 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):

        # Step 3: Default reset state on GPU
        default_data = mujoco.MjData(self._mj_model)
-        default_qpos = self.env.get_default_qpos(self._nq)
-        if default_qpos is not None:
-            default_data.qpos[:] = default_qpos
        mujoco.mj_forward(self._mj_model, default_data)
        self._default_mjx_data = mjx.put_data(self._mj_model, default_data)

@@ -85,6 +85,18 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
        self._rng = jax.random.PRNGKey(42)
        self._batch_data = self._make_batched_data(config.num_envs)

+        # Step 4b: Build motor model info (ctrl_idx, qvel_idx, ActuatorConfig)
+        self._motor_info: list[tuple[int, int]] = []
+        self._motor_acts: list = []
+        for ctrl_idx, act in enumerate(self.env.robot.actuators):
+            if act.has_motor_model:
+                jnt_id = mujoco.mj_name2id(
+                    self._mj_model, mujoco.mjtObj.mjOBJ_JOINT, act.joint,
+                )
+                qvel_idx = self._mj_model.jnt_dofadr[jnt_id]
+                self._motor_info.append((ctrl_idx, qvel_idx))
+                self._motor_acts.append(act)
+
        # Step 5: JIT-compile the hot-path functions
        self._compile_jit_fns(config.substeps)

@@ -121,28 +133,77 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
        model = self._mjx_model
        default = self._default_mjx_data

-        # ── Per-actuator limit info (shared helper on MuJoCoRunner) ──
-        lim = MuJoCoRunner._extract_actuator_limits(self._mj_model)
+        lim = ActuatorLimits(self._mj_model)
        act_jnt_ids = jnp.array(lim.jnt_ids)
        act_limited = jnp.array(lim.limited)
        act_lo = jnp.array(lim.lo)
        act_hi = jnp.array(lim.hi)
        act_gs = jnp.array(lim.gear_sign)

+        # ── Motor model params (JAX arrays for JIT) ─────────────────
+        _has_motor = len(self._motor_info) > 0
+        if _has_motor:
+            acts = self._motor_acts
+            _ctrl_ids = jnp.array([c for c, _ in self._motor_info])
+            _qvel_ids = jnp.array([q for _, q in self._motor_info])
+            _dz_pos = jnp.array([a.deadzone[0] for a in acts])
+            _dz_neg = jnp.array([a.deadzone[1] for a in acts])
+            _gear_pos = jnp.array([a.gear[0] for a in acts])
+            _gear_neg = jnp.array([a.gear[1] for a in acts])
+            _gear_avg = jnp.array([a.gear_avg for a in acts])
+            _fl_pos = jnp.array([a.frictionloss[0] for a in acts])
+            _fl_neg = jnp.array([a.frictionloss[1] for a in acts])
+            _damp_pos = jnp.array([a.damping[0] for a in acts])
+            _damp_neg = jnp.array([a.damping[1] for a in acts])
+            _visc_quad = jnp.array([a.viscous_quadratic for a in acts])
+            _back_emf = jnp.array([a.back_emf_gain for a in acts])
+
        # ── Batched step (N substeps per call) ──────────────────────
        @jax.jit
        def step_fn(data):
            # Software limit switch: clamp ctrl once before substeps.
-            # Armature + default joint limits prevent significant overshoot
-            # within the substep window, so per-substep clamping isn't needed.
            pos = data.qpos[:, act_jnt_ids]
            ctrl = data.ctrl
            at_hi = act_limited & (pos >= act_hi) & (act_gs * ctrl > 0)
            at_lo = act_limited & (pos <= act_lo) & (act_gs * ctrl < 0)
-            clamped = jnp.where(at_hi | at_lo, 0.0, ctrl)
-            data = data.replace(ctrl=clamped)
+            ctrl = jnp.where(at_hi | at_lo, 0.0, ctrl)
+
+            if _has_motor:
+                # Deadzone + asymmetric gear compensation
+                mc = ctrl[:, _ctrl_ids]
+                mc = jnp.where((mc >= 0) & (mc < _dz_pos), 0.0, mc)
+                mc = jnp.where((mc < 0) & (mc > -_dz_neg), 0.0, mc)
+                gear_dir = jnp.where(mc >= 0, _gear_pos, _gear_neg)
+                mc = mc * gear_dir / _gear_avg
+                ctrl = ctrl.at[:, _ctrl_ids].set(mc)
+
+            data = data.replace(ctrl=ctrl)

            def body(_, d):
+                if _has_motor:
+                    vel = d.qvel[:, _qvel_ids]
+                    mc = d.ctrl[:, _ctrl_ids]
+
+                    # Coulomb friction (direction-dependent)
+                    fl = jnp.where(vel > 0, _fl_pos, _fl_neg)
+                    torque = -jnp.where(
+                        jnp.abs(vel) > 1e-6, jnp.sign(vel) * fl, 0.0,
+                    )
+                    # Viscous damping (direction-dependent)
+                    damp = jnp.where(vel > 0, _damp_pos, _damp_neg)
+                    torque = torque - damp * vel
+                    # Quadratic velocity drag
+                    torque = torque - _visc_quad * vel * jnp.abs(vel)
+                    # Back-EMF torque reduction
+                    bemf = _back_emf * vel * jnp.sign(mc)
+                    torque = torque - jnp.where(
+                        jnp.abs(mc) > 1e-6, bemf, 0.0,
+                    )
+                    torque = jnp.clip(torque, -10.0, 10.0)
+                    d = d.replace(
+                        qfrc_applied=d.qfrc_applied.at[:, _qvel_ids].set(torque),
+                    )
+
                return jax.vmap(mjx.step, in_axes=(None, 0))(model, d)

            return jax.lax.fori_loop(0, substeps, body, data)
@@ -210,7 +271,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):

    # ── Rendering ────────────────────────────────────────────────────

-    def render(self, env_idx: int = 0) -> np.ndarray:
+    def _render_frame(self, env_idx: int = 0) -> np.ndarray:
        """Offscreen render — copies one env's state from GPU to CPU."""
        self._render_data.qpos[:] = np.asarray(self._batch_data.qpos[env_idx])
        self._render_data.qvel[:] = np.asarray(self._batch_data.qvel[env_idx])
@@ -222,10 +283,4 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
                self._mj_model, width=640, height=480,
            )
        self._offscreen_renderer.update_scene(self._render_data)
-        frame = self._offscreen_renderer.render().copy()
-
-        # Import shared overlay helper from mujoco runner
-        from src.runners.mujoco import _draw_action_overlay
-        ctrl_val = float(self._render_data.ctrl[0]) if self._mj_model.nu > 0 else 0.0
-        _draw_action_overlay(frame, ctrl_val)
-        return frame
+        return self._offscreen_renderer.render().copy()
--- a/src/runners/mujoco.py
+++ b/src/runners/mujoco.py
@@ -1,4 +1,6 @@
 import dataclasses
+import os
+import tempfile
 import xml.etree.ElementTree as ET
 from pathlib import Path

@@ -10,26 +12,195 @@ from src.core.env import BaseEnv
 from src.core.robot import RobotConfig
 from src.core.runner import BaseRunner, BaseRunnerConfig

+
@dataclasses.dataclass
 class MuJoCoRunnerConfig(BaseRunnerConfig):
    num_envs: int = 16
    device: str = "cpu"
-    dt: float = 0.02
-    substeps: int = 2
+    dt: float = 0.002
+    substeps: int = 10


-@dataclasses.dataclass
 class ActuatorLimits:
-    """Per-actuator joint-limit info used for software ctrl clamping.
+    """Software limit-switch: cuts motor ctrl when a joint hits its range.

-    Real motor controllers have position limits that prevent the motor
-    from driving into the mechanical hard stop.
+    The real robot has physical limit switches that kill motor current
+    at the travel endpoints.  MuJoCo's built-in joint limits only apply
+    a spring force — they don't zero the actuator signal.  This class
+    replicates the hardware behavior.
    """
-    jnt_ids: np.ndarray     # (nu,) joint index for each actuator
-    limited: np.ndarray     # (nu,) bool — whether that joint is limited
-    lo: np.ndarray          # (nu,) lower position bound
-    hi: np.ndarray          # (nu,) upper position bound
-    gear_sign: np.ndarray   # (nu,) sign of gear ratio
+
+    def __init__(self, model: mujoco.MjModel) -> None:
+        jnt_ids = model.actuator_trnid[:model.nu, 0]
+        self.jnt_ids = jnt_ids
+        self.limited = model.jnt_limited[jnt_ids].astype(bool)
+        self.lo = model.jnt_range[jnt_ids, 0]
+        self.hi = model.jnt_range[jnt_ids, 1]
+        self.gear_sign = np.sign(model.actuator_gear[:model.nu, 0])
+
+    def enforce(self, model: mujoco.MjModel, data: mujoco.MjData) -> None:
+        """Zero ctrl that would push past joint limits (call every substep)."""
+        if not self.limited.any():
+            return
+        pos = data.qpos[self.jnt_ids]
+        signed_ctrl = self.gear_sign * data.ctrl[:model.nu]
+        at_hi = self.limited & (pos >= self.hi) & (signed_ctrl > 0)
+        at_lo = self.limited & (pos <= self.lo) & (signed_ctrl < 0)
+        data.ctrl[at_hi | at_lo] = 0.0
+
+
+# ── Public utilities ─────────────────────────────────────────────────
+
+
+def load_mujoco_model(robot: RobotConfig) -> mujoco.MjModel:
+    """Load a URDF (or MJCF) and apply robot.yaml settings.
+
+    Single model-loading entry point for all MuJoCo-based code:
+    training runners, MJX, and system identification.
+
+    Two-step approach required because MuJoCo's URDF parser ignores
+    ``<actuator>`` in the ``<mujoco>`` extension block:
+
+      1. Load the URDF -> MuJoCo converts it to internal MJCF
+      2. Export the MJCF XML, inject actuators + joint overrides, reload
+
+    This keeps the URDF clean (re-exportable from CAD) -- all hardware
+    tuning lives in ``robot.yaml``.
+    """
+    abs_path = robot.urdf_path.resolve()
+    model_dir = abs_path.parent
+    is_urdf = abs_path.suffix.lower() == ".urdf"
+
+    # -- Step 1: Load URDF with meshdir injection --
+    if is_urdf:
+        tree = ET.parse(abs_path)
+        root = tree.getroot()
+
+        # MuJoCo's URDF parser strips directory prefixes from mesh
+        # filenames, so we inject a <mujoco><compiler meshdir="..."/>
+        # block.  The original URDF stays clean and simulator-agnostic.
+        meshdir = None
+        for mesh_el in root.iter("mesh"):
+            fn = mesh_el.get("filename", "")
+            parent = str(Path(fn).parent)
+            if parent and parent != ".":
+                meshdir = parent
+                break
+        if meshdir:
+            mj_ext = ET.SubElement(root, "mujoco")
+            ET.SubElement(mj_ext, "compiler", attrib={
+                "meshdir": meshdir,
+                "balanceinertia": "true",
+            })
+
+        # Write to a temp file (unique name for multiprocessing safety).
+        fd, tmp_path = tempfile.mkstemp(
+            suffix=".urdf", prefix="_mj_", dir=str(model_dir),
+        )
+        os.close(fd)
+        try:
+            tree.write(tmp_path, xml_declaration=True, encoding="unicode")
+            model_raw = mujoco.MjModel.from_xml_path(tmp_path)
+        finally:
+            Path(tmp_path).unlink(missing_ok=True)
+    else:
+        model_raw = mujoco.MjModel.from_xml_path(str(abs_path))
+
+    # If robot.yaml has no actuators/joints, we're done.
+    if not robot.actuators and not robot.joints:
+        return model_raw
+
+    # -- Step 2: Export MJCF, inject actuators + joint overrides --
+    fd, tmp_path = tempfile.mkstemp(
+        suffix=".xml", prefix="_mj_", dir=str(model_dir),
+    )
+    os.close(fd)
+    try:
+        mujoco.mj_saveLastXML(tmp_path, model_raw)
+        mjcf_str = Path(tmp_path).read_text()
+        root = ET.fromstring(mjcf_str)
+
+        # -- Inject actuators --
+        if robot.actuators:
+            act_elem = ET.SubElement(root, "actuator")
+            for act in robot.actuators:
+                attribs = {
+                    "name": f"{act.joint}_{act.type}",
+                    "joint": act.joint,
+                    "gear": str(act.gear_avg),
+                    "ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
+                }
+
+                # dyntype is only available on <general>, not on
+                # shortcut elements like <motor>/<position>/<velocity>.
+                use_general = act.filter_tau > 0
+
+                if use_general:
+                    attribs["dyntype"] = "filter"
+                    attribs["dynprm"] = str(act.filter_tau)
+                    attribs["gaintype"] = "fixed"
+                    if act.type == "position":
+                        attribs["biastype"] = "affine"
+                        attribs["gainprm"] = str(act.kp)
+                        attribs["biasprm"] = f"0 -{act.kp} -{act.kv}"
+                    elif act.type == "velocity":
+                        attribs["biastype"] = "affine"
+                        attribs["gainprm"] = str(act.kp)
+                        attribs["biasprm"] = f"0 0 -{act.kp}"
+                    else:  # motor
+                        attribs["biastype"] = "none"
+                    ET.SubElement(act_elem, "general", attrib=attribs)
+                else:
+                    if act.type == "position":
+                        attribs["kp"] = str(act.kp)
+                        if act.kv > 0:
+                            attribs["kv"] = str(act.kv)
+                    elif act.type == "velocity":
+                        attribs["kp"] = str(act.kp)
+                    ET.SubElement(act_elem, act.type, attrib=attribs)
+
+        # -- Apply joint overrides from robot.yaml --
+        # For actuated joints with a motor model, MuJoCo damping/frictionloss
+        # are set to 0 — the motor model handles them via qfrc_applied.
+        joint_damping: dict[str, float] = {}
+        joint_frictionloss: dict[str, float] = {}
+        for act in robot.actuators:
+            if act.has_motor_model:
+                joint_damping[act.joint] = 0.0
+                joint_frictionloss[act.joint] = 0.0
+        joint_armature: dict[str, float] = {}
+        for name, jcfg in robot.joints.items():
+            if jcfg.damping is not None:
+                joint_damping[name] = jcfg.damping
+            if jcfg.armature is not None:
+                joint_armature[name] = jcfg.armature
+            if jcfg.frictionloss is not None:
+                joint_frictionloss[name] = jcfg.frictionloss
+
+        for body in root.iter("body"):
+            for jnt in body.findall("joint"):
+                name = jnt.get("name")
+                if name in joint_damping:
+                    jnt.set("damping", str(joint_damping[name]))
+                if name in joint_armature:
+                    jnt.set("armature", str(joint_armature[name]))
+                if name in joint_frictionloss:
+                    jnt.set("frictionloss", str(joint_frictionloss[name]))
+
+        # Disable self-collision on all geoms.
+        for geom in root.iter("geom"):
+            geom.set("contype", "0")
+            geom.set("conaffinity", "0")
+
+        modified_xml = ET.tostring(root, encoding="unicode")
+        Path(tmp_path).write_text(modified_xml)
+        return mujoco.MjModel.from_xml_path(tmp_path)
+    finally:
+        Path(tmp_path).unlink(missing_ok=True)
+
+
+# -- Runner -----------------------------------------------------------
+

 class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
    def __init__(self, env: BaseEnv, config: MuJoCoRunnerConfig):
@@ -38,201 +209,52 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
    @property
    def num_envs(self) -> int:
        return self.config.num_envs
-    
+
    @property
    def device(self) -> torch.device:
        return torch.device(self.config.device)
-    
-    @staticmethod
-    def _load_model(robot: RobotConfig) -> mujoco.MjModel:
-        """Load a URDF (or MJCF) and apply robot.yaml settings.
-
-        Two-step approach required because MuJoCo's URDF parser ignores
-        <actuator> in the <mujoco> extension block:
-          1. Load the URDF → MuJoCo converts it to internal MJCF
-          2. Export the MJCF XML, inject actuators + joint overrides, reload
-
-        This keeps the URDF clean (re-exportable from CAD) — all hardware
-        tuning lives in robot.yaml.
-        """
-        abs_path = robot.urdf_path.resolve()
-        model_dir = abs_path.parent
-        is_urdf = abs_path.suffix.lower() == ".urdf"
-
-        # MuJoCo's URDF parser strips directory prefixes from mesh filenames,
-        # so we inject a <mujoco><compiler meshdir="..."/> block into a
-        # temporary copy.  The original URDF stays clean and simulator-agnostic.
-        if is_urdf:
-            tree = ET.parse(abs_path)
-            root = tree.getroot()
-            # Detect the mesh subdirectory from the first mesh filename
-            meshdir = None
-            for mesh_el in root.iter("mesh"):
-                fn = mesh_el.get("filename", "")
-                parent = str(Path(fn).parent)
-                if parent and parent != ".":
-                    meshdir = parent
-                    break
-            if meshdir:
-                mj_ext = ET.SubElement(root, "mujoco")
-                ET.SubElement(mj_ext, "compiler", attrib={
-                    "meshdir": meshdir,
-                    "balanceinertia": "true",
-                })
-            tmp_urdf = model_dir / "_tmp_mujoco_load.urdf"
-            tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
-            try:
-                model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
-            finally:
-                tmp_urdf.unlink()
-        else:
-            model_raw = mujoco.MjModel.from_xml_path(str(abs_path))
-
-        if not robot.actuators and not robot.joints:
-            return model_raw
-
-        # Step 2: Export internal MJCF, inject actuators + joint overrides, reload
-        tmp_mjcf = model_dir / "_tmp_actuator_inject.xml"
-        try:
-            mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
-            mjcf_str = tmp_mjcf.read_text()
-
-            root = ET.fromstring(mjcf_str)
-
-            # ── Inject actuators ────────────────────────────────────
-            if robot.actuators:
-                act_elem = ET.SubElement(root, "actuator")
-                for act in robot.actuators:
-                    attribs = {
-                        "name": f"{act.joint}_{act.type}",
-                        "joint": act.joint,
-                        "gear": str(act.gear),
-                        "ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
-                    }
-
-                    # Actuator dynamics: 1st-order low-pass filter on ctrl.
-                    # MuJoCo applies this every physics substep, which is
-                    # more physically accurate than an external EMA.
-                    # dyntype is only available on <general>, not on
-                    # shortcut elements like <motor>/<position>/<velocity>.
-                    use_general = act.filter_tau > 0
-
-                    if use_general:
-                        attribs["dyntype"] = "filter"
-                        attribs["dynprm"] = str(act.filter_tau)
-
-                    if use_general:
-                        attribs["gaintype"] = "fixed"
-                        if act.type == "position":
-                            attribs["biastype"] = "affine"
-                            attribs["gainprm"] = str(act.kp)
-                            attribs["biasprm"] = f"0 -{act.kp} -{act.kv}"
-                        elif act.type == "velocity":
-                            attribs["biastype"] = "affine"
-                            attribs["gainprm"] = str(act.kp)
-                            attribs["biasprm"] = f"0 0 -{act.kp}"
-                        else:  # motor
-                            attribs["biastype"] = "none"
-                        ET.SubElement(act_elem, "general", attrib=attribs)
-                    else:
-                        if act.type == "position":
-                            attribs["kp"] = str(act.kp)
-                            if act.kv > 0:
-                                attribs["kv"] = str(act.kv)
-                        elif act.type == "velocity":
-                            attribs["kp"] = str(act.kp)
-                        ET.SubElement(act_elem, act.type, attrib=attribs)
-
-            # ── Apply joint overrides from robot.yaml ───────────────
-            # Merge actuator damping + explicit joint overrides
-            joint_damping = {a.joint: a.damping for a in robot.actuators}
-            joint_armature: dict[str, float] = {}
-            joint_frictionloss: dict[str, float] = {}
-            for name, jcfg in robot.joints.items():
-                if jcfg.damping is not None:
-                    joint_damping[name] = jcfg.damping
-                if jcfg.armature is not None:
-                    joint_armature[name] = jcfg.armature
-                if jcfg.frictionloss is not None:
-                    joint_frictionloss[name] = jcfg.frictionloss
-
-            for body in root.iter("body"):
-                for jnt in body.findall("joint"):
-                    name = jnt.get("name")
-                    if name in joint_damping:
-                        jnt.set("damping", str(joint_damping[name]))
-                    if name in joint_armature:
-                        jnt.set("armature", str(joint_armature[name]))
-                    if name in joint_frictionloss:
-                        jnt.set("frictionloss", str(joint_frictionloss[name]))
-
-            # Disable self-collision on all geoms.
-            # URDF mesh convex hulls often overlap at joints (especially
-            # grandparent↔grandchild bodies that MuJoCo does NOT auto-exclude),
-            # causing phantom contact forces.
-            for geom in root.iter("geom"):
-                geom.set("contype", "0")
-                geom.set("conaffinity", "0")
-
-            # Joint limits use MuJoCo's default solver settings.
-            # The software limit switch (zeroing ctrl at limits) + armature
-            # prevent overshoot without needing ultra-stiff solref that
-            # kills MJX GPU solver performance.
-
-            # Step 4: Write modified MJCF and reload from file path
-            # (from_xml_path resolves mesh paths relative to the file location)
-            modified_xml = ET.tostring(root, encoding="unicode")
-            tmp_mjcf.write_text(modified_xml)
-            return mujoco.MjModel.from_xml_path(str(tmp_mjcf))
-        finally:
-            tmp_mjcf.unlink(missing_ok=True)
-
-    @staticmethod
-    def _extract_actuator_limits(model: mujoco.MjModel) -> ActuatorLimits:
-        """Extract per-actuator joint-limit arrays from a loaded MjModel.
-
-        Shared by MuJoCoRunner and MJXRunner so the logic isn't duplicated.
-        """
-        nu = model.nu
-        jnt_ids = np.array([model.actuator_trnid[i, 0] for i in range(nu)])
-        return ActuatorLimits(
-            jnt_ids=jnt_ids,
-            limited=np.array([model.jnt_limited[j] for j in jnt_ids], dtype=bool),
-            lo=np.array([model.jnt_range[j, 0] for j in jnt_ids]),
-            hi=np.array([model.jnt_range[j, 1] for j in jnt_ids]),
-            gear_sign=np.sign(np.array([model.actuator_gear[i, 0] for i in range(nu)])),
-        )

    def _sim_initialize(self, config: MuJoCoRunnerConfig) -> None:
-        self._model = self._load_model(self.env.robot)
+        self._model = load_mujoco_model(self.env.robot)
        self._model.opt.timestep = config.dt
-        self._data: list[mujoco.MjData] = [mujoco.MjData(self._model) for _ in range(config.num_envs)]
-
+        self._data: list[mujoco.MjData] = [
+            mujoco.MjData(self._model) for _ in range(config.num_envs)
+        ]
        self._nq = self._model.nq
        self._nv = self._model.nv
+        self._limits = ActuatorLimits(self._model)

-        self._limits = self._extract_actuator_limits(self._model)
+        # Build motor model: list of (actuator_config, joint_qvel_index) for
+        # actuators that have asymmetric motor dynamics.
+        self._motor_actuators: list[tuple] = []
+        for act in self.env.robot.actuators:
+            if act.has_motor_model:
+                jnt_id = mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_JOINT, act.joint)
+                qvel_idx = self._model.jnt_dofadr[jnt_id]
+                self._motor_actuators.append((act, qvel_idx))

    def _sim_step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        actions_np: np.ndarray = actions.cpu().numpy()

+        # Apply per-actuator ctrl transform (deadzone + gear compensation)
+        for act_idx, (act, _) in enumerate(self._motor_actuators):
+            for env_idx in range(self.num_envs):
+                actions_np[env_idx, act_idx] = act.transform_ctrl(
+                    float(actions_np[env_idx, act_idx])
+                )
+
        qpos_batch = np.zeros((self.num_envs, self._nq), dtype=np.float32)
        qvel_batch = np.zeros((self.num_envs, self._nv), dtype=np.float32)

        for i, data in enumerate(self._data):
            data.ctrl[:] = actions_np[i]
            for _ in range(self.config.substeps):
-                # Software limit switch: zero out ctrl that would push
-                # a joint past its position limit (like a real controller).
-                lim = self._limits
-                for a in range(self._model.nu):
-                    if lim.limited[a]:
-                        pos = data.qpos[lim.jnt_ids[a]]
-                        gs = lim.gear_sign[a]
-                        if pos >= lim.hi[a] and gs * data.ctrl[a] > 0:
-                            data.ctrl[a] = 0.0
-                        elif pos <= lim.lo[a] and gs * data.ctrl[a] < 0:
-                            data.ctrl[a] = 0.0
+                # Apply asymmetric motor forces via qfrc_applied
+                for act, qvel_idx in self._motor_actuators:
+                    vel = data.qvel[qvel_idx]
+                    ctrl = data.ctrl[0]  # TODO: generalise for multi-actuator
+                    data.qfrc_applied[qvel_idx] = act.compute_motor_force(vel, ctrl)
+                self._limits.enforce(self._model, data)
                mujoco.mj_step(self._model, data)

            qpos_batch[i] = data.qpos
@@ -242,7 +264,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
            torch.from_numpy(qpos_batch).to(self.device),
            torch.from_numpy(qvel_batch).to(self.device),
        )
-    
+
    def _sim_reset(self, env_ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        ids = env_ids.cpu().numpy()
        n = len(ids)
@@ -250,21 +272,13 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
        qpos_batch = np.zeros((n, self._nq), dtype=np.float32)
        qvel_batch = np.zeros((n, self._nv), dtype=np.float32)

-        default_qpos = self.env.get_default_qpos(self._nq)
-
        for i, env_id in enumerate(ids):
            data = self._data[env_id]
            mujoco.mj_resetData(self._model, data)

-            # Set initial pose (env-specific, e.g. pendulum hanging down)
-            if default_qpos is not None:
-                data.qpos[:] = default_qpos
-
-            # Add small random perturbation for exploration
+            # Small random perturbation for exploration
            data.qpos[:] += np.random.uniform(-0.05, 0.05, size=self._nq)
            data.qvel[:] += np.random.uniform(-0.05, 0.05, size=self._nv)
-
-            # Reset smoothed ctrl so motor starts from rest
            data.ctrl[:] = 0.0

            qpos_batch[i] = data.qpos
@@ -275,7 +289,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
            torch.from_numpy(qvel_batch).to(self.device),
        )

-    def render(self, env_idx: int = 0) -> np.ndarray:
+    def _render_frame(self, env_idx: int = 0) -> np.ndarray:
        """Offscreen render of a single environment."""
        if not hasattr(self, "_offscreen_renderer"):
            self._offscreen_renderer = mujoco.Renderer(
@@ -283,43 +297,4 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
            )
        mujoco.mj_forward(self._model, self._data[env_idx])
        self._offscreen_renderer.update_scene(self._data[env_idx])
-        frame = self._offscreen_renderer.render().copy()
-
-        # Draw action bar overlay — shows ctrl[0] as a horizontal bar
-        ctrl_val = float(self._data[env_idx].ctrl[0]) if self._model.nu > 0 else 0.0
-        _draw_action_overlay(frame, ctrl_val)
-        return frame
-
-
-def _draw_action_overlay(frame: np.ndarray, action: float) -> None:
-    """Draw an action bar + text on a rendered frame (no OpenCV needed).
-
-    Bar is centered horizontally: green to the right (+), red to the left (-).
-    """
-    h, w = frame.shape[:2]
-
-    # Bar geometry
-    bar_y = h - 30
-    bar_h = 16
-    bar_x_center = w // 2
-    bar_half_w = w // 4  # max half-width of the bar
-    bar_x_left = bar_x_center - bar_half_w
-    bar_x_right = bar_x_center + bar_half_w
-
-    # Background (dark grey)
-    frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40]
-
-    # Filled bar
-    fill_len = int(abs(action) * bar_half_w)
-    if action > 0:
-        color = [60, 200, 60]  # green
-        x0 = bar_x_center
-        x1 = min(bar_x_center + fill_len, bar_x_right)
-    else:
-        color = [200, 60, 60]  # red
-        x1 = bar_x_center
-        x0 = max(bar_x_center - fill_len, bar_x_left)
-    frame[bar_y:bar_y + bar_h, x0:x1] = color
-
-    # Center tick mark (white)
-    frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255]
+        return self._offscreen_renderer.render().copy()
--- a/src/runners/serial.py
+++ b/src/runners/serial.py
@@ -11,11 +11,15 @@ Serial protocol (ESP32 firmware):
        H           — stop streaming
        M<int>      — set motor PWM speed (-255 … 255)

-    State lines received FROM the ESP32:
-        S,<ms>,<enc>,<rpm>,<motor_speed>,<at_limit>,
-          <pend_deg>,<pend_vel>,<target_speed>,<braking>,
-          <enc_vel_cps>,<pendulum_ok>
-        (12 comma-separated fields after the ``S`` prefix)
+    State lines received FROM the ESP32 (firmware sends SI units):
+        S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
+        (7 comma-separated fields after the ``S`` prefix)
+
+        motor_rad    — motor joint angle (radians)
+        motor_vel    — motor joint velocity (rad/s)
+        pend_rad     — pendulum angle (radians, 0 = hanging down)
+        pend_vel     — pendulum angular velocity (rad/s)
+        motor_speed  — applied PWM (-255..255, for action recording)

 A daemon thread continuously reads the serial stream so the control
 loop never blocks on I/O.
@@ -28,7 +32,6 @@ from __future__ import annotations

 import dataclasses
 import logging
-import math
 import threading
 import time
 from typing import Any
@@ -36,8 +39,6 @@ from typing import Any
 import numpy as np
 import torch

-from src.core.env import BaseEnv
-from src.core.hardware import HardwareConfig, load_hardware_config
 from src.core.runner import BaseRunner, BaseRunnerConfig

 logger = logging.getLogger(__name__)
@@ -52,9 +53,14 @@ class SerialRunnerConfig(BaseRunnerConfig):

    port: str = "/dev/cu.usbserial-0001"
    baud: int = 115200
-    dt: float = 0.02  # control loop period (seconds), 50 Hz
+    dt: float = 0.04  # control loop period (seconds), 25 Hz
    no_data_timeout: float = 2.0  # seconds of silence → disconnect
-    encoder_jump_threshold: int = 200  # encoder tick jump → reboot
+
+    # Physical reset procedure
+    reset_drive_speed: int = 80       # PWM for bang-bang drive-to-center
+    reset_deadband_rad: float = 0.01  # "centered" threshold (~0.6°)
+    reset_drive_timeout: float = 3.0  # seconds to reach center
+    reset_settle_timeout: float = 30.0  # seconds to wait for pendulum


 class SerialRunner(BaseRunner[SerialRunnerConfig]):
@@ -80,22 +86,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
        return torch.device("cpu")

    def _sim_initialize(self, config: SerialRunnerConfig) -> None:
-        # Load hardware description (encoder, safety, reset params).
-        hw = load_hardware_config(self.env.config.robot_path)
-        if hw is None:
-            raise FileNotFoundError(
-                f"hardware.yaml not found in {self.env.config.robot_path}. "
-                "The serial runner requires a hardware config for encoder, "
-                "safety, and reset parameters."
-            )
-        self._hw: HardwareConfig = hw
-        self._counts_per_rev: float = hw.encoder.counts_per_rev
-        self._max_motor_angle_rad: float = (
-            math.radians(hw.safety.max_motor_angle_deg)
-            if hw.safety.max_motor_angle_deg > 0
-            else 0.0
-        )
-
        # Joint dimensions for the rotary cartpole (motor + pendulum).
        self._nq = 2
        self._nv = 2
@@ -105,8 +95,16 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):

        self._serial_mod = _serial

+        # Explicitly disable hardware flow control and exclusive mode to
+        # avoid termios.error (errno 22) on macOS with CH340/CP2102 adapters.
        self.ser: _serial.Serial = _serial.Serial(
-            config.port, config.baud, timeout=0.05
+            port=config.port,
+            baudrate=config.baud,
+            timeout=0.05,
+            xonxoff=False,
+            rtscts=False,
+            dsrdtr=False,
+            exclusive=False,
        )
        time.sleep(2)  # Wait for ESP32 boot.
        self.ser.reset_input_buffer()
@@ -116,22 +114,17 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
        self._serial_disconnected: bool = False
        self._last_esp_ms: int = 0
        self._last_data_time: float = time.monotonic()
-        self._last_encoder_count: int = 0
        self._streaming: bool = False

        # Latest parsed state (updated by the reader thread).
+        # Firmware sends SI units — values are used directly as qpos/qvel.
        self._latest_state: dict[str, Any] = {
            "timestamp_ms": 0,
-            "encoder_count": 0,
-            "rpm": 0.0,
+            "motor_rad": 0.0,
+            "motor_vel": 0.0,
+            "pend_rad": 0.0,
+            "pend_vel": 0.0,
            "motor_speed": 0,
-            "at_limit": False,
-            "pendulum_angle": 0.0,
-            "pendulum_velocity": 0.0,
-            "target_speed": 0,
-            "braking": False,
-            "enc_vel_cps": 0.0,
-            "pendulum_ok": False,
        }
        self._state_lock = threading.Lock()
        self._state_event = threading.Event()
@@ -148,6 +141,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
        self._streaming = True
        self._last_data_time = time.monotonic()

+        # Derive max PWM from actuator ctrl_range so the serial
+        # command range matches what MuJoCo training sees.
+        ctrl_hi = self.env.robot.actuators[0].ctrl_range[1]
+        self._max_pwm: int = round(ctrl_hi * 255)
+
        # Track wall-clock time of last step for PPO-gap detection.
        self._last_step_time: float = time.monotonic()

@@ -170,41 +168,22 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
            self._sim_reset(all_ids)
            self.step_counts.zero_()

-        step_start = time.monotonic()
-
-        # Map normalised action [-1, 1] → PWM [-255, 255].
+        # Map normalised action [-1, 1] → PWM, scaled by ctrl_range.
        action_val = float(actions[0, 0].clamp(-1.0, 1.0))
-        motor_speed = int(action_val * 255)
+        motor_speed = int(action_val * self._max_pwm)
        self._send(f"M{motor_speed}")

-        # Enforce dt wall-clock timing.
-        elapsed = time.monotonic() - step_start
-        remaining = self.config.dt - elapsed
-        if remaining > 0:
-            time.sleep(remaining)
+        # Stream-driven: block until the firmware sends the next state
+        # line (~20 ms at 50 Hz).
+        state = self._read_state_blocking(timeout=0.1)

-        # Read latest sensor data (non-blocking — dt sleep ensures freshness).
-        state = self._read_state()
+        # Firmware sends SI units — use directly.
+        qpos, qvel = self._state_to_tensors(state)

-        motor_angle = (
-            state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
-        )
-        motor_vel = (
-            state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
-        )
-        pendulum_angle = math.radians(state["pendulum_angle"])
-        pendulum_vel = math.radians(state["pendulum_velocity"])
-
-        # Cache motor angle for safety check in step() — avoids a second read.
-        self._last_motor_angle_rad = motor_angle
+        # Cache for _sync_viz().
+        self._last_sync_state = state
        self._last_step_time = time.monotonic()

-        qpos = torch.tensor(
-            [[motor_angle, pendulum_angle]], dtype=torch.float32
-        )
-        qvel = torch.tensor(
-            [[motor_vel, pendulum_vel]], dtype=torch.float32
-        )
        return qpos, qvel

    def _sim_reset(
@@ -232,59 +211,42 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
        # Physically return the motor to the centre position.
        self._drive_to_center()

-        # Wait until the pendulum settles.
-        self._wait_for_pendulum_still()
+        # Wait until the env considers the robot settled.
+        self._wait_for_settle()

        # Refresh data timer so health checks don't false-positive.
        self._last_data_time = time.monotonic()

        # Read settled state and return as qpos/qvel.
        state = self._read_state_blocking()
-        motor_angle = (
-            state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
-        )
-        motor_vel = (
-            state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
-        )
-        pendulum_angle = math.radians(state["pendulum_angle"])
-        pendulum_vel = math.radians(state["pendulum_velocity"])
+        qpos, qvel = self._state_to_tensors(state)

-        qpos = torch.tensor(
-            [[motor_angle, pendulum_angle]], dtype=torch.float32
-        )
-        qvel = torch.tensor(
-            [[motor_vel, pendulum_vel]], dtype=torch.float32
-        )
+        self._last_sync_state = state
        return qpos, qvel

    def _sim_close(self) -> None:
        self._reader_running = False
        self._streaming = False
-        self._send("H")  # Stop streaming.
-        self._send("M0")  # Stop motor.
+        self._send("H")
+        self._send("M0")
        time.sleep(0.1)
        self._reader_thread.join(timeout=1.0)
        self.ser.close()
-        if hasattr(self, "_offscreen_renderer") and self._offscreen_renderer is not None:
-            self._offscreen_renderer.close()
+        super()._sim_close()

    # ------------------------------------------------------------------
    # MuJoCo digital-twin rendering
    # ------------------------------------------------------------------

    def _ensure_viz_model(self) -> None:
-        """Lazily load the MuJoCo model for visualisation (digital twin).
-
-        Reuses the same URDF + robot.yaml that the MuJoCoRunner would use,
-        but only for rendering — no physics stepping.
-        """
+        """Lazily load the MuJoCo model for visualisation (digital twin)."""
        if hasattr(self, "_viz_model"):
            return

        import mujoco
-        from src.runners.mujoco import MuJoCoRunner
+        from src.runners.mujoco import load_mujoco_model

-        self._viz_model = MuJoCoRunner._load_model(self.env.robot)
+        self._viz_model = load_mujoco_model(self.env.robot)
        self._viz_data = mujoco.MjData(self._viz_model)
        self._offscreen_renderer = None

@@ -293,32 +255,21 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
        import mujoco

        self._ensure_viz_model()
-        state = self._read_state()

-        # Set joint positions from serial data.
-        motor_angle = (
-            state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
-        )
-        pendulum_angle = math.radians(state["pendulum_angle"])
-        self._viz_data.qpos[0] = motor_angle
-        self._viz_data.qpos[1] = pendulum_angle
+        last_state = getattr(self, "_last_sync_state", None)
+        if last_state is None:
+            last_state = self._read_state()

-        # Set joint velocities (for any velocity-dependent visuals).
-        motor_vel = (
-            state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
-        )
-        pendulum_vel = math.radians(state["pendulum_velocity"])
-        self._viz_data.qvel[0] = motor_vel
-        self._viz_data.qvel[1] = pendulum_vel
+        # Firmware sends radians — use directly.
+        self._viz_data.qpos[0] = last_state["motor_rad"]
+        self._viz_data.qpos[1] = last_state["pend_rad"]
+        self._viz_data.qvel[0] = last_state["motor_vel"]
+        self._viz_data.qvel[1] = last_state["pend_vel"]

-        # Forward kinematics (updates body positions for rendering).
        mujoco.mj_forward(self._viz_model, self._viz_data)

-    def render(self, env_idx: int = 0) -> np.ndarray:
-        """Offscreen render of the digital-twin MuJoCo model.
-
-        Called by VideoRecordingTrainer during training to capture frames.
-        """
+    def _render_frame(self, env_idx: int = 0) -> np.ndarray:
+        """Offscreen render of the digital-twin MuJoCo model."""
        import mujoco

        self._sync_viz()
@@ -340,7 +291,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
        # Check for ESP32 reboot / disconnect BEFORE stepping.
        if self._rebooted or self._serial_disconnected:
            self._send("M0")
-            # Return a terminal observation with penalty.
            qpos, qvel = self._make_current_state()
            state = self.env.build_state(qpos, qvel)
            obs = self.env.compute_observations(state)
@@ -359,16 +309,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
            rewards = torch.tensor([[-100.0]])
            info["reboot_detected"] = True

-        # Check motor angle against hard safety limit.
-        # Uses the cached value from _sim_step — no extra serial read.
-        if self._max_motor_angle_rad > 0:
-            motor_angle = abs(getattr(self, "_last_motor_angle_rad", 0.0))
-            if motor_angle >= self._max_motor_angle_rad:
-                self._send("M0")
-                terminated = torch.tensor([[True]])
-                rewards = torch.tensor([[-100.0]])
-                info["motor_limit_exceeded"] = True
-
        # Always stop motor on episode end.
        if terminated.any() or truncated.any():
            self._send("M0")
@@ -387,7 +327,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
            self._serial_disconnected = True

    def _serial_reader(self) -> None:
-        """Background thread: continuously read and parse serial lines."""
+        """Background thread: continuously read and parse serial lines.
+
+        Protocol: ``S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>``
+        (7 comma-separated fields).  Firmware sends SI units directly.
+        """
        while self._reader_running:
            try:
                if self.ser.in_waiting:
@@ -405,9 +349,15 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):

                    if line.startswith("S,"):
                        parts = line.split(",")
-                        if len(parts) >= 12:
-                            esp_ms = int(parts[1])
-                            enc = int(parts[2])
+                        if len(parts) >= 7:
+                            try:
+                                esp_ms = int(parts[1])
+                            except ValueError:
+                                logger.debug(
+                                    "Malformed state line (header): %s",
+                                    line,
+                                )
+                                continue

                            # Detect reboot: timestamp jumped backwards.
                            if (
@@ -422,38 +372,24 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
                                    esp_ms,
                                )

-                            # Detect reboot: encoder snapped to 0 from
-                            # a far position.
-                            if (
-                                abs(self._last_encoder_count)
-                                > self.config.encoder_jump_threshold
-                                and abs(enc) < 5
-                            ):
-                                self._rebooted = True
-                                logger.critical(
-                                    "ESP32 reboot detected: encoder"
-                                    " jumped %d -> %d",
-                                    self._last_encoder_count,
-                                    enc,
-                                )
-
                            self._last_esp_ms = esp_ms
-                            self._last_encoder_count = enc
                            self._last_data_time = time.monotonic()

-                            parsed: dict[str, Any] = {
-                                "timestamp_ms": esp_ms,
-                                "encoder_count": enc,
-                                "rpm": float(parts[3]),
-                                "motor_speed": int(parts[4]),
-                                "at_limit": bool(int(parts[5])),
-                                "pendulum_angle": float(parts[6]),
-                                "pendulum_velocity": float(parts[7]),
-                                "target_speed": int(parts[8]),
-                                "braking": bool(int(parts[9])),
-                                "enc_vel_cps": float(parts[10]),
-                                "pendulum_ok": bool(int(parts[11])),
-                            }
+                            try:
+                                parsed: dict[str, Any] = {
+                                    "timestamp_ms": esp_ms,
+                                    "motor_rad": float(parts[2]),
+                                    "motor_vel": float(parts[3]),
+                                    "pend_rad": float(parts[4]),
+                                    "pend_vel": float(parts[5]),
+                                    "motor_speed": int(parts[6]),
+                                }
+                            except ValueError:
+                                logger.debug(
+                                    "Malformed state line (fields): %s",
+                                    line,
+                                )
+                                continue
                            with self._state_lock:
                                self._latest_state = parsed
                            self._state_event.set()
@@ -483,81 +419,68 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
        return True

    def _read_state(self) -> dict[str, Any]:
-        """Return the most recent state from the reader thread (non-blocking).
-
-        The background thread updates at ~50 Hz and `_sim_step` already
-        sleeps for `dt` before calling this, so the data is always fresh.
-        """
+        """Return the most recent state from the reader thread (non-blocking)."""
        with self._state_lock:
            return dict(self._latest_state)

    def _read_state_blocking(self, timeout: float = 0.05) -> dict[str, Any]:
-        """Wait for a fresh sample, then return it.
-
-        Used during reset / settling where we need to guarantee we have
-        a new reading (no prior dt sleep).
-        """
+        """Wait for a fresh sample, then return it."""
        self._state_event.clear()
        self._state_event.wait(timeout=timeout)
        with self._state_lock:
            return dict(self._latest_state)

-    def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]:
-        """Build qpos/qvel from current sensor data (utility)."""
-        state = self._read_state_blocking()
-        motor_angle = (
-            state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
-        )
-        motor_vel = (
-            state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
-        )
-        pendulum_angle = math.radians(state["pendulum_angle"])
-        pendulum_vel = math.radians(state["pendulum_velocity"])
-
+    def _state_to_tensors(
+        self, state: dict[str, Any],
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Convert a parsed state dict to (qpos, qvel) tensors."""
        qpos = torch.tensor(
-            [[motor_angle, pendulum_angle]], dtype=torch.float32
+            [[state["motor_rad"], state["pend_rad"]]], dtype=torch.float32
        )
        qvel = torch.tensor(
-            [[motor_vel, pendulum_vel]], dtype=torch.float32
+            [[state["motor_vel"], state["pend_vel"]]], dtype=torch.float32
        )
        return qpos, qvel

+    def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]:
+        """Build qpos/qvel from current sensor data (utility)."""
+        return self._state_to_tensors(self._read_state_blocking())
+
    # ------------------------------------------------------------------
    # Physical reset helpers
    # ------------------------------------------------------------------

    def _drive_to_center(self) -> None:
-        """Drive the motor back toward encoder=0 using bang-bang control."""
-        rc = self._hw.reset
+        """Drive the motor back toward center using bang-bang control."""
+        cfg = self.config
        start = time.time()
-        while time.time() - start < rc.drive_timeout:
+        while time.time() - start < cfg.reset_drive_timeout:
            state = self._read_state_blocking()
-            enc = state["encoder_count"]
-            if abs(enc) < rc.deadband:
+            motor_rad = state["motor_rad"]
+            if abs(motor_rad) < cfg.reset_deadband_rad:
                break
-            speed = rc.drive_speed if enc < 0 else -rc.drive_speed
+            speed = cfg.reset_drive_speed if motor_rad < 0 else -cfg.reset_drive_speed
            self._send(f"M{speed}")
            time.sleep(0.05)
        self._send("M0")
        time.sleep(0.2)

-    def _wait_for_pendulum_still(self) -> None:
-        """Block until the pendulum has settled (angle and velocity near zero)."""
-        rc = self._hw.reset
+    def _wait_for_settle(self) -> None:
+        """Block until the env considers the robot ready for a new episode."""
+        cfg = self.config
        stable_since: float | None = None
        start = time.monotonic()

-        while time.monotonic() - start < rc.settle_timeout:
+        while time.monotonic() - start < cfg.reset_settle_timeout:
            state = self._read_state_blocking()
-            angle_ok = abs(state["pendulum_angle"]) < rc.settle_angle_deg
-            vel_ok = abs(state["pendulum_velocity"]) < rc.settle_vel_dps
+            qpos, qvel = self._state_to_tensors(state)

-            if angle_ok and vel_ok:
+            if self.env.is_reset_ready(qpos, qvel):
                if stable_since is None:
                    stable_since = time.monotonic()
-                elif time.monotonic() - stable_since >= rc.settle_duration:
+                elif time.monotonic() - stable_since >= 0.5:
                    logger.info(
-                        "Pendulum settled after %.2f s",
+                        "Robot settled after %.2f s",
                        time.monotonic() - start,
                    )
                    return
@@ -566,6 +489,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
            time.sleep(0.02)

        logger.warning(
-            "Pendulum did not fully settle within %.1f s — proceeding anyway.",
-            rc.settle_timeout,
+            "Robot did not settle within %.1f s — proceeding anyway.",
+            cfg.reset_settle_timeout,
        )
--- a/src/sysid/_urdf.py
+++ b/src/sysid/_urdf.py
@@ -0,0 +1,79 @@
+"""URDF XML helpers shared by sysid rollout and export modules."""
+
+from __future__ import annotations
+
+import xml.etree.ElementTree as ET
+
+
+def set_mass(inertial: ET.Element, mass: float | None) -> None:
+    if mass is None:
+        return
+    mass_el = inertial.find("mass")
+    if mass_el is not None:
+        mass_el.set("value", str(mass))
+
+
+def set_com(
+    inertial: ET.Element,
+    x: float | None,
+    y: float | None,
+    z: float | None,
+) -> None:
+    origin = inertial.find("origin")
+    if origin is None:
+        return
+    xyz = origin.get("xyz", "0 0 0").split()
+    if x is not None:
+        xyz[0] = str(x)
+    if y is not None:
+        xyz[1] = str(y)
+    if z is not None:
+        xyz[2] = str(z)
+    origin.set("xyz", " ".join(xyz))
+
+
+def set_inertia(
+    inertial: ET.Element,
+    ixx: float | None = None,
+    iyy: float | None = None,
+    izz: float | None = None,
+    ixy: float | None = None,
+    iyz: float | None = None,
+    ixz: float | None = None,
+) -> None:
+    ine = inertial.find("inertia")
+    if ine is None:
+        return
+    for attr, val in [
+        ("ixx", ixx), ("iyy", iyy), ("izz", izz),
+        ("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
+    ]:
+        if val is not None:
+            ine.set(attr, str(val))
+
+
+def patch_link_inertials(
+    root: ET.Element,
+    params: dict[str, float],
+) -> None:
+    """Patch arm and pendulum inertial parameters in a URDF ElementTree root."""
+    for link in root.iter("link"):
+        link_name = link.get("name", "")
+        inertial = link.find("inertial")
+        if inertial is None:
+            continue
+
+        if link_name == "arm":
+            set_mass(inertial, params.get("arm_mass"))
+            set_com(inertial, params.get("arm_com_x"),
+                     params.get("arm_com_y"), params.get("arm_com_z"))
+
+        elif link_name == "pendulum":
+            set_mass(inertial, params.get("pendulum_mass"))
+            set_com(inertial, params.get("pendulum_com_x"),
+                     params.get("pendulum_com_y"), params.get("pendulum_com_z"))
+            set_inertia(inertial,
+                         ixx=params.get("pendulum_ixx"),
+                         iyy=params.get("pendulum_iyy"),
+                         izz=params.get("pendulum_izz"),
+                         ixy=params.get("pendulum_ixy"))
--- a/src/sysid/capture.py
+++ b/src/sysid/capture.py
@@ -6,6 +6,10 @@ the system, and records motor + pendulum angles and velocities at ~50 Hz.
 Saves a compressed numpy archive (.npz) that the optimizer can replay
 in simulation to fit physics parameters.

+Serial protocol (same as SerialRunner):
+    S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
+    (7 comma-separated fields — firmware sends SI units)
+
 Usage:
    python -m src.sysid.capture \
        --robot-path assets/rotary_cartpole \
@@ -17,7 +21,6 @@ from __future__ import annotations

 import argparse
 import math
-import os
 import random
 import threading
 import time
@@ -27,7 +30,6 @@ from typing import Any

 import numpy as np
 import structlog
-import yaml

 log = structlog.get_logger()

@@ -36,25 +38,24 @@ log = structlog.get_logger()


 def _parse_state_line(line: str) -> dict[str, Any] | None:
-    """Parse an ``S,…`` state line from the ESP32."""
+    """Parse an ``S,…`` state line from the ESP32.
+
+    Format: S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
+    (7 comma-separated fields, firmware sends SI units)
+    """
    if not line.startswith("S,"):
        return None
    parts = line.split(",")
-    if len(parts) < 12:
+    if len(parts) < 7:
        return None
    try:
        return {
            "timestamp_ms": int(parts[1]),
-            "encoder_count": int(parts[2]),
-            "rpm": float(parts[3]),
-            "motor_speed": int(parts[4]),
-            "at_limit": bool(int(parts[5])),
-            "pendulum_angle": float(parts[6]),
-            "pendulum_velocity": float(parts[7]),
-            "target_speed": int(parts[8]),
-            "braking": bool(int(parts[9])),
-            "enc_vel_cps": float(parts[10]),
-            "pendulum_ok": bool(int(parts[11])),
+            "motor_rad": float(parts[2]),
+            "motor_vel": float(parts[3]),
+            "pend_rad": float(parts[4]),
+            "pend_vel": float(parts[5]),
+            "motor_speed": int(parts[6]),
        }
    except (ValueError, IndexError):
        return None
@@ -64,7 +65,12 @@ def _parse_state_line(line: str) -> dict[str, Any] | None:


 class _SerialReader:
-    """Minimal background reader for the ESP32 serial stream."""
+    """Minimal background reader for the ESP32 serial stream.
+
+    Uses a sequence counter so ``read_blocking()`` guarantees it returns
+    a *new* state line (not a stale repeat).  This keeps the capture
+    loop locked to the firmware's 50 Hz tick.
+    """

    def __init__(self, port: str, baud: int = 115200):
        import serial as _serial
@@ -75,14 +81,16 @@ class _SerialReader:
        self.ser.reset_input_buffer()

        self._latest: dict[str, Any] = {}
+        self._seq: int = 0              # incremented on every new state line
        self._lock = threading.Lock()
-        self._event = threading.Event()
+        self._cond = threading.Condition(self._lock)
        self._running = True

        self._thread = threading.Thread(target=self._reader_loop, daemon=True)
        self._thread.start()

    def _reader_loop(self) -> None:
+        _debug_count = 0
        while self._running:
            try:
                if self.ser.in_waiting:
@@ -91,11 +99,16 @@ class _SerialReader:
                        .decode("utf-8", errors="ignore")
                        .strip()
                    )
+                    # Debug: log first 10 raw lines so we can see what the firmware sends.
+                    if _debug_count < 10 and line:
+                        log.info("serial_raw_line", line=repr(line), count=_debug_count)
+                        _debug_count += 1
                    parsed = _parse_state_line(line)
                    if parsed is not None:
-                        with self._lock:
+                        with self._cond:
                            self._latest = parsed
-                        self._event.set()
+                            self._seq += 1
+                            self._cond.notify_all()
                else:
                    time.sleep(0.001)
            except (OSError, self._serial_mod.SerialException):
@@ -108,14 +121,19 @@ class _SerialReader:
        except (OSError, self._serial_mod.SerialException):
            log.critical("serial_send_failed", cmd=cmd)

-    def read(self) -> dict[str, Any]:
-        with self._lock:
-            return dict(self._latest)
-
    def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
-        self._event.clear()
-        self._event.wait(timeout=timeout)
-        return self.read()
+        """Wait until a *new* state line arrives, then return it.
+
+        Uses a sequence counter to guarantee the returned state is
+        different from whatever was available before this call.
+        """
+        with self._cond:
+            seq_before = self._seq
+            if not self._cond.wait_for(
+                lambda: self._seq > seq_before, timeout=timeout
+            ):
+                return {}  # timeout — no new data
+            return dict(self._latest)

    def close(self) -> None:
        self._running = False
@@ -139,7 +157,7 @@ class _PRBSExcitation:

    def __init__(
        self,
-        amplitude: int = 180,
+        amplitude: int = 150,
        hold_min_ms: int = 50,
        hold_max_ms: int = 300,
    ):
@@ -169,42 +187,39 @@ def capture(
    port: str = "/dev/cu.usbserial-0001",
    baud: int = 115200,
    duration: float = 20.0,
-    amplitude: int = 180,
+    amplitude: int = 150,
    hold_min_ms: int = 50,
    hold_max_ms: int = 300,
    dt: float = 0.02,
+    motor_angle_limit_deg: float = 90.0,
 ) -> Path:
    """Run the capture procedure and return the path to the saved .npz file.

+    The capture loop is **stream-driven**: it blocks on each incoming
+    state line from the firmware (which arrives at 50 Hz), sends the
+    next motor command immediately, and records both.
+
    Parameters
    ----------
-    robot_path : path to robot asset directory (contains hardware.yaml)
+    robot_path : path to robot asset directory
    port : serial port for ESP32
    baud : baud rate
    duration : capture duration in seconds
-    amplitude : max PWM magnitude for excitation (0–255)
+    amplitude : max PWM magnitude for excitation
    hold_min_ms / hold_max_ms : random hold time range (ms)
-    dt : target sampling period (seconds), default 50 Hz
+    dt : nominal sample period for buffer sizing (seconds)
+    motor_angle_limit_deg : safety limit for motor angle
    """
    robot_path = Path(robot_path).resolve()

-    # Load hardware config for encoder conversion + safety.
-    hw_yaml = robot_path / "hardware.yaml"
-    if not hw_yaml.exists():
-        raise FileNotFoundError(f"hardware.yaml not found in {robot_path}")
-    raw_hw = yaml.safe_load(hw_yaml.read_text())
-    ppr = raw_hw.get("encoder", {}).get("ppr", 11)
-    gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0)
-    counts_per_rev: float = ppr * gear_ratio * 4.0
-    max_motor_deg = raw_hw.get("safety", {}).get("max_motor_angle_deg", 90.0)
-    max_motor_rad = math.radians(max_motor_deg) if max_motor_deg > 0 else 0.0
+    max_motor_rad = math.radians(motor_angle_limit_deg) if motor_angle_limit_deg > 0 else 0.0

    # Connect.
    reader = _SerialReader(port, baud)
    excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)

-    # Prepare recording buffers.
-    max_samples = int(duration / dt) + 500  # headroom
+    # Prepare recording buffers (generous headroom).
+    max_samples = int(duration / dt) + 500
    rec_time = np.zeros(max_samples, dtype=np.float64)
    rec_action = np.zeros(max_samples, dtype=np.float64)
    rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
@@ -222,63 +237,96 @@ def capture(
        duration=duration,
        amplitude=amplitude,
        hold_range_ms=f"{hold_min_ms}–{hold_max_ms}",
-        dt=dt,
+        mode="stream-driven (firmware clock)",
    )

    idx = 0
+    pwm = 0
+    last_esp_ms = -1  # firmware timestamp of last recorded sample
+    no_data_count = 0  # consecutive timeouts with no data
    t0 = time.monotonic()
    try:
        while True:
-            loop_start = time.monotonic()
-            elapsed = loop_start - t0
+            # Block until the firmware sends the next state line (~20 ms).
+            # Timeout at 100 ms prevents hanging if the ESP32 disconnects.
+            state = reader.read_blocking(timeout=0.1)
+            if not state:
+                no_data_count += 1
+                if no_data_count == 30:  # 3 seconds with no data
+                    log.warning(
+                        "no_data_received",
+                        msg="No state lines from firmware after 3s. "
+                        "Check: is the ESP32 powered? Is it running the right firmware? "
+                        "Try pressing the RESET button.",
+                    )
+                if no_data_count == 100:  # 10 seconds
+                    log.critical(
+                        "no_data_timeout",
+                        msg="No data for 10s — aborting capture.",
+                    )
+                    break
+                continue  # no data yet — retry
+            no_data_count = 0
+
+            # Deduplicate: the firmware may send multiple state lines per
+            # tick (e.g. M-command echo + tick). Only record one sample
+            # per unique firmware timestamp.
+            esp_ms = state.get("timestamp_ms", 0)
+            if esp_ms == last_esp_ms:
+                continue
+            last_esp_ms = esp_ms
+
+            elapsed = time.monotonic() - t0
            if elapsed >= duration:
                break

-            # Get excitation PWM.
+            # Get excitation PWM for the NEXT tick.
            pwm = excitation()

-            # Safety: reverse/zero if near motor limit.
-            state = reader.read()
-            if state:
-                motor_angle_rad = (
-                    state.get("encoder_count", 0) / counts_per_rev * 2.0 * math.pi
-                )
-                if max_motor_rad > 0:
-                    margin = max_motor_rad * 0.85  # start braking at 85%
-                    if motor_angle_rad > margin and pwm > 0:
-                        pwm = -abs(pwm)  # reverse
-                    elif motor_angle_rad < -margin and pwm < 0:
-                        pwm = abs(pwm)  # reverse
+            # Safety: keep the arm well within its mechanical range.
+            # Firmware sends motor angle in radians — use directly.
+            motor_angle_rad = state.get("motor_rad", 0.0)
+            if max_motor_rad > 0:
+                ratio = motor_angle_rad / max_motor_rad  # signed, -1..+1
+                abs_ratio = abs(ratio)

-            # Send command.
+                if abs_ratio > 0.90:
+                    # Deep in the danger zone — force a strong return.
+                    brake_strength = min(1.0, (abs_ratio - 0.90) / 0.10)  # 0→1
+                    brake_pwm = int(amplitude * (0.5 + 0.5 * brake_strength))
+                    pwm = -brake_pwm if ratio > 0 else brake_pwm
+                elif abs_ratio > 0.70:
+                    # Soft zone — only allow actions pointing back to centre.
+                    if ratio > 0 and pwm > 0:
+                        pwm = -abs(pwm)
+                    elif ratio < 0 and pwm < 0:
+                        pwm = abs(pwm)
+
+            # Send command immediately — it will take effect on the next tick.
            reader.send(f"M{pwm}")

-            # Wait for fresh data.
-            time.sleep(max(0, dt - (time.monotonic() - loop_start) - 0.005))
-            state = reader.read_blocking(timeout=dt)
+            # Record this tick's state + the action the motor *actually*
+            # received.  Firmware sends SI units — use directly.
+            motor_angle = state.get("motor_rad", 0.0)
+            motor_vel = state.get("motor_vel", 0.0)
+            pend_angle = state.get("pend_rad", 0.0)
+            pend_vel = state.get("pend_vel", 0.0)
+            # Firmware constrains to ±255; normalise to [-1, 1].
+            applied = state.get("motor_speed", 0)
+            action_norm = max(-255, min(255, applied)) / 255.0

-            if state:
-                enc = state.get("encoder_count", 0)
-                motor_angle = enc / counts_per_rev * 2.0 * math.pi
-                motor_vel = (
-                    state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi
-                )
-                pend_angle = math.radians(state.get("pendulum_angle", 0.0))
-                pend_vel = math.radians(state.get("pendulum_velocity", 0.0))
+            if idx < max_samples:
+                rec_time[idx] = elapsed
+                rec_action[idx] = action_norm
+                rec_motor_angle[idx] = motor_angle
+                rec_motor_vel[idx] = motor_vel
+                rec_pend_angle[idx] = pend_angle
+                rec_pend_vel[idx] = pend_vel
+                idx += 1
+            else:
+                break  # buffer full

-                # Normalised action: PWM / 255 → [-1, 1]
-                action_norm = pwm / 255.0
-
-                if idx < max_samples:
-                    rec_time[idx] = elapsed
-                    rec_action[idx] = action_norm
-                    rec_motor_angle[idx] = motor_angle
-                    rec_motor_vel[idx] = motor_vel
-                    rec_pend_angle[idx] = pend_angle
-                    rec_pend_vel[idx] = pend_vel
-                    idx += 1
-
-            # Progress.
+            # Progress (every 50 samples ≈ once per second at 50 Hz).
            if idx % 50 == 0:
                log.info(
                    "capture_progress",
@@ -287,11 +335,6 @@ def capture(
                    pwm=pwm,
                )

-            # Pace to dt.
-            remaining = dt - (time.monotonic() - loop_start)
-            if remaining > 0:
-                time.sleep(remaining)
-
    finally:
        reader.send("M0")
        reader.close()
@@ -339,7 +382,7 @@ def main() -> None:
        "--robot-path",
        type=str,
        default="assets/rotary_cartpole",
-        help="Path to robot asset directory (contains hardware.yaml)",
+        help="Path to robot asset directory",
    )
    parser.add_argument(
        "--port",
@@ -352,7 +395,8 @@ def main() -> None:
        "--duration", type=float, default=20.0, help="Capture duration (s)"
    )
    parser.add_argument(
-        "--amplitude", type=int, default=180, help="Max PWM magnitude (0–255)"
+        "--amplitude", type=int, default=150,
+        help="Max PWM magnitude (should not exceed firmware MAX_MOTOR_SPEED=150)",
    )
    parser.add_argument(
        "--hold-min-ms", type=int, default=50, help="Min hold time (ms)"
@@ -361,7 +405,11 @@ def main() -> None:
        "--hold-max-ms", type=int, default=300, help="Max hold time (ms)"
    )
    parser.add_argument(
-        "--dt", type=float, default=0.02, help="Sample period (s)"
+        "--dt", type=float, default=0.02, help="Nominal sample period for buffer sizing (s)"
+    )
+    parser.add_argument(
+        "--motor-angle-limit", type=float, default=90.0,
+        help="Motor angle safety limit in degrees (0 = disabled)",
    )
    args = parser.parse_args()

@@ -374,6 +422,7 @@ def main() -> None:
        hold_min_ms=args.hold_min_ms,
        hold_max_ms=args.hold_max_ms,
        dt=args.dt,
+        motor_angle_limit_deg=args.motor_angle_limit,
    )


--- a/src/sysid/export.py
+++ b/src/sysid/export.py
@@ -8,20 +8,21 @@ alongside the originals in the robot asset directory.
 from __future__ import annotations

 import copy
-import json
-import shutil
 import xml.etree.ElementTree as ET
 from pathlib import Path

 import structlog
 import yaml

+from src.sysid._urdf import patch_link_inertials
+
 log = structlog.get_logger()


 def export_tuned_files(
    robot_path: str | Path,
    params: dict[str, float],
+    motor_params: dict[str, float] | None = None,
 ) -> tuple[Path, Path]:
    """Write tuned URDF and robot.yaml files.

@@ -29,6 +30,8 @@ def export_tuned_files(
    ----------
    robot_path : robot asset directory (contains robot.yaml + *.urdf)
    params : dict of parameter name → tuned value (from optimizer)
+    motor_params : locked motor sysid params (asymmetric model).
+        If provided, motor joint parameters come from here.

    Returns
    -------
@@ -43,38 +46,7 @@ def export_tuned_files(

    # ── Tune URDF ────────────────────────────────────────────────
    tree = ET.parse(urdf_path)
-    root = tree.getroot()
-
-    for link in root.iter("link"):
-        link_name = link.get("name", "")
-        inertial = link.find("inertial")
-        if inertial is None:
-            continue
-
-        if link_name == "arm":
-            _set_mass(inertial, params.get("arm_mass"))
-            _set_com(
-                inertial,
-                params.get("arm_com_x"),
-                params.get("arm_com_y"),
-                params.get("arm_com_z"),
-            )
-
-        elif link_name == "pendulum":
-            _set_mass(inertial, params.get("pendulum_mass"))
-            _set_com(
-                inertial,
-                params.get("pendulum_com_x"),
-                params.get("pendulum_com_y"),
-                params.get("pendulum_com_z"),
-            )
-            _set_inertia(
-                inertial,
-                ixx=params.get("pendulum_ixx"),
-                iyy=params.get("pendulum_iyy"),
-                izz=params.get("pendulum_izz"),
-                ixy=params.get("pendulum_ixy"),
-            )
+    patch_link_inertials(tree.getroot(), params)

    # Write tuned URDF.
    tuned_urdf_name = urdf_path.stem + "_tuned" + urdf_path.suffix
@@ -91,15 +63,47 @@ def export_tuned_files(
    # Point to the tuned URDF.
    tuned_cfg["urdf"] = tuned_urdf_name

-    # Update actuator parameters.
+    # Update actuator parameters — full asymmetric motor model.
    if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
        act = tuned_cfg["actuators"][0]
-        if "actuator_gear" in params:
-            act["gear"] = round(params["actuator_gear"], 6)
-        if "actuator_filter_tau" in params:
-            act["filter_tau"] = round(params["actuator_filter_tau"], 6)
-        if "motor_damping" in params:
-            act["damping"] = round(params["motor_damping"], 6)
+        if motor_params:
+            # Asymmetric gear, damping, deadzone, frictionloss as [pos, neg].
+            gear_pos = motor_params.get("actuator_gear_pos", 0.424)
+            gear_neg = motor_params.get("actuator_gear_neg", 0.425)
+            act["gear"] = [round(gear_pos, 6), round(gear_neg, 6)]
+
+            damp_pos = motor_params.get("motor_damping_pos", 0.002)
+            damp_neg = motor_params.get("motor_damping_neg", 0.015)
+            act["damping"] = [round(damp_pos, 6), round(damp_neg, 6)]
+
+            dz_pos = motor_params.get("motor_deadzone_pos", 0.141)
+            dz_neg = motor_params.get("motor_deadzone_neg", 0.078)
+            act["deadzone"] = [round(dz_pos, 6), round(dz_neg, 6)]
+
+            fl_pos = motor_params.get("motor_frictionloss_pos", 0.057)
+            fl_neg = motor_params.get("motor_frictionloss_neg", 0.053)
+            act["frictionloss"] = [round(fl_pos, 6), round(fl_neg, 6)]
+
+            if "actuator_filter_tau" in motor_params:
+                act["filter_tau"] = round(motor_params["actuator_filter_tau"], 6)
+            if "viscous_quadratic" in motor_params:
+                act["viscous_quadratic"] = round(motor_params["viscous_quadratic"], 6)
+            if "back_emf_gain" in motor_params:
+                act["back_emf_gain"] = round(motor_params["back_emf_gain"], 6)
+        else:
+            if "actuator_gear" in params:
+                act["gear"] = round(params["actuator_gear"], 6)
+            if "actuator_filter_tau" in params:
+                act["filter_tau"] = round(params["actuator_filter_tau"], 6)
+            if "motor_damping" in params:
+                act["damping"] = round(params["motor_damping"], 6)
+            if "motor_deadzone" in params:
+                act["deadzone"] = round(params["motor_deadzone"], 6)
+
+        # ctrl_range from ctrl_limit parameter.
+        if "ctrl_limit" in params:
+            lim = round(params["ctrl_limit"], 6)
+            act["ctrl_range"] = [-lim, lim]

    # Update joint overrides.
    if "joints" not in tuned_cfg:
@@ -108,16 +112,23 @@ def export_tuned_files(
    if "motor_joint" not in tuned_cfg["joints"]:
        tuned_cfg["joints"]["motor_joint"] = {}
    mj = tuned_cfg["joints"]["motor_joint"]
-    if "motor_armature" in params:
-        mj["armature"] = round(params["motor_armature"], 6)
-    if "motor_frictionloss" in params:
-        mj["frictionloss"] = round(params["motor_frictionloss"], 6)
+    if motor_params:
+        mj["armature"] = round(motor_params.get("motor_armature", 0.00277), 6)
+        # Frictionloss/damping = 0 in MuJoCo (motor model handles via qfrc_applied).
+        mj["frictionloss"] = 0.0
+    else:
+        if "motor_armature" in params:
+            mj["armature"] = round(params["motor_armature"], 6)
+        if "motor_frictionloss" in params:
+            mj["frictionloss"] = round(params["motor_frictionloss"], 6)

    if "pendulum_joint" not in tuned_cfg["joints"]:
        tuned_cfg["joints"]["pendulum_joint"] = {}
    pj = tuned_cfg["joints"]["pendulum_joint"]
    if "pendulum_damping" in params:
        pj["damping"] = round(params["pendulum_damping"], 6)
+    if "pendulum_frictionloss" in params:
+        pj["frictionloss"] = round(params["pendulum_frictionloss"], 6)

    # Write tuned robot.yaml.
    tuned_yaml_path = robot_path / "robot_tuned.yaml"
@@ -136,51 +147,46 @@ def export_tuned_files(
    return tuned_urdf_path, tuned_yaml_path


-# ── XML helpers (shared with rollout.py) ─────────────────────────────
+# ── CLI entry point ──────────────────────────────────────────────────


-def _set_mass(inertial: ET.Element, mass: float | None) -> None:
-    if mass is None:
-        return
-    mass_el = inertial.find("mass")
-    if mass_el is not None:
-        mass_el.set("value", str(mass))
+def main() -> None:
+    import argparse
+    import json
+
+    from src.sysid.rollout import LOCKED_MOTOR_PARAMS
+
+    parser = argparse.ArgumentParser(
+        description="Export tuned URDF + robot.yaml from sysid results."
+    )
+    parser.add_argument(
+        "--robot-path", type=str, default="assets/rotary_cartpole",
+        help="Path to robot asset directory",
+    )
+    parser.add_argument(
+        "--result", type=str, default=None,
+        help="Path to sysid_result.json (auto-detected if omitted)",
+    )
+    args = parser.parse_args()
+
+    robot_path = Path(args.robot_path).resolve()
+    if args.result:
+        result_path = Path(args.result)
+    else:
+        result_path = robot_path / "sysid_result.json"
+
+    if not result_path.exists():
+        raise FileNotFoundError(f"Result file not found: {result_path}")
+
+    result = json.loads(result_path.read_text())
+
+    export_tuned_files(
+        robot_path=args.robot_path,
+        params=result["best_params"],
+        motor_params=result.get("motor_params", dict(LOCKED_MOTOR_PARAMS)),
+    )


-def _set_com(
-    inertial: ET.Element,
-    x: float | None,
-    y: float | None,
-    z: float | None,
-) -> None:
-    origin = inertial.find("origin")
-    if origin is None:
-        return
-    xyz = origin.get("xyz", "0 0 0").split()
-    if x is not None:
-        xyz[0] = str(x)
-    if y is not None:
-        xyz[1] = str(y)
-    if z is not None:
-        xyz[2] = str(z)
-    origin.set("xyz", " ".join(xyz))
+if __name__ == "__main__":
+    main()

-
-def _set_inertia(
-    inertial: ET.Element,
-    ixx: float | None = None,
-    iyy: float | None = None,
-    izz: float | None = None,
-    ixy: float | None = None,
-    iyz: float | None = None,
-    ixz: float | None = None,
-) -> None:
-    ine = inertial.find("inertia")
-    if ine is None:
-        return
-    for attr, val in [
-        ("ixx", ixx), ("iyy", iyy), ("izz", izz),
-        ("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
-    ]:
-        if val is not None:
-            ine.set(attr, str(val))
--- a/src/sysid/motor/init.py
+++ b/src/sysid/motor/init.py
@@ -0,0 +1,5 @@
+"""Motor-only system identification subpackage.
+
+Identifies JGB37-520 DC motor dynamics (no pendulum, no limits)
+so the MuJoCo simulation matches the real hardware response.
+"""
--- a/src/sysid/motor/capture.py
+++ b/src/sysid/motor/capture.py
@@ -0,0 +1,364 @@
+"""Capture a motor-only trajectory under random excitation (PRBS-style).
+
+Connects to the ESP32 running the simplified sysid firmware (no pendulum,
+no limits), sends random PWM commands, and records motor angle + velocity
+at ~ 50 Hz.
+
+Firmware serial protocol (115200 baud):
+  Commands:  M<speed>\\n  R\\n  S\\n  G\\n  H\\n  P\\n
+  State:     S,<millis>,<encoder_count>,<rpm>,<applied_speed>,<enc_vel_cps>\\n
+
+Usage:
+    python -m src.sysid.motor.capture --duration 20
+    python -m src.sysid.motor.capture --duration 30 --amplitude 200
+"""
+
+from __future__ import annotations
+
+import argparse
+import math
+import random
+import threading
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import structlog
+import yaml
+
+log = structlog.get_logger()
+
+# ── Default asset path ───────────────────────────────────────────────
+_DEFAULT_ASSET = "assets/motor"
+
+
+# ── Serial protocol helpers ──────────────────────────────────────────
+
+
+def _parse_state_line(line: str) -> dict[str, Any] | None:
+    """Parse an ``S,…`` state line from the motor sysid firmware.
+
+    Format: S,<millis>,<encoder_count>,<rpm>,<applied_speed>,<enc_vel_cps>
+    """
+    if not line.startswith("S,"):
+        return None
+    parts = line.split(",")
+    if len(parts) < 6:
+        return None
+    try:
+        return {
+            "timestamp_ms": int(parts[1]),
+            "encoder_count": int(parts[2]),
+            "rpm": float(parts[3]),
+            "applied_speed": int(parts[4]),
+            "enc_vel_cps": float(parts[5]),
+        }
+    except (ValueError, IndexError):
+        return None
+
+
+# ── Background serial reader ─────────────────────────────────────────
+
+
+class _SerialReader:
+    """Minimal background reader for the ESP32 serial stream."""
+
+    def __init__(self, port: str, baud: int = 115200):
+        import serial as _serial
+
+        self._serial_mod = _serial
+        self.ser = _serial.Serial(port, baud, timeout=0.05)
+        time.sleep(2)  # Wait for ESP32 boot
+        self.ser.reset_input_buffer()
+
+        self._latest: dict[str, Any] = {}
+        self._seq: int = 0
+        self._lock = threading.Lock()
+        self._cond = threading.Condition(self._lock)
+        self._running = True
+
+        self._thread = threading.Thread(target=self._reader_loop, daemon=True)
+        self._thread.start()
+
+    def _reader_loop(self) -> None:
+        while self._running:
+            try:
+                if self.ser.in_waiting:
+                    line = (
+                        self.ser.readline()
+                        .decode("utf-8", errors="ignore")
+                        .strip()
+                    )
+                    parsed = _parse_state_line(line)
+                    if parsed is not None:
+                        with self._cond:
+                            self._latest = parsed
+                            self._seq += 1
+                            self._cond.notify_all()
+                    elif line and not line.startswith("S,"):
+                        # Log non-state lines (READY, PONG, WARN, etc.)
+                        log.debug("serial_info", line=line)
+                else:
+                    time.sleep(0.001)
+            except (OSError, self._serial_mod.SerialException):
+                log.critical("serial_lost")
+                break
+
+    def send(self, cmd: str) -> None:
+        try:
+            self.ser.write(f"{cmd}\n".encode())
+        except (OSError, self._serial_mod.SerialException):
+            log.critical("serial_send_failed", cmd=cmd)
+
+    def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
+        """Wait until a *new* state line arrives, then return it."""
+        with self._cond:
+            seq_before = self._seq
+            if not self._cond.wait_for(
+                lambda: self._seq > seq_before, timeout=timeout
+            ):
+                return {}
+            return dict(self._latest)
+
+    def close(self) -> None:
+        self._running = False
+        self.send("H")
+        self.send("M0")
+        time.sleep(0.1)
+        self._thread.join(timeout=1.0)
+        self.ser.close()
+
+
+# ── PRBS excitation signal ───────────────────────────────────────────
+
+
+class _PRBSExcitation:
+    """Random hold-value excitation with configurable amplitude and hold time."""
+
+    def __init__(
+        self,
+        amplitude: int = 200,
+        hold_min_ms: int = 50,
+        hold_max_ms: int = 400,
+    ):
+        self.amplitude = amplitude
+        self.hold_min_ms = hold_min_ms
+        self.hold_max_ms = hold_max_ms
+        self._current: int = 0
+        self._switch_time: float = 0.0
+        self._new_value()
+
+    def _new_value(self) -> None:
+        self._current = random.randint(-self.amplitude, self.amplitude)
+        hold_ms = random.randint(self.hold_min_ms, self.hold_max_ms)
+        self._switch_time = time.monotonic() + hold_ms / 1000.0
+
+    def __call__(self) -> int:
+        if time.monotonic() >= self._switch_time:
+            self._new_value()
+        return self._current
+
+
+# ── Main capture loop ────────────────────────────────────────────────
+
+
+def capture(
+    asset_path: str | Path = _DEFAULT_ASSET,
+    port: str = "/dev/cu.usbserial-0001",
+    baud: int = 115200,
+    duration: float = 20.0,
+    amplitude: int = 200,
+    hold_min_ms: int = 50,
+    hold_max_ms: int = 400,
+    dt: float = 0.02,
+) -> Path:
+    """Run motor-only capture and return the path to the saved .npz file.
+
+    Stream-driven: blocks on each firmware state line (~50 Hz),
+    sends next motor command immediately, records both.
+    No time.sleep pacing — locked to firmware clock.
+
+    The recording stores:
+      - time: wall-clock seconds since start
+      - action: normalised action = applied_speed / 255
+      - motor_angle: shaft angle in radians (from encoder)
+      - motor_vel: shaft velocity in rad/s (from encoder velocity)
+    """
+    asset_path = Path(asset_path).resolve()
+
+    # Load hardware config for encoder conversion.
+    hw_yaml = asset_path / "hardware.yaml"
+    if not hw_yaml.exists():
+        raise FileNotFoundError(f"hardware.yaml not found in {asset_path}")
+    raw_hw = yaml.safe_load(hw_yaml.read_text())
+    ppr = raw_hw.get("encoder", {}).get("ppr", 11)
+    gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0)
+    counts_per_rev: float = ppr * gear_ratio * 4.0
+    max_pwm = raw_hw.get("motor", {}).get("max_pwm", 255)
+
+    log.info(
+        "hardware_config",
+        ppr=ppr,
+        gear_ratio=gear_ratio,
+        counts_per_rev=counts_per_rev,
+        max_pwm=max_pwm,
+    )
+
+    # Connect.
+    reader = _SerialReader(port, baud)
+    excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)
+
+    # Prepare recording buffers.
+    max_samples = int(duration / dt) + 500
+    rec_time = np.zeros(max_samples, dtype=np.float64)
+    rec_action = np.zeros(max_samples, dtype=np.float64)
+    rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
+    rec_motor_vel = np.zeros(max_samples, dtype=np.float64)
+
+    # Reset encoder to zero.
+    reader.send("R")
+    time.sleep(0.1)
+
+    # Start streaming.
+    reader.send("G")
+    time.sleep(0.1)
+
+    log.info(
+        "capture_starting",
+        port=port,
+        duration=duration,
+        amplitude=amplitude,
+        hold_range_ms=f"{hold_min_ms}–{hold_max_ms}",
+    )
+
+    idx = 0
+    pwm = 0
+    last_esp_ms = -1
+    t0 = time.monotonic()
+
+    try:
+        while True:
+            state = reader.read_blocking(timeout=0.1)
+            if not state:
+                continue
+
+            # Deduplicate by firmware timestamp.
+            esp_ms = state.get("timestamp_ms", 0)
+            if esp_ms == last_esp_ms:
+                continue
+            last_esp_ms = esp_ms
+
+            elapsed = time.monotonic() - t0
+            if elapsed >= duration:
+                break
+
+            # Generate next excitation PWM.
+            pwm = excitation()
+
+            # Send command.
+            reader.send(f"M{pwm}")
+
+            # Convert encoder to angle/velocity.
+            enc = state.get("encoder_count", 0)
+            motor_angle = enc / counts_per_rev * 2.0 * math.pi
+            motor_vel = (
+                state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi
+            )
+
+            # Use firmware-applied speed for the action.
+            applied = state.get("applied_speed", 0)
+            action_norm = applied / 255.0
+
+            if idx < max_samples:
+                rec_time[idx] = elapsed
+                rec_action[idx] = action_norm
+                rec_motor_angle[idx] = motor_angle
+                rec_motor_vel[idx] = motor_vel
+                idx += 1
+            else:
+                break
+
+            if idx % 50 == 0:
+                log.info(
+                    "capture_progress",
+                    elapsed=f"{elapsed:.1f}/{duration:.0f}s",
+                    samples=idx,
+                    pwm=pwm,
+                    angle_deg=f"{math.degrees(motor_angle):.1f}",
+                    vel_rps=f"{motor_vel / (2 * math.pi):.1f}",
+                )
+
+    finally:
+        reader.send("M0")
+        reader.close()
+
+    # Trim.
+    rec_time = rec_time[:idx]
+    rec_action = rec_action[:idx]
+    rec_motor_angle = rec_motor_angle[:idx]
+    rec_motor_vel = rec_motor_vel[:idx]
+
+    # Save.
+    recordings_dir = asset_path / "recordings"
+    recordings_dir.mkdir(exist_ok=True)
+    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    out_path = recordings_dir / f"motor_capture_{stamp}.npz"
+    np.savez_compressed(
+        out_path,
+        time=rec_time,
+        action=rec_action,
+        motor_angle=rec_motor_angle,
+        motor_vel=rec_motor_vel,
+    )
+
+    log.info(
+        "capture_saved",
+        path=str(out_path),
+        samples=idx,
+        duration_actual=f"{rec_time[-1]:.2f}s" if idx > 0 else "0s",
+    )
+    return out_path
+
+
+# ── CLI ──────────────────────────────────────────────────────────────
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Capture motor-only trajectory for system identification."
+    )
+    parser.add_argument(
+        "--asset-path", type=str, default=_DEFAULT_ASSET,
+        help="Path to motor asset directory (contains hardware.yaml)",
+    )
+    parser.add_argument(
+        "--port", type=str, default="/dev/cu.usbserial-0001",
+        help="Serial port for ESP32",
+    )
+    parser.add_argument("--baud", type=int, default=115200)
+    parser.add_argument("--duration", type=float, default=20.0, help="Capture duration (s)")
+    parser.add_argument(
+        "--amplitude", type=int, default=200,
+        help="Max PWM magnitude (0–255, firmware allows full range)",
+    )
+    parser.add_argument("--hold-min-ms", type=int, default=50, help="PRBS min hold (ms)")
+    parser.add_argument("--hold-max-ms", type=int, default=400, help="PRBS max hold (ms)")
+    parser.add_argument("--dt", type=float, default=0.02, help="Nominal sample period (s)")
+    args = parser.parse_args()
+
+    capture(
+        asset_path=args.asset_path,
+        port=args.port,
+        baud=args.baud,
+        duration=args.duration,
+        amplitude=args.amplitude,
+        hold_min_ms=args.hold_min_ms,
+        hold_max_ms=args.hold_max_ms,
+        dt=args.dt,
+    )
+
+
+if __name__ == "__main__":
+    main()
--- a/src/sysid/motor/export.py
+++ b/src/sysid/motor/export.py
@@ -0,0 +1,186 @@
+"""Export tuned motor parameters to MJCF and robot.yaml files.
+
+Reads the original motor.xml and robot.yaml, patches with optimised
+parameter values, and writes motor_tuned.xml + robot_tuned.yaml.
+
+Usage:
+    python -m src.sysid.motor.export \
+        --asset-path assets/motor \
+        --result assets/motor/motor_sysid_result.json
+"""
+
+from __future__ import annotations
+
+import argparse
+import copy
+import json
+import xml.etree.ElementTree as ET
+from pathlib import Path
+
+import structlog
+import yaml
+
+log = structlog.get_logger()
+
+_DEFAULT_ASSET = "assets/motor"
+
+
+def export_tuned_files(
+    asset_path: str | Path,
+    params: dict[str, float],
+) -> tuple[Path, Path]:
+    """Write tuned MJCF and robot.yaml files.
+
+    Returns (tuned_mjcf_path, tuned_robot_yaml_path).
+    """
+    asset_path = Path(asset_path).resolve()
+
+    robot_yaml_path = asset_path / "robot.yaml"
+    robot_cfg = yaml.safe_load(robot_yaml_path.read_text())
+    mjcf_path = asset_path / robot_cfg["mjcf"]
+
+    # ── Tune MJCF ────────────────────────────────────────────────
+    tree = ET.parse(str(mjcf_path))
+    root = tree.getroot()
+
+    # Actuator — use average gear for the MJCF model.
+    gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear"))
+    gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear"))
+    gear_avg = None
+    if gear_pos is not None and gear_neg is not None:
+        gear_avg = (gear_pos + gear_neg) / 2.0
+    elif gear_pos is not None:
+        gear_avg = gear_pos
+    filter_tau = params.get("actuator_filter_tau")
+    for act_el in root.iter("general"):
+        if act_el.get("name") == "motor":
+            if gear_avg is not None:
+                act_el.set("gear", str(gear_avg))
+            if filter_tau is not None:
+                if filter_tau > 0:
+                    act_el.set("dyntype", "filter")
+                    act_el.set("dynprm", str(filter_tau))
+                else:
+                    act_el.set("dyntype", "none")
+
+    # Joint — average damping & friction for MJCF (asymmetry in runtime).
+    fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss"))
+    fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss"))
+    fl_avg = None
+    if fl_pos is not None and fl_neg is not None:
+        fl_avg = (fl_pos + fl_neg) / 2.0
+    elif fl_pos is not None:
+        fl_avg = fl_pos
+    damp_pos = params.get("motor_damping_pos", params.get("motor_damping"))
+    damp_neg = params.get("motor_damping_neg", params.get("motor_damping"))
+    damp_avg = None
+    if damp_pos is not None and damp_neg is not None:
+        damp_avg = (damp_pos + damp_neg) / 2.0
+    elif damp_pos is not None:
+        damp_avg = damp_pos
+    for jnt in root.iter("joint"):
+        if jnt.get("name") == "motor_joint":
+            if damp_avg is not None:
+                jnt.set("damping", str(damp_avg))
+            if "motor_armature" in params:
+                jnt.set("armature", str(params["motor_armature"]))
+            if fl_avg is not None:
+                jnt.set("frictionloss", str(fl_avg))
+
+    # Rotor mass.
+    if "rotor_mass" in params:
+        for geom in root.iter("geom"):
+            if geom.get("name") == "rotor_disk":
+                geom.set("mass", str(params["rotor_mass"]))
+
+    # Write tuned MJCF.
+    tuned_mjcf_name = mjcf_path.stem + "_tuned" + mjcf_path.suffix
+    tuned_mjcf_path = asset_path / tuned_mjcf_name
+    ET.indent(tree, space="  ")
+    tree.write(str(tuned_mjcf_path), xml_declaration=True, encoding="unicode")
+    log.info("tuned_mjcf_written", path=str(tuned_mjcf_path))
+
+    # ── Tune robot.yaml ──────────────────────────────────────────
+    tuned_cfg = copy.deepcopy(robot_cfg)
+    tuned_cfg["mjcf"] = tuned_mjcf_name
+
+    if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
+        act = tuned_cfg["actuators"][0]
+        if gear_avg is not None:
+            act["gear"] = round(gear_avg, 6)
+        if "actuator_filter_tau" in params:
+            act["filter_tau"] = round(params["actuator_filter_tau"], 6)
+        if "motor_damping" in params:
+            act["damping"] = round(params["motor_damping"], 6)
+
+    if "joints" not in tuned_cfg:
+        tuned_cfg["joints"] = {}
+    if "motor_joint" not in tuned_cfg["joints"]:
+        tuned_cfg["joints"]["motor_joint"] = {}
+    mj = tuned_cfg["joints"]["motor_joint"]
+    if "motor_armature" in params:
+        mj["armature"] = round(params["motor_armature"], 6)
+    if fl_avg is not None:
+        mj["frictionloss"] = round(fl_avg, 6)
+
+    # Asymmetric / hardware-realism / nonlinear parameters.
+    realism = {}
+    for key in [
+        "actuator_gear_pos", "actuator_gear_neg",
+        "motor_damping_pos", "motor_damping_neg",
+        "motor_frictionloss_pos", "motor_frictionloss_neg",
+        "motor_deadzone_pos", "motor_deadzone_neg",
+        "action_bias",
+        "viscous_quadratic", "back_emf_gain",
+        "stribeck_friction_boost", "stribeck_vel",
+        "gearbox_backlash",
+    ]:
+        if key in params:
+            realism[key] = round(params[key], 6)
+    if realism:
+        tuned_cfg["hardware_realism"] = realism
+
+    tuned_yaml_path = asset_path / "robot_tuned.yaml"
+    header = (
+        "# Tuned motor config — generated by src.sysid.motor.optimize\n"
+        "# Original: robot.yaml\n\n"
+    )
+    tuned_yaml_path.write_text(
+        header + yaml.dump(tuned_cfg, default_flow_style=False, sort_keys=False)
+    )
+    log.info("tuned_robot_yaml_written", path=str(tuned_yaml_path))
+
+    return tuned_mjcf_path, tuned_yaml_path
+
+
+# ── CLI ──────────────────────────────────────────────────────────────
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Export tuned motor parameters to MJCF + robot.yaml."
+    )
+    parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
+    parser.add_argument(
+        "--result", type=str, default=None,
+        help="Path to motor_sysid_result.json (auto-detected if omitted)",
+    )
+    args = parser.parse_args()
+
+    asset_path = Path(args.asset_path).resolve()
+    if args.result:
+        result_path = Path(args.result)
+    else:
+        result_path = asset_path / "motor_sysid_result.json"
+
+    if not result_path.exists():
+        raise FileNotFoundError(f"Result file not found: {result_path}")
+
+    result = json.loads(result_path.read_text())
+    params = result["best_params"]
+
+    export_tuned_files(asset_path=args.asset_path, params=params)
+
+
+if __name__ == "__main__":
+    main()
--- a/src/sysid/motor/optimize.py
+++ b/src/sysid/motor/optimize.py
@@ -0,0 +1,367 @@
+"""CMA-ES optimiser — fit motor simulation parameters to a real recording.
+
+Motor-only version: minimises trajectory-matching cost between MuJoCo
+rollout and recorded motor angle + velocity.
+
+Usage:
+    python -m src.sysid.motor.optimize \
+        --recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
+
+    # Quick test:
+    python -m src.sysid.motor.optimize --recording <file>.npz \
+        --max-generations 20 --population-size 10
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import time
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+import structlog
+
+from src.sysid.motor.preprocess import recompute_velocity
+from src.sysid.motor.rollout import (
+    MOTOR_PARAMS,
+    ParamSpec,
+    bounds_arrays,
+    defaults_vector,
+    params_to_dict,
+    rollout,
+    windowed_rollout,
+)
+
+log = structlog.get_logger()
+
+_DEFAULT_ASSET = "assets/motor"
+
+
+# ── Cost function ────────────────────────────────────────────────────
+
+
+def _compute_trajectory_cost(
+    sim: dict[str, np.ndarray],
+    recording: dict[str, np.ndarray],
+    pos_weight: float = 1.0,
+    vel_weight: float = 0.5,
+    acc_weight: float = 0.0,
+    dt: float = 0.02,
+) -> float:
+    """Weighted MSE between simulated and real motor trajectories.
+
+    Motor-only: angle, velocity, and optionally acceleration.
+    Acceleration is computed as finite-difference of velocity.
+    """
+    angle_err = sim["motor_angle"] - recording["motor_angle"]
+    vel_err = sim["motor_vel"] - recording["motor_vel"]
+
+    # Reject NaN results (unstable simulation).
+    if np.any(~np.isfinite(angle_err)) or np.any(~np.isfinite(vel_err)):
+        return 1e6
+
+    cost = float(
+        pos_weight * np.mean(angle_err**2)
+        + vel_weight * np.mean(vel_err**2)
+    )
+
+    # Optional acceleration term — penalises wrong dynamics (d(vel)/dt).
+    if acc_weight > 0 and len(vel_err) > 2:
+        sim_acc = np.diff(sim["motor_vel"]) / dt
+        real_acc = np.diff(recording["motor_vel"]) / dt
+        acc_err = sim_acc - real_acc
+        if np.any(~np.isfinite(acc_err)):
+            return 1e6
+        # Normalise by typical acceleration scale (~50 rad/s²) to keep
+        # the weight intuitive relative to vel/pos terms.
+        cost += acc_weight * np.mean(acc_err**2) / (50.0**2)
+
+    return cost
+
+
+def cost_function(
+    params_vec: np.ndarray,
+    recording: dict[str, np.ndarray],
+    asset_path: Path,
+    specs: list[ParamSpec],
+    sim_dt: float = 0.002,
+    substeps: int = 10,
+    pos_weight: float = 1.0,
+    vel_weight: float = 0.5,
+    acc_weight: float = 0.0,
+    window_duration: float = 0.5,
+) -> float:
+    """Compute trajectory-matching cost for a candidate parameter vector.
+
+    Uses multiple-shooting (windowed rollout) by default.
+    """
+    params = params_to_dict(params_vec, specs)
+
+    try:
+        if window_duration > 0:
+            sim = windowed_rollout(
+                asset_path=asset_path,
+                params=params,
+                recording=recording,
+                window_duration=window_duration,
+                sim_dt=sim_dt,
+                substeps=substeps,
+            )
+        else:
+            sim = rollout(
+                asset_path=asset_path,
+                params=params,
+                actions=recording["action"],
+                sim_dt=sim_dt,
+                substeps=substeps,
+            )
+    except Exception as exc:
+        log.warning("rollout_failed", error=str(exc))
+        return 1e6
+
+    return _compute_trajectory_cost(
+        sim, recording, pos_weight, vel_weight, acc_weight,
+        dt=np.mean(np.diff(recording["time"])) if len(recording["time"]) > 1 else 0.02,
+    )
+
+
+# ── CMA-ES optimisation loop ────────────────────────────────────────
+
+
+def optimize(
+    asset_path: str | Path = _DEFAULT_ASSET,
+    recording_path: str | Path = "",
+    specs: list[ParamSpec] | None = None,
+    sigma0: float = 0.3,
+    population_size: int = 30,
+    max_generations: int = 300,
+    sim_dt: float = 0.002,
+    substeps: int = 10,
+    pos_weight: float = 1.0,
+    vel_weight: float = 0.5,
+    acc_weight: float = 0.0,
+    window_duration: float = 0.5,
+    seed: int = 42,
+    preprocess_vel: bool = True,
+    sg_window: int = 7,
+    sg_polyorder: int = 3,
+) -> dict:
+    """Run CMA-ES optimisation and return results dict."""
+    from cmaes import CMA
+
+    asset_path = Path(asset_path).resolve()
+    recording_path = Path(recording_path).resolve()
+
+    if specs is None:
+        specs = MOTOR_PARAMS
+
+    # Load recording.
+    recording = dict(np.load(recording_path))
+    n_samples = len(recording["time"])
+    duration = recording["time"][-1] - recording["time"][0]
+    n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
+    log.info(
+        "recording_loaded",
+        path=str(recording_path),
+        samples=n_samples,
+        duration=f"{duration:.1f}s",
+        n_windows=n_windows,
+    )
+
+    # Preprocess velocity: replace noisy firmware finite-difference with
+    # smooth Savitzky-Golay derivative of the angle signal.
+    if preprocess_vel:
+        recording = recompute_velocity(
+            recording,
+            window_length=sg_window,
+            polyorder=sg_polyorder,
+        )
+
+    # Normalise to [0, 1] for CMA-ES.
+    lo, hi = bounds_arrays(specs)
+    x0 = defaults_vector(specs)
+    span = hi - lo
+    span[span == 0] = 1.0
+
+    def to_normed(x: np.ndarray) -> np.ndarray:
+        return (x - lo) / span
+
+    def from_normed(x_n: np.ndarray) -> np.ndarray:
+        return x_n * span + lo
+
+    x0_normed = to_normed(x0)
+    bounds_normed = np.column_stack(
+        [np.zeros(len(specs)), np.ones(len(specs))]
+    )
+
+    optimizer = CMA(
+        mean=x0_normed,
+        sigma=sigma0,
+        bounds=bounds_normed,
+        population_size=population_size,
+        seed=seed,
+    )
+
+    best_cost = float("inf")
+    best_params_vec = x0.copy()
+    history: list[tuple[int, float]] = []
+
+    log.info(
+        "cmaes_starting",
+        n_params=len(specs),
+        population=population_size,
+        max_gens=max_generations,
+        sigma0=sigma0,
+    )
+
+    t0 = time.monotonic()
+
+    for gen in range(max_generations):
+        solutions = []
+        for _ in range(optimizer.population_size):
+            x_normed = optimizer.ask()
+            x_natural = from_normed(x_normed)
+            x_natural = np.clip(x_natural, lo, hi)
+
+            c = cost_function(
+                x_natural,
+                recording,
+                asset_path,
+                specs,
+                sim_dt=sim_dt,
+                substeps=substeps,
+                pos_weight=pos_weight,
+                vel_weight=vel_weight,
+                acc_weight=acc_weight,
+                window_duration=window_duration,
+            )
+            solutions.append((x_normed, c))
+
+            if c < best_cost:
+                best_cost = c
+                best_params_vec = x_natural.copy()
+
+        optimizer.tell(solutions)
+        history.append((gen, best_cost))
+
+        elapsed = time.monotonic() - t0
+        if gen % 5 == 0 or gen == max_generations - 1:
+            log.info(
+                "cmaes_generation",
+                gen=gen,
+                best_cost=f"{best_cost:.6f}",
+                elapsed=f"{elapsed:.1f}s",
+                gen_best=f"{min(c for _, c in solutions):.6f}",
+            )
+
+    total_time = time.monotonic() - t0
+    best_params = params_to_dict(best_params_vec, specs)
+
+    log.info(
+        "cmaes_finished",
+        best_cost=f"{best_cost:.6f}",
+        total_time=f"{total_time:.1f}s",
+        evaluations=max_generations * population_size,
+    )
+
+    # Log parameter comparison.
+    defaults = params_to_dict(defaults_vector(specs), specs)
+    for name in best_params:
+        d = defaults[name]
+        b = best_params[name]
+        change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
+        log.info(
+            "param_result",
+            name=name,
+            default=f"{d:.6g}",
+            tuned=f"{b:.6g}",
+            change=f"{change_pct:+.1f}%",
+        )
+
+    return {
+        "best_params": best_params,
+        "best_cost": best_cost,
+        "history": history,
+        "recording": str(recording_path),
+        "param_names": [s.name for s in specs],
+        "defaults": {s.name: s.default for s in specs},
+        "timestamp": datetime.now().isoformat(),
+    }
+
+
+# ── CLI ──────────────────────────────────────────────────────────────
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Fit motor simulation parameters to a real recording (CMA-ES)."
+    )
+    parser.add_argument(
+        "--asset-path", type=str, default=_DEFAULT_ASSET,
+        help="Path to motor asset directory",
+    )
+    parser.add_argument(
+        "--recording", type=str, required=True,
+        help="Path to .npz recording file",
+    )
+    parser.add_argument("--sigma0", type=float, default=0.3)
+    parser.add_argument("--population-size", type=int, default=30)
+    parser.add_argument("--max-generations", type=int, default=300)
+    parser.add_argument("--sim-dt", type=float, default=0.002)
+    parser.add_argument("--substeps", type=int, default=10)
+    parser.add_argument("--pos-weight", type=float, default=1.0)
+    parser.add_argument("--vel-weight", type=float, default=0.5)
+    parser.add_argument("--acc-weight", type=float, default=0.0,
+        help="Weight for acceleration matching (0=off, try 0.1-0.5)")
+    parser.add_argument("--window-duration", type=float, default=0.5)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument(
+        "--no-preprocess-vel", action="store_true",
+        help="Disable Savitzky-Golay velocity preprocessing",
+    )
+    parser.add_argument("--sg-window", type=int, default=7,
+        help="Savitzky-Golay window length (odd, default 7 = 140ms)")
+    parser.add_argument("--sg-polyorder", type=int, default=3,
+        help="Savitzky-Golay polynomial order (default 3 = cubic)")
+    args = parser.parse_args()
+
+    result = optimize(
+        asset_path=args.asset_path,
+        recording_path=args.recording,
+        sigma0=args.sigma0,
+        population_size=args.population_size,
+        max_generations=args.max_generations,
+        sim_dt=args.sim_dt,
+        substeps=args.substeps,
+        pos_weight=args.pos_weight,
+        vel_weight=args.vel_weight,
+        acc_weight=args.acc_weight,
+        window_duration=args.window_duration,
+        seed=args.seed,
+        preprocess_vel=not args.no_preprocess_vel,
+        sg_window=args.sg_window,
+        sg_polyorder=args.sg_polyorder,
+    )
+
+    # Save results JSON.
+    asset_path = Path(args.asset_path).resolve()
+    result_path = asset_path / "motor_sysid_result.json"
+    result_json = {k: v for k, v in result.items() if k != "history"}
+    result_json["history_summary"] = {
+        "first_cost": result["history"][0][1] if result["history"] else None,
+        "final_cost": result["history"][-1][1] if result["history"] else None,
+        "generations": len(result["history"]),
+    }
+    result_path.write_text(json.dumps(result_json, indent=2, default=str))
+    log.info("results_saved", path=str(result_path))
+
+    # Export tuned files.
+    from src.sysid.motor.export import export_tuned_files
+
+    export_tuned_files(asset_path=args.asset_path, params=result["best_params"])
+
+
+if __name__ == "__main__":
+    main()
--- a/src/sysid/motor/preprocess.py
+++ b/src/sysid/motor/preprocess.py
@@ -0,0 +1,114 @@
+"""Recording preprocessing — clean velocity estimation from angle data.
+
+The ESP32 firmware computes velocity as a raw finite-difference of encoder
+counts at 50 Hz.  With a 1320 CPR encoder that gives ~0.24 rad/s of
+quantisation noise per count.  This module replaces the noisy firmware
+velocity with a smooth differentiation of the (much cleaner) angle signal.
+
+Method: Savitzky-Golay filter applied to the angle signal, then
+differentiated analytically.  Zero phase lag, preserves transients well.
+
+This is standard practice in robotics sysid — see e.g. MATLAB System ID
+Toolbox, Drake's trajectory processing, or ETH's ANYmal sysid pipeline.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from scipy.signal import savgol_filter
+
+import structlog
+
+log = structlog.get_logger()
+
+
+def recompute_velocity(
+    recording: dict[str, np.ndarray],
+    window_length: int = 7,
+    polyorder: int = 3,
+    deriv: int = 1,
+) -> dict[str, np.ndarray]:
+    """Recompute motor_vel from motor_angle using Savitzky-Golay differentiation.
+
+    Parameters
+    ----------
+    recording : dict with at least 'time', 'motor_angle', 'motor_vel' keys.
+    window_length : SG filter window (must be odd, > polyorder).
+        7 samples at 50 Hz = 140ms window — good balance of smoothness
+        and responsiveness.  Captures dynamics up to ~7 Hz.
+    polyorder : Polynomial order for the SG filter (3 = cubic).
+    deriv : Derivative order (1 = first derivative = velocity).
+
+    Returns
+    -------
+    New recording dict with 'motor_vel' replaced and 'motor_vel_raw' added.
+    """
+    rec = dict(recording)  # shallow copy
+
+    times = rec["time"]
+    angles = rec["motor_angle"]
+    dt = np.mean(np.diff(times))
+
+    # Keep original for diagnostics.
+    rec["motor_vel_raw"] = rec["motor_vel"].copy()
+
+    # Savitzky-Golay derivative: fits a polynomial to each window,
+    # then takes the analytical derivative → smooth, zero phase lag.
+    vel_sg = savgol_filter(
+        angles,
+        window_length=window_length,
+        polyorder=polyorder,
+        deriv=deriv,
+        delta=dt,
+    )
+
+    # Compute stats for logging.
+    raw_vel = rec["motor_vel_raw"]
+    noise_estimate = np.std(raw_vel - vel_sg)
+    max_diff = np.max(np.abs(raw_vel - vel_sg))
+
+    log.info(
+        "velocity_recomputed",
+        method="savgol",
+        window=window_length,
+        polyorder=polyorder,
+        dt=f"{dt*1000:.1f}ms",
+        noise_std=f"{noise_estimate:.3f} rad/s",
+        max_diff=f"{max_diff:.3f} rad/s",
+        raw_rms=f"{np.sqrt(np.mean(raw_vel**2)):.3f}",
+        sg_rms=f"{np.sqrt(np.mean(vel_sg**2)):.3f}",
+    )
+
+    rec["motor_vel"] = vel_sg
+    return rec
+
+
+def smooth_velocity(
+    recording: dict[str, np.ndarray],
+    cutoff_hz: float = 10.0,
+) -> dict[str, np.ndarray]:
+    """Alternative: apply zero-phase Butterworth low-pass to motor_vel.
+
+    Simpler than SG derivative but introduces slight edge effects.
+    """
+    from scipy.signal import butter, filtfilt
+
+    rec = dict(recording)
+    rec["motor_vel_raw"] = rec["motor_vel"].copy()
+
+    dt = np.mean(np.diff(rec["time"]))
+    fs = 1.0 / dt
+    nyq = fs / 2.0
+    norm_cutoff = min(cutoff_hz / nyq, 0.99)
+
+    b, a = butter(2, norm_cutoff, btype="low")
+    rec["motor_vel"] = filtfilt(b, a, rec["motor_vel"])
+
+    log.info(
+        "velocity_smoothed",
+        method="butterworth",
+        cutoff_hz=cutoff_hz,
+        fs=fs,
+    )
+
+    return rec
--- a/src/sysid/motor/rollout.py
+++ b/src/sysid/motor/rollout.py
@@ -0,0 +1,460 @@
+"""Deterministic simulation replay — roll out recorded actions in MuJoCo.
+
+Motor-only version: single hinge joint, no pendulum.
+Given a parameter vector and recorded actions, builds a MuJoCo model
+with overridden dynamics, replays the actions, and returns the simulated
+motor angle + velocity for comparison with the real recording.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import xml.etree.ElementTree as ET
+from pathlib import Path
+
+import mujoco
+import numpy as np
+import yaml
+
+
+# ── Tunable parameter specification ──────────────────────────────────
+
+
+@dataclasses.dataclass
+class ParamSpec:
+    """Specification for a single tunable parameter."""
+
+    name: str
+    default: float
+    lower: float
+    upper: float
+    log_scale: bool = False
+
+
+# Motor-only parameters to identify.
+# These capture the full transfer function: PWM → shaft angle/velocity.
+#
+# Asymmetric parameters (pos/neg suffix) capture real-world differences
+# between CW and CCW rotation caused by gear mesh, brush contact,
+# and H-bridge asymmetry.
+MOTOR_PARAMS: list[ParamSpec] = [
+    # ── Actuator ─────────────────────────────────────────────────
+    # gear_pos/neg: effective torque per unit ctrl, split by direction.
+    # Real motors + L298N often have different drive strength per direction.
+    ParamSpec("actuator_gear_pos", 0.064, 0.005, 0.5, log_scale=True),
+    ParamSpec("actuator_gear_neg", 0.064, 0.005, 0.5, log_scale=True),
+    # filter_tau: first-order electrical/driver time constant (s).
+    # Lower bound 1ms — L298N PWM switching is very fast.
+    ParamSpec("actuator_filter_tau", 0.03, 0.001, 0.20),
+    # ── Joint dynamics ───────────────────────────────────────────
+    # damping_pos/neg: viscous friction (back-EMF), split by direction.
+    ParamSpec("motor_damping_pos", 0.003, 1e-5, 0.1, log_scale=True),
+    ParamSpec("motor_damping_neg", 0.003, 1e-5, 0.1, log_scale=True),
+    # armature: reflected rotor inertia (kg·m²).
+    ParamSpec("motor_armature", 0.0001, 1e-6, 0.01, log_scale=True),
+    # frictionloss_pos/neg: Coulomb friction, split by velocity direction.
+    ParamSpec("motor_frictionloss_pos", 0.03, 0.001, 0.2, log_scale=True),
+    ParamSpec("motor_frictionloss_neg", 0.03, 0.001, 0.2, log_scale=True),
+    # ── Nonlinear dynamics ───────────────────────────────────────
+    # viscous_quadratic: velocity-squared drag term (N·m·s²/rad²).
+    # Captures nonlinear friction that increases at high speed (air drag,
+    # grease viscosity, etc.).  Opposes motion.
+    ParamSpec("viscous_quadratic", 0.0, 0.0, 0.005),
+    # back_emf_gain: torque reduction proportional to |vel × ctrl|.
+    # Models the back-EMF effect: at high speed the motor produces less
+    # torque because the voltage drop across the armature is smaller.
+    ParamSpec("back_emf_gain", 0.0, 0.0, 0.05),
+    # stribeck_vel: characteristic velocity below which Coulomb friction
+    # is boosted (Stribeck effect).  0 = standard Coulomb only.
+    ParamSpec("stribeck_friction_boost", 0.0, 0.0, 0.15),
+    ParamSpec("stribeck_vel", 2.0, 0.1, 8.0),
+    # ── Rotor load ───────────────────────────────────────────────
+    ParamSpec("rotor_mass", 0.012, 0.002, 0.05, log_scale=True),
+    # ── Hardware realism ─────────────────────────────────────────
+    # deadzone_pos/neg: minimum |action| per direction.
+    ParamSpec("motor_deadzone_pos", 0.08, 0.0, 0.30),
+    ParamSpec("motor_deadzone_neg", 0.08, 0.0, 0.30),
+    # action_bias: constant offset added to ctrl (H-bridge asymmetry).
+    ParamSpec("action_bias", 0.0, -0.10, 0.10),
+    # ── Gearbox backlash ─────────────────────────────────────────
+    # backlash_halfwidth: half the angular deadband (rad) in the gearbox.
+    # When the motor reverses, the shaft doesn't move until the backlash
+    # gap is taken up.  Typical for 30:1 plastic/metal spur gears.
+    ParamSpec("gearbox_backlash", 0.0, 0.0, 0.15),
+]
+
+
+def params_to_dict(
+    values: np.ndarray, specs: list[ParamSpec] | None = None
+) -> dict[str, float]:
+    if specs is None:
+        specs = MOTOR_PARAMS
+    return {s.name: float(values[i]) for i, s in enumerate(specs)}
+
+
+def defaults_vector(specs: list[ParamSpec] | None = None) -> np.ndarray:
+    if specs is None:
+        specs = MOTOR_PARAMS
+    return np.array([s.default for s in specs], dtype=np.float64)
+
+
+def bounds_arrays(
+    specs: list[ParamSpec] | None = None,
+) -> tuple[np.ndarray, np.ndarray]:
+    if specs is None:
+        specs = MOTOR_PARAMS
+    lo = np.array([s.lower for s in specs], dtype=np.float64)
+    hi = np.array([s.upper for s in specs], dtype=np.float64)
+    return lo, hi
+
+
+# ── MuJoCo model building with parameter overrides ──────────────────
+
+
+def _build_model(
+    asset_path: Path,
+    params: dict[str, float],
+) -> mujoco.MjModel:
+    """Build a MuJoCo model from motor.xml with parameter overrides.
+
+    Parses the MJCF, patches actuator/joint/body parameters, reloads.
+    """
+    asset_path = Path(asset_path).resolve()
+    robot_cfg = yaml.safe_load((asset_path / "robot.yaml").read_text())
+    mjcf_path = asset_path / robot_cfg["mjcf"]
+
+    tree = ET.parse(str(mjcf_path))
+    root = tree.getroot()
+
+    # ── Actuator overrides ───────────────────────────────────────
+    # Use average of pos/neg gear for MuJoCo (asymmetry handled in ctrl).
+    gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
+    gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
+    gear = (gear_pos + gear_neg) / 2.0
+    filter_tau = params.get("actuator_filter_tau", 0.03)
+
+    for act_el in root.iter("general"):
+        if act_el.get("name") == "motor":
+            act_el.set("gear", str(gear))
+            if filter_tau > 0:
+                act_el.set("dyntype", "filter")
+                act_el.set("dynprm", str(filter_tau))
+            else:
+                act_el.set("dyntype", "none")
+                act_el.set("dynprm", "1")
+
+    # ── Joint overrides ──────────────────────────────────────────
+    # Damping and friction are asymmetric + nonlinear → applied manually.
+    # Set MuJoCo damping & frictionloss to 0; we handle them in qfrc_applied.
+    armature = params.get("motor_armature", 0.0001)
+
+    for jnt in root.iter("joint"):
+        if jnt.get("name") == "motor_joint":
+            jnt.set("damping", "0")
+            jnt.set("armature", str(armature))
+            jnt.set("frictionloss", "0")
+
+    # ── Rotor mass override ──────────────────────────────────────
+    rotor_mass = params.get("rotor_mass", 0.012)
+    for geom in root.iter("geom"):
+        if geom.get("name") == "rotor_disk":
+            geom.set("mass", str(rotor_mass))
+
+    # Write temp file and load.
+    tmp_path = asset_path / "_tmp_motor_sysid.xml"
+    try:
+        tree.write(str(tmp_path), xml_declaration=True, encoding="unicode")
+        model = mujoco.MjModel.from_xml_path(str(tmp_path))
+    finally:
+        tmp_path.unlink(missing_ok=True)
+
+    return model
+
+
+# ── Action + asymmetry transforms ────────────────────────────────────
+
+
+def _transform_action(
+    action: float,
+    params: dict[str, float],
+) -> float:
+    """Apply bias, direction-dependent deadzone, and gear scaling.
+
+    The MuJoCo actuator has the *average* gear ratio.  We rescale the
+    control signal so that ``ctrl × gear_avg ≈ action × gear_dir``.
+    """
+    # Constant bias (H-bridge asymmetry).
+    action = action + params.get("action_bias", 0.0)
+
+    # Direction-dependent deadzone.
+    dz_pos = params.get("motor_deadzone_pos", params.get("motor_deadzone", 0.08))
+    dz_neg = params.get("motor_deadzone_neg", params.get("motor_deadzone", 0.08))
+    if action >= 0 and action < dz_pos:
+        return 0.0
+    if action < 0 and action > -dz_neg:
+        return 0.0
+
+    # Direction-dependent gear scaling.
+    # MuJoCo model uses gear_avg; we rescale ctrl to get the right torque.
+    gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
+    gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
+    gear_avg = (gear_pos + gear_neg) / 2.0
+    if gear_avg < 1e-8:
+        return 0.0
+    gear_dir = gear_pos if action >= 0 else gear_neg
+    return action * (gear_dir / gear_avg)
+
+
+def _apply_forces(
+    data: mujoco.MjData,
+    vel: float,
+    ctrl: float,
+    params: dict[str, float],
+    backlash_state: list[float] | None = None,
+) -> None:
+    """Apply all manual forces: asymmetric friction, damping, and nonlinear terms.
+
+    Everything that MuJoCo can't represent with its symmetric joint model
+    is injected here via ``qfrc_applied``.
+
+    Forces applied (all oppose motion or reduce torque):
+      1. Asymmetric Coulomb friction (with Stribeck boost at low speed)
+      2. Asymmetric viscous damping
+      3. Quadratic velocity drag
+      4. Back-EMF torque reduction (proportional to |vel|)
+
+    Backlash:
+      If backlash_state is provided, it is a 1-element list [gap_pos].
+      gap_pos tracks the motor's position within the backlash deadband.
+      When inside the gap, no actuator torque is transmitted to the
+      output shaft — only friction forces act.
+    """
+    torque = 0.0
+
+    # ── Gearbox backlash ──────────────────────────────────────────
+    # Model: the gear teeth have play of 2×halfwidth radians.
+    # We track where the motor is within that gap.  When at the
+    # edge (contact), actuator torque passes through normally.
+    # When inside the gap, no actuator torque is transmitted.
+    backlash_hw = params.get("gearbox_backlash", 0.0)
+    actuator_torque_scale = 1.0  # 1.0 = full contact, 0.0 = in gap
+
+    if backlash_hw > 0 and backlash_state is not None:
+        # gap_pos: how far into the backlash gap we are.
+        # Range: [-backlash_hw, +backlash_hw]
+        # At ±backlash_hw, gears are in contact and torque transmits.
+        gap = backlash_state[0]
+        # Update gap position based on velocity.
+        dt_sub = data.model.opt.timestep
+        gap += vel * dt_sub
+        # Clamp to backlash range.
+        if gap > backlash_hw:
+            gap = backlash_hw
+        elif gap < -backlash_hw:
+            gap = -backlash_hw
+
+        backlash_state[0] = gap
+
+        # If not at contact edge, no torque transmitted.
+        if abs(gap) < backlash_hw - 1e-8:
+            actuator_torque_scale = 0.0
+        else:
+            actuator_torque_scale = 1.0
+
+    # ── 1. Coulomb friction (direction-dependent + Stribeck) ─────
+    fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss", 0.03))
+    fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss", 0.03))
+    stribeck_boost = params.get("stribeck_friction_boost", 0.0)
+    stribeck_vel = params.get("stribeck_vel", 2.0)
+
+    if abs(vel) > 1e-6:
+        fl = fl_pos if vel > 0 else fl_neg
+        # Stribeck: boost friction at low speed. Exponential decay.
+        if stribeck_boost > 0 and stribeck_vel > 0:
+            fl = fl * (1.0 + stribeck_boost * np.exp(-abs(vel) / stribeck_vel))
+        # Coulomb: constant magnitude, opposes motion.
+        torque -= np.sign(vel) * fl
+
+    # ── 2. Asymmetric viscous damping ────────────────────────────
+    damp_pos = params.get("motor_damping_pos", params.get("motor_damping", 0.003))
+    damp_neg = params.get("motor_damping_neg", params.get("motor_damping", 0.003))
+    damp = damp_pos if vel > 0 else damp_neg
+    torque -= damp * vel
+
+    # ── 3. Quadratic velocity drag ───────────────────────────────
+    visc_quad = params.get("viscous_quadratic", 0.0)
+    if visc_quad > 0:
+        torque -= visc_quad * vel * abs(vel)
+
+    # ── 4. Back-EMF torque reduction ─────────────────────────────
+    # At high speed, the motor's effective torque is reduced because
+    # back-EMF opposes the supply voltage.  Modelled as a torque that
+    # opposes the control signal proportional to speed.
+    bemf = params.get("back_emf_gain", 0.0)
+    if bemf > 0 and abs(ctrl) > 1e-6:
+        # The reduction should oppose the actuator torque direction.
+        torque -= bemf * vel * np.sign(ctrl) * actuator_torque_scale
+
+    # ── 5. Scale actuator contribution by backlash state ─────────
+    # When in the backlash gap, MuJoCo's actuator force should not
+    # transmit.  We cancel it by applying an opposing force.
+    if actuator_torque_scale < 1.0:
+        # The actuator_force from MuJoCo will be applied by mj_step.
+        # We need to counteract it.  data.qfrc_actuator isn't set yet
+        # at this point (pre-step), so we zero the ctrl instead.
+        # This is handled in the rollout loop by zeroing ctrl.
+        pass
+
+    data.qfrc_applied[0] = max(-10.0, min(10.0, torque))
+    return actuator_torque_scale
+
+
+# ── Simulation rollout ───────────────────────────────────────────────
+
+
+def rollout(
+    asset_path: str | Path,
+    params: dict[str, float],
+    actions: np.ndarray,
+    sim_dt: float = 0.002,
+    substeps: int = 10,
+) -> dict[str, np.ndarray]:
+    """Open-loop replay of recorded actions.
+
+    Parameters
+    ----------
+    asset_path : motor asset directory
+    params : named parameter overrides
+    actions : (N,) normalised actions [-1, 1] from the recording
+    sim_dt : MuJoCo physics timestep
+    substeps : physics substeps per control step (ctrl_dt = sim_dt × substeps)
+
+    Returns
+    -------
+    dict with motor_angle (N,) and motor_vel (N,).
+    """
+    asset_path = Path(asset_path).resolve()
+    model = _build_model(asset_path, params)
+    model.opt.timestep = sim_dt
+    data = mujoco.MjData(model)
+    mujoco.mj_resetData(model, data)
+
+    n = len(actions)
+
+    sim_motor_angle = np.zeros(n, dtype=np.float64)
+    sim_motor_vel = np.zeros(n, dtype=np.float64)
+
+    # Backlash state: [gap_position].  Starts at 0 (centered in gap).
+    backlash_state = [0.0]
+
+    for i in range(n):
+        ctrl = _transform_action(actions[i], params)
+        data.ctrl[0] = ctrl
+
+        for _ in range(substeps):
+            scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
+            # If in backlash gap, zero ctrl so actuator torque doesn't transmit.
+            if scale < 1.0:
+                data.ctrl[0] = 0.0
+            else:
+                data.ctrl[0] = ctrl
+            mujoco.mj_step(model, data)
+
+        # Bail out on NaN/instability.
+        if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
+            sim_motor_angle[i:] = np.nan
+            sim_motor_vel[i:] = np.nan
+            break
+
+        sim_motor_angle[i] = data.qpos[0]
+        sim_motor_vel[i] = data.qvel[0]
+
+    return {
+        "motor_angle": sim_motor_angle,
+        "motor_vel": sim_motor_vel,
+    }
+
+
+def windowed_rollout(
+    asset_path: str | Path,
+    params: dict[str, float],
+    recording: dict[str, np.ndarray],
+    window_duration: float = 0.5,
+    sim_dt: float = 0.002,
+    substeps: int = 10,
+) -> dict[str, np.ndarray]:
+    """Multiple-shooting rollout for motor-only sysid.
+
+    Splits the recording into short windows. Each window is initialised
+    from the real motor state, preventing error accumulation.
+
+    Returns
+    -------
+    dict with motor_angle (N,), motor_vel (N,), n_windows (int).
+    """
+    asset_path = Path(asset_path).resolve()
+    model = _build_model(asset_path, params)
+    model.opt.timestep = sim_dt
+    data = mujoco.MjData(model)
+
+    times = recording["time"]
+    actions = recording["action"]
+    real_angle = recording["motor_angle"]
+    real_vel = recording["motor_vel"]
+    n = len(actions)
+
+    sim_motor_angle = np.zeros(n, dtype=np.float64)
+    sim_motor_vel = np.zeros(n, dtype=np.float64)
+
+    # Compute window boundaries.
+    t0 = times[0]
+    t_end = times[-1]
+    window_starts: list[int] = []
+    current_t = t0
+    while current_t < t_end:
+        idx = int(np.searchsorted(times, current_t))
+        idx = min(idx, n - 1)
+        window_starts.append(idx)
+        current_t += window_duration
+
+    n_windows = len(window_starts)
+
+    for w, w_start in enumerate(window_starts):
+        w_end = window_starts[w + 1] if w + 1 < n_windows else n
+
+        # Init from real state at window start.
+        mujoco.mj_resetData(model, data)
+        data.qpos[0] = real_angle[w_start]
+        data.qvel[0] = real_vel[w_start]
+        data.ctrl[:] = 0.0
+        mujoco.mj_forward(model, data)
+
+        # Backlash state resets each window (assume contact at start).
+        backlash_state = [0.0]
+
+        for i in range(w_start, w_end):
+            ctrl = _transform_action(actions[i], params)
+            data.ctrl[0] = ctrl
+
+            for _ in range(substeps):
+                scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
+                if scale < 1.0:
+                    data.ctrl[0] = 0.0
+                else:
+                    data.ctrl[0] = ctrl
+                mujoco.mj_step(model, data)
+
+            # Bail out on NaN/instability — fill rest of window with last good.
+            if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
+                sim_motor_angle[i:w_end] = sim_motor_angle[max(i-1, w_start)]
+                sim_motor_vel[i:w_end] = 0.0
+                break
+
+            sim_motor_angle[i] = data.qpos[0]
+            sim_motor_vel[i] = data.qvel[0]
+
+    return {
+        "motor_angle": sim_motor_angle,
+        "motor_vel": sim_motor_vel,
+        "n_windows": n_windows,
+    }
--- a/src/sysid/motor/visualize.py
+++ b/src/sysid/motor/visualize.py
@@ -0,0 +1,204 @@
+"""Visualise motor system identification — real vs simulated trajectories.
+
+Usage:
+    python -m src.sysid.motor.visualize \
+        --recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
+
+    # With tuned result:
+    python -m src.sysid.motor.visualize \
+        --recording <file>.npz \
+        --result assets/motor/motor_sysid_result.json
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+import numpy as np
+import structlog
+
+log = structlog.get_logger()
+
+_DEFAULT_ASSET = "assets/motor"
+
+
+def visualize(
+    asset_path: str | Path = _DEFAULT_ASSET,
+    recording_path: str | Path = "",
+    result_path: str | Path | None = None,
+    sim_dt: float = 0.002,
+    substeps: int = 10,
+    window_duration: float = 0.5,
+    save_path: str | Path | None = None,
+    show: bool = True,
+) -> None:
+    """Generate 3-panel comparison plot: angle, velocity, action."""
+    import matplotlib.pyplot as plt
+
+    from src.sysid.motor.rollout import (
+        MOTOR_PARAMS,
+        defaults_vector,
+        params_to_dict,
+        rollout,
+        windowed_rollout,
+    )
+
+    asset_path = Path(asset_path).resolve()
+    recording = dict(np.load(recording_path))
+
+    t = recording["time"]
+    actions = recording["action"]
+
+    # ── Simulate with default parameters ─────────────────────────
+    default_params = params_to_dict(defaults_vector(MOTOR_PARAMS), MOTOR_PARAMS)
+    log.info("simulating_default_params", windowed=window_duration > 0)
+
+    if window_duration > 0:
+        sim_default = windowed_rollout(
+            asset_path=asset_path,
+            params=default_params,
+            recording=recording,
+            window_duration=window_duration,
+            sim_dt=sim_dt,
+            substeps=substeps,
+        )
+    else:
+        sim_default = rollout(
+            asset_path=asset_path,
+            params=default_params,
+            actions=actions,
+            sim_dt=sim_dt,
+            substeps=substeps,
+        )
+
+    # ── Simulate with tuned parameters (if available) ────────────
+    sim_tuned = None
+    tuned_cost = None
+
+    if result_path is not None:
+        result_path = Path(result_path)
+    else:
+        # Auto-detect.
+        auto = asset_path / "motor_sysid_result.json"
+        if auto.exists():
+            result_path = auto
+
+    if result_path is not None and result_path.exists():
+        result = json.loads(result_path.read_text())
+        tuned_params = result.get("best_params", {})
+        tuned_cost = result.get("best_cost")
+        log.info("simulating_tuned_params", cost=tuned_cost)
+
+        if window_duration > 0:
+            sim_tuned = windowed_rollout(
+                asset_path=asset_path,
+                params=tuned_params,
+                recording=recording,
+                window_duration=window_duration,
+                sim_dt=sim_dt,
+                substeps=substeps,
+            )
+        else:
+            sim_tuned = rollout(
+                asset_path=asset_path,
+                params=tuned_params,
+                actions=actions,
+                sim_dt=sim_dt,
+                substeps=substeps,
+            )
+
+    # ── Plot ─────────────────────────────────────────────────────
+    fig, axes = plt.subplots(3, 1, figsize=(14, 8), sharex=True)
+
+    # Motor angle.
+    ax = axes[0]
+    ax.plot(t, np.degrees(recording["motor_angle"]), "k-", lw=1.2, alpha=0.8, label="Real")
+    ax.plot(t, np.degrees(sim_default["motor_angle"]), "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
+    if sim_tuned is not None:
+        ax.plot(t, np.degrees(sim_tuned["motor_angle"]), "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
+    ax.set_ylabel("Motor Angle (°)")
+    ax.legend(loc="upper right", fontsize=8)
+    ax.grid(True, alpha=0.3)
+
+    # Motor velocity.
+    ax = axes[1]
+    ax.plot(t, recording["motor_vel"], "k-", lw=1.2, alpha=0.8, label="Real")
+    ax.plot(t, sim_default["motor_vel"], "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
+    if sim_tuned is not None:
+        ax.plot(t, sim_tuned["motor_vel"], "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
+    ax.set_ylabel("Motor Velocity (rad/s)")
+    ax.legend(loc="upper right", fontsize=8)
+    ax.grid(True, alpha=0.3)
+
+    # Action.
+    ax = axes[2]
+    ax.plot(t, actions, "b-", lw=0.8, alpha=0.6)
+    ax.set_ylabel("Action (norm)")
+    ax.set_xlabel("Time (s)")
+    ax.grid(True, alpha=0.3)
+    ax.set_ylim(-1.1, 1.1)
+
+    # Title.
+    title = "Motor System Identification — Real vs Simulated"
+    if tuned_cost is not None:
+        from src.sysid.motor.optimize import cost_function
+
+        orig_cost = cost_function(
+            defaults_vector(MOTOR_PARAMS),
+            recording,
+            asset_path,
+            MOTOR_PARAMS,
+            sim_dt=sim_dt,
+            substeps=substeps,
+            window_duration=window_duration,
+        )
+        title += f"\nDefault cost: {orig_cost:.4f}  →  Tuned cost: {tuned_cost:.4f}"
+        improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0
+        title += f"  ({improvement:+.1f}%)"
+
+    fig.suptitle(title, fontsize=12)
+    plt.tight_layout()
+
+    if save_path:
+        fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
+        log.info("figure_saved", path=str(save_path))
+
+    if show:
+        plt.show()
+    else:
+        plt.close(fig)
+
+
+# ── CLI ──────────────────────────────────────────────────────────────
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Visualise motor system identification results."
+    )
+    parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
+    parser.add_argument("--recording", type=str, required=True, help=".npz file")
+    parser.add_argument("--result", type=str, default=None, help="sysid result JSON")
+    parser.add_argument("--sim-dt", type=float, default=0.002)
+    parser.add_argument("--substeps", type=int, default=10)
+    parser.add_argument("--window-duration", type=float, default=0.5)
+    parser.add_argument("--save", type=str, default=None, help="Save figure path")
+    parser.add_argument("--no-show", action="store_true")
+    args = parser.parse_args()
+
+    visualize(
+        asset_path=args.asset_path,
+        recording_path=args.recording,
+        result_path=args.result,
+        sim_dt=args.sim_dt,
+        substeps=args.substeps,
+        window_duration=args.window_duration,
+        save_path=args.save,
+        show=not args.no_show,
+    )
+
+
+if __name__ == "__main__":
+    main()
--- a/src/sysid/optimize.py
+++ b/src/sysid/optimize.py
@@ -4,10 +4,15 @@ Minimises the trajectory-matching cost between a MuJoCo rollout and a
 recorded real-robot sequence.  Uses the ``cmaes`` package (pure-Python
 CMA-ES with native box-constraint support).

+Motor parameters are **locked** from the motor-only sysid — only
+pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
+optimised.  Velocities are optionally preprocessed with Savitzky-Golay
+differentiation for cleaner targets.
+
 Usage:
    python -m src.sysid.optimize \
        --robot-path assets/rotary_cartpole \
-        --recording assets/rotary_cartpole/recordings/capture_20260311_120000.npz
+        --recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz

    # Shorter run for testing:
    python -m src.sysid.optimize \
@@ -28,6 +33,8 @@ import numpy as np
 import structlog

 from src.sysid.rollout import (
+    LOCKED_MOTOR_PARAMS,
+    PARAM_SETS,
    ROTARY_CARTPOLE_PARAMS,
    ParamSpec,
    bounds_arrays,
@@ -40,6 +47,65 @@ from src.sysid.rollout import (
 log = structlog.get_logger()


+# ── Velocity preprocessing ───────────────────────────────────────────
+
+
+def _preprocess_recording(
+    recording: dict[str, np.ndarray],
+    preprocess_vel: bool = True,
+    sg_window: int = 7,
+    sg_polyorder: int = 3,
+) -> dict[str, np.ndarray]:
+    """Optionally recompute velocities using Savitzky-Golay differentiation.
+
+    Applies SG filtering to both motor_vel and pendulum_vel, replacing
+    the noisy firmware finite-difference velocities with smooth
+    analytical derivatives of the (clean) angle signals.
+    """
+    if not preprocess_vel:
+        return recording
+
+    from scipy.signal import savgol_filter
+
+    rec = dict(recording)
+    times = rec["time"]
+    dt = float(np.mean(np.diff(times)))
+
+    # Motor velocity.
+    rec["motor_vel_raw"] = rec["motor_vel"].copy()
+    rec["motor_vel"] = savgol_filter(
+        rec["motor_angle"],
+        window_length=sg_window,
+        polyorder=sg_polyorder,
+        deriv=1,
+        delta=dt,
+    )
+
+    # Pendulum velocity.
+    rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
+    rec["pendulum_vel"] = savgol_filter(
+        rec["pendulum_angle"],
+        window_length=sg_window,
+        polyorder=sg_polyorder,
+        deriv=1,
+        delta=dt,
+    )
+
+    motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
+    pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
+    log.info(
+        "velocity_preprocessed",
+        method="savgol",
+        sg_window=sg_window,
+        sg_polyorder=sg_polyorder,
+        dt_ms=f"{dt*1000:.1f}",
+        motor_noise_std=f"{motor_noise:.3f} rad/s",
+        pend_noise_std=f"{pend_noise:.3f} rad/s",
+    )
+
+    return rec
+
+
 # ── Cost function ────────────────────────────────────────────────────


@@ -63,18 +129,35 @@ def _compute_trajectory_cost(
    recording: dict[str, np.ndarray],
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
+    pendulum_scale: float = 3.0,
+    vel_outlier_threshold: float = 20.0,
 ) -> float:
-    """Weighted MSE between sim and real trajectories."""
+    """Weighted MSE between sim and real trajectories.
+
+    pendulum_scale multiplies the pendulum terms relative to motor terms.
+
+    Samples where the *real* pendulum velocity exceeds
+    ``vel_outlier_threshold`` (rad/s) are excluded from the velocity
+    terms.  These are encoder-wrap artefacts (pendulum swinging past
+    vertical) that no simulator can reproduce.
+    """
    motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"])
    pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"])
    motor_vel_err = sim["motor_vel"] - recording["motor_vel"]
    pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"]

+    # Mask out encoder-wrap velocity spikes so the optimizer doesn't
+    # waste capacity fitting artefacts.
+    valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold
+    if valid.sum() < len(valid):
+        motor_vel_err = motor_vel_err[valid]
+        pend_vel_err = pend_vel_err[valid]
+
    return float(
        pos_weight * np.mean(motor_err**2)
-        + pos_weight * np.mean(pend_err**2)
+        + pos_weight * pendulum_scale * np.mean(pend_err**2)
        + vel_weight * np.mean(motor_vel_err**2)
-        + vel_weight * np.mean(pend_vel_err**2)
+        + vel_weight * pendulum_scale * np.mean(pend_vel_err**2)
    )


@@ -87,7 +170,9 @@ def cost_function(
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
+    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
+    motor_params: dict[str, float] | None = None,
 ) -> float:
    """Compute trajectory-matching cost for a candidate parameter vector.

@@ -113,21 +198,56 @@ def cost_function(
                window_duration=window_duration,
                sim_dt=sim_dt,
                substeps=substeps,
+                motor_params=motor_params,
            )
        else:
            sim = rollout(
                robot_path=robot_path,
                params=params,
                actions=recording["action"],
-                timesteps=recording["time"],
                sim_dt=sim_dt,
                substeps=substeps,
+                motor_params=motor_params,
            )
    except Exception as exc:
        log.warning("rollout_failed", error=str(exc))
        return 1e6

-    return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight)
+    # Check for NaN in sim output.
+    for key in ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel"):
+        if np.any(~np.isfinite(sim[key])):
+            return 1e6
+
+    return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
+
+
+# ── Parallel evaluation helper (module-level for pickling) ───────────
+
+# Shared state set by the parent process before spawning workers.
+_par_recording: dict[str, np.ndarray] = {}
+_par_robot_path: Path = Path(".")
+_par_specs: list[ParamSpec] = []
+_par_kwargs: dict = {}
+
+
+def _init_worker(recording, robot_path, specs, kwargs):
+    """Initialiser run once per worker process."""
+    global _par_recording, _par_robot_path, _par_specs, _par_kwargs
+    _par_recording = recording
+    _par_robot_path = robot_path
+    _par_specs = specs
+    _par_kwargs = kwargs
+
+
+def _eval_candidate(x_natural: np.ndarray) -> float:
+    """Evaluate a single candidate — called in worker processes."""
+    return cost_function(
+        x_natural,
+        _par_recording,
+        _par_robot_path,
+        _par_specs,
+        **_par_kwargs,
+    )


 # ── CMA-ES optimisation loop ────────────────────────────────────────
@@ -144,17 +264,25 @@ def optimize(
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
+    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
    seed: int = 42,
+    preprocess_vel: bool = True,
+    sg_window: int = 7,
+    sg_polyorder: int = 3,
 ) -> dict:
    """Run CMA-ES optimisation and return results.

+    Motor parameters are locked from the motor-only sysid.
+    Only pendulum/arm parameters are optimised.
+
    Returns a dict with:
        best_params: dict[str, float]
        best_cost: float
        history: list of (generation, best_cost) tuples
        recording: str (path used)
        specs: list of param names
+        motor_params: dict of locked motor params
    """
    from cmaes import CMA

@@ -164,8 +292,24 @@ def optimize(
    if specs is None:
        specs = ROTARY_CARTPOLE_PARAMS

+    motor_params = dict(LOCKED_MOTOR_PARAMS)
+    log.info(
+        "motor_params_locked",
+        n_params=len(motor_params),
+        gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
+    )
+
    # Load recording.
    recording = dict(np.load(recording_path))
+
+    # Preprocessing: SG velocity recomputation.
+    recording = _preprocess_recording(
+        recording,
+        preprocess_vel=preprocess_vel,
+        sg_window=sg_window,
+        sg_polyorder=sg_polyorder,
+    )
+
    n_samples = len(recording["time"])
    duration = recording["time"][-1] - recording["time"][0]
    n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
@@ -219,28 +363,54 @@ def optimize(

    t0 = time.monotonic()

+    # ── Parallel evaluation setup ────────────────────────────────
+    # Each candidate is independent — evaluate them in parallel using
+    # a process pool.  Falls back to sequential if n_workers=1.
+    import multiprocessing as mp
+    n_workers = max(1, mp.cpu_count() - 1)  # leave 1 core free
+
+    eval_kwargs = dict(
+        sim_dt=sim_dt,
+        substeps=substeps,
+        pos_weight=pos_weight,
+        vel_weight=vel_weight,
+        pendulum_scale=pendulum_scale,
+        window_duration=window_duration,
+        motor_params=motor_params,
+    )
+
+    log.info("parallel_workers", n_workers=n_workers)
+
+    # Create a persistent pool (avoids per-generation fork overhead).
+    pool = None
+    if n_workers > 1:
+        pool = mp.Pool(
+            n_workers,
+            initializer=_init_worker,
+            initargs=(recording, robot_path, specs, eval_kwargs),
+        )
+
    for gen in range(max_generations):
-        solutions = []
+        # Ask all candidates first.
+        candidates_normed = []
+        candidates_natural = []
        for _ in range(optimizer.population_size):
            x_normed = optimizer.ask()
            x_natural = from_normed(x_normed)
-
-            # Clip to bounds (CMA-ES can slightly exceed with sampling noise).
            x_natural = np.clip(x_natural, lo, hi)
+            candidates_normed.append(x_normed)
+            candidates_natural.append(x_natural)

-            c = cost_function(
-                x_natural,
-                recording,
-                robot_path,
-                specs,
-                sim_dt=sim_dt,
-                substeps=substeps,
-                pos_weight=pos_weight,
-                vel_weight=vel_weight,
-                window_duration=window_duration,
-            )
-            solutions.append((x_normed, c))
+        # Evaluate in parallel.
+        if pool is not None:
+            costs = pool.map(_eval_candidate, candidates_natural)
+        else:
+            costs = [cost_function(
+                x, recording, robot_path, specs, **eval_kwargs
+            ) for x in candidates_natural]

+        solutions = list(zip(candidates_normed, costs))
+        for x_natural, c in zip(candidates_natural, costs):
            if c < best_cost:
                best_cost = c
                best_params_vec = x_natural.copy()
@@ -259,6 +429,12 @@ def optimize(
            )

    total_time = time.monotonic() - t0
+
+    # Clean up process pool.
+    if pool is not None:
+        pool.close()
+        pool.join()
+
    best_params = params_to_dict(best_params_vec, specs)

    log.info(
@@ -289,6 +465,8 @@ def optimize(
        "recording": str(recording_path),
        "param_names": [s.name for s in specs],
        "defaults": {s.name: s.default for s in specs},
+        "motor_params": motor_params,
+        "preprocess_vel": preprocess_vel,
        "timestamp": datetime.now().isoformat(),
    }

@@ -319,6 +497,8 @@ def main() -> None:
    parser.add_argument("--substeps", type=int, default=10)
    parser.add_argument("--pos-weight", type=float, default=1.0)
    parser.add_argument("--vel-weight", type=float, default=0.1)
+    parser.add_argument("--pendulum-scale", type=float, default=3.0,
+                        help="Multiplier for pendulum terms relative to motor (default 3.0)")
    parser.add_argument(
        "--window-duration",
        type=float,
@@ -331,11 +511,30 @@ def main() -> None:
        action="store_true",
        help="Skip exporting tuned files (results JSON only)",
    )
+    parser.add_argument(
+        "--no-preprocess-vel",
+        action="store_true",
+        help="Skip Savitzky-Golay velocity preprocessing",
+    )
+    parser.add_argument("--sg-window", type=int, default=7,
+                        help="Savitzky-Golay window length (odd, default 7)")
+    parser.add_argument("--sg-polyorder", type=int, default=3,
+                        help="Savitzky-Golay polynomial order (default 3)")
+    parser.add_argument(
+        "--param-set",
+        type=str,
+        default="full",
+        choices=list(PARAM_SETS.keys()),
+        help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
+    )
    args = parser.parse_args()

+    specs = PARAM_SETS[args.param_set]
+
    result = optimize(
        robot_path=args.robot_path,
        recording_path=args.recording,
+        specs=specs,
        sigma0=args.sigma0,
        population_size=args.population_size,
        max_generations=args.max_generations,
@@ -343,8 +542,12 @@ def main() -> None:
        substeps=args.substeps,
        pos_weight=args.pos_weight,
        vel_weight=args.vel_weight,
+        pendulum_scale=args.pendulum_scale,
        window_duration=args.window_duration,
        seed=args.seed,
+        preprocess_vel=not args.no_preprocess_vel,
+        sg_window=args.sg_window,
+        sg_polyorder=args.sg_polyorder,
    )

    # Save results JSON.
@@ -369,6 +572,7 @@ def main() -> None:
        export_tuned_files(
            robot_path=args.robot_path,
            params=result["best_params"],
+            motor_params=result.get("motor_params"),
        )


--- a/src/sysid/rollout.py
+++ b/src/sysid/rollout.py
@@ -6,22 +6,51 @@ the simulated trajectory for comparison with the real recording.

 This module is the inner loop of the CMA-ES optimizer: it is called once
 per candidate parameter vector per generation.
+
+Motor parameters are **locked** from the motor-only sysid result.
+The optimizer only fits
+pendulum/arm inertial parameters, pendulum joint dynamics, and
+``ctrl_limit``.  The asymmetric motor model (deadzone, gear compensation,
+Coulomb friction, viscous damping, quadratic drag, back-EMF) is applied
+via ``ActuatorConfig.transform_ctrl()`` and ``compute_motor_force()``.
 """

 from __future__ import annotations

-import copy
 import dataclasses
-import math
+import os
 import tempfile
 import xml.etree.ElementTree as ET
 from pathlib import Path
-from typing import Any

 import mujoco
 import numpy as np
 import yaml

+from src.core.robot import ActuatorConfig, JointConfig, RobotConfig
+from src.runners.mujoco import ActuatorLimits, load_mujoco_model
+from src.sysid._urdf import patch_link_inertials
+
+
+# ── Locked motor parameters (from motor-only sysid) ─────────────────
+# These are FIXED and not optimised.  They come from the 12-param model
+# in robot.yaml (from motor-only sysid, cost 0.862).
+
+LOCKED_MOTOR_PARAMS: dict[str, float] = {
+    "actuator_gear_pos": 0.424182,
+    "actuator_gear_neg": 0.425031,
+    "actuator_filter_tau": 0.00503506,
+    "motor_damping_pos": 0.00202682,
+    "motor_damping_neg": 0.0146651,
+    "motor_armature": 0.00277342,
+    "motor_frictionloss_pos": 0.0573282,
+    "motor_frictionloss_neg": 0.0533549,
+    "viscous_quadratic": 0.000285329,
+    "back_emf_gain": 0.00675809,
+    "motor_deadzone_pos": 0.141291,
+    "motor_deadzone_neg": 0.0780148,
+}
+

 # ── Tunable parameter specification ──────────────────────────────────

@@ -37,33 +66,58 @@ class ParamSpec:
    log_scale: bool = False  # optimise in log-space (masses, inertias)


-# Default parameter specs for the rotary cartpole.
+# Pendulum sysid parameters — motor params are LOCKED (not here).
 # Order matters: the optimizer maps a flat vector to these specs.
+# Defaults are from the URDF exported by Fusion 360.
 ROTARY_CARTPOLE_PARAMS: list[ParamSpec] = [
    # ── Arm link (URDF) ──────────────────────────────────────────
-    ParamSpec("arm_mass", 0.010, 0.003, 0.05, log_scale=True),
-    ParamSpec("arm_com_x", 0.00005, -0.02, 0.02),
-    ParamSpec("arm_com_y", 0.0065, -0.01, 0.02),
-    ParamSpec("arm_com_z", 0.00563, -0.01, 0.02),
+    ParamSpec("arm_mass", 0.02110, 0.005, 0.08, log_scale=True),
+    ParamSpec("arm_com_x", -0.00710, -0.03, 0.03),
+    ParamSpec("arm_com_y", 0.00085, -0.02, 0.02),
+    ParamSpec("arm_com_z", 0.00795, -0.02, 0.02),
    # ── Pendulum link (URDF) ─────────────────────────────────────
-    ParamSpec("pendulum_mass", 0.015, 0.005, 0.05, log_scale=True),
-    ParamSpec("pendulum_com_x", 0.1583, 0.05, 0.25),
-    ParamSpec("pendulum_com_y", -0.0983, -0.20, 0.0),
-    ParamSpec("pendulum_com_z", 0.0, -0.05, 0.05),
-    ParamSpec("pendulum_ixx", 6.16e-06, 1e-07, 1e-04, log_scale=True),
-    ParamSpec("pendulum_iyy", 6.16e-06, 1e-07, 1e-04, log_scale=True),
-    ParamSpec("pendulum_izz", 1.23e-05, 1e-07, 1e-04, log_scale=True),
-    ParamSpec("pendulum_ixy", 6.10e-06, -1e-04, 1e-04),
-    # ── Actuator / joint dynamics (robot.yaml) ───────────────────
-    ParamSpec("actuator_gear", 0.064, 0.01, 0.2, log_scale=True),
-    ParamSpec("actuator_filter_tau", 0.03, 0.005, 0.15),
-    ParamSpec("motor_damping", 0.003, 1e-4, 0.05, log_scale=True),
-    ParamSpec("pendulum_damping", 0.0001, 1e-5, 0.01, log_scale=True),
-    ParamSpec("motor_armature", 0.0001, 1e-5, 0.01, log_scale=True),
-    ParamSpec("motor_frictionloss", 0.03, 0.001, 0.1, log_scale=True),
+    ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
+    ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
+    ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
+    ParamSpec("pendulum_com_z", -0.00346, -0.05, 0.05),
+    ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
+    ParamSpec("pendulum_iyy", 3.70e-05, 1e-07, 1e-03, log_scale=True),
+    ParamSpec("pendulum_izz", 7.83e-05, 1e-07, 1e-03, log_scale=True),
+    ParamSpec("pendulum_ixy", -6.93e-06, -1e-03, 1e-03),
+    # ── Pendulum joint dynamics ──────────────────────────────────
+    ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
+    ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
+    # ── Hardware realism (control pipeline) ────────────────────
+    ParamSpec("ctrl_limit", 0.588, 0.45, 0.70),  # MAX_MOTOR_SPEED / 255
 ]


+# Extended set: full params + motor armature (compensates for the
+# motor-only sysid having captured arm/pendulum loading in armature).
+EXTENDED_PARAMS: list[ParamSpec] = ROTARY_CARTPOLE_PARAMS + [
+    ParamSpec("motor_armature", 0.00277, 0.0005, 0.02, log_scale=True),
+]
+
+
+# Reduced set: only the 6 most impactful pendulum parameters.
+# Good for a fast first pass — converges in ~50 generations.
+REDUCED_PARAMS: list[ParamSpec] = [
+    ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
+    ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
+    ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
+    ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
+    ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
+    ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
+]
+
+
+PARAM_SETS: dict[str, list[ParamSpec]] = {
+    "full": ROTARY_CARTPOLE_PARAMS,
+    "extended": EXTENDED_PARAMS,
+    "reduced": REDUCED_PARAMS,
+}
+
+
 def params_to_dict(
    values: np.ndarray, specs: list[ParamSpec] | None = None
 ) -> dict[str, float]:
@@ -97,182 +151,93 @@ def bounds_arrays(
 def _build_model(
    robot_path: Path,
    params: dict[str, float],
-) -> mujoco.MjModel:
-    """Build a MuJoCo model from URDF + robot.yaml with parameter overrides.
+    motor_params: dict[str, float] | None = None,
+) -> tuple[mujoco.MjModel, ActuatorConfig]:
+    """Build a MuJoCo model with sysid overrides.

-    Follows the same two-step approach as ``MuJoCoRunner._load_model()``:
-    1. Parse URDF, inject meshdir, load into MuJoCo
-    2. Export MJCF, inject actuators + joint overrides + param overrides, reload
+    Returns (model, actuator) — use ``actuator.transform_ctrl()`` and
+    ``actuator.compute_motor_force()`` in the rollout loop.
    """
+    if motor_params is None:
+        motor_params = LOCKED_MOTOR_PARAMS
+
    robot_path = Path(robot_path).resolve()
+
+    # ── Patch URDF inertial parameters to a temp file ────────────
    robot_yaml = yaml.safe_load((robot_path / "robot.yaml").read_text())
    urdf_path = robot_path / robot_yaml["urdf"]

-    # ── Step 1: Load URDF ────────────────────────────────────────
    tree = ET.parse(urdf_path)
-    root = tree.getroot()
+    patch_link_inertials(tree.getroot(), params)

-    # Inject meshdir compiler directive.
-    meshdir = None
-    for mesh_el in root.iter("mesh"):
-        fn = mesh_el.get("filename", "")
-        parent = str(Path(fn).parent)
-        if parent and parent != ".":
-            meshdir = parent
-            break
-    if meshdir:
-        mj_ext = ET.SubElement(root, "mujoco")
-        ET.SubElement(
-            mj_ext, "compiler", attrib={"meshdir": meshdir, "balanceinertia": "true"}
-        )
+    fd, tmp_urdf = tempfile.mkstemp(
+        suffix=".urdf", prefix="_sysid_", dir=str(robot_path),
+    )
+    os.close(fd)
+    tmp_urdf_path = Path(tmp_urdf)
+    tree.write(str(tmp_urdf_path), xml_declaration=True, encoding="unicode")

-    # Override URDF inertial parameters BEFORE MuJoCo loading.
-    for link in root.iter("link"):
-        link_name = link.get("name", "")
-        inertial = link.find("inertial")
-        if inertial is None:
-            continue
+    # ── Build RobotConfig with full motor sysid values ───────────
+    gear_pos = motor_params.get("actuator_gear_pos", 0.424182)
+    gear_neg = motor_params.get("actuator_gear_neg", 0.425031)
+    motor_armature = params.get(
+        "motor_armature",
+        motor_params.get("motor_armature", 0.00277342),
+    )
+    pend_damping = params.get("pendulum_damping", 0.0001)
+    pend_frictionloss = params.get("pendulum_frictionloss", 0.0001)

-        if link_name == "arm":
-            _set_mass(inertial, params.get("arm_mass"))
-            _set_com(
-                inertial,
-                params.get("arm_com_x"),
-                params.get("arm_com_y"),
-                params.get("arm_com_z"),
-            )
+    act_cfg = robot_yaml["actuators"][0]
+    ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])

-        elif link_name == "pendulum":
-            _set_mass(inertial, params.get("pendulum_mass"))
-            _set_com(
-                inertial,
-                params.get("pendulum_com_x"),
-                params.get("pendulum_com_y"),
-                params.get("pendulum_com_z"),
-            )
-            _set_inertia(
-                inertial,
-                ixx=params.get("pendulum_ixx"),
-                iyy=params.get("pendulum_iyy"),
-                izz=params.get("pendulum_izz"),
-                ixy=params.get("pendulum_ixy"),
-            )
+    actuator = ActuatorConfig(
+        joint=act_cfg["joint"],
+        type="motor",
+        gear=(gear_pos, gear_neg),
+        ctrl_range=(ctrl_lo, ctrl_hi),
+        deadzone=(
+            motor_params.get("motor_deadzone_pos", 0.141),
+            motor_params.get("motor_deadzone_neg", 0.078),
+        ),
+        damping=(
+            motor_params.get("motor_damping_pos", 0.002),
+            motor_params.get("motor_damping_neg", 0.015),
+        ),
+        frictionloss=(
+            motor_params.get("motor_frictionloss_pos", 0.057),
+            motor_params.get("motor_frictionloss_neg", 0.053),
+        ),
+        filter_tau=motor_params.get("actuator_filter_tau", 0.005),
+        viscous_quadratic=motor_params.get("viscous_quadratic", 0.000285),
+        back_emf_gain=motor_params.get("back_emf_gain", 0.00676),
+    )
+
+    robot = RobotConfig(
+        urdf_path=tmp_urdf_path,
+        actuators=[actuator],
+        joints={
+            "motor_joint": JointConfig(
+                damping=0.0,
+                armature=motor_armature,
+                frictionloss=0.0,
+            ),
+            "pendulum_joint": JointConfig(
+                damping=pend_damping,
+                frictionloss=pend_frictionloss,
+            ),
+        },
+    )

-    # Write temp URDF and load.
-    tmp_urdf = robot_path / "_tmp_sysid_load.urdf"
-    tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
    try:
-        model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
+        model = load_mujoco_model(robot)
    finally:
-        tmp_urdf.unlink(missing_ok=True)
+        tmp_urdf_path.unlink(missing_ok=True)

-    # ── Step 2: Export MJCF, inject actuators + overrides ────────
-    tmp_mjcf = robot_path / "_tmp_sysid_inject.xml"
-    try:
-        mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
-        mjcf_root = ET.fromstring(tmp_mjcf.read_text())
-
-        # Actuator.
-        gear = params.get("actuator_gear", robot_yaml["actuators"][0].get("gear", 0.064))
-        filter_tau = params.get(
-            "actuator_filter_tau",
-            robot_yaml["actuators"][0].get("filter_tau", 0.03),
-        )
-        act_cfg = robot_yaml["actuators"][0]
-        ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
-
-        act_elem = ET.SubElement(mjcf_root, "actuator")
-        attribs: dict[str, str] = {
-            "name": f"{act_cfg['joint']}_motor",
-            "joint": act_cfg["joint"],
-            "gear": str(gear),
-            "ctrlrange": f"{ctrl_lo} {ctrl_hi}",
-        }
-        if filter_tau > 0:
-            attribs["dyntype"] = "filter"
-            attribs["dynprm"] = str(filter_tau)
-            attribs["gaintype"] = "fixed"
-            attribs["biastype"] = "none"
-            ET.SubElement(act_elem, "general", attrib=attribs)
-        else:
-            ET.SubElement(act_elem, "motor", attrib=attribs)
-
-        # Joint overrides.
-        motor_damping = params.get("motor_damping", 0.003)
-        pend_damping = params.get("pendulum_damping", 0.0001)
-        motor_armature = params.get("motor_armature", 0.0001)
-        motor_frictionloss = params.get("motor_frictionloss", 0.03)
-
-        for body in mjcf_root.iter("body"):
-            for jnt in body.findall("joint"):
-                name = jnt.get("name")
-                if name == "motor_joint":
-                    jnt.set("damping", str(motor_damping))
-                    jnt.set("armature", str(motor_armature))
-                    jnt.set("frictionloss", str(motor_frictionloss))
-                elif name == "pendulum_joint":
-                    jnt.set("damping", str(pend_damping))
-
-        # Disable self-collision.
-        for geom in mjcf_root.iter("geom"):
-            geom.set("contype", "0")
-            geom.set("conaffinity", "0")
-
-        modified_xml = ET.tostring(mjcf_root, encoding="unicode")
-        tmp_mjcf.write_text(modified_xml)
-        model = mujoco.MjModel.from_xml_path(str(tmp_mjcf))
-    finally:
-        tmp_mjcf.unlink(missing_ok=True)
-
-    return model
+    return model, actuator


-def _set_mass(inertial: ET.Element, mass: float | None) -> None:
-    if mass is None:
-        return
-    mass_el = inertial.find("mass")
-    if mass_el is not None:
-        mass_el.set("value", str(mass))


-def _set_com(
-    inertial: ET.Element,
-    x: float | None,
-    y: float | None,
-    z: float | None,
-) -> None:
-    origin = inertial.find("origin")
-    if origin is None:
-        return
-    xyz = origin.get("xyz", "0 0 0").split()
-    if x is not None:
-        xyz[0] = str(x)
-    if y is not None:
-        xyz[1] = str(y)
-    if z is not None:
-        xyz[2] = str(z)
-    origin.set("xyz", " ".join(xyz))
-
-
-def _set_inertia(
-    inertial: ET.Element,
-    ixx: float | None = None,
-    iyy: float | None = None,
-    izz: float | None = None,
-    ixy: float | None = None,
-    iyz: float | None = None,
-    ixz: float | None = None,
-) -> None:
-    ine = inertial.find("inertia")
-    if ine is None:
-        return
-    for attr, val in [
-        ("ixx", ixx), ("iyy", iyy), ("izz", izz),
-        ("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
-    ]:
-        if val is not None:
-            ine.set(attr, str(val))
-

 # ── Simulation rollout ───────────────────────────────────────────────

@@ -281,72 +246,58 @@ def rollout(
    robot_path: str | Path,
    params: dict[str, float],
    actions: np.ndarray,
-    timesteps: np.ndarray,
    sim_dt: float = 0.002,
    substeps: int = 10,
+    motor_params: dict[str, float] | None = None,
 ) -> dict[str, np.ndarray]:
    """Replay recorded actions in MuJoCo with overridden parameters.

    Parameters
    ----------
    robot_path : asset directory
-    params : named parameter overrides
+    params : named parameter overrides (pendulum/arm only)
    actions : (N,) normalised actions [-1, 1] from the recording
-    timesteps : (N,) wall-clock times (seconds) from the recording
    sim_dt : MuJoCo physics timestep
    substeps : physics substeps per control step
+    motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)

    Returns
    -------
    dict with keys: motor_angle, motor_vel, pendulum_angle, pendulum_vel
    Each is an (N,) numpy array of simulated values.
    """
+    if motor_params is None:
+        motor_params = LOCKED_MOTOR_PARAMS
+
    robot_path = Path(robot_path).resolve()
-    model = _build_model(robot_path, params)
+    model, actuator = _build_model(robot_path, params, motor_params)
    model.opt.timestep = sim_dt
    data = mujoco.MjData(model)
-
-    # Start from pendulum hanging down (qpos=0 is down per URDF convention).
    mujoco.mj_resetData(model, data)

-    # Control dt derived from actual recording sample rate.
    n = len(actions)
-    ctrl_dt = sim_dt * substeps
+    ctrl_limit = params.get("ctrl_limit", 0.588)

-    # Pre-allocate output.
    sim_motor_angle = np.zeros(n, dtype=np.float64)
    sim_motor_vel = np.zeros(n, dtype=np.float64)
    sim_pend_angle = np.zeros(n, dtype=np.float64)
    sim_pend_vel = np.zeros(n, dtype=np.float64)

-    # Extract actuator limit info for software limit switch.
-    nu = model.nu
-    if nu > 0:
-        jnt_id = model.actuator_trnid[0, 0]
-        jnt_limited = bool(model.jnt_limited[jnt_id])
-        jnt_lo = model.jnt_range[jnt_id, 0]
-        jnt_hi = model.jnt_range[jnt_id, 1]
-        gear_sign = float(np.sign(model.actuator_gear[0, 0]))
-    else:
-        jnt_limited = False
-        jnt_lo = jnt_hi = gear_sign = 0.0
+    limits = ActuatorLimits(model)

    for i in range(n):
-        data.ctrl[0] = actions[i]
+        action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
+        ctrl = actuator.transform_ctrl(action)
+        data.ctrl[0] = ctrl

        for _ in range(substeps):
-            # Software limit switch (mirrors MuJoCoRunner).
-            if jnt_limited and nu > 0:
-                pos = data.qpos[jnt_id]
-                if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
-                    data.ctrl[0] = 0.0
-                elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
-                    data.ctrl[0] = 0.0
+            limits.enforce(model, data)
+            data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
            mujoco.mj_step(model, data)

        sim_motor_angle[i] = data.qpos[0]
-        sim_motor_vel[i] = data.qvel[0]
        sim_pend_angle[i] = data.qpos[1]
+        sim_motor_vel[i] = data.qvel[0]
        sim_pend_vel[i] = data.qvel[1]

    return {
@@ -364,6 +315,7 @@ def windowed_rollout(
    window_duration: float = 0.5,
    sim_dt: float = 0.002,
    substeps: int = 10,
+    motor_params: dict[str, float] | None = None,
 ) -> dict[str, np.ndarray | float]:
    """Multiple-shooting rollout — split recording into short windows.

@@ -372,18 +324,19 @@ def windowed_rollout(
      2. Replay the recorded actions within the window.
      3. Record the simulated output.

-    This prevents error accumulation across the full trajectory, giving
-    a much smoother cost landscape for the optimizer.
+    Motor dynamics (asymmetric friction, damping, back-EMF, etc.) are
+    applied via qfrc_applied using the locked motor sysid parameters.

    Parameters
    ----------
    robot_path : asset directory
-    params : named parameter overrides
+    params : named parameter overrides (pendulum/arm only)
    recording : dict with keys time, action, motor_angle, motor_vel,
                pendulum_angle, pendulum_vel (all 1D arrays of length N)
    window_duration : length of each shooting window in seconds
    sim_dt : MuJoCo physics timestep
    substeps : physics substeps per control step
+    motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)

    Returns
    -------
@@ -392,8 +345,11 @@ def windowed_rollout(
            (stitched from per-window simulations)
        n_windows — number of windows used
    """
+    if motor_params is None:
+        motor_params = LOCKED_MOTOR_PARAMS
+
    robot_path = Path(robot_path).resolve()
-    model = _build_model(robot_path, params)
+    model, actuator = _build_model(robot_path, params, motor_params)
    model.opt.timestep = sim_dt
    data = mujoco.MjData(model)

@@ -405,67 +361,50 @@ def windowed_rollout(
    real_pend_vel = recording["pendulum_vel"]
    n = len(actions)

-    # Pre-allocate output (stitched from all windows).
    sim_motor_angle = np.zeros(n, dtype=np.float64)
    sim_motor_vel = np.zeros(n, dtype=np.float64)
    sim_pend_angle = np.zeros(n, dtype=np.float64)
    sim_pend_vel = np.zeros(n, dtype=np.float64)

-    # Extract actuator limit info.
-    nu = model.nu
-    if nu > 0:
-        jnt_id = model.actuator_trnid[0, 0]
-        jnt_limited = bool(model.jnt_limited[jnt_id])
-        jnt_lo = model.jnt_range[jnt_id, 0]
-        jnt_hi = model.jnt_range[jnt_id, 1]
-        gear_sign = float(np.sign(model.actuator_gear[0, 0]))
-    else:
-        jnt_limited = False
-        jnt_lo = jnt_hi = gear_sign = 0.0
+    limits = ActuatorLimits(model)

-    # Compute window boundaries from recording timestamps.
    t0 = times[0]
    t_end = times[-1]
-    window_starts: list[int] = []  # indices into the recording
+    window_starts: list[int] = []
    current_t = t0
    while current_t < t_end:
-        # Find the index closest to current_t.
        idx = int(np.searchsorted(times, current_t))
        idx = min(idx, n - 1)
        window_starts.append(idx)
        current_t += window_duration

+    ctrl_limit = params.get("ctrl_limit", 0.588)
    n_windows = len(window_starts)

    for w, w_start in enumerate(window_starts):
-        # Window end: next window start, or end of recording.
        w_end = window_starts[w + 1] if w + 1 < n_windows else n

-        # Initialize MuJoCo state from real data at window start.
        mujoco.mj_resetData(model, data)
        data.qpos[0] = real_motor[w_start]
        data.qpos[1] = real_pend[w_start]
        data.qvel[0] = real_motor_vel[w_start]
        data.qvel[1] = real_pend_vel[w_start]
        data.ctrl[:] = 0.0
-        # Forward kinematics to make state consistent.
        mujoco.mj_forward(model, data)

        for i in range(w_start, w_end):
-            data.ctrl[0] = actions[i]
+            action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
+            ctrl = actuator.transform_ctrl(action)
+            data.ctrl[0] = ctrl

            for _ in range(substeps):
-                if jnt_limited and nu > 0:
-                    pos = data.qpos[jnt_id]
-                    if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
-                        data.ctrl[0] = 0.0
-                    elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
-                        data.ctrl[0] = 0.0
+                limits.enforce(model, data)
+                data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
                mujoco.mj_step(model, data)

            sim_motor_angle[i] = data.qpos[0]
-            sim_motor_vel[i] = data.qvel[0]
            sim_pend_angle[i] = data.qpos[1]
+            sim_motor_vel[i] = data.qvel[0]
            sim_pend_vel[i] = data.qvel[1]

    return {
--- a/src/sysid/visualize.py
+++ b/src/sysid/visualize.py
@@ -20,7 +20,6 @@ from __future__ import annotations

 import argparse
 import json
-import math
 from pathlib import Path

 import numpy as np
@@ -29,6 +28,30 @@ import structlog
 log = structlog.get_logger()


+def _run_sim(
+    robot_path: Path,
+    params: dict[str, float],
+    recording: dict[str, np.ndarray],
+    window_duration: float,
+    sim_dt: float,
+    substeps: int,
+    motor_params: dict[str, float],
+) -> dict[str, np.ndarray]:
+    """Run windowed or open-loop rollout depending on window_duration."""
+    from src.sysid.rollout import rollout, windowed_rollout
+
+    if window_duration > 0:
+        return windowed_rollout(
+            robot_path=robot_path, params=params, recording=recording,
+            window_duration=window_duration, sim_dt=sim_dt,
+            substeps=substeps, motor_params=motor_params,
+        )
+    return rollout(
+        robot_path=robot_path, params=params, actions=recording["action"],
+        substeps=substeps, motor_params=motor_params,
+    )
+
+
 def visualize(
    robot_path: str | Path,
    recording_path: str | Path,
@@ -39,31 +62,27 @@ def visualize(
    save_path: str | Path | None = None,
    show: bool = True,
 ) -> None:
-    """Generate comparison plot.
-
-    Parameters
-    ----------
-    robot_path : robot asset directory
-    recording_path : .npz file from capture
-    result_path : sysid_result.json with best_params (optional)
-    sim_dt / substeps : physics settings for rollout
-    window_duration : shooting window length (s); 0 = open-loop
-    save_path : if provided, save figure to this path (PNG, PDF, …)
-    show : if True, display interactive matplotlib window
-    """
+    """Generate comparison plot."""
    import matplotlib.pyplot as plt

    from src.sysid.rollout import (
+        LOCKED_MOTOR_PARAMS,
        ROTARY_CARTPOLE_PARAMS,
        defaults_vector,
        params_to_dict,
-        rollout,
-        windowed_rollout,
    )

    robot_path = Path(robot_path).resolve()
    recording = dict(np.load(recording_path))

+    motor_params = LOCKED_MOTOR_PARAMS
+
+    sim_kwargs = dict(
+        robot_path=robot_path, recording=recording,
+        window_duration=window_duration, sim_dt=sim_dt,
+        substeps=substeps, motor_params=motor_params,
+    )
+
    t = recording["time"]
    actions = recording["action"]

@@ -72,26 +91,15 @@ def visualize(
        defaults_vector(ROTARY_CARTPOLE_PARAMS), ROTARY_CARTPOLE_PARAMS
    )
    log.info("simulating_default_params", windowed=window_duration > 0)
-    if window_duration > 0:
-        sim_default = windowed_rollout(
-            robot_path=robot_path,
-            params=default_params,
-            recording=recording,
-            window_duration=window_duration,
-            sim_dt=sim_dt,
-            substeps=substeps,
-        )
-    else:
-        sim_default = rollout(
-            robot_path=robot_path,
-            params=default_params,
-            actions=actions,
-            timesteps=t,
-            sim_dt=sim_dt,
-            substeps=substeps,
-        )
+    sim_default = _run_sim(params=default_params, **sim_kwargs)

    # ── Simulate with tuned parameters (if available) ────────────
+    # Resolve result path (explicit or auto-detect).
+    if result_path is None:
+        auto = robot_path / "sysid_result.json"
+        if auto.exists():
+            result_path = auto
+
    sim_tuned = None
    tuned_cost = None
    if result_path is not None:
@@ -101,64 +109,21 @@ def visualize(
            tuned_params = result.get("best_params", {})
            tuned_cost = result.get("best_cost")
            log.info("simulating_tuned_params", cost=tuned_cost)
-            if window_duration > 0:
-                sim_tuned = windowed_rollout(
-                    robot_path=robot_path,
-                    params=tuned_params,
-                    recording=recording,
-                    window_duration=window_duration,
-                    sim_dt=sim_dt,
-                    substeps=substeps,
-                )
-            else:
-                sim_tuned = rollout(
-                    robot_path=robot_path,
-                    params=tuned_params,
-                    actions=actions,
-                    timesteps=t,
-                    sim_dt=sim_dt,
-                    substeps=substeps,
-                )
+            sim_tuned = _run_sim(params=tuned_params, **sim_kwargs)
        else:
            log.warning("result_file_not_found", path=str(result_path))
-    else:
-        # Auto-detect sysid_result.json in robot_path.
-        auto_result = robot_path / "sysid_result.json"
-        if auto_result.exists():
-            result = json.loads(auto_result.read_text())
-            tuned_params = result.get("best_params", {})
-            tuned_cost = result.get("best_cost")
-            log.info("auto_detected_tuned_params", cost=tuned_cost)
-            if window_duration > 0:
-                sim_tuned = windowed_rollout(
-                    robot_path=robot_path,
-                    params=tuned_params,
-                    recording=recording,
-                    window_duration=window_duration,
-                    sim_dt=sim_dt,
-                    substeps=substeps,
-                )
-            else:
-                sim_tuned = rollout(
-                    robot_path=robot_path,
-                    params=tuned_params,
-                    actions=actions,
-                    timesteps=t,
-                    sim_dt=sim_dt,
-                    substeps=substeps,
-                )

    # ── Plot ─────────────────────────────────────────────────────
    fig, axes = plt.subplots(5, 1, figsize=(14, 12), sharex=True)

    channels = [
-        ("motor_angle", "Motor Angle (rad)", True),
-        ("motor_vel", "Motor Velocity (rad/s)", False),
-        ("pendulum_angle", "Pendulum Angle (rad)", True),
-        ("pendulum_vel", "Pendulum Velocity (rad/s)", False),
+        ("motor_angle", "Motor Angle (rad)"),
+        ("motor_vel", "Motor Velocity (rad/s)"),
+        ("pendulum_angle", "Pendulum Angle (rad)"),
+        ("pendulum_vel", "Pendulum Velocity (rad/s)"),
    ]

-    for ax, (key, ylabel, is_angle) in zip(axes[:4], channels):
+    for ax, (key, ylabel) in zip(axes[:4], channels):
        real = recording[key]

        ax.plot(t, real, "k-", linewidth=1.2, alpha=0.8, label="Real")
@@ -207,6 +172,7 @@ def visualize(
            sim_dt=sim_dt,
            substeps=substeps,
            window_duration=window_duration,
+            motor_params=motor_params,
        )
        title += f"\nOriginal cost: {orig_cost:.4f}  →  Tuned cost: {tuned_cost:.4f}"
        improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0
--- a/src/training/trainer.py
+++ b/src/training/trainer.py
@@ -8,7 +8,6 @@ import structlog
 import torch
 import tqdm
 from clearml import Logger
-from gymnasium import spaces
 from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
 from skrl.memories.torch import RandomMemory
 from skrl.resources.preprocessors.torch import RunningStandardScaler
@@ -129,24 +128,17 @@ class VideoRecordingTrainer(SequentialTrainer):
        frames: list[np.ndarray] = []

        obs, _ = self.env.reset()
-        for _ in range(max_steps):
-            with torch.no_grad():
+        with torch.no_grad():
+            for _ in range(max_steps):
                action = self.agents.act(obs, timestep=timestep, timesteps=self.timesteps)[0]
-            obs, _, terminated, truncated, _ = self.env.step(action)
+                obs, _, terminated, truncated, _ = self.env.step(action)

-            try:
                frame = self.env.render()
-            except Exception:
-                # Headless environment without OpenGL — skip video recording
-                log.warning("video_recording_disabled", reason="render failed (headless?)")
-                self.env.reset()
-                return
+                if frame is not None:
+                    frames.append(frame)

-            if frame is not None:
-                frames.append(frame)
-
-            if (terminated | truncated).any().item():
-                break
+                if (terminated | truncated).any().item():
+                    break

        if frames:
            path = str(self._video_dir / f"step_{timestep}.mp4")
@@ -159,7 +151,6 @@ class VideoRecordingTrainer(SequentialTrainer):
                    local_path=path, iteration=timestep,
                )

-        # Restore training state
        self.env.reset()