diff --git a/.gitignore b/.gitignore
index 25335e9..3fe1d68 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,23 @@
-outputs/
+# IDE / editor
.vscode/
-runs/
\ No newline at end of file
+
+# Training & HPO outputs
+outputs/
+runs/
+smac3_output/
+
+# MuJoCo
+MUJOCO_LOG.TXT
+
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+.eggs/
+dist/
+build/
+
+# Temp files
+*.stl
+*.scad
\ No newline at end of file
diff --git a/assets/rotary_cartpole/hardware.yaml b/assets/rotary_cartpole/hardware.yaml
index a968c91..1c5ccf7 100644
--- a/assets/rotary_cartpole/hardware.yaml
+++ b/assets/rotary_cartpole/hardware.yaml
@@ -10,7 +10,7 @@ encoder:
# counts_per_rev = ppr × gear_ratio × 4 (quadrature) = 1320
safety:
- max_motor_angle_deg: 90.0 # hard termination limit (0 = disabled)
+ max_motor_angle_deg: 90.0 # hard termination limit (physical endstop ~70-80°)
soft_limit_deg: 40.0 # progressive penalty ramp starts here
reset:
diff --git a/assets/rotary_cartpole/recordings/capture_20260311_215608.npz b/assets/rotary_cartpole/recordings/capture_20260311_215608.npz
deleted file mode 100644
index 850edae..0000000
Binary files a/assets/rotary_cartpole/recordings/capture_20260311_215608.npz and /dev/null differ
diff --git a/assets/rotary_cartpole/robot.yaml b/assets/rotary_cartpole/robot.yaml
index 3323c23..9336fc5 100644
--- a/assets/rotary_cartpole/robot.yaml
+++ b/assets/rotary_cartpole/robot.yaml
@@ -1,20 +1,21 @@
# Tuned robot config — generated by src.sysid.optimize
-# Original: robot.yaml
-# Run `python -m src.sysid.visualize` to compare real vs sim.
urdf: rotary_cartpole.urdf
actuators:
- joint: motor_joint
type: motor
- gear: 0.176692
- ctrl_range:
- - -1.0
- - 1.0
- damping: 0.009505
- filter_tau: 0.040906
+ gear: [0.424182, 0.425031] # torque constant [pos, neg] (motor sysid)
+ ctrl_range: [-0.592, 0.592] # effective control bound (sysid-tuned)
+ deadzone: [0.141291, 0.078015] # L298N min |ctrl| for torque [pos, neg]
+ damping: [0.002027, 0.014665] # viscous damping [pos, neg]
+ frictionloss: [0.057328, 0.053355] # Coulomb friction [pos, neg]
+ filter_tau: 0.005035 # 1st-order actuator filter (motor sysid)
+ viscous_quadratic: 0.000285 # velocity² drag
+ back_emf_gain: 0.006758 # back-EMF torque reduction
joints:
motor_joint:
- armature: 0.001389
- frictionloss: 0.002179
+ armature: 0.002773 # reflected rotor inertia (motor sysid)
+ frictionloss: 0.0 # handled by motor model via qfrc_applied
pendulum_joint:
- damping: 6.1e-05
+ damping: 0.000119
+ frictionloss: 1.0e-05
diff --git a/assets/rotary_cartpole/rotary_cartpole.urdf b/assets/rotary_cartpole/rotary_cartpole.urdf
index cb7f392..3be4f51 100644
--- a/assets/rotary_cartpole/rotary_cartpole.urdf
+++ b/assets/rotary_cartpole/rotary_cartpole.urdf
@@ -26,8 +26,8 @@
-
-
+
+
@@ -53,9 +53,9 @@
-
-
-
+
+
+
diff --git a/assets/rotary_cartpole/sysid_result.json b/assets/rotary_cartpole/sysid_result.json
deleted file mode 100644
index 7673ddc..0000000
--- a/assets/rotary_cartpole/sysid_result.json
+++ /dev/null
@@ -1,70 +0,0 @@
-{
- "best_params": {
- "arm_mass": 0.012391951282440451,
- "arm_com_x": 0.014950488360794875,
- "arm_com_y": 0.006089886527968399,
- "arm_com_z": 0.004470745447817278,
- "pendulum_mass": 0.035508993892747365,
- "pendulum_com_x": 0.06432778588634695,
- "pendulum_com_y": -0.05999895841669392,
- "pendulum_com_z": 0.0008769789937631209,
- "pendulum_ixx": 3.139576982078822e-05,
- "pendulum_iyy": 9.431951659638859e-06,
- "pendulum_izz": 4.07315891863556e-05,
- "pendulum_ixy": -1.8892943833253423e-06,
- "actuator_gear": 0.17669161390939517,
- "actuator_filter_tau": 0.040905643692382504,
- "motor_damping": 0.009504542103348917,
- "pendulum_damping": 6.128535042404019e-05,
- "motor_armature": 0.0013894759540138252,
- "motor_frictionloss": 0.002179448047511452
- },
- "best_cost": 0.7471380533090072,
- "recording": "/Users/victormylle/Library/CloudStorage/SeaDrive-VictorMylle(cloud.optimize-it.be)/My Libraries/Projects/AI/RL-Framework/assets/rotary_cartpole/recordings/capture_20260311_215608.npz",
- "param_names": [
- "arm_mass",
- "arm_com_x",
- "arm_com_y",
- "arm_com_z",
- "pendulum_mass",
- "pendulum_com_x",
- "pendulum_com_y",
- "pendulum_com_z",
- "pendulum_ixx",
- "pendulum_iyy",
- "pendulum_izz",
- "pendulum_ixy",
- "actuator_gear",
- "actuator_filter_tau",
- "motor_damping",
- "pendulum_damping",
- "motor_armature",
- "motor_frictionloss"
- ],
- "defaults": {
- "arm_mass": 0.01,
- "arm_com_x": 5e-05,
- "arm_com_y": 0.0065,
- "arm_com_z": 0.00563,
- "pendulum_mass": 0.015,
- "pendulum_com_x": 0.1583,
- "pendulum_com_y": -0.0983,
- "pendulum_com_z": 0.0,
- "pendulum_ixx": 6.16e-06,
- "pendulum_iyy": 6.16e-06,
- "pendulum_izz": 1.23e-05,
- "pendulum_ixy": 6.1e-06,
- "actuator_gear": 0.064,
- "actuator_filter_tau": 0.03,
- "motor_damping": 0.003,
- "pendulum_damping": 0.0001,
- "motor_armature": 0.0001,
- "motor_frictionloss": 0.03
- },
- "timestamp": "2026-03-11T22:08:04.782736",
- "history_summary": {
- "first_cost": 3.909456214944022,
- "final_cost": 0.7471380533090072,
- "generations": 200
- }
-}
\ No newline at end of file
diff --git a/configs/env/rotary_cartpole.yaml b/configs/env/rotary_cartpole.yaml
index 65049af..23bb616 100644
--- a/configs/env/rotary_cartpole.yaml
+++ b/configs/env/rotary_cartpole.yaml
@@ -1,10 +1,19 @@
max_steps: 1000
robot_path: assets/rotary_cartpole
reward_upright_scale: 1.0
-speed_penalty_scale: 0.1
+
+# ── Regularisation penalties (prevent fast spinning) ─────────────────
+motor_vel_penalty: 0.01 # penalise high motor angular velocity
+motor_angle_penalty: 0.05 # penalise deviation from centre
+action_penalty: 0.05 # penalise large actions (energy cost)
+
+# ── Software safety limit (env-level, always applied) ────────────────
+motor_angle_limit_deg: 90.0 # terminate episode if motor exceeds ±90°
# ── HPO search ranges ────────────────────────────────────────────────
hpo:
reward_upright_scale: {min: 0.5, max: 5.0}
- speed_penalty_scale: {min: 0.01, max: 1.0}
+ motor_vel_penalty: {min: 0.001, max: 0.1}
+ motor_angle_penalty: {min: 0.01, max: 0.2}
+ action_penalty: {min: 0.01, max: 0.2}
max_steps: {values: [500, 1000, 2000]}
\ No newline at end of file
diff --git a/configs/runner/mjx.yaml b/configs/runner/mjx.yaml
index 87ccd3f..5c35dbf 100644
--- a/configs/runner/mjx.yaml
+++ b/configs/runner/mjx.yaml
@@ -1,4 +1,4 @@
num_envs: 1024 # MJX shines with many parallel envs
device: auto # auto = cuda if available, else cpu
dt: 0.002
-substeps: 20
+substeps: 10
diff --git a/configs/runner/mujoco.yaml b/configs/runner/mujoco.yaml
index 7d23949..f99ff9b 100644
--- a/configs/runner/mujoco.yaml
+++ b/configs/runner/mujoco.yaml
@@ -1,4 +1,4 @@
num_envs: 64
device: auto # auto = cuda if available, else cpu
dt: 0.002
-substeps: 20
+substeps: 10
diff --git a/configs/runner/serial.yaml b/configs/runner/serial.yaml
index c187c11..44f19ff 100644
--- a/configs/runner/serial.yaml
+++ b/configs/runner/serial.yaml
@@ -6,6 +6,5 @@ num_envs: 1
device: cpu
port: /dev/cu.usbserial-0001
baud: 115200
-dt: 0.02 # control loop period (50 Hz)
+dt: 0.02 # control loop period (50 Hz, matches training)
no_data_timeout: 2.0 # seconds of silence before declaring disconnect
-encoder_jump_threshold: 200 # encoder tick jump → reboot detection
diff --git a/configs/sysid.yaml b/configs/sysid.yaml
index 492c3c0..d61489a 100644
--- a/configs/sysid.yaml
+++ b/configs/sysid.yaml
@@ -1,5 +1,5 @@
# System identification defaults.
-# Override via CLI: python -m src.sysid.optimize sysid.max_generations=50
+# Override via CLI: python scripts/sysid.py optimize --max-generations 50
#
# These are NOT Hydra config groups — the sysid scripts use argparse.
# This file serves as documentation and can be loaded by custom wrappers.
@@ -8,18 +8,25 @@ capture:
port: /dev/cu.usbserial-0001
baud: 115200
duration: 20.0 # seconds
- amplitude: 180 # max PWM magnitude (0–255)
+ amplitude: 150 # max PWM magnitude — must match firmware MAX_MOTOR_SPEED
hold_min_ms: 50 # PRBS min hold time
hold_max_ms: 300 # PRBS max hold time
dt: 0.02 # sample period (50 Hz)
optimize:
sigma0: 0.3 # CMA-ES initial step size (in [0,1] normalised space)
- population_size: 20 # candidates per generation
- max_generations: 200 # total generations (~4000 evaluations)
+ population_size: 50 # candidates per generation
+ max_generations: 1000 # total generations (~4000 evaluations)
sim_dt: 0.002 # MuJoCo physics timestep
substeps: 10 # physics substeps per control step (ctrl_dt = 0.02s)
pos_weight: 1.0 # MSE weight for angle errors
vel_weight: 0.1 # MSE weight for velocity errors
window_duration: 0.5 # multiple-shooting window length (s); 0 = open-loop
seed: 42
+
+# Tunable hardware-realism params (added to ROTARY_CARTPOLE_PARAMS):
+# ctrl_limit — effective motor range → exported as ctrl_range in robot.yaml
+# motor_deadzone — L298N minimum |action| for torque → exported as deadzone in robot.yaml
+# Firmware sends raw (unfiltered) sensor data; EMA filtering is
+# handled on the Python side (env transforms) and is NOT part of
+# the sysid parameter search.
diff --git a/configs/training/ppo_real.yaml b/configs/training/ppo_real.yaml
index b165e56..835845e 100644
--- a/configs/training/ppo_real.yaml
+++ b/configs/training/ppo_real.yaml
@@ -8,15 +8,17 @@ defaults:
- _self_
hidden_sizes: [256, 256]
-total_timesteps: 100000
-learning_epochs: 5
-learning_rate: 0.001 # conservative — can't undo real-world damage
-entropy_loss_scale: 0.0001
-log_interval: 1024
+total_timesteps: 2000000
+learning_epochs: 10
+learning_rate: 0.0005 # conservative — can't undo real-world damage
+entropy_loss_scale: 0.01
+rollout_steps: 2048
+mini_batches: 8
+log_interval: 2048
checkpoint_interval: 5000 # frequent saves — can't rewind real hardware
initial_log_std: -0.5 # moderate initial exploration
min_log_std: -4.0
-max_log_std: 0.0 # cap σ at 1.0
+max_log_std: 2.0 # cap σ at 1.0
# Never run real-hardware training remotely
remote: false
diff --git a/configs/training/ppo_single.yaml b/configs/training/ppo_single.yaml
index 7534433..21d521c 100644
--- a/configs/training/ppo_single.yaml
+++ b/configs/training/ppo_single.yaml
@@ -8,7 +8,7 @@ defaults:
- _self_
hidden_sizes: [256, 256]
-total_timesteps: 1000000
+total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0003
entropy_loss_scale: 0.01
diff --git a/scripts/eval.py b/scripts/eval.py
new file mode 100644
index 0000000..2f01266
--- /dev/null
+++ b/scripts/eval.py
@@ -0,0 +1,379 @@
+"""Evaluate a trained policy on real hardware (or in simulation).
+
+Loads a checkpoint and runs the policy in a closed loop. For real
+hardware the serial runner talks to the ESP32; for sim it uses the
+MuJoCo runner. A digital-twin MuJoCo viewer mirrors the robot state
+in both modes.
+
+Usage (real hardware):
+ mjpython scripts/eval.py env=rotary_cartpole runner=serial \
+ checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
+
+Usage (simulation):
+ mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \
+ checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
+
+Controls:
+ Space — pause / resume policy (motor stops while paused)
+ R — reset environment
+ Esc — quit
+"""
+
+import math
+import sys
+import time
+from pathlib import Path
+
+# Ensure project root is on sys.path
+_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
+if _PROJECT_ROOT not in sys.path:
+ sys.path.insert(0, _PROJECT_ROOT)
+
+import hydra
+import mujoco
+import mujoco.viewer
+import numpy as np
+import structlog
+import torch
+from gymnasium import spaces
+from hydra.core.hydra_config import HydraConfig
+from omegaconf import DictConfig, OmegaConf
+from skrl.resources.preprocessors.torch import RunningStandardScaler
+
+from src.core.registry import build_env
+from src.models.mlp import SharedMLP
+
+logger = structlog.get_logger()
+
+
+# ── keyboard state ───────────────────────────────────────────────────
+_reset_flag = [False]
+_paused = [False]
+_quit_flag = [False]
+
+
+def _key_callback(keycode: int) -> None:
+ """Called by MuJoCo viewer on key press."""
+ if keycode == 32: # GLFW_KEY_SPACE
+ _paused[0] = not _paused[0]
+ elif keycode == 82: # GLFW_KEY_R
+ _reset_flag[0] = True
+ elif keycode == 256: # GLFW_KEY_ESCAPE
+ _quit_flag[0] = True
+
+
+# ── checkpoint loading ───────────────────────────────────────────────
+
+def _infer_hidden_sizes(state_dict: dict[str, torch.Tensor]) -> tuple[int, ...]:
+ """Infer hidden layer sizes from a SharedMLP state dict."""
+ sizes = []
+ i = 0
+ while f"net.{i}.weight" in state_dict:
+ sizes.append(state_dict[f"net.{i}.weight"].shape[0])
+ i += 2 # skip activation layers (ELU)
+ return tuple(sizes)
+
+
+def load_policy(
+ checkpoint_path: str,
+ observation_space: spaces.Space,
+ action_space: spaces.Space,
+ device: torch.device = torch.device("cpu"),
+) -> tuple[SharedMLP, RunningStandardScaler]:
+ """Load a trained SharedMLP + observation normalizer from a checkpoint.
+
+ Returns:
+ (model, state_preprocessor) ready for inference.
+ """
+ ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
+
+ # Infer architecture from saved weights.
+ hidden_sizes = _infer_hidden_sizes(ckpt["policy"])
+
+ # Reconstruct model.
+ model = SharedMLP(
+ observation_space=observation_space,
+ action_space=action_space,
+ device=device,
+ hidden_sizes=hidden_sizes,
+ )
+ model.load_state_dict(ckpt["policy"])
+ model.eval()
+
+ # Reconstruct observation normalizer.
+ state_preprocessor = RunningStandardScaler(size=observation_space, device=device)
+ state_preprocessor.running_mean = ckpt["state_preprocessor"]["running_mean"].to(device)
+ state_preprocessor.running_variance = ckpt["state_preprocessor"]["running_variance"].to(device)
+ state_preprocessor.current_count = ckpt["state_preprocessor"]["current_count"]
+ # Freeze the normalizer — don't update stats during eval.
+ state_preprocessor.training = False
+
+ logger.info(
+ "checkpoint_loaded",
+ path=checkpoint_path,
+ hidden_sizes=hidden_sizes,
+ obs_mean=[round(x, 3) for x in state_preprocessor.running_mean.tolist()],
+ obs_std=[round(x, 3) for x in state_preprocessor.running_variance.sqrt().tolist()],
+ )
+ return model, state_preprocessor
+
+
+# ── action arrow overlay ─────────────────────────────────────────────
+
+def _add_action_arrow(viewer, model, data, action_val: float) -> None:
+ """Draw an arrow showing applied torque direction."""
+ if abs(action_val) < 0.01 or model.nu == 0:
+ return
+
+ jnt_id = model.actuator_trnid[0, 0]
+ body_id = model.jnt_bodyid[jnt_id]
+ pos = data.xpos[body_id].copy()
+ pos[2] += 0.02
+
+ axis = data.xmat[body_id].reshape(3, 3) @ model.jnt_axis[jnt_id]
+ arrow_len = 0.08 * action_val
+ direction = axis * np.sign(arrow_len)
+
+ z = direction / (np.linalg.norm(direction) + 1e-8)
+ up = np.array([0, 0, 1]) if abs(z[2]) < 0.99 else np.array([0, 1, 0])
+ x = np.cross(up, z)
+ x /= np.linalg.norm(x) + 1e-8
+ y = np.cross(z, x)
+ mat = np.column_stack([x, y, z]).flatten()
+
+ rgba = np.array(
+ [0.2, 0.8, 0.2, 0.8] if action_val > 0 else [0.8, 0.2, 0.2, 0.8],
+ dtype=np.float32,
+ )
+
+ geom = viewer.user_scn.geoms[viewer.user_scn.ngeom]
+ mujoco.mjv_initGeom(
+ geom,
+ type=mujoco.mjtGeom.mjGEOM_ARROW,
+ size=np.array([0.008, 0.008, abs(arrow_len)]),
+ pos=pos,
+ mat=mat,
+ rgba=rgba,
+ )
+ viewer.user_scn.ngeom += 1
+
+
+# ── main loops ───────────────────────────────────────────────────────
+
+@hydra.main(version_base=None, config_path="../configs", config_name="config")
+def main(cfg: DictConfig) -> None:
+ choices = HydraConfig.get().runtime.choices
+ env_name = choices.get("env", "cartpole")
+ runner_name = choices.get("runner", "mujoco_single")
+
+ checkpoint_path = cfg.get("checkpoint", None)
+ if checkpoint_path is None:
+ logger.error("No checkpoint specified. Use: +checkpoint=path/to/agent.pt")
+ sys.exit(1)
+
+ # Resolve relative paths against original working directory.
+ checkpoint_path = str(Path(hydra.utils.get_original_cwd()) / checkpoint_path)
+ if not Path(checkpoint_path).exists():
+ logger.error("checkpoint_not_found", path=checkpoint_path)
+ sys.exit(1)
+
+ if runner_name == "serial":
+ _eval_serial(cfg, env_name, checkpoint_path)
+ else:
+ _eval_sim(cfg, env_name, checkpoint_path)
+
+
+def _eval_sim(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
+ """Evaluate policy in MuJoCo simulation with viewer."""
+ from src.runners.mujoco import MuJoCoRunner, MuJoCoRunnerConfig
+
+ env = build_env(env_name, cfg)
+ runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
+ runner_dict["num_envs"] = 1
+ runner = MuJoCoRunner(env=env, config=MuJoCoRunnerConfig(**runner_dict))
+
+ device = runner.device
+ model, preprocessor = load_policy(
+ checkpoint_path, runner.observation_space, runner.action_space, device
+ )
+
+ mj_model = runner._model
+ mj_data = runner._data[0]
+ dt_ctrl = runner.config.dt * runner.config.substeps
+
+ with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
+ obs, _ = runner.reset()
+ step = 0
+ episode = 0
+ episode_reward = 0.0
+
+ logger.info(
+ "eval_started",
+ env=env_name,
+ mode="simulation",
+ checkpoint=Path(checkpoint_path).name,
+ controls="Space=pause, R=reset, Esc=quit",
+ )
+
+ while viewer.is_running() and not _quit_flag[0]:
+ if _reset_flag[0]:
+ _reset_flag[0] = False
+ obs, _ = runner.reset()
+ step = 0
+ episode += 1
+ episode_reward = 0.0
+ logger.info("reset", episode=episode)
+
+ if _paused[0]:
+ viewer.sync()
+ time.sleep(0.05)
+ continue
+
+ # Policy inference
+ with torch.no_grad():
+ normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
+ action = model.act({"states": normalized_obs}, role="policy")[0]
+ action = action.clamp(-1.0, 1.0)
+
+ obs, reward, terminated, truncated, info = runner.step(action)
+ episode_reward += reward.item()
+ step += 1
+
+ # Sync viewer
+ mujoco.mj_forward(mj_model, mj_data)
+ viewer.user_scn.ngeom = 0
+ _add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
+ viewer.sync()
+
+ if step % 50 == 0:
+ joints = {mj_model.jnt(i).name: round(math.degrees(mj_data.qpos[i]), 1)
+ for i in range(mj_model.njnt)}
+ logger.debug(
+ "step", n=step, reward=round(reward.item(), 3),
+ action=round(action[0, 0].item(), 2),
+ ep_reward=round(episode_reward, 1), **joints,
+ )
+
+ if terminated.any() or truncated.any():
+ logger.info(
+ "episode_done", episode=episode, steps=step,
+ total_reward=round(episode_reward, 2),
+ reason="terminated" if terminated.any() else "truncated",
+ )
+ obs, _ = runner.reset()
+ step = 0
+ episode += 1
+ episode_reward = 0.0
+
+ time.sleep(dt_ctrl)
+
+ runner.close()
+
+
+def _eval_serial(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
+ """Evaluate policy on real hardware via serial, with digital-twin viewer."""
+ from src.runners.serial import SerialRunner, SerialRunnerConfig
+
+ env = build_env(env_name, cfg)
+ runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
+ serial_runner = SerialRunner(env=env, config=SerialRunnerConfig(**runner_dict))
+
+ device = serial_runner.device
+ model, preprocessor = load_policy(
+ checkpoint_path, serial_runner.observation_space, serial_runner.action_space, device
+ )
+
+ # Set up digital-twin MuJoCo model for visualization.
+ serial_runner._ensure_viz_model()
+ mj_model = serial_runner._viz_model
+ mj_data = serial_runner._viz_data
+
+ with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
+ obs, _ = serial_runner.reset()
+ step = 0
+ episode = 0
+ episode_reward = 0.0
+
+ logger.info(
+ "eval_started",
+ env=env_name,
+ mode="real hardware (serial)",
+ port=serial_runner.config.port,
+ checkpoint=Path(checkpoint_path).name,
+ controls="Space=pause, R=reset, Esc=quit",
+ )
+
+ while viewer.is_running() and not _quit_flag[0]:
+ if _reset_flag[0]:
+ _reset_flag[0] = False
+ serial_runner._send("M0")
+ serial_runner._drive_to_center()
+ serial_runner._wait_for_pendulum_still()
+ obs, _ = serial_runner.reset()
+ step = 0
+ episode += 1
+ episode_reward = 0.0
+ logger.info("reset", episode=episode)
+
+ if _paused[0]:
+ serial_runner._send("M0") # safety: stop motor while paused
+ serial_runner._sync_viz()
+ viewer.sync()
+ time.sleep(0.05)
+ continue
+
+ # Policy inference
+ with torch.no_grad():
+ normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
+ action = model.act({"states": normalized_obs}, role="policy")[0]
+ action = action.clamp(-1.0, 1.0)
+
+ obs, reward, terminated, truncated, info = serial_runner.step(action)
+ episode_reward += reward.item()
+ step += 1
+
+ # Sync digital twin with real sensor data.
+ serial_runner._sync_viz()
+ viewer.user_scn.ngeom = 0
+ _add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
+ viewer.sync()
+
+ if step % 25 == 0:
+ state = serial_runner._read_state()
+ logger.debug(
+ "step", n=step, reward=round(reward.item(), 3),
+ action=round(action[0, 0].item(), 2),
+ ep_reward=round(episode_reward, 1),
+ motor_enc=state["encoder_count"],
+ pend_deg=round(state["pendulum_angle"], 1),
+ )
+
+ # Check for safety / disconnection.
+ if info.get("reboot_detected") or info.get("motor_limit_exceeded"):
+ logger.error(
+ "safety_stop",
+ reboot=info.get("reboot_detected", False),
+ motor_limit=info.get("motor_limit_exceeded", False),
+ )
+ serial_runner._send("M0")
+ break
+
+ if terminated.any() or truncated.any():
+ logger.info(
+ "episode_done", episode=episode, steps=step,
+ total_reward=round(episode_reward, 2),
+ reason="terminated" if terminated.any() else "truncated",
+ )
+ # Auto-reset for next episode.
+ obs, _ = serial_runner.reset()
+ step = 0
+ episode += 1
+ episode_reward = 0.0
+
+ # Real-time pacing is handled by serial_runner.step() (dt sleep).
+
+ serial_runner.close()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/motor_sysid.py b/scripts/motor_sysid.py
new file mode 100644
index 0000000..8ef354d
--- /dev/null
+++ b/scripts/motor_sysid.py
@@ -0,0 +1,64 @@
+"""Unified CLI for motor-only system identification.
+
+Usage:
+ python scripts/motor_sysid.py capture --duration 20
+ python scripts/motor_sysid.py optimize --recording assets/motor/recordings/.npz
+ python scripts/motor_sysid.py visualize --recording assets/motor/recordings/.npz
+ python scripts/motor_sysid.py export --result assets/motor/motor_sysid_result.json
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+# Ensure project root is on sys.path
+_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
+if _PROJECT_ROOT not in sys.path:
+ sys.path.insert(0, _PROJECT_ROOT)
+
+
+def main() -> None:
+ if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
+ print(
+ "Motor System Identification\n"
+ "===========================\n"
+ "Usage: python scripts/motor_sysid.py [options]\n"
+ "\n"
+ "Commands:\n"
+ " capture Record motor trajectory under PRBS excitation\n"
+ " optimize Run CMA-ES to fit motor parameters\n"
+ " visualize Plot real vs simulated motor response\n"
+ " export Write tuned MJCF + robot.yaml files\n"
+ "\n"
+ "Workflow:\n"
+ " 1. Flash sysid firmware to ESP32 (motor-only, no limits)\n"
+ " 2. python scripts/motor_sysid.py capture --duration 20\n"
+ " 3. python scripts/motor_sysid.py optimize --recording .npz\n"
+ " 4. python scripts/motor_sysid.py visualize --recording .npz\n"
+ "\n"
+ "Run ' --help' for command-specific options."
+ )
+ sys.exit(0)
+
+ command = sys.argv[1]
+ sys.argv = [f"motor_sysid {command}"] + sys.argv[2:]
+
+ if command == "capture":
+ from src.sysid.motor.capture import main as cmd_main
+ elif command == "optimize":
+ from src.sysid.motor.optimize import main as cmd_main
+ elif command == "visualize":
+ from src.sysid.motor.visualize import main as cmd_main
+ elif command == "export":
+ from src.sysid.motor.export import main as cmd_main
+ else:
+ print(f"Unknown command: {command}")
+ print("Available commands: capture, optimize, visualize, export")
+ sys.exit(1)
+
+ cmd_main()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/core/env.py b/src/core/env.py
index 75d9e81..19f122d 100644
--- a/src/core/env.py
+++ b/src/core/env.py
@@ -48,9 +48,10 @@ class BaseEnv(abc.ABC, Generic[T]):
def compute_truncations(self, step_counts: torch.Tensor) -> torch.Tensor:
return step_counts >= self.config.max_steps
- def get_default_qpos(self, nq: int) -> list[float] | None:
- """Return the default joint positions for reset.
- Override in subclass if the URDF zero pose doesn't match
- the desired initial state (e.g. pendulum hanging down).
- Returns None to use the URDF default (all zeros)."""
- return None
+ def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
+ """Check whether the physical robot has settled enough to start an episode.
+
+ Used by the SerialRunner after driving to center and waiting for the
+ pendulum. Default: always ready (sim doesn't need settling).
+ """
+ return True
diff --git a/src/core/hardware.py b/src/core/hardware.py
deleted file mode 100644
index 2d49569..0000000
--- a/src/core/hardware.py
+++ /dev/null
@@ -1,91 +0,0 @@
-"""Real-hardware configuration — loaded from hardware.yaml next to robot.yaml.
-
-Provides robot-specific constants for the SerialRunner: encoder specs,
-safety limits, and reset behaviour. Simulation-only robots simply don't
-have a hardware.yaml (the loader returns None).
-
-Usage:
- hw = load_hardware_config("assets/rotary_cartpole")
- if hw is not None:
- counts_per_rev = hw.encoder.ppr * hw.encoder.gear_ratio * 4.0
-"""
-
-import dataclasses
-from pathlib import Path
-
-import structlog
-import yaml
-
-log = structlog.get_logger()
-
-
-@dataclasses.dataclass
-class EncoderConfig:
- """Rotary encoder parameters."""
-
- ppr: int = 11 # pulses per revolution (before quadrature)
- gear_ratio: float = 30.0 # gearbox ratio
-
- @property
- def counts_per_rev(self) -> float:
- """Total encoder counts per output-shaft revolution (quadrature)."""
- return self.ppr * self.gear_ratio * 4.0
-
-
-@dataclasses.dataclass
-class SafetyConfig:
- """Safety limits enforced by the runner (not the env)."""
-
- max_motor_angle_deg: float = 90.0 # hard termination (0 = disabled)
- soft_limit_deg: float = 40.0 # progressive penalty ramp start
-
-
-@dataclasses.dataclass
-class ResetConfig:
- """Parameters for the physical reset procedure."""
-
- drive_speed: int = 80 # PWM for bang-bang drive-to-center
- deadband: int = 15 # encoder count threshold for "centered"
- drive_timeout: float = 3.0 # seconds
-
- settle_angle_deg: float = 2.0 # pendulum angle threshold (degrees)
- settle_vel_dps: float = 5.0 # pendulum velocity threshold (deg/s)
- settle_duration: float = 0.5 # seconds the pendulum must stay still
- settle_timeout: float = 30.0 # give up after this many seconds
-
-
-@dataclasses.dataclass
-class HardwareConfig:
- """Complete real-hardware description for a robot."""
-
- encoder: EncoderConfig = dataclasses.field(default_factory=EncoderConfig)
- safety: SafetyConfig = dataclasses.field(default_factory=SafetyConfig)
- reset: ResetConfig = dataclasses.field(default_factory=ResetConfig)
-
-
-def load_hardware_config(robot_dir: str | Path) -> HardwareConfig | None:
- """Load hardware.yaml from a directory.
-
- Returns None if the file doesn't exist (simulation-only robot).
- """
- robot_dir = Path(robot_dir).resolve()
- yaml_path = robot_dir / "hardware.yaml"
-
- if not yaml_path.exists():
- return None
-
- raw = yaml.safe_load(yaml_path.read_text()) or {}
-
- encoder = EncoderConfig(**raw.get("encoder", {}))
- safety = SafetyConfig(**raw.get("safety", {}))
- reset = ResetConfig(**raw.get("reset", {}))
-
- config = HardwareConfig(encoder=encoder, safety=safety, reset=reset)
-
- log.debug(
- "hardware_config_loaded",
- robot_dir=str(robot_dir),
- counts_per_rev=encoder.counts_per_rev,
- max_motor_angle_deg=safety.max_motor_angle_deg,
- )
- return config
diff --git a/src/core/robot.py b/src/core/robot.py
index e8ef93a..c52dacd 100644
--- a/src/core/robot.py
+++ b/src/core/robot.py
@@ -11,6 +11,7 @@ Usage:
"""
import dataclasses
+import math
from pathlib import Path
import structlog
@@ -19,10 +20,20 @@ import yaml
log = structlog.get_logger()
+def _as_pair(val) -> tuple[float, float]:
+ """Convert scalar or [pos, neg] list to (pos, neg) tuple."""
+ if isinstance(val, (list, tuple)) and len(val) == 2:
+ return (float(val[0]), float(val[1]))
+ return (float(val), float(val))
+
+
@dataclasses.dataclass
class ActuatorConfig:
"""Motor/actuator attached to a joint.
+ Asymmetric fields use (positive_dir, negative_dir) tuples.
+ A scalar in YAML is expanded to a symmetric pair.
+
type:
motor — direct torque control (ctrl = normalised torque)
position — PD position servo (ctrl = target angle, needs kp)
@@ -30,12 +41,91 @@ class ActuatorConfig:
"""
joint: str = ""
type: str = "motor"
- gear: float = 1.0
- ctrl_range: tuple[float, float] = (-1.0, 1.0)
- damping: float = 0.05
- kp: float = 0.0 # proportional gain (position / velocity actuators)
- kv: float = 0.0 # derivative gain (position actuators)
- filter_tau: float = 0.0 # 1st-order filter time constant (s); 0 = no filter
+ gear: tuple[float, float] = (1.0, 1.0) # torque constant (pos, neg)
+ ctrl_range: tuple[float, float] = (-1.0, 1.0) # (lower, upper) control bounds
+ deadzone: tuple[float, float] = (0.0, 0.0) # min |ctrl| for torque (pos, neg)
+ damping: tuple[float, float] = (0.0, 0.0) # viscous damping (pos, neg)
+ frictionloss: tuple[float, float] = (0.0, 0.0) # Coulomb friction (pos, neg)
+ kp: float = 0.0 # proportional gain (position / velocity actuators)
+ kv: float = 0.0 # derivative gain (position actuators)
+ filter_tau: float = 0.0 # 1st-order filter time constant (s); 0 = no filter
+ viscous_quadratic: float = 0.0 # velocity² drag coefficient
+ back_emf_gain: float = 0.0 # back-EMF torque reduction
+
+ @property
+ def gear_avg(self) -> float:
+ return (self.gear[0] + self.gear[1]) / 2.0
+
+ @property
+ def has_motor_model(self) -> bool:
+ """True if this actuator needs the runtime motor model."""
+ return (
+ self.gear[0] != self.gear[1]
+ or self.deadzone != (0.0, 0.0)
+ or self.damping != (0.0, 0.0)
+ or self.frictionloss != (0.0, 0.0)
+ or self.viscous_quadratic > 0
+ or self.back_emf_gain > 0
+ )
+
+ def transform_ctrl(self, ctrl: float) -> float:
+ """Apply asymmetric deadzone and gear compensation to a scalar ctrl."""
+ # Deadzone
+ dz_pos, dz_neg = self.deadzone
+ if ctrl >= 0 and ctrl < dz_pos:
+ return 0.0
+ if ctrl < 0 and ctrl > -dz_neg:
+ return 0.0
+
+ # Gear compensation: rescale so ctrl × gear_avg ≈ action × gear_dir
+ gear_avg = self.gear_avg
+ if gear_avg > 1e-8:
+ gear_dir = self.gear[0] if ctrl >= 0 else self.gear[1]
+ ctrl *= gear_dir / gear_avg
+
+ return ctrl
+
+ def compute_motor_force(self, vel: float, ctrl: float) -> float:
+ """Asymmetric friction, damping, drag, back-EMF → applied torque."""
+ torque = 0.0
+
+ # Coulomb friction (direction-dependent)
+ fl_pos, fl_neg = self.frictionloss
+ if abs(vel) > 1e-6:
+ fl = fl_pos if vel > 0 else fl_neg
+ torque -= math.copysign(fl, vel)
+
+ # Viscous damping (direction-dependent)
+ damp = self.damping[0] if vel > 0 else self.damping[1]
+ torque -= damp * vel
+
+ # Quadratic velocity drag
+ if self.viscous_quadratic > 0:
+ torque -= self.viscous_quadratic * vel * abs(vel)
+
+ # Back-EMF torque reduction
+ if self.back_emf_gain > 0 and abs(ctrl) > 1e-6:
+ torque -= self.back_emf_gain * vel * math.copysign(1.0, ctrl)
+
+ return max(-10.0, min(10.0, torque))
+
+ def transform_action(self, action):
+ """Vectorised deadzone + gear compensation for a torch batch."""
+ dz_pos, dz_neg = self.deadzone
+ if dz_pos > 0 or dz_neg > 0:
+ action = action.clone()
+ pos_dead = (action >= 0) & (action < dz_pos)
+ neg_dead = (action < 0) & (action > -dz_neg)
+ action[pos_dead | neg_dead] = 0.0
+
+ gear_avg = self.gear_avg
+ if gear_avg > 1e-8 and self.gear[0] != self.gear[1]:
+ action = action.clone() if dz_pos == 0 and dz_neg == 0 else action
+ pos = action >= 0
+ action[pos] *= self.gear[0] / gear_avg
+ action[~pos] *= self.gear[1] / gear_avg
+
+ return action
@dataclasses.dataclass
@@ -84,6 +174,9 @@ def load_robot_config(robot_dir: str | Path) -> RobotConfig:
for a in raw.get("actuators", []):
if "ctrl_range" in a:
a["ctrl_range"] = tuple(a["ctrl_range"])
+ for key in ("gear", "deadzone", "damping", "frictionloss"):
+ if key in a:
+ a[key] = _as_pair(a[key])
actuators.append(ActuatorConfig(**a))
# Parse joint overrides
diff --git a/src/core/runner.py b/src/core/runner.py
index f543ff1..2b064e2 100644
--- a/src/core/runner.py
+++ b/src/core/runner.py
@@ -1,9 +1,12 @@
import dataclasses
import abc
from typing import Any, Generic, TypeVar
-from src.core.env import BaseEnv
+
+import numpy as np
import torch
+from src.core.env import BaseEnv
+
T = TypeVar("T")
@@ -21,6 +24,8 @@ class BaseRunner(abc.ABC, Generic[T]):
if getattr(self.config, "device", None) == "auto":
self.config.device = "cuda" if torch.cuda.is_available() else "cpu"
+ self._last_actions: torch.Tensor | None = None
+
self._sim_initialize(config)
self.observation_space = self.env.observation_space
@@ -68,6 +73,7 @@ class BaseRunner(abc.ABC, Generic[T]):
return obs, {}
def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict[str, Any]]:
+ self._last_actions = actions
qpos, qvel = self._sim_step(actions)
self.step_counts += 1
@@ -95,9 +101,50 @@ class BaseRunner(abc.ABC, Generic[T]):
# skrl expects (num_envs, 1) for rewards/terminated/truncated
return obs, rewards.unsqueeze(-1), terminated.unsqueeze(-1), truncated.unsqueeze(-1), info
- def render(self, env_idx: int = 0):
- """Offscreen render → RGB numpy array. Override in subclass."""
+ def _render_frame(self, env_idx: int = 0) -> np.ndarray:
+ """Return a raw RGB frame. Override in subclass."""
raise NotImplementedError("Render not implemented for this runner.")
-
+
+ def render(self, env_idx: int = 0) -> np.ndarray:
+ """Render frame with action overlay."""
+ frame = self._render_frame(env_idx)
+ if self._last_actions is not None:
+ ctrl = float(self._last_actions[env_idx, 0].clamp(-1.0, 1.0))
+ _draw_action_overlay(frame, ctrl)
+ return frame
+
def close(self) -> None:
- self._sim_close()
\ No newline at end of file
+ self._sim_close()
+
+
+def _draw_action_overlay(frame: np.ndarray, action: float) -> None:
+ """Draw an action bar on a rendered frame (no OpenCV needed).
+
+ Bar is centered horizontally: green to the right (+), red to the left (-).
+ """
+ h, w = frame.shape[:2]
+
+ bar_y = h - 30
+ bar_h = 16
+ bar_x_center = w // 2
+ bar_half_w = w // 4
+ bar_x_left = bar_x_center - bar_half_w
+ bar_x_right = bar_x_center + bar_half_w
+
+ # Background (dark grey)
+ frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40]
+
+ # Filled bar
+ fill_len = int(abs(action) * bar_half_w)
+ if action > 0:
+ color = [60, 200, 60] # green
+ x0 = bar_x_center
+ x1 = min(bar_x_center + fill_len, bar_x_right)
+ else:
+ color = [200, 60, 60] # red
+ x1 = bar_x_center
+ x0 = max(bar_x_center - fill_len, bar_x_left)
+ frame[bar_y:bar_y + bar_h, x0:x1] = color
+
+ # Center tick mark (white)
+ frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255]
\ No newline at end of file
diff --git a/src/envs/rotary_cartpole.py b/src/envs/rotary_cartpole.py
index 84eccf3..ebecca2 100644
--- a/src/envs/rotary_cartpole.py
+++ b/src/envs/rotary_cartpole.py
@@ -1,4 +1,5 @@
import dataclasses
+import math
import torch
from src.core.env import BaseEnv, BaseEnvConfig
from gymnasium import spaces
@@ -21,7 +22,12 @@ class RotaryCartPoleConfig(BaseEnvConfig):
"""
# Reward shaping
reward_upright_scale: float = 1.0 # cos(pendulum) when upright = +1
- speed_penalty_scale: float = 0.1 # penalty for high pendulum velocity near top
+ motor_vel_penalty: float = 0.01 # penalise high motor angular velocity
+ motor_angle_penalty: float = 0.05 # penalise deviation from centre
+ action_penalty: float = 0.05 # penalise large actions (energy cost)
+
+ # ── Software safety limit (env-level, on top of URDF + hardware) ──
+ motor_angle_limit_deg: float = 90.0 # terminate episode if exceeded
class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
@@ -34,12 +40,14 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
Using sin/cos avoids discontinuities at ±π for continuous joints.
Actions (1):
- Torque applied to the motor_joint.
+ Torque applied to the motor_joint (normalised to [-1, 1]).
"""
def __init__(self, config: RotaryCartPoleConfig):
super().__init__(config)
+ # ── Spaces ───────────────────────────────────────────────────
+
@property
def observation_space(self) -> spaces.Space:
return spaces.Box(low=-torch.inf, high=torch.inf, shape=(6,))
@@ -48,6 +56,8 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
def action_space(self) -> spaces.Space:
return spaces.Box(low=-1.0, high=1.0, shape=(1,))
+ # ── State building ───────────────────────────────────────────
+
def build_state(self, qpos: torch.Tensor, qvel: torch.Tensor) -> RotaryCartPoleState:
return RotaryCartPoleState(
motor_angle=qpos[:, 0],
@@ -56,32 +66,64 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
pendulum_vel=qvel[:, 1],
)
+ # ── Observations ─────────────────────────────────────────────
+
def compute_observations(self, state: RotaryCartPoleState) -> torch.Tensor:
- return torch.stack([
+ obs = [
torch.sin(state.motor_angle),
torch.cos(state.motor_angle),
torch.sin(state.pendulum_angle),
torch.cos(state.pendulum_angle),
state.motor_vel,
state.pendulum_vel,
- ], dim=-1)
+ ]
+ return torch.stack(obs, dim=-1)
+
+ # ── Rewards ──────────────────────────────────────────────────
def compute_rewards(self, state: RotaryCartPoleState, actions: torch.Tensor) -> torch.Tensor:
# Upright reward: -cos(θ) ∈ [-1, +1]
- upright = -torch.cos(state.pendulum_angle)
+ reward = -torch.cos(state.pendulum_angle) * self.config.reward_upright_scale
- # Penalise high pendulum velocity when near the top (upright).
- # "nearness" is weighted by how upright the pendulum is (0 at bottom, 1 at top).
- near_top = torch.clamp(-torch.cos(state.pendulum_angle), min=0.0) # 0‥1
- speed_penalty = self.config.speed_penalty_scale * near_top * state.pendulum_vel.abs()
+ # Penalise fast motor spinning (discourages violent oscillation)
+ reward = reward - self.config.motor_vel_penalty * state.motor_vel.pow(2)
- return upright * self.config.reward_upright_scale - speed_penalty
+ # Penalise motor deviation from centre (keep arm near zero)
+ reward = reward - self.config.motor_angle_penalty * state.motor_angle.pow(2)
+
+ # Penalise large actions (energy efficiency / smoother control)
+ reward = reward - self.config.action_penalty * actions.squeeze(-1).pow(2)
+
+ # Penalty for exceeding motor angle limit (episode also terminates)
+ limit_rad = math.radians(self.config.motor_angle_limit_deg)
+ exceeded = state.motor_angle.abs() >= limit_rad
+ reward = torch.where(exceeded, torch.tensor(-1000.0, device=reward.device), reward)
+
+ return reward
+
+ # ── Terminations ─────────────────────────────────────────────
def compute_terminations(self, state: RotaryCartPoleState) -> torch.Tensor:
- # No early termination — episode runs for max_steps (truncation only).
- # The agent must learn to swing up AND balance continuously.
- return torch.zeros_like(state.motor_angle, dtype=torch.bool)
+ # Software safety: terminate if motor angle exceeds limit.
+ limit_rad = math.radians(self.config.motor_angle_limit_deg)
+ exceeded = state.motor_angle.abs() >= limit_rad
+ return exceeded
+
+ # ── Reset readiness (for SerialRunner) ───────────────────────
+
+ def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
+ """Pendulum must be hanging still and motor near center."""
+ motor_angle = float(qpos[0, 0])
+ pend_angle = float(qpos[0, 1])
+ motor_vel = float(qvel[0, 0])
+ pend_vel = float(qvel[0, 1])
+
+ # Pendulum near hanging-down (angle ~0) and slow
+ angle_ok = abs(pend_angle) < math.radians(2.0)
+ vel_ok = abs(pend_vel) < math.radians(5.0)
+ # Motor near center and slow
+ motor_ok = abs(motor_angle) < math.radians(5.0)
+ motor_vel_ok = abs(motor_vel) < math.radians(10.0)
+ return angle_ok and vel_ok and motor_ok and motor_vel_ok
+
- def get_default_qpos(self, nq: int) -> list[float] | None:
- # qpos=0 = pendulum hanging down (joint frame rotated in URDF).
- return None
diff --git a/src/runners/mjx.py b/src/runners/mjx.py
index a92ac2d..b67ace9 100644
--- a/src/runners/mjx.py
+++ b/src/runners/mjx.py
@@ -29,7 +29,10 @@ import numpy as np
from src.core.env import BaseEnv
from src.core.runner import BaseRunner, BaseRunnerConfig
-from src.runners.mujoco import MuJoCoRunner # reuse _load_model
+from src.runners.mujoco import (
+ ActuatorLimits,
+ load_mujoco_model,
+)
log = structlog.get_logger()
@@ -39,7 +42,7 @@ class MJXRunnerConfig(BaseRunnerConfig):
num_envs: int = 1024
device: str = "cuda"
dt: float = 0.002
- substeps: int = 20
+ substeps: int = 10
class MJXRunner(BaseRunner[MJXRunnerConfig]):
@@ -64,7 +67,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
def _sim_initialize(self, config: MJXRunnerConfig) -> None:
# Step 1: Load CPU model (reuses URDF → MJCF → actuator injection)
- self._mj_model = MuJoCoRunner._load_model(self.env.robot)
+ self._mj_model = load_mujoco_model(self.env.robot)
self._mj_model.opt.timestep = config.dt
self._nq = self._mj_model.nq
self._nv = self._mj_model.nv
@@ -75,9 +78,6 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
# Step 3: Default reset state on GPU
default_data = mujoco.MjData(self._mj_model)
- default_qpos = self.env.get_default_qpos(self._nq)
- if default_qpos is not None:
- default_data.qpos[:] = default_qpos
mujoco.mj_forward(self._mj_model, default_data)
self._default_mjx_data = mjx.put_data(self._mj_model, default_data)
@@ -85,6 +85,18 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
self._rng = jax.random.PRNGKey(42)
self._batch_data = self._make_batched_data(config.num_envs)
+ # Step 4b: Build motor model info (ctrl_idx, qvel_idx, ActuatorConfig)
+ self._motor_info: list[tuple[int, int]] = []
+ self._motor_acts: list = []
+ for ctrl_idx, act in enumerate(self.env.robot.actuators):
+ if act.has_motor_model:
+ jnt_id = mujoco.mj_name2id(
+ self._mj_model, mujoco.mjtObj.mjOBJ_JOINT, act.joint,
+ )
+ qvel_idx = self._mj_model.jnt_dofadr[jnt_id]
+ self._motor_info.append((ctrl_idx, qvel_idx))
+ self._motor_acts.append(act)
+
# Step 5: JIT-compile the hot-path functions
self._compile_jit_fns(config.substeps)
@@ -121,28 +133,77 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
model = self._mjx_model
default = self._default_mjx_data
- # ── Per-actuator limit info (shared helper on MuJoCoRunner) ──
- lim = MuJoCoRunner._extract_actuator_limits(self._mj_model)
+ lim = ActuatorLimits(self._mj_model)
act_jnt_ids = jnp.array(lim.jnt_ids)
act_limited = jnp.array(lim.limited)
act_lo = jnp.array(lim.lo)
act_hi = jnp.array(lim.hi)
act_gs = jnp.array(lim.gear_sign)
+ # ── Motor model params (JAX arrays for JIT) ─────────────────
+ _has_motor = len(self._motor_info) > 0
+ if _has_motor:
+ acts = self._motor_acts
+ _ctrl_ids = jnp.array([c for c, _ in self._motor_info])
+ _qvel_ids = jnp.array([q for _, q in self._motor_info])
+ _dz_pos = jnp.array([a.deadzone[0] for a in acts])
+ _dz_neg = jnp.array([a.deadzone[1] for a in acts])
+ _gear_pos = jnp.array([a.gear[0] for a in acts])
+ _gear_neg = jnp.array([a.gear[1] for a in acts])
+ _gear_avg = jnp.array([a.gear_avg for a in acts])
+ _fl_pos = jnp.array([a.frictionloss[0] for a in acts])
+ _fl_neg = jnp.array([a.frictionloss[1] for a in acts])
+ _damp_pos = jnp.array([a.damping[0] for a in acts])
+ _damp_neg = jnp.array([a.damping[1] for a in acts])
+ _visc_quad = jnp.array([a.viscous_quadratic for a in acts])
+ _back_emf = jnp.array([a.back_emf_gain for a in acts])
+
# ── Batched step (N substeps per call) ──────────────────────
@jax.jit
def step_fn(data):
# Software limit switch: clamp ctrl once before substeps.
- # Armature + default joint limits prevent significant overshoot
- # within the substep window, so per-substep clamping isn't needed.
pos = data.qpos[:, act_jnt_ids]
ctrl = data.ctrl
at_hi = act_limited & (pos >= act_hi) & (act_gs * ctrl > 0)
at_lo = act_limited & (pos <= act_lo) & (act_gs * ctrl < 0)
- clamped = jnp.where(at_hi | at_lo, 0.0, ctrl)
- data = data.replace(ctrl=clamped)
+ ctrl = jnp.where(at_hi | at_lo, 0.0, ctrl)
+
+ if _has_motor:
+ # Deadzone + asymmetric gear compensation
+ mc = ctrl[:, _ctrl_ids]
+ mc = jnp.where((mc >= 0) & (mc < _dz_pos), 0.0, mc)
+ mc = jnp.where((mc < 0) & (mc > -_dz_neg), 0.0, mc)
+ gear_dir = jnp.where(mc >= 0, _gear_pos, _gear_neg)
+ mc = mc * gear_dir / _gear_avg
+ ctrl = ctrl.at[:, _ctrl_ids].set(mc)
+
+ data = data.replace(ctrl=ctrl)
def body(_, d):
+ if _has_motor:
+ vel = d.qvel[:, _qvel_ids]
+ mc = d.ctrl[:, _ctrl_ids]
+
+ # Coulomb friction (direction-dependent)
+ fl = jnp.where(vel > 0, _fl_pos, _fl_neg)
+ torque = -jnp.where(
+ jnp.abs(vel) > 1e-6, jnp.sign(vel) * fl, 0.0,
+ )
+ # Viscous damping (direction-dependent)
+ damp = jnp.where(vel > 0, _damp_pos, _damp_neg)
+ torque = torque - damp * vel
+ # Quadratic velocity drag
+ torque = torque - _visc_quad * vel * jnp.abs(vel)
+ # Back-EMF torque reduction
+ bemf = _back_emf * vel * jnp.sign(mc)
+ torque = torque - jnp.where(
+ jnp.abs(mc) > 1e-6, bemf, 0.0,
+ )
+ torque = jnp.clip(torque, -10.0, 10.0)
+ d = d.replace(
+ qfrc_applied=d.qfrc_applied.at[:, _qvel_ids].set(torque),
+ )
+
return jax.vmap(mjx.step, in_axes=(None, 0))(model, d)
return jax.lax.fori_loop(0, substeps, body, data)
@@ -210,7 +271,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
# ── Rendering ────────────────────────────────────────────────────
- def render(self, env_idx: int = 0) -> np.ndarray:
+ def _render_frame(self, env_idx: int = 0) -> np.ndarray:
"""Offscreen render — copies one env's state from GPU to CPU."""
self._render_data.qpos[:] = np.asarray(self._batch_data.qpos[env_idx])
self._render_data.qvel[:] = np.asarray(self._batch_data.qvel[env_idx])
@@ -222,10 +283,4 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
self._mj_model, width=640, height=480,
)
self._offscreen_renderer.update_scene(self._render_data)
- frame = self._offscreen_renderer.render().copy()
-
- # Import shared overlay helper from mujoco runner
- from src.runners.mujoco import _draw_action_overlay
- ctrl_val = float(self._render_data.ctrl[0]) if self._mj_model.nu > 0 else 0.0
- _draw_action_overlay(frame, ctrl_val)
- return frame
+ return self._offscreen_renderer.render().copy()
diff --git a/src/runners/mujoco.py b/src/runners/mujoco.py
index 73f071b..811f057 100644
--- a/src/runners/mujoco.py
+++ b/src/runners/mujoco.py
@@ -1,4 +1,6 @@
import dataclasses
+import os
+import tempfile
import xml.etree.ElementTree as ET
from pathlib import Path
@@ -10,26 +12,195 @@ from src.core.env import BaseEnv
from src.core.robot import RobotConfig
from src.core.runner import BaseRunner, BaseRunnerConfig
+
@dataclasses.dataclass
class MuJoCoRunnerConfig(BaseRunnerConfig):
num_envs: int = 16
device: str = "cpu"
- dt: float = 0.02
- substeps: int = 2
+ dt: float = 0.002
+ substeps: int = 10
-@dataclasses.dataclass
class ActuatorLimits:
- """Per-actuator joint-limit info used for software ctrl clamping.
+ """Software limit-switch: cuts motor ctrl when a joint hits its range.
- Real motor controllers have position limits that prevent the motor
- from driving into the mechanical hard stop.
+ The real robot has physical limit switches that kill motor current
+ at the travel endpoints. MuJoCo's built-in joint limits only apply
+ a spring force — they don't zero the actuator signal. This class
+ replicates the hardware behavior.
"""
- jnt_ids: np.ndarray # (nu,) joint index for each actuator
- limited: np.ndarray # (nu,) bool — whether that joint is limited
- lo: np.ndarray # (nu,) lower position bound
- hi: np.ndarray # (nu,) upper position bound
- gear_sign: np.ndarray # (nu,) sign of gear ratio
+
+ def __init__(self, model: mujoco.MjModel) -> None:
+ jnt_ids = model.actuator_trnid[:model.nu, 0]
+ self.jnt_ids = jnt_ids
+ self.limited = model.jnt_limited[jnt_ids].astype(bool)
+ self.lo = model.jnt_range[jnt_ids, 0]
+ self.hi = model.jnt_range[jnt_ids, 1]
+ self.gear_sign = np.sign(model.actuator_gear[:model.nu, 0])
+
+ def enforce(self, model: mujoco.MjModel, data: mujoco.MjData) -> None:
+ """Zero ctrl that would push past joint limits (call every substep)."""
+ if not self.limited.any():
+ return
+ pos = data.qpos[self.jnt_ids]
+ signed_ctrl = self.gear_sign * data.ctrl[:model.nu]
+ at_hi = self.limited & (pos >= self.hi) & (signed_ctrl > 0)
+ at_lo = self.limited & (pos <= self.lo) & (signed_ctrl < 0)
+ data.ctrl[at_hi | at_lo] = 0.0
+
+
+# ── Public utilities ─────────────────────────────────────────────────
+
+
+def load_mujoco_model(robot: RobotConfig) -> mujoco.MjModel:
+ """Load a URDF (or MJCF) and apply robot.yaml settings.
+
+ Single model-loading entry point for all MuJoCo-based code:
+ training runners, MJX, and system identification.
+
+ Two-step approach required because MuJoCo's URDF parser ignores
+ ```` in the ```` extension block:
+
+ 1. Load the URDF -> MuJoCo converts it to internal MJCF
+ 2. Export the MJCF XML, inject actuators + joint overrides, reload
+
+ This keeps the URDF clean (re-exportable from CAD) -- all hardware
+ tuning lives in ``robot.yaml``.
+ """
+ abs_path = robot.urdf_path.resolve()
+ model_dir = abs_path.parent
+ is_urdf = abs_path.suffix.lower() == ".urdf"
+
+ # -- Step 1: Load URDF with meshdir injection --
+ if is_urdf:
+ tree = ET.parse(abs_path)
+ root = tree.getroot()
+
+ # MuJoCo's URDF parser strips directory prefixes from mesh
+ # filenames, so we inject a
+ # block. The original URDF stays clean and simulator-agnostic.
+ meshdir = None
+ for mesh_el in root.iter("mesh"):
+ fn = mesh_el.get("filename", "")
+ parent = str(Path(fn).parent)
+ if parent and parent != ".":
+ meshdir = parent
+ break
+ if meshdir:
+ mj_ext = ET.SubElement(root, "mujoco")
+ ET.SubElement(mj_ext, "compiler", attrib={
+ "meshdir": meshdir,
+ "balanceinertia": "true",
+ })
+
+ # Write to a temp file (unique name for multiprocessing safety).
+ fd, tmp_path = tempfile.mkstemp(
+ suffix=".urdf", prefix="_mj_", dir=str(model_dir),
+ )
+ os.close(fd)
+ try:
+ tree.write(tmp_path, xml_declaration=True, encoding="unicode")
+ model_raw = mujoco.MjModel.from_xml_path(tmp_path)
+ finally:
+ Path(tmp_path).unlink(missing_ok=True)
+ else:
+ model_raw = mujoco.MjModel.from_xml_path(str(abs_path))
+
+ # If robot.yaml has no actuators/joints, we're done.
+ if not robot.actuators and not robot.joints:
+ return model_raw
+
+ # -- Step 2: Export MJCF, inject actuators + joint overrides --
+ fd, tmp_path = tempfile.mkstemp(
+ suffix=".xml", prefix="_mj_", dir=str(model_dir),
+ )
+ os.close(fd)
+ try:
+ mujoco.mj_saveLastXML(tmp_path, model_raw)
+ mjcf_str = Path(tmp_path).read_text()
+ root = ET.fromstring(mjcf_str)
+
+ # -- Inject actuators --
+ if robot.actuators:
+ act_elem = ET.SubElement(root, "actuator")
+ for act in robot.actuators:
+ attribs = {
+ "name": f"{act.joint}_{act.type}",
+ "joint": act.joint,
+ "gear": str(act.gear_avg),
+ "ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
+ }
+
+ # dyntype is only available on , not on
+ # shortcut elements like //.
+ use_general = act.filter_tau > 0
+
+ if use_general:
+ attribs["dyntype"] = "filter"
+ attribs["dynprm"] = str(act.filter_tau)
+ attribs["gaintype"] = "fixed"
+ if act.type == "position":
+ attribs["biastype"] = "affine"
+ attribs["gainprm"] = str(act.kp)
+ attribs["biasprm"] = f"0 -{act.kp} -{act.kv}"
+ elif act.type == "velocity":
+ attribs["biastype"] = "affine"
+ attribs["gainprm"] = str(act.kp)
+ attribs["biasprm"] = f"0 0 -{act.kp}"
+ else: # motor
+ attribs["biastype"] = "none"
+ ET.SubElement(act_elem, "general", attrib=attribs)
+ else:
+ if act.type == "position":
+ attribs["kp"] = str(act.kp)
+ if act.kv > 0:
+ attribs["kv"] = str(act.kv)
+ elif act.type == "velocity":
+ attribs["kp"] = str(act.kp)
+ ET.SubElement(act_elem, act.type, attrib=attribs)
+
+ # -- Apply joint overrides from robot.yaml --
+ # For actuated joints with a motor model, MuJoCo damping/frictionloss
+ # are set to 0 — the motor model handles them via qfrc_applied.
+ joint_damping: dict[str, float] = {}
+ joint_frictionloss: dict[str, float] = {}
+ for act in robot.actuators:
+ if act.has_motor_model:
+ joint_damping[act.joint] = 0.0
+ joint_frictionloss[act.joint] = 0.0
+ joint_armature: dict[str, float] = {}
+ for name, jcfg in robot.joints.items():
+ if jcfg.damping is not None:
+ joint_damping[name] = jcfg.damping
+ if jcfg.armature is not None:
+ joint_armature[name] = jcfg.armature
+ if jcfg.frictionloss is not None:
+ joint_frictionloss[name] = jcfg.frictionloss
+
+ for body in root.iter("body"):
+ for jnt in body.findall("joint"):
+ name = jnt.get("name")
+ if name in joint_damping:
+ jnt.set("damping", str(joint_damping[name]))
+ if name in joint_armature:
+ jnt.set("armature", str(joint_armature[name]))
+ if name in joint_frictionloss:
+ jnt.set("frictionloss", str(joint_frictionloss[name]))
+
+ # Disable self-collision on all geoms.
+ for geom in root.iter("geom"):
+ geom.set("contype", "0")
+ geom.set("conaffinity", "0")
+
+ modified_xml = ET.tostring(root, encoding="unicode")
+ Path(tmp_path).write_text(modified_xml)
+ return mujoco.MjModel.from_xml_path(tmp_path)
+ finally:
+ Path(tmp_path).unlink(missing_ok=True)
+
+
+# -- Runner -----------------------------------------------------------
+
class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
def __init__(self, env: BaseEnv, config: MuJoCoRunnerConfig):
@@ -38,201 +209,52 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
@property
def num_envs(self) -> int:
return self.config.num_envs
-
+
@property
def device(self) -> torch.device:
return torch.device(self.config.device)
-
- @staticmethod
- def _load_model(robot: RobotConfig) -> mujoco.MjModel:
- """Load a URDF (or MJCF) and apply robot.yaml settings.
-
- Two-step approach required because MuJoCo's URDF parser ignores
- in the extension block:
- 1. Load the URDF → MuJoCo converts it to internal MJCF
- 2. Export the MJCF XML, inject actuators + joint overrides, reload
-
- This keeps the URDF clean (re-exportable from CAD) — all hardware
- tuning lives in robot.yaml.
- """
- abs_path = robot.urdf_path.resolve()
- model_dir = abs_path.parent
- is_urdf = abs_path.suffix.lower() == ".urdf"
-
- # MuJoCo's URDF parser strips directory prefixes from mesh filenames,
- # so we inject a block into a
- # temporary copy. The original URDF stays clean and simulator-agnostic.
- if is_urdf:
- tree = ET.parse(abs_path)
- root = tree.getroot()
- # Detect the mesh subdirectory from the first mesh filename
- meshdir = None
- for mesh_el in root.iter("mesh"):
- fn = mesh_el.get("filename", "")
- parent = str(Path(fn).parent)
- if parent and parent != ".":
- meshdir = parent
- break
- if meshdir:
- mj_ext = ET.SubElement(root, "mujoco")
- ET.SubElement(mj_ext, "compiler", attrib={
- "meshdir": meshdir,
- "balanceinertia": "true",
- })
- tmp_urdf = model_dir / "_tmp_mujoco_load.urdf"
- tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
- try:
- model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
- finally:
- tmp_urdf.unlink()
- else:
- model_raw = mujoco.MjModel.from_xml_path(str(abs_path))
-
- if not robot.actuators and not robot.joints:
- return model_raw
-
- # Step 2: Export internal MJCF, inject actuators + joint overrides, reload
- tmp_mjcf = model_dir / "_tmp_actuator_inject.xml"
- try:
- mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
- mjcf_str = tmp_mjcf.read_text()
-
- root = ET.fromstring(mjcf_str)
-
- # ── Inject actuators ────────────────────────────────────
- if robot.actuators:
- act_elem = ET.SubElement(root, "actuator")
- for act in robot.actuators:
- attribs = {
- "name": f"{act.joint}_{act.type}",
- "joint": act.joint,
- "gear": str(act.gear),
- "ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
- }
-
- # Actuator dynamics: 1st-order low-pass filter on ctrl.
- # MuJoCo applies this every physics substep, which is
- # more physically accurate than an external EMA.
- # dyntype is only available on , not on
- # shortcut elements like //.
- use_general = act.filter_tau > 0
-
- if use_general:
- attribs["dyntype"] = "filter"
- attribs["dynprm"] = str(act.filter_tau)
-
- if use_general:
- attribs["gaintype"] = "fixed"
- if act.type == "position":
- attribs["biastype"] = "affine"
- attribs["gainprm"] = str(act.kp)
- attribs["biasprm"] = f"0 -{act.kp} -{act.kv}"
- elif act.type == "velocity":
- attribs["biastype"] = "affine"
- attribs["gainprm"] = str(act.kp)
- attribs["biasprm"] = f"0 0 -{act.kp}"
- else: # motor
- attribs["biastype"] = "none"
- ET.SubElement(act_elem, "general", attrib=attribs)
- else:
- if act.type == "position":
- attribs["kp"] = str(act.kp)
- if act.kv > 0:
- attribs["kv"] = str(act.kv)
- elif act.type == "velocity":
- attribs["kp"] = str(act.kp)
- ET.SubElement(act_elem, act.type, attrib=attribs)
-
- # ── Apply joint overrides from robot.yaml ───────────────
- # Merge actuator damping + explicit joint overrides
- joint_damping = {a.joint: a.damping for a in robot.actuators}
- joint_armature: dict[str, float] = {}
- joint_frictionloss: dict[str, float] = {}
- for name, jcfg in robot.joints.items():
- if jcfg.damping is not None:
- joint_damping[name] = jcfg.damping
- if jcfg.armature is not None:
- joint_armature[name] = jcfg.armature
- if jcfg.frictionloss is not None:
- joint_frictionloss[name] = jcfg.frictionloss
-
- for body in root.iter("body"):
- for jnt in body.findall("joint"):
- name = jnt.get("name")
- if name in joint_damping:
- jnt.set("damping", str(joint_damping[name]))
- if name in joint_armature:
- jnt.set("armature", str(joint_armature[name]))
- if name in joint_frictionloss:
- jnt.set("frictionloss", str(joint_frictionloss[name]))
-
- # Disable self-collision on all geoms.
- # URDF mesh convex hulls often overlap at joints (especially
- # grandparent↔grandchild bodies that MuJoCo does NOT auto-exclude),
- # causing phantom contact forces.
- for geom in root.iter("geom"):
- geom.set("contype", "0")
- geom.set("conaffinity", "0")
-
- # Joint limits use MuJoCo's default solver settings.
- # The software limit switch (zeroing ctrl at limits) + armature
- # prevent overshoot without needing ultra-stiff solref that
- # kills MJX GPU solver performance.
-
- # Step 4: Write modified MJCF and reload from file path
- # (from_xml_path resolves mesh paths relative to the file location)
- modified_xml = ET.tostring(root, encoding="unicode")
- tmp_mjcf.write_text(modified_xml)
- return mujoco.MjModel.from_xml_path(str(tmp_mjcf))
- finally:
- tmp_mjcf.unlink(missing_ok=True)
-
- @staticmethod
- def _extract_actuator_limits(model: mujoco.MjModel) -> ActuatorLimits:
- """Extract per-actuator joint-limit arrays from a loaded MjModel.
-
- Shared by MuJoCoRunner and MJXRunner so the logic isn't duplicated.
- """
- nu = model.nu
- jnt_ids = np.array([model.actuator_trnid[i, 0] for i in range(nu)])
- return ActuatorLimits(
- jnt_ids=jnt_ids,
- limited=np.array([model.jnt_limited[j] for j in jnt_ids], dtype=bool),
- lo=np.array([model.jnt_range[j, 0] for j in jnt_ids]),
- hi=np.array([model.jnt_range[j, 1] for j in jnt_ids]),
- gear_sign=np.sign(np.array([model.actuator_gear[i, 0] for i in range(nu)])),
- )
def _sim_initialize(self, config: MuJoCoRunnerConfig) -> None:
- self._model = self._load_model(self.env.robot)
+ self._model = load_mujoco_model(self.env.robot)
self._model.opt.timestep = config.dt
- self._data: list[mujoco.MjData] = [mujoco.MjData(self._model) for _ in range(config.num_envs)]
-
+ self._data: list[mujoco.MjData] = [
+ mujoco.MjData(self._model) for _ in range(config.num_envs)
+ ]
self._nq = self._model.nq
self._nv = self._model.nv
+ self._limits = ActuatorLimits(self._model)
- self._limits = self._extract_actuator_limits(self._model)
+ # Build motor model: list of (actuator_config, joint_qvel_index) for
+ # actuators that have asymmetric motor dynamics.
+ self._motor_actuators: list[tuple] = []
+ for act in self.env.robot.actuators:
+ if act.has_motor_model:
+ jnt_id = mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_JOINT, act.joint)
+ qvel_idx = self._model.jnt_dofadr[jnt_id]
+ self._motor_actuators.append((act, qvel_idx))
def _sim_step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
actions_np: np.ndarray = actions.cpu().numpy()
+ # Apply per-actuator ctrl transform (deadzone + gear compensation)
+ for act_idx, (act, _) in enumerate(self._motor_actuators):
+ for env_idx in range(self.num_envs):
+ actions_np[env_idx, act_idx] = act.transform_ctrl(
+ float(actions_np[env_idx, act_idx])
+ )
+
qpos_batch = np.zeros((self.num_envs, self._nq), dtype=np.float32)
qvel_batch = np.zeros((self.num_envs, self._nv), dtype=np.float32)
for i, data in enumerate(self._data):
data.ctrl[:] = actions_np[i]
for _ in range(self.config.substeps):
- # Software limit switch: zero out ctrl that would push
- # a joint past its position limit (like a real controller).
- lim = self._limits
- for a in range(self._model.nu):
- if lim.limited[a]:
- pos = data.qpos[lim.jnt_ids[a]]
- gs = lim.gear_sign[a]
- if pos >= lim.hi[a] and gs * data.ctrl[a] > 0:
- data.ctrl[a] = 0.0
- elif pos <= lim.lo[a] and gs * data.ctrl[a] < 0:
- data.ctrl[a] = 0.0
+ # Apply asymmetric motor forces via qfrc_applied
+ for act, qvel_idx in self._motor_actuators:
+ vel = data.qvel[qvel_idx]
+ ctrl = data.ctrl[0] # TODO: generalise for multi-actuator
+ data.qfrc_applied[qvel_idx] = act.compute_motor_force(vel, ctrl)
+ self._limits.enforce(self._model, data)
mujoco.mj_step(self._model, data)
qpos_batch[i] = data.qpos
@@ -242,7 +264,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
torch.from_numpy(qpos_batch).to(self.device),
torch.from_numpy(qvel_batch).to(self.device),
)
-
+
def _sim_reset(self, env_ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
ids = env_ids.cpu().numpy()
n = len(ids)
@@ -250,21 +272,13 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
qpos_batch = np.zeros((n, self._nq), dtype=np.float32)
qvel_batch = np.zeros((n, self._nv), dtype=np.float32)
- default_qpos = self.env.get_default_qpos(self._nq)
-
for i, env_id in enumerate(ids):
data = self._data[env_id]
mujoco.mj_resetData(self._model, data)
- # Set initial pose (env-specific, e.g. pendulum hanging down)
- if default_qpos is not None:
- data.qpos[:] = default_qpos
-
- # Add small random perturbation for exploration
+ # Small random perturbation for exploration
data.qpos[:] += np.random.uniform(-0.05, 0.05, size=self._nq)
data.qvel[:] += np.random.uniform(-0.05, 0.05, size=self._nv)
-
- # Reset smoothed ctrl so motor starts from rest
data.ctrl[:] = 0.0
qpos_batch[i] = data.qpos
@@ -275,7 +289,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
torch.from_numpy(qvel_batch).to(self.device),
)
- def render(self, env_idx: int = 0) -> np.ndarray:
+ def _render_frame(self, env_idx: int = 0) -> np.ndarray:
"""Offscreen render of a single environment."""
if not hasattr(self, "_offscreen_renderer"):
self._offscreen_renderer = mujoco.Renderer(
@@ -283,43 +297,4 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
)
mujoco.mj_forward(self._model, self._data[env_idx])
self._offscreen_renderer.update_scene(self._data[env_idx])
- frame = self._offscreen_renderer.render().copy()
-
- # Draw action bar overlay — shows ctrl[0] as a horizontal bar
- ctrl_val = float(self._data[env_idx].ctrl[0]) if self._model.nu > 0 else 0.0
- _draw_action_overlay(frame, ctrl_val)
- return frame
-
-
-def _draw_action_overlay(frame: np.ndarray, action: float) -> None:
- """Draw an action bar + text on a rendered frame (no OpenCV needed).
-
- Bar is centered horizontally: green to the right (+), red to the left (-).
- """
- h, w = frame.shape[:2]
-
- # Bar geometry
- bar_y = h - 30
- bar_h = 16
- bar_x_center = w // 2
- bar_half_w = w // 4 # max half-width of the bar
- bar_x_left = bar_x_center - bar_half_w
- bar_x_right = bar_x_center + bar_half_w
-
- # Background (dark grey)
- frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40]
-
- # Filled bar
- fill_len = int(abs(action) * bar_half_w)
- if action > 0:
- color = [60, 200, 60] # green
- x0 = bar_x_center
- x1 = min(bar_x_center + fill_len, bar_x_right)
- else:
- color = [200, 60, 60] # red
- x1 = bar_x_center
- x0 = max(bar_x_center - fill_len, bar_x_left)
- frame[bar_y:bar_y + bar_h, x0:x1] = color
-
- # Center tick mark (white)
- frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255]
\ No newline at end of file
+ return self._offscreen_renderer.render().copy()
diff --git a/src/runners/serial.py b/src/runners/serial.py
index 1c38725..396881a 100644
--- a/src/runners/serial.py
+++ b/src/runners/serial.py
@@ -11,11 +11,15 @@ Serial protocol (ESP32 firmware):
H — stop streaming
M — set motor PWM speed (-255 … 255)
- State lines received FROM the ESP32:
- S,,,,,,
- ,,,,
- ,
- (12 comma-separated fields after the ``S`` prefix)
+ State lines received FROM the ESP32 (firmware sends SI units):
+ S,,,,,,
+ (7 comma-separated fields after the ``S`` prefix)
+
+ motor_rad — motor joint angle (radians)
+ motor_vel — motor joint velocity (rad/s)
+ pend_rad — pendulum angle (radians, 0 = hanging down)
+ pend_vel — pendulum angular velocity (rad/s)
+ motor_speed — applied PWM (-255..255, for action recording)
A daemon thread continuously reads the serial stream so the control
loop never blocks on I/O.
@@ -28,7 +32,6 @@ from __future__ import annotations
import dataclasses
import logging
-import math
import threading
import time
from typing import Any
@@ -36,8 +39,6 @@ from typing import Any
import numpy as np
import torch
-from src.core.env import BaseEnv
-from src.core.hardware import HardwareConfig, load_hardware_config
from src.core.runner import BaseRunner, BaseRunnerConfig
logger = logging.getLogger(__name__)
@@ -52,9 +53,14 @@ class SerialRunnerConfig(BaseRunnerConfig):
port: str = "/dev/cu.usbserial-0001"
baud: int = 115200
- dt: float = 0.02 # control loop period (seconds), 50 Hz
+ dt: float = 0.04 # control loop period (seconds), 25 Hz
no_data_timeout: float = 2.0 # seconds of silence → disconnect
- encoder_jump_threshold: int = 200 # encoder tick jump → reboot
+
+ # Physical reset procedure
+ reset_drive_speed: int = 80 # PWM for bang-bang drive-to-center
+ reset_deadband_rad: float = 0.01 # "centered" threshold (~0.6°)
+ reset_drive_timeout: float = 3.0 # seconds to reach center
+ reset_settle_timeout: float = 30.0 # seconds to wait for pendulum
class SerialRunner(BaseRunner[SerialRunnerConfig]):
@@ -80,22 +86,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
return torch.device("cpu")
def _sim_initialize(self, config: SerialRunnerConfig) -> None:
- # Load hardware description (encoder, safety, reset params).
- hw = load_hardware_config(self.env.config.robot_path)
- if hw is None:
- raise FileNotFoundError(
- f"hardware.yaml not found in {self.env.config.robot_path}. "
- "The serial runner requires a hardware config for encoder, "
- "safety, and reset parameters."
- )
- self._hw: HardwareConfig = hw
- self._counts_per_rev: float = hw.encoder.counts_per_rev
- self._max_motor_angle_rad: float = (
- math.radians(hw.safety.max_motor_angle_deg)
- if hw.safety.max_motor_angle_deg > 0
- else 0.0
- )
-
# Joint dimensions for the rotary cartpole (motor + pendulum).
self._nq = 2
self._nv = 2
@@ -105,8 +95,16 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._serial_mod = _serial
+ # Explicitly disable hardware flow control and exclusive mode to
+ # avoid termios.error (errno 22) on macOS with CH340/CP2102 adapters.
self.ser: _serial.Serial = _serial.Serial(
- config.port, config.baud, timeout=0.05
+ port=config.port,
+ baudrate=config.baud,
+ timeout=0.05,
+ xonxoff=False,
+ rtscts=False,
+ dsrdtr=False,
+ exclusive=False,
)
time.sleep(2) # Wait for ESP32 boot.
self.ser.reset_input_buffer()
@@ -116,22 +114,17 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._serial_disconnected: bool = False
self._last_esp_ms: int = 0
self._last_data_time: float = time.monotonic()
- self._last_encoder_count: int = 0
self._streaming: bool = False
# Latest parsed state (updated by the reader thread).
+ # Firmware sends SI units — values are used directly as qpos/qvel.
self._latest_state: dict[str, Any] = {
"timestamp_ms": 0,
- "encoder_count": 0,
- "rpm": 0.0,
+ "motor_rad": 0.0,
+ "motor_vel": 0.0,
+ "pend_rad": 0.0,
+ "pend_vel": 0.0,
"motor_speed": 0,
- "at_limit": False,
- "pendulum_angle": 0.0,
- "pendulum_velocity": 0.0,
- "target_speed": 0,
- "braking": False,
- "enc_vel_cps": 0.0,
- "pendulum_ok": False,
}
self._state_lock = threading.Lock()
self._state_event = threading.Event()
@@ -148,6 +141,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._streaming = True
self._last_data_time = time.monotonic()
+ # Derive max PWM from actuator ctrl_range so the serial
+ # command range matches what MuJoCo training sees.
+ ctrl_hi = self.env.robot.actuators[0].ctrl_range[1]
+ self._max_pwm: int = round(ctrl_hi * 255)
+
# Track wall-clock time of last step for PPO-gap detection.
self._last_step_time: float = time.monotonic()
@@ -170,41 +168,22 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._sim_reset(all_ids)
self.step_counts.zero_()
- step_start = time.monotonic()
-
- # Map normalised action [-1, 1] → PWM [-255, 255].
+ # Map normalised action [-1, 1] → PWM, scaled by ctrl_range.
action_val = float(actions[0, 0].clamp(-1.0, 1.0))
- motor_speed = int(action_val * 255)
+ motor_speed = int(action_val * self._max_pwm)
self._send(f"M{motor_speed}")
- # Enforce dt wall-clock timing.
- elapsed = time.monotonic() - step_start
- remaining = self.config.dt - elapsed
- if remaining > 0:
- time.sleep(remaining)
+ # Stream-driven: block until the firmware sends the next state
+ # line (~20 ms at 50 Hz).
+ state = self._read_state_blocking(timeout=0.1)
- # Read latest sensor data (non-blocking — dt sleep ensures freshness).
- state = self._read_state()
+ # Firmware sends SI units — use directly.
+ qpos, qvel = self._state_to_tensors(state)
- motor_angle = (
- state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
- )
- motor_vel = (
- state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
- )
- pendulum_angle = math.radians(state["pendulum_angle"])
- pendulum_vel = math.radians(state["pendulum_velocity"])
-
- # Cache motor angle for safety check in step() — avoids a second read.
- self._last_motor_angle_rad = motor_angle
+ # Cache for _sync_viz().
+ self._last_sync_state = state
self._last_step_time = time.monotonic()
- qpos = torch.tensor(
- [[motor_angle, pendulum_angle]], dtype=torch.float32
- )
- qvel = torch.tensor(
- [[motor_vel, pendulum_vel]], dtype=torch.float32
- )
return qpos, qvel
def _sim_reset(
@@ -232,59 +211,42 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
# Physically return the motor to the centre position.
self._drive_to_center()
- # Wait until the pendulum settles.
- self._wait_for_pendulum_still()
+ # Wait until the env considers the robot settled.
+ self._wait_for_settle()
# Refresh data timer so health checks don't false-positive.
self._last_data_time = time.monotonic()
# Read settled state and return as qpos/qvel.
state = self._read_state_blocking()
- motor_angle = (
- state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
- )
- motor_vel = (
- state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
- )
- pendulum_angle = math.radians(state["pendulum_angle"])
- pendulum_vel = math.radians(state["pendulum_velocity"])
+ qpos, qvel = self._state_to_tensors(state)
- qpos = torch.tensor(
- [[motor_angle, pendulum_angle]], dtype=torch.float32
- )
- qvel = torch.tensor(
- [[motor_vel, pendulum_vel]], dtype=torch.float32
- )
+ self._last_sync_state = state
return qpos, qvel
def _sim_close(self) -> None:
self._reader_running = False
self._streaming = False
- self._send("H") # Stop streaming.
- self._send("M0") # Stop motor.
+ self._send("H")
+ self._send("M0")
time.sleep(0.1)
self._reader_thread.join(timeout=1.0)
self.ser.close()
- if hasattr(self, "_offscreen_renderer") and self._offscreen_renderer is not None:
- self._offscreen_renderer.close()
+ super()._sim_close()
# ------------------------------------------------------------------
# MuJoCo digital-twin rendering
# ------------------------------------------------------------------
def _ensure_viz_model(self) -> None:
- """Lazily load the MuJoCo model for visualisation (digital twin).
-
- Reuses the same URDF + robot.yaml that the MuJoCoRunner would use,
- but only for rendering — no physics stepping.
- """
+ """Lazily load the MuJoCo model for visualisation (digital twin)."""
if hasattr(self, "_viz_model"):
return
import mujoco
- from src.runners.mujoco import MuJoCoRunner
+ from src.runners.mujoco import load_mujoco_model
- self._viz_model = MuJoCoRunner._load_model(self.env.robot)
+ self._viz_model = load_mujoco_model(self.env.robot)
self._viz_data = mujoco.MjData(self._viz_model)
self._offscreen_renderer = None
@@ -293,32 +255,21 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
import mujoco
self._ensure_viz_model()
- state = self._read_state()
- # Set joint positions from serial data.
- motor_angle = (
- state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
- )
- pendulum_angle = math.radians(state["pendulum_angle"])
- self._viz_data.qpos[0] = motor_angle
- self._viz_data.qpos[1] = pendulum_angle
+ last_state = getattr(self, "_last_sync_state", None)
+ if last_state is None:
+ last_state = self._read_state()
- # Set joint velocities (for any velocity-dependent visuals).
- motor_vel = (
- state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
- )
- pendulum_vel = math.radians(state["pendulum_velocity"])
- self._viz_data.qvel[0] = motor_vel
- self._viz_data.qvel[1] = pendulum_vel
+ # Firmware sends radians — use directly.
+ self._viz_data.qpos[0] = last_state["motor_rad"]
+ self._viz_data.qpos[1] = last_state["pend_rad"]
+ self._viz_data.qvel[0] = last_state["motor_vel"]
+ self._viz_data.qvel[1] = last_state["pend_vel"]
- # Forward kinematics (updates body positions for rendering).
mujoco.mj_forward(self._viz_model, self._viz_data)
- def render(self, env_idx: int = 0) -> np.ndarray:
- """Offscreen render of the digital-twin MuJoCo model.
-
- Called by VideoRecordingTrainer during training to capture frames.
- """
+ def _render_frame(self, env_idx: int = 0) -> np.ndarray:
+ """Offscreen render of the digital-twin MuJoCo model."""
import mujoco
self._sync_viz()
@@ -340,7 +291,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
# Check for ESP32 reboot / disconnect BEFORE stepping.
if self._rebooted or self._serial_disconnected:
self._send("M0")
- # Return a terminal observation with penalty.
qpos, qvel = self._make_current_state()
state = self.env.build_state(qpos, qvel)
obs = self.env.compute_observations(state)
@@ -359,16 +309,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
rewards = torch.tensor([[-100.0]])
info["reboot_detected"] = True
- # Check motor angle against hard safety limit.
- # Uses the cached value from _sim_step — no extra serial read.
- if self._max_motor_angle_rad > 0:
- motor_angle = abs(getattr(self, "_last_motor_angle_rad", 0.0))
- if motor_angle >= self._max_motor_angle_rad:
- self._send("M0")
- terminated = torch.tensor([[True]])
- rewards = torch.tensor([[-100.0]])
- info["motor_limit_exceeded"] = True
-
# Always stop motor on episode end.
if terminated.any() or truncated.any():
self._send("M0")
@@ -387,7 +327,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._serial_disconnected = True
def _serial_reader(self) -> None:
- """Background thread: continuously read and parse serial lines."""
+ """Background thread: continuously read and parse serial lines.
+
+ Protocol: ``S,,,,,,``
+ (7 comma-separated fields). Firmware sends SI units directly.
+ """
while self._reader_running:
try:
if self.ser.in_waiting:
@@ -405,9 +349,15 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
if line.startswith("S,"):
parts = line.split(",")
- if len(parts) >= 12:
- esp_ms = int(parts[1])
- enc = int(parts[2])
+ if len(parts) >= 7:
+ try:
+ esp_ms = int(parts[1])
+ except ValueError:
+ logger.debug(
+ "Malformed state line (header): %s",
+ line,
+ )
+ continue
# Detect reboot: timestamp jumped backwards.
if (
@@ -422,38 +372,24 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
esp_ms,
)
- # Detect reboot: encoder snapped to 0 from
- # a far position.
- if (
- abs(self._last_encoder_count)
- > self.config.encoder_jump_threshold
- and abs(enc) < 5
- ):
- self._rebooted = True
- logger.critical(
- "ESP32 reboot detected: encoder"
- " jumped %d -> %d",
- self._last_encoder_count,
- enc,
- )
-
self._last_esp_ms = esp_ms
- self._last_encoder_count = enc
self._last_data_time = time.monotonic()
- parsed: dict[str, Any] = {
- "timestamp_ms": esp_ms,
- "encoder_count": enc,
- "rpm": float(parts[3]),
- "motor_speed": int(parts[4]),
- "at_limit": bool(int(parts[5])),
- "pendulum_angle": float(parts[6]),
- "pendulum_velocity": float(parts[7]),
- "target_speed": int(parts[8]),
- "braking": bool(int(parts[9])),
- "enc_vel_cps": float(parts[10]),
- "pendulum_ok": bool(int(parts[11])),
- }
+ try:
+ parsed: dict[str, Any] = {
+ "timestamp_ms": esp_ms,
+ "motor_rad": float(parts[2]),
+ "motor_vel": float(parts[3]),
+ "pend_rad": float(parts[4]),
+ "pend_vel": float(parts[5]),
+ "motor_speed": int(parts[6]),
+ }
+ except ValueError:
+ logger.debug(
+ "Malformed state line (fields): %s",
+ line,
+ )
+ continue
with self._state_lock:
self._latest_state = parsed
self._state_event.set()
@@ -483,81 +419,68 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
return True
def _read_state(self) -> dict[str, Any]:
- """Return the most recent state from the reader thread (non-blocking).
-
- The background thread updates at ~50 Hz and `_sim_step` already
- sleeps for `dt` before calling this, so the data is always fresh.
- """
+ """Return the most recent state from the reader thread (non-blocking)."""
with self._state_lock:
return dict(self._latest_state)
def _read_state_blocking(self, timeout: float = 0.05) -> dict[str, Any]:
- """Wait for a fresh sample, then return it.
-
- Used during reset / settling where we need to guarantee we have
- a new reading (no prior dt sleep).
- """
+ """Wait for a fresh sample, then return it."""
self._state_event.clear()
self._state_event.wait(timeout=timeout)
with self._state_lock:
return dict(self._latest_state)
- def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]:
- """Build qpos/qvel from current sensor data (utility)."""
- state = self._read_state_blocking()
- motor_angle = (
- state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
- )
- motor_vel = (
- state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
- )
- pendulum_angle = math.radians(state["pendulum_angle"])
- pendulum_vel = math.radians(state["pendulum_velocity"])
-
+ def _state_to_tensors(
+ self, state: dict[str, Any],
+ ) -> tuple[torch.Tensor, torch.Tensor]:
+ """Convert a parsed state dict to (qpos, qvel) tensors."""
qpos = torch.tensor(
- [[motor_angle, pendulum_angle]], dtype=torch.float32
+ [[state["motor_rad"], state["pend_rad"]]], dtype=torch.float32
)
qvel = torch.tensor(
- [[motor_vel, pendulum_vel]], dtype=torch.float32
+ [[state["motor_vel"], state["pend_vel"]]], dtype=torch.float32
)
return qpos, qvel
+ def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]:
+ """Build qpos/qvel from current sensor data (utility)."""
+ return self._state_to_tensors(self._read_state_blocking())
+
# ------------------------------------------------------------------
# Physical reset helpers
# ------------------------------------------------------------------
def _drive_to_center(self) -> None:
- """Drive the motor back toward encoder=0 using bang-bang control."""
- rc = self._hw.reset
+ """Drive the motor back toward center using bang-bang control."""
+ cfg = self.config
start = time.time()
- while time.time() - start < rc.drive_timeout:
+ while time.time() - start < cfg.reset_drive_timeout:
state = self._read_state_blocking()
- enc = state["encoder_count"]
- if abs(enc) < rc.deadband:
+ motor_rad = state["motor_rad"]
+ if abs(motor_rad) < cfg.reset_deadband_rad:
break
- speed = rc.drive_speed if enc < 0 else -rc.drive_speed
+ speed = cfg.reset_drive_speed if motor_rad < 0 else -cfg.reset_drive_speed
self._send(f"M{speed}")
time.sleep(0.05)
self._send("M0")
time.sleep(0.2)
- def _wait_for_pendulum_still(self) -> None:
- """Block until the pendulum has settled (angle and velocity near zero)."""
- rc = self._hw.reset
+ def _wait_for_settle(self) -> None:
+ """Block until the env considers the robot ready for a new episode."""
+ cfg = self.config
stable_since: float | None = None
start = time.monotonic()
- while time.monotonic() - start < rc.settle_timeout:
+ while time.monotonic() - start < cfg.reset_settle_timeout:
state = self._read_state_blocking()
- angle_ok = abs(state["pendulum_angle"]) < rc.settle_angle_deg
- vel_ok = abs(state["pendulum_velocity"]) < rc.settle_vel_dps
+ qpos, qvel = self._state_to_tensors(state)
- if angle_ok and vel_ok:
+ if self.env.is_reset_ready(qpos, qvel):
if stable_since is None:
stable_since = time.monotonic()
- elif time.monotonic() - stable_since >= rc.settle_duration:
+ elif time.monotonic() - stable_since >= 0.5:
logger.info(
- "Pendulum settled after %.2f s",
+ "Robot settled after %.2f s",
time.monotonic() - start,
)
return
@@ -566,6 +489,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
time.sleep(0.02)
logger.warning(
- "Pendulum did not fully settle within %.1f s — proceeding anyway.",
- rc.settle_timeout,
+ "Robot did not settle within %.1f s — proceeding anyway.",
+ cfg.reset_settle_timeout,
)
diff --git a/src/sysid/_urdf.py b/src/sysid/_urdf.py
new file mode 100644
index 0000000..d703aca
--- /dev/null
+++ b/src/sysid/_urdf.py
@@ -0,0 +1,79 @@
+"""URDF XML helpers shared by sysid rollout and export modules."""
+
+from __future__ import annotations
+
+import xml.etree.ElementTree as ET
+
+
+def set_mass(inertial: ET.Element, mass: float | None) -> None:
+ if mass is None:
+ return
+ mass_el = inertial.find("mass")
+ if mass_el is not None:
+ mass_el.set("value", str(mass))
+
+
+def set_com(
+ inertial: ET.Element,
+ x: float | None,
+ y: float | None,
+ z: float | None,
+) -> None:
+ origin = inertial.find("origin")
+ if origin is None:
+ return
+ xyz = origin.get("xyz", "0 0 0").split()
+ if x is not None:
+ xyz[0] = str(x)
+ if y is not None:
+ xyz[1] = str(y)
+ if z is not None:
+ xyz[2] = str(z)
+ origin.set("xyz", " ".join(xyz))
+
+
+def set_inertia(
+ inertial: ET.Element,
+ ixx: float | None = None,
+ iyy: float | None = None,
+ izz: float | None = None,
+ ixy: float | None = None,
+ iyz: float | None = None,
+ ixz: float | None = None,
+) -> None:
+ ine = inertial.find("inertia")
+ if ine is None:
+ return
+ for attr, val in [
+ ("ixx", ixx), ("iyy", iyy), ("izz", izz),
+ ("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
+ ]:
+ if val is not None:
+ ine.set(attr, str(val))
+
+
+def patch_link_inertials(
+ root: ET.Element,
+ params: dict[str, float],
+) -> None:
+ """Patch arm and pendulum inertial parameters in a URDF ElementTree root."""
+ for link in root.iter("link"):
+ link_name = link.get("name", "")
+ inertial = link.find("inertial")
+ if inertial is None:
+ continue
+
+ if link_name == "arm":
+ set_mass(inertial, params.get("arm_mass"))
+ set_com(inertial, params.get("arm_com_x"),
+ params.get("arm_com_y"), params.get("arm_com_z"))
+
+ elif link_name == "pendulum":
+ set_mass(inertial, params.get("pendulum_mass"))
+ set_com(inertial, params.get("pendulum_com_x"),
+ params.get("pendulum_com_y"), params.get("pendulum_com_z"))
+ set_inertia(inertial,
+ ixx=params.get("pendulum_ixx"),
+ iyy=params.get("pendulum_iyy"),
+ izz=params.get("pendulum_izz"),
+ ixy=params.get("pendulum_ixy"))
diff --git a/src/sysid/capture.py b/src/sysid/capture.py
index 9c54ea4..f202c74 100644
--- a/src/sysid/capture.py
+++ b/src/sysid/capture.py
@@ -6,6 +6,10 @@ the system, and records motor + pendulum angles and velocities at ~50 Hz.
Saves a compressed numpy archive (.npz) that the optimizer can replay
in simulation to fit physics parameters.
+Serial protocol (same as SerialRunner):
+ S,,,,,,
+ (7 comma-separated fields — firmware sends SI units)
+
Usage:
python -m src.sysid.capture \
--robot-path assets/rotary_cartpole \
@@ -17,7 +21,6 @@ from __future__ import annotations
import argparse
import math
-import os
import random
import threading
import time
@@ -27,7 +30,6 @@ from typing import Any
import numpy as np
import structlog
-import yaml
log = structlog.get_logger()
@@ -36,25 +38,24 @@ log = structlog.get_logger()
def _parse_state_line(line: str) -> dict[str, Any] | None:
- """Parse an ``S,…`` state line from the ESP32."""
+ """Parse an ``S,…`` state line from the ESP32.
+
+ Format: S,,,,,,
+ (7 comma-separated fields, firmware sends SI units)
+ """
if not line.startswith("S,"):
return None
parts = line.split(",")
- if len(parts) < 12:
+ if len(parts) < 7:
return None
try:
return {
"timestamp_ms": int(parts[1]),
- "encoder_count": int(parts[2]),
- "rpm": float(parts[3]),
- "motor_speed": int(parts[4]),
- "at_limit": bool(int(parts[5])),
- "pendulum_angle": float(parts[6]),
- "pendulum_velocity": float(parts[7]),
- "target_speed": int(parts[8]),
- "braking": bool(int(parts[9])),
- "enc_vel_cps": float(parts[10]),
- "pendulum_ok": bool(int(parts[11])),
+ "motor_rad": float(parts[2]),
+ "motor_vel": float(parts[3]),
+ "pend_rad": float(parts[4]),
+ "pend_vel": float(parts[5]),
+ "motor_speed": int(parts[6]),
}
except (ValueError, IndexError):
return None
@@ -64,7 +65,12 @@ def _parse_state_line(line: str) -> dict[str, Any] | None:
class _SerialReader:
- """Minimal background reader for the ESP32 serial stream."""
+ """Minimal background reader for the ESP32 serial stream.
+
+ Uses a sequence counter so ``read_blocking()`` guarantees it returns
+ a *new* state line (not a stale repeat). This keeps the capture
+ loop locked to the firmware's 50 Hz tick.
+ """
def __init__(self, port: str, baud: int = 115200):
import serial as _serial
@@ -75,14 +81,16 @@ class _SerialReader:
self.ser.reset_input_buffer()
self._latest: dict[str, Any] = {}
+ self._seq: int = 0 # incremented on every new state line
self._lock = threading.Lock()
- self._event = threading.Event()
+ self._cond = threading.Condition(self._lock)
self._running = True
self._thread = threading.Thread(target=self._reader_loop, daemon=True)
self._thread.start()
def _reader_loop(self) -> None:
+ _debug_count = 0
while self._running:
try:
if self.ser.in_waiting:
@@ -91,11 +99,16 @@ class _SerialReader:
.decode("utf-8", errors="ignore")
.strip()
)
+ # Debug: log first 10 raw lines so we can see what the firmware sends.
+ if _debug_count < 10 and line:
+ log.info("serial_raw_line", line=repr(line), count=_debug_count)
+ _debug_count += 1
parsed = _parse_state_line(line)
if parsed is not None:
- with self._lock:
+ with self._cond:
self._latest = parsed
- self._event.set()
+ self._seq += 1
+ self._cond.notify_all()
else:
time.sleep(0.001)
except (OSError, self._serial_mod.SerialException):
@@ -108,14 +121,19 @@ class _SerialReader:
except (OSError, self._serial_mod.SerialException):
log.critical("serial_send_failed", cmd=cmd)
- def read(self) -> dict[str, Any]:
- with self._lock:
- return dict(self._latest)
-
def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
- self._event.clear()
- self._event.wait(timeout=timeout)
- return self.read()
+ """Wait until a *new* state line arrives, then return it.
+
+ Uses a sequence counter to guarantee the returned state is
+ different from whatever was available before this call.
+ """
+ with self._cond:
+ seq_before = self._seq
+ if not self._cond.wait_for(
+ lambda: self._seq > seq_before, timeout=timeout
+ ):
+ return {} # timeout — no new data
+ return dict(self._latest)
def close(self) -> None:
self._running = False
@@ -139,7 +157,7 @@ class _PRBSExcitation:
def __init__(
self,
- amplitude: int = 180,
+ amplitude: int = 150,
hold_min_ms: int = 50,
hold_max_ms: int = 300,
):
@@ -169,42 +187,39 @@ def capture(
port: str = "/dev/cu.usbserial-0001",
baud: int = 115200,
duration: float = 20.0,
- amplitude: int = 180,
+ amplitude: int = 150,
hold_min_ms: int = 50,
hold_max_ms: int = 300,
dt: float = 0.02,
+ motor_angle_limit_deg: float = 90.0,
) -> Path:
"""Run the capture procedure and return the path to the saved .npz file.
+ The capture loop is **stream-driven**: it blocks on each incoming
+ state line from the firmware (which arrives at 50 Hz), sends the
+ next motor command immediately, and records both.
+
Parameters
----------
- robot_path : path to robot asset directory (contains hardware.yaml)
+ robot_path : path to robot asset directory
port : serial port for ESP32
baud : baud rate
duration : capture duration in seconds
- amplitude : max PWM magnitude for excitation (0–255)
+ amplitude : max PWM magnitude for excitation
hold_min_ms / hold_max_ms : random hold time range (ms)
- dt : target sampling period (seconds), default 50 Hz
+ dt : nominal sample period for buffer sizing (seconds)
+ motor_angle_limit_deg : safety limit for motor angle
"""
robot_path = Path(robot_path).resolve()
- # Load hardware config for encoder conversion + safety.
- hw_yaml = robot_path / "hardware.yaml"
- if not hw_yaml.exists():
- raise FileNotFoundError(f"hardware.yaml not found in {robot_path}")
- raw_hw = yaml.safe_load(hw_yaml.read_text())
- ppr = raw_hw.get("encoder", {}).get("ppr", 11)
- gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0)
- counts_per_rev: float = ppr * gear_ratio * 4.0
- max_motor_deg = raw_hw.get("safety", {}).get("max_motor_angle_deg", 90.0)
- max_motor_rad = math.radians(max_motor_deg) if max_motor_deg > 0 else 0.0
+ max_motor_rad = math.radians(motor_angle_limit_deg) if motor_angle_limit_deg > 0 else 0.0
# Connect.
reader = _SerialReader(port, baud)
excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)
- # Prepare recording buffers.
- max_samples = int(duration / dt) + 500 # headroom
+ # Prepare recording buffers (generous headroom).
+ max_samples = int(duration / dt) + 500
rec_time = np.zeros(max_samples, dtype=np.float64)
rec_action = np.zeros(max_samples, dtype=np.float64)
rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
@@ -222,63 +237,96 @@ def capture(
duration=duration,
amplitude=amplitude,
hold_range_ms=f"{hold_min_ms}–{hold_max_ms}",
- dt=dt,
+ mode="stream-driven (firmware clock)",
)
idx = 0
+ pwm = 0
+ last_esp_ms = -1 # firmware timestamp of last recorded sample
+ no_data_count = 0 # consecutive timeouts with no data
t0 = time.monotonic()
try:
while True:
- loop_start = time.monotonic()
- elapsed = loop_start - t0
+ # Block until the firmware sends the next state line (~20 ms).
+ # Timeout at 100 ms prevents hanging if the ESP32 disconnects.
+ state = reader.read_blocking(timeout=0.1)
+ if not state:
+ no_data_count += 1
+ if no_data_count == 30: # 3 seconds with no data
+ log.warning(
+ "no_data_received",
+ msg="No state lines from firmware after 3s. "
+ "Check: is the ESP32 powered? Is it running the right firmware? "
+ "Try pressing the RESET button.",
+ )
+ if no_data_count == 100: # 10 seconds
+ log.critical(
+ "no_data_timeout",
+ msg="No data for 10s — aborting capture.",
+ )
+ break
+ continue # no data yet — retry
+ no_data_count = 0
+
+ # Deduplicate: the firmware may send multiple state lines per
+ # tick (e.g. M-command echo + tick). Only record one sample
+ # per unique firmware timestamp.
+ esp_ms = state.get("timestamp_ms", 0)
+ if esp_ms == last_esp_ms:
+ continue
+ last_esp_ms = esp_ms
+
+ elapsed = time.monotonic() - t0
if elapsed >= duration:
break
- # Get excitation PWM.
+ # Get excitation PWM for the NEXT tick.
pwm = excitation()
- # Safety: reverse/zero if near motor limit.
- state = reader.read()
- if state:
- motor_angle_rad = (
- state.get("encoder_count", 0) / counts_per_rev * 2.0 * math.pi
- )
- if max_motor_rad > 0:
- margin = max_motor_rad * 0.85 # start braking at 85%
- if motor_angle_rad > margin and pwm > 0:
- pwm = -abs(pwm) # reverse
- elif motor_angle_rad < -margin and pwm < 0:
- pwm = abs(pwm) # reverse
+ # Safety: keep the arm well within its mechanical range.
+ # Firmware sends motor angle in radians — use directly.
+ motor_angle_rad = state.get("motor_rad", 0.0)
+ if max_motor_rad > 0:
+ ratio = motor_angle_rad / max_motor_rad # signed, -1..+1
+ abs_ratio = abs(ratio)
- # Send command.
+ if abs_ratio > 0.90:
+ # Deep in the danger zone — force a strong return.
+ brake_strength = min(1.0, (abs_ratio - 0.90) / 0.10) # 0→1
+ brake_pwm = int(amplitude * (0.5 + 0.5 * brake_strength))
+ pwm = -brake_pwm if ratio > 0 else brake_pwm
+ elif abs_ratio > 0.70:
+ # Soft zone — only allow actions pointing back to centre.
+ if ratio > 0 and pwm > 0:
+ pwm = -abs(pwm)
+ elif ratio < 0 and pwm < 0:
+ pwm = abs(pwm)
+
+ # Send command immediately — it will take effect on the next tick.
reader.send(f"M{pwm}")
- # Wait for fresh data.
- time.sleep(max(0, dt - (time.monotonic() - loop_start) - 0.005))
- state = reader.read_blocking(timeout=dt)
+ # Record this tick's state + the action the motor *actually*
+ # received. Firmware sends SI units — use directly.
+ motor_angle = state.get("motor_rad", 0.0)
+ motor_vel = state.get("motor_vel", 0.0)
+ pend_angle = state.get("pend_rad", 0.0)
+ pend_vel = state.get("pend_vel", 0.0)
+ # Firmware constrains to ±255; normalise to [-1, 1].
+ applied = state.get("motor_speed", 0)
+ action_norm = max(-255, min(255, applied)) / 255.0
- if state:
- enc = state.get("encoder_count", 0)
- motor_angle = enc / counts_per_rev * 2.0 * math.pi
- motor_vel = (
- state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi
- )
- pend_angle = math.radians(state.get("pendulum_angle", 0.0))
- pend_vel = math.radians(state.get("pendulum_velocity", 0.0))
+ if idx < max_samples:
+ rec_time[idx] = elapsed
+ rec_action[idx] = action_norm
+ rec_motor_angle[idx] = motor_angle
+ rec_motor_vel[idx] = motor_vel
+ rec_pend_angle[idx] = pend_angle
+ rec_pend_vel[idx] = pend_vel
+ idx += 1
+ else:
+ break # buffer full
- # Normalised action: PWM / 255 → [-1, 1]
- action_norm = pwm / 255.0
-
- if idx < max_samples:
- rec_time[idx] = elapsed
- rec_action[idx] = action_norm
- rec_motor_angle[idx] = motor_angle
- rec_motor_vel[idx] = motor_vel
- rec_pend_angle[idx] = pend_angle
- rec_pend_vel[idx] = pend_vel
- idx += 1
-
- # Progress.
+ # Progress (every 50 samples ≈ once per second at 50 Hz).
if idx % 50 == 0:
log.info(
"capture_progress",
@@ -287,11 +335,6 @@ def capture(
pwm=pwm,
)
- # Pace to dt.
- remaining = dt - (time.monotonic() - loop_start)
- if remaining > 0:
- time.sleep(remaining)
-
finally:
reader.send("M0")
reader.close()
@@ -339,7 +382,7 @@ def main() -> None:
"--robot-path",
type=str,
default="assets/rotary_cartpole",
- help="Path to robot asset directory (contains hardware.yaml)",
+ help="Path to robot asset directory",
)
parser.add_argument(
"--port",
@@ -352,7 +395,8 @@ def main() -> None:
"--duration", type=float, default=20.0, help="Capture duration (s)"
)
parser.add_argument(
- "--amplitude", type=int, default=180, help="Max PWM magnitude (0–255)"
+ "--amplitude", type=int, default=150,
+ help="Max PWM magnitude (should not exceed firmware MAX_MOTOR_SPEED=150)",
)
parser.add_argument(
"--hold-min-ms", type=int, default=50, help="Min hold time (ms)"
@@ -361,7 +405,11 @@ def main() -> None:
"--hold-max-ms", type=int, default=300, help="Max hold time (ms)"
)
parser.add_argument(
- "--dt", type=float, default=0.02, help="Sample period (s)"
+ "--dt", type=float, default=0.02, help="Nominal sample period for buffer sizing (s)"
+ )
+ parser.add_argument(
+ "--motor-angle-limit", type=float, default=90.0,
+ help="Motor angle safety limit in degrees (0 = disabled)",
)
args = parser.parse_args()
@@ -374,6 +422,7 @@ def main() -> None:
hold_min_ms=args.hold_min_ms,
hold_max_ms=args.hold_max_ms,
dt=args.dt,
+ motor_angle_limit_deg=args.motor_angle_limit,
)
diff --git a/src/sysid/export.py b/src/sysid/export.py
index 4dc02a9..ae33855 100644
--- a/src/sysid/export.py
+++ b/src/sysid/export.py
@@ -8,20 +8,21 @@ alongside the originals in the robot asset directory.
from __future__ import annotations
import copy
-import json
-import shutil
import xml.etree.ElementTree as ET
from pathlib import Path
import structlog
import yaml
+from src.sysid._urdf import patch_link_inertials
+
log = structlog.get_logger()
def export_tuned_files(
robot_path: str | Path,
params: dict[str, float],
+ motor_params: dict[str, float] | None = None,
) -> tuple[Path, Path]:
"""Write tuned URDF and robot.yaml files.
@@ -29,6 +30,8 @@ def export_tuned_files(
----------
robot_path : robot asset directory (contains robot.yaml + *.urdf)
params : dict of parameter name → tuned value (from optimizer)
+ motor_params : locked motor sysid params (asymmetric model).
+ If provided, motor joint parameters come from here.
Returns
-------
@@ -43,38 +46,7 @@ def export_tuned_files(
# ── Tune URDF ────────────────────────────────────────────────
tree = ET.parse(urdf_path)
- root = tree.getroot()
-
- for link in root.iter("link"):
- link_name = link.get("name", "")
- inertial = link.find("inertial")
- if inertial is None:
- continue
-
- if link_name == "arm":
- _set_mass(inertial, params.get("arm_mass"))
- _set_com(
- inertial,
- params.get("arm_com_x"),
- params.get("arm_com_y"),
- params.get("arm_com_z"),
- )
-
- elif link_name == "pendulum":
- _set_mass(inertial, params.get("pendulum_mass"))
- _set_com(
- inertial,
- params.get("pendulum_com_x"),
- params.get("pendulum_com_y"),
- params.get("pendulum_com_z"),
- )
- _set_inertia(
- inertial,
- ixx=params.get("pendulum_ixx"),
- iyy=params.get("pendulum_iyy"),
- izz=params.get("pendulum_izz"),
- ixy=params.get("pendulum_ixy"),
- )
+ patch_link_inertials(tree.getroot(), params)
# Write tuned URDF.
tuned_urdf_name = urdf_path.stem + "_tuned" + urdf_path.suffix
@@ -91,15 +63,47 @@ def export_tuned_files(
# Point to the tuned URDF.
tuned_cfg["urdf"] = tuned_urdf_name
- # Update actuator parameters.
+ # Update actuator parameters — full asymmetric motor model.
if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
act = tuned_cfg["actuators"][0]
- if "actuator_gear" in params:
- act["gear"] = round(params["actuator_gear"], 6)
- if "actuator_filter_tau" in params:
- act["filter_tau"] = round(params["actuator_filter_tau"], 6)
- if "motor_damping" in params:
- act["damping"] = round(params["motor_damping"], 6)
+ if motor_params:
+ # Asymmetric gear, damping, deadzone, frictionloss as [pos, neg].
+ gear_pos = motor_params.get("actuator_gear_pos", 0.424)
+ gear_neg = motor_params.get("actuator_gear_neg", 0.425)
+ act["gear"] = [round(gear_pos, 6), round(gear_neg, 6)]
+
+ damp_pos = motor_params.get("motor_damping_pos", 0.002)
+ damp_neg = motor_params.get("motor_damping_neg", 0.015)
+ act["damping"] = [round(damp_pos, 6), round(damp_neg, 6)]
+
+ dz_pos = motor_params.get("motor_deadzone_pos", 0.141)
+ dz_neg = motor_params.get("motor_deadzone_neg", 0.078)
+ act["deadzone"] = [round(dz_pos, 6), round(dz_neg, 6)]
+
+ fl_pos = motor_params.get("motor_frictionloss_pos", 0.057)
+ fl_neg = motor_params.get("motor_frictionloss_neg", 0.053)
+ act["frictionloss"] = [round(fl_pos, 6), round(fl_neg, 6)]
+
+ if "actuator_filter_tau" in motor_params:
+ act["filter_tau"] = round(motor_params["actuator_filter_tau"], 6)
+ if "viscous_quadratic" in motor_params:
+ act["viscous_quadratic"] = round(motor_params["viscous_quadratic"], 6)
+ if "back_emf_gain" in motor_params:
+ act["back_emf_gain"] = round(motor_params["back_emf_gain"], 6)
+ else:
+ if "actuator_gear" in params:
+ act["gear"] = round(params["actuator_gear"], 6)
+ if "actuator_filter_tau" in params:
+ act["filter_tau"] = round(params["actuator_filter_tau"], 6)
+ if "motor_damping" in params:
+ act["damping"] = round(params["motor_damping"], 6)
+ if "motor_deadzone" in params:
+ act["deadzone"] = round(params["motor_deadzone"], 6)
+
+ # ctrl_range from ctrl_limit parameter.
+ if "ctrl_limit" in params:
+ lim = round(params["ctrl_limit"], 6)
+ act["ctrl_range"] = [-lim, lim]
# Update joint overrides.
if "joints" not in tuned_cfg:
@@ -108,16 +112,23 @@ def export_tuned_files(
if "motor_joint" not in tuned_cfg["joints"]:
tuned_cfg["joints"]["motor_joint"] = {}
mj = tuned_cfg["joints"]["motor_joint"]
- if "motor_armature" in params:
- mj["armature"] = round(params["motor_armature"], 6)
- if "motor_frictionloss" in params:
- mj["frictionloss"] = round(params["motor_frictionloss"], 6)
+ if motor_params:
+ mj["armature"] = round(motor_params.get("motor_armature", 0.00277), 6)
+ # Frictionloss/damping = 0 in MuJoCo (motor model handles via qfrc_applied).
+ mj["frictionloss"] = 0.0
+ else:
+ if "motor_armature" in params:
+ mj["armature"] = round(params["motor_armature"], 6)
+ if "motor_frictionloss" in params:
+ mj["frictionloss"] = round(params["motor_frictionloss"], 6)
if "pendulum_joint" not in tuned_cfg["joints"]:
tuned_cfg["joints"]["pendulum_joint"] = {}
pj = tuned_cfg["joints"]["pendulum_joint"]
if "pendulum_damping" in params:
pj["damping"] = round(params["pendulum_damping"], 6)
+ if "pendulum_frictionloss" in params:
+ pj["frictionloss"] = round(params["pendulum_frictionloss"], 6)
# Write tuned robot.yaml.
tuned_yaml_path = robot_path / "robot_tuned.yaml"
@@ -136,51 +147,46 @@ def export_tuned_files(
return tuned_urdf_path, tuned_yaml_path
-# ── XML helpers (shared with rollout.py) ─────────────────────────────
+# ── CLI entry point ──────────────────────────────────────────────────
-def _set_mass(inertial: ET.Element, mass: float | None) -> None:
- if mass is None:
- return
- mass_el = inertial.find("mass")
- if mass_el is not None:
- mass_el.set("value", str(mass))
+def main() -> None:
+ import argparse
+ import json
+
+ from src.sysid.rollout import LOCKED_MOTOR_PARAMS
+
+ parser = argparse.ArgumentParser(
+ description="Export tuned URDF + robot.yaml from sysid results."
+ )
+ parser.add_argument(
+ "--robot-path", type=str, default="assets/rotary_cartpole",
+ help="Path to robot asset directory",
+ )
+ parser.add_argument(
+ "--result", type=str, default=None,
+ help="Path to sysid_result.json (auto-detected if omitted)",
+ )
+ args = parser.parse_args()
+
+ robot_path = Path(args.robot_path).resolve()
+ if args.result:
+ result_path = Path(args.result)
+ else:
+ result_path = robot_path / "sysid_result.json"
+
+ if not result_path.exists():
+ raise FileNotFoundError(f"Result file not found: {result_path}")
+
+ result = json.loads(result_path.read_text())
+
+ export_tuned_files(
+ robot_path=args.robot_path,
+ params=result["best_params"],
+ motor_params=result.get("motor_params", dict(LOCKED_MOTOR_PARAMS)),
+ )
-def _set_com(
- inertial: ET.Element,
- x: float | None,
- y: float | None,
- z: float | None,
-) -> None:
- origin = inertial.find("origin")
- if origin is None:
- return
- xyz = origin.get("xyz", "0 0 0").split()
- if x is not None:
- xyz[0] = str(x)
- if y is not None:
- xyz[1] = str(y)
- if z is not None:
- xyz[2] = str(z)
- origin.set("xyz", " ".join(xyz))
+if __name__ == "__main__":
+ main()
-
-def _set_inertia(
- inertial: ET.Element,
- ixx: float | None = None,
- iyy: float | None = None,
- izz: float | None = None,
- ixy: float | None = None,
- iyz: float | None = None,
- ixz: float | None = None,
-) -> None:
- ine = inertial.find("inertia")
- if ine is None:
- return
- for attr, val in [
- ("ixx", ixx), ("iyy", iyy), ("izz", izz),
- ("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
- ]:
- if val is not None:
- ine.set(attr, str(val))
diff --git a/src/sysid/motor/__init__.py b/src/sysid/motor/__init__.py
new file mode 100644
index 0000000..1c05f16
--- /dev/null
+++ b/src/sysid/motor/__init__.py
@@ -0,0 +1,5 @@
+"""Motor-only system identification subpackage.
+
+Identifies JGB37-520 DC motor dynamics (no pendulum, no limits)
+so the MuJoCo simulation matches the real hardware response.
+"""
diff --git a/src/sysid/motor/capture.py b/src/sysid/motor/capture.py
new file mode 100644
index 0000000..f7eb7ea
--- /dev/null
+++ b/src/sysid/motor/capture.py
@@ -0,0 +1,364 @@
+"""Capture a motor-only trajectory under random excitation (PRBS-style).
+
+Connects to the ESP32 running the simplified sysid firmware (no pendulum,
+no limits), sends random PWM commands, and records motor angle + velocity
+at ~ 50 Hz.
+
+Firmware serial protocol (115200 baud):
+ Commands: M\\n R\\n S\\n G\\n H\\n P\\n
+ State: S,,,,,\\n
+
+Usage:
+ python -m src.sysid.motor.capture --duration 20
+ python -m src.sysid.motor.capture --duration 30 --amplitude 200
+"""
+
+from __future__ import annotations
+
+import argparse
+import math
+import random
+import threading
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import structlog
+import yaml
+
+log = structlog.get_logger()
+
+# ── Default asset path ───────────────────────────────────────────────
+_DEFAULT_ASSET = "assets/motor"
+
+
+# ── Serial protocol helpers ──────────────────────────────────────────
+
+
+def _parse_state_line(line: str) -> dict[str, Any] | None:
+ """Parse an ``S,…`` state line from the motor sysid firmware.
+
+ Format: S,,,,,
+ """
+ if not line.startswith("S,"):
+ return None
+ parts = line.split(",")
+ if len(parts) < 6:
+ return None
+ try:
+ return {
+ "timestamp_ms": int(parts[1]),
+ "encoder_count": int(parts[2]),
+ "rpm": float(parts[3]),
+ "applied_speed": int(parts[4]),
+ "enc_vel_cps": float(parts[5]),
+ }
+ except (ValueError, IndexError):
+ return None
+
+
+# ── Background serial reader ─────────────────────────────────────────
+
+
+class _SerialReader:
+ """Minimal background reader for the ESP32 serial stream."""
+
+ def __init__(self, port: str, baud: int = 115200):
+ import serial as _serial
+
+ self._serial_mod = _serial
+ self.ser = _serial.Serial(port, baud, timeout=0.05)
+ time.sleep(2) # Wait for ESP32 boot
+ self.ser.reset_input_buffer()
+
+ self._latest: dict[str, Any] = {}
+ self._seq: int = 0
+ self._lock = threading.Lock()
+ self._cond = threading.Condition(self._lock)
+ self._running = True
+
+ self._thread = threading.Thread(target=self._reader_loop, daemon=True)
+ self._thread.start()
+
+ def _reader_loop(self) -> None:
+ while self._running:
+ try:
+ if self.ser.in_waiting:
+ line = (
+ self.ser.readline()
+ .decode("utf-8", errors="ignore")
+ .strip()
+ )
+ parsed = _parse_state_line(line)
+ if parsed is not None:
+ with self._cond:
+ self._latest = parsed
+ self._seq += 1
+ self._cond.notify_all()
+ elif line and not line.startswith("S,"):
+ # Log non-state lines (READY, PONG, WARN, etc.)
+ log.debug("serial_info", line=line)
+ else:
+ time.sleep(0.001)
+ except (OSError, self._serial_mod.SerialException):
+ log.critical("serial_lost")
+ break
+
+ def send(self, cmd: str) -> None:
+ try:
+ self.ser.write(f"{cmd}\n".encode())
+ except (OSError, self._serial_mod.SerialException):
+ log.critical("serial_send_failed", cmd=cmd)
+
+ def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
+ """Wait until a *new* state line arrives, then return it."""
+ with self._cond:
+ seq_before = self._seq
+ if not self._cond.wait_for(
+ lambda: self._seq > seq_before, timeout=timeout
+ ):
+ return {}
+ return dict(self._latest)
+
+ def close(self) -> None:
+ self._running = False
+ self.send("H")
+ self.send("M0")
+ time.sleep(0.1)
+ self._thread.join(timeout=1.0)
+ self.ser.close()
+
+
+# ── PRBS excitation signal ───────────────────────────────────────────
+
+
+class _PRBSExcitation:
+ """Random hold-value excitation with configurable amplitude and hold time."""
+
+ def __init__(
+ self,
+ amplitude: int = 200,
+ hold_min_ms: int = 50,
+ hold_max_ms: int = 400,
+ ):
+ self.amplitude = amplitude
+ self.hold_min_ms = hold_min_ms
+ self.hold_max_ms = hold_max_ms
+ self._current: int = 0
+ self._switch_time: float = 0.0
+ self._new_value()
+
+ def _new_value(self) -> None:
+ self._current = random.randint(-self.amplitude, self.amplitude)
+ hold_ms = random.randint(self.hold_min_ms, self.hold_max_ms)
+ self._switch_time = time.monotonic() + hold_ms / 1000.0
+
+ def __call__(self) -> int:
+ if time.monotonic() >= self._switch_time:
+ self._new_value()
+ return self._current
+
+
+# ── Main capture loop ────────────────────────────────────────────────
+
+
+def capture(
+ asset_path: str | Path = _DEFAULT_ASSET,
+ port: str = "/dev/cu.usbserial-0001",
+ baud: int = 115200,
+ duration: float = 20.0,
+ amplitude: int = 200,
+ hold_min_ms: int = 50,
+ hold_max_ms: int = 400,
+ dt: float = 0.02,
+) -> Path:
+ """Run motor-only capture and return the path to the saved .npz file.
+
+ Stream-driven: blocks on each firmware state line (~50 Hz),
+ sends next motor command immediately, records both.
+ No time.sleep pacing — locked to firmware clock.
+
+ The recording stores:
+ - time: wall-clock seconds since start
+ - action: normalised action = applied_speed / 255
+ - motor_angle: shaft angle in radians (from encoder)
+ - motor_vel: shaft velocity in rad/s (from encoder velocity)
+ """
+ asset_path = Path(asset_path).resolve()
+
+ # Load hardware config for encoder conversion.
+ hw_yaml = asset_path / "hardware.yaml"
+ if not hw_yaml.exists():
+ raise FileNotFoundError(f"hardware.yaml not found in {asset_path}")
+ raw_hw = yaml.safe_load(hw_yaml.read_text())
+ ppr = raw_hw.get("encoder", {}).get("ppr", 11)
+ gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0)
+ counts_per_rev: float = ppr * gear_ratio * 4.0
+ max_pwm = raw_hw.get("motor", {}).get("max_pwm", 255)
+
+ log.info(
+ "hardware_config",
+ ppr=ppr,
+ gear_ratio=gear_ratio,
+ counts_per_rev=counts_per_rev,
+ max_pwm=max_pwm,
+ )
+
+ # Connect.
+ reader = _SerialReader(port, baud)
+ excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)
+
+ # Prepare recording buffers.
+ max_samples = int(duration / dt) + 500
+ rec_time = np.zeros(max_samples, dtype=np.float64)
+ rec_action = np.zeros(max_samples, dtype=np.float64)
+ rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
+ rec_motor_vel = np.zeros(max_samples, dtype=np.float64)
+
+ # Reset encoder to zero.
+ reader.send("R")
+ time.sleep(0.1)
+
+ # Start streaming.
+ reader.send("G")
+ time.sleep(0.1)
+
+ log.info(
+ "capture_starting",
+ port=port,
+ duration=duration,
+ amplitude=amplitude,
+ hold_range_ms=f"{hold_min_ms}–{hold_max_ms}",
+ )
+
+ idx = 0
+ pwm = 0
+ last_esp_ms = -1
+ t0 = time.monotonic()
+
+ try:
+ while True:
+ state = reader.read_blocking(timeout=0.1)
+ if not state:
+ continue
+
+ # Deduplicate by firmware timestamp.
+ esp_ms = state.get("timestamp_ms", 0)
+ if esp_ms == last_esp_ms:
+ continue
+ last_esp_ms = esp_ms
+
+ elapsed = time.monotonic() - t0
+ if elapsed >= duration:
+ break
+
+ # Generate next excitation PWM.
+ pwm = excitation()
+
+ # Send command.
+ reader.send(f"M{pwm}")
+
+ # Convert encoder to angle/velocity.
+ enc = state.get("encoder_count", 0)
+ motor_angle = enc / counts_per_rev * 2.0 * math.pi
+ motor_vel = (
+ state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi
+ )
+
+ # Use firmware-applied speed for the action.
+ applied = state.get("applied_speed", 0)
+ action_norm = applied / 255.0
+
+ if idx < max_samples:
+ rec_time[idx] = elapsed
+ rec_action[idx] = action_norm
+ rec_motor_angle[idx] = motor_angle
+ rec_motor_vel[idx] = motor_vel
+ idx += 1
+ else:
+ break
+
+ if idx % 50 == 0:
+ log.info(
+ "capture_progress",
+ elapsed=f"{elapsed:.1f}/{duration:.0f}s",
+ samples=idx,
+ pwm=pwm,
+ angle_deg=f"{math.degrees(motor_angle):.1f}",
+ vel_rps=f"{motor_vel / (2 * math.pi):.1f}",
+ )
+
+ finally:
+ reader.send("M0")
+ reader.close()
+
+ # Trim.
+ rec_time = rec_time[:idx]
+ rec_action = rec_action[:idx]
+ rec_motor_angle = rec_motor_angle[:idx]
+ rec_motor_vel = rec_motor_vel[:idx]
+
+ # Save.
+ recordings_dir = asset_path / "recordings"
+ recordings_dir.mkdir(exist_ok=True)
+ stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ out_path = recordings_dir / f"motor_capture_{stamp}.npz"
+ np.savez_compressed(
+ out_path,
+ time=rec_time,
+ action=rec_action,
+ motor_angle=rec_motor_angle,
+ motor_vel=rec_motor_vel,
+ )
+
+ log.info(
+ "capture_saved",
+ path=str(out_path),
+ samples=idx,
+ duration_actual=f"{rec_time[-1]:.2f}s" if idx > 0 else "0s",
+ )
+ return out_path
+
+
+# ── CLI ──────────────────────────────────────────────────────────────
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(
+ description="Capture motor-only trajectory for system identification."
+ )
+ parser.add_argument(
+ "--asset-path", type=str, default=_DEFAULT_ASSET,
+ help="Path to motor asset directory (contains hardware.yaml)",
+ )
+ parser.add_argument(
+ "--port", type=str, default="/dev/cu.usbserial-0001",
+ help="Serial port for ESP32",
+ )
+ parser.add_argument("--baud", type=int, default=115200)
+ parser.add_argument("--duration", type=float, default=20.0, help="Capture duration (s)")
+ parser.add_argument(
+ "--amplitude", type=int, default=200,
+ help="Max PWM magnitude (0–255, firmware allows full range)",
+ )
+ parser.add_argument("--hold-min-ms", type=int, default=50, help="PRBS min hold (ms)")
+ parser.add_argument("--hold-max-ms", type=int, default=400, help="PRBS max hold (ms)")
+ parser.add_argument("--dt", type=float, default=0.02, help="Nominal sample period (s)")
+ args = parser.parse_args()
+
+ capture(
+ asset_path=args.asset_path,
+ port=args.port,
+ baud=args.baud,
+ duration=args.duration,
+ amplitude=args.amplitude,
+ hold_min_ms=args.hold_min_ms,
+ hold_max_ms=args.hold_max_ms,
+ dt=args.dt,
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/sysid/motor/export.py b/src/sysid/motor/export.py
new file mode 100644
index 0000000..b717ce1
--- /dev/null
+++ b/src/sysid/motor/export.py
@@ -0,0 +1,186 @@
+"""Export tuned motor parameters to MJCF and robot.yaml files.
+
+Reads the original motor.xml and robot.yaml, patches with optimised
+parameter values, and writes motor_tuned.xml + robot_tuned.yaml.
+
+Usage:
+ python -m src.sysid.motor.export \
+ --asset-path assets/motor \
+ --result assets/motor/motor_sysid_result.json
+"""
+
+from __future__ import annotations
+
+import argparse
+import copy
+import json
+import xml.etree.ElementTree as ET
+from pathlib import Path
+
+import structlog
+import yaml
+
+log = structlog.get_logger()
+
+_DEFAULT_ASSET = "assets/motor"
+
+
+def export_tuned_files(
+ asset_path: str | Path,
+ params: dict[str, float],
+) -> tuple[Path, Path]:
+ """Write tuned MJCF and robot.yaml files.
+
+ Returns (tuned_mjcf_path, tuned_robot_yaml_path).
+ """
+ asset_path = Path(asset_path).resolve()
+
+ robot_yaml_path = asset_path / "robot.yaml"
+ robot_cfg = yaml.safe_load(robot_yaml_path.read_text())
+ mjcf_path = asset_path / robot_cfg["mjcf"]
+
+ # ── Tune MJCF ────────────────────────────────────────────────
+ tree = ET.parse(str(mjcf_path))
+ root = tree.getroot()
+
+ # Actuator — use average gear for the MJCF model.
+ gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear"))
+ gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear"))
+ gear_avg = None
+ if gear_pos is not None and gear_neg is not None:
+ gear_avg = (gear_pos + gear_neg) / 2.0
+ elif gear_pos is not None:
+ gear_avg = gear_pos
+ filter_tau = params.get("actuator_filter_tau")
+ for act_el in root.iter("general"):
+ if act_el.get("name") == "motor":
+ if gear_avg is not None:
+ act_el.set("gear", str(gear_avg))
+ if filter_tau is not None:
+ if filter_tau > 0:
+ act_el.set("dyntype", "filter")
+ act_el.set("dynprm", str(filter_tau))
+ else:
+ act_el.set("dyntype", "none")
+
+ # Joint — average damping & friction for MJCF (asymmetry in runtime).
+ fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss"))
+ fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss"))
+ fl_avg = None
+ if fl_pos is not None and fl_neg is not None:
+ fl_avg = (fl_pos + fl_neg) / 2.0
+ elif fl_pos is not None:
+ fl_avg = fl_pos
+ damp_pos = params.get("motor_damping_pos", params.get("motor_damping"))
+ damp_neg = params.get("motor_damping_neg", params.get("motor_damping"))
+ damp_avg = None
+ if damp_pos is not None and damp_neg is not None:
+ damp_avg = (damp_pos + damp_neg) / 2.0
+ elif damp_pos is not None:
+ damp_avg = damp_pos
+ for jnt in root.iter("joint"):
+ if jnt.get("name") == "motor_joint":
+ if damp_avg is not None:
+ jnt.set("damping", str(damp_avg))
+ if "motor_armature" in params:
+ jnt.set("armature", str(params["motor_armature"]))
+ if fl_avg is not None:
+ jnt.set("frictionloss", str(fl_avg))
+
+ # Rotor mass.
+ if "rotor_mass" in params:
+ for geom in root.iter("geom"):
+ if geom.get("name") == "rotor_disk":
+ geom.set("mass", str(params["rotor_mass"]))
+
+ # Write tuned MJCF.
+ tuned_mjcf_name = mjcf_path.stem + "_tuned" + mjcf_path.suffix
+ tuned_mjcf_path = asset_path / tuned_mjcf_name
+ ET.indent(tree, space=" ")
+ tree.write(str(tuned_mjcf_path), xml_declaration=True, encoding="unicode")
+ log.info("tuned_mjcf_written", path=str(tuned_mjcf_path))
+
+ # ── Tune robot.yaml ──────────────────────────────────────────
+ tuned_cfg = copy.deepcopy(robot_cfg)
+ tuned_cfg["mjcf"] = tuned_mjcf_name
+
+ if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
+ act = tuned_cfg["actuators"][0]
+ if gear_avg is not None:
+ act["gear"] = round(gear_avg, 6)
+ if "actuator_filter_tau" in params:
+ act["filter_tau"] = round(params["actuator_filter_tau"], 6)
+ if "motor_damping" in params:
+ act["damping"] = round(params["motor_damping"], 6)
+
+ if "joints" not in tuned_cfg:
+ tuned_cfg["joints"] = {}
+ if "motor_joint" not in tuned_cfg["joints"]:
+ tuned_cfg["joints"]["motor_joint"] = {}
+ mj = tuned_cfg["joints"]["motor_joint"]
+ if "motor_armature" in params:
+ mj["armature"] = round(params["motor_armature"], 6)
+ if fl_avg is not None:
+ mj["frictionloss"] = round(fl_avg, 6)
+
+ # Asymmetric / hardware-realism / nonlinear parameters.
+ realism = {}
+ for key in [
+ "actuator_gear_pos", "actuator_gear_neg",
+ "motor_damping_pos", "motor_damping_neg",
+ "motor_frictionloss_pos", "motor_frictionloss_neg",
+ "motor_deadzone_pos", "motor_deadzone_neg",
+ "action_bias",
+ "viscous_quadratic", "back_emf_gain",
+ "stribeck_friction_boost", "stribeck_vel",
+ "gearbox_backlash",
+ ]:
+ if key in params:
+ realism[key] = round(params[key], 6)
+ if realism:
+ tuned_cfg["hardware_realism"] = realism
+
+ tuned_yaml_path = asset_path / "robot_tuned.yaml"
+ header = (
+ "# Tuned motor config — generated by src.sysid.motor.optimize\n"
+ "# Original: robot.yaml\n\n"
+ )
+ tuned_yaml_path.write_text(
+ header + yaml.dump(tuned_cfg, default_flow_style=False, sort_keys=False)
+ )
+ log.info("tuned_robot_yaml_written", path=str(tuned_yaml_path))
+
+ return tuned_mjcf_path, tuned_yaml_path
+
+
+# ── CLI ──────────────────────────────────────────────────────────────
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(
+ description="Export tuned motor parameters to MJCF + robot.yaml."
+ )
+ parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
+ parser.add_argument(
+ "--result", type=str, default=None,
+ help="Path to motor_sysid_result.json (auto-detected if omitted)",
+ )
+ args = parser.parse_args()
+
+ asset_path = Path(args.asset_path).resolve()
+ if args.result:
+ result_path = Path(args.result)
+ else:
+ result_path = asset_path / "motor_sysid_result.json"
+
+ if not result_path.exists():
+ raise FileNotFoundError(f"Result file not found: {result_path}")
+
+ result = json.loads(result_path.read_text())
+ params = result["best_params"]
+
+ export_tuned_files(asset_path=args.asset_path, params=params)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/sysid/motor/optimize.py b/src/sysid/motor/optimize.py
new file mode 100644
index 0000000..d736498
--- /dev/null
+++ b/src/sysid/motor/optimize.py
@@ -0,0 +1,367 @@
+"""CMA-ES optimiser — fit motor simulation parameters to a real recording.
+
+Motor-only version: minimises trajectory-matching cost between MuJoCo
+rollout and recorded motor angle + velocity.
+
+Usage:
+ python -m src.sysid.motor.optimize \
+ --recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
+
+ # Quick test:
+ python -m src.sysid.motor.optimize --recording .npz \
+ --max-generations 20 --population-size 10
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import time
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+import structlog
+
+from src.sysid.motor.preprocess import recompute_velocity
+from src.sysid.motor.rollout import (
+ MOTOR_PARAMS,
+ ParamSpec,
+ bounds_arrays,
+ defaults_vector,
+ params_to_dict,
+ rollout,
+ windowed_rollout,
+)
+
+log = structlog.get_logger()
+
+_DEFAULT_ASSET = "assets/motor"
+
+
+# ── Cost function ────────────────────────────────────────────────────
+
+
+def _compute_trajectory_cost(
+ sim: dict[str, np.ndarray],
+ recording: dict[str, np.ndarray],
+ pos_weight: float = 1.0,
+ vel_weight: float = 0.5,
+ acc_weight: float = 0.0,
+ dt: float = 0.02,
+) -> float:
+ """Weighted MSE between simulated and real motor trajectories.
+
+ Motor-only: angle, velocity, and optionally acceleration.
+ Acceleration is computed as finite-difference of velocity.
+ """
+ angle_err = sim["motor_angle"] - recording["motor_angle"]
+ vel_err = sim["motor_vel"] - recording["motor_vel"]
+
+ # Reject NaN results (unstable simulation).
+ if np.any(~np.isfinite(angle_err)) or np.any(~np.isfinite(vel_err)):
+ return 1e6
+
+ cost = float(
+ pos_weight * np.mean(angle_err**2)
+ + vel_weight * np.mean(vel_err**2)
+ )
+
+ # Optional acceleration term — penalises wrong dynamics (d(vel)/dt).
+ if acc_weight > 0 and len(vel_err) > 2:
+ sim_acc = np.diff(sim["motor_vel"]) / dt
+ real_acc = np.diff(recording["motor_vel"]) / dt
+ acc_err = sim_acc - real_acc
+ if np.any(~np.isfinite(acc_err)):
+ return 1e6
+ # Normalise by typical acceleration scale (~50 rad/s²) to keep
+ # the weight intuitive relative to vel/pos terms.
+ cost += acc_weight * np.mean(acc_err**2) / (50.0**2)
+
+ return cost
+
+
+def cost_function(
+ params_vec: np.ndarray,
+ recording: dict[str, np.ndarray],
+ asset_path: Path,
+ specs: list[ParamSpec],
+ sim_dt: float = 0.002,
+ substeps: int = 10,
+ pos_weight: float = 1.0,
+ vel_weight: float = 0.5,
+ acc_weight: float = 0.0,
+ window_duration: float = 0.5,
+) -> float:
+ """Compute trajectory-matching cost for a candidate parameter vector.
+
+ Uses multiple-shooting (windowed rollout) by default.
+ """
+ params = params_to_dict(params_vec, specs)
+
+ try:
+ if window_duration > 0:
+ sim = windowed_rollout(
+ asset_path=asset_path,
+ params=params,
+ recording=recording,
+ window_duration=window_duration,
+ sim_dt=sim_dt,
+ substeps=substeps,
+ )
+ else:
+ sim = rollout(
+ asset_path=asset_path,
+ params=params,
+ actions=recording["action"],
+ sim_dt=sim_dt,
+ substeps=substeps,
+ )
+ except Exception as exc:
+ log.warning("rollout_failed", error=str(exc))
+ return 1e6
+
+ return _compute_trajectory_cost(
+ sim, recording, pos_weight, vel_weight, acc_weight,
+ dt=np.mean(np.diff(recording["time"])) if len(recording["time"]) > 1 else 0.02,
+ )
+
+
+# ── CMA-ES optimisation loop ────────────────────────────────────────
+
+
+def optimize(
+ asset_path: str | Path = _DEFAULT_ASSET,
+ recording_path: str | Path = "",
+ specs: list[ParamSpec] | None = None,
+ sigma0: float = 0.3,
+ population_size: int = 30,
+ max_generations: int = 300,
+ sim_dt: float = 0.002,
+ substeps: int = 10,
+ pos_weight: float = 1.0,
+ vel_weight: float = 0.5,
+ acc_weight: float = 0.0,
+ window_duration: float = 0.5,
+ seed: int = 42,
+ preprocess_vel: bool = True,
+ sg_window: int = 7,
+ sg_polyorder: int = 3,
+) -> dict:
+ """Run CMA-ES optimisation and return results dict."""
+ from cmaes import CMA
+
+ asset_path = Path(asset_path).resolve()
+ recording_path = Path(recording_path).resolve()
+
+ if specs is None:
+ specs = MOTOR_PARAMS
+
+ # Load recording.
+ recording = dict(np.load(recording_path))
+ n_samples = len(recording["time"])
+ duration = recording["time"][-1] - recording["time"][0]
+ n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
+ log.info(
+ "recording_loaded",
+ path=str(recording_path),
+ samples=n_samples,
+ duration=f"{duration:.1f}s",
+ n_windows=n_windows,
+ )
+
+ # Preprocess velocity: replace noisy firmware finite-difference with
+ # smooth Savitzky-Golay derivative of the angle signal.
+ if preprocess_vel:
+ recording = recompute_velocity(
+ recording,
+ window_length=sg_window,
+ polyorder=sg_polyorder,
+ )
+
+ # Normalise to [0, 1] for CMA-ES.
+ lo, hi = bounds_arrays(specs)
+ x0 = defaults_vector(specs)
+ span = hi - lo
+ span[span == 0] = 1.0
+
+ def to_normed(x: np.ndarray) -> np.ndarray:
+ return (x - lo) / span
+
+ def from_normed(x_n: np.ndarray) -> np.ndarray:
+ return x_n * span + lo
+
+ x0_normed = to_normed(x0)
+ bounds_normed = np.column_stack(
+ [np.zeros(len(specs)), np.ones(len(specs))]
+ )
+
+ optimizer = CMA(
+ mean=x0_normed,
+ sigma=sigma0,
+ bounds=bounds_normed,
+ population_size=population_size,
+ seed=seed,
+ )
+
+ best_cost = float("inf")
+ best_params_vec = x0.copy()
+ history: list[tuple[int, float]] = []
+
+ log.info(
+ "cmaes_starting",
+ n_params=len(specs),
+ population=population_size,
+ max_gens=max_generations,
+ sigma0=sigma0,
+ )
+
+ t0 = time.monotonic()
+
+ for gen in range(max_generations):
+ solutions = []
+ for _ in range(optimizer.population_size):
+ x_normed = optimizer.ask()
+ x_natural = from_normed(x_normed)
+ x_natural = np.clip(x_natural, lo, hi)
+
+ c = cost_function(
+ x_natural,
+ recording,
+ asset_path,
+ specs,
+ sim_dt=sim_dt,
+ substeps=substeps,
+ pos_weight=pos_weight,
+ vel_weight=vel_weight,
+ acc_weight=acc_weight,
+ window_duration=window_duration,
+ )
+ solutions.append((x_normed, c))
+
+ if c < best_cost:
+ best_cost = c
+ best_params_vec = x_natural.copy()
+
+ optimizer.tell(solutions)
+ history.append((gen, best_cost))
+
+ elapsed = time.monotonic() - t0
+ if gen % 5 == 0 or gen == max_generations - 1:
+ log.info(
+ "cmaes_generation",
+ gen=gen,
+ best_cost=f"{best_cost:.6f}",
+ elapsed=f"{elapsed:.1f}s",
+ gen_best=f"{min(c for _, c in solutions):.6f}",
+ )
+
+ total_time = time.monotonic() - t0
+ best_params = params_to_dict(best_params_vec, specs)
+
+ log.info(
+ "cmaes_finished",
+ best_cost=f"{best_cost:.6f}",
+ total_time=f"{total_time:.1f}s",
+ evaluations=max_generations * population_size,
+ )
+
+ # Log parameter comparison.
+ defaults = params_to_dict(defaults_vector(specs), specs)
+ for name in best_params:
+ d = defaults[name]
+ b = best_params[name]
+ change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
+ log.info(
+ "param_result",
+ name=name,
+ default=f"{d:.6g}",
+ tuned=f"{b:.6g}",
+ change=f"{change_pct:+.1f}%",
+ )
+
+ return {
+ "best_params": best_params,
+ "best_cost": best_cost,
+ "history": history,
+ "recording": str(recording_path),
+ "param_names": [s.name for s in specs],
+ "defaults": {s.name: s.default for s in specs},
+ "timestamp": datetime.now().isoformat(),
+ }
+
+
+# ── CLI ──────────────────────────────────────────────────────────────
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(
+ description="Fit motor simulation parameters to a real recording (CMA-ES)."
+ )
+ parser.add_argument(
+ "--asset-path", type=str, default=_DEFAULT_ASSET,
+ help="Path to motor asset directory",
+ )
+ parser.add_argument(
+ "--recording", type=str, required=True,
+ help="Path to .npz recording file",
+ )
+ parser.add_argument("--sigma0", type=float, default=0.3)
+ parser.add_argument("--population-size", type=int, default=30)
+ parser.add_argument("--max-generations", type=int, default=300)
+ parser.add_argument("--sim-dt", type=float, default=0.002)
+ parser.add_argument("--substeps", type=int, default=10)
+ parser.add_argument("--pos-weight", type=float, default=1.0)
+ parser.add_argument("--vel-weight", type=float, default=0.5)
+ parser.add_argument("--acc-weight", type=float, default=0.0,
+ help="Weight for acceleration matching (0=off, try 0.1-0.5)")
+ parser.add_argument("--window-duration", type=float, default=0.5)
+ parser.add_argument("--seed", type=int, default=42)
+ parser.add_argument(
+ "--no-preprocess-vel", action="store_true",
+ help="Disable Savitzky-Golay velocity preprocessing",
+ )
+ parser.add_argument("--sg-window", type=int, default=7,
+ help="Savitzky-Golay window length (odd, default 7 = 140ms)")
+ parser.add_argument("--sg-polyorder", type=int, default=3,
+ help="Savitzky-Golay polynomial order (default 3 = cubic)")
+ args = parser.parse_args()
+
+ result = optimize(
+ asset_path=args.asset_path,
+ recording_path=args.recording,
+ sigma0=args.sigma0,
+ population_size=args.population_size,
+ max_generations=args.max_generations,
+ sim_dt=args.sim_dt,
+ substeps=args.substeps,
+ pos_weight=args.pos_weight,
+ vel_weight=args.vel_weight,
+ acc_weight=args.acc_weight,
+ window_duration=args.window_duration,
+ seed=args.seed,
+ preprocess_vel=not args.no_preprocess_vel,
+ sg_window=args.sg_window,
+ sg_polyorder=args.sg_polyorder,
+ )
+
+ # Save results JSON.
+ asset_path = Path(args.asset_path).resolve()
+ result_path = asset_path / "motor_sysid_result.json"
+ result_json = {k: v for k, v in result.items() if k != "history"}
+ result_json["history_summary"] = {
+ "first_cost": result["history"][0][1] if result["history"] else None,
+ "final_cost": result["history"][-1][1] if result["history"] else None,
+ "generations": len(result["history"]),
+ }
+ result_path.write_text(json.dumps(result_json, indent=2, default=str))
+ log.info("results_saved", path=str(result_path))
+
+ # Export tuned files.
+ from src.sysid.motor.export import export_tuned_files
+
+ export_tuned_files(asset_path=args.asset_path, params=result["best_params"])
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/sysid/motor/preprocess.py b/src/sysid/motor/preprocess.py
new file mode 100644
index 0000000..9aa8cf1
--- /dev/null
+++ b/src/sysid/motor/preprocess.py
@@ -0,0 +1,114 @@
+"""Recording preprocessing — clean velocity estimation from angle data.
+
+The ESP32 firmware computes velocity as a raw finite-difference of encoder
+counts at 50 Hz. With a 1320 CPR encoder that gives ~0.24 rad/s of
+quantisation noise per count. This module replaces the noisy firmware
+velocity with a smooth differentiation of the (much cleaner) angle signal.
+
+Method: Savitzky-Golay filter applied to the angle signal, then
+differentiated analytically. Zero phase lag, preserves transients well.
+
+This is standard practice in robotics sysid — see e.g. MATLAB System ID
+Toolbox, Drake's trajectory processing, or ETH's ANYmal sysid pipeline.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from scipy.signal import savgol_filter
+
+import structlog
+
+log = structlog.get_logger()
+
+
+def recompute_velocity(
+ recording: dict[str, np.ndarray],
+ window_length: int = 7,
+ polyorder: int = 3,
+ deriv: int = 1,
+) -> dict[str, np.ndarray]:
+ """Recompute motor_vel from motor_angle using Savitzky-Golay differentiation.
+
+ Parameters
+ ----------
+ recording : dict with at least 'time', 'motor_angle', 'motor_vel' keys.
+ window_length : SG filter window (must be odd, > polyorder).
+ 7 samples at 50 Hz = 140ms window — good balance of smoothness
+ and responsiveness. Captures dynamics up to ~7 Hz.
+ polyorder : Polynomial order for the SG filter (3 = cubic).
+ deriv : Derivative order (1 = first derivative = velocity).
+
+ Returns
+ -------
+ New recording dict with 'motor_vel' replaced and 'motor_vel_raw' added.
+ """
+ rec = dict(recording) # shallow copy
+
+ times = rec["time"]
+ angles = rec["motor_angle"]
+ dt = np.mean(np.diff(times))
+
+ # Keep original for diagnostics.
+ rec["motor_vel_raw"] = rec["motor_vel"].copy()
+
+ # Savitzky-Golay derivative: fits a polynomial to each window,
+ # then takes the analytical derivative → smooth, zero phase lag.
+ vel_sg = savgol_filter(
+ angles,
+ window_length=window_length,
+ polyorder=polyorder,
+ deriv=deriv,
+ delta=dt,
+ )
+
+ # Compute stats for logging.
+ raw_vel = rec["motor_vel_raw"]
+ noise_estimate = np.std(raw_vel - vel_sg)
+ max_diff = np.max(np.abs(raw_vel - vel_sg))
+
+ log.info(
+ "velocity_recomputed",
+ method="savgol",
+ window=window_length,
+ polyorder=polyorder,
+ dt=f"{dt*1000:.1f}ms",
+ noise_std=f"{noise_estimate:.3f} rad/s",
+ max_diff=f"{max_diff:.3f} rad/s",
+ raw_rms=f"{np.sqrt(np.mean(raw_vel**2)):.3f}",
+ sg_rms=f"{np.sqrt(np.mean(vel_sg**2)):.3f}",
+ )
+
+ rec["motor_vel"] = vel_sg
+ return rec
+
+
+def smooth_velocity(
+ recording: dict[str, np.ndarray],
+ cutoff_hz: float = 10.0,
+) -> dict[str, np.ndarray]:
+ """Alternative: apply zero-phase Butterworth low-pass to motor_vel.
+
+ Simpler than SG derivative but introduces slight edge effects.
+ """
+ from scipy.signal import butter, filtfilt
+
+ rec = dict(recording)
+ rec["motor_vel_raw"] = rec["motor_vel"].copy()
+
+ dt = np.mean(np.diff(rec["time"]))
+ fs = 1.0 / dt
+ nyq = fs / 2.0
+ norm_cutoff = min(cutoff_hz / nyq, 0.99)
+
+ b, a = butter(2, norm_cutoff, btype="low")
+ rec["motor_vel"] = filtfilt(b, a, rec["motor_vel"])
+
+ log.info(
+ "velocity_smoothed",
+ method="butterworth",
+ cutoff_hz=cutoff_hz,
+ fs=fs,
+ )
+
+ return rec
diff --git a/src/sysid/motor/rollout.py b/src/sysid/motor/rollout.py
new file mode 100644
index 0000000..691703c
--- /dev/null
+++ b/src/sysid/motor/rollout.py
@@ -0,0 +1,460 @@
+"""Deterministic simulation replay — roll out recorded actions in MuJoCo.
+
+Motor-only version: single hinge joint, no pendulum.
+Given a parameter vector and recorded actions, builds a MuJoCo model
+with overridden dynamics, replays the actions, and returns the simulated
+motor angle + velocity for comparison with the real recording.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import xml.etree.ElementTree as ET
+from pathlib import Path
+
+import mujoco
+import numpy as np
+import yaml
+
+
+# ── Tunable parameter specification ──────────────────────────────────
+
+
+@dataclasses.dataclass
+class ParamSpec:
+ """Specification for a single tunable parameter."""
+
+ name: str
+ default: float
+ lower: float
+ upper: float
+ log_scale: bool = False
+
+
+# Motor-only parameters to identify.
+# These capture the full transfer function: PWM → shaft angle/velocity.
+#
+# Asymmetric parameters (pos/neg suffix) capture real-world differences
+# between CW and CCW rotation caused by gear mesh, brush contact,
+# and H-bridge asymmetry.
+MOTOR_PARAMS: list[ParamSpec] = [
+ # ── Actuator ─────────────────────────────────────────────────
+ # gear_pos/neg: effective torque per unit ctrl, split by direction.
+ # Real motors + L298N often have different drive strength per direction.
+ ParamSpec("actuator_gear_pos", 0.064, 0.005, 0.5, log_scale=True),
+ ParamSpec("actuator_gear_neg", 0.064, 0.005, 0.5, log_scale=True),
+ # filter_tau: first-order electrical/driver time constant (s).
+ # Lower bound 1ms — L298N PWM switching is very fast.
+ ParamSpec("actuator_filter_tau", 0.03, 0.001, 0.20),
+ # ── Joint dynamics ───────────────────────────────────────────
+ # damping_pos/neg: viscous friction (back-EMF), split by direction.
+ ParamSpec("motor_damping_pos", 0.003, 1e-5, 0.1, log_scale=True),
+ ParamSpec("motor_damping_neg", 0.003, 1e-5, 0.1, log_scale=True),
+ # armature: reflected rotor inertia (kg·m²).
+ ParamSpec("motor_armature", 0.0001, 1e-6, 0.01, log_scale=True),
+ # frictionloss_pos/neg: Coulomb friction, split by velocity direction.
+ ParamSpec("motor_frictionloss_pos", 0.03, 0.001, 0.2, log_scale=True),
+ ParamSpec("motor_frictionloss_neg", 0.03, 0.001, 0.2, log_scale=True),
+ # ── Nonlinear dynamics ───────────────────────────────────────
+ # viscous_quadratic: velocity-squared drag term (N·m·s²/rad²).
+ # Captures nonlinear friction that increases at high speed (air drag,
+ # grease viscosity, etc.). Opposes motion.
+ ParamSpec("viscous_quadratic", 0.0, 0.0, 0.005),
+ # back_emf_gain: torque reduction proportional to |vel × ctrl|.
+ # Models the back-EMF effect: at high speed the motor produces less
+ # torque because the voltage drop across the armature is smaller.
+ ParamSpec("back_emf_gain", 0.0, 0.0, 0.05),
+ # stribeck_vel: characteristic velocity below which Coulomb friction
+ # is boosted (Stribeck effect). 0 = standard Coulomb only.
+ ParamSpec("stribeck_friction_boost", 0.0, 0.0, 0.15),
+ ParamSpec("stribeck_vel", 2.0, 0.1, 8.0),
+ # ── Rotor load ───────────────────────────────────────────────
+ ParamSpec("rotor_mass", 0.012, 0.002, 0.05, log_scale=True),
+ # ── Hardware realism ─────────────────────────────────────────
+ # deadzone_pos/neg: minimum |action| per direction.
+ ParamSpec("motor_deadzone_pos", 0.08, 0.0, 0.30),
+ ParamSpec("motor_deadzone_neg", 0.08, 0.0, 0.30),
+ # action_bias: constant offset added to ctrl (H-bridge asymmetry).
+ ParamSpec("action_bias", 0.0, -0.10, 0.10),
+ # ── Gearbox backlash ─────────────────────────────────────────
+ # backlash_halfwidth: half the angular deadband (rad) in the gearbox.
+ # When the motor reverses, the shaft doesn't move until the backlash
+ # gap is taken up. Typical for 30:1 plastic/metal spur gears.
+ ParamSpec("gearbox_backlash", 0.0, 0.0, 0.15),
+]
+
+
+def params_to_dict(
+ values: np.ndarray, specs: list[ParamSpec] | None = None
+) -> dict[str, float]:
+ if specs is None:
+ specs = MOTOR_PARAMS
+ return {s.name: float(values[i]) for i, s in enumerate(specs)}
+
+
+def defaults_vector(specs: list[ParamSpec] | None = None) -> np.ndarray:
+ if specs is None:
+ specs = MOTOR_PARAMS
+ return np.array([s.default for s in specs], dtype=np.float64)
+
+
+def bounds_arrays(
+ specs: list[ParamSpec] | None = None,
+) -> tuple[np.ndarray, np.ndarray]:
+ if specs is None:
+ specs = MOTOR_PARAMS
+ lo = np.array([s.lower for s in specs], dtype=np.float64)
+ hi = np.array([s.upper for s in specs], dtype=np.float64)
+ return lo, hi
+
+
+# ── MuJoCo model building with parameter overrides ──────────────────
+
+
+def _build_model(
+ asset_path: Path,
+ params: dict[str, float],
+) -> mujoco.MjModel:
+ """Build a MuJoCo model from motor.xml with parameter overrides.
+
+ Parses the MJCF, patches actuator/joint/body parameters, reloads.
+ """
+ asset_path = Path(asset_path).resolve()
+ robot_cfg = yaml.safe_load((asset_path / "robot.yaml").read_text())
+ mjcf_path = asset_path / robot_cfg["mjcf"]
+
+ tree = ET.parse(str(mjcf_path))
+ root = tree.getroot()
+
+ # ── Actuator overrides ───────────────────────────────────────
+ # Use average of pos/neg gear for MuJoCo (asymmetry handled in ctrl).
+ gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
+ gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
+ gear = (gear_pos + gear_neg) / 2.0
+ filter_tau = params.get("actuator_filter_tau", 0.03)
+
+ for act_el in root.iter("general"):
+ if act_el.get("name") == "motor":
+ act_el.set("gear", str(gear))
+ if filter_tau > 0:
+ act_el.set("dyntype", "filter")
+ act_el.set("dynprm", str(filter_tau))
+ else:
+ act_el.set("dyntype", "none")
+ act_el.set("dynprm", "1")
+
+ # ── Joint overrides ──────────────────────────────────────────
+ # Damping and friction are asymmetric + nonlinear → applied manually.
+ # Set MuJoCo damping & frictionloss to 0; we handle them in qfrc_applied.
+ armature = params.get("motor_armature", 0.0001)
+
+ for jnt in root.iter("joint"):
+ if jnt.get("name") == "motor_joint":
+ jnt.set("damping", "0")
+ jnt.set("armature", str(armature))
+ jnt.set("frictionloss", "0")
+
+ # ── Rotor mass override ──────────────────────────────────────
+ rotor_mass = params.get("rotor_mass", 0.012)
+ for geom in root.iter("geom"):
+ if geom.get("name") == "rotor_disk":
+ geom.set("mass", str(rotor_mass))
+
+ # Write temp file and load.
+ tmp_path = asset_path / "_tmp_motor_sysid.xml"
+ try:
+ tree.write(str(tmp_path), xml_declaration=True, encoding="unicode")
+ model = mujoco.MjModel.from_xml_path(str(tmp_path))
+ finally:
+ tmp_path.unlink(missing_ok=True)
+
+ return model
+
+
+# ── Action + asymmetry transforms ────────────────────────────────────
+
+
+def _transform_action(
+ action: float,
+ params: dict[str, float],
+) -> float:
+ """Apply bias, direction-dependent deadzone, and gear scaling.
+
+ The MuJoCo actuator has the *average* gear ratio. We rescale the
+ control signal so that ``ctrl × gear_avg ≈ action × gear_dir``.
+ """
+ # Constant bias (H-bridge asymmetry).
+ action = action + params.get("action_bias", 0.0)
+
+ # Direction-dependent deadzone.
+ dz_pos = params.get("motor_deadzone_pos", params.get("motor_deadzone", 0.08))
+ dz_neg = params.get("motor_deadzone_neg", params.get("motor_deadzone", 0.08))
+ if action >= 0 and action < dz_pos:
+ return 0.0
+ if action < 0 and action > -dz_neg:
+ return 0.0
+
+ # Direction-dependent gear scaling.
+ # MuJoCo model uses gear_avg; we rescale ctrl to get the right torque.
+ gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
+ gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
+ gear_avg = (gear_pos + gear_neg) / 2.0
+ if gear_avg < 1e-8:
+ return 0.0
+ gear_dir = gear_pos if action >= 0 else gear_neg
+ return action * (gear_dir / gear_avg)
+
+
+def _apply_forces(
+ data: mujoco.MjData,
+ vel: float,
+ ctrl: float,
+ params: dict[str, float],
+ backlash_state: list[float] | None = None,
+) -> None:
+ """Apply all manual forces: asymmetric friction, damping, and nonlinear terms.
+
+ Everything that MuJoCo can't represent with its symmetric joint model
+ is injected here via ``qfrc_applied``.
+
+ Forces applied (all oppose motion or reduce torque):
+ 1. Asymmetric Coulomb friction (with Stribeck boost at low speed)
+ 2. Asymmetric viscous damping
+ 3. Quadratic velocity drag
+ 4. Back-EMF torque reduction (proportional to |vel|)
+
+ Backlash:
+ If backlash_state is provided, it is a 1-element list [gap_pos].
+ gap_pos tracks the motor's position within the backlash deadband.
+ When inside the gap, no actuator torque is transmitted to the
+ output shaft — only friction forces act.
+ """
+ torque = 0.0
+
+ # ── Gearbox backlash ──────────────────────────────────────────
+ # Model: the gear teeth have play of 2×halfwidth radians.
+ # We track where the motor is within that gap. When at the
+ # edge (contact), actuator torque passes through normally.
+ # When inside the gap, no actuator torque is transmitted.
+ backlash_hw = params.get("gearbox_backlash", 0.0)
+ actuator_torque_scale = 1.0 # 1.0 = full contact, 0.0 = in gap
+
+ if backlash_hw > 0 and backlash_state is not None:
+ # gap_pos: how far into the backlash gap we are.
+ # Range: [-backlash_hw, +backlash_hw]
+ # At ±backlash_hw, gears are in contact and torque transmits.
+ gap = backlash_state[0]
+ # Update gap position based on velocity.
+ dt_sub = data.model.opt.timestep
+ gap += vel * dt_sub
+ # Clamp to backlash range.
+ if gap > backlash_hw:
+ gap = backlash_hw
+ elif gap < -backlash_hw:
+ gap = -backlash_hw
+
+ backlash_state[0] = gap
+
+ # If not at contact edge, no torque transmitted.
+ if abs(gap) < backlash_hw - 1e-8:
+ actuator_torque_scale = 0.0
+ else:
+ actuator_torque_scale = 1.0
+
+ # ── 1. Coulomb friction (direction-dependent + Stribeck) ─────
+ fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss", 0.03))
+ fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss", 0.03))
+ stribeck_boost = params.get("stribeck_friction_boost", 0.0)
+ stribeck_vel = params.get("stribeck_vel", 2.0)
+
+ if abs(vel) > 1e-6:
+ fl = fl_pos if vel > 0 else fl_neg
+ # Stribeck: boost friction at low speed. Exponential decay.
+ if stribeck_boost > 0 and stribeck_vel > 0:
+ fl = fl * (1.0 + stribeck_boost * np.exp(-abs(vel) / stribeck_vel))
+ # Coulomb: constant magnitude, opposes motion.
+ torque -= np.sign(vel) * fl
+
+ # ── 2. Asymmetric viscous damping ────────────────────────────
+ damp_pos = params.get("motor_damping_pos", params.get("motor_damping", 0.003))
+ damp_neg = params.get("motor_damping_neg", params.get("motor_damping", 0.003))
+ damp = damp_pos if vel > 0 else damp_neg
+ torque -= damp * vel
+
+ # ── 3. Quadratic velocity drag ───────────────────────────────
+ visc_quad = params.get("viscous_quadratic", 0.0)
+ if visc_quad > 0:
+ torque -= visc_quad * vel * abs(vel)
+
+ # ── 4. Back-EMF torque reduction ─────────────────────────────
+ # At high speed, the motor's effective torque is reduced because
+ # back-EMF opposes the supply voltage. Modelled as a torque that
+ # opposes the control signal proportional to speed.
+ bemf = params.get("back_emf_gain", 0.0)
+ if bemf > 0 and abs(ctrl) > 1e-6:
+ # The reduction should oppose the actuator torque direction.
+ torque -= bemf * vel * np.sign(ctrl) * actuator_torque_scale
+
+ # ── 5. Scale actuator contribution by backlash state ─────────
+ # When in the backlash gap, MuJoCo's actuator force should not
+ # transmit. We cancel it by applying an opposing force.
+ if actuator_torque_scale < 1.0:
+ # The actuator_force from MuJoCo will be applied by mj_step.
+ # We need to counteract it. data.qfrc_actuator isn't set yet
+ # at this point (pre-step), so we zero the ctrl instead.
+ # This is handled in the rollout loop by zeroing ctrl.
+ pass
+
+ data.qfrc_applied[0] = max(-10.0, min(10.0, torque))
+ return actuator_torque_scale
+
+
+# ── Simulation rollout ───────────────────────────────────────────────
+
+
+def rollout(
+ asset_path: str | Path,
+ params: dict[str, float],
+ actions: np.ndarray,
+ sim_dt: float = 0.002,
+ substeps: int = 10,
+) -> dict[str, np.ndarray]:
+ """Open-loop replay of recorded actions.
+
+ Parameters
+ ----------
+ asset_path : motor asset directory
+ params : named parameter overrides
+ actions : (N,) normalised actions [-1, 1] from the recording
+ sim_dt : MuJoCo physics timestep
+ substeps : physics substeps per control step (ctrl_dt = sim_dt × substeps)
+
+ Returns
+ -------
+ dict with motor_angle (N,) and motor_vel (N,).
+ """
+ asset_path = Path(asset_path).resolve()
+ model = _build_model(asset_path, params)
+ model.opt.timestep = sim_dt
+ data = mujoco.MjData(model)
+ mujoco.mj_resetData(model, data)
+
+ n = len(actions)
+
+ sim_motor_angle = np.zeros(n, dtype=np.float64)
+ sim_motor_vel = np.zeros(n, dtype=np.float64)
+
+ # Backlash state: [gap_position]. Starts at 0 (centered in gap).
+ backlash_state = [0.0]
+
+ for i in range(n):
+ ctrl = _transform_action(actions[i], params)
+ data.ctrl[0] = ctrl
+
+ for _ in range(substeps):
+ scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
+ # If in backlash gap, zero ctrl so actuator torque doesn't transmit.
+ if scale < 1.0:
+ data.ctrl[0] = 0.0
+ else:
+ data.ctrl[0] = ctrl
+ mujoco.mj_step(model, data)
+
+ # Bail out on NaN/instability.
+ if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
+ sim_motor_angle[i:] = np.nan
+ sim_motor_vel[i:] = np.nan
+ break
+
+ sim_motor_angle[i] = data.qpos[0]
+ sim_motor_vel[i] = data.qvel[0]
+
+ return {
+ "motor_angle": sim_motor_angle,
+ "motor_vel": sim_motor_vel,
+ }
+
+
+def windowed_rollout(
+ asset_path: str | Path,
+ params: dict[str, float],
+ recording: dict[str, np.ndarray],
+ window_duration: float = 0.5,
+ sim_dt: float = 0.002,
+ substeps: int = 10,
+) -> dict[str, np.ndarray]:
+ """Multiple-shooting rollout for motor-only sysid.
+
+ Splits the recording into short windows. Each window is initialised
+ from the real motor state, preventing error accumulation.
+
+ Returns
+ -------
+ dict with motor_angle (N,), motor_vel (N,), n_windows (int).
+ """
+ asset_path = Path(asset_path).resolve()
+ model = _build_model(asset_path, params)
+ model.opt.timestep = sim_dt
+ data = mujoco.MjData(model)
+
+ times = recording["time"]
+ actions = recording["action"]
+ real_angle = recording["motor_angle"]
+ real_vel = recording["motor_vel"]
+ n = len(actions)
+
+ sim_motor_angle = np.zeros(n, dtype=np.float64)
+ sim_motor_vel = np.zeros(n, dtype=np.float64)
+
+ # Compute window boundaries.
+ t0 = times[0]
+ t_end = times[-1]
+ window_starts: list[int] = []
+ current_t = t0
+ while current_t < t_end:
+ idx = int(np.searchsorted(times, current_t))
+ idx = min(idx, n - 1)
+ window_starts.append(idx)
+ current_t += window_duration
+
+ n_windows = len(window_starts)
+
+ for w, w_start in enumerate(window_starts):
+ w_end = window_starts[w + 1] if w + 1 < n_windows else n
+
+ # Init from real state at window start.
+ mujoco.mj_resetData(model, data)
+ data.qpos[0] = real_angle[w_start]
+ data.qvel[0] = real_vel[w_start]
+ data.ctrl[:] = 0.0
+ mujoco.mj_forward(model, data)
+
+ # Backlash state resets each window (assume contact at start).
+ backlash_state = [0.0]
+
+ for i in range(w_start, w_end):
+ ctrl = _transform_action(actions[i], params)
+ data.ctrl[0] = ctrl
+
+ for _ in range(substeps):
+ scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
+ if scale < 1.0:
+ data.ctrl[0] = 0.0
+ else:
+ data.ctrl[0] = ctrl
+ mujoco.mj_step(model, data)
+
+ # Bail out on NaN/instability — fill rest of window with last good.
+ if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
+ sim_motor_angle[i:w_end] = sim_motor_angle[max(i-1, w_start)]
+ sim_motor_vel[i:w_end] = 0.0
+ break
+
+ sim_motor_angle[i] = data.qpos[0]
+ sim_motor_vel[i] = data.qvel[0]
+
+ return {
+ "motor_angle": sim_motor_angle,
+ "motor_vel": sim_motor_vel,
+ "n_windows": n_windows,
+ }
diff --git a/src/sysid/motor/visualize.py b/src/sysid/motor/visualize.py
new file mode 100644
index 0000000..69fa107
--- /dev/null
+++ b/src/sysid/motor/visualize.py
@@ -0,0 +1,204 @@
+"""Visualise motor system identification — real vs simulated trajectories.
+
+Usage:
+ python -m src.sysid.motor.visualize \
+ --recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
+
+ # With tuned result:
+ python -m src.sysid.motor.visualize \
+ --recording .npz \
+ --result assets/motor/motor_sysid_result.json
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+import numpy as np
+import structlog
+
+log = structlog.get_logger()
+
+_DEFAULT_ASSET = "assets/motor"
+
+
+def visualize(
+ asset_path: str | Path = _DEFAULT_ASSET,
+ recording_path: str | Path = "",
+ result_path: str | Path | None = None,
+ sim_dt: float = 0.002,
+ substeps: int = 10,
+ window_duration: float = 0.5,
+ save_path: str | Path | None = None,
+ show: bool = True,
+) -> None:
+ """Generate 3-panel comparison plot: angle, velocity, action."""
+ import matplotlib.pyplot as plt
+
+ from src.sysid.motor.rollout import (
+ MOTOR_PARAMS,
+ defaults_vector,
+ params_to_dict,
+ rollout,
+ windowed_rollout,
+ )
+
+ asset_path = Path(asset_path).resolve()
+ recording = dict(np.load(recording_path))
+
+ t = recording["time"]
+ actions = recording["action"]
+
+ # ── Simulate with default parameters ─────────────────────────
+ default_params = params_to_dict(defaults_vector(MOTOR_PARAMS), MOTOR_PARAMS)
+ log.info("simulating_default_params", windowed=window_duration > 0)
+
+ if window_duration > 0:
+ sim_default = windowed_rollout(
+ asset_path=asset_path,
+ params=default_params,
+ recording=recording,
+ window_duration=window_duration,
+ sim_dt=sim_dt,
+ substeps=substeps,
+ )
+ else:
+ sim_default = rollout(
+ asset_path=asset_path,
+ params=default_params,
+ actions=actions,
+ sim_dt=sim_dt,
+ substeps=substeps,
+ )
+
+ # ── Simulate with tuned parameters (if available) ────────────
+ sim_tuned = None
+ tuned_cost = None
+
+ if result_path is not None:
+ result_path = Path(result_path)
+ else:
+ # Auto-detect.
+ auto = asset_path / "motor_sysid_result.json"
+ if auto.exists():
+ result_path = auto
+
+ if result_path is not None and result_path.exists():
+ result = json.loads(result_path.read_text())
+ tuned_params = result.get("best_params", {})
+ tuned_cost = result.get("best_cost")
+ log.info("simulating_tuned_params", cost=tuned_cost)
+
+ if window_duration > 0:
+ sim_tuned = windowed_rollout(
+ asset_path=asset_path,
+ params=tuned_params,
+ recording=recording,
+ window_duration=window_duration,
+ sim_dt=sim_dt,
+ substeps=substeps,
+ )
+ else:
+ sim_tuned = rollout(
+ asset_path=asset_path,
+ params=tuned_params,
+ actions=actions,
+ sim_dt=sim_dt,
+ substeps=substeps,
+ )
+
+ # ── Plot ─────────────────────────────────────────────────────
+ fig, axes = plt.subplots(3, 1, figsize=(14, 8), sharex=True)
+
+ # Motor angle.
+ ax = axes[0]
+ ax.plot(t, np.degrees(recording["motor_angle"]), "k-", lw=1.2, alpha=0.8, label="Real")
+ ax.plot(t, np.degrees(sim_default["motor_angle"]), "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
+ if sim_tuned is not None:
+ ax.plot(t, np.degrees(sim_tuned["motor_angle"]), "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
+ ax.set_ylabel("Motor Angle (°)")
+ ax.legend(loc="upper right", fontsize=8)
+ ax.grid(True, alpha=0.3)
+
+ # Motor velocity.
+ ax = axes[1]
+ ax.plot(t, recording["motor_vel"], "k-", lw=1.2, alpha=0.8, label="Real")
+ ax.plot(t, sim_default["motor_vel"], "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
+ if sim_tuned is not None:
+ ax.plot(t, sim_tuned["motor_vel"], "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
+ ax.set_ylabel("Motor Velocity (rad/s)")
+ ax.legend(loc="upper right", fontsize=8)
+ ax.grid(True, alpha=0.3)
+
+ # Action.
+ ax = axes[2]
+ ax.plot(t, actions, "b-", lw=0.8, alpha=0.6)
+ ax.set_ylabel("Action (norm)")
+ ax.set_xlabel("Time (s)")
+ ax.grid(True, alpha=0.3)
+ ax.set_ylim(-1.1, 1.1)
+
+ # Title.
+ title = "Motor System Identification — Real vs Simulated"
+ if tuned_cost is not None:
+ from src.sysid.motor.optimize import cost_function
+
+ orig_cost = cost_function(
+ defaults_vector(MOTOR_PARAMS),
+ recording,
+ asset_path,
+ MOTOR_PARAMS,
+ sim_dt=sim_dt,
+ substeps=substeps,
+ window_duration=window_duration,
+ )
+ title += f"\nDefault cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}"
+ improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0
+ title += f" ({improvement:+.1f}%)"
+
+ fig.suptitle(title, fontsize=12)
+ plt.tight_layout()
+
+ if save_path:
+ fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
+ log.info("figure_saved", path=str(save_path))
+
+ if show:
+ plt.show()
+ else:
+ plt.close(fig)
+
+
+# ── CLI ──────────────────────────────────────────────────────────────
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(
+ description="Visualise motor system identification results."
+ )
+ parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
+ parser.add_argument("--recording", type=str, required=True, help=".npz file")
+ parser.add_argument("--result", type=str, default=None, help="sysid result JSON")
+ parser.add_argument("--sim-dt", type=float, default=0.002)
+ parser.add_argument("--substeps", type=int, default=10)
+ parser.add_argument("--window-duration", type=float, default=0.5)
+ parser.add_argument("--save", type=str, default=None, help="Save figure path")
+ parser.add_argument("--no-show", action="store_true")
+ args = parser.parse_args()
+
+ visualize(
+ asset_path=args.asset_path,
+ recording_path=args.recording,
+ result_path=args.result,
+ sim_dt=args.sim_dt,
+ substeps=args.substeps,
+ window_duration=args.window_duration,
+ save_path=args.save,
+ show=not args.no_show,
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/sysid/optimize.py b/src/sysid/optimize.py
index 1ffc03f..4a4f931 100644
--- a/src/sysid/optimize.py
+++ b/src/sysid/optimize.py
@@ -4,10 +4,15 @@ Minimises the trajectory-matching cost between a MuJoCo rollout and a
recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python
CMA-ES with native box-constraint support).
+Motor parameters are **locked** from the motor-only sysid — only
+pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
+optimised. Velocities are optionally preprocessed with Savitzky-Golay
+differentiation for cleaner targets.
+
Usage:
python -m src.sysid.optimize \
--robot-path assets/rotary_cartpole \
- --recording assets/rotary_cartpole/recordings/capture_20260311_120000.npz
+ --recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz
# Shorter run for testing:
python -m src.sysid.optimize \
@@ -28,6 +33,8 @@ import numpy as np
import structlog
from src.sysid.rollout import (
+ LOCKED_MOTOR_PARAMS,
+ PARAM_SETS,
ROTARY_CARTPOLE_PARAMS,
ParamSpec,
bounds_arrays,
@@ -40,6 +47,65 @@ from src.sysid.rollout import (
log = structlog.get_logger()
+# ── Velocity preprocessing ───────────────────────────────────────────
+
+
+def _preprocess_recording(
+ recording: dict[str, np.ndarray],
+ preprocess_vel: bool = True,
+ sg_window: int = 7,
+ sg_polyorder: int = 3,
+) -> dict[str, np.ndarray]:
+ """Optionally recompute velocities using Savitzky-Golay differentiation.
+
+ Applies SG filtering to both motor_vel and pendulum_vel, replacing
+ the noisy firmware finite-difference velocities with smooth
+ analytical derivatives of the (clean) angle signals.
+ """
+ if not preprocess_vel:
+ return recording
+
+ from scipy.signal import savgol_filter
+
+ rec = dict(recording)
+ times = rec["time"]
+ dt = float(np.mean(np.diff(times)))
+
+ # Motor velocity.
+ rec["motor_vel_raw"] = rec["motor_vel"].copy()
+ rec["motor_vel"] = savgol_filter(
+ rec["motor_angle"],
+ window_length=sg_window,
+ polyorder=sg_polyorder,
+ deriv=1,
+ delta=dt,
+ )
+
+ # Pendulum velocity.
+ rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
+ rec["pendulum_vel"] = savgol_filter(
+ rec["pendulum_angle"],
+ window_length=sg_window,
+ polyorder=sg_polyorder,
+ deriv=1,
+ delta=dt,
+ )
+
+ motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
+ pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
+ log.info(
+ "velocity_preprocessed",
+ method="savgol",
+ sg_window=sg_window,
+ sg_polyorder=sg_polyorder,
+ dt_ms=f"{dt*1000:.1f}",
+ motor_noise_std=f"{motor_noise:.3f} rad/s",
+ pend_noise_std=f"{pend_noise:.3f} rad/s",
+ )
+
+ return rec
+
+
# ── Cost function ────────────────────────────────────────────────────
@@ -63,18 +129,35 @@ def _compute_trajectory_cost(
recording: dict[str, np.ndarray],
pos_weight: float = 1.0,
vel_weight: float = 0.1,
+ pendulum_scale: float = 3.0,
+ vel_outlier_threshold: float = 20.0,
) -> float:
- """Weighted MSE between sim and real trajectories."""
+ """Weighted MSE between sim and real trajectories.
+
+ pendulum_scale multiplies the pendulum terms relative to motor terms.
+
+ Samples where the *real* pendulum velocity exceeds
+ ``vel_outlier_threshold`` (rad/s) are excluded from the velocity
+ terms. These are encoder-wrap artefacts (pendulum swinging past
+ vertical) that no simulator can reproduce.
+ """
motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"])
pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"])
motor_vel_err = sim["motor_vel"] - recording["motor_vel"]
pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"]
+ # Mask out encoder-wrap velocity spikes so the optimizer doesn't
+ # waste capacity fitting artefacts.
+ valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold
+ if valid.sum() < len(valid):
+ motor_vel_err = motor_vel_err[valid]
+ pend_vel_err = pend_vel_err[valid]
+
return float(
pos_weight * np.mean(motor_err**2)
- + pos_weight * np.mean(pend_err**2)
+ + pos_weight * pendulum_scale * np.mean(pend_err**2)
+ vel_weight * np.mean(motor_vel_err**2)
- + vel_weight * np.mean(pend_vel_err**2)
+ + vel_weight * pendulum_scale * np.mean(pend_vel_err**2)
)
@@ -87,7 +170,9 @@ def cost_function(
substeps: int = 10,
pos_weight: float = 1.0,
vel_weight: float = 0.1,
+ pendulum_scale: float = 3.0,
window_duration: float = 0.5,
+ motor_params: dict[str, float] | None = None,
) -> float:
"""Compute trajectory-matching cost for a candidate parameter vector.
@@ -113,21 +198,56 @@ def cost_function(
window_duration=window_duration,
sim_dt=sim_dt,
substeps=substeps,
+ motor_params=motor_params,
)
else:
sim = rollout(
robot_path=robot_path,
params=params,
actions=recording["action"],
- timesteps=recording["time"],
sim_dt=sim_dt,
substeps=substeps,
+ motor_params=motor_params,
)
except Exception as exc:
log.warning("rollout_failed", error=str(exc))
return 1e6
- return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight)
+ # Check for NaN in sim output.
+ for key in ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel"):
+ if np.any(~np.isfinite(sim[key])):
+ return 1e6
+
+ return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
+
+
+# ── Parallel evaluation helper (module-level for pickling) ───────────
+
+# Shared state set by the parent process before spawning workers.
+_par_recording: dict[str, np.ndarray] = {}
+_par_robot_path: Path = Path(".")
+_par_specs: list[ParamSpec] = []
+_par_kwargs: dict = {}
+
+
+def _init_worker(recording, robot_path, specs, kwargs):
+ """Initialiser run once per worker process."""
+ global _par_recording, _par_robot_path, _par_specs, _par_kwargs
+ _par_recording = recording
+ _par_robot_path = robot_path
+ _par_specs = specs
+ _par_kwargs = kwargs
+
+
+def _eval_candidate(x_natural: np.ndarray) -> float:
+ """Evaluate a single candidate — called in worker processes."""
+ return cost_function(
+ x_natural,
+ _par_recording,
+ _par_robot_path,
+ _par_specs,
+ **_par_kwargs,
+ )
# ── CMA-ES optimisation loop ────────────────────────────────────────
@@ -144,17 +264,25 @@ def optimize(
substeps: int = 10,
pos_weight: float = 1.0,
vel_weight: float = 0.1,
+ pendulum_scale: float = 3.0,
window_duration: float = 0.5,
seed: int = 42,
+ preprocess_vel: bool = True,
+ sg_window: int = 7,
+ sg_polyorder: int = 3,
) -> dict:
"""Run CMA-ES optimisation and return results.
+ Motor parameters are locked from the motor-only sysid.
+ Only pendulum/arm parameters are optimised.
+
Returns a dict with:
best_params: dict[str, float]
best_cost: float
history: list of (generation, best_cost) tuples
recording: str (path used)
specs: list of param names
+ motor_params: dict of locked motor params
"""
from cmaes import CMA
@@ -164,8 +292,24 @@ def optimize(
if specs is None:
specs = ROTARY_CARTPOLE_PARAMS
+ motor_params = dict(LOCKED_MOTOR_PARAMS)
+ log.info(
+ "motor_params_locked",
+ n_params=len(motor_params),
+ gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
+ )
+
# Load recording.
recording = dict(np.load(recording_path))
+
+ # Preprocessing: SG velocity recomputation.
+ recording = _preprocess_recording(
+ recording,
+ preprocess_vel=preprocess_vel,
+ sg_window=sg_window,
+ sg_polyorder=sg_polyorder,
+ )
+
n_samples = len(recording["time"])
duration = recording["time"][-1] - recording["time"][0]
n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
@@ -219,28 +363,54 @@ def optimize(
t0 = time.monotonic()
+ # ── Parallel evaluation setup ────────────────────────────────
+ # Each candidate is independent — evaluate them in parallel using
+ # a process pool. Falls back to sequential if n_workers=1.
+ import multiprocessing as mp
+ n_workers = max(1, mp.cpu_count() - 1) # leave 1 core free
+
+ eval_kwargs = dict(
+ sim_dt=sim_dt,
+ substeps=substeps,
+ pos_weight=pos_weight,
+ vel_weight=vel_weight,
+ pendulum_scale=pendulum_scale,
+ window_duration=window_duration,
+ motor_params=motor_params,
+ )
+
+ log.info("parallel_workers", n_workers=n_workers)
+
+ # Create a persistent pool (avoids per-generation fork overhead).
+ pool = None
+ if n_workers > 1:
+ pool = mp.Pool(
+ n_workers,
+ initializer=_init_worker,
+ initargs=(recording, robot_path, specs, eval_kwargs),
+ )
+
for gen in range(max_generations):
- solutions = []
+ # Ask all candidates first.
+ candidates_normed = []
+ candidates_natural = []
for _ in range(optimizer.population_size):
x_normed = optimizer.ask()
x_natural = from_normed(x_normed)
-
- # Clip to bounds (CMA-ES can slightly exceed with sampling noise).
x_natural = np.clip(x_natural, lo, hi)
+ candidates_normed.append(x_normed)
+ candidates_natural.append(x_natural)
- c = cost_function(
- x_natural,
- recording,
- robot_path,
- specs,
- sim_dt=sim_dt,
- substeps=substeps,
- pos_weight=pos_weight,
- vel_weight=vel_weight,
- window_duration=window_duration,
- )
- solutions.append((x_normed, c))
+ # Evaluate in parallel.
+ if pool is not None:
+ costs = pool.map(_eval_candidate, candidates_natural)
+ else:
+ costs = [cost_function(
+ x, recording, robot_path, specs, **eval_kwargs
+ ) for x in candidates_natural]
+ solutions = list(zip(candidates_normed, costs))
+ for x_natural, c in zip(candidates_natural, costs):
if c < best_cost:
best_cost = c
best_params_vec = x_natural.copy()
@@ -259,6 +429,12 @@ def optimize(
)
total_time = time.monotonic() - t0
+
+ # Clean up process pool.
+ if pool is not None:
+ pool.close()
+ pool.join()
+
best_params = params_to_dict(best_params_vec, specs)
log.info(
@@ -289,6 +465,8 @@ def optimize(
"recording": str(recording_path),
"param_names": [s.name for s in specs],
"defaults": {s.name: s.default for s in specs},
+ "motor_params": motor_params,
+ "preprocess_vel": preprocess_vel,
"timestamp": datetime.now().isoformat(),
}
@@ -319,6 +497,8 @@ def main() -> None:
parser.add_argument("--substeps", type=int, default=10)
parser.add_argument("--pos-weight", type=float, default=1.0)
parser.add_argument("--vel-weight", type=float, default=0.1)
+ parser.add_argument("--pendulum-scale", type=float, default=3.0,
+ help="Multiplier for pendulum terms relative to motor (default 3.0)")
parser.add_argument(
"--window-duration",
type=float,
@@ -331,11 +511,30 @@ def main() -> None:
action="store_true",
help="Skip exporting tuned files (results JSON only)",
)
+ parser.add_argument(
+ "--no-preprocess-vel",
+ action="store_true",
+ help="Skip Savitzky-Golay velocity preprocessing",
+ )
+ parser.add_argument("--sg-window", type=int, default=7,
+ help="Savitzky-Golay window length (odd, default 7)")
+ parser.add_argument("--sg-polyorder", type=int, default=3,
+ help="Savitzky-Golay polynomial order (default 3)")
+ parser.add_argument(
+ "--param-set",
+ type=str,
+ default="full",
+ choices=list(PARAM_SETS.keys()),
+ help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
+ )
args = parser.parse_args()
+ specs = PARAM_SETS[args.param_set]
+
result = optimize(
robot_path=args.robot_path,
recording_path=args.recording,
+ specs=specs,
sigma0=args.sigma0,
population_size=args.population_size,
max_generations=args.max_generations,
@@ -343,8 +542,12 @@ def main() -> None:
substeps=args.substeps,
pos_weight=args.pos_weight,
vel_weight=args.vel_weight,
+ pendulum_scale=args.pendulum_scale,
window_duration=args.window_duration,
seed=args.seed,
+ preprocess_vel=not args.no_preprocess_vel,
+ sg_window=args.sg_window,
+ sg_polyorder=args.sg_polyorder,
)
# Save results JSON.
@@ -369,6 +572,7 @@ def main() -> None:
export_tuned_files(
robot_path=args.robot_path,
params=result["best_params"],
+ motor_params=result.get("motor_params"),
)
diff --git a/src/sysid/rollout.py b/src/sysid/rollout.py
index cbdcf53..7ac2e96 100644
--- a/src/sysid/rollout.py
+++ b/src/sysid/rollout.py
@@ -6,22 +6,51 @@ the simulated trajectory for comparison with the real recording.
This module is the inner loop of the CMA-ES optimizer: it is called once
per candidate parameter vector per generation.
+
+Motor parameters are **locked** from the motor-only sysid result.
+The optimizer only fits
+pendulum/arm inertial parameters, pendulum joint dynamics, and
+``ctrl_limit``. The asymmetric motor model (deadzone, gear compensation,
+Coulomb friction, viscous damping, quadratic drag, back-EMF) is applied
+via ``ActuatorConfig.transform_ctrl()`` and ``compute_motor_force()``.
"""
from __future__ import annotations
-import copy
import dataclasses
-import math
+import os
import tempfile
import xml.etree.ElementTree as ET
from pathlib import Path
-from typing import Any
import mujoco
import numpy as np
import yaml
+from src.core.robot import ActuatorConfig, JointConfig, RobotConfig
+from src.runners.mujoco import ActuatorLimits, load_mujoco_model
+from src.sysid._urdf import patch_link_inertials
+
+
+# ── Locked motor parameters (from motor-only sysid) ─────────────────
+# These are FIXED and not optimised. They come from the 12-param model
+# in robot.yaml (from motor-only sysid, cost 0.862).
+
+LOCKED_MOTOR_PARAMS: dict[str, float] = {
+ "actuator_gear_pos": 0.424182,
+ "actuator_gear_neg": 0.425031,
+ "actuator_filter_tau": 0.00503506,
+ "motor_damping_pos": 0.00202682,
+ "motor_damping_neg": 0.0146651,
+ "motor_armature": 0.00277342,
+ "motor_frictionloss_pos": 0.0573282,
+ "motor_frictionloss_neg": 0.0533549,
+ "viscous_quadratic": 0.000285329,
+ "back_emf_gain": 0.00675809,
+ "motor_deadzone_pos": 0.141291,
+ "motor_deadzone_neg": 0.0780148,
+}
+
# ── Tunable parameter specification ──────────────────────────────────
@@ -37,33 +66,58 @@ class ParamSpec:
log_scale: bool = False # optimise in log-space (masses, inertias)
-# Default parameter specs for the rotary cartpole.
+# Pendulum sysid parameters — motor params are LOCKED (not here).
# Order matters: the optimizer maps a flat vector to these specs.
+# Defaults are from the URDF exported by Fusion 360.
ROTARY_CARTPOLE_PARAMS: list[ParamSpec] = [
# ── Arm link (URDF) ──────────────────────────────────────────
- ParamSpec("arm_mass", 0.010, 0.003, 0.05, log_scale=True),
- ParamSpec("arm_com_x", 0.00005, -0.02, 0.02),
- ParamSpec("arm_com_y", 0.0065, -0.01, 0.02),
- ParamSpec("arm_com_z", 0.00563, -0.01, 0.02),
+ ParamSpec("arm_mass", 0.02110, 0.005, 0.08, log_scale=True),
+ ParamSpec("arm_com_x", -0.00710, -0.03, 0.03),
+ ParamSpec("arm_com_y", 0.00085, -0.02, 0.02),
+ ParamSpec("arm_com_z", 0.00795, -0.02, 0.02),
# ── Pendulum link (URDF) ─────────────────────────────────────
- ParamSpec("pendulum_mass", 0.015, 0.005, 0.05, log_scale=True),
- ParamSpec("pendulum_com_x", 0.1583, 0.05, 0.25),
- ParamSpec("pendulum_com_y", -0.0983, -0.20, 0.0),
- ParamSpec("pendulum_com_z", 0.0, -0.05, 0.05),
- ParamSpec("pendulum_ixx", 6.16e-06, 1e-07, 1e-04, log_scale=True),
- ParamSpec("pendulum_iyy", 6.16e-06, 1e-07, 1e-04, log_scale=True),
- ParamSpec("pendulum_izz", 1.23e-05, 1e-07, 1e-04, log_scale=True),
- ParamSpec("pendulum_ixy", 6.10e-06, -1e-04, 1e-04),
- # ── Actuator / joint dynamics (robot.yaml) ───────────────────
- ParamSpec("actuator_gear", 0.064, 0.01, 0.2, log_scale=True),
- ParamSpec("actuator_filter_tau", 0.03, 0.005, 0.15),
- ParamSpec("motor_damping", 0.003, 1e-4, 0.05, log_scale=True),
- ParamSpec("pendulum_damping", 0.0001, 1e-5, 0.01, log_scale=True),
- ParamSpec("motor_armature", 0.0001, 1e-5, 0.01, log_scale=True),
- ParamSpec("motor_frictionloss", 0.03, 0.001, 0.1, log_scale=True),
+ ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
+ ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
+ ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
+ ParamSpec("pendulum_com_z", -0.00346, -0.05, 0.05),
+ ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
+ ParamSpec("pendulum_iyy", 3.70e-05, 1e-07, 1e-03, log_scale=True),
+ ParamSpec("pendulum_izz", 7.83e-05, 1e-07, 1e-03, log_scale=True),
+ ParamSpec("pendulum_ixy", -6.93e-06, -1e-03, 1e-03),
+ # ── Pendulum joint dynamics ──────────────────────────────────
+ ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
+ ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
+ # ── Hardware realism (control pipeline) ────────────────────
+ ParamSpec("ctrl_limit", 0.588, 0.45, 0.70), # MAX_MOTOR_SPEED / 255
]
+# Extended set: full params + motor armature (compensates for the
+# motor-only sysid having captured arm/pendulum loading in armature).
+EXTENDED_PARAMS: list[ParamSpec] = ROTARY_CARTPOLE_PARAMS + [
+ ParamSpec("motor_armature", 0.00277, 0.0005, 0.02, log_scale=True),
+]
+
+
+# Reduced set: only the 6 most impactful pendulum parameters.
+# Good for a fast first pass — converges in ~50 generations.
+REDUCED_PARAMS: list[ParamSpec] = [
+ ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
+ ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
+ ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
+ ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
+ ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
+ ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
+]
+
+
+PARAM_SETS: dict[str, list[ParamSpec]] = {
+ "full": ROTARY_CARTPOLE_PARAMS,
+ "extended": EXTENDED_PARAMS,
+ "reduced": REDUCED_PARAMS,
+}
+
+
def params_to_dict(
values: np.ndarray, specs: list[ParamSpec] | None = None
) -> dict[str, float]:
@@ -97,182 +151,93 @@ def bounds_arrays(
def _build_model(
robot_path: Path,
params: dict[str, float],
-) -> mujoco.MjModel:
- """Build a MuJoCo model from URDF + robot.yaml with parameter overrides.
+ motor_params: dict[str, float] | None = None,
+) -> tuple[mujoco.MjModel, ActuatorConfig]:
+ """Build a MuJoCo model with sysid overrides.
- Follows the same two-step approach as ``MuJoCoRunner._load_model()``:
- 1. Parse URDF, inject meshdir, load into MuJoCo
- 2. Export MJCF, inject actuators + joint overrides + param overrides, reload
+ Returns (model, actuator) — use ``actuator.transform_ctrl()`` and
+ ``actuator.compute_motor_force()`` in the rollout loop.
"""
+ if motor_params is None:
+ motor_params = LOCKED_MOTOR_PARAMS
+
robot_path = Path(robot_path).resolve()
+
+ # ── Patch URDF inertial parameters to a temp file ────────────
robot_yaml = yaml.safe_load((robot_path / "robot.yaml").read_text())
urdf_path = robot_path / robot_yaml["urdf"]
- # ── Step 1: Load URDF ────────────────────────────────────────
tree = ET.parse(urdf_path)
- root = tree.getroot()
+ patch_link_inertials(tree.getroot(), params)
- # Inject meshdir compiler directive.
- meshdir = None
- for mesh_el in root.iter("mesh"):
- fn = mesh_el.get("filename", "")
- parent = str(Path(fn).parent)
- if parent and parent != ".":
- meshdir = parent
- break
- if meshdir:
- mj_ext = ET.SubElement(root, "mujoco")
- ET.SubElement(
- mj_ext, "compiler", attrib={"meshdir": meshdir, "balanceinertia": "true"}
- )
+ fd, tmp_urdf = tempfile.mkstemp(
+ suffix=".urdf", prefix="_sysid_", dir=str(robot_path),
+ )
+ os.close(fd)
+ tmp_urdf_path = Path(tmp_urdf)
+ tree.write(str(tmp_urdf_path), xml_declaration=True, encoding="unicode")
- # Override URDF inertial parameters BEFORE MuJoCo loading.
- for link in root.iter("link"):
- link_name = link.get("name", "")
- inertial = link.find("inertial")
- if inertial is None:
- continue
+ # ── Build RobotConfig with full motor sysid values ───────────
+ gear_pos = motor_params.get("actuator_gear_pos", 0.424182)
+ gear_neg = motor_params.get("actuator_gear_neg", 0.425031)
+ motor_armature = params.get(
+ "motor_armature",
+ motor_params.get("motor_armature", 0.00277342),
+ )
+ pend_damping = params.get("pendulum_damping", 0.0001)
+ pend_frictionloss = params.get("pendulum_frictionloss", 0.0001)
- if link_name == "arm":
- _set_mass(inertial, params.get("arm_mass"))
- _set_com(
- inertial,
- params.get("arm_com_x"),
- params.get("arm_com_y"),
- params.get("arm_com_z"),
- )
+ act_cfg = robot_yaml["actuators"][0]
+ ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
- elif link_name == "pendulum":
- _set_mass(inertial, params.get("pendulum_mass"))
- _set_com(
- inertial,
- params.get("pendulum_com_x"),
- params.get("pendulum_com_y"),
- params.get("pendulum_com_z"),
- )
- _set_inertia(
- inertial,
- ixx=params.get("pendulum_ixx"),
- iyy=params.get("pendulum_iyy"),
- izz=params.get("pendulum_izz"),
- ixy=params.get("pendulum_ixy"),
- )
+ actuator = ActuatorConfig(
+ joint=act_cfg["joint"],
+ type="motor",
+ gear=(gear_pos, gear_neg),
+ ctrl_range=(ctrl_lo, ctrl_hi),
+ deadzone=(
+ motor_params.get("motor_deadzone_pos", 0.141),
+ motor_params.get("motor_deadzone_neg", 0.078),
+ ),
+ damping=(
+ motor_params.get("motor_damping_pos", 0.002),
+ motor_params.get("motor_damping_neg", 0.015),
+ ),
+ frictionloss=(
+ motor_params.get("motor_frictionloss_pos", 0.057),
+ motor_params.get("motor_frictionloss_neg", 0.053),
+ ),
+ filter_tau=motor_params.get("actuator_filter_tau", 0.005),
+ viscous_quadratic=motor_params.get("viscous_quadratic", 0.000285),
+ back_emf_gain=motor_params.get("back_emf_gain", 0.00676),
+ )
+
+ robot = RobotConfig(
+ urdf_path=tmp_urdf_path,
+ actuators=[actuator],
+ joints={
+ "motor_joint": JointConfig(
+ damping=0.0,
+ armature=motor_armature,
+ frictionloss=0.0,
+ ),
+ "pendulum_joint": JointConfig(
+ damping=pend_damping,
+ frictionloss=pend_frictionloss,
+ ),
+ },
+ )
- # Write temp URDF and load.
- tmp_urdf = robot_path / "_tmp_sysid_load.urdf"
- tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
try:
- model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
+ model = load_mujoco_model(robot)
finally:
- tmp_urdf.unlink(missing_ok=True)
+ tmp_urdf_path.unlink(missing_ok=True)
- # ── Step 2: Export MJCF, inject actuators + overrides ────────
- tmp_mjcf = robot_path / "_tmp_sysid_inject.xml"
- try:
- mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
- mjcf_root = ET.fromstring(tmp_mjcf.read_text())
-
- # Actuator.
- gear = params.get("actuator_gear", robot_yaml["actuators"][0].get("gear", 0.064))
- filter_tau = params.get(
- "actuator_filter_tau",
- robot_yaml["actuators"][0].get("filter_tau", 0.03),
- )
- act_cfg = robot_yaml["actuators"][0]
- ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
-
- act_elem = ET.SubElement(mjcf_root, "actuator")
- attribs: dict[str, str] = {
- "name": f"{act_cfg['joint']}_motor",
- "joint": act_cfg["joint"],
- "gear": str(gear),
- "ctrlrange": f"{ctrl_lo} {ctrl_hi}",
- }
- if filter_tau > 0:
- attribs["dyntype"] = "filter"
- attribs["dynprm"] = str(filter_tau)
- attribs["gaintype"] = "fixed"
- attribs["biastype"] = "none"
- ET.SubElement(act_elem, "general", attrib=attribs)
- else:
- ET.SubElement(act_elem, "motor", attrib=attribs)
-
- # Joint overrides.
- motor_damping = params.get("motor_damping", 0.003)
- pend_damping = params.get("pendulum_damping", 0.0001)
- motor_armature = params.get("motor_armature", 0.0001)
- motor_frictionloss = params.get("motor_frictionloss", 0.03)
-
- for body in mjcf_root.iter("body"):
- for jnt in body.findall("joint"):
- name = jnt.get("name")
- if name == "motor_joint":
- jnt.set("damping", str(motor_damping))
- jnt.set("armature", str(motor_armature))
- jnt.set("frictionloss", str(motor_frictionloss))
- elif name == "pendulum_joint":
- jnt.set("damping", str(pend_damping))
-
- # Disable self-collision.
- for geom in mjcf_root.iter("geom"):
- geom.set("contype", "0")
- geom.set("conaffinity", "0")
-
- modified_xml = ET.tostring(mjcf_root, encoding="unicode")
- tmp_mjcf.write_text(modified_xml)
- model = mujoco.MjModel.from_xml_path(str(tmp_mjcf))
- finally:
- tmp_mjcf.unlink(missing_ok=True)
-
- return model
+ return model, actuator
-def _set_mass(inertial: ET.Element, mass: float | None) -> None:
- if mass is None:
- return
- mass_el = inertial.find("mass")
- if mass_el is not None:
- mass_el.set("value", str(mass))
-def _set_com(
- inertial: ET.Element,
- x: float | None,
- y: float | None,
- z: float | None,
-) -> None:
- origin = inertial.find("origin")
- if origin is None:
- return
- xyz = origin.get("xyz", "0 0 0").split()
- if x is not None:
- xyz[0] = str(x)
- if y is not None:
- xyz[1] = str(y)
- if z is not None:
- xyz[2] = str(z)
- origin.set("xyz", " ".join(xyz))
-
-
-def _set_inertia(
- inertial: ET.Element,
- ixx: float | None = None,
- iyy: float | None = None,
- izz: float | None = None,
- ixy: float | None = None,
- iyz: float | None = None,
- ixz: float | None = None,
-) -> None:
- ine = inertial.find("inertia")
- if ine is None:
- return
- for attr, val in [
- ("ixx", ixx), ("iyy", iyy), ("izz", izz),
- ("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
- ]:
- if val is not None:
- ine.set(attr, str(val))
-
# ── Simulation rollout ───────────────────────────────────────────────
@@ -281,72 +246,58 @@ def rollout(
robot_path: str | Path,
params: dict[str, float],
actions: np.ndarray,
- timesteps: np.ndarray,
sim_dt: float = 0.002,
substeps: int = 10,
+ motor_params: dict[str, float] | None = None,
) -> dict[str, np.ndarray]:
"""Replay recorded actions in MuJoCo with overridden parameters.
Parameters
----------
robot_path : asset directory
- params : named parameter overrides
+ params : named parameter overrides (pendulum/arm only)
actions : (N,) normalised actions [-1, 1] from the recording
- timesteps : (N,) wall-clock times (seconds) from the recording
sim_dt : MuJoCo physics timestep
substeps : physics substeps per control step
+ motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)
Returns
-------
dict with keys: motor_angle, motor_vel, pendulum_angle, pendulum_vel
Each is an (N,) numpy array of simulated values.
"""
+ if motor_params is None:
+ motor_params = LOCKED_MOTOR_PARAMS
+
robot_path = Path(robot_path).resolve()
- model = _build_model(robot_path, params)
+ model, actuator = _build_model(robot_path, params, motor_params)
model.opt.timestep = sim_dt
data = mujoco.MjData(model)
-
- # Start from pendulum hanging down (qpos=0 is down per URDF convention).
mujoco.mj_resetData(model, data)
- # Control dt derived from actual recording sample rate.
n = len(actions)
- ctrl_dt = sim_dt * substeps
+ ctrl_limit = params.get("ctrl_limit", 0.588)
- # Pre-allocate output.
sim_motor_angle = np.zeros(n, dtype=np.float64)
sim_motor_vel = np.zeros(n, dtype=np.float64)
sim_pend_angle = np.zeros(n, dtype=np.float64)
sim_pend_vel = np.zeros(n, dtype=np.float64)
- # Extract actuator limit info for software limit switch.
- nu = model.nu
- if nu > 0:
- jnt_id = model.actuator_trnid[0, 0]
- jnt_limited = bool(model.jnt_limited[jnt_id])
- jnt_lo = model.jnt_range[jnt_id, 0]
- jnt_hi = model.jnt_range[jnt_id, 1]
- gear_sign = float(np.sign(model.actuator_gear[0, 0]))
- else:
- jnt_limited = False
- jnt_lo = jnt_hi = gear_sign = 0.0
+ limits = ActuatorLimits(model)
for i in range(n):
- data.ctrl[0] = actions[i]
+ action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
+ ctrl = actuator.transform_ctrl(action)
+ data.ctrl[0] = ctrl
for _ in range(substeps):
- # Software limit switch (mirrors MuJoCoRunner).
- if jnt_limited and nu > 0:
- pos = data.qpos[jnt_id]
- if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
- data.ctrl[0] = 0.0
- elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
- data.ctrl[0] = 0.0
+ limits.enforce(model, data)
+ data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
mujoco.mj_step(model, data)
sim_motor_angle[i] = data.qpos[0]
- sim_motor_vel[i] = data.qvel[0]
sim_pend_angle[i] = data.qpos[1]
+ sim_motor_vel[i] = data.qvel[0]
sim_pend_vel[i] = data.qvel[1]
return {
@@ -364,6 +315,7 @@ def windowed_rollout(
window_duration: float = 0.5,
sim_dt: float = 0.002,
substeps: int = 10,
+ motor_params: dict[str, float] | None = None,
) -> dict[str, np.ndarray | float]:
"""Multiple-shooting rollout — split recording into short windows.
@@ -372,18 +324,19 @@ def windowed_rollout(
2. Replay the recorded actions within the window.
3. Record the simulated output.
- This prevents error accumulation across the full trajectory, giving
- a much smoother cost landscape for the optimizer.
+ Motor dynamics (asymmetric friction, damping, back-EMF, etc.) are
+ applied via qfrc_applied using the locked motor sysid parameters.
Parameters
----------
robot_path : asset directory
- params : named parameter overrides
+ params : named parameter overrides (pendulum/arm only)
recording : dict with keys time, action, motor_angle, motor_vel,
pendulum_angle, pendulum_vel (all 1D arrays of length N)
window_duration : length of each shooting window in seconds
sim_dt : MuJoCo physics timestep
substeps : physics substeps per control step
+ motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)
Returns
-------
@@ -392,8 +345,11 @@ def windowed_rollout(
(stitched from per-window simulations)
n_windows — number of windows used
"""
+ if motor_params is None:
+ motor_params = LOCKED_MOTOR_PARAMS
+
robot_path = Path(robot_path).resolve()
- model = _build_model(robot_path, params)
+ model, actuator = _build_model(robot_path, params, motor_params)
model.opt.timestep = sim_dt
data = mujoco.MjData(model)
@@ -405,67 +361,50 @@ def windowed_rollout(
real_pend_vel = recording["pendulum_vel"]
n = len(actions)
- # Pre-allocate output (stitched from all windows).
sim_motor_angle = np.zeros(n, dtype=np.float64)
sim_motor_vel = np.zeros(n, dtype=np.float64)
sim_pend_angle = np.zeros(n, dtype=np.float64)
sim_pend_vel = np.zeros(n, dtype=np.float64)
- # Extract actuator limit info.
- nu = model.nu
- if nu > 0:
- jnt_id = model.actuator_trnid[0, 0]
- jnt_limited = bool(model.jnt_limited[jnt_id])
- jnt_lo = model.jnt_range[jnt_id, 0]
- jnt_hi = model.jnt_range[jnt_id, 1]
- gear_sign = float(np.sign(model.actuator_gear[0, 0]))
- else:
- jnt_limited = False
- jnt_lo = jnt_hi = gear_sign = 0.0
+ limits = ActuatorLimits(model)
- # Compute window boundaries from recording timestamps.
t0 = times[0]
t_end = times[-1]
- window_starts: list[int] = [] # indices into the recording
+ window_starts: list[int] = []
current_t = t0
while current_t < t_end:
- # Find the index closest to current_t.
idx = int(np.searchsorted(times, current_t))
idx = min(idx, n - 1)
window_starts.append(idx)
current_t += window_duration
+ ctrl_limit = params.get("ctrl_limit", 0.588)
n_windows = len(window_starts)
for w, w_start in enumerate(window_starts):
- # Window end: next window start, or end of recording.
w_end = window_starts[w + 1] if w + 1 < n_windows else n
- # Initialize MuJoCo state from real data at window start.
mujoco.mj_resetData(model, data)
data.qpos[0] = real_motor[w_start]
data.qpos[1] = real_pend[w_start]
data.qvel[0] = real_motor_vel[w_start]
data.qvel[1] = real_pend_vel[w_start]
data.ctrl[:] = 0.0
- # Forward kinematics to make state consistent.
mujoco.mj_forward(model, data)
for i in range(w_start, w_end):
- data.ctrl[0] = actions[i]
+ action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
+ ctrl = actuator.transform_ctrl(action)
+ data.ctrl[0] = ctrl
for _ in range(substeps):
- if jnt_limited and nu > 0:
- pos = data.qpos[jnt_id]
- if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
- data.ctrl[0] = 0.0
- elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
- data.ctrl[0] = 0.0
+ limits.enforce(model, data)
+ data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
mujoco.mj_step(model, data)
sim_motor_angle[i] = data.qpos[0]
- sim_motor_vel[i] = data.qvel[0]
sim_pend_angle[i] = data.qpos[1]
+ sim_motor_vel[i] = data.qvel[0]
sim_pend_vel[i] = data.qvel[1]
return {
diff --git a/src/sysid/visualize.py b/src/sysid/visualize.py
index 987791c..0aed788 100644
--- a/src/sysid/visualize.py
+++ b/src/sysid/visualize.py
@@ -20,7 +20,6 @@ from __future__ import annotations
import argparse
import json
-import math
from pathlib import Path
import numpy as np
@@ -29,6 +28,30 @@ import structlog
log = structlog.get_logger()
+def _run_sim(
+ robot_path: Path,
+ params: dict[str, float],
+ recording: dict[str, np.ndarray],
+ window_duration: float,
+ sim_dt: float,
+ substeps: int,
+ motor_params: dict[str, float],
+) -> dict[str, np.ndarray]:
+ """Run windowed or open-loop rollout depending on window_duration."""
+ from src.sysid.rollout import rollout, windowed_rollout
+
+ if window_duration > 0:
+ return windowed_rollout(
+ robot_path=robot_path, params=params, recording=recording,
+ window_duration=window_duration, sim_dt=sim_dt,
+ substeps=substeps, motor_params=motor_params,
+ )
+ return rollout(
+ robot_path=robot_path, params=params, actions=recording["action"],
+ substeps=substeps, motor_params=motor_params,
+ )
+
+
def visualize(
robot_path: str | Path,
recording_path: str | Path,
@@ -39,31 +62,27 @@ def visualize(
save_path: str | Path | None = None,
show: bool = True,
) -> None:
- """Generate comparison plot.
-
- Parameters
- ----------
- robot_path : robot asset directory
- recording_path : .npz file from capture
- result_path : sysid_result.json with best_params (optional)
- sim_dt / substeps : physics settings for rollout
- window_duration : shooting window length (s); 0 = open-loop
- save_path : if provided, save figure to this path (PNG, PDF, …)
- show : if True, display interactive matplotlib window
- """
+ """Generate comparison plot."""
import matplotlib.pyplot as plt
from src.sysid.rollout import (
+ LOCKED_MOTOR_PARAMS,
ROTARY_CARTPOLE_PARAMS,
defaults_vector,
params_to_dict,
- rollout,
- windowed_rollout,
)
robot_path = Path(robot_path).resolve()
recording = dict(np.load(recording_path))
+ motor_params = LOCKED_MOTOR_PARAMS
+
+ sim_kwargs = dict(
+ robot_path=robot_path, recording=recording,
+ window_duration=window_duration, sim_dt=sim_dt,
+ substeps=substeps, motor_params=motor_params,
+ )
+
t = recording["time"]
actions = recording["action"]
@@ -72,26 +91,15 @@ def visualize(
defaults_vector(ROTARY_CARTPOLE_PARAMS), ROTARY_CARTPOLE_PARAMS
)
log.info("simulating_default_params", windowed=window_duration > 0)
- if window_duration > 0:
- sim_default = windowed_rollout(
- robot_path=robot_path,
- params=default_params,
- recording=recording,
- window_duration=window_duration,
- sim_dt=sim_dt,
- substeps=substeps,
- )
- else:
- sim_default = rollout(
- robot_path=robot_path,
- params=default_params,
- actions=actions,
- timesteps=t,
- sim_dt=sim_dt,
- substeps=substeps,
- )
+ sim_default = _run_sim(params=default_params, **sim_kwargs)
# ── Simulate with tuned parameters (if available) ────────────
+ # Resolve result path (explicit or auto-detect).
+ if result_path is None:
+ auto = robot_path / "sysid_result.json"
+ if auto.exists():
+ result_path = auto
+
sim_tuned = None
tuned_cost = None
if result_path is not None:
@@ -101,64 +109,21 @@ def visualize(
tuned_params = result.get("best_params", {})
tuned_cost = result.get("best_cost")
log.info("simulating_tuned_params", cost=tuned_cost)
- if window_duration > 0:
- sim_tuned = windowed_rollout(
- robot_path=robot_path,
- params=tuned_params,
- recording=recording,
- window_duration=window_duration,
- sim_dt=sim_dt,
- substeps=substeps,
- )
- else:
- sim_tuned = rollout(
- robot_path=robot_path,
- params=tuned_params,
- actions=actions,
- timesteps=t,
- sim_dt=sim_dt,
- substeps=substeps,
- )
+ sim_tuned = _run_sim(params=tuned_params, **sim_kwargs)
else:
log.warning("result_file_not_found", path=str(result_path))
- else:
- # Auto-detect sysid_result.json in robot_path.
- auto_result = robot_path / "sysid_result.json"
- if auto_result.exists():
- result = json.loads(auto_result.read_text())
- tuned_params = result.get("best_params", {})
- tuned_cost = result.get("best_cost")
- log.info("auto_detected_tuned_params", cost=tuned_cost)
- if window_duration > 0:
- sim_tuned = windowed_rollout(
- robot_path=robot_path,
- params=tuned_params,
- recording=recording,
- window_duration=window_duration,
- sim_dt=sim_dt,
- substeps=substeps,
- )
- else:
- sim_tuned = rollout(
- robot_path=robot_path,
- params=tuned_params,
- actions=actions,
- timesteps=t,
- sim_dt=sim_dt,
- substeps=substeps,
- )
# ── Plot ─────────────────────────────────────────────────────
fig, axes = plt.subplots(5, 1, figsize=(14, 12), sharex=True)
channels = [
- ("motor_angle", "Motor Angle (rad)", True),
- ("motor_vel", "Motor Velocity (rad/s)", False),
- ("pendulum_angle", "Pendulum Angle (rad)", True),
- ("pendulum_vel", "Pendulum Velocity (rad/s)", False),
+ ("motor_angle", "Motor Angle (rad)"),
+ ("motor_vel", "Motor Velocity (rad/s)"),
+ ("pendulum_angle", "Pendulum Angle (rad)"),
+ ("pendulum_vel", "Pendulum Velocity (rad/s)"),
]
- for ax, (key, ylabel, is_angle) in zip(axes[:4], channels):
+ for ax, (key, ylabel) in zip(axes[:4], channels):
real = recording[key]
ax.plot(t, real, "k-", linewidth=1.2, alpha=0.8, label="Real")
@@ -207,6 +172,7 @@ def visualize(
sim_dt=sim_dt,
substeps=substeps,
window_duration=window_duration,
+ motor_params=motor_params,
)
title += f"\nOriginal cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}"
improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0
diff --git a/src/training/trainer.py b/src/training/trainer.py
index 72d0202..87ddf75 100644
--- a/src/training/trainer.py
+++ b/src/training/trainer.py
@@ -8,7 +8,6 @@ import structlog
import torch
import tqdm
from clearml import Logger
-from gymnasium import spaces
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
from skrl.memories.torch import RandomMemory
from skrl.resources.preprocessors.torch import RunningStandardScaler
@@ -129,24 +128,17 @@ class VideoRecordingTrainer(SequentialTrainer):
frames: list[np.ndarray] = []
obs, _ = self.env.reset()
- for _ in range(max_steps):
- with torch.no_grad():
+ with torch.no_grad():
+ for _ in range(max_steps):
action = self.agents.act(obs, timestep=timestep, timesteps=self.timesteps)[0]
- obs, _, terminated, truncated, _ = self.env.step(action)
+ obs, _, terminated, truncated, _ = self.env.step(action)
- try:
frame = self.env.render()
- except Exception:
- # Headless environment without OpenGL — skip video recording
- log.warning("video_recording_disabled", reason="render failed (headless?)")
- self.env.reset()
- return
+ if frame is not None:
+ frames.append(frame)
- if frame is not None:
- frames.append(frame)
-
- if (terminated | truncated).any().item():
- break
+ if (terminated | truncated).any().item():
+ break
if frames:
path = str(self._video_dir / f"step_{timestep}.mp4")
@@ -159,7 +151,6 @@ class VideoRecordingTrainer(SequentialTrainer):
local_path=path, iteration=timestep,
)
- # Restore training state
self.env.reset()