clean up lot of stuff

This commit is contained in:
2026-03-22 15:49:13 +01:00
parent d3ed1c25ad
commit ca0e7b8b03
37 changed files with 3613 additions and 1223 deletions

24
.gitignore vendored
View File

@@ -1,3 +1,23 @@
outputs/
# IDE / editor
.vscode/
runs/
# Training & HPO outputs
outputs/
runs/
smac3_output/
# MuJoCo
MUJOCO_LOG.TXT
# Python
__pycache__/
*.pyc
*.pyo
*.egg-info/
.eggs/
dist/
build/
# Temp files
*.stl
*.scad

View File

@@ -10,7 +10,7 @@ encoder:
# counts_per_rev = ppr × gear_ratio × 4 (quadrature) = 1320
safety:
max_motor_angle_deg: 90.0 # hard termination limit (0 = disabled)
max_motor_angle_deg: 90.0 # hard termination limit (physical endstop ~70-80°)
soft_limit_deg: 40.0 # progressive penalty ramp starts here
reset:

View File

@@ -1,20 +1,21 @@
# Tuned robot config — generated by src.sysid.optimize
# Original: robot.yaml
# Run `python -m src.sysid.visualize` to compare real vs sim.
urdf: rotary_cartpole.urdf
actuators:
- joint: motor_joint
type: motor
gear: 0.176692
ctrl_range:
- -1.0
- 1.0
damping: 0.009505
filter_tau: 0.040906
gear: [0.424182, 0.425031] # torque constant [pos, neg] (motor sysid)
ctrl_range: [-0.592, 0.592] # effective control bound (sysid-tuned)
deadzone: [0.141291, 0.078015] # L298N min |ctrl| for torque [pos, neg]
damping: [0.002027, 0.014665] # viscous damping [pos, neg]
frictionloss: [0.057328, 0.053355] # Coulomb friction [pos, neg]
filter_tau: 0.005035 # 1st-order actuator filter (motor sysid)
viscous_quadratic: 0.000285 # velocity² drag
back_emf_gain: 0.006758 # back-EMF torque reduction
joints:
motor_joint:
armature: 0.001389
frictionloss: 0.002179
armature: 0.002773 # reflected rotor inertia (motor sysid)
frictionloss: 0.0 # handled by motor model via qfrc_applied
pendulum_joint:
damping: 6.1e-05
damping: 0.000119
frictionloss: 1.0e-05

View File

@@ -26,8 +26,8 @@
</joint>
<link name="arm">
<inertial>
<origin xyz="0.014950488360794875 0.006089886527968399 0.004470745447817278" rpy="0 0 0" />
<mass value="0.012391951282440451" />
<origin xyz="-0.0071030505291264975 0.0008511826488989179 0.007952020186701035" rpy="0 0 0" />
<mass value="0.02110029934220782" />
<inertia ixx="2.70e-06" iyy="7.80e-07" izz="2.44e-06" ixy="0.0" iyz="7.20e-08" ixz="0.0" />
</inertial>
<visual>
@@ -53,9 +53,9 @@
</joint>
<link name="pendulum">
<inertial>
<origin xyz="0.06432778588634695 -0.05999895841669392 0.0008769789937631209" rpy="0 0 0" />
<mass value="0.035508993892747365" />
<inertia ixx="3.139576982078822e-05" iyy="9.431951659638859e-06" izz="4.07315891863556e-05" ixy="-1.8892943833253423e-06" iyz="0.0" ixz="0.0" />
<origin xyz="0.060245187591695615 -0.07601707109312682 -0.0034636702158137786" rpy="0 0 0" />
<mass value="0.03936742845036306" />
<inertia ixx="6.202768755990066e-05" iyy="3.70078470430685e-05" izz="7.827356811788924e-05" ixy="-6.925117819616428e-06" iyz="0.0" ixz="0.0" />
</inertial>
<visual>
<origin xyz="0.006895 -0.023224 -0.162953" rpy="0 0 0" />

View File

@@ -1,70 +0,0 @@
{
"best_params": {
"arm_mass": 0.012391951282440451,
"arm_com_x": 0.014950488360794875,
"arm_com_y": 0.006089886527968399,
"arm_com_z": 0.004470745447817278,
"pendulum_mass": 0.035508993892747365,
"pendulum_com_x": 0.06432778588634695,
"pendulum_com_y": -0.05999895841669392,
"pendulum_com_z": 0.0008769789937631209,
"pendulum_ixx": 3.139576982078822e-05,
"pendulum_iyy": 9.431951659638859e-06,
"pendulum_izz": 4.07315891863556e-05,
"pendulum_ixy": -1.8892943833253423e-06,
"actuator_gear": 0.17669161390939517,
"actuator_filter_tau": 0.040905643692382504,
"motor_damping": 0.009504542103348917,
"pendulum_damping": 6.128535042404019e-05,
"motor_armature": 0.0013894759540138252,
"motor_frictionloss": 0.002179448047511452
},
"best_cost": 0.7471380533090072,
"recording": "/Users/victormylle/Library/CloudStorage/SeaDrive-VictorMylle(cloud.optimize-it.be)/My Libraries/Projects/AI/RL-Framework/assets/rotary_cartpole/recordings/capture_20260311_215608.npz",
"param_names": [
"arm_mass",
"arm_com_x",
"arm_com_y",
"arm_com_z",
"pendulum_mass",
"pendulum_com_x",
"pendulum_com_y",
"pendulum_com_z",
"pendulum_ixx",
"pendulum_iyy",
"pendulum_izz",
"pendulum_ixy",
"actuator_gear",
"actuator_filter_tau",
"motor_damping",
"pendulum_damping",
"motor_armature",
"motor_frictionloss"
],
"defaults": {
"arm_mass": 0.01,
"arm_com_x": 5e-05,
"arm_com_y": 0.0065,
"arm_com_z": 0.00563,
"pendulum_mass": 0.015,
"pendulum_com_x": 0.1583,
"pendulum_com_y": -0.0983,
"pendulum_com_z": 0.0,
"pendulum_ixx": 6.16e-06,
"pendulum_iyy": 6.16e-06,
"pendulum_izz": 1.23e-05,
"pendulum_ixy": 6.1e-06,
"actuator_gear": 0.064,
"actuator_filter_tau": 0.03,
"motor_damping": 0.003,
"pendulum_damping": 0.0001,
"motor_armature": 0.0001,
"motor_frictionloss": 0.03
},
"timestamp": "2026-03-11T22:08:04.782736",
"history_summary": {
"first_cost": 3.909456214944022,
"final_cost": 0.7471380533090072,
"generations": 200
}
}

View File

@@ -1,10 +1,19 @@
max_steps: 1000
robot_path: assets/rotary_cartpole
reward_upright_scale: 1.0
speed_penalty_scale: 0.1
# ── Regularisation penalties (prevent fast spinning) ─────────────────
motor_vel_penalty: 0.01 # penalise high motor angular velocity
motor_angle_penalty: 0.05 # penalise deviation from centre
action_penalty: 0.05 # penalise large actions (energy cost)
# ── Software safety limit (env-level, always applied) ────────────────
motor_angle_limit_deg: 90.0 # terminate episode if motor exceeds ±90°
# ── HPO search ranges ────────────────────────────────────────────────
hpo:
reward_upright_scale: {min: 0.5, max: 5.0}
speed_penalty_scale: {min: 0.01, max: 1.0}
motor_vel_penalty: {min: 0.001, max: 0.1}
motor_angle_penalty: {min: 0.01, max: 0.2}
action_penalty: {min: 0.01, max: 0.2}
max_steps: {values: [500, 1000, 2000]}

View File

@@ -1,4 +1,4 @@
num_envs: 1024 # MJX shines with many parallel envs
device: auto # auto = cuda if available, else cpu
dt: 0.002
substeps: 20
substeps: 10

View File

@@ -1,4 +1,4 @@
num_envs: 64
device: auto # auto = cuda if available, else cpu
dt: 0.002
substeps: 20
substeps: 10

View File

@@ -6,6 +6,5 @@ num_envs: 1
device: cpu
port: /dev/cu.usbserial-0001
baud: 115200
dt: 0.02 # control loop period (50 Hz)
dt: 0.02 # control loop period (50 Hz, matches training)
no_data_timeout: 2.0 # seconds of silence before declaring disconnect
encoder_jump_threshold: 200 # encoder tick jump → reboot detection

View File

@@ -1,5 +1,5 @@
# System identification defaults.
# Override via CLI: python -m src.sysid.optimize sysid.max_generations=50
# Override via CLI: python scripts/sysid.py optimize --max-generations 50
#
# These are NOT Hydra config groups — the sysid scripts use argparse.
# This file serves as documentation and can be loaded by custom wrappers.
@@ -8,18 +8,25 @@ capture:
port: /dev/cu.usbserial-0001
baud: 115200
duration: 20.0 # seconds
amplitude: 180 # max PWM magnitude (0255)
amplitude: 150 # max PWM magnitude — must match firmware MAX_MOTOR_SPEED
hold_min_ms: 50 # PRBS min hold time
hold_max_ms: 300 # PRBS max hold time
dt: 0.02 # sample period (50 Hz)
optimize:
sigma0: 0.3 # CMA-ES initial step size (in [0,1] normalised space)
population_size: 20 # candidates per generation
max_generations: 200 # total generations (~4000 evaluations)
population_size: 50 # candidates per generation
max_generations: 1000 # total generations (~4000 evaluations)
sim_dt: 0.002 # MuJoCo physics timestep
substeps: 10 # physics substeps per control step (ctrl_dt = 0.02s)
pos_weight: 1.0 # MSE weight for angle errors
vel_weight: 0.1 # MSE weight for velocity errors
window_duration: 0.5 # multiple-shooting window length (s); 0 = open-loop
seed: 42
# Tunable hardware-realism params (added to ROTARY_CARTPOLE_PARAMS):
# ctrl_limit — effective motor range → exported as ctrl_range in robot.yaml
# motor_deadzone — L298N minimum |action| for torque → exported as deadzone in robot.yaml
# Firmware sends raw (unfiltered) sensor data; EMA filtering is
# handled on the Python side (env transforms) and is NOT part of
# the sysid parameter search.

View File

@@ -8,15 +8,17 @@ defaults:
- _self_
hidden_sizes: [256, 256]
total_timesteps: 100000
learning_epochs: 5
learning_rate: 0.001 # conservative — can't undo real-world damage
entropy_loss_scale: 0.0001
log_interval: 1024
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0005 # conservative — can't undo real-world damage
entropy_loss_scale: 0.01
rollout_steps: 2048
mini_batches: 8
log_interval: 2048
checkpoint_interval: 5000 # frequent saves — can't rewind real hardware
initial_log_std: -0.5 # moderate initial exploration
min_log_std: -4.0
max_log_std: 0.0 # cap σ at 1.0
max_log_std: 2.0 # cap σ at 1.0
# Never run real-hardware training remotely
remote: false

View File

@@ -8,7 +8,7 @@ defaults:
- _self_
hidden_sizes: [256, 256]
total_timesteps: 1000000
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0003
entropy_loss_scale: 0.01

379
scripts/eval.py Normal file
View File

@@ -0,0 +1,379 @@
"""Evaluate a trained policy on real hardware (or in simulation).
Loads a checkpoint and runs the policy in a closed loop. For real
hardware the serial runner talks to the ESP32; for sim it uses the
MuJoCo runner. A digital-twin MuJoCo viewer mirrors the robot state
in both modes.
Usage (real hardware):
mjpython scripts/eval.py env=rotary_cartpole runner=serial \
checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
Usage (simulation):
mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \
checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
Controls:
Space — pause / resume policy (motor stops while paused)
R — reset environment
Esc — quit
"""
import math
import sys
import time
from pathlib import Path
# Ensure project root is on sys.path
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
import hydra
import mujoco
import mujoco.viewer
import numpy as np
import structlog
import torch
from gymnasium import spaces
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf
from skrl.resources.preprocessors.torch import RunningStandardScaler
from src.core.registry import build_env
from src.models.mlp import SharedMLP
logger = structlog.get_logger()
# ── keyboard state ───────────────────────────────────────────────────
_reset_flag = [False]
_paused = [False]
_quit_flag = [False]
def _key_callback(keycode: int) -> None:
"""Called by MuJoCo viewer on key press."""
if keycode == 32: # GLFW_KEY_SPACE
_paused[0] = not _paused[0]
elif keycode == 82: # GLFW_KEY_R
_reset_flag[0] = True
elif keycode == 256: # GLFW_KEY_ESCAPE
_quit_flag[0] = True
# ── checkpoint loading ───────────────────────────────────────────────
def _infer_hidden_sizes(state_dict: dict[str, torch.Tensor]) -> tuple[int, ...]:
"""Infer hidden layer sizes from a SharedMLP state dict."""
sizes = []
i = 0
while f"net.{i}.weight" in state_dict:
sizes.append(state_dict[f"net.{i}.weight"].shape[0])
i += 2 # skip activation layers (ELU)
return tuple(sizes)
def load_policy(
checkpoint_path: str,
observation_space: spaces.Space,
action_space: spaces.Space,
device: torch.device = torch.device("cpu"),
) -> tuple[SharedMLP, RunningStandardScaler]:
"""Load a trained SharedMLP + observation normalizer from a checkpoint.
Returns:
(model, state_preprocessor) ready for inference.
"""
ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
# Infer architecture from saved weights.
hidden_sizes = _infer_hidden_sizes(ckpt["policy"])
# Reconstruct model.
model = SharedMLP(
observation_space=observation_space,
action_space=action_space,
device=device,
hidden_sizes=hidden_sizes,
)
model.load_state_dict(ckpt["policy"])
model.eval()
# Reconstruct observation normalizer.
state_preprocessor = RunningStandardScaler(size=observation_space, device=device)
state_preprocessor.running_mean = ckpt["state_preprocessor"]["running_mean"].to(device)
state_preprocessor.running_variance = ckpt["state_preprocessor"]["running_variance"].to(device)
state_preprocessor.current_count = ckpt["state_preprocessor"]["current_count"]
# Freeze the normalizer — don't update stats during eval.
state_preprocessor.training = False
logger.info(
"checkpoint_loaded",
path=checkpoint_path,
hidden_sizes=hidden_sizes,
obs_mean=[round(x, 3) for x in state_preprocessor.running_mean.tolist()],
obs_std=[round(x, 3) for x in state_preprocessor.running_variance.sqrt().tolist()],
)
return model, state_preprocessor
# ── action arrow overlay ─────────────────────────────────────────────
def _add_action_arrow(viewer, model, data, action_val: float) -> None:
"""Draw an arrow showing applied torque direction."""
if abs(action_val) < 0.01 or model.nu == 0:
return
jnt_id = model.actuator_trnid[0, 0]
body_id = model.jnt_bodyid[jnt_id]
pos = data.xpos[body_id].copy()
pos[2] += 0.02
axis = data.xmat[body_id].reshape(3, 3) @ model.jnt_axis[jnt_id]
arrow_len = 0.08 * action_val
direction = axis * np.sign(arrow_len)
z = direction / (np.linalg.norm(direction) + 1e-8)
up = np.array([0, 0, 1]) if abs(z[2]) < 0.99 else np.array([0, 1, 0])
x = np.cross(up, z)
x /= np.linalg.norm(x) + 1e-8
y = np.cross(z, x)
mat = np.column_stack([x, y, z]).flatten()
rgba = np.array(
[0.2, 0.8, 0.2, 0.8] if action_val > 0 else [0.8, 0.2, 0.2, 0.8],
dtype=np.float32,
)
geom = viewer.user_scn.geoms[viewer.user_scn.ngeom]
mujoco.mjv_initGeom(
geom,
type=mujoco.mjtGeom.mjGEOM_ARROW,
size=np.array([0.008, 0.008, abs(arrow_len)]),
pos=pos,
mat=mat,
rgba=rgba,
)
viewer.user_scn.ngeom += 1
# ── main loops ───────────────────────────────────────────────────────
@hydra.main(version_base=None, config_path="../configs", config_name="config")
def main(cfg: DictConfig) -> None:
choices = HydraConfig.get().runtime.choices
env_name = choices.get("env", "cartpole")
runner_name = choices.get("runner", "mujoco_single")
checkpoint_path = cfg.get("checkpoint", None)
if checkpoint_path is None:
logger.error("No checkpoint specified. Use: +checkpoint=path/to/agent.pt")
sys.exit(1)
# Resolve relative paths against original working directory.
checkpoint_path = str(Path(hydra.utils.get_original_cwd()) / checkpoint_path)
if not Path(checkpoint_path).exists():
logger.error("checkpoint_not_found", path=checkpoint_path)
sys.exit(1)
if runner_name == "serial":
_eval_serial(cfg, env_name, checkpoint_path)
else:
_eval_sim(cfg, env_name, checkpoint_path)
def _eval_sim(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
"""Evaluate policy in MuJoCo simulation with viewer."""
from src.runners.mujoco import MuJoCoRunner, MuJoCoRunnerConfig
env = build_env(env_name, cfg)
runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
runner_dict["num_envs"] = 1
runner = MuJoCoRunner(env=env, config=MuJoCoRunnerConfig(**runner_dict))
device = runner.device
model, preprocessor = load_policy(
checkpoint_path, runner.observation_space, runner.action_space, device
)
mj_model = runner._model
mj_data = runner._data[0]
dt_ctrl = runner.config.dt * runner.config.substeps
with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
obs, _ = runner.reset()
step = 0
episode = 0
episode_reward = 0.0
logger.info(
"eval_started",
env=env_name,
mode="simulation",
checkpoint=Path(checkpoint_path).name,
controls="Space=pause, R=reset, Esc=quit",
)
while viewer.is_running() and not _quit_flag[0]:
if _reset_flag[0]:
_reset_flag[0] = False
obs, _ = runner.reset()
step = 0
episode += 1
episode_reward = 0.0
logger.info("reset", episode=episode)
if _paused[0]:
viewer.sync()
time.sleep(0.05)
continue
# Policy inference
with torch.no_grad():
normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
action = model.act({"states": normalized_obs}, role="policy")[0]
action = action.clamp(-1.0, 1.0)
obs, reward, terminated, truncated, info = runner.step(action)
episode_reward += reward.item()
step += 1
# Sync viewer
mujoco.mj_forward(mj_model, mj_data)
viewer.user_scn.ngeom = 0
_add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
viewer.sync()
if step % 50 == 0:
joints = {mj_model.jnt(i).name: round(math.degrees(mj_data.qpos[i]), 1)
for i in range(mj_model.njnt)}
logger.debug(
"step", n=step, reward=round(reward.item(), 3),
action=round(action[0, 0].item(), 2),
ep_reward=round(episode_reward, 1), **joints,
)
if terminated.any() or truncated.any():
logger.info(
"episode_done", episode=episode, steps=step,
total_reward=round(episode_reward, 2),
reason="terminated" if terminated.any() else "truncated",
)
obs, _ = runner.reset()
step = 0
episode += 1
episode_reward = 0.0
time.sleep(dt_ctrl)
runner.close()
def _eval_serial(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
"""Evaluate policy on real hardware via serial, with digital-twin viewer."""
from src.runners.serial import SerialRunner, SerialRunnerConfig
env = build_env(env_name, cfg)
runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
serial_runner = SerialRunner(env=env, config=SerialRunnerConfig(**runner_dict))
device = serial_runner.device
model, preprocessor = load_policy(
checkpoint_path, serial_runner.observation_space, serial_runner.action_space, device
)
# Set up digital-twin MuJoCo model for visualization.
serial_runner._ensure_viz_model()
mj_model = serial_runner._viz_model
mj_data = serial_runner._viz_data
with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
obs, _ = serial_runner.reset()
step = 0
episode = 0
episode_reward = 0.0
logger.info(
"eval_started",
env=env_name,
mode="real hardware (serial)",
port=serial_runner.config.port,
checkpoint=Path(checkpoint_path).name,
controls="Space=pause, R=reset, Esc=quit",
)
while viewer.is_running() and not _quit_flag[0]:
if _reset_flag[0]:
_reset_flag[0] = False
serial_runner._send("M0")
serial_runner._drive_to_center()
serial_runner._wait_for_pendulum_still()
obs, _ = serial_runner.reset()
step = 0
episode += 1
episode_reward = 0.0
logger.info("reset", episode=episode)
if _paused[0]:
serial_runner._send("M0") # safety: stop motor while paused
serial_runner._sync_viz()
viewer.sync()
time.sleep(0.05)
continue
# Policy inference
with torch.no_grad():
normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
action = model.act({"states": normalized_obs}, role="policy")[0]
action = action.clamp(-1.0, 1.0)
obs, reward, terminated, truncated, info = serial_runner.step(action)
episode_reward += reward.item()
step += 1
# Sync digital twin with real sensor data.
serial_runner._sync_viz()
viewer.user_scn.ngeom = 0
_add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
viewer.sync()
if step % 25 == 0:
state = serial_runner._read_state()
logger.debug(
"step", n=step, reward=round(reward.item(), 3),
action=round(action[0, 0].item(), 2),
ep_reward=round(episode_reward, 1),
motor_enc=state["encoder_count"],
pend_deg=round(state["pendulum_angle"], 1),
)
# Check for safety / disconnection.
if info.get("reboot_detected") or info.get("motor_limit_exceeded"):
logger.error(
"safety_stop",
reboot=info.get("reboot_detected", False),
motor_limit=info.get("motor_limit_exceeded", False),
)
serial_runner._send("M0")
break
if terminated.any() or truncated.any():
logger.info(
"episode_done", episode=episode, steps=step,
total_reward=round(episode_reward, 2),
reason="terminated" if terminated.any() else "truncated",
)
# Auto-reset for next episode.
obs, _ = serial_runner.reset()
step = 0
episode += 1
episode_reward = 0.0
# Real-time pacing is handled by serial_runner.step() (dt sleep).
serial_runner.close()
if __name__ == "__main__":
main()

64
scripts/motor_sysid.py Normal file
View File

@@ -0,0 +1,64 @@
"""Unified CLI for motor-only system identification.
Usage:
python scripts/motor_sysid.py capture --duration 20
python scripts/motor_sysid.py optimize --recording assets/motor/recordings/<file>.npz
python scripts/motor_sysid.py visualize --recording assets/motor/recordings/<file>.npz
python scripts/motor_sysid.py export --result assets/motor/motor_sysid_result.json
"""
from __future__ import annotations
import sys
from pathlib import Path
# Ensure project root is on sys.path
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
def main() -> None:
if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
print(
"Motor System Identification\n"
"===========================\n"
"Usage: python scripts/motor_sysid.py <command> [options]\n"
"\n"
"Commands:\n"
" capture Record motor trajectory under PRBS excitation\n"
" optimize Run CMA-ES to fit motor parameters\n"
" visualize Plot real vs simulated motor response\n"
" export Write tuned MJCF + robot.yaml files\n"
"\n"
"Workflow:\n"
" 1. Flash sysid firmware to ESP32 (motor-only, no limits)\n"
" 2. python scripts/motor_sysid.py capture --duration 20\n"
" 3. python scripts/motor_sysid.py optimize --recording <file>.npz\n"
" 4. python scripts/motor_sysid.py visualize --recording <file>.npz\n"
"\n"
"Run '<command> --help' for command-specific options."
)
sys.exit(0)
command = sys.argv[1]
sys.argv = [f"motor_sysid {command}"] + sys.argv[2:]
if command == "capture":
from src.sysid.motor.capture import main as cmd_main
elif command == "optimize":
from src.sysid.motor.optimize import main as cmd_main
elif command == "visualize":
from src.sysid.motor.visualize import main as cmd_main
elif command == "export":
from src.sysid.motor.export import main as cmd_main
else:
print(f"Unknown command: {command}")
print("Available commands: capture, optimize, visualize, export")
sys.exit(1)
cmd_main()
if __name__ == "__main__":
main()

View File

@@ -48,9 +48,10 @@ class BaseEnv(abc.ABC, Generic[T]):
def compute_truncations(self, step_counts: torch.Tensor) -> torch.Tensor:
return step_counts >= self.config.max_steps
def get_default_qpos(self, nq: int) -> list[float] | None:
"""Return the default joint positions for reset.
Override in subclass if the URDF zero pose doesn't match
the desired initial state (e.g. pendulum hanging down).
Returns None to use the URDF default (all zeros)."""
return None
def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
"""Check whether the physical robot has settled enough to start an episode.
Used by the SerialRunner after driving to center and waiting for the
pendulum. Default: always ready (sim doesn't need settling).
"""
return True

View File

@@ -1,91 +0,0 @@
"""Real-hardware configuration — loaded from hardware.yaml next to robot.yaml.
Provides robot-specific constants for the SerialRunner: encoder specs,
safety limits, and reset behaviour. Simulation-only robots simply don't
have a hardware.yaml (the loader returns None).
Usage:
hw = load_hardware_config("assets/rotary_cartpole")
if hw is not None:
counts_per_rev = hw.encoder.ppr * hw.encoder.gear_ratio * 4.0
"""
import dataclasses
from pathlib import Path
import structlog
import yaml
log = structlog.get_logger()
@dataclasses.dataclass
class EncoderConfig:
"""Rotary encoder parameters."""
ppr: int = 11 # pulses per revolution (before quadrature)
gear_ratio: float = 30.0 # gearbox ratio
@property
def counts_per_rev(self) -> float:
"""Total encoder counts per output-shaft revolution (quadrature)."""
return self.ppr * self.gear_ratio * 4.0
@dataclasses.dataclass
class SafetyConfig:
"""Safety limits enforced by the runner (not the env)."""
max_motor_angle_deg: float = 90.0 # hard termination (0 = disabled)
soft_limit_deg: float = 40.0 # progressive penalty ramp start
@dataclasses.dataclass
class ResetConfig:
"""Parameters for the physical reset procedure."""
drive_speed: int = 80 # PWM for bang-bang drive-to-center
deadband: int = 15 # encoder count threshold for "centered"
drive_timeout: float = 3.0 # seconds
settle_angle_deg: float = 2.0 # pendulum angle threshold (degrees)
settle_vel_dps: float = 5.0 # pendulum velocity threshold (deg/s)
settle_duration: float = 0.5 # seconds the pendulum must stay still
settle_timeout: float = 30.0 # give up after this many seconds
@dataclasses.dataclass
class HardwareConfig:
"""Complete real-hardware description for a robot."""
encoder: EncoderConfig = dataclasses.field(default_factory=EncoderConfig)
safety: SafetyConfig = dataclasses.field(default_factory=SafetyConfig)
reset: ResetConfig = dataclasses.field(default_factory=ResetConfig)
def load_hardware_config(robot_dir: str | Path) -> HardwareConfig | None:
"""Load hardware.yaml from a directory.
Returns None if the file doesn't exist (simulation-only robot).
"""
robot_dir = Path(robot_dir).resolve()
yaml_path = robot_dir / "hardware.yaml"
if not yaml_path.exists():
return None
raw = yaml.safe_load(yaml_path.read_text()) or {}
encoder = EncoderConfig(**raw.get("encoder", {}))
safety = SafetyConfig(**raw.get("safety", {}))
reset = ResetConfig(**raw.get("reset", {}))
config = HardwareConfig(encoder=encoder, safety=safety, reset=reset)
log.debug(
"hardware_config_loaded",
robot_dir=str(robot_dir),
counts_per_rev=encoder.counts_per_rev,
max_motor_angle_deg=safety.max_motor_angle_deg,
)
return config

View File

@@ -11,6 +11,7 @@ Usage:
"""
import dataclasses
import math
from pathlib import Path
import structlog
@@ -19,10 +20,20 @@ import yaml
log = structlog.get_logger()
def _as_pair(val) -> tuple[float, float]:
"""Convert scalar or [pos, neg] list to (pos, neg) tuple."""
if isinstance(val, (list, tuple)) and len(val) == 2:
return (float(val[0]), float(val[1]))
return (float(val), float(val))
@dataclasses.dataclass
class ActuatorConfig:
"""Motor/actuator attached to a joint.
Asymmetric fields use (positive_dir, negative_dir) tuples.
A scalar in YAML is expanded to a symmetric pair.
type:
motor — direct torque control (ctrl = normalised torque)
position — PD position servo (ctrl = target angle, needs kp)
@@ -30,12 +41,91 @@ class ActuatorConfig:
"""
joint: str = ""
type: str = "motor"
gear: float = 1.0
ctrl_range: tuple[float, float] = (-1.0, 1.0)
damping: float = 0.05
kp: float = 0.0 # proportional gain (position / velocity actuators)
kv: float = 0.0 # derivative gain (position actuators)
filter_tau: float = 0.0 # 1st-order filter time constant (s); 0 = no filter
gear: tuple[float, float] = (1.0, 1.0) # torque constant (pos, neg)
ctrl_range: tuple[float, float] = (-1.0, 1.0) # (lower, upper) control bounds
deadzone: tuple[float, float] = (0.0, 0.0) # min |ctrl| for torque (pos, neg)
damping: tuple[float, float] = (0.0, 0.0) # viscous damping (pos, neg)
frictionloss: tuple[float, float] = (0.0, 0.0) # Coulomb friction (pos, neg)
kp: float = 0.0 # proportional gain (position / velocity actuators)
kv: float = 0.0 # derivative gain (position actuators)
filter_tau: float = 0.0 # 1st-order filter time constant (s); 0 = no filter
viscous_quadratic: float = 0.0 # velocity² drag coefficient
back_emf_gain: float = 0.0 # back-EMF torque reduction
@property
def gear_avg(self) -> float:
return (self.gear[0] + self.gear[1]) / 2.0
@property
def has_motor_model(self) -> bool:
"""True if this actuator needs the runtime motor model."""
return (
self.gear[0] != self.gear[1]
or self.deadzone != (0.0, 0.0)
or self.damping != (0.0, 0.0)
or self.frictionloss != (0.0, 0.0)
or self.viscous_quadratic > 0
or self.back_emf_gain > 0
)
def transform_ctrl(self, ctrl: float) -> float:
"""Apply asymmetric deadzone and gear compensation to a scalar ctrl."""
# Deadzone
dz_pos, dz_neg = self.deadzone
if ctrl >= 0 and ctrl < dz_pos:
return 0.0
if ctrl < 0 and ctrl > -dz_neg:
return 0.0
# Gear compensation: rescale so ctrl × gear_avg ≈ action × gear_dir
gear_avg = self.gear_avg
if gear_avg > 1e-8:
gear_dir = self.gear[0] if ctrl >= 0 else self.gear[1]
ctrl *= gear_dir / gear_avg
return ctrl
def compute_motor_force(self, vel: float, ctrl: float) -> float:
"""Asymmetric friction, damping, drag, back-EMF → applied torque."""
torque = 0.0
# Coulomb friction (direction-dependent)
fl_pos, fl_neg = self.frictionloss
if abs(vel) > 1e-6:
fl = fl_pos if vel > 0 else fl_neg
torque -= math.copysign(fl, vel)
# Viscous damping (direction-dependent)
damp = self.damping[0] if vel > 0 else self.damping[1]
torque -= damp * vel
# Quadratic velocity drag
if self.viscous_quadratic > 0:
torque -= self.viscous_quadratic * vel * abs(vel)
# Back-EMF torque reduction
if self.back_emf_gain > 0 and abs(ctrl) > 1e-6:
torque -= self.back_emf_gain * vel * math.copysign(1.0, ctrl)
return max(-10.0, min(10.0, torque))
def transform_action(self, action):
"""Vectorised deadzone + gear compensation for a torch batch."""
dz_pos, dz_neg = self.deadzone
if dz_pos > 0 or dz_neg > 0:
action = action.clone()
pos_dead = (action >= 0) & (action < dz_pos)
neg_dead = (action < 0) & (action > -dz_neg)
action[pos_dead | neg_dead] = 0.0
gear_avg = self.gear_avg
if gear_avg > 1e-8 and self.gear[0] != self.gear[1]:
action = action.clone() if dz_pos == 0 and dz_neg == 0 else action
pos = action >= 0
action[pos] *= self.gear[0] / gear_avg
action[~pos] *= self.gear[1] / gear_avg
return action
@dataclasses.dataclass
@@ -84,6 +174,9 @@ def load_robot_config(robot_dir: str | Path) -> RobotConfig:
for a in raw.get("actuators", []):
if "ctrl_range" in a:
a["ctrl_range"] = tuple(a["ctrl_range"])
for key in ("gear", "deadzone", "damping", "frictionloss"):
if key in a:
a[key] = _as_pair(a[key])
actuators.append(ActuatorConfig(**a))
# Parse joint overrides

View File

@@ -1,9 +1,12 @@
import dataclasses
import abc
from typing import Any, Generic, TypeVar
from src.core.env import BaseEnv
import numpy as np
import torch
from src.core.env import BaseEnv
T = TypeVar("T")
@@ -21,6 +24,8 @@ class BaseRunner(abc.ABC, Generic[T]):
if getattr(self.config, "device", None) == "auto":
self.config.device = "cuda" if torch.cuda.is_available() else "cpu"
self._last_actions: torch.Tensor | None = None
self._sim_initialize(config)
self.observation_space = self.env.observation_space
@@ -68,6 +73,7 @@ class BaseRunner(abc.ABC, Generic[T]):
return obs, {}
def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict[str, Any]]:
self._last_actions = actions
qpos, qvel = self._sim_step(actions)
self.step_counts += 1
@@ -95,9 +101,50 @@ class BaseRunner(abc.ABC, Generic[T]):
# skrl expects (num_envs, 1) for rewards/terminated/truncated
return obs, rewards.unsqueeze(-1), terminated.unsqueeze(-1), truncated.unsqueeze(-1), info
def render(self, env_idx: int = 0):
"""Offscreen render → RGB numpy array. Override in subclass."""
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
"""Return a raw RGB frame. Override in subclass."""
raise NotImplementedError("Render not implemented for this runner.")
def render(self, env_idx: int = 0) -> np.ndarray:
"""Render frame with action overlay."""
frame = self._render_frame(env_idx)
if self._last_actions is not None:
ctrl = float(self._last_actions[env_idx, 0].clamp(-1.0, 1.0))
_draw_action_overlay(frame, ctrl)
return frame
def close(self) -> None:
self._sim_close()
self._sim_close()
def _draw_action_overlay(frame: np.ndarray, action: float) -> None:
"""Draw an action bar on a rendered frame (no OpenCV needed).
Bar is centered horizontally: green to the right (+), red to the left (-).
"""
h, w = frame.shape[:2]
bar_y = h - 30
bar_h = 16
bar_x_center = w // 2
bar_half_w = w // 4
bar_x_left = bar_x_center - bar_half_w
bar_x_right = bar_x_center + bar_half_w
# Background (dark grey)
frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40]
# Filled bar
fill_len = int(abs(action) * bar_half_w)
if action > 0:
color = [60, 200, 60] # green
x0 = bar_x_center
x1 = min(bar_x_center + fill_len, bar_x_right)
else:
color = [200, 60, 60] # red
x1 = bar_x_center
x0 = max(bar_x_center - fill_len, bar_x_left)
frame[bar_y:bar_y + bar_h, x0:x1] = color
# Center tick mark (white)
frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255]

View File

@@ -1,4 +1,5 @@
import dataclasses
import math
import torch
from src.core.env import BaseEnv, BaseEnvConfig
from gymnasium import spaces
@@ -21,7 +22,12 @@ class RotaryCartPoleConfig(BaseEnvConfig):
"""
# Reward shaping
reward_upright_scale: float = 1.0 # cos(pendulum) when upright = +1
speed_penalty_scale: float = 0.1 # penalty for high pendulum velocity near top
motor_vel_penalty: float = 0.01 # penalise high motor angular velocity
motor_angle_penalty: float = 0.05 # penalise deviation from centre
action_penalty: float = 0.05 # penalise large actions (energy cost)
# ── Software safety limit (env-level, on top of URDF + hardware) ──
motor_angle_limit_deg: float = 90.0 # terminate episode if exceeded
class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
@@ -34,12 +40,14 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
Using sin/cos avoids discontinuities at ±π for continuous joints.
Actions (1):
Torque applied to the motor_joint.
Torque applied to the motor_joint (normalised to [-1, 1]).
"""
def __init__(self, config: RotaryCartPoleConfig):
super().__init__(config)
# ── Spaces ───────────────────────────────────────────────────
@property
def observation_space(self) -> spaces.Space:
return spaces.Box(low=-torch.inf, high=torch.inf, shape=(6,))
@@ -48,6 +56,8 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
def action_space(self) -> spaces.Space:
return spaces.Box(low=-1.0, high=1.0, shape=(1,))
# ── State building ───────────────────────────────────────────
def build_state(self, qpos: torch.Tensor, qvel: torch.Tensor) -> RotaryCartPoleState:
return RotaryCartPoleState(
motor_angle=qpos[:, 0],
@@ -56,32 +66,64 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
pendulum_vel=qvel[:, 1],
)
# ── Observations ─────────────────────────────────────────────
def compute_observations(self, state: RotaryCartPoleState) -> torch.Tensor:
return torch.stack([
obs = [
torch.sin(state.motor_angle),
torch.cos(state.motor_angle),
torch.sin(state.pendulum_angle),
torch.cos(state.pendulum_angle),
state.motor_vel,
state.pendulum_vel,
], dim=-1)
]
return torch.stack(obs, dim=-1)
# ── Rewards ──────────────────────────────────────────────────
def compute_rewards(self, state: RotaryCartPoleState, actions: torch.Tensor) -> torch.Tensor:
# Upright reward: -cos(θ) ∈ [-1, +1]
upright = -torch.cos(state.pendulum_angle)
reward = -torch.cos(state.pendulum_angle) * self.config.reward_upright_scale
# Penalise high pendulum velocity when near the top (upright).
# "nearness" is weighted by how upright the pendulum is (0 at bottom, 1 at top).
near_top = torch.clamp(-torch.cos(state.pendulum_angle), min=0.0) # 0‥1
speed_penalty = self.config.speed_penalty_scale * near_top * state.pendulum_vel.abs()
# Penalise fast motor spinning (discourages violent oscillation)
reward = reward - self.config.motor_vel_penalty * state.motor_vel.pow(2)
return upright * self.config.reward_upright_scale - speed_penalty
# Penalise motor deviation from centre (keep arm near zero)
reward = reward - self.config.motor_angle_penalty * state.motor_angle.pow(2)
# Penalise large actions (energy efficiency / smoother control)
reward = reward - self.config.action_penalty * actions.squeeze(-1).pow(2)
# Penalty for exceeding motor angle limit (episode also terminates)
limit_rad = math.radians(self.config.motor_angle_limit_deg)
exceeded = state.motor_angle.abs() >= limit_rad
reward = torch.where(exceeded, torch.tensor(-1000.0, device=reward.device), reward)
return reward
# ── Terminations ─────────────────────────────────────────────
def compute_terminations(self, state: RotaryCartPoleState) -> torch.Tensor:
# No early termination — episode runs for max_steps (truncation only).
# The agent must learn to swing up AND balance continuously.
return torch.zeros_like(state.motor_angle, dtype=torch.bool)
# Software safety: terminate if motor angle exceeds limit.
limit_rad = math.radians(self.config.motor_angle_limit_deg)
exceeded = state.motor_angle.abs() >= limit_rad
return exceeded
# ── Reset readiness (for SerialRunner) ───────────────────────
def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
"""Pendulum must be hanging still and motor near center."""
motor_angle = float(qpos[0, 0])
pend_angle = float(qpos[0, 1])
motor_vel = float(qvel[0, 0])
pend_vel = float(qvel[0, 1])
# Pendulum near hanging-down (angle ~0) and slow
angle_ok = abs(pend_angle) < math.radians(2.0)
vel_ok = abs(pend_vel) < math.radians(5.0)
# Motor near center and slow
motor_ok = abs(motor_angle) < math.radians(5.0)
motor_vel_ok = abs(motor_vel) < math.radians(10.0)
return angle_ok and vel_ok and motor_ok and motor_vel_ok
def get_default_qpos(self, nq: int) -> list[float] | None:
# qpos=0 = pendulum hanging down (joint frame rotated in URDF).
return None

View File

@@ -29,7 +29,10 @@ import numpy as np
from src.core.env import BaseEnv
from src.core.runner import BaseRunner, BaseRunnerConfig
from src.runners.mujoco import MuJoCoRunner # reuse _load_model
from src.runners.mujoco import (
ActuatorLimits,
load_mujoco_model,
)
log = structlog.get_logger()
@@ -39,7 +42,7 @@ class MJXRunnerConfig(BaseRunnerConfig):
num_envs: int = 1024
device: str = "cuda"
dt: float = 0.002
substeps: int = 20
substeps: int = 10
class MJXRunner(BaseRunner[MJXRunnerConfig]):
@@ -64,7 +67,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
def _sim_initialize(self, config: MJXRunnerConfig) -> None:
# Step 1: Load CPU model (reuses URDF → MJCF → actuator injection)
self._mj_model = MuJoCoRunner._load_model(self.env.robot)
self._mj_model = load_mujoco_model(self.env.robot)
self._mj_model.opt.timestep = config.dt
self._nq = self._mj_model.nq
self._nv = self._mj_model.nv
@@ -75,9 +78,6 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
# Step 3: Default reset state on GPU
default_data = mujoco.MjData(self._mj_model)
default_qpos = self.env.get_default_qpos(self._nq)
if default_qpos is not None:
default_data.qpos[:] = default_qpos
mujoco.mj_forward(self._mj_model, default_data)
self._default_mjx_data = mjx.put_data(self._mj_model, default_data)
@@ -85,6 +85,18 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
self._rng = jax.random.PRNGKey(42)
self._batch_data = self._make_batched_data(config.num_envs)
# Step 4b: Build motor model info (ctrl_idx, qvel_idx, ActuatorConfig)
self._motor_info: list[tuple[int, int]] = []
self._motor_acts: list = []
for ctrl_idx, act in enumerate(self.env.robot.actuators):
if act.has_motor_model:
jnt_id = mujoco.mj_name2id(
self._mj_model, mujoco.mjtObj.mjOBJ_JOINT, act.joint,
)
qvel_idx = self._mj_model.jnt_dofadr[jnt_id]
self._motor_info.append((ctrl_idx, qvel_idx))
self._motor_acts.append(act)
# Step 5: JIT-compile the hot-path functions
self._compile_jit_fns(config.substeps)
@@ -121,28 +133,77 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
model = self._mjx_model
default = self._default_mjx_data
# ── Per-actuator limit info (shared helper on MuJoCoRunner) ──
lim = MuJoCoRunner._extract_actuator_limits(self._mj_model)
lim = ActuatorLimits(self._mj_model)
act_jnt_ids = jnp.array(lim.jnt_ids)
act_limited = jnp.array(lim.limited)
act_lo = jnp.array(lim.lo)
act_hi = jnp.array(lim.hi)
act_gs = jnp.array(lim.gear_sign)
# ── Motor model params (JAX arrays for JIT) ─────────────────
_has_motor = len(self._motor_info) > 0
if _has_motor:
acts = self._motor_acts
_ctrl_ids = jnp.array([c for c, _ in self._motor_info])
_qvel_ids = jnp.array([q for _, q in self._motor_info])
_dz_pos = jnp.array([a.deadzone[0] for a in acts])
_dz_neg = jnp.array([a.deadzone[1] for a in acts])
_gear_pos = jnp.array([a.gear[0] for a in acts])
_gear_neg = jnp.array([a.gear[1] for a in acts])
_gear_avg = jnp.array([a.gear_avg for a in acts])
_fl_pos = jnp.array([a.frictionloss[0] for a in acts])
_fl_neg = jnp.array([a.frictionloss[1] for a in acts])
_damp_pos = jnp.array([a.damping[0] for a in acts])
_damp_neg = jnp.array([a.damping[1] for a in acts])
_visc_quad = jnp.array([a.viscous_quadratic for a in acts])
_back_emf = jnp.array([a.back_emf_gain for a in acts])
# ── Batched step (N substeps per call) ──────────────────────
@jax.jit
def step_fn(data):
# Software limit switch: clamp ctrl once before substeps.
# Armature + default joint limits prevent significant overshoot
# within the substep window, so per-substep clamping isn't needed.
pos = data.qpos[:, act_jnt_ids]
ctrl = data.ctrl
at_hi = act_limited & (pos >= act_hi) & (act_gs * ctrl > 0)
at_lo = act_limited & (pos <= act_lo) & (act_gs * ctrl < 0)
clamped = jnp.where(at_hi | at_lo, 0.0, ctrl)
data = data.replace(ctrl=clamped)
ctrl = jnp.where(at_hi | at_lo, 0.0, ctrl)
if _has_motor:
# Deadzone + asymmetric gear compensation
mc = ctrl[:, _ctrl_ids]
mc = jnp.where((mc >= 0) & (mc < _dz_pos), 0.0, mc)
mc = jnp.where((mc < 0) & (mc > -_dz_neg), 0.0, mc)
gear_dir = jnp.where(mc >= 0, _gear_pos, _gear_neg)
mc = mc * gear_dir / _gear_avg
ctrl = ctrl.at[:, _ctrl_ids].set(mc)
data = data.replace(ctrl=ctrl)
def body(_, d):
if _has_motor:
vel = d.qvel[:, _qvel_ids]
mc = d.ctrl[:, _ctrl_ids]
# Coulomb friction (direction-dependent)
fl = jnp.where(vel > 0, _fl_pos, _fl_neg)
torque = -jnp.where(
jnp.abs(vel) > 1e-6, jnp.sign(vel) * fl, 0.0,
)
# Viscous damping (direction-dependent)
damp = jnp.where(vel > 0, _damp_pos, _damp_neg)
torque = torque - damp * vel
# Quadratic velocity drag
torque = torque - _visc_quad * vel * jnp.abs(vel)
# Back-EMF torque reduction
bemf = _back_emf * vel * jnp.sign(mc)
torque = torque - jnp.where(
jnp.abs(mc) > 1e-6, bemf, 0.0,
)
torque = jnp.clip(torque, -10.0, 10.0)
d = d.replace(
qfrc_applied=d.qfrc_applied.at[:, _qvel_ids].set(torque),
)
return jax.vmap(mjx.step, in_axes=(None, 0))(model, d)
return jax.lax.fori_loop(0, substeps, body, data)
@@ -210,7 +271,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
# ── Rendering ────────────────────────────────────────────────────
def render(self, env_idx: int = 0) -> np.ndarray:
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
"""Offscreen render — copies one env's state from GPU to CPU."""
self._render_data.qpos[:] = np.asarray(self._batch_data.qpos[env_idx])
self._render_data.qvel[:] = np.asarray(self._batch_data.qvel[env_idx])
@@ -222,10 +283,4 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
self._mj_model, width=640, height=480,
)
self._offscreen_renderer.update_scene(self._render_data)
frame = self._offscreen_renderer.render().copy()
# Import shared overlay helper from mujoco runner
from src.runners.mujoco import _draw_action_overlay
ctrl_val = float(self._render_data.ctrl[0]) if self._mj_model.nu > 0 else 0.0
_draw_action_overlay(frame, ctrl_val)
return frame
return self._offscreen_renderer.render().copy()

View File

@@ -1,4 +1,6 @@
import dataclasses
import os
import tempfile
import xml.etree.ElementTree as ET
from pathlib import Path
@@ -10,26 +12,195 @@ from src.core.env import BaseEnv
from src.core.robot import RobotConfig
from src.core.runner import BaseRunner, BaseRunnerConfig
@dataclasses.dataclass
class MuJoCoRunnerConfig(BaseRunnerConfig):
num_envs: int = 16
device: str = "cpu"
dt: float = 0.02
substeps: int = 2
dt: float = 0.002
substeps: int = 10
@dataclasses.dataclass
class ActuatorLimits:
"""Per-actuator joint-limit info used for software ctrl clamping.
"""Software limit-switch: cuts motor ctrl when a joint hits its range.
Real motor controllers have position limits that prevent the motor
from driving into the mechanical hard stop.
The real robot has physical limit switches that kill motor current
at the travel endpoints. MuJoCo's built-in joint limits only apply
a spring force — they don't zero the actuator signal. This class
replicates the hardware behavior.
"""
jnt_ids: np.ndarray # (nu,) joint index for each actuator
limited: np.ndarray # (nu,) bool — whether that joint is limited
lo: np.ndarray # (nu,) lower position bound
hi: np.ndarray # (nu,) upper position bound
gear_sign: np.ndarray # (nu,) sign of gear ratio
def __init__(self, model: mujoco.MjModel) -> None:
jnt_ids = model.actuator_trnid[:model.nu, 0]
self.jnt_ids = jnt_ids
self.limited = model.jnt_limited[jnt_ids].astype(bool)
self.lo = model.jnt_range[jnt_ids, 0]
self.hi = model.jnt_range[jnt_ids, 1]
self.gear_sign = np.sign(model.actuator_gear[:model.nu, 0])
def enforce(self, model: mujoco.MjModel, data: mujoco.MjData) -> None:
"""Zero ctrl that would push past joint limits (call every substep)."""
if not self.limited.any():
return
pos = data.qpos[self.jnt_ids]
signed_ctrl = self.gear_sign * data.ctrl[:model.nu]
at_hi = self.limited & (pos >= self.hi) & (signed_ctrl > 0)
at_lo = self.limited & (pos <= self.lo) & (signed_ctrl < 0)
data.ctrl[at_hi | at_lo] = 0.0
# ── Public utilities ─────────────────────────────────────────────────
def load_mujoco_model(robot: RobotConfig) -> mujoco.MjModel:
"""Load a URDF (or MJCF) and apply robot.yaml settings.
Single model-loading entry point for all MuJoCo-based code:
training runners, MJX, and system identification.
Two-step approach required because MuJoCo's URDF parser ignores
``<actuator>`` in the ``<mujoco>`` extension block:
1. Load the URDF -> MuJoCo converts it to internal MJCF
2. Export the MJCF XML, inject actuators + joint overrides, reload
This keeps the URDF clean (re-exportable from CAD) -- all hardware
tuning lives in ``robot.yaml``.
"""
abs_path = robot.urdf_path.resolve()
model_dir = abs_path.parent
is_urdf = abs_path.suffix.lower() == ".urdf"
# -- Step 1: Load URDF with meshdir injection --
if is_urdf:
tree = ET.parse(abs_path)
root = tree.getroot()
# MuJoCo's URDF parser strips directory prefixes from mesh
# filenames, so we inject a <mujoco><compiler meshdir="..."/>
# block. The original URDF stays clean and simulator-agnostic.
meshdir = None
for mesh_el in root.iter("mesh"):
fn = mesh_el.get("filename", "")
parent = str(Path(fn).parent)
if parent and parent != ".":
meshdir = parent
break
if meshdir:
mj_ext = ET.SubElement(root, "mujoco")
ET.SubElement(mj_ext, "compiler", attrib={
"meshdir": meshdir,
"balanceinertia": "true",
})
# Write to a temp file (unique name for multiprocessing safety).
fd, tmp_path = tempfile.mkstemp(
suffix=".urdf", prefix="_mj_", dir=str(model_dir),
)
os.close(fd)
try:
tree.write(tmp_path, xml_declaration=True, encoding="unicode")
model_raw = mujoco.MjModel.from_xml_path(tmp_path)
finally:
Path(tmp_path).unlink(missing_ok=True)
else:
model_raw = mujoco.MjModel.from_xml_path(str(abs_path))
# If robot.yaml has no actuators/joints, we're done.
if not robot.actuators and not robot.joints:
return model_raw
# -- Step 2: Export MJCF, inject actuators + joint overrides --
fd, tmp_path = tempfile.mkstemp(
suffix=".xml", prefix="_mj_", dir=str(model_dir),
)
os.close(fd)
try:
mujoco.mj_saveLastXML(tmp_path, model_raw)
mjcf_str = Path(tmp_path).read_text()
root = ET.fromstring(mjcf_str)
# -- Inject actuators --
if robot.actuators:
act_elem = ET.SubElement(root, "actuator")
for act in robot.actuators:
attribs = {
"name": f"{act.joint}_{act.type}",
"joint": act.joint,
"gear": str(act.gear_avg),
"ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
}
# dyntype is only available on <general>, not on
# shortcut elements like <motor>/<position>/<velocity>.
use_general = act.filter_tau > 0
if use_general:
attribs["dyntype"] = "filter"
attribs["dynprm"] = str(act.filter_tau)
attribs["gaintype"] = "fixed"
if act.type == "position":
attribs["biastype"] = "affine"
attribs["gainprm"] = str(act.kp)
attribs["biasprm"] = f"0 -{act.kp} -{act.kv}"
elif act.type == "velocity":
attribs["biastype"] = "affine"
attribs["gainprm"] = str(act.kp)
attribs["biasprm"] = f"0 0 -{act.kp}"
else: # motor
attribs["biastype"] = "none"
ET.SubElement(act_elem, "general", attrib=attribs)
else:
if act.type == "position":
attribs["kp"] = str(act.kp)
if act.kv > 0:
attribs["kv"] = str(act.kv)
elif act.type == "velocity":
attribs["kp"] = str(act.kp)
ET.SubElement(act_elem, act.type, attrib=attribs)
# -- Apply joint overrides from robot.yaml --
# For actuated joints with a motor model, MuJoCo damping/frictionloss
# are set to 0 — the motor model handles them via qfrc_applied.
joint_damping: dict[str, float] = {}
joint_frictionloss: dict[str, float] = {}
for act in robot.actuators:
if act.has_motor_model:
joint_damping[act.joint] = 0.0
joint_frictionloss[act.joint] = 0.0
joint_armature: dict[str, float] = {}
for name, jcfg in robot.joints.items():
if jcfg.damping is not None:
joint_damping[name] = jcfg.damping
if jcfg.armature is not None:
joint_armature[name] = jcfg.armature
if jcfg.frictionloss is not None:
joint_frictionloss[name] = jcfg.frictionloss
for body in root.iter("body"):
for jnt in body.findall("joint"):
name = jnt.get("name")
if name in joint_damping:
jnt.set("damping", str(joint_damping[name]))
if name in joint_armature:
jnt.set("armature", str(joint_armature[name]))
if name in joint_frictionloss:
jnt.set("frictionloss", str(joint_frictionloss[name]))
# Disable self-collision on all geoms.
for geom in root.iter("geom"):
geom.set("contype", "0")
geom.set("conaffinity", "0")
modified_xml = ET.tostring(root, encoding="unicode")
Path(tmp_path).write_text(modified_xml)
return mujoco.MjModel.from_xml_path(tmp_path)
finally:
Path(tmp_path).unlink(missing_ok=True)
# -- Runner -----------------------------------------------------------
class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
def __init__(self, env: BaseEnv, config: MuJoCoRunnerConfig):
@@ -38,201 +209,52 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
@property
def num_envs(self) -> int:
return self.config.num_envs
@property
def device(self) -> torch.device:
return torch.device(self.config.device)
@staticmethod
def _load_model(robot: RobotConfig) -> mujoco.MjModel:
"""Load a URDF (or MJCF) and apply robot.yaml settings.
Two-step approach required because MuJoCo's URDF parser ignores
<actuator> in the <mujoco> extension block:
1. Load the URDF → MuJoCo converts it to internal MJCF
2. Export the MJCF XML, inject actuators + joint overrides, reload
This keeps the URDF clean (re-exportable from CAD) — all hardware
tuning lives in robot.yaml.
"""
abs_path = robot.urdf_path.resolve()
model_dir = abs_path.parent
is_urdf = abs_path.suffix.lower() == ".urdf"
# MuJoCo's URDF parser strips directory prefixes from mesh filenames,
# so we inject a <mujoco><compiler meshdir="..."/> block into a
# temporary copy. The original URDF stays clean and simulator-agnostic.
if is_urdf:
tree = ET.parse(abs_path)
root = tree.getroot()
# Detect the mesh subdirectory from the first mesh filename
meshdir = None
for mesh_el in root.iter("mesh"):
fn = mesh_el.get("filename", "")
parent = str(Path(fn).parent)
if parent and parent != ".":
meshdir = parent
break
if meshdir:
mj_ext = ET.SubElement(root, "mujoco")
ET.SubElement(mj_ext, "compiler", attrib={
"meshdir": meshdir,
"balanceinertia": "true",
})
tmp_urdf = model_dir / "_tmp_mujoco_load.urdf"
tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
try:
model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
finally:
tmp_urdf.unlink()
else:
model_raw = mujoco.MjModel.from_xml_path(str(abs_path))
if not robot.actuators and not robot.joints:
return model_raw
# Step 2: Export internal MJCF, inject actuators + joint overrides, reload
tmp_mjcf = model_dir / "_tmp_actuator_inject.xml"
try:
mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
mjcf_str = tmp_mjcf.read_text()
root = ET.fromstring(mjcf_str)
# ── Inject actuators ────────────────────────────────────
if robot.actuators:
act_elem = ET.SubElement(root, "actuator")
for act in robot.actuators:
attribs = {
"name": f"{act.joint}_{act.type}",
"joint": act.joint,
"gear": str(act.gear),
"ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
}
# Actuator dynamics: 1st-order low-pass filter on ctrl.
# MuJoCo applies this every physics substep, which is
# more physically accurate than an external EMA.
# dyntype is only available on <general>, not on
# shortcut elements like <motor>/<position>/<velocity>.
use_general = act.filter_tau > 0
if use_general:
attribs["dyntype"] = "filter"
attribs["dynprm"] = str(act.filter_tau)
if use_general:
attribs["gaintype"] = "fixed"
if act.type == "position":
attribs["biastype"] = "affine"
attribs["gainprm"] = str(act.kp)
attribs["biasprm"] = f"0 -{act.kp} -{act.kv}"
elif act.type == "velocity":
attribs["biastype"] = "affine"
attribs["gainprm"] = str(act.kp)
attribs["biasprm"] = f"0 0 -{act.kp}"
else: # motor
attribs["biastype"] = "none"
ET.SubElement(act_elem, "general", attrib=attribs)
else:
if act.type == "position":
attribs["kp"] = str(act.kp)
if act.kv > 0:
attribs["kv"] = str(act.kv)
elif act.type == "velocity":
attribs["kp"] = str(act.kp)
ET.SubElement(act_elem, act.type, attrib=attribs)
# ── Apply joint overrides from robot.yaml ───────────────
# Merge actuator damping + explicit joint overrides
joint_damping = {a.joint: a.damping for a in robot.actuators}
joint_armature: dict[str, float] = {}
joint_frictionloss: dict[str, float] = {}
for name, jcfg in robot.joints.items():
if jcfg.damping is not None:
joint_damping[name] = jcfg.damping
if jcfg.armature is not None:
joint_armature[name] = jcfg.armature
if jcfg.frictionloss is not None:
joint_frictionloss[name] = jcfg.frictionloss
for body in root.iter("body"):
for jnt in body.findall("joint"):
name = jnt.get("name")
if name in joint_damping:
jnt.set("damping", str(joint_damping[name]))
if name in joint_armature:
jnt.set("armature", str(joint_armature[name]))
if name in joint_frictionloss:
jnt.set("frictionloss", str(joint_frictionloss[name]))
# Disable self-collision on all geoms.
# URDF mesh convex hulls often overlap at joints (especially
# grandparent↔grandchild bodies that MuJoCo does NOT auto-exclude),
# causing phantom contact forces.
for geom in root.iter("geom"):
geom.set("contype", "0")
geom.set("conaffinity", "0")
# Joint limits use MuJoCo's default solver settings.
# The software limit switch (zeroing ctrl at limits) + armature
# prevent overshoot without needing ultra-stiff solref that
# kills MJX GPU solver performance.
# Step 4: Write modified MJCF and reload from file path
# (from_xml_path resolves mesh paths relative to the file location)
modified_xml = ET.tostring(root, encoding="unicode")
tmp_mjcf.write_text(modified_xml)
return mujoco.MjModel.from_xml_path(str(tmp_mjcf))
finally:
tmp_mjcf.unlink(missing_ok=True)
@staticmethod
def _extract_actuator_limits(model: mujoco.MjModel) -> ActuatorLimits:
"""Extract per-actuator joint-limit arrays from a loaded MjModel.
Shared by MuJoCoRunner and MJXRunner so the logic isn't duplicated.
"""
nu = model.nu
jnt_ids = np.array([model.actuator_trnid[i, 0] for i in range(nu)])
return ActuatorLimits(
jnt_ids=jnt_ids,
limited=np.array([model.jnt_limited[j] for j in jnt_ids], dtype=bool),
lo=np.array([model.jnt_range[j, 0] for j in jnt_ids]),
hi=np.array([model.jnt_range[j, 1] for j in jnt_ids]),
gear_sign=np.sign(np.array([model.actuator_gear[i, 0] for i in range(nu)])),
)
def _sim_initialize(self, config: MuJoCoRunnerConfig) -> None:
self._model = self._load_model(self.env.robot)
self._model = load_mujoco_model(self.env.robot)
self._model.opt.timestep = config.dt
self._data: list[mujoco.MjData] = [mujoco.MjData(self._model) for _ in range(config.num_envs)]
self._data: list[mujoco.MjData] = [
mujoco.MjData(self._model) for _ in range(config.num_envs)
]
self._nq = self._model.nq
self._nv = self._model.nv
self._limits = ActuatorLimits(self._model)
self._limits = self._extract_actuator_limits(self._model)
# Build motor model: list of (actuator_config, joint_qvel_index) for
# actuators that have asymmetric motor dynamics.
self._motor_actuators: list[tuple] = []
for act in self.env.robot.actuators:
if act.has_motor_model:
jnt_id = mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_JOINT, act.joint)
qvel_idx = self._model.jnt_dofadr[jnt_id]
self._motor_actuators.append((act, qvel_idx))
def _sim_step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
actions_np: np.ndarray = actions.cpu().numpy()
# Apply per-actuator ctrl transform (deadzone + gear compensation)
for act_idx, (act, _) in enumerate(self._motor_actuators):
for env_idx in range(self.num_envs):
actions_np[env_idx, act_idx] = act.transform_ctrl(
float(actions_np[env_idx, act_idx])
)
qpos_batch = np.zeros((self.num_envs, self._nq), dtype=np.float32)
qvel_batch = np.zeros((self.num_envs, self._nv), dtype=np.float32)
for i, data in enumerate(self._data):
data.ctrl[:] = actions_np[i]
for _ in range(self.config.substeps):
# Software limit switch: zero out ctrl that would push
# a joint past its position limit (like a real controller).
lim = self._limits
for a in range(self._model.nu):
if lim.limited[a]:
pos = data.qpos[lim.jnt_ids[a]]
gs = lim.gear_sign[a]
if pos >= lim.hi[a] and gs * data.ctrl[a] > 0:
data.ctrl[a] = 0.0
elif pos <= lim.lo[a] and gs * data.ctrl[a] < 0:
data.ctrl[a] = 0.0
# Apply asymmetric motor forces via qfrc_applied
for act, qvel_idx in self._motor_actuators:
vel = data.qvel[qvel_idx]
ctrl = data.ctrl[0] # TODO: generalise for multi-actuator
data.qfrc_applied[qvel_idx] = act.compute_motor_force(vel, ctrl)
self._limits.enforce(self._model, data)
mujoco.mj_step(self._model, data)
qpos_batch[i] = data.qpos
@@ -242,7 +264,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
torch.from_numpy(qpos_batch).to(self.device),
torch.from_numpy(qvel_batch).to(self.device),
)
def _sim_reset(self, env_ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
ids = env_ids.cpu().numpy()
n = len(ids)
@@ -250,21 +272,13 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
qpos_batch = np.zeros((n, self._nq), dtype=np.float32)
qvel_batch = np.zeros((n, self._nv), dtype=np.float32)
default_qpos = self.env.get_default_qpos(self._nq)
for i, env_id in enumerate(ids):
data = self._data[env_id]
mujoco.mj_resetData(self._model, data)
# Set initial pose (env-specific, e.g. pendulum hanging down)
if default_qpos is not None:
data.qpos[:] = default_qpos
# Add small random perturbation for exploration
# Small random perturbation for exploration
data.qpos[:] += np.random.uniform(-0.05, 0.05, size=self._nq)
data.qvel[:] += np.random.uniform(-0.05, 0.05, size=self._nv)
# Reset smoothed ctrl so motor starts from rest
data.ctrl[:] = 0.0
qpos_batch[i] = data.qpos
@@ -275,7 +289,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
torch.from_numpy(qvel_batch).to(self.device),
)
def render(self, env_idx: int = 0) -> np.ndarray:
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
"""Offscreen render of a single environment."""
if not hasattr(self, "_offscreen_renderer"):
self._offscreen_renderer = mujoco.Renderer(
@@ -283,43 +297,4 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
)
mujoco.mj_forward(self._model, self._data[env_idx])
self._offscreen_renderer.update_scene(self._data[env_idx])
frame = self._offscreen_renderer.render().copy()
# Draw action bar overlay — shows ctrl[0] as a horizontal bar
ctrl_val = float(self._data[env_idx].ctrl[0]) if self._model.nu > 0 else 0.0
_draw_action_overlay(frame, ctrl_val)
return frame
def _draw_action_overlay(frame: np.ndarray, action: float) -> None:
"""Draw an action bar + text on a rendered frame (no OpenCV needed).
Bar is centered horizontally: green to the right (+), red to the left (-).
"""
h, w = frame.shape[:2]
# Bar geometry
bar_y = h - 30
bar_h = 16
bar_x_center = w // 2
bar_half_w = w // 4 # max half-width of the bar
bar_x_left = bar_x_center - bar_half_w
bar_x_right = bar_x_center + bar_half_w
# Background (dark grey)
frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40]
# Filled bar
fill_len = int(abs(action) * bar_half_w)
if action > 0:
color = [60, 200, 60] # green
x0 = bar_x_center
x1 = min(bar_x_center + fill_len, bar_x_right)
else:
color = [200, 60, 60] # red
x1 = bar_x_center
x0 = max(bar_x_center - fill_len, bar_x_left)
frame[bar_y:bar_y + bar_h, x0:x1] = color
# Center tick mark (white)
frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255]
return self._offscreen_renderer.render().copy()

View File

@@ -11,11 +11,15 @@ Serial protocol (ESP32 firmware):
H — stop streaming
M<int> — set motor PWM speed (-255 … 255)
State lines received FROM the ESP32:
S,<ms>,<enc>,<rpm>,<motor_speed>,<at_limit>,
<pend_deg>,<pend_vel>,<target_speed>,<braking>,
<enc_vel_cps>,<pendulum_ok>
(12 comma-separated fields after the ``S`` prefix)
State lines received FROM the ESP32 (firmware sends SI units):
S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
(7 comma-separated fields after the ``S`` prefix)
motor_rad — motor joint angle (radians)
motor_vel — motor joint velocity (rad/s)
pend_rad — pendulum angle (radians, 0 = hanging down)
pend_vel — pendulum angular velocity (rad/s)
motor_speed — applied PWM (-255..255, for action recording)
A daemon thread continuously reads the serial stream so the control
loop never blocks on I/O.
@@ -28,7 +32,6 @@ from __future__ import annotations
import dataclasses
import logging
import math
import threading
import time
from typing import Any
@@ -36,8 +39,6 @@ from typing import Any
import numpy as np
import torch
from src.core.env import BaseEnv
from src.core.hardware import HardwareConfig, load_hardware_config
from src.core.runner import BaseRunner, BaseRunnerConfig
logger = logging.getLogger(__name__)
@@ -52,9 +53,14 @@ class SerialRunnerConfig(BaseRunnerConfig):
port: str = "/dev/cu.usbserial-0001"
baud: int = 115200
dt: float = 0.02 # control loop period (seconds), 50 Hz
dt: float = 0.04 # control loop period (seconds), 25 Hz
no_data_timeout: float = 2.0 # seconds of silence → disconnect
encoder_jump_threshold: int = 200 # encoder tick jump → reboot
# Physical reset procedure
reset_drive_speed: int = 80 # PWM for bang-bang drive-to-center
reset_deadband_rad: float = 0.01 # "centered" threshold (~0.6°)
reset_drive_timeout: float = 3.0 # seconds to reach center
reset_settle_timeout: float = 30.0 # seconds to wait for pendulum
class SerialRunner(BaseRunner[SerialRunnerConfig]):
@@ -80,22 +86,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
return torch.device("cpu")
def _sim_initialize(self, config: SerialRunnerConfig) -> None:
# Load hardware description (encoder, safety, reset params).
hw = load_hardware_config(self.env.config.robot_path)
if hw is None:
raise FileNotFoundError(
f"hardware.yaml not found in {self.env.config.robot_path}. "
"The serial runner requires a hardware config for encoder, "
"safety, and reset parameters."
)
self._hw: HardwareConfig = hw
self._counts_per_rev: float = hw.encoder.counts_per_rev
self._max_motor_angle_rad: float = (
math.radians(hw.safety.max_motor_angle_deg)
if hw.safety.max_motor_angle_deg > 0
else 0.0
)
# Joint dimensions for the rotary cartpole (motor + pendulum).
self._nq = 2
self._nv = 2
@@ -105,8 +95,16 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._serial_mod = _serial
# Explicitly disable hardware flow control and exclusive mode to
# avoid termios.error (errno 22) on macOS with CH340/CP2102 adapters.
self.ser: _serial.Serial = _serial.Serial(
config.port, config.baud, timeout=0.05
port=config.port,
baudrate=config.baud,
timeout=0.05,
xonxoff=False,
rtscts=False,
dsrdtr=False,
exclusive=False,
)
time.sleep(2) # Wait for ESP32 boot.
self.ser.reset_input_buffer()
@@ -116,22 +114,17 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._serial_disconnected: bool = False
self._last_esp_ms: int = 0
self._last_data_time: float = time.monotonic()
self._last_encoder_count: int = 0
self._streaming: bool = False
# Latest parsed state (updated by the reader thread).
# Firmware sends SI units — values are used directly as qpos/qvel.
self._latest_state: dict[str, Any] = {
"timestamp_ms": 0,
"encoder_count": 0,
"rpm": 0.0,
"motor_rad": 0.0,
"motor_vel": 0.0,
"pend_rad": 0.0,
"pend_vel": 0.0,
"motor_speed": 0,
"at_limit": False,
"pendulum_angle": 0.0,
"pendulum_velocity": 0.0,
"target_speed": 0,
"braking": False,
"enc_vel_cps": 0.0,
"pendulum_ok": False,
}
self._state_lock = threading.Lock()
self._state_event = threading.Event()
@@ -148,6 +141,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._streaming = True
self._last_data_time = time.monotonic()
# Derive max PWM from actuator ctrl_range so the serial
# command range matches what MuJoCo training sees.
ctrl_hi = self.env.robot.actuators[0].ctrl_range[1]
self._max_pwm: int = round(ctrl_hi * 255)
# Track wall-clock time of last step for PPO-gap detection.
self._last_step_time: float = time.monotonic()
@@ -170,41 +168,22 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._sim_reset(all_ids)
self.step_counts.zero_()
step_start = time.monotonic()
# Map normalised action [-1, 1] → PWM [-255, 255].
# Map normalised action [-1, 1] → PWM, scaled by ctrl_range.
action_val = float(actions[0, 0].clamp(-1.0, 1.0))
motor_speed = int(action_val * 255)
motor_speed = int(action_val * self._max_pwm)
self._send(f"M{motor_speed}")
# Enforce dt wall-clock timing.
elapsed = time.monotonic() - step_start
remaining = self.config.dt - elapsed
if remaining > 0:
time.sleep(remaining)
# Stream-driven: block until the firmware sends the next state
# line (~20 ms at 50 Hz).
state = self._read_state_blocking(timeout=0.1)
# Read latest sensor data (non-blocking — dt sleep ensures freshness).
state = self._read_state()
# Firmware sends SI units — use directly.
qpos, qvel = self._state_to_tensors(state)
motor_angle = (
state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
)
motor_vel = (
state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
)
pendulum_angle = math.radians(state["pendulum_angle"])
pendulum_vel = math.radians(state["pendulum_velocity"])
# Cache motor angle for safety check in step() — avoids a second read.
self._last_motor_angle_rad = motor_angle
# Cache for _sync_viz().
self._last_sync_state = state
self._last_step_time = time.monotonic()
qpos = torch.tensor(
[[motor_angle, pendulum_angle]], dtype=torch.float32
)
qvel = torch.tensor(
[[motor_vel, pendulum_vel]], dtype=torch.float32
)
return qpos, qvel
def _sim_reset(
@@ -232,59 +211,42 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
# Physically return the motor to the centre position.
self._drive_to_center()
# Wait until the pendulum settles.
self._wait_for_pendulum_still()
# Wait until the env considers the robot settled.
self._wait_for_settle()
# Refresh data timer so health checks don't false-positive.
self._last_data_time = time.monotonic()
# Read settled state and return as qpos/qvel.
state = self._read_state_blocking()
motor_angle = (
state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
)
motor_vel = (
state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
)
pendulum_angle = math.radians(state["pendulum_angle"])
pendulum_vel = math.radians(state["pendulum_velocity"])
qpos, qvel = self._state_to_tensors(state)
qpos = torch.tensor(
[[motor_angle, pendulum_angle]], dtype=torch.float32
)
qvel = torch.tensor(
[[motor_vel, pendulum_vel]], dtype=torch.float32
)
self._last_sync_state = state
return qpos, qvel
def _sim_close(self) -> None:
self._reader_running = False
self._streaming = False
self._send("H") # Stop streaming.
self._send("M0") # Stop motor.
self._send("H")
self._send("M0")
time.sleep(0.1)
self._reader_thread.join(timeout=1.0)
self.ser.close()
if hasattr(self, "_offscreen_renderer") and self._offscreen_renderer is not None:
self._offscreen_renderer.close()
super()._sim_close()
# ------------------------------------------------------------------
# MuJoCo digital-twin rendering
# ------------------------------------------------------------------
def _ensure_viz_model(self) -> None:
"""Lazily load the MuJoCo model for visualisation (digital twin).
Reuses the same URDF + robot.yaml that the MuJoCoRunner would use,
but only for rendering — no physics stepping.
"""
"""Lazily load the MuJoCo model for visualisation (digital twin)."""
if hasattr(self, "_viz_model"):
return
import mujoco
from src.runners.mujoco import MuJoCoRunner
from src.runners.mujoco import load_mujoco_model
self._viz_model = MuJoCoRunner._load_model(self.env.robot)
self._viz_model = load_mujoco_model(self.env.robot)
self._viz_data = mujoco.MjData(self._viz_model)
self._offscreen_renderer = None
@@ -293,32 +255,21 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
import mujoco
self._ensure_viz_model()
state = self._read_state()
# Set joint positions from serial data.
motor_angle = (
state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
)
pendulum_angle = math.radians(state["pendulum_angle"])
self._viz_data.qpos[0] = motor_angle
self._viz_data.qpos[1] = pendulum_angle
last_state = getattr(self, "_last_sync_state", None)
if last_state is None:
last_state = self._read_state()
# Set joint velocities (for any velocity-dependent visuals).
motor_vel = (
state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
)
pendulum_vel = math.radians(state["pendulum_velocity"])
self._viz_data.qvel[0] = motor_vel
self._viz_data.qvel[1] = pendulum_vel
# Firmware sends radians — use directly.
self._viz_data.qpos[0] = last_state["motor_rad"]
self._viz_data.qpos[1] = last_state["pend_rad"]
self._viz_data.qvel[0] = last_state["motor_vel"]
self._viz_data.qvel[1] = last_state["pend_vel"]
# Forward kinematics (updates body positions for rendering).
mujoco.mj_forward(self._viz_model, self._viz_data)
def render(self, env_idx: int = 0) -> np.ndarray:
"""Offscreen render of the digital-twin MuJoCo model.
Called by VideoRecordingTrainer during training to capture frames.
"""
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
"""Offscreen render of the digital-twin MuJoCo model."""
import mujoco
self._sync_viz()
@@ -340,7 +291,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
# Check for ESP32 reboot / disconnect BEFORE stepping.
if self._rebooted or self._serial_disconnected:
self._send("M0")
# Return a terminal observation with penalty.
qpos, qvel = self._make_current_state()
state = self.env.build_state(qpos, qvel)
obs = self.env.compute_observations(state)
@@ -359,16 +309,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
rewards = torch.tensor([[-100.0]])
info["reboot_detected"] = True
# Check motor angle against hard safety limit.
# Uses the cached value from _sim_step — no extra serial read.
if self._max_motor_angle_rad > 0:
motor_angle = abs(getattr(self, "_last_motor_angle_rad", 0.0))
if motor_angle >= self._max_motor_angle_rad:
self._send("M0")
terminated = torch.tensor([[True]])
rewards = torch.tensor([[-100.0]])
info["motor_limit_exceeded"] = True
# Always stop motor on episode end.
if terminated.any() or truncated.any():
self._send("M0")
@@ -387,7 +327,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
self._serial_disconnected = True
def _serial_reader(self) -> None:
"""Background thread: continuously read and parse serial lines."""
"""Background thread: continuously read and parse serial lines.
Protocol: ``S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>``
(7 comma-separated fields). Firmware sends SI units directly.
"""
while self._reader_running:
try:
if self.ser.in_waiting:
@@ -405,9 +349,15 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
if line.startswith("S,"):
parts = line.split(",")
if len(parts) >= 12:
esp_ms = int(parts[1])
enc = int(parts[2])
if len(parts) >= 7:
try:
esp_ms = int(parts[1])
except ValueError:
logger.debug(
"Malformed state line (header): %s",
line,
)
continue
# Detect reboot: timestamp jumped backwards.
if (
@@ -422,38 +372,24 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
esp_ms,
)
# Detect reboot: encoder snapped to 0 from
# a far position.
if (
abs(self._last_encoder_count)
> self.config.encoder_jump_threshold
and abs(enc) < 5
):
self._rebooted = True
logger.critical(
"ESP32 reboot detected: encoder"
" jumped %d -> %d",
self._last_encoder_count,
enc,
)
self._last_esp_ms = esp_ms
self._last_encoder_count = enc
self._last_data_time = time.monotonic()
parsed: dict[str, Any] = {
"timestamp_ms": esp_ms,
"encoder_count": enc,
"rpm": float(parts[3]),
"motor_speed": int(parts[4]),
"at_limit": bool(int(parts[5])),
"pendulum_angle": float(parts[6]),
"pendulum_velocity": float(parts[7]),
"target_speed": int(parts[8]),
"braking": bool(int(parts[9])),
"enc_vel_cps": float(parts[10]),
"pendulum_ok": bool(int(parts[11])),
}
try:
parsed: dict[str, Any] = {
"timestamp_ms": esp_ms,
"motor_rad": float(parts[2]),
"motor_vel": float(parts[3]),
"pend_rad": float(parts[4]),
"pend_vel": float(parts[5]),
"motor_speed": int(parts[6]),
}
except ValueError:
logger.debug(
"Malformed state line (fields): %s",
line,
)
continue
with self._state_lock:
self._latest_state = parsed
self._state_event.set()
@@ -483,81 +419,68 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
return True
def _read_state(self) -> dict[str, Any]:
"""Return the most recent state from the reader thread (non-blocking).
The background thread updates at ~50 Hz and `_sim_step` already
sleeps for `dt` before calling this, so the data is always fresh.
"""
"""Return the most recent state from the reader thread (non-blocking)."""
with self._state_lock:
return dict(self._latest_state)
def _read_state_blocking(self, timeout: float = 0.05) -> dict[str, Any]:
"""Wait for a fresh sample, then return it.
Used during reset / settling where we need to guarantee we have
a new reading (no prior dt sleep).
"""
"""Wait for a fresh sample, then return it."""
self._state_event.clear()
self._state_event.wait(timeout=timeout)
with self._state_lock:
return dict(self._latest_state)
def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]:
"""Build qpos/qvel from current sensor data (utility)."""
state = self._read_state_blocking()
motor_angle = (
state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
)
motor_vel = (
state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
)
pendulum_angle = math.radians(state["pendulum_angle"])
pendulum_vel = math.radians(state["pendulum_velocity"])
def _state_to_tensors(
self, state: dict[str, Any],
) -> tuple[torch.Tensor, torch.Tensor]:
"""Convert a parsed state dict to (qpos, qvel) tensors."""
qpos = torch.tensor(
[[motor_angle, pendulum_angle]], dtype=torch.float32
[[state["motor_rad"], state["pend_rad"]]], dtype=torch.float32
)
qvel = torch.tensor(
[[motor_vel, pendulum_vel]], dtype=torch.float32
[[state["motor_vel"], state["pend_vel"]]], dtype=torch.float32
)
return qpos, qvel
def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]:
"""Build qpos/qvel from current sensor data (utility)."""
return self._state_to_tensors(self._read_state_blocking())
# ------------------------------------------------------------------
# Physical reset helpers
# ------------------------------------------------------------------
def _drive_to_center(self) -> None:
"""Drive the motor back toward encoder=0 using bang-bang control."""
rc = self._hw.reset
"""Drive the motor back toward center using bang-bang control."""
cfg = self.config
start = time.time()
while time.time() - start < rc.drive_timeout:
while time.time() - start < cfg.reset_drive_timeout:
state = self._read_state_blocking()
enc = state["encoder_count"]
if abs(enc) < rc.deadband:
motor_rad = state["motor_rad"]
if abs(motor_rad) < cfg.reset_deadband_rad:
break
speed = rc.drive_speed if enc < 0 else -rc.drive_speed
speed = cfg.reset_drive_speed if motor_rad < 0 else -cfg.reset_drive_speed
self._send(f"M{speed}")
time.sleep(0.05)
self._send("M0")
time.sleep(0.2)
def _wait_for_pendulum_still(self) -> None:
"""Block until the pendulum has settled (angle and velocity near zero)."""
rc = self._hw.reset
def _wait_for_settle(self) -> None:
"""Block until the env considers the robot ready for a new episode."""
cfg = self.config
stable_since: float | None = None
start = time.monotonic()
while time.monotonic() - start < rc.settle_timeout:
while time.monotonic() - start < cfg.reset_settle_timeout:
state = self._read_state_blocking()
angle_ok = abs(state["pendulum_angle"]) < rc.settle_angle_deg
vel_ok = abs(state["pendulum_velocity"]) < rc.settle_vel_dps
qpos, qvel = self._state_to_tensors(state)
if angle_ok and vel_ok:
if self.env.is_reset_ready(qpos, qvel):
if stable_since is None:
stable_since = time.monotonic()
elif time.monotonic() - stable_since >= rc.settle_duration:
elif time.monotonic() - stable_since >= 0.5:
logger.info(
"Pendulum settled after %.2f s",
"Robot settled after %.2f s",
time.monotonic() - start,
)
return
@@ -566,6 +489,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
time.sleep(0.02)
logger.warning(
"Pendulum did not fully settle within %.1f s — proceeding anyway.",
rc.settle_timeout,
"Robot did not settle within %.1f s — proceeding anyway.",
cfg.reset_settle_timeout,
)

79
src/sysid/_urdf.py Normal file
View File

@@ -0,0 +1,79 @@
"""URDF XML helpers shared by sysid rollout and export modules."""
from __future__ import annotations
import xml.etree.ElementTree as ET
def set_mass(inertial: ET.Element, mass: float | None) -> None:
if mass is None:
return
mass_el = inertial.find("mass")
if mass_el is not None:
mass_el.set("value", str(mass))
def set_com(
inertial: ET.Element,
x: float | None,
y: float | None,
z: float | None,
) -> None:
origin = inertial.find("origin")
if origin is None:
return
xyz = origin.get("xyz", "0 0 0").split()
if x is not None:
xyz[0] = str(x)
if y is not None:
xyz[1] = str(y)
if z is not None:
xyz[2] = str(z)
origin.set("xyz", " ".join(xyz))
def set_inertia(
inertial: ET.Element,
ixx: float | None = None,
iyy: float | None = None,
izz: float | None = None,
ixy: float | None = None,
iyz: float | None = None,
ixz: float | None = None,
) -> None:
ine = inertial.find("inertia")
if ine is None:
return
for attr, val in [
("ixx", ixx), ("iyy", iyy), ("izz", izz),
("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
]:
if val is not None:
ine.set(attr, str(val))
def patch_link_inertials(
root: ET.Element,
params: dict[str, float],
) -> None:
"""Patch arm and pendulum inertial parameters in a URDF ElementTree root."""
for link in root.iter("link"):
link_name = link.get("name", "")
inertial = link.find("inertial")
if inertial is None:
continue
if link_name == "arm":
set_mass(inertial, params.get("arm_mass"))
set_com(inertial, params.get("arm_com_x"),
params.get("arm_com_y"), params.get("arm_com_z"))
elif link_name == "pendulum":
set_mass(inertial, params.get("pendulum_mass"))
set_com(inertial, params.get("pendulum_com_x"),
params.get("pendulum_com_y"), params.get("pendulum_com_z"))
set_inertia(inertial,
ixx=params.get("pendulum_ixx"),
iyy=params.get("pendulum_iyy"),
izz=params.get("pendulum_izz"),
ixy=params.get("pendulum_ixy"))

View File

@@ -6,6 +6,10 @@ the system, and records motor + pendulum angles and velocities at ~50 Hz.
Saves a compressed numpy archive (.npz) that the optimizer can replay
in simulation to fit physics parameters.
Serial protocol (same as SerialRunner):
S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
(7 comma-separated fields — firmware sends SI units)
Usage:
python -m src.sysid.capture \
--robot-path assets/rotary_cartpole \
@@ -17,7 +21,6 @@ from __future__ import annotations
import argparse
import math
import os
import random
import threading
import time
@@ -27,7 +30,6 @@ from typing import Any
import numpy as np
import structlog
import yaml
log = structlog.get_logger()
@@ -36,25 +38,24 @@ log = structlog.get_logger()
def _parse_state_line(line: str) -> dict[str, Any] | None:
"""Parse an ``S,…`` state line from the ESP32."""
"""Parse an ``S,…`` state line from the ESP32.
Format: S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
(7 comma-separated fields, firmware sends SI units)
"""
if not line.startswith("S,"):
return None
parts = line.split(",")
if len(parts) < 12:
if len(parts) < 7:
return None
try:
return {
"timestamp_ms": int(parts[1]),
"encoder_count": int(parts[2]),
"rpm": float(parts[3]),
"motor_speed": int(parts[4]),
"at_limit": bool(int(parts[5])),
"pendulum_angle": float(parts[6]),
"pendulum_velocity": float(parts[7]),
"target_speed": int(parts[8]),
"braking": bool(int(parts[9])),
"enc_vel_cps": float(parts[10]),
"pendulum_ok": bool(int(parts[11])),
"motor_rad": float(parts[2]),
"motor_vel": float(parts[3]),
"pend_rad": float(parts[4]),
"pend_vel": float(parts[5]),
"motor_speed": int(parts[6]),
}
except (ValueError, IndexError):
return None
@@ -64,7 +65,12 @@ def _parse_state_line(line: str) -> dict[str, Any] | None:
class _SerialReader:
"""Minimal background reader for the ESP32 serial stream."""
"""Minimal background reader for the ESP32 serial stream.
Uses a sequence counter so ``read_blocking()`` guarantees it returns
a *new* state line (not a stale repeat). This keeps the capture
loop locked to the firmware's 50 Hz tick.
"""
def __init__(self, port: str, baud: int = 115200):
import serial as _serial
@@ -75,14 +81,16 @@ class _SerialReader:
self.ser.reset_input_buffer()
self._latest: dict[str, Any] = {}
self._seq: int = 0 # incremented on every new state line
self._lock = threading.Lock()
self._event = threading.Event()
self._cond = threading.Condition(self._lock)
self._running = True
self._thread = threading.Thread(target=self._reader_loop, daemon=True)
self._thread.start()
def _reader_loop(self) -> None:
_debug_count = 0
while self._running:
try:
if self.ser.in_waiting:
@@ -91,11 +99,16 @@ class _SerialReader:
.decode("utf-8", errors="ignore")
.strip()
)
# Debug: log first 10 raw lines so we can see what the firmware sends.
if _debug_count < 10 and line:
log.info("serial_raw_line", line=repr(line), count=_debug_count)
_debug_count += 1
parsed = _parse_state_line(line)
if parsed is not None:
with self._lock:
with self._cond:
self._latest = parsed
self._event.set()
self._seq += 1
self._cond.notify_all()
else:
time.sleep(0.001)
except (OSError, self._serial_mod.SerialException):
@@ -108,14 +121,19 @@ class _SerialReader:
except (OSError, self._serial_mod.SerialException):
log.critical("serial_send_failed", cmd=cmd)
def read(self) -> dict[str, Any]:
with self._lock:
return dict(self._latest)
def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
self._event.clear()
self._event.wait(timeout=timeout)
return self.read()
"""Wait until a *new* state line arrives, then return it.
Uses a sequence counter to guarantee the returned state is
different from whatever was available before this call.
"""
with self._cond:
seq_before = self._seq
if not self._cond.wait_for(
lambda: self._seq > seq_before, timeout=timeout
):
return {} # timeout — no new data
return dict(self._latest)
def close(self) -> None:
self._running = False
@@ -139,7 +157,7 @@ class _PRBSExcitation:
def __init__(
self,
amplitude: int = 180,
amplitude: int = 150,
hold_min_ms: int = 50,
hold_max_ms: int = 300,
):
@@ -169,42 +187,39 @@ def capture(
port: str = "/dev/cu.usbserial-0001",
baud: int = 115200,
duration: float = 20.0,
amplitude: int = 180,
amplitude: int = 150,
hold_min_ms: int = 50,
hold_max_ms: int = 300,
dt: float = 0.02,
motor_angle_limit_deg: float = 90.0,
) -> Path:
"""Run the capture procedure and return the path to the saved .npz file.
The capture loop is **stream-driven**: it blocks on each incoming
state line from the firmware (which arrives at 50 Hz), sends the
next motor command immediately, and records both.
Parameters
----------
robot_path : path to robot asset directory (contains hardware.yaml)
robot_path : path to robot asset directory
port : serial port for ESP32
baud : baud rate
duration : capture duration in seconds
amplitude : max PWM magnitude for excitation (0255)
amplitude : max PWM magnitude for excitation
hold_min_ms / hold_max_ms : random hold time range (ms)
dt : target sampling period (seconds), default 50 Hz
dt : nominal sample period for buffer sizing (seconds)
motor_angle_limit_deg : safety limit for motor angle
"""
robot_path = Path(robot_path).resolve()
# Load hardware config for encoder conversion + safety.
hw_yaml = robot_path / "hardware.yaml"
if not hw_yaml.exists():
raise FileNotFoundError(f"hardware.yaml not found in {robot_path}")
raw_hw = yaml.safe_load(hw_yaml.read_text())
ppr = raw_hw.get("encoder", {}).get("ppr", 11)
gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0)
counts_per_rev: float = ppr * gear_ratio * 4.0
max_motor_deg = raw_hw.get("safety", {}).get("max_motor_angle_deg", 90.0)
max_motor_rad = math.radians(max_motor_deg) if max_motor_deg > 0 else 0.0
max_motor_rad = math.radians(motor_angle_limit_deg) if motor_angle_limit_deg > 0 else 0.0
# Connect.
reader = _SerialReader(port, baud)
excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)
# Prepare recording buffers.
max_samples = int(duration / dt) + 500 # headroom
# Prepare recording buffers (generous headroom).
max_samples = int(duration / dt) + 500
rec_time = np.zeros(max_samples, dtype=np.float64)
rec_action = np.zeros(max_samples, dtype=np.float64)
rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
@@ -222,63 +237,96 @@ def capture(
duration=duration,
amplitude=amplitude,
hold_range_ms=f"{hold_min_ms}{hold_max_ms}",
dt=dt,
mode="stream-driven (firmware clock)",
)
idx = 0
pwm = 0
last_esp_ms = -1 # firmware timestamp of last recorded sample
no_data_count = 0 # consecutive timeouts with no data
t0 = time.monotonic()
try:
while True:
loop_start = time.monotonic()
elapsed = loop_start - t0
# Block until the firmware sends the next state line (~20 ms).
# Timeout at 100 ms prevents hanging if the ESP32 disconnects.
state = reader.read_blocking(timeout=0.1)
if not state:
no_data_count += 1
if no_data_count == 30: # 3 seconds with no data
log.warning(
"no_data_received",
msg="No state lines from firmware after 3s. "
"Check: is the ESP32 powered? Is it running the right firmware? "
"Try pressing the RESET button.",
)
if no_data_count == 100: # 10 seconds
log.critical(
"no_data_timeout",
msg="No data for 10s — aborting capture.",
)
break
continue # no data yet — retry
no_data_count = 0
# Deduplicate: the firmware may send multiple state lines per
# tick (e.g. M-command echo + tick). Only record one sample
# per unique firmware timestamp.
esp_ms = state.get("timestamp_ms", 0)
if esp_ms == last_esp_ms:
continue
last_esp_ms = esp_ms
elapsed = time.monotonic() - t0
if elapsed >= duration:
break
# Get excitation PWM.
# Get excitation PWM for the NEXT tick.
pwm = excitation()
# Safety: reverse/zero if near motor limit.
state = reader.read()
if state:
motor_angle_rad = (
state.get("encoder_count", 0) / counts_per_rev * 2.0 * math.pi
)
if max_motor_rad > 0:
margin = max_motor_rad * 0.85 # start braking at 85%
if motor_angle_rad > margin and pwm > 0:
pwm = -abs(pwm) # reverse
elif motor_angle_rad < -margin and pwm < 0:
pwm = abs(pwm) # reverse
# Safety: keep the arm well within its mechanical range.
# Firmware sends motor angle in radians — use directly.
motor_angle_rad = state.get("motor_rad", 0.0)
if max_motor_rad > 0:
ratio = motor_angle_rad / max_motor_rad # signed, -1..+1
abs_ratio = abs(ratio)
# Send command.
if abs_ratio > 0.90:
# Deep in the danger zone — force a strong return.
brake_strength = min(1.0, (abs_ratio - 0.90) / 0.10) # 0→1
brake_pwm = int(amplitude * (0.5 + 0.5 * brake_strength))
pwm = -brake_pwm if ratio > 0 else brake_pwm
elif abs_ratio > 0.70:
# Soft zone — only allow actions pointing back to centre.
if ratio > 0 and pwm > 0:
pwm = -abs(pwm)
elif ratio < 0 and pwm < 0:
pwm = abs(pwm)
# Send command immediately — it will take effect on the next tick.
reader.send(f"M{pwm}")
# Wait for fresh data.
time.sleep(max(0, dt - (time.monotonic() - loop_start) - 0.005))
state = reader.read_blocking(timeout=dt)
# Record this tick's state + the action the motor *actually*
# received. Firmware sends SI units — use directly.
motor_angle = state.get("motor_rad", 0.0)
motor_vel = state.get("motor_vel", 0.0)
pend_angle = state.get("pend_rad", 0.0)
pend_vel = state.get("pend_vel", 0.0)
# Firmware constrains to ±255; normalise to [-1, 1].
applied = state.get("motor_speed", 0)
action_norm = max(-255, min(255, applied)) / 255.0
if state:
enc = state.get("encoder_count", 0)
motor_angle = enc / counts_per_rev * 2.0 * math.pi
motor_vel = (
state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi
)
pend_angle = math.radians(state.get("pendulum_angle", 0.0))
pend_vel = math.radians(state.get("pendulum_velocity", 0.0))
if idx < max_samples:
rec_time[idx] = elapsed
rec_action[idx] = action_norm
rec_motor_angle[idx] = motor_angle
rec_motor_vel[idx] = motor_vel
rec_pend_angle[idx] = pend_angle
rec_pend_vel[idx] = pend_vel
idx += 1
else:
break # buffer full
# Normalised action: PWM / 255 → [-1, 1]
action_norm = pwm / 255.0
if idx < max_samples:
rec_time[idx] = elapsed
rec_action[idx] = action_norm
rec_motor_angle[idx] = motor_angle
rec_motor_vel[idx] = motor_vel
rec_pend_angle[idx] = pend_angle
rec_pend_vel[idx] = pend_vel
idx += 1
# Progress.
# Progress (every 50 samples ≈ once per second at 50 Hz).
if idx % 50 == 0:
log.info(
"capture_progress",
@@ -287,11 +335,6 @@ def capture(
pwm=pwm,
)
# Pace to dt.
remaining = dt - (time.monotonic() - loop_start)
if remaining > 0:
time.sleep(remaining)
finally:
reader.send("M0")
reader.close()
@@ -339,7 +382,7 @@ def main() -> None:
"--robot-path",
type=str,
default="assets/rotary_cartpole",
help="Path to robot asset directory (contains hardware.yaml)",
help="Path to robot asset directory",
)
parser.add_argument(
"--port",
@@ -352,7 +395,8 @@ def main() -> None:
"--duration", type=float, default=20.0, help="Capture duration (s)"
)
parser.add_argument(
"--amplitude", type=int, default=180, help="Max PWM magnitude (0255)"
"--amplitude", type=int, default=150,
help="Max PWM magnitude (should not exceed firmware MAX_MOTOR_SPEED=150)",
)
parser.add_argument(
"--hold-min-ms", type=int, default=50, help="Min hold time (ms)"
@@ -361,7 +405,11 @@ def main() -> None:
"--hold-max-ms", type=int, default=300, help="Max hold time (ms)"
)
parser.add_argument(
"--dt", type=float, default=0.02, help="Sample period (s)"
"--dt", type=float, default=0.02, help="Nominal sample period for buffer sizing (s)"
)
parser.add_argument(
"--motor-angle-limit", type=float, default=90.0,
help="Motor angle safety limit in degrees (0 = disabled)",
)
args = parser.parse_args()
@@ -374,6 +422,7 @@ def main() -> None:
hold_min_ms=args.hold_min_ms,
hold_max_ms=args.hold_max_ms,
dt=args.dt,
motor_angle_limit_deg=args.motor_angle_limit,
)

View File

@@ -8,20 +8,21 @@ alongside the originals in the robot asset directory.
from __future__ import annotations
import copy
import json
import shutil
import xml.etree.ElementTree as ET
from pathlib import Path
import structlog
import yaml
from src.sysid._urdf import patch_link_inertials
log = structlog.get_logger()
def export_tuned_files(
robot_path: str | Path,
params: dict[str, float],
motor_params: dict[str, float] | None = None,
) -> tuple[Path, Path]:
"""Write tuned URDF and robot.yaml files.
@@ -29,6 +30,8 @@ def export_tuned_files(
----------
robot_path : robot asset directory (contains robot.yaml + *.urdf)
params : dict of parameter name → tuned value (from optimizer)
motor_params : locked motor sysid params (asymmetric model).
If provided, motor joint parameters come from here.
Returns
-------
@@ -43,38 +46,7 @@ def export_tuned_files(
# ── Tune URDF ────────────────────────────────────────────────
tree = ET.parse(urdf_path)
root = tree.getroot()
for link in root.iter("link"):
link_name = link.get("name", "")
inertial = link.find("inertial")
if inertial is None:
continue
if link_name == "arm":
_set_mass(inertial, params.get("arm_mass"))
_set_com(
inertial,
params.get("arm_com_x"),
params.get("arm_com_y"),
params.get("arm_com_z"),
)
elif link_name == "pendulum":
_set_mass(inertial, params.get("pendulum_mass"))
_set_com(
inertial,
params.get("pendulum_com_x"),
params.get("pendulum_com_y"),
params.get("pendulum_com_z"),
)
_set_inertia(
inertial,
ixx=params.get("pendulum_ixx"),
iyy=params.get("pendulum_iyy"),
izz=params.get("pendulum_izz"),
ixy=params.get("pendulum_ixy"),
)
patch_link_inertials(tree.getroot(), params)
# Write tuned URDF.
tuned_urdf_name = urdf_path.stem + "_tuned" + urdf_path.suffix
@@ -91,15 +63,47 @@ def export_tuned_files(
# Point to the tuned URDF.
tuned_cfg["urdf"] = tuned_urdf_name
# Update actuator parameters.
# Update actuator parameters — full asymmetric motor model.
if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
act = tuned_cfg["actuators"][0]
if "actuator_gear" in params:
act["gear"] = round(params["actuator_gear"], 6)
if "actuator_filter_tau" in params:
act["filter_tau"] = round(params["actuator_filter_tau"], 6)
if "motor_damping" in params:
act["damping"] = round(params["motor_damping"], 6)
if motor_params:
# Asymmetric gear, damping, deadzone, frictionloss as [pos, neg].
gear_pos = motor_params.get("actuator_gear_pos", 0.424)
gear_neg = motor_params.get("actuator_gear_neg", 0.425)
act["gear"] = [round(gear_pos, 6), round(gear_neg, 6)]
damp_pos = motor_params.get("motor_damping_pos", 0.002)
damp_neg = motor_params.get("motor_damping_neg", 0.015)
act["damping"] = [round(damp_pos, 6), round(damp_neg, 6)]
dz_pos = motor_params.get("motor_deadzone_pos", 0.141)
dz_neg = motor_params.get("motor_deadzone_neg", 0.078)
act["deadzone"] = [round(dz_pos, 6), round(dz_neg, 6)]
fl_pos = motor_params.get("motor_frictionloss_pos", 0.057)
fl_neg = motor_params.get("motor_frictionloss_neg", 0.053)
act["frictionloss"] = [round(fl_pos, 6), round(fl_neg, 6)]
if "actuator_filter_tau" in motor_params:
act["filter_tau"] = round(motor_params["actuator_filter_tau"], 6)
if "viscous_quadratic" in motor_params:
act["viscous_quadratic"] = round(motor_params["viscous_quadratic"], 6)
if "back_emf_gain" in motor_params:
act["back_emf_gain"] = round(motor_params["back_emf_gain"], 6)
else:
if "actuator_gear" in params:
act["gear"] = round(params["actuator_gear"], 6)
if "actuator_filter_tau" in params:
act["filter_tau"] = round(params["actuator_filter_tau"], 6)
if "motor_damping" in params:
act["damping"] = round(params["motor_damping"], 6)
if "motor_deadzone" in params:
act["deadzone"] = round(params["motor_deadzone"], 6)
# ctrl_range from ctrl_limit parameter.
if "ctrl_limit" in params:
lim = round(params["ctrl_limit"], 6)
act["ctrl_range"] = [-lim, lim]
# Update joint overrides.
if "joints" not in tuned_cfg:
@@ -108,16 +112,23 @@ def export_tuned_files(
if "motor_joint" not in tuned_cfg["joints"]:
tuned_cfg["joints"]["motor_joint"] = {}
mj = tuned_cfg["joints"]["motor_joint"]
if "motor_armature" in params:
mj["armature"] = round(params["motor_armature"], 6)
if "motor_frictionloss" in params:
mj["frictionloss"] = round(params["motor_frictionloss"], 6)
if motor_params:
mj["armature"] = round(motor_params.get("motor_armature", 0.00277), 6)
# Frictionloss/damping = 0 in MuJoCo (motor model handles via qfrc_applied).
mj["frictionloss"] = 0.0
else:
if "motor_armature" in params:
mj["armature"] = round(params["motor_armature"], 6)
if "motor_frictionloss" in params:
mj["frictionloss"] = round(params["motor_frictionloss"], 6)
if "pendulum_joint" not in tuned_cfg["joints"]:
tuned_cfg["joints"]["pendulum_joint"] = {}
pj = tuned_cfg["joints"]["pendulum_joint"]
if "pendulum_damping" in params:
pj["damping"] = round(params["pendulum_damping"], 6)
if "pendulum_frictionloss" in params:
pj["frictionloss"] = round(params["pendulum_frictionloss"], 6)
# Write tuned robot.yaml.
tuned_yaml_path = robot_path / "robot_tuned.yaml"
@@ -136,51 +147,46 @@ def export_tuned_files(
return tuned_urdf_path, tuned_yaml_path
# ── XML helpers (shared with rollout.py) ─────────────────────────────
# ── CLI entry point ──────────────────────────────────────────────────
def _set_mass(inertial: ET.Element, mass: float | None) -> None:
if mass is None:
return
mass_el = inertial.find("mass")
if mass_el is not None:
mass_el.set("value", str(mass))
def main() -> None:
import argparse
import json
from src.sysid.rollout import LOCKED_MOTOR_PARAMS
parser = argparse.ArgumentParser(
description="Export tuned URDF + robot.yaml from sysid results."
)
parser.add_argument(
"--robot-path", type=str, default="assets/rotary_cartpole",
help="Path to robot asset directory",
)
parser.add_argument(
"--result", type=str, default=None,
help="Path to sysid_result.json (auto-detected if omitted)",
)
args = parser.parse_args()
robot_path = Path(args.robot_path).resolve()
if args.result:
result_path = Path(args.result)
else:
result_path = robot_path / "sysid_result.json"
if not result_path.exists():
raise FileNotFoundError(f"Result file not found: {result_path}")
result = json.loads(result_path.read_text())
export_tuned_files(
robot_path=args.robot_path,
params=result["best_params"],
motor_params=result.get("motor_params", dict(LOCKED_MOTOR_PARAMS)),
)
def _set_com(
inertial: ET.Element,
x: float | None,
y: float | None,
z: float | None,
) -> None:
origin = inertial.find("origin")
if origin is None:
return
xyz = origin.get("xyz", "0 0 0").split()
if x is not None:
xyz[0] = str(x)
if y is not None:
xyz[1] = str(y)
if z is not None:
xyz[2] = str(z)
origin.set("xyz", " ".join(xyz))
if __name__ == "__main__":
main()
def _set_inertia(
inertial: ET.Element,
ixx: float | None = None,
iyy: float | None = None,
izz: float | None = None,
ixy: float | None = None,
iyz: float | None = None,
ixz: float | None = None,
) -> None:
ine = inertial.find("inertia")
if ine is None:
return
for attr, val in [
("ixx", ixx), ("iyy", iyy), ("izz", izz),
("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
]:
if val is not None:
ine.set(attr, str(val))

View File

@@ -0,0 +1,5 @@
"""Motor-only system identification subpackage.
Identifies JGB37-520 DC motor dynamics (no pendulum, no limits)
so the MuJoCo simulation matches the real hardware response.
"""

364
src/sysid/motor/capture.py Normal file
View File

@@ -0,0 +1,364 @@
"""Capture a motor-only trajectory under random excitation (PRBS-style).
Connects to the ESP32 running the simplified sysid firmware (no pendulum,
no limits), sends random PWM commands, and records motor angle + velocity
at ~ 50 Hz.
Firmware serial protocol (115200 baud):
Commands: M<speed>\\n R\\n S\\n G\\n H\\n P\\n
State: S,<millis>,<encoder_count>,<rpm>,<applied_speed>,<enc_vel_cps>\\n
Usage:
python -m src.sysid.motor.capture --duration 20
python -m src.sysid.motor.capture --duration 30 --amplitude 200
"""
from __future__ import annotations
import argparse
import math
import random
import threading
import time
from datetime import datetime
from pathlib import Path
from typing import Any
import numpy as np
import structlog
import yaml
log = structlog.get_logger()
# ── Default asset path ───────────────────────────────────────────────
_DEFAULT_ASSET = "assets/motor"
# ── Serial protocol helpers ──────────────────────────────────────────
def _parse_state_line(line: str) -> dict[str, Any] | None:
"""Parse an ``S,…`` state line from the motor sysid firmware.
Format: S,<millis>,<encoder_count>,<rpm>,<applied_speed>,<enc_vel_cps>
"""
if not line.startswith("S,"):
return None
parts = line.split(",")
if len(parts) < 6:
return None
try:
return {
"timestamp_ms": int(parts[1]),
"encoder_count": int(parts[2]),
"rpm": float(parts[3]),
"applied_speed": int(parts[4]),
"enc_vel_cps": float(parts[5]),
}
except (ValueError, IndexError):
return None
# ── Background serial reader ─────────────────────────────────────────
class _SerialReader:
"""Minimal background reader for the ESP32 serial stream."""
def __init__(self, port: str, baud: int = 115200):
import serial as _serial
self._serial_mod = _serial
self.ser = _serial.Serial(port, baud, timeout=0.05)
time.sleep(2) # Wait for ESP32 boot
self.ser.reset_input_buffer()
self._latest: dict[str, Any] = {}
self._seq: int = 0
self._lock = threading.Lock()
self._cond = threading.Condition(self._lock)
self._running = True
self._thread = threading.Thread(target=self._reader_loop, daemon=True)
self._thread.start()
def _reader_loop(self) -> None:
while self._running:
try:
if self.ser.in_waiting:
line = (
self.ser.readline()
.decode("utf-8", errors="ignore")
.strip()
)
parsed = _parse_state_line(line)
if parsed is not None:
with self._cond:
self._latest = parsed
self._seq += 1
self._cond.notify_all()
elif line and not line.startswith("S,"):
# Log non-state lines (READY, PONG, WARN, etc.)
log.debug("serial_info", line=line)
else:
time.sleep(0.001)
except (OSError, self._serial_mod.SerialException):
log.critical("serial_lost")
break
def send(self, cmd: str) -> None:
try:
self.ser.write(f"{cmd}\n".encode())
except (OSError, self._serial_mod.SerialException):
log.critical("serial_send_failed", cmd=cmd)
def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
"""Wait until a *new* state line arrives, then return it."""
with self._cond:
seq_before = self._seq
if not self._cond.wait_for(
lambda: self._seq > seq_before, timeout=timeout
):
return {}
return dict(self._latest)
def close(self) -> None:
self._running = False
self.send("H")
self.send("M0")
time.sleep(0.1)
self._thread.join(timeout=1.0)
self.ser.close()
# ── PRBS excitation signal ───────────────────────────────────────────
class _PRBSExcitation:
"""Random hold-value excitation with configurable amplitude and hold time."""
def __init__(
self,
amplitude: int = 200,
hold_min_ms: int = 50,
hold_max_ms: int = 400,
):
self.amplitude = amplitude
self.hold_min_ms = hold_min_ms
self.hold_max_ms = hold_max_ms
self._current: int = 0
self._switch_time: float = 0.0
self._new_value()
def _new_value(self) -> None:
self._current = random.randint(-self.amplitude, self.amplitude)
hold_ms = random.randint(self.hold_min_ms, self.hold_max_ms)
self._switch_time = time.monotonic() + hold_ms / 1000.0
def __call__(self) -> int:
if time.monotonic() >= self._switch_time:
self._new_value()
return self._current
# ── Main capture loop ────────────────────────────────────────────────
def capture(
asset_path: str | Path = _DEFAULT_ASSET,
port: str = "/dev/cu.usbserial-0001",
baud: int = 115200,
duration: float = 20.0,
amplitude: int = 200,
hold_min_ms: int = 50,
hold_max_ms: int = 400,
dt: float = 0.02,
) -> Path:
"""Run motor-only capture and return the path to the saved .npz file.
Stream-driven: blocks on each firmware state line (~50 Hz),
sends next motor command immediately, records both.
No time.sleep pacing — locked to firmware clock.
The recording stores:
- time: wall-clock seconds since start
- action: normalised action = applied_speed / 255
- motor_angle: shaft angle in radians (from encoder)
- motor_vel: shaft velocity in rad/s (from encoder velocity)
"""
asset_path = Path(asset_path).resolve()
# Load hardware config for encoder conversion.
hw_yaml = asset_path / "hardware.yaml"
if not hw_yaml.exists():
raise FileNotFoundError(f"hardware.yaml not found in {asset_path}")
raw_hw = yaml.safe_load(hw_yaml.read_text())
ppr = raw_hw.get("encoder", {}).get("ppr", 11)
gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0)
counts_per_rev: float = ppr * gear_ratio * 4.0
max_pwm = raw_hw.get("motor", {}).get("max_pwm", 255)
log.info(
"hardware_config",
ppr=ppr,
gear_ratio=gear_ratio,
counts_per_rev=counts_per_rev,
max_pwm=max_pwm,
)
# Connect.
reader = _SerialReader(port, baud)
excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)
# Prepare recording buffers.
max_samples = int(duration / dt) + 500
rec_time = np.zeros(max_samples, dtype=np.float64)
rec_action = np.zeros(max_samples, dtype=np.float64)
rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
rec_motor_vel = np.zeros(max_samples, dtype=np.float64)
# Reset encoder to zero.
reader.send("R")
time.sleep(0.1)
# Start streaming.
reader.send("G")
time.sleep(0.1)
log.info(
"capture_starting",
port=port,
duration=duration,
amplitude=amplitude,
hold_range_ms=f"{hold_min_ms}{hold_max_ms}",
)
idx = 0
pwm = 0
last_esp_ms = -1
t0 = time.monotonic()
try:
while True:
state = reader.read_blocking(timeout=0.1)
if not state:
continue
# Deduplicate by firmware timestamp.
esp_ms = state.get("timestamp_ms", 0)
if esp_ms == last_esp_ms:
continue
last_esp_ms = esp_ms
elapsed = time.monotonic() - t0
if elapsed >= duration:
break
# Generate next excitation PWM.
pwm = excitation()
# Send command.
reader.send(f"M{pwm}")
# Convert encoder to angle/velocity.
enc = state.get("encoder_count", 0)
motor_angle = enc / counts_per_rev * 2.0 * math.pi
motor_vel = (
state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi
)
# Use firmware-applied speed for the action.
applied = state.get("applied_speed", 0)
action_norm = applied / 255.0
if idx < max_samples:
rec_time[idx] = elapsed
rec_action[idx] = action_norm
rec_motor_angle[idx] = motor_angle
rec_motor_vel[idx] = motor_vel
idx += 1
else:
break
if idx % 50 == 0:
log.info(
"capture_progress",
elapsed=f"{elapsed:.1f}/{duration:.0f}s",
samples=idx,
pwm=pwm,
angle_deg=f"{math.degrees(motor_angle):.1f}",
vel_rps=f"{motor_vel / (2 * math.pi):.1f}",
)
finally:
reader.send("M0")
reader.close()
# Trim.
rec_time = rec_time[:idx]
rec_action = rec_action[:idx]
rec_motor_angle = rec_motor_angle[:idx]
rec_motor_vel = rec_motor_vel[:idx]
# Save.
recordings_dir = asset_path / "recordings"
recordings_dir.mkdir(exist_ok=True)
stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
out_path = recordings_dir / f"motor_capture_{stamp}.npz"
np.savez_compressed(
out_path,
time=rec_time,
action=rec_action,
motor_angle=rec_motor_angle,
motor_vel=rec_motor_vel,
)
log.info(
"capture_saved",
path=str(out_path),
samples=idx,
duration_actual=f"{rec_time[-1]:.2f}s" if idx > 0 else "0s",
)
return out_path
# ── CLI ──────────────────────────────────────────────────────────────
def main() -> None:
parser = argparse.ArgumentParser(
description="Capture motor-only trajectory for system identification."
)
parser.add_argument(
"--asset-path", type=str, default=_DEFAULT_ASSET,
help="Path to motor asset directory (contains hardware.yaml)",
)
parser.add_argument(
"--port", type=str, default="/dev/cu.usbserial-0001",
help="Serial port for ESP32",
)
parser.add_argument("--baud", type=int, default=115200)
parser.add_argument("--duration", type=float, default=20.0, help="Capture duration (s)")
parser.add_argument(
"--amplitude", type=int, default=200,
help="Max PWM magnitude (0255, firmware allows full range)",
)
parser.add_argument("--hold-min-ms", type=int, default=50, help="PRBS min hold (ms)")
parser.add_argument("--hold-max-ms", type=int, default=400, help="PRBS max hold (ms)")
parser.add_argument("--dt", type=float, default=0.02, help="Nominal sample period (s)")
args = parser.parse_args()
capture(
asset_path=args.asset_path,
port=args.port,
baud=args.baud,
duration=args.duration,
amplitude=args.amplitude,
hold_min_ms=args.hold_min_ms,
hold_max_ms=args.hold_max_ms,
dt=args.dt,
)
if __name__ == "__main__":
main()

186
src/sysid/motor/export.py Normal file
View File

@@ -0,0 +1,186 @@
"""Export tuned motor parameters to MJCF and robot.yaml files.
Reads the original motor.xml and robot.yaml, patches with optimised
parameter values, and writes motor_tuned.xml + robot_tuned.yaml.
Usage:
python -m src.sysid.motor.export \
--asset-path assets/motor \
--result assets/motor/motor_sysid_result.json
"""
from __future__ import annotations
import argparse
import copy
import json
import xml.etree.ElementTree as ET
from pathlib import Path
import structlog
import yaml
log = structlog.get_logger()
_DEFAULT_ASSET = "assets/motor"
def export_tuned_files(
asset_path: str | Path,
params: dict[str, float],
) -> tuple[Path, Path]:
"""Write tuned MJCF and robot.yaml files.
Returns (tuned_mjcf_path, tuned_robot_yaml_path).
"""
asset_path = Path(asset_path).resolve()
robot_yaml_path = asset_path / "robot.yaml"
robot_cfg = yaml.safe_load(robot_yaml_path.read_text())
mjcf_path = asset_path / robot_cfg["mjcf"]
# ── Tune MJCF ────────────────────────────────────────────────
tree = ET.parse(str(mjcf_path))
root = tree.getroot()
# Actuator — use average gear for the MJCF model.
gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear"))
gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear"))
gear_avg = None
if gear_pos is not None and gear_neg is not None:
gear_avg = (gear_pos + gear_neg) / 2.0
elif gear_pos is not None:
gear_avg = gear_pos
filter_tau = params.get("actuator_filter_tau")
for act_el in root.iter("general"):
if act_el.get("name") == "motor":
if gear_avg is not None:
act_el.set("gear", str(gear_avg))
if filter_tau is not None:
if filter_tau > 0:
act_el.set("dyntype", "filter")
act_el.set("dynprm", str(filter_tau))
else:
act_el.set("dyntype", "none")
# Joint — average damping & friction for MJCF (asymmetry in runtime).
fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss"))
fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss"))
fl_avg = None
if fl_pos is not None and fl_neg is not None:
fl_avg = (fl_pos + fl_neg) / 2.0
elif fl_pos is not None:
fl_avg = fl_pos
damp_pos = params.get("motor_damping_pos", params.get("motor_damping"))
damp_neg = params.get("motor_damping_neg", params.get("motor_damping"))
damp_avg = None
if damp_pos is not None and damp_neg is not None:
damp_avg = (damp_pos + damp_neg) / 2.0
elif damp_pos is not None:
damp_avg = damp_pos
for jnt in root.iter("joint"):
if jnt.get("name") == "motor_joint":
if damp_avg is not None:
jnt.set("damping", str(damp_avg))
if "motor_armature" in params:
jnt.set("armature", str(params["motor_armature"]))
if fl_avg is not None:
jnt.set("frictionloss", str(fl_avg))
# Rotor mass.
if "rotor_mass" in params:
for geom in root.iter("geom"):
if geom.get("name") == "rotor_disk":
geom.set("mass", str(params["rotor_mass"]))
# Write tuned MJCF.
tuned_mjcf_name = mjcf_path.stem + "_tuned" + mjcf_path.suffix
tuned_mjcf_path = asset_path / tuned_mjcf_name
ET.indent(tree, space=" ")
tree.write(str(tuned_mjcf_path), xml_declaration=True, encoding="unicode")
log.info("tuned_mjcf_written", path=str(tuned_mjcf_path))
# ── Tune robot.yaml ──────────────────────────────────────────
tuned_cfg = copy.deepcopy(robot_cfg)
tuned_cfg["mjcf"] = tuned_mjcf_name
if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
act = tuned_cfg["actuators"][0]
if gear_avg is not None:
act["gear"] = round(gear_avg, 6)
if "actuator_filter_tau" in params:
act["filter_tau"] = round(params["actuator_filter_tau"], 6)
if "motor_damping" in params:
act["damping"] = round(params["motor_damping"], 6)
if "joints" not in tuned_cfg:
tuned_cfg["joints"] = {}
if "motor_joint" not in tuned_cfg["joints"]:
tuned_cfg["joints"]["motor_joint"] = {}
mj = tuned_cfg["joints"]["motor_joint"]
if "motor_armature" in params:
mj["armature"] = round(params["motor_armature"], 6)
if fl_avg is not None:
mj["frictionloss"] = round(fl_avg, 6)
# Asymmetric / hardware-realism / nonlinear parameters.
realism = {}
for key in [
"actuator_gear_pos", "actuator_gear_neg",
"motor_damping_pos", "motor_damping_neg",
"motor_frictionloss_pos", "motor_frictionloss_neg",
"motor_deadzone_pos", "motor_deadzone_neg",
"action_bias",
"viscous_quadratic", "back_emf_gain",
"stribeck_friction_boost", "stribeck_vel",
"gearbox_backlash",
]:
if key in params:
realism[key] = round(params[key], 6)
if realism:
tuned_cfg["hardware_realism"] = realism
tuned_yaml_path = asset_path / "robot_tuned.yaml"
header = (
"# Tuned motor config — generated by src.sysid.motor.optimize\n"
"# Original: robot.yaml\n\n"
)
tuned_yaml_path.write_text(
header + yaml.dump(tuned_cfg, default_flow_style=False, sort_keys=False)
)
log.info("tuned_robot_yaml_written", path=str(tuned_yaml_path))
return tuned_mjcf_path, tuned_yaml_path
# ── CLI ──────────────────────────────────────────────────────────────
def main() -> None:
parser = argparse.ArgumentParser(
description="Export tuned motor parameters to MJCF + robot.yaml."
)
parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
parser.add_argument(
"--result", type=str, default=None,
help="Path to motor_sysid_result.json (auto-detected if omitted)",
)
args = parser.parse_args()
asset_path = Path(args.asset_path).resolve()
if args.result:
result_path = Path(args.result)
else:
result_path = asset_path / "motor_sysid_result.json"
if not result_path.exists():
raise FileNotFoundError(f"Result file not found: {result_path}")
result = json.loads(result_path.read_text())
params = result["best_params"]
export_tuned_files(asset_path=args.asset_path, params=params)
if __name__ == "__main__":
main()

367
src/sysid/motor/optimize.py Normal file
View File

@@ -0,0 +1,367 @@
"""CMA-ES optimiser — fit motor simulation parameters to a real recording.
Motor-only version: minimises trajectory-matching cost between MuJoCo
rollout and recorded motor angle + velocity.
Usage:
python -m src.sysid.motor.optimize \
--recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
# Quick test:
python -m src.sysid.motor.optimize --recording <file>.npz \
--max-generations 20 --population-size 10
"""
from __future__ import annotations
import argparse
import json
import time
from datetime import datetime
from pathlib import Path
import numpy as np
import structlog
from src.sysid.motor.preprocess import recompute_velocity
from src.sysid.motor.rollout import (
MOTOR_PARAMS,
ParamSpec,
bounds_arrays,
defaults_vector,
params_to_dict,
rollout,
windowed_rollout,
)
log = structlog.get_logger()
_DEFAULT_ASSET = "assets/motor"
# ── Cost function ────────────────────────────────────────────────────
def _compute_trajectory_cost(
sim: dict[str, np.ndarray],
recording: dict[str, np.ndarray],
pos_weight: float = 1.0,
vel_weight: float = 0.5,
acc_weight: float = 0.0,
dt: float = 0.02,
) -> float:
"""Weighted MSE between simulated and real motor trajectories.
Motor-only: angle, velocity, and optionally acceleration.
Acceleration is computed as finite-difference of velocity.
"""
angle_err = sim["motor_angle"] - recording["motor_angle"]
vel_err = sim["motor_vel"] - recording["motor_vel"]
# Reject NaN results (unstable simulation).
if np.any(~np.isfinite(angle_err)) or np.any(~np.isfinite(vel_err)):
return 1e6
cost = float(
pos_weight * np.mean(angle_err**2)
+ vel_weight * np.mean(vel_err**2)
)
# Optional acceleration term — penalises wrong dynamics (d(vel)/dt).
if acc_weight > 0 and len(vel_err) > 2:
sim_acc = np.diff(sim["motor_vel"]) / dt
real_acc = np.diff(recording["motor_vel"]) / dt
acc_err = sim_acc - real_acc
if np.any(~np.isfinite(acc_err)):
return 1e6
# Normalise by typical acceleration scale (~50 rad/s²) to keep
# the weight intuitive relative to vel/pos terms.
cost += acc_weight * np.mean(acc_err**2) / (50.0**2)
return cost
def cost_function(
params_vec: np.ndarray,
recording: dict[str, np.ndarray],
asset_path: Path,
specs: list[ParamSpec],
sim_dt: float = 0.002,
substeps: int = 10,
pos_weight: float = 1.0,
vel_weight: float = 0.5,
acc_weight: float = 0.0,
window_duration: float = 0.5,
) -> float:
"""Compute trajectory-matching cost for a candidate parameter vector.
Uses multiple-shooting (windowed rollout) by default.
"""
params = params_to_dict(params_vec, specs)
try:
if window_duration > 0:
sim = windowed_rollout(
asset_path=asset_path,
params=params,
recording=recording,
window_duration=window_duration,
sim_dt=sim_dt,
substeps=substeps,
)
else:
sim = rollout(
asset_path=asset_path,
params=params,
actions=recording["action"],
sim_dt=sim_dt,
substeps=substeps,
)
except Exception as exc:
log.warning("rollout_failed", error=str(exc))
return 1e6
return _compute_trajectory_cost(
sim, recording, pos_weight, vel_weight, acc_weight,
dt=np.mean(np.diff(recording["time"])) if len(recording["time"]) > 1 else 0.02,
)
# ── CMA-ES optimisation loop ────────────────────────────────────────
def optimize(
asset_path: str | Path = _DEFAULT_ASSET,
recording_path: str | Path = "",
specs: list[ParamSpec] | None = None,
sigma0: float = 0.3,
population_size: int = 30,
max_generations: int = 300,
sim_dt: float = 0.002,
substeps: int = 10,
pos_weight: float = 1.0,
vel_weight: float = 0.5,
acc_weight: float = 0.0,
window_duration: float = 0.5,
seed: int = 42,
preprocess_vel: bool = True,
sg_window: int = 7,
sg_polyorder: int = 3,
) -> dict:
"""Run CMA-ES optimisation and return results dict."""
from cmaes import CMA
asset_path = Path(asset_path).resolve()
recording_path = Path(recording_path).resolve()
if specs is None:
specs = MOTOR_PARAMS
# Load recording.
recording = dict(np.load(recording_path))
n_samples = len(recording["time"])
duration = recording["time"][-1] - recording["time"][0]
n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
log.info(
"recording_loaded",
path=str(recording_path),
samples=n_samples,
duration=f"{duration:.1f}s",
n_windows=n_windows,
)
# Preprocess velocity: replace noisy firmware finite-difference with
# smooth Savitzky-Golay derivative of the angle signal.
if preprocess_vel:
recording = recompute_velocity(
recording,
window_length=sg_window,
polyorder=sg_polyorder,
)
# Normalise to [0, 1] for CMA-ES.
lo, hi = bounds_arrays(specs)
x0 = defaults_vector(specs)
span = hi - lo
span[span == 0] = 1.0
def to_normed(x: np.ndarray) -> np.ndarray:
return (x - lo) / span
def from_normed(x_n: np.ndarray) -> np.ndarray:
return x_n * span + lo
x0_normed = to_normed(x0)
bounds_normed = np.column_stack(
[np.zeros(len(specs)), np.ones(len(specs))]
)
optimizer = CMA(
mean=x0_normed,
sigma=sigma0,
bounds=bounds_normed,
population_size=population_size,
seed=seed,
)
best_cost = float("inf")
best_params_vec = x0.copy()
history: list[tuple[int, float]] = []
log.info(
"cmaes_starting",
n_params=len(specs),
population=population_size,
max_gens=max_generations,
sigma0=sigma0,
)
t0 = time.monotonic()
for gen in range(max_generations):
solutions = []
for _ in range(optimizer.population_size):
x_normed = optimizer.ask()
x_natural = from_normed(x_normed)
x_natural = np.clip(x_natural, lo, hi)
c = cost_function(
x_natural,
recording,
asset_path,
specs,
sim_dt=sim_dt,
substeps=substeps,
pos_weight=pos_weight,
vel_weight=vel_weight,
acc_weight=acc_weight,
window_duration=window_duration,
)
solutions.append((x_normed, c))
if c < best_cost:
best_cost = c
best_params_vec = x_natural.copy()
optimizer.tell(solutions)
history.append((gen, best_cost))
elapsed = time.monotonic() - t0
if gen % 5 == 0 or gen == max_generations - 1:
log.info(
"cmaes_generation",
gen=gen,
best_cost=f"{best_cost:.6f}",
elapsed=f"{elapsed:.1f}s",
gen_best=f"{min(c for _, c in solutions):.6f}",
)
total_time = time.monotonic() - t0
best_params = params_to_dict(best_params_vec, specs)
log.info(
"cmaes_finished",
best_cost=f"{best_cost:.6f}",
total_time=f"{total_time:.1f}s",
evaluations=max_generations * population_size,
)
# Log parameter comparison.
defaults = params_to_dict(defaults_vector(specs), specs)
for name in best_params:
d = defaults[name]
b = best_params[name]
change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
log.info(
"param_result",
name=name,
default=f"{d:.6g}",
tuned=f"{b:.6g}",
change=f"{change_pct:+.1f}%",
)
return {
"best_params": best_params,
"best_cost": best_cost,
"history": history,
"recording": str(recording_path),
"param_names": [s.name for s in specs],
"defaults": {s.name: s.default for s in specs},
"timestamp": datetime.now().isoformat(),
}
# ── CLI ──────────────────────────────────────────────────────────────
def main() -> None:
parser = argparse.ArgumentParser(
description="Fit motor simulation parameters to a real recording (CMA-ES)."
)
parser.add_argument(
"--asset-path", type=str, default=_DEFAULT_ASSET,
help="Path to motor asset directory",
)
parser.add_argument(
"--recording", type=str, required=True,
help="Path to .npz recording file",
)
parser.add_argument("--sigma0", type=float, default=0.3)
parser.add_argument("--population-size", type=int, default=30)
parser.add_argument("--max-generations", type=int, default=300)
parser.add_argument("--sim-dt", type=float, default=0.002)
parser.add_argument("--substeps", type=int, default=10)
parser.add_argument("--pos-weight", type=float, default=1.0)
parser.add_argument("--vel-weight", type=float, default=0.5)
parser.add_argument("--acc-weight", type=float, default=0.0,
help="Weight for acceleration matching (0=off, try 0.1-0.5)")
parser.add_argument("--window-duration", type=float, default=0.5)
parser.add_argument("--seed", type=int, default=42)
parser.add_argument(
"--no-preprocess-vel", action="store_true",
help="Disable Savitzky-Golay velocity preprocessing",
)
parser.add_argument("--sg-window", type=int, default=7,
help="Savitzky-Golay window length (odd, default 7 = 140ms)")
parser.add_argument("--sg-polyorder", type=int, default=3,
help="Savitzky-Golay polynomial order (default 3 = cubic)")
args = parser.parse_args()
result = optimize(
asset_path=args.asset_path,
recording_path=args.recording,
sigma0=args.sigma0,
population_size=args.population_size,
max_generations=args.max_generations,
sim_dt=args.sim_dt,
substeps=args.substeps,
pos_weight=args.pos_weight,
vel_weight=args.vel_weight,
acc_weight=args.acc_weight,
window_duration=args.window_duration,
seed=args.seed,
preprocess_vel=not args.no_preprocess_vel,
sg_window=args.sg_window,
sg_polyorder=args.sg_polyorder,
)
# Save results JSON.
asset_path = Path(args.asset_path).resolve()
result_path = asset_path / "motor_sysid_result.json"
result_json = {k: v for k, v in result.items() if k != "history"}
result_json["history_summary"] = {
"first_cost": result["history"][0][1] if result["history"] else None,
"final_cost": result["history"][-1][1] if result["history"] else None,
"generations": len(result["history"]),
}
result_path.write_text(json.dumps(result_json, indent=2, default=str))
log.info("results_saved", path=str(result_path))
# Export tuned files.
from src.sysid.motor.export import export_tuned_files
export_tuned_files(asset_path=args.asset_path, params=result["best_params"])
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,114 @@
"""Recording preprocessing — clean velocity estimation from angle data.
The ESP32 firmware computes velocity as a raw finite-difference of encoder
counts at 50 Hz. With a 1320 CPR encoder that gives ~0.24 rad/s of
quantisation noise per count. This module replaces the noisy firmware
velocity with a smooth differentiation of the (much cleaner) angle signal.
Method: Savitzky-Golay filter applied to the angle signal, then
differentiated analytically. Zero phase lag, preserves transients well.
This is standard practice in robotics sysid — see e.g. MATLAB System ID
Toolbox, Drake's trajectory processing, or ETH's ANYmal sysid pipeline.
"""
from __future__ import annotations
import numpy as np
from scipy.signal import savgol_filter
import structlog
log = structlog.get_logger()
def recompute_velocity(
recording: dict[str, np.ndarray],
window_length: int = 7,
polyorder: int = 3,
deriv: int = 1,
) -> dict[str, np.ndarray]:
"""Recompute motor_vel from motor_angle using Savitzky-Golay differentiation.
Parameters
----------
recording : dict with at least 'time', 'motor_angle', 'motor_vel' keys.
window_length : SG filter window (must be odd, > polyorder).
7 samples at 50 Hz = 140ms window — good balance of smoothness
and responsiveness. Captures dynamics up to ~7 Hz.
polyorder : Polynomial order for the SG filter (3 = cubic).
deriv : Derivative order (1 = first derivative = velocity).
Returns
-------
New recording dict with 'motor_vel' replaced and 'motor_vel_raw' added.
"""
rec = dict(recording) # shallow copy
times = rec["time"]
angles = rec["motor_angle"]
dt = np.mean(np.diff(times))
# Keep original for diagnostics.
rec["motor_vel_raw"] = rec["motor_vel"].copy()
# Savitzky-Golay derivative: fits a polynomial to each window,
# then takes the analytical derivative → smooth, zero phase lag.
vel_sg = savgol_filter(
angles,
window_length=window_length,
polyorder=polyorder,
deriv=deriv,
delta=dt,
)
# Compute stats for logging.
raw_vel = rec["motor_vel_raw"]
noise_estimate = np.std(raw_vel - vel_sg)
max_diff = np.max(np.abs(raw_vel - vel_sg))
log.info(
"velocity_recomputed",
method="savgol",
window=window_length,
polyorder=polyorder,
dt=f"{dt*1000:.1f}ms",
noise_std=f"{noise_estimate:.3f} rad/s",
max_diff=f"{max_diff:.3f} rad/s",
raw_rms=f"{np.sqrt(np.mean(raw_vel**2)):.3f}",
sg_rms=f"{np.sqrt(np.mean(vel_sg**2)):.3f}",
)
rec["motor_vel"] = vel_sg
return rec
def smooth_velocity(
recording: dict[str, np.ndarray],
cutoff_hz: float = 10.0,
) -> dict[str, np.ndarray]:
"""Alternative: apply zero-phase Butterworth low-pass to motor_vel.
Simpler than SG derivative but introduces slight edge effects.
"""
from scipy.signal import butter, filtfilt
rec = dict(recording)
rec["motor_vel_raw"] = rec["motor_vel"].copy()
dt = np.mean(np.diff(rec["time"]))
fs = 1.0 / dt
nyq = fs / 2.0
norm_cutoff = min(cutoff_hz / nyq, 0.99)
b, a = butter(2, norm_cutoff, btype="low")
rec["motor_vel"] = filtfilt(b, a, rec["motor_vel"])
log.info(
"velocity_smoothed",
method="butterworth",
cutoff_hz=cutoff_hz,
fs=fs,
)
return rec

460
src/sysid/motor/rollout.py Normal file
View File

@@ -0,0 +1,460 @@
"""Deterministic simulation replay — roll out recorded actions in MuJoCo.
Motor-only version: single hinge joint, no pendulum.
Given a parameter vector and recorded actions, builds a MuJoCo model
with overridden dynamics, replays the actions, and returns the simulated
motor angle + velocity for comparison with the real recording.
"""
from __future__ import annotations
import dataclasses
import xml.etree.ElementTree as ET
from pathlib import Path
import mujoco
import numpy as np
import yaml
# ── Tunable parameter specification ──────────────────────────────────
@dataclasses.dataclass
class ParamSpec:
"""Specification for a single tunable parameter."""
name: str
default: float
lower: float
upper: float
log_scale: bool = False
# Motor-only parameters to identify.
# These capture the full transfer function: PWM → shaft angle/velocity.
#
# Asymmetric parameters (pos/neg suffix) capture real-world differences
# between CW and CCW rotation caused by gear mesh, brush contact,
# and H-bridge asymmetry.
MOTOR_PARAMS: list[ParamSpec] = [
# ── Actuator ─────────────────────────────────────────────────
# gear_pos/neg: effective torque per unit ctrl, split by direction.
# Real motors + L298N often have different drive strength per direction.
ParamSpec("actuator_gear_pos", 0.064, 0.005, 0.5, log_scale=True),
ParamSpec("actuator_gear_neg", 0.064, 0.005, 0.5, log_scale=True),
# filter_tau: first-order electrical/driver time constant (s).
# Lower bound 1ms — L298N PWM switching is very fast.
ParamSpec("actuator_filter_tau", 0.03, 0.001, 0.20),
# ── Joint dynamics ───────────────────────────────────────────
# damping_pos/neg: viscous friction (back-EMF), split by direction.
ParamSpec("motor_damping_pos", 0.003, 1e-5, 0.1, log_scale=True),
ParamSpec("motor_damping_neg", 0.003, 1e-5, 0.1, log_scale=True),
# armature: reflected rotor inertia (kg·m²).
ParamSpec("motor_armature", 0.0001, 1e-6, 0.01, log_scale=True),
# frictionloss_pos/neg: Coulomb friction, split by velocity direction.
ParamSpec("motor_frictionloss_pos", 0.03, 0.001, 0.2, log_scale=True),
ParamSpec("motor_frictionloss_neg", 0.03, 0.001, 0.2, log_scale=True),
# ── Nonlinear dynamics ───────────────────────────────────────
# viscous_quadratic: velocity-squared drag term (N·m·s²/rad²).
# Captures nonlinear friction that increases at high speed (air drag,
# grease viscosity, etc.). Opposes motion.
ParamSpec("viscous_quadratic", 0.0, 0.0, 0.005),
# back_emf_gain: torque reduction proportional to |vel × ctrl|.
# Models the back-EMF effect: at high speed the motor produces less
# torque because the voltage drop across the armature is smaller.
ParamSpec("back_emf_gain", 0.0, 0.0, 0.05),
# stribeck_vel: characteristic velocity below which Coulomb friction
# is boosted (Stribeck effect). 0 = standard Coulomb only.
ParamSpec("stribeck_friction_boost", 0.0, 0.0, 0.15),
ParamSpec("stribeck_vel", 2.0, 0.1, 8.0),
# ── Rotor load ───────────────────────────────────────────────
ParamSpec("rotor_mass", 0.012, 0.002, 0.05, log_scale=True),
# ── Hardware realism ─────────────────────────────────────────
# deadzone_pos/neg: minimum |action| per direction.
ParamSpec("motor_deadzone_pos", 0.08, 0.0, 0.30),
ParamSpec("motor_deadzone_neg", 0.08, 0.0, 0.30),
# action_bias: constant offset added to ctrl (H-bridge asymmetry).
ParamSpec("action_bias", 0.0, -0.10, 0.10),
# ── Gearbox backlash ─────────────────────────────────────────
# backlash_halfwidth: half the angular deadband (rad) in the gearbox.
# When the motor reverses, the shaft doesn't move until the backlash
# gap is taken up. Typical for 30:1 plastic/metal spur gears.
ParamSpec("gearbox_backlash", 0.0, 0.0, 0.15),
]
def params_to_dict(
values: np.ndarray, specs: list[ParamSpec] | None = None
) -> dict[str, float]:
if specs is None:
specs = MOTOR_PARAMS
return {s.name: float(values[i]) for i, s in enumerate(specs)}
def defaults_vector(specs: list[ParamSpec] | None = None) -> np.ndarray:
if specs is None:
specs = MOTOR_PARAMS
return np.array([s.default for s in specs], dtype=np.float64)
def bounds_arrays(
specs: list[ParamSpec] | None = None,
) -> tuple[np.ndarray, np.ndarray]:
if specs is None:
specs = MOTOR_PARAMS
lo = np.array([s.lower for s in specs], dtype=np.float64)
hi = np.array([s.upper for s in specs], dtype=np.float64)
return lo, hi
# ── MuJoCo model building with parameter overrides ──────────────────
def _build_model(
asset_path: Path,
params: dict[str, float],
) -> mujoco.MjModel:
"""Build a MuJoCo model from motor.xml with parameter overrides.
Parses the MJCF, patches actuator/joint/body parameters, reloads.
"""
asset_path = Path(asset_path).resolve()
robot_cfg = yaml.safe_load((asset_path / "robot.yaml").read_text())
mjcf_path = asset_path / robot_cfg["mjcf"]
tree = ET.parse(str(mjcf_path))
root = tree.getroot()
# ── Actuator overrides ───────────────────────────────────────
# Use average of pos/neg gear for MuJoCo (asymmetry handled in ctrl).
gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
gear = (gear_pos + gear_neg) / 2.0
filter_tau = params.get("actuator_filter_tau", 0.03)
for act_el in root.iter("general"):
if act_el.get("name") == "motor":
act_el.set("gear", str(gear))
if filter_tau > 0:
act_el.set("dyntype", "filter")
act_el.set("dynprm", str(filter_tau))
else:
act_el.set("dyntype", "none")
act_el.set("dynprm", "1")
# ── Joint overrides ──────────────────────────────────────────
# Damping and friction are asymmetric + nonlinear → applied manually.
# Set MuJoCo damping & frictionloss to 0; we handle them in qfrc_applied.
armature = params.get("motor_armature", 0.0001)
for jnt in root.iter("joint"):
if jnt.get("name") == "motor_joint":
jnt.set("damping", "0")
jnt.set("armature", str(armature))
jnt.set("frictionloss", "0")
# ── Rotor mass override ──────────────────────────────────────
rotor_mass = params.get("rotor_mass", 0.012)
for geom in root.iter("geom"):
if geom.get("name") == "rotor_disk":
geom.set("mass", str(rotor_mass))
# Write temp file and load.
tmp_path = asset_path / "_tmp_motor_sysid.xml"
try:
tree.write(str(tmp_path), xml_declaration=True, encoding="unicode")
model = mujoco.MjModel.from_xml_path(str(tmp_path))
finally:
tmp_path.unlink(missing_ok=True)
return model
# ── Action + asymmetry transforms ────────────────────────────────────
def _transform_action(
action: float,
params: dict[str, float],
) -> float:
"""Apply bias, direction-dependent deadzone, and gear scaling.
The MuJoCo actuator has the *average* gear ratio. We rescale the
control signal so that ``ctrl × gear_avg ≈ action × gear_dir``.
"""
# Constant bias (H-bridge asymmetry).
action = action + params.get("action_bias", 0.0)
# Direction-dependent deadzone.
dz_pos = params.get("motor_deadzone_pos", params.get("motor_deadzone", 0.08))
dz_neg = params.get("motor_deadzone_neg", params.get("motor_deadzone", 0.08))
if action >= 0 and action < dz_pos:
return 0.0
if action < 0 and action > -dz_neg:
return 0.0
# Direction-dependent gear scaling.
# MuJoCo model uses gear_avg; we rescale ctrl to get the right torque.
gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
gear_avg = (gear_pos + gear_neg) / 2.0
if gear_avg < 1e-8:
return 0.0
gear_dir = gear_pos if action >= 0 else gear_neg
return action * (gear_dir / gear_avg)
def _apply_forces(
data: mujoco.MjData,
vel: float,
ctrl: float,
params: dict[str, float],
backlash_state: list[float] | None = None,
) -> None:
"""Apply all manual forces: asymmetric friction, damping, and nonlinear terms.
Everything that MuJoCo can't represent with its symmetric joint model
is injected here via ``qfrc_applied``.
Forces applied (all oppose motion or reduce torque):
1. Asymmetric Coulomb friction (with Stribeck boost at low speed)
2. Asymmetric viscous damping
3. Quadratic velocity drag
4. Back-EMF torque reduction (proportional to |vel|)
Backlash:
If backlash_state is provided, it is a 1-element list [gap_pos].
gap_pos tracks the motor's position within the backlash deadband.
When inside the gap, no actuator torque is transmitted to the
output shaft — only friction forces act.
"""
torque = 0.0
# ── Gearbox backlash ──────────────────────────────────────────
# Model: the gear teeth have play of 2×halfwidth radians.
# We track where the motor is within that gap. When at the
# edge (contact), actuator torque passes through normally.
# When inside the gap, no actuator torque is transmitted.
backlash_hw = params.get("gearbox_backlash", 0.0)
actuator_torque_scale = 1.0 # 1.0 = full contact, 0.0 = in gap
if backlash_hw > 0 and backlash_state is not None:
# gap_pos: how far into the backlash gap we are.
# Range: [-backlash_hw, +backlash_hw]
# At ±backlash_hw, gears are in contact and torque transmits.
gap = backlash_state[0]
# Update gap position based on velocity.
dt_sub = data.model.opt.timestep
gap += vel * dt_sub
# Clamp to backlash range.
if gap > backlash_hw:
gap = backlash_hw
elif gap < -backlash_hw:
gap = -backlash_hw
backlash_state[0] = gap
# If not at contact edge, no torque transmitted.
if abs(gap) < backlash_hw - 1e-8:
actuator_torque_scale = 0.0
else:
actuator_torque_scale = 1.0
# ── 1. Coulomb friction (direction-dependent + Stribeck) ─────
fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss", 0.03))
fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss", 0.03))
stribeck_boost = params.get("stribeck_friction_boost", 0.0)
stribeck_vel = params.get("stribeck_vel", 2.0)
if abs(vel) > 1e-6:
fl = fl_pos if vel > 0 else fl_neg
# Stribeck: boost friction at low speed. Exponential decay.
if stribeck_boost > 0 and stribeck_vel > 0:
fl = fl * (1.0 + stribeck_boost * np.exp(-abs(vel) / stribeck_vel))
# Coulomb: constant magnitude, opposes motion.
torque -= np.sign(vel) * fl
# ── 2. Asymmetric viscous damping ────────────────────────────
damp_pos = params.get("motor_damping_pos", params.get("motor_damping", 0.003))
damp_neg = params.get("motor_damping_neg", params.get("motor_damping", 0.003))
damp = damp_pos if vel > 0 else damp_neg
torque -= damp * vel
# ── 3. Quadratic velocity drag ───────────────────────────────
visc_quad = params.get("viscous_quadratic", 0.0)
if visc_quad > 0:
torque -= visc_quad * vel * abs(vel)
# ── 4. Back-EMF torque reduction ─────────────────────────────
# At high speed, the motor's effective torque is reduced because
# back-EMF opposes the supply voltage. Modelled as a torque that
# opposes the control signal proportional to speed.
bemf = params.get("back_emf_gain", 0.0)
if bemf > 0 and abs(ctrl) > 1e-6:
# The reduction should oppose the actuator torque direction.
torque -= bemf * vel * np.sign(ctrl) * actuator_torque_scale
# ── 5. Scale actuator contribution by backlash state ─────────
# When in the backlash gap, MuJoCo's actuator force should not
# transmit. We cancel it by applying an opposing force.
if actuator_torque_scale < 1.0:
# The actuator_force from MuJoCo will be applied by mj_step.
# We need to counteract it. data.qfrc_actuator isn't set yet
# at this point (pre-step), so we zero the ctrl instead.
# This is handled in the rollout loop by zeroing ctrl.
pass
data.qfrc_applied[0] = max(-10.0, min(10.0, torque))
return actuator_torque_scale
# ── Simulation rollout ───────────────────────────────────────────────
def rollout(
asset_path: str | Path,
params: dict[str, float],
actions: np.ndarray,
sim_dt: float = 0.002,
substeps: int = 10,
) -> dict[str, np.ndarray]:
"""Open-loop replay of recorded actions.
Parameters
----------
asset_path : motor asset directory
params : named parameter overrides
actions : (N,) normalised actions [-1, 1] from the recording
sim_dt : MuJoCo physics timestep
substeps : physics substeps per control step (ctrl_dt = sim_dt × substeps)
Returns
-------
dict with motor_angle (N,) and motor_vel (N,).
"""
asset_path = Path(asset_path).resolve()
model = _build_model(asset_path, params)
model.opt.timestep = sim_dt
data = mujoco.MjData(model)
mujoco.mj_resetData(model, data)
n = len(actions)
sim_motor_angle = np.zeros(n, dtype=np.float64)
sim_motor_vel = np.zeros(n, dtype=np.float64)
# Backlash state: [gap_position]. Starts at 0 (centered in gap).
backlash_state = [0.0]
for i in range(n):
ctrl = _transform_action(actions[i], params)
data.ctrl[0] = ctrl
for _ in range(substeps):
scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
# If in backlash gap, zero ctrl so actuator torque doesn't transmit.
if scale < 1.0:
data.ctrl[0] = 0.0
else:
data.ctrl[0] = ctrl
mujoco.mj_step(model, data)
# Bail out on NaN/instability.
if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
sim_motor_angle[i:] = np.nan
sim_motor_vel[i:] = np.nan
break
sim_motor_angle[i] = data.qpos[0]
sim_motor_vel[i] = data.qvel[0]
return {
"motor_angle": sim_motor_angle,
"motor_vel": sim_motor_vel,
}
def windowed_rollout(
asset_path: str | Path,
params: dict[str, float],
recording: dict[str, np.ndarray],
window_duration: float = 0.5,
sim_dt: float = 0.002,
substeps: int = 10,
) -> dict[str, np.ndarray]:
"""Multiple-shooting rollout for motor-only sysid.
Splits the recording into short windows. Each window is initialised
from the real motor state, preventing error accumulation.
Returns
-------
dict with motor_angle (N,), motor_vel (N,), n_windows (int).
"""
asset_path = Path(asset_path).resolve()
model = _build_model(asset_path, params)
model.opt.timestep = sim_dt
data = mujoco.MjData(model)
times = recording["time"]
actions = recording["action"]
real_angle = recording["motor_angle"]
real_vel = recording["motor_vel"]
n = len(actions)
sim_motor_angle = np.zeros(n, dtype=np.float64)
sim_motor_vel = np.zeros(n, dtype=np.float64)
# Compute window boundaries.
t0 = times[0]
t_end = times[-1]
window_starts: list[int] = []
current_t = t0
while current_t < t_end:
idx = int(np.searchsorted(times, current_t))
idx = min(idx, n - 1)
window_starts.append(idx)
current_t += window_duration
n_windows = len(window_starts)
for w, w_start in enumerate(window_starts):
w_end = window_starts[w + 1] if w + 1 < n_windows else n
# Init from real state at window start.
mujoco.mj_resetData(model, data)
data.qpos[0] = real_angle[w_start]
data.qvel[0] = real_vel[w_start]
data.ctrl[:] = 0.0
mujoco.mj_forward(model, data)
# Backlash state resets each window (assume contact at start).
backlash_state = [0.0]
for i in range(w_start, w_end):
ctrl = _transform_action(actions[i], params)
data.ctrl[0] = ctrl
for _ in range(substeps):
scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
if scale < 1.0:
data.ctrl[0] = 0.0
else:
data.ctrl[0] = ctrl
mujoco.mj_step(model, data)
# Bail out on NaN/instability — fill rest of window with last good.
if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
sim_motor_angle[i:w_end] = sim_motor_angle[max(i-1, w_start)]
sim_motor_vel[i:w_end] = 0.0
break
sim_motor_angle[i] = data.qpos[0]
sim_motor_vel[i] = data.qvel[0]
return {
"motor_angle": sim_motor_angle,
"motor_vel": sim_motor_vel,
"n_windows": n_windows,
}

View File

@@ -0,0 +1,204 @@
"""Visualise motor system identification — real vs simulated trajectories.
Usage:
python -m src.sysid.motor.visualize \
--recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
# With tuned result:
python -m src.sysid.motor.visualize \
--recording <file>.npz \
--result assets/motor/motor_sysid_result.json
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import numpy as np
import structlog
log = structlog.get_logger()
_DEFAULT_ASSET = "assets/motor"
def visualize(
asset_path: str | Path = _DEFAULT_ASSET,
recording_path: str | Path = "",
result_path: str | Path | None = None,
sim_dt: float = 0.002,
substeps: int = 10,
window_duration: float = 0.5,
save_path: str | Path | None = None,
show: bool = True,
) -> None:
"""Generate 3-panel comparison plot: angle, velocity, action."""
import matplotlib.pyplot as plt
from src.sysid.motor.rollout import (
MOTOR_PARAMS,
defaults_vector,
params_to_dict,
rollout,
windowed_rollout,
)
asset_path = Path(asset_path).resolve()
recording = dict(np.load(recording_path))
t = recording["time"]
actions = recording["action"]
# ── Simulate with default parameters ─────────────────────────
default_params = params_to_dict(defaults_vector(MOTOR_PARAMS), MOTOR_PARAMS)
log.info("simulating_default_params", windowed=window_duration > 0)
if window_duration > 0:
sim_default = windowed_rollout(
asset_path=asset_path,
params=default_params,
recording=recording,
window_duration=window_duration,
sim_dt=sim_dt,
substeps=substeps,
)
else:
sim_default = rollout(
asset_path=asset_path,
params=default_params,
actions=actions,
sim_dt=sim_dt,
substeps=substeps,
)
# ── Simulate with tuned parameters (if available) ────────────
sim_tuned = None
tuned_cost = None
if result_path is not None:
result_path = Path(result_path)
else:
# Auto-detect.
auto = asset_path / "motor_sysid_result.json"
if auto.exists():
result_path = auto
if result_path is not None and result_path.exists():
result = json.loads(result_path.read_text())
tuned_params = result.get("best_params", {})
tuned_cost = result.get("best_cost")
log.info("simulating_tuned_params", cost=tuned_cost)
if window_duration > 0:
sim_tuned = windowed_rollout(
asset_path=asset_path,
params=tuned_params,
recording=recording,
window_duration=window_duration,
sim_dt=sim_dt,
substeps=substeps,
)
else:
sim_tuned = rollout(
asset_path=asset_path,
params=tuned_params,
actions=actions,
sim_dt=sim_dt,
substeps=substeps,
)
# ── Plot ─────────────────────────────────────────────────────
fig, axes = plt.subplots(3, 1, figsize=(14, 8), sharex=True)
# Motor angle.
ax = axes[0]
ax.plot(t, np.degrees(recording["motor_angle"]), "k-", lw=1.2, alpha=0.8, label="Real")
ax.plot(t, np.degrees(sim_default["motor_angle"]), "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
if sim_tuned is not None:
ax.plot(t, np.degrees(sim_tuned["motor_angle"]), "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
ax.set_ylabel("Motor Angle (°)")
ax.legend(loc="upper right", fontsize=8)
ax.grid(True, alpha=0.3)
# Motor velocity.
ax = axes[1]
ax.plot(t, recording["motor_vel"], "k-", lw=1.2, alpha=0.8, label="Real")
ax.plot(t, sim_default["motor_vel"], "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
if sim_tuned is not None:
ax.plot(t, sim_tuned["motor_vel"], "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
ax.set_ylabel("Motor Velocity (rad/s)")
ax.legend(loc="upper right", fontsize=8)
ax.grid(True, alpha=0.3)
# Action.
ax = axes[2]
ax.plot(t, actions, "b-", lw=0.8, alpha=0.6)
ax.set_ylabel("Action (norm)")
ax.set_xlabel("Time (s)")
ax.grid(True, alpha=0.3)
ax.set_ylim(-1.1, 1.1)
# Title.
title = "Motor System Identification — Real vs Simulated"
if tuned_cost is not None:
from src.sysid.motor.optimize import cost_function
orig_cost = cost_function(
defaults_vector(MOTOR_PARAMS),
recording,
asset_path,
MOTOR_PARAMS,
sim_dt=sim_dt,
substeps=substeps,
window_duration=window_duration,
)
title += f"\nDefault cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}"
improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0
title += f" ({improvement:+.1f}%)"
fig.suptitle(title, fontsize=12)
plt.tight_layout()
if save_path:
fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
log.info("figure_saved", path=str(save_path))
if show:
plt.show()
else:
plt.close(fig)
# ── CLI ──────────────────────────────────────────────────────────────
def main() -> None:
parser = argparse.ArgumentParser(
description="Visualise motor system identification results."
)
parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
parser.add_argument("--recording", type=str, required=True, help=".npz file")
parser.add_argument("--result", type=str, default=None, help="sysid result JSON")
parser.add_argument("--sim-dt", type=float, default=0.002)
parser.add_argument("--substeps", type=int, default=10)
parser.add_argument("--window-duration", type=float, default=0.5)
parser.add_argument("--save", type=str, default=None, help="Save figure path")
parser.add_argument("--no-show", action="store_true")
args = parser.parse_args()
visualize(
asset_path=args.asset_path,
recording_path=args.recording,
result_path=args.result,
sim_dt=args.sim_dt,
substeps=args.substeps,
window_duration=args.window_duration,
save_path=args.save,
show=not args.no_show,
)
if __name__ == "__main__":
main()

View File

@@ -4,10 +4,15 @@ Minimises the trajectory-matching cost between a MuJoCo rollout and a
recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python
CMA-ES with native box-constraint support).
Motor parameters are **locked** from the motor-only sysid — only
pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
optimised. Velocities are optionally preprocessed with Savitzky-Golay
differentiation for cleaner targets.
Usage:
python -m src.sysid.optimize \
--robot-path assets/rotary_cartpole \
--recording assets/rotary_cartpole/recordings/capture_20260311_120000.npz
--recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz
# Shorter run for testing:
python -m src.sysid.optimize \
@@ -28,6 +33,8 @@ import numpy as np
import structlog
from src.sysid.rollout import (
LOCKED_MOTOR_PARAMS,
PARAM_SETS,
ROTARY_CARTPOLE_PARAMS,
ParamSpec,
bounds_arrays,
@@ -40,6 +47,65 @@ from src.sysid.rollout import (
log = structlog.get_logger()
# ── Velocity preprocessing ───────────────────────────────────────────
def _preprocess_recording(
recording: dict[str, np.ndarray],
preprocess_vel: bool = True,
sg_window: int = 7,
sg_polyorder: int = 3,
) -> dict[str, np.ndarray]:
"""Optionally recompute velocities using Savitzky-Golay differentiation.
Applies SG filtering to both motor_vel and pendulum_vel, replacing
the noisy firmware finite-difference velocities with smooth
analytical derivatives of the (clean) angle signals.
"""
if not preprocess_vel:
return recording
from scipy.signal import savgol_filter
rec = dict(recording)
times = rec["time"]
dt = float(np.mean(np.diff(times)))
# Motor velocity.
rec["motor_vel_raw"] = rec["motor_vel"].copy()
rec["motor_vel"] = savgol_filter(
rec["motor_angle"],
window_length=sg_window,
polyorder=sg_polyorder,
deriv=1,
delta=dt,
)
# Pendulum velocity.
rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
rec["pendulum_vel"] = savgol_filter(
rec["pendulum_angle"],
window_length=sg_window,
polyorder=sg_polyorder,
deriv=1,
delta=dt,
)
motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
log.info(
"velocity_preprocessed",
method="savgol",
sg_window=sg_window,
sg_polyorder=sg_polyorder,
dt_ms=f"{dt*1000:.1f}",
motor_noise_std=f"{motor_noise:.3f} rad/s",
pend_noise_std=f"{pend_noise:.3f} rad/s",
)
return rec
# ── Cost function ────────────────────────────────────────────────────
@@ -63,18 +129,35 @@ def _compute_trajectory_cost(
recording: dict[str, np.ndarray],
pos_weight: float = 1.0,
vel_weight: float = 0.1,
pendulum_scale: float = 3.0,
vel_outlier_threshold: float = 20.0,
) -> float:
"""Weighted MSE between sim and real trajectories."""
"""Weighted MSE between sim and real trajectories.
pendulum_scale multiplies the pendulum terms relative to motor terms.
Samples where the *real* pendulum velocity exceeds
``vel_outlier_threshold`` (rad/s) are excluded from the velocity
terms. These are encoder-wrap artefacts (pendulum swinging past
vertical) that no simulator can reproduce.
"""
motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"])
pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"])
motor_vel_err = sim["motor_vel"] - recording["motor_vel"]
pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"]
# Mask out encoder-wrap velocity spikes so the optimizer doesn't
# waste capacity fitting artefacts.
valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold
if valid.sum() < len(valid):
motor_vel_err = motor_vel_err[valid]
pend_vel_err = pend_vel_err[valid]
return float(
pos_weight * np.mean(motor_err**2)
+ pos_weight * np.mean(pend_err**2)
+ pos_weight * pendulum_scale * np.mean(pend_err**2)
+ vel_weight * np.mean(motor_vel_err**2)
+ vel_weight * np.mean(pend_vel_err**2)
+ vel_weight * pendulum_scale * np.mean(pend_vel_err**2)
)
@@ -87,7 +170,9 @@ def cost_function(
substeps: int = 10,
pos_weight: float = 1.0,
vel_weight: float = 0.1,
pendulum_scale: float = 3.0,
window_duration: float = 0.5,
motor_params: dict[str, float] | None = None,
) -> float:
"""Compute trajectory-matching cost for a candidate parameter vector.
@@ -113,21 +198,56 @@ def cost_function(
window_duration=window_duration,
sim_dt=sim_dt,
substeps=substeps,
motor_params=motor_params,
)
else:
sim = rollout(
robot_path=robot_path,
params=params,
actions=recording["action"],
timesteps=recording["time"],
sim_dt=sim_dt,
substeps=substeps,
motor_params=motor_params,
)
except Exception as exc:
log.warning("rollout_failed", error=str(exc))
return 1e6
return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight)
# Check for NaN in sim output.
for key in ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel"):
if np.any(~np.isfinite(sim[key])):
return 1e6
return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
# ── Parallel evaluation helper (module-level for pickling) ───────────
# Shared state set by the parent process before spawning workers.
_par_recording: dict[str, np.ndarray] = {}
_par_robot_path: Path = Path(".")
_par_specs: list[ParamSpec] = []
_par_kwargs: dict = {}
def _init_worker(recording, robot_path, specs, kwargs):
"""Initialiser run once per worker process."""
global _par_recording, _par_robot_path, _par_specs, _par_kwargs
_par_recording = recording
_par_robot_path = robot_path
_par_specs = specs
_par_kwargs = kwargs
def _eval_candidate(x_natural: np.ndarray) -> float:
"""Evaluate a single candidate — called in worker processes."""
return cost_function(
x_natural,
_par_recording,
_par_robot_path,
_par_specs,
**_par_kwargs,
)
# ── CMA-ES optimisation loop ────────────────────────────────────────
@@ -144,17 +264,25 @@ def optimize(
substeps: int = 10,
pos_weight: float = 1.0,
vel_weight: float = 0.1,
pendulum_scale: float = 3.0,
window_duration: float = 0.5,
seed: int = 42,
preprocess_vel: bool = True,
sg_window: int = 7,
sg_polyorder: int = 3,
) -> dict:
"""Run CMA-ES optimisation and return results.
Motor parameters are locked from the motor-only sysid.
Only pendulum/arm parameters are optimised.
Returns a dict with:
best_params: dict[str, float]
best_cost: float
history: list of (generation, best_cost) tuples
recording: str (path used)
specs: list of param names
motor_params: dict of locked motor params
"""
from cmaes import CMA
@@ -164,8 +292,24 @@ def optimize(
if specs is None:
specs = ROTARY_CARTPOLE_PARAMS
motor_params = dict(LOCKED_MOTOR_PARAMS)
log.info(
"motor_params_locked",
n_params=len(motor_params),
gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
)
# Load recording.
recording = dict(np.load(recording_path))
# Preprocessing: SG velocity recomputation.
recording = _preprocess_recording(
recording,
preprocess_vel=preprocess_vel,
sg_window=sg_window,
sg_polyorder=sg_polyorder,
)
n_samples = len(recording["time"])
duration = recording["time"][-1] - recording["time"][0]
n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
@@ -219,28 +363,54 @@ def optimize(
t0 = time.monotonic()
# ── Parallel evaluation setup ────────────────────────────────
# Each candidate is independent — evaluate them in parallel using
# a process pool. Falls back to sequential if n_workers=1.
import multiprocessing as mp
n_workers = max(1, mp.cpu_count() - 1) # leave 1 core free
eval_kwargs = dict(
sim_dt=sim_dt,
substeps=substeps,
pos_weight=pos_weight,
vel_weight=vel_weight,
pendulum_scale=pendulum_scale,
window_duration=window_duration,
motor_params=motor_params,
)
log.info("parallel_workers", n_workers=n_workers)
# Create a persistent pool (avoids per-generation fork overhead).
pool = None
if n_workers > 1:
pool = mp.Pool(
n_workers,
initializer=_init_worker,
initargs=(recording, robot_path, specs, eval_kwargs),
)
for gen in range(max_generations):
solutions = []
# Ask all candidates first.
candidates_normed = []
candidates_natural = []
for _ in range(optimizer.population_size):
x_normed = optimizer.ask()
x_natural = from_normed(x_normed)
# Clip to bounds (CMA-ES can slightly exceed with sampling noise).
x_natural = np.clip(x_natural, lo, hi)
candidates_normed.append(x_normed)
candidates_natural.append(x_natural)
c = cost_function(
x_natural,
recording,
robot_path,
specs,
sim_dt=sim_dt,
substeps=substeps,
pos_weight=pos_weight,
vel_weight=vel_weight,
window_duration=window_duration,
)
solutions.append((x_normed, c))
# Evaluate in parallel.
if pool is not None:
costs = pool.map(_eval_candidate, candidates_natural)
else:
costs = [cost_function(
x, recording, robot_path, specs, **eval_kwargs
) for x in candidates_natural]
solutions = list(zip(candidates_normed, costs))
for x_natural, c in zip(candidates_natural, costs):
if c < best_cost:
best_cost = c
best_params_vec = x_natural.copy()
@@ -259,6 +429,12 @@ def optimize(
)
total_time = time.monotonic() - t0
# Clean up process pool.
if pool is not None:
pool.close()
pool.join()
best_params = params_to_dict(best_params_vec, specs)
log.info(
@@ -289,6 +465,8 @@ def optimize(
"recording": str(recording_path),
"param_names": [s.name for s in specs],
"defaults": {s.name: s.default for s in specs},
"motor_params": motor_params,
"preprocess_vel": preprocess_vel,
"timestamp": datetime.now().isoformat(),
}
@@ -319,6 +497,8 @@ def main() -> None:
parser.add_argument("--substeps", type=int, default=10)
parser.add_argument("--pos-weight", type=float, default=1.0)
parser.add_argument("--vel-weight", type=float, default=0.1)
parser.add_argument("--pendulum-scale", type=float, default=3.0,
help="Multiplier for pendulum terms relative to motor (default 3.0)")
parser.add_argument(
"--window-duration",
type=float,
@@ -331,11 +511,30 @@ def main() -> None:
action="store_true",
help="Skip exporting tuned files (results JSON only)",
)
parser.add_argument(
"--no-preprocess-vel",
action="store_true",
help="Skip Savitzky-Golay velocity preprocessing",
)
parser.add_argument("--sg-window", type=int, default=7,
help="Savitzky-Golay window length (odd, default 7)")
parser.add_argument("--sg-polyorder", type=int, default=3,
help="Savitzky-Golay polynomial order (default 3)")
parser.add_argument(
"--param-set",
type=str,
default="full",
choices=list(PARAM_SETS.keys()),
help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
)
args = parser.parse_args()
specs = PARAM_SETS[args.param_set]
result = optimize(
robot_path=args.robot_path,
recording_path=args.recording,
specs=specs,
sigma0=args.sigma0,
population_size=args.population_size,
max_generations=args.max_generations,
@@ -343,8 +542,12 @@ def main() -> None:
substeps=args.substeps,
pos_weight=args.pos_weight,
vel_weight=args.vel_weight,
pendulum_scale=args.pendulum_scale,
window_duration=args.window_duration,
seed=args.seed,
preprocess_vel=not args.no_preprocess_vel,
sg_window=args.sg_window,
sg_polyorder=args.sg_polyorder,
)
# Save results JSON.
@@ -369,6 +572,7 @@ def main() -> None:
export_tuned_files(
robot_path=args.robot_path,
params=result["best_params"],
motor_params=result.get("motor_params"),
)

View File

@@ -6,22 +6,51 @@ the simulated trajectory for comparison with the real recording.
This module is the inner loop of the CMA-ES optimizer: it is called once
per candidate parameter vector per generation.
Motor parameters are **locked** from the motor-only sysid result.
The optimizer only fits
pendulum/arm inertial parameters, pendulum joint dynamics, and
``ctrl_limit``. The asymmetric motor model (deadzone, gear compensation,
Coulomb friction, viscous damping, quadratic drag, back-EMF) is applied
via ``ActuatorConfig.transform_ctrl()`` and ``compute_motor_force()``.
"""
from __future__ import annotations
import copy
import dataclasses
import math
import os
import tempfile
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Any
import mujoco
import numpy as np
import yaml
from src.core.robot import ActuatorConfig, JointConfig, RobotConfig
from src.runners.mujoco import ActuatorLimits, load_mujoco_model
from src.sysid._urdf import patch_link_inertials
# ── Locked motor parameters (from motor-only sysid) ─────────────────
# These are FIXED and not optimised. They come from the 12-param model
# in robot.yaml (from motor-only sysid, cost 0.862).
LOCKED_MOTOR_PARAMS: dict[str, float] = {
"actuator_gear_pos": 0.424182,
"actuator_gear_neg": 0.425031,
"actuator_filter_tau": 0.00503506,
"motor_damping_pos": 0.00202682,
"motor_damping_neg": 0.0146651,
"motor_armature": 0.00277342,
"motor_frictionloss_pos": 0.0573282,
"motor_frictionloss_neg": 0.0533549,
"viscous_quadratic": 0.000285329,
"back_emf_gain": 0.00675809,
"motor_deadzone_pos": 0.141291,
"motor_deadzone_neg": 0.0780148,
}
# ── Tunable parameter specification ──────────────────────────────────
@@ -37,33 +66,58 @@ class ParamSpec:
log_scale: bool = False # optimise in log-space (masses, inertias)
# Default parameter specs for the rotary cartpole.
# Pendulum sysid parameters — motor params are LOCKED (not here).
# Order matters: the optimizer maps a flat vector to these specs.
# Defaults are from the URDF exported by Fusion 360.
ROTARY_CARTPOLE_PARAMS: list[ParamSpec] = [
# ── Arm link (URDF) ──────────────────────────────────────────
ParamSpec("arm_mass", 0.010, 0.003, 0.05, log_scale=True),
ParamSpec("arm_com_x", 0.00005, -0.02, 0.02),
ParamSpec("arm_com_y", 0.0065, -0.01, 0.02),
ParamSpec("arm_com_z", 0.00563, -0.01, 0.02),
ParamSpec("arm_mass", 0.02110, 0.005, 0.08, log_scale=True),
ParamSpec("arm_com_x", -0.00710, -0.03, 0.03),
ParamSpec("arm_com_y", 0.00085, -0.02, 0.02),
ParamSpec("arm_com_z", 0.00795, -0.02, 0.02),
# ── Pendulum link (URDF) ─────────────────────────────────────
ParamSpec("pendulum_mass", 0.015, 0.005, 0.05, log_scale=True),
ParamSpec("pendulum_com_x", 0.1583, 0.05, 0.25),
ParamSpec("pendulum_com_y", -0.0983, -0.20, 0.0),
ParamSpec("pendulum_com_z", 0.0, -0.05, 0.05),
ParamSpec("pendulum_ixx", 6.16e-06, 1e-07, 1e-04, log_scale=True),
ParamSpec("pendulum_iyy", 6.16e-06, 1e-07, 1e-04, log_scale=True),
ParamSpec("pendulum_izz", 1.23e-05, 1e-07, 1e-04, log_scale=True),
ParamSpec("pendulum_ixy", 6.10e-06, -1e-04, 1e-04),
# ── Actuator / joint dynamics (robot.yaml) ───────────────────
ParamSpec("actuator_gear", 0.064, 0.01, 0.2, log_scale=True),
ParamSpec("actuator_filter_tau", 0.03, 0.005, 0.15),
ParamSpec("motor_damping", 0.003, 1e-4, 0.05, log_scale=True),
ParamSpec("pendulum_damping", 0.0001, 1e-5, 0.01, log_scale=True),
ParamSpec("motor_armature", 0.0001, 1e-5, 0.01, log_scale=True),
ParamSpec("motor_frictionloss", 0.03, 0.001, 0.1, log_scale=True),
ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
ParamSpec("pendulum_com_z", -0.00346, -0.05, 0.05),
ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
ParamSpec("pendulum_iyy", 3.70e-05, 1e-07, 1e-03, log_scale=True),
ParamSpec("pendulum_izz", 7.83e-05, 1e-07, 1e-03, log_scale=True),
ParamSpec("pendulum_ixy", -6.93e-06, -1e-03, 1e-03),
# ── Pendulum joint dynamics ──────────────────────────────────
ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
# ── Hardware realism (control pipeline) ────────────────────
ParamSpec("ctrl_limit", 0.588, 0.45, 0.70), # MAX_MOTOR_SPEED / 255
]
# Extended set: full params + motor armature (compensates for the
# motor-only sysid having captured arm/pendulum loading in armature).
EXTENDED_PARAMS: list[ParamSpec] = ROTARY_CARTPOLE_PARAMS + [
ParamSpec("motor_armature", 0.00277, 0.0005, 0.02, log_scale=True),
]
# Reduced set: only the 6 most impactful pendulum parameters.
# Good for a fast first pass — converges in ~50 generations.
REDUCED_PARAMS: list[ParamSpec] = [
ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
]
PARAM_SETS: dict[str, list[ParamSpec]] = {
"full": ROTARY_CARTPOLE_PARAMS,
"extended": EXTENDED_PARAMS,
"reduced": REDUCED_PARAMS,
}
def params_to_dict(
values: np.ndarray, specs: list[ParamSpec] | None = None
) -> dict[str, float]:
@@ -97,182 +151,93 @@ def bounds_arrays(
def _build_model(
robot_path: Path,
params: dict[str, float],
) -> mujoco.MjModel:
"""Build a MuJoCo model from URDF + robot.yaml with parameter overrides.
motor_params: dict[str, float] | None = None,
) -> tuple[mujoco.MjModel, ActuatorConfig]:
"""Build a MuJoCo model with sysid overrides.
Follows the same two-step approach as ``MuJoCoRunner._load_model()``:
1. Parse URDF, inject meshdir, load into MuJoCo
2. Export MJCF, inject actuators + joint overrides + param overrides, reload
Returns (model, actuator) — use ``actuator.transform_ctrl()`` and
``actuator.compute_motor_force()`` in the rollout loop.
"""
if motor_params is None:
motor_params = LOCKED_MOTOR_PARAMS
robot_path = Path(robot_path).resolve()
# ── Patch URDF inertial parameters to a temp file ────────────
robot_yaml = yaml.safe_load((robot_path / "robot.yaml").read_text())
urdf_path = robot_path / robot_yaml["urdf"]
# ── Step 1: Load URDF ────────────────────────────────────────
tree = ET.parse(urdf_path)
root = tree.getroot()
patch_link_inertials(tree.getroot(), params)
# Inject meshdir compiler directive.
meshdir = None
for mesh_el in root.iter("mesh"):
fn = mesh_el.get("filename", "")
parent = str(Path(fn).parent)
if parent and parent != ".":
meshdir = parent
break
if meshdir:
mj_ext = ET.SubElement(root, "mujoco")
ET.SubElement(
mj_ext, "compiler", attrib={"meshdir": meshdir, "balanceinertia": "true"}
)
fd, tmp_urdf = tempfile.mkstemp(
suffix=".urdf", prefix="_sysid_", dir=str(robot_path),
)
os.close(fd)
tmp_urdf_path = Path(tmp_urdf)
tree.write(str(tmp_urdf_path), xml_declaration=True, encoding="unicode")
# Override URDF inertial parameters BEFORE MuJoCo loading.
for link in root.iter("link"):
link_name = link.get("name", "")
inertial = link.find("inertial")
if inertial is None:
continue
# ── Build RobotConfig with full motor sysid values ───────────
gear_pos = motor_params.get("actuator_gear_pos", 0.424182)
gear_neg = motor_params.get("actuator_gear_neg", 0.425031)
motor_armature = params.get(
"motor_armature",
motor_params.get("motor_armature", 0.00277342),
)
pend_damping = params.get("pendulum_damping", 0.0001)
pend_frictionloss = params.get("pendulum_frictionloss", 0.0001)
if link_name == "arm":
_set_mass(inertial, params.get("arm_mass"))
_set_com(
inertial,
params.get("arm_com_x"),
params.get("arm_com_y"),
params.get("arm_com_z"),
)
act_cfg = robot_yaml["actuators"][0]
ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
elif link_name == "pendulum":
_set_mass(inertial, params.get("pendulum_mass"))
_set_com(
inertial,
params.get("pendulum_com_x"),
params.get("pendulum_com_y"),
params.get("pendulum_com_z"),
)
_set_inertia(
inertial,
ixx=params.get("pendulum_ixx"),
iyy=params.get("pendulum_iyy"),
izz=params.get("pendulum_izz"),
ixy=params.get("pendulum_ixy"),
)
actuator = ActuatorConfig(
joint=act_cfg["joint"],
type="motor",
gear=(gear_pos, gear_neg),
ctrl_range=(ctrl_lo, ctrl_hi),
deadzone=(
motor_params.get("motor_deadzone_pos", 0.141),
motor_params.get("motor_deadzone_neg", 0.078),
),
damping=(
motor_params.get("motor_damping_pos", 0.002),
motor_params.get("motor_damping_neg", 0.015),
),
frictionloss=(
motor_params.get("motor_frictionloss_pos", 0.057),
motor_params.get("motor_frictionloss_neg", 0.053),
),
filter_tau=motor_params.get("actuator_filter_tau", 0.005),
viscous_quadratic=motor_params.get("viscous_quadratic", 0.000285),
back_emf_gain=motor_params.get("back_emf_gain", 0.00676),
)
robot = RobotConfig(
urdf_path=tmp_urdf_path,
actuators=[actuator],
joints={
"motor_joint": JointConfig(
damping=0.0,
armature=motor_armature,
frictionloss=0.0,
),
"pendulum_joint": JointConfig(
damping=pend_damping,
frictionloss=pend_frictionloss,
),
},
)
# Write temp URDF and load.
tmp_urdf = robot_path / "_tmp_sysid_load.urdf"
tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
try:
model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
model = load_mujoco_model(robot)
finally:
tmp_urdf.unlink(missing_ok=True)
tmp_urdf_path.unlink(missing_ok=True)
# ── Step 2: Export MJCF, inject actuators + overrides ────────
tmp_mjcf = robot_path / "_tmp_sysid_inject.xml"
try:
mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
mjcf_root = ET.fromstring(tmp_mjcf.read_text())
# Actuator.
gear = params.get("actuator_gear", robot_yaml["actuators"][0].get("gear", 0.064))
filter_tau = params.get(
"actuator_filter_tau",
robot_yaml["actuators"][0].get("filter_tau", 0.03),
)
act_cfg = robot_yaml["actuators"][0]
ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
act_elem = ET.SubElement(mjcf_root, "actuator")
attribs: dict[str, str] = {
"name": f"{act_cfg['joint']}_motor",
"joint": act_cfg["joint"],
"gear": str(gear),
"ctrlrange": f"{ctrl_lo} {ctrl_hi}",
}
if filter_tau > 0:
attribs["dyntype"] = "filter"
attribs["dynprm"] = str(filter_tau)
attribs["gaintype"] = "fixed"
attribs["biastype"] = "none"
ET.SubElement(act_elem, "general", attrib=attribs)
else:
ET.SubElement(act_elem, "motor", attrib=attribs)
# Joint overrides.
motor_damping = params.get("motor_damping", 0.003)
pend_damping = params.get("pendulum_damping", 0.0001)
motor_armature = params.get("motor_armature", 0.0001)
motor_frictionloss = params.get("motor_frictionloss", 0.03)
for body in mjcf_root.iter("body"):
for jnt in body.findall("joint"):
name = jnt.get("name")
if name == "motor_joint":
jnt.set("damping", str(motor_damping))
jnt.set("armature", str(motor_armature))
jnt.set("frictionloss", str(motor_frictionloss))
elif name == "pendulum_joint":
jnt.set("damping", str(pend_damping))
# Disable self-collision.
for geom in mjcf_root.iter("geom"):
geom.set("contype", "0")
geom.set("conaffinity", "0")
modified_xml = ET.tostring(mjcf_root, encoding="unicode")
tmp_mjcf.write_text(modified_xml)
model = mujoco.MjModel.from_xml_path(str(tmp_mjcf))
finally:
tmp_mjcf.unlink(missing_ok=True)
return model
return model, actuator
def _set_mass(inertial: ET.Element, mass: float | None) -> None:
if mass is None:
return
mass_el = inertial.find("mass")
if mass_el is not None:
mass_el.set("value", str(mass))
def _set_com(
inertial: ET.Element,
x: float | None,
y: float | None,
z: float | None,
) -> None:
origin = inertial.find("origin")
if origin is None:
return
xyz = origin.get("xyz", "0 0 0").split()
if x is not None:
xyz[0] = str(x)
if y is not None:
xyz[1] = str(y)
if z is not None:
xyz[2] = str(z)
origin.set("xyz", " ".join(xyz))
def _set_inertia(
inertial: ET.Element,
ixx: float | None = None,
iyy: float | None = None,
izz: float | None = None,
ixy: float | None = None,
iyz: float | None = None,
ixz: float | None = None,
) -> None:
ine = inertial.find("inertia")
if ine is None:
return
for attr, val in [
("ixx", ixx), ("iyy", iyy), ("izz", izz),
("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
]:
if val is not None:
ine.set(attr, str(val))
# ── Simulation rollout ───────────────────────────────────────────────
@@ -281,72 +246,58 @@ def rollout(
robot_path: str | Path,
params: dict[str, float],
actions: np.ndarray,
timesteps: np.ndarray,
sim_dt: float = 0.002,
substeps: int = 10,
motor_params: dict[str, float] | None = None,
) -> dict[str, np.ndarray]:
"""Replay recorded actions in MuJoCo with overridden parameters.
Parameters
----------
robot_path : asset directory
params : named parameter overrides
params : named parameter overrides (pendulum/arm only)
actions : (N,) normalised actions [-1, 1] from the recording
timesteps : (N,) wall-clock times (seconds) from the recording
sim_dt : MuJoCo physics timestep
substeps : physics substeps per control step
motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)
Returns
-------
dict with keys: motor_angle, motor_vel, pendulum_angle, pendulum_vel
Each is an (N,) numpy array of simulated values.
"""
if motor_params is None:
motor_params = LOCKED_MOTOR_PARAMS
robot_path = Path(robot_path).resolve()
model = _build_model(robot_path, params)
model, actuator = _build_model(robot_path, params, motor_params)
model.opt.timestep = sim_dt
data = mujoco.MjData(model)
# Start from pendulum hanging down (qpos=0 is down per URDF convention).
mujoco.mj_resetData(model, data)
# Control dt derived from actual recording sample rate.
n = len(actions)
ctrl_dt = sim_dt * substeps
ctrl_limit = params.get("ctrl_limit", 0.588)
# Pre-allocate output.
sim_motor_angle = np.zeros(n, dtype=np.float64)
sim_motor_vel = np.zeros(n, dtype=np.float64)
sim_pend_angle = np.zeros(n, dtype=np.float64)
sim_pend_vel = np.zeros(n, dtype=np.float64)
# Extract actuator limit info for software limit switch.
nu = model.nu
if nu > 0:
jnt_id = model.actuator_trnid[0, 0]
jnt_limited = bool(model.jnt_limited[jnt_id])
jnt_lo = model.jnt_range[jnt_id, 0]
jnt_hi = model.jnt_range[jnt_id, 1]
gear_sign = float(np.sign(model.actuator_gear[0, 0]))
else:
jnt_limited = False
jnt_lo = jnt_hi = gear_sign = 0.0
limits = ActuatorLimits(model)
for i in range(n):
data.ctrl[0] = actions[i]
action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
ctrl = actuator.transform_ctrl(action)
data.ctrl[0] = ctrl
for _ in range(substeps):
# Software limit switch (mirrors MuJoCoRunner).
if jnt_limited and nu > 0:
pos = data.qpos[jnt_id]
if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
data.ctrl[0] = 0.0
elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
data.ctrl[0] = 0.0
limits.enforce(model, data)
data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
mujoco.mj_step(model, data)
sim_motor_angle[i] = data.qpos[0]
sim_motor_vel[i] = data.qvel[0]
sim_pend_angle[i] = data.qpos[1]
sim_motor_vel[i] = data.qvel[0]
sim_pend_vel[i] = data.qvel[1]
return {
@@ -364,6 +315,7 @@ def windowed_rollout(
window_duration: float = 0.5,
sim_dt: float = 0.002,
substeps: int = 10,
motor_params: dict[str, float] | None = None,
) -> dict[str, np.ndarray | float]:
"""Multiple-shooting rollout — split recording into short windows.
@@ -372,18 +324,19 @@ def windowed_rollout(
2. Replay the recorded actions within the window.
3. Record the simulated output.
This prevents error accumulation across the full trajectory, giving
a much smoother cost landscape for the optimizer.
Motor dynamics (asymmetric friction, damping, back-EMF, etc.) are
applied via qfrc_applied using the locked motor sysid parameters.
Parameters
----------
robot_path : asset directory
params : named parameter overrides
params : named parameter overrides (pendulum/arm only)
recording : dict with keys time, action, motor_angle, motor_vel,
pendulum_angle, pendulum_vel (all 1D arrays of length N)
window_duration : length of each shooting window in seconds
sim_dt : MuJoCo physics timestep
substeps : physics substeps per control step
motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)
Returns
-------
@@ -392,8 +345,11 @@ def windowed_rollout(
(stitched from per-window simulations)
n_windows — number of windows used
"""
if motor_params is None:
motor_params = LOCKED_MOTOR_PARAMS
robot_path = Path(robot_path).resolve()
model = _build_model(robot_path, params)
model, actuator = _build_model(robot_path, params, motor_params)
model.opt.timestep = sim_dt
data = mujoco.MjData(model)
@@ -405,67 +361,50 @@ def windowed_rollout(
real_pend_vel = recording["pendulum_vel"]
n = len(actions)
# Pre-allocate output (stitched from all windows).
sim_motor_angle = np.zeros(n, dtype=np.float64)
sim_motor_vel = np.zeros(n, dtype=np.float64)
sim_pend_angle = np.zeros(n, dtype=np.float64)
sim_pend_vel = np.zeros(n, dtype=np.float64)
# Extract actuator limit info.
nu = model.nu
if nu > 0:
jnt_id = model.actuator_trnid[0, 0]
jnt_limited = bool(model.jnt_limited[jnt_id])
jnt_lo = model.jnt_range[jnt_id, 0]
jnt_hi = model.jnt_range[jnt_id, 1]
gear_sign = float(np.sign(model.actuator_gear[0, 0]))
else:
jnt_limited = False
jnt_lo = jnt_hi = gear_sign = 0.0
limits = ActuatorLimits(model)
# Compute window boundaries from recording timestamps.
t0 = times[0]
t_end = times[-1]
window_starts: list[int] = [] # indices into the recording
window_starts: list[int] = []
current_t = t0
while current_t < t_end:
# Find the index closest to current_t.
idx = int(np.searchsorted(times, current_t))
idx = min(idx, n - 1)
window_starts.append(idx)
current_t += window_duration
ctrl_limit = params.get("ctrl_limit", 0.588)
n_windows = len(window_starts)
for w, w_start in enumerate(window_starts):
# Window end: next window start, or end of recording.
w_end = window_starts[w + 1] if w + 1 < n_windows else n
# Initialize MuJoCo state from real data at window start.
mujoco.mj_resetData(model, data)
data.qpos[0] = real_motor[w_start]
data.qpos[1] = real_pend[w_start]
data.qvel[0] = real_motor_vel[w_start]
data.qvel[1] = real_pend_vel[w_start]
data.ctrl[:] = 0.0
# Forward kinematics to make state consistent.
mujoco.mj_forward(model, data)
for i in range(w_start, w_end):
data.ctrl[0] = actions[i]
action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
ctrl = actuator.transform_ctrl(action)
data.ctrl[0] = ctrl
for _ in range(substeps):
if jnt_limited and nu > 0:
pos = data.qpos[jnt_id]
if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
data.ctrl[0] = 0.0
elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
data.ctrl[0] = 0.0
limits.enforce(model, data)
data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
mujoco.mj_step(model, data)
sim_motor_angle[i] = data.qpos[0]
sim_motor_vel[i] = data.qvel[0]
sim_pend_angle[i] = data.qpos[1]
sim_motor_vel[i] = data.qvel[0]
sim_pend_vel[i] = data.qvel[1]
return {

View File

@@ -20,7 +20,6 @@ from __future__ import annotations
import argparse
import json
import math
from pathlib import Path
import numpy as np
@@ -29,6 +28,30 @@ import structlog
log = structlog.get_logger()
def _run_sim(
robot_path: Path,
params: dict[str, float],
recording: dict[str, np.ndarray],
window_duration: float,
sim_dt: float,
substeps: int,
motor_params: dict[str, float],
) -> dict[str, np.ndarray]:
"""Run windowed or open-loop rollout depending on window_duration."""
from src.sysid.rollout import rollout, windowed_rollout
if window_duration > 0:
return windowed_rollout(
robot_path=robot_path, params=params, recording=recording,
window_duration=window_duration, sim_dt=sim_dt,
substeps=substeps, motor_params=motor_params,
)
return rollout(
robot_path=robot_path, params=params, actions=recording["action"],
substeps=substeps, motor_params=motor_params,
)
def visualize(
robot_path: str | Path,
recording_path: str | Path,
@@ -39,31 +62,27 @@ def visualize(
save_path: str | Path | None = None,
show: bool = True,
) -> None:
"""Generate comparison plot.
Parameters
----------
robot_path : robot asset directory
recording_path : .npz file from capture
result_path : sysid_result.json with best_params (optional)
sim_dt / substeps : physics settings for rollout
window_duration : shooting window length (s); 0 = open-loop
save_path : if provided, save figure to this path (PNG, PDF, …)
show : if True, display interactive matplotlib window
"""
"""Generate comparison plot."""
import matplotlib.pyplot as plt
from src.sysid.rollout import (
LOCKED_MOTOR_PARAMS,
ROTARY_CARTPOLE_PARAMS,
defaults_vector,
params_to_dict,
rollout,
windowed_rollout,
)
robot_path = Path(robot_path).resolve()
recording = dict(np.load(recording_path))
motor_params = LOCKED_MOTOR_PARAMS
sim_kwargs = dict(
robot_path=robot_path, recording=recording,
window_duration=window_duration, sim_dt=sim_dt,
substeps=substeps, motor_params=motor_params,
)
t = recording["time"]
actions = recording["action"]
@@ -72,26 +91,15 @@ def visualize(
defaults_vector(ROTARY_CARTPOLE_PARAMS), ROTARY_CARTPOLE_PARAMS
)
log.info("simulating_default_params", windowed=window_duration > 0)
if window_duration > 0:
sim_default = windowed_rollout(
robot_path=robot_path,
params=default_params,
recording=recording,
window_duration=window_duration,
sim_dt=sim_dt,
substeps=substeps,
)
else:
sim_default = rollout(
robot_path=robot_path,
params=default_params,
actions=actions,
timesteps=t,
sim_dt=sim_dt,
substeps=substeps,
)
sim_default = _run_sim(params=default_params, **sim_kwargs)
# ── Simulate with tuned parameters (if available) ────────────
# Resolve result path (explicit or auto-detect).
if result_path is None:
auto = robot_path / "sysid_result.json"
if auto.exists():
result_path = auto
sim_tuned = None
tuned_cost = None
if result_path is not None:
@@ -101,64 +109,21 @@ def visualize(
tuned_params = result.get("best_params", {})
tuned_cost = result.get("best_cost")
log.info("simulating_tuned_params", cost=tuned_cost)
if window_duration > 0:
sim_tuned = windowed_rollout(
robot_path=robot_path,
params=tuned_params,
recording=recording,
window_duration=window_duration,
sim_dt=sim_dt,
substeps=substeps,
)
else:
sim_tuned = rollout(
robot_path=robot_path,
params=tuned_params,
actions=actions,
timesteps=t,
sim_dt=sim_dt,
substeps=substeps,
)
sim_tuned = _run_sim(params=tuned_params, **sim_kwargs)
else:
log.warning("result_file_not_found", path=str(result_path))
else:
# Auto-detect sysid_result.json in robot_path.
auto_result = robot_path / "sysid_result.json"
if auto_result.exists():
result = json.loads(auto_result.read_text())
tuned_params = result.get("best_params", {})
tuned_cost = result.get("best_cost")
log.info("auto_detected_tuned_params", cost=tuned_cost)
if window_duration > 0:
sim_tuned = windowed_rollout(
robot_path=robot_path,
params=tuned_params,
recording=recording,
window_duration=window_duration,
sim_dt=sim_dt,
substeps=substeps,
)
else:
sim_tuned = rollout(
robot_path=robot_path,
params=tuned_params,
actions=actions,
timesteps=t,
sim_dt=sim_dt,
substeps=substeps,
)
# ── Plot ─────────────────────────────────────────────────────
fig, axes = plt.subplots(5, 1, figsize=(14, 12), sharex=True)
channels = [
("motor_angle", "Motor Angle (rad)", True),
("motor_vel", "Motor Velocity (rad/s)", False),
("pendulum_angle", "Pendulum Angle (rad)", True),
("pendulum_vel", "Pendulum Velocity (rad/s)", False),
("motor_angle", "Motor Angle (rad)"),
("motor_vel", "Motor Velocity (rad/s)"),
("pendulum_angle", "Pendulum Angle (rad)"),
("pendulum_vel", "Pendulum Velocity (rad/s)"),
]
for ax, (key, ylabel, is_angle) in zip(axes[:4], channels):
for ax, (key, ylabel) in zip(axes[:4], channels):
real = recording[key]
ax.plot(t, real, "k-", linewidth=1.2, alpha=0.8, label="Real")
@@ -207,6 +172,7 @@ def visualize(
sim_dt=sim_dt,
substeps=substeps,
window_duration=window_duration,
motor_params=motor_params,
)
title += f"\nOriginal cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}"
improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0

View File

@@ -8,7 +8,6 @@ import structlog
import torch
import tqdm
from clearml import Logger
from gymnasium import spaces
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
from skrl.memories.torch import RandomMemory
from skrl.resources.preprocessors.torch import RunningStandardScaler
@@ -129,24 +128,17 @@ class VideoRecordingTrainer(SequentialTrainer):
frames: list[np.ndarray] = []
obs, _ = self.env.reset()
for _ in range(max_steps):
with torch.no_grad():
with torch.no_grad():
for _ in range(max_steps):
action = self.agents.act(obs, timestep=timestep, timesteps=self.timesteps)[0]
obs, _, terminated, truncated, _ = self.env.step(action)
obs, _, terminated, truncated, _ = self.env.step(action)
try:
frame = self.env.render()
except Exception:
# Headless environment without OpenGL — skip video recording
log.warning("video_recording_disabled", reason="render failed (headless?)")
self.env.reset()
return
if frame is not None:
frames.append(frame)
if frame is not None:
frames.append(frame)
if (terminated | truncated).any().item():
break
if (terminated | truncated).any().item():
break
if frames:
path = str(self._video_dir / f"step_{timestep}.mp4")
@@ -159,7 +151,6 @@ class VideoRecordingTrainer(SequentialTrainer):
local_path=path, iteration=timestep,
)
# Restore training state
self.env.reset()