Compare commits

15 Commits

Author SHA1 Message Date
ca0e7b8b03 clean up lot of stuff 2026-03-22 15:49:13 +01:00
d3ed1c25ad ⚗️ experimenting training runs 2026-03-12 00:38:09 +01:00
3b2d6d08f9 update hpo 2026-03-11 23:28:39 +01:00
23801857f4 ♻️ cleanup 2026-03-11 23:16:42 +01:00
3db68255f0 update registry 2026-03-11 23:11:21 +01:00
1a822bd82e 🐛 bug fixes 2026-03-11 23:07:37 +01:00
4115447022 ♻️ crazy refactor 2026-03-11 22:52:01 +01:00
35223b3560 update motor friction 2026-03-09 23:37:10 +01:00
0f13086fee remove custom ema and use mujoco motor control 2026-03-09 22:47:57 +01:00
9813319275 add limit enforce to mujoco for joints 2026-03-09 22:30:48 +01:00
70cd2cdd7d better robot joint loading 2026-03-09 22:17:28 +01:00
9be07d9186 add new ppo mjx config 2026-03-09 21:33:42 +01:00
26ccb1e902 add mjx runner 2026-03-09 21:18:19 +01:00
15da0ef2fd update urdf and dependencies 2026-03-09 20:39:02 +01:00
c753c369b4 add rotary cartpole env 2026-03-08 22:58:32 +01:00
53 changed files with 7525 additions and 272 deletions

22
.gitignore vendored
View File

@@ -1,3 +1,23 @@
outputs/
# IDE / editor
.vscode/
# Training & HPO outputs
outputs/
runs/
smac3_output/
# MuJoCo
MUJOCO_LOG.TXT
# Python
__pycache__/
*.pyc
*.pyo
*.egg-info/
.eggs/
dist/
build/
# Temp files
*.stl
*.scad

View File

@@ -0,0 +1,10 @@
# Classic cartpole — robot hardware config.
urdf: cartpole.urdf
actuators:
- joint: cart_joint
type: motor
gear: 10.0
ctrl_range: [-1.0, 1.0]
damping: 0.05

View File

@@ -0,0 +1,23 @@
# Rotary cartpole (Furuta pendulum) — real hardware config.
# Describes the physical device for the SerialRunner.
# Robot-specific constants that don't belong in the runner config
# (which is machine-specific: port, baud) or the env config
# (which is task-specific: rewards, max_steps).
encoder:
ppr: 11 # pulses per revolution (before quadrature)
gear_ratio: 30.0 # gearbox ratio
# counts_per_rev = ppr × gear_ratio × 4 (quadrature) = 1320
safety:
max_motor_angle_deg: 90.0 # hard termination limit (physical endstop ~70-80°)
soft_limit_deg: 40.0 # progressive penalty ramp starts here
reset:
drive_speed: 80 # PWM magnitude for bang-bang drive-to-center
deadband: 15 # encoder count threshold to consider "centered"
drive_timeout: 3.0 # seconds before giving up on drive-to-center
settle_angle_deg: 2.0 # pendulum angle threshold for "still" (degrees)
settle_vel_dps: 5.0 # pendulum velocity threshold (deg/s)
settle_duration: 0.5 # how long pendulum must stay still (seconds)
settle_timeout: 30.0 # give up waiting after this (seconds)

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,21 @@
# Tuned robot config — generated by src.sysid.optimize
urdf: rotary_cartpole.urdf
actuators:
- joint: motor_joint
type: motor
gear: [0.424182, 0.425031] # torque constant [pos, neg] (motor sysid)
ctrl_range: [-0.592, 0.592] # effective control bound (sysid-tuned)
deadzone: [0.141291, 0.078015] # L298N min |ctrl| for torque [pos, neg]
damping: [0.002027, 0.014665] # viscous damping [pos, neg]
frictionloss: [0.057328, 0.053355] # Coulomb friction [pos, neg]
filter_tau: 0.005035 # 1st-order actuator filter (motor sysid)
viscous_quadratic: 0.000285 # velocity² drag
back_emf_gain: 0.006758 # back-EMF torque reduction
joints:
motor_joint:
armature: 0.002773 # reflected rotor inertia (motor sysid)
frictionloss: 0.0 # handled by motor model via qfrc_applied
pendulum_joint:
damping: 0.000119
frictionloss: 1.0e-05

View File

@@ -0,0 +1,80 @@
<?xml version='1.0' encoding='utf-8'?>
<robot name="rotary_cartpole">
<link name="world" />
<link name="base_link">
<inertial>
<origin xyz="-0.00011 0.00117 0.06055" rpy="0 0 0" />
<mass value="0.921" />
<inertia ixx="0.002385" iyy="0.002484" izz="0.000559" ixy="0.0" iyz="-0.000149" ixz="6e-06" />
</inertial>
<visual>
<origin xyz="0 0 0" rpy="0 0 0" />
<geometry>
<mesh filename="meshes/base_link.stl" scale="0.001 0.001 0.001" />
</geometry>
</visual>
<collision>
<origin xyz="0 0 0" rpy="0 0 0" />
<geometry>
<mesh filename="meshes/base_link.stl" scale="0.001 0.001 0.001" />
</geometry>
</collision>
</link>
<joint name="base_joint" type="fixed">
<parent link="world" />
<child link="base_link" />
</joint>
<link name="arm">
<inertial>
<origin xyz="-0.0071030505291264975 0.0008511826488989179 0.007952020186701035" rpy="0 0 0" />
<mass value="0.02110029934220782" />
<inertia ixx="2.70e-06" iyy="7.80e-07" izz="2.44e-06" ixy="0.0" iyz="7.20e-08" ixz="0.0" />
</inertial>
<visual>
<origin xyz="0.006947 -0.00395 -0.14796" rpy="0 0 0" />
<geometry>
<mesh filename="meshes/arm_1.stl" scale="0.001 0.001 0.001" />
</geometry>
</visual>
<collision>
<origin xyz="0.006947 -0.00395 -0.14796" rpy="0 0 0" />
<geometry>
<mesh filename="meshes/arm_1.stl" scale="0.001 0.001 0.001" />
</geometry>
</collision>
</link>
<joint name="motor_joint" type="revolute">
<origin xyz="-0.006947 0.00395 0.14796" rpy="0 0 0" />
<parent link="base_link" />
<child link="arm" />
<axis xyz="0 0 1" />
<limit lower="-1.5708" upper="1.5708" effort="10.0" velocity="200.0" />
<dynamics damping="0.001" />
</joint>
<link name="pendulum">
<inertial>
<origin xyz="0.060245187591695615 -0.07601707109312682 -0.0034636702158137786" rpy="0 0 0" />
<mass value="0.03936742845036306" />
<inertia ixx="6.202768755990066e-05" iyy="3.70078470430685e-05" izz="7.827356811788924e-05" ixy="-6.925117819616428e-06" iyz="0.0" ixz="0.0" />
</inertial>
<visual>
<origin xyz="0.006895 -0.023224 -0.162953" rpy="0 0 0" />
<geometry>
<mesh filename="meshes/pendulum_1.stl" scale="0.001 0.001 0.001" />
</geometry>
</visual>
<collision>
<origin xyz="0.006895 -0.023224 -0.162953" rpy="0 0 0" />
<geometry>
<mesh filename="meshes/pendulum_1.stl" scale="0.001 0.001 0.001" />
</geometry>
</collision>
</link>
<joint name="pendulum_joint" type="continuous">
<origin xyz="0.000052 0.019274 0.014993" rpy="0 1.5708 0" />
<parent link="arm" />
<child link="pendulum" />
<axis xyz="0 -1 0" />
<dynamics damping="0.0001" />
</joint>
</robot>

View File

@@ -1,11 +1,7 @@
max_steps: 500
robot_path: assets/cartpole
angle_threshold: 0.418
cart_limit: 2.4
reward_alive: 1.0
reward_pole_upright_scale: 1.0
reward_action_penalty_scale: 0.01
model_path: assets/cartpole/cartpole.urdf
actuators:
- joint: cart_joint
gear: 10.0
ctrl_range: [-1.0, 1.0]

19
configs/env/rotary_cartpole.yaml vendored Normal file
View File

@@ -0,0 +1,19 @@
max_steps: 1000
robot_path: assets/rotary_cartpole
reward_upright_scale: 1.0
# ── Regularisation penalties (prevent fast spinning) ─────────────────
motor_vel_penalty: 0.01 # penalise high motor angular velocity
motor_angle_penalty: 0.05 # penalise deviation from centre
action_penalty: 0.05 # penalise large actions (energy cost)
# ── Software safety limit (env-level, always applied) ────────────────
motor_angle_limit_deg: 90.0 # terminate episode if motor exceeds ±90°
# ── HPO search ranges ────────────────────────────────────────────────
hpo:
reward_upright_scale: {min: 0.5, max: 5.0}
motor_vel_penalty: {min: 0.001, max: 0.1}
motor_angle_penalty: {min: 0.01, max: 0.2}
action_penalty: {min: 0.01, max: 0.2}
max_steps: {values: [500, 1000, 2000]}

4
configs/runner/mjx.yaml Normal file
View File

@@ -0,0 +1,4 @@
num_envs: 1024 # MJX shines with many parallel envs
device: auto # auto = cuda if available, else cpu
dt: 0.002
substeps: 10

View File

@@ -1,4 +1,4 @@
num_envs: 16
device: cpu
dt: 0.02
substeps: 2
num_envs: 64
device: auto # auto = cuda if available, else cpu
dt: 0.002
substeps: 10

View File

@@ -0,0 +1,7 @@
# Single-env MuJoCo runner — mimics real hardware timing.
# dt × substeps = 0.002 × 10 = 0.02 s → 50 Hz control, same as serial runner.
num_envs: 1
device: cpu
dt: 0.002
substeps: 10

View File

@@ -0,0 +1,10 @@
# Serial runner — communicates with real hardware over USB/serial.
# Always single-env, CPU-only. Override port on CLI:
# python scripts/train.py runner=serial runner.port=/dev/ttyUSB0
num_envs: 1
device: cpu
port: /dev/cu.usbserial-0001
baud: 115200
dt: 0.02 # control loop period (50 Hz, matches training)
no_data_timeout: 2.0 # seconds of silence before declaring disconnect

32
configs/sysid.yaml Normal file
View File

@@ -0,0 +1,32 @@
# System identification defaults.
# Override via CLI: python scripts/sysid.py optimize --max-generations 50
#
# These are NOT Hydra config groups — the sysid scripts use argparse.
# This file serves as documentation and can be loaded by custom wrappers.
capture:
port: /dev/cu.usbserial-0001
baud: 115200
duration: 20.0 # seconds
amplitude: 150 # max PWM magnitude — must match firmware MAX_MOTOR_SPEED
hold_min_ms: 50 # PRBS min hold time
hold_max_ms: 300 # PRBS max hold time
dt: 0.02 # sample period (50 Hz)
optimize:
sigma0: 0.3 # CMA-ES initial step size (in [0,1] normalised space)
population_size: 50 # candidates per generation
max_generations: 1000 # total generations (≈50,000 evaluations at population_size 50)
sim_dt: 0.002 # MuJoCo physics timestep
substeps: 10 # physics substeps per control step (ctrl_dt = 0.02s)
pos_weight: 1.0 # MSE weight for angle errors
vel_weight: 0.1 # MSE weight for velocity errors
window_duration: 0.5 # multiple-shooting window length (s); 0 = open-loop
seed: 42
# Tunable hardware-realism params (added to ROTARY_CARTPOLE_PARAMS):
# ctrl_limit — effective motor range → exported as ctrl_range in robot.yaml
# motor_deadzone — L298N minimum |action| for torque → exported as deadzone in robot.yaml
# Firmware sends raw (unfiltered) sensor data; EMA filtering is
# handled on the Python side (env transforms) and is NOT part of
# the sysid parameter search.

View File

@@ -1,5 +1,5 @@
hidden_sizes: [128, 128]
total_timesteps: 1000000
total_timesteps: 5000000
rollout_steps: 1024
learning_epochs: 4
mini_batches: 4
@@ -8,6 +8,27 @@ gae_lambda: 0.95
learning_rate: 0.0003
clip_ratio: 0.2
value_loss_scale: 0.5
entropy_loss_scale: 0.01
log_interval: 10
clearml_project: RL-Framework
entropy_loss_scale: 0.05
log_interval: 1000
checkpoint_interval: 50000
initial_log_std: 0.5
min_log_std: -2.0
max_log_std: 2.0
record_video_every: 10000
# ClearML remote execution (GPU worker)
remote: false
# ── HPO search ranges ────────────────────────────────────────────────
# Read by scripts/hpo.py — ignored by TrainerConfig during training.
hpo:
learning_rate: {min: 0.00005, max: 0.001}
clip_ratio: {min: 0.1, max: 0.3}
discount_factor: {min: 0.98, max: 0.999}
gae_lambda: {min: 0.9, max: 0.99}
entropy_loss_scale: {min: 0.0001, max: 0.1}
value_loss_scale: {min: 0.1, max: 1.0}
learning_epochs: {min: 2, max: 8, type: int}
mini_batches: {values: [2, 4, 8, 16]}

View File

@@ -0,0 +1,18 @@
# PPO tuned for MJX (1024+ parallel envs on GPU).
# Inherits defaults + HPO ranges from ppo.yaml.
# With 1024 envs, each timestep collects 1024 samples, so total_timesteps
# can be much lower than the CPU config.
defaults:
- ppo
- _self_
total_timesteps: 300000 # 300K × 1024 envs ≈ 307M env steps
mini_batches: 32 # keep mini-batch size similar (~32K)
learning_rate: 0.001 # ~3x higher LR for 16x larger batch (sqrt scaling)
log_interval: 100
checkpoint_interval: 10000
record_video_every: 10000
remote: false

View File

@@ -0,0 +1,29 @@
# PPO tuned for single-env real-time training on real hardware.
# Inherits defaults + HPO ranges from ppo.yaml.
# ~50 Hz control × 1 env = ~50 timesteps/s.
# 100k timesteps ≈ 33 minutes of wall-clock training (2M total ≈ 11 hours).
defaults:
- ppo
- _self_
hidden_sizes: [256, 256]
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0005 # conservative — can't undo real-world damage
entropy_loss_scale: 0.01
rollout_steps: 2048
mini_batches: 8
log_interval: 2048
checkpoint_interval: 5000 # frequent saves — can't rewind real hardware
initial_log_std: -0.5 # moderate initial exploration
min_log_std: -4.0
max_log_std: 2.0 # cap σ at e² ≈ 7.4
# Never run real-hardware training remotely
remote: false
# Tighter HPO ranges for real hardware (override base ppo.yaml ranges)
hpo:
entropy_loss_scale: {min: 0.00005, max: 0.001}
learning_rate: {min: 0.0003, max: 0.003}

View File

@@ -0,0 +1,25 @@
# PPO tuned for single-env simulation — mimics real hardware training.
# Inherits defaults + HPO ranges from ppo.yaml.
# Same 50 Hz control (runner=mujoco_single), 1 env, conservative hypers.
# Sim runs ~100× faster than real time, so we can afford more timesteps.
defaults:
- ppo
- _self_
hidden_sizes: [256, 256]
total_timesteps: 2000000
learning_epochs: 10
learning_rate: 0.0003
entropy_loss_scale: 0.01
rollout_steps: 2048
mini_batches: 8
log_interval: 2048
checkpoint_interval: 10000
initial_log_std: -0.5
min_log_std: -4.0
max_log_std: 2.0
record_video_every: 50000
remote: false

View File

@@ -3,6 +3,18 @@ gymnasium
hydra-core
omegaconf
mujoco
mujoco-mjx
jax
skrl[torch]
clearml
imageio
imageio-ffmpeg
structlog
pyyaml
pyserial
cmaes
matplotlib
smac>=2.0.0
ConfigSpace
hpbandster
pytest

379
scripts/eval.py Normal file
View File

@@ -0,0 +1,379 @@
"""Evaluate a trained policy on real hardware (or in simulation).
Loads a checkpoint and runs the policy in a closed loop. For real
hardware the serial runner talks to the ESP32; for sim it uses the
MuJoCo runner. A digital-twin MuJoCo viewer mirrors the robot state
in both modes.
Usage (real hardware):
mjpython scripts/eval.py env=rotary_cartpole runner=serial \
checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
Usage (simulation):
mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \
checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
Controls:
Space — pause / resume policy (motor stops while paused)
R — reset environment
Esc — quit
"""
import math
import sys
import time
from pathlib import Path
# Ensure project root is on sys.path
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
import hydra
import mujoco
import mujoco.viewer
import numpy as np
import structlog
import torch
from gymnasium import spaces
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf
from skrl.resources.preprocessors.torch import RunningStandardScaler
from src.core.registry import build_env
from src.models.mlp import SharedMLP
logger = structlog.get_logger()
# ── keyboard state ───────────────────────────────────────────────────
_reset_flag = [False]
_paused = [False]
_quit_flag = [False]
def _key_callback(keycode: int) -> None:
"""Called by MuJoCo viewer on key press."""
if keycode == 32: # GLFW_KEY_SPACE
_paused[0] = not _paused[0]
elif keycode == 82: # GLFW_KEY_R
_reset_flag[0] = True
elif keycode == 256: # GLFW_KEY_ESCAPE
_quit_flag[0] = True
# ── checkpoint loading ───────────────────────────────────────────────
def _infer_hidden_sizes(state_dict: dict[str, torch.Tensor]) -> tuple[int, ...]:
"""Infer hidden layer sizes from a SharedMLP state dict."""
sizes = []
i = 0
while f"net.{i}.weight" in state_dict:
sizes.append(state_dict[f"net.{i}.weight"].shape[0])
i += 2 # skip activation layers (ELU)
return tuple(sizes)
def load_policy(
    checkpoint_path: str,
    observation_space: spaces.Space,
    action_space: spaces.Space,
    device: torch.device = torch.device("cpu"),
) -> tuple[SharedMLP, RunningStandardScaler]:
    """Load a trained SharedMLP + observation normalizer from a checkpoint.

    Returns:
        (model, state_preprocessor) ready for inference.
    """
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)
    policy_weights = checkpoint["policy"]

    # Rebuild the network with the architecture encoded in the saved weights.
    hidden_sizes = _infer_hidden_sizes(policy_weights)
    model = SharedMLP(
        observation_space=observation_space,
        action_space=action_space,
        device=device,
        hidden_sizes=hidden_sizes,
    )
    model.load_state_dict(policy_weights)
    model.eval()

    # Restore running observation statistics, then freeze them so that
    # evaluation does not keep adapting the normalizer.
    preproc_state = checkpoint["state_preprocessor"]
    normalizer = RunningStandardScaler(size=observation_space, device=device)
    normalizer.running_mean = preproc_state["running_mean"].to(device)
    normalizer.running_variance = preproc_state["running_variance"].to(device)
    normalizer.current_count = preproc_state["current_count"]
    normalizer.training = False

    logger.info(
        "checkpoint_loaded",
        path=checkpoint_path,
        hidden_sizes=hidden_sizes,
        obs_mean=[round(x, 3) for x in normalizer.running_mean.tolist()],
        obs_std=[round(x, 3) for x in normalizer.running_variance.sqrt().tolist()],
    )
    return model, normalizer
# ── action arrow overlay ─────────────────────────────────────────────
def _add_action_arrow(viewer, model, data, action_val: float) -> None:
    """Draw an arrow on the actuated body showing applied torque direction.

    Green arrow = positive action, red = negative; the arrow length scales
    with |action_val|. Drawing is skipped for near-zero actions, for models
    without actuators, and — the added robustness fix — when the viewer's
    fixed-size user-scene geom buffer is already full (indexing
    ``geoms[ngeom]`` past ``maxgeom`` would fail).
    """
    if abs(action_val) < 0.01 or model.nu == 0:
        return
    # Guard against overflowing the viewer's preallocated geom buffer.
    if viewer.user_scn.ngeom >= viewer.user_scn.maxgeom:
        return
    jnt_id = model.actuator_trnid[0, 0]
    body_id = model.jnt_bodyid[jnt_id]
    pos = data.xpos[body_id].copy()
    pos[2] += 0.02  # lift the arrow slightly above the body
    # Joint axis rotated into the world frame.
    axis = data.xmat[body_id].reshape(3, 3) @ model.jnt_axis[jnt_id]
    arrow_len = 0.08 * action_val
    direction = axis * np.sign(arrow_len)
    # Build an orthonormal frame whose z-axis is the arrow direction
    # (mjGEOM_ARROW points along its local +z axis).
    z = direction / (np.linalg.norm(direction) + 1e-8)
    up = np.array([0, 0, 1]) if abs(z[2]) < 0.99 else np.array([0, 1, 0])
    x = np.cross(up, z)
    x /= np.linalg.norm(x) + 1e-8
    y = np.cross(z, x)
    mat = np.column_stack([x, y, z]).flatten()
    rgba = np.array(
        [0.2, 0.8, 0.2, 0.8] if action_val > 0 else [0.8, 0.2, 0.2, 0.8],
        dtype=np.float32,
    )
    geom = viewer.user_scn.geoms[viewer.user_scn.ngeom]
    mujoco.mjv_initGeom(
        geom,
        type=mujoco.mjtGeom.mjGEOM_ARROW,
        size=np.array([0.008, 0.008, abs(arrow_len)]),
        pos=pos,
        mat=mat,
        rgba=rgba,
    )
    viewer.user_scn.ngeom += 1
# ── main loops ───────────────────────────────────────────────────────
@hydra.main(version_base=None, config_path="../configs", config_name="config")
def main(cfg: DictConfig) -> None:
    """Entry point: validate the checkpoint path and dispatch to the
    serial (real hardware) or simulation evaluation loop."""
    runtime_choices = HydraConfig.get().runtime.choices
    env_name = runtime_choices.get("env", "cartpole")
    runner_name = runtime_choices.get("runner", "mujoco_single")

    checkpoint_path = cfg.get("checkpoint", None)
    if checkpoint_path is None:
        logger.error("No checkpoint specified. Use: +checkpoint=path/to/agent.pt")
        sys.exit(1)

    # Hydra changes the working directory; resolve user-supplied relative
    # paths against the directory the script was launched from.
    checkpoint_path = str(Path(hydra.utils.get_original_cwd()) / checkpoint_path)
    if not Path(checkpoint_path).exists():
        logger.error("checkpoint_not_found", path=checkpoint_path)
        sys.exit(1)

    if runner_name == "serial":
        _eval_serial(cfg, env_name, checkpoint_path)
    else:
        _eval_sim(cfg, env_name, checkpoint_path)
def _eval_sim(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
    """Evaluate policy in MuJoCo simulation with viewer.

    Runs a closed policy/sim loop until the viewer closes or Esc is
    pressed, honoring the Space (pause) and R (reset) keys set by
    ``_key_callback``. Episodes auto-reset on termination/truncation.
    """
    # Imported lazily so the serial-only path never needs the sim runner.
    from src.runners.mujoco import MuJoCoRunner, MuJoCoRunnerConfig

    env = build_env(env_name, cfg)
    runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
    # Evaluation is always single-env, regardless of the runner config.
    runner_dict["num_envs"] = 1
    runner = MuJoCoRunner(env=env, config=MuJoCoRunnerConfig(**runner_dict))
    device = runner.device
    model, preprocessor = load_policy(
        checkpoint_path, runner.observation_space, runner.action_space, device
    )
    # NOTE(review): reaches into the runner's private _model/_data — assumes
    # MuJoCoRunner keeps one mjData per env; confirm against the runner class.
    mj_model = runner._model
    mj_data = runner._data[0]
    # Control period = physics dt × substeps; used for real-time pacing below.
    dt_ctrl = runner.config.dt * runner.config.substeps
    with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
        obs, _ = runner.reset()
        step = 0
        episode = 0
        episode_reward = 0.0
        logger.info(
            "eval_started",
            env=env_name,
            mode="simulation",
            checkpoint=Path(checkpoint_path).name,
            controls="Space=pause, R=reset, Esc=quit",
        )
        while viewer.is_running() and not _quit_flag[0]:
            # Manual reset requested via the R key.
            if _reset_flag[0]:
                _reset_flag[0] = False
                obs, _ = runner.reset()
                step = 0
                episode += 1
                episode_reward = 0.0
                logger.info("reset", episode=episode)
            if _paused[0]:
                # Keep the viewer responsive while the policy is paused.
                viewer.sync()
                time.sleep(0.05)
                continue
            # Policy inference
            with torch.no_grad():
                # Batch a single observation so the model sees shape (1, obs_dim).
                normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
                action = model.act({"states": normalized_obs}, role="policy")[0]
                action = action.clamp(-1.0, 1.0)
            obs, reward, terminated, truncated, info = runner.step(action)
            episode_reward += reward.item()
            step += 1
            # Sync viewer
            mujoco.mj_forward(mj_model, mj_data)
            viewer.user_scn.ngeom = 0  # clear last frame's overlay geoms
            _add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
            viewer.sync()
            # Periodic debug line with all joint angles in degrees.
            if step % 50 == 0:
                joints = {mj_model.jnt(i).name: round(math.degrees(mj_data.qpos[i]), 1)
                          for i in range(mj_model.njnt)}
                logger.debug(
                    "step", n=step, reward=round(reward.item(), 3),
                    action=round(action[0, 0].item(), 2),
                    ep_reward=round(episode_reward, 1), **joints,
                )
            # Auto-reset on episode end.
            if terminated.any() or truncated.any():
                logger.info(
                    "episode_done", episode=episode, steps=step,
                    total_reward=round(episode_reward, 2),
                    reason="terminated" if terminated.any() else "truncated",
                )
                obs, _ = runner.reset()
                step = 0
                episode += 1
                episode_reward = 0.0
            # Pace the loop to roughly real time.
            time.sleep(dt_ctrl)
    runner.close()
def _eval_serial(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
    """Evaluate policy on real hardware via serial, with digital-twin viewer.

    Mirrors ``_eval_sim`` but talks to the physical robot: the motor is
    explicitly stopped ("M0") on pause and on safety events, and resets
    go through the hardware re-centering / settle sequence.
    """
    # Imported lazily so the sim-only path never needs pyserial.
    from src.runners.serial import SerialRunner, SerialRunnerConfig

    env = build_env(env_name, cfg)
    runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
    serial_runner = SerialRunner(env=env, config=SerialRunnerConfig(**runner_dict))
    device = serial_runner.device
    model, preprocessor = load_policy(
        checkpoint_path, serial_runner.observation_space, serial_runner.action_space, device
    )
    # Set up digital-twin MuJoCo model for visualization.
    # NOTE(review): uses SerialRunner private _viz_* API — confirm it stays stable.
    serial_runner._ensure_viz_model()
    mj_model = serial_runner._viz_model
    mj_data = serial_runner._viz_data
    with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
        obs, _ = serial_runner.reset()
        step = 0
        episode = 0
        episode_reward = 0.0
        logger.info(
            "eval_started",
            env=env_name,
            mode="real hardware (serial)",
            port=serial_runner.config.port,
            checkpoint=Path(checkpoint_path).name,
            controls="Space=pause, R=reset, Esc=quit",
        )
        while viewer.is_running() and not _quit_flag[0]:
            # Manual reset (R key): stop motor, re-center, wait for the
            # pendulum to settle, then reset the env.
            if _reset_flag[0]:
                _reset_flag[0] = False
                serial_runner._send("M0")
                serial_runner._drive_to_center()
                serial_runner._wait_for_pendulum_still()
                obs, _ = serial_runner.reset()
                step = 0
                episode += 1
                episode_reward = 0.0
                logger.info("reset", episode=episode)
            if _paused[0]:
                serial_runner._send("M0")  # safety: stop motor while paused
                serial_runner._sync_viz()
                viewer.sync()
                time.sleep(0.05)
                continue
            # Policy inference
            with torch.no_grad():
                # Batch a single observation so the model sees shape (1, obs_dim).
                normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
                action = model.act({"states": normalized_obs}, role="policy")[0]
                action = action.clamp(-1.0, 1.0)
            obs, reward, terminated, truncated, info = serial_runner.step(action)
            episode_reward += reward.item()
            step += 1
            # Sync digital twin with real sensor data.
            serial_runner._sync_viz()
            viewer.user_scn.ngeom = 0  # clear last frame's overlay geoms
            _add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
            viewer.sync()
            if step % 25 == 0:
                state = serial_runner._read_state()
                logger.debug(
                    "step", n=step, reward=round(reward.item(), 3),
                    action=round(action[0, 0].item(), 2),
                    ep_reward=round(episode_reward, 1),
                    motor_enc=state["encoder_count"],
                    pend_deg=round(state["pendulum_angle"], 1),
                )
            # Check for safety / disconnection.
            if info.get("reboot_detected") or info.get("motor_limit_exceeded"):
                logger.error(
                    "safety_stop",
                    reboot=info.get("reboot_detected", False),
                    motor_limit=info.get("motor_limit_exceeded", False),
                )
                serial_runner._send("M0")  # make sure the motor is off before exiting
                break
            if terminated.any() or truncated.any():
                logger.info(
                    "episode_done", episode=episode, steps=step,
                    total_reward=round(episode_reward, 2),
                    reason="terminated" if terminated.any() else "truncated",
                )
                # Auto-reset for next episode.
                obs, _ = serial_runner.reset()
                step = 0
                episode += 1
                episode_reward = 0.0
            # Real-time pacing is handled by serial_runner.step() (dt sleep).
    serial_runner.close()
if __name__ == "__main__":
    main()  # Hydra parses CLI overrides and invokes main(cfg)

442
scripts/hpo.py Normal file
View File

@@ -0,0 +1,442 @@
"""Hyperparameter optimization for RL-Framework using ClearML + SMAC3.
Automatically creates a base training task (via Task.create), reads HPO
search ranges from the Hydra config's `training.hpo` and `env.hpo` blocks,
and launches SMAC3 Successive Halving optimization.
Usage:
python scripts/hpo.py env=rotary_cartpole runner=mujoco_single training=ppo_single
# With HPO-specific options:
python scripts/hpo.py env=rotary_cartpole runner=mujoco_single training=ppo_single \\
--queue gpu-queue --total-trials 100
# Or use an existing base task:
python scripts/hpo.py --base-task-id <TASK_ID>
# Dry run (print search space only):
python scripts/hpo.py env=rotary_cartpole --dry-run
"""
from __future__ import annotations
import argparse
import sys
import time
from pathlib import Path
# Ensure project root is on sys.path
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
import structlog
from clearml import Task
from clearml.automation import (
DiscreteParameterRange,
HyperParameterOptimizer,
UniformIntegerParameterRange,
UniformParameterRange,
)
from omegaconf import OmegaConf
logger = structlog.get_logger()
def _load_hydra_config(
    env: str, runner: str, training: str
) -> dict:
    """Load and merge Hydra configs to extract HPO ranges.

    We read the YAML files directly (without running Hydra) so this script
    doesn't need @hydra.main — it's a ClearML optimizer, not a training job.
    (`runner` is accepted for call-site symmetry; only env/training carry
    hpo blocks here.)
    """
    configs_dir = Path(__file__).resolve().parent.parent / "configs"

    # Training config may declare `defaults: [ppo]`-style inheritance;
    # merge any string entries that resolve to sibling YAML files.
    training_cfg = OmegaConf.load(configs_dir / "training" / f"{training}.yaml")
    if "defaults" in training_cfg:
        merged_base = OmegaConf.create({})
        for entry in OmegaConf.to_container(training_cfg.defaults):
            if not isinstance(entry, str):
                continue
            parent_path = configs_dir / "training" / f"{entry}.yaml"
            if parent_path.exists():
                merged_base = OmegaConf.merge(merged_base, OmegaConf.load(parent_path))
        # Strip the defaults key, then overlay this config's own values.
        own_values = {
            k: v for k, v in OmegaConf.to_container(training_cfg).items()
            if k != "defaults"
        }
        training_cfg = OmegaConf.merge(merged_base, OmegaConf.create(own_values))

    env_path = configs_dir / "env" / f"{env}.yaml"
    env_cfg = OmegaConf.load(env_path) if env_path.exists() else OmegaConf.create({})

    return {
        "training": OmegaConf.to_container(training_cfg, resolve=True),
        "env": OmegaConf.to_container(env_cfg, resolve=True),
    }
def _build_hyper_parameters(config: dict) -> list:
    """Build ClearML parameter ranges from hpo: blocks in config.

    Reads training.hpo and env.hpo dicts and creates the matching
    ClearML parameter range objects.

    Each hpo entry can have:
        {values: [...]}       → DiscreteParameterRange
        {min, max, type: int} → UniformIntegerParameterRange
        {min, max}            → UniformParameterRange (float; optional step)
    Anything else is logged and skipped.
    """
    ranges: list = []
    for section in ("training", "env"):
        section_hpo = config.get(section, {}).get("hpo", {}) or {}
        for param_name, spec in section_hpo.items():
            hydra_key = f"Hydra/{section}.{param_name}"
            if "values" in spec:
                ranges.append(DiscreteParameterRange(hydra_key, values=spec["values"]))
                continue
            if "min" not in spec or "max" not in spec:
                logger.warning("skipping_unknown_hpo_spec", param=param_name, spec=spec)
                continue
            if spec.get("type") == "int":
                ranges.append(
                    UniformIntegerParameterRange(
                        hydra_key,
                        min_value=int(spec["min"]),
                        max_value=int(spec["max"]),
                    )
                )
            else:
                ranges.append(
                    UniformParameterRange(
                        hydra_key,
                        min_value=float(spec["min"]),
                        max_value=float(spec["max"]),
                        step_size=spec.get("step", None),
                    )
                )
    return ranges
def _flatten_dict(d: dict, parent_key: str = "", sep: str = ".") -> dict:
"""Flatten a nested dict into dot-separated keys.
Example: {"a": {"b": 1}} → {"a.b": 1}
"""
items = {}
for k, v in d.items():
new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict):
items.update(_flatten_dict(v, new_key, sep=sep))
else:
items[new_key] = v
return items
def _create_base_task(
    env: str, runner: str, training: str, queue: str
) -> str:
    """Create a base ClearML task without executing it.

    Uses Task.create() to register a task pointing at scripts/train.py
    with the correct Hydra overrides. The HPO optimizer will clone this.

    The full resolved OmegaConf config is attached as Hydra/* parameters
    so cloned trial tasks inherit the complete configuration.

    Returns:
        The ClearML ID of the newly created base task.

    NOTE(review): `queue` is currently unused in this function — trials
    are presumably enqueued by the optimizer elsewhere; confirm and drop
    the parameter or wire it up.
    """
    script_path = str(Path(__file__).resolve().parent / "train.py")
    project_root = str(Path(__file__).resolve().parent.parent)
    base_task = Task.create(
        project_name="RL-Framework",
        task_name=f"{env}-{runner}-{training} (HPO base)",
        task_type=Task.TaskTypes.training,
        script=script_path,
        working_directory=project_root,
        argparse_args=[
            f"env={env}",
            f"runner={runner}",
            f"training={training}",
        ],
        add_task_init_call=False,
    )
    # ── Attach full resolved OmegaConf config ─────────────────────
    # ClearML's Hydra binding normally does this when the script runs,
    # but Task.create() never executes Hydra. We replicate the binding
    # manually: config group choices + all resolved values.
    base_task.set_parameter("Hydra/env", env)
    base_task.set_parameter("Hydra/runner", runner)
    base_task.set_parameter("Hydra/training", training)
    # Load and resolve the full config for each group
    configs_dir = Path(__file__).resolve().parent.parent / "configs"
    for section, name in [("training", training), ("env", env), ("runner", runner)]:
        cfg_path = configs_dir / section / f"{name}.yaml"
        if not cfg_path.exists():
            continue
        cfg = OmegaConf.load(cfg_path)
        # Handle Hydra defaults: inheritance (e.g. ppo_single → ppo)
        if "defaults" in cfg:
            defaults = OmegaConf.to_container(cfg.defaults)
            base_cfg = OmegaConf.create({})
            for d in defaults:
                # Only plain string entries (sibling file names) are merged;
                # dict-style Hydra defaults entries are ignored here.
                if isinstance(d, str):
                    base_path = configs_dir / section / f"{d}.yaml"
                    if base_path.exists():
                        loaded = OmegaConf.load(base_path)
                        base_cfg = OmegaConf.merge(base_cfg, loaded)
            cfg_no_defaults = {
                k: v for k, v in OmegaConf.to_container(cfg).items()
                if k != "defaults"
            }
            cfg = OmegaConf.merge(base_cfg, OmegaConf.create(cfg_no_defaults))
        resolved = OmegaConf.to_container(cfg, resolve=True)
        # Remove hpo metadata — not a real config value
        resolved.pop("hpo", None)
        flat = _flatten_dict(resolved)
        for key, value in flat.items():
            base_task.set_parameter(f"Hydra/{section}.{key}", value)
    # Set docker config
    base_task.set_base_docker(
        "registry.kube.optimize/worker-image:latest",
        docker_setup_bash_script=(
            "apt-get update && apt-get install -y --no-install-recommends "
            "libosmesa6-dev libgl1-mesa-glx libglfw3 && rm -rf /var/lib/apt/lists/* "
            "&& pip install 'jax[cuda12]' mujoco-mjx PyOpenGL PyOpenGL-accelerate"
        ),
        docker_arguments=[
            "-e", "MUJOCO_GL=osmesa",
        ],
    )
    # Pin the trial environment to the repo's requirements file.
    req_file = Path(__file__).resolve().parent.parent / "requirements.txt"
    base_task.set_packages(str(req_file))
    task_id = base_task.id
    logger.info("base_task_created", task_id=task_id, task_name=base_task.name)
    return task_id
def _parse_overrides(argv: list[str]) -> dict[str, str]:
"""Parse Hydra-style key=value overrides from argv.
Returns a dict of parsed key-value pairs. Unknown args (--flags)
are left in argv for argparse to handle.
"""
overrides = {}
remaining = []
for arg in argv:
if "=" in arg and not arg.startswith("-"):
key, value = arg.split("=", 1)
overrides[key] = value
else:
remaining.append(arg)
argv.clear()
argv.extend(remaining)
return overrides
def main() -> None:
    """Configure and launch a ClearML HPO run over the Hydra config space.

    Steps: parse Hydra-style overrides plus CLI flags, build the search
    space from the configs' `hpo:` blocks, create (or reuse) a base task,
    then start the SMAC optimizer — handing the controller itself off to a
    remote "services" worker via execute_remotely().
    """
    # First pass: extract Hydra-style key=value overrides from sys.argv
    raw_args = sys.argv[1:]
    overrides = _parse_overrides(raw_args)
    parser = argparse.ArgumentParser(
        description="Hyperparameter optimization for RL-Framework",
        usage="%(prog)s env=<ENV> runner=<RUNNER> training=<TRAINING> [options]",
    )
    parser.add_argument(
        "--base-task-id",
        type=str,
        default=None,
        help="Existing ClearML task ID to use as base (skip auto-creation)",
    )
    parser.add_argument("--queue", type=str, default="gpu-queue")
    parser.add_argument(
        "--max-concurrent", type=int, default=2,
        help="Maximum concurrent trial tasks",
    )
    parser.add_argument(
        "--total-trials", type=int, default=200,
        help="Total HPO trial budget",
    )
    parser.add_argument(
        "--min-budget", type=int, default=50_000,
        help="Minimum budget (total_timesteps) per trial",
    )
    parser.add_argument(
        "--max-budget", type=int, default=500_000,
        help="Maximum budget (total_timesteps) for promoted trials",
    )
    parser.add_argument("--eta", type=int, default=3, help="Successive halving reduction factor")
    parser.add_argument(
        "--max-consecutive-failures", type=int, default=3,
        help="Abort HPO after N consecutive trial failures (0 = never abort)",
    )
    parser.add_argument(
        "--time-limit-hours", type=float, default=72,
        help="Total wall-clock time limit in hours",
    )
    parser.add_argument(
        "--objective-metric", type=str, default="Reward / Total reward (mean)",
        help="ClearML scalar metric title to optimize",
    )
    parser.add_argument(
        "--objective-series", type=str, default=None,
        help="ClearML scalar metric series (default: same as title)",
    )
    parser.add_argument(
        "--maximize", action="store_true", default=True,
        help="Maximize the objective (default)",
    )
    parser.add_argument(
        "--minimize", action="store_true", default=False,
        help="Minimize the objective",
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Print search space and exit without running",
    )
    args = parser.parse_args(raw_args)
    # Resolve env/runner/training from Hydra-style overrides (same as train.py)
    env = overrides.get("env", "rotary_cartpole")
    runner = overrides.get("runner", "mujoco_single")
    training = overrides.get("training", "ppo_single")
    # --minimize takes precedence over the default-on --maximize flag.
    objective_sign = "min" if args.minimize else "max"
    # ── Load config and build search space ────────────────────────
    config = _load_hydra_config(env, runner, training)
    hyper_parameters = _build_hyper_parameters(config)
    if not hyper_parameters:
        logger.error(
            "no_hpo_ranges_found",
            hint="Add 'hpo:' blocks to your training and/or env YAML configs",
        )
        return
    if args.dry_run:
        # Inspect-only mode: print the search space and bail out.
        print(f"\nSearch space ({len(hyper_parameters)} parameters):")
        for p in hyper_parameters:
            print(f" {p.name}: {p}")
        print(f"\nObjective: {args.objective_metric} ({objective_sign})")
        return
    # ── Initialize ClearML HPO task ───────────────────────────────
    Task.ignore_requirements("torch")
    task = Task.init(
        project_name="RL-Framework",
        task_name=f"HPO {env}-{runner}-{training}",
        task_type=Task.TaskTypes.optimizer,
        reuse_last_task_id=False,
    )
    # The controller task reuses the worker image's system site-packages
    # instead of building a fresh venv.
    task.set_base_docker(
        docker_image="registry.kube.optimize/worker-image:latest",
        docker_arguments=[
            "-e", "CLEARML_AGENT_SKIP_PYTHON_ENV_INSTALL=1",
            "-e", "CLEARML_AGENT_SKIP_PIP_VENV_INSTALL=1",
            "-e", "CLEARML_AGENT_FORCE_SYSTEM_SITE_PACKAGES=1",
        ],
    )
    req_file = Path(__file__).resolve().parent.parent / "requirements.txt"
    task.set_packages(str(req_file))
    # ── Create or reuse base task ─────────────────────────────────
    # Store the base_task_id on the HPO task so that when the services
    # worker re-runs this script it reuses the same base task instead
    # of creating a duplicate.
    if args.base_task_id:
        base_task_id = args.base_task_id
        logger.info("using_existing_base_task", task_id=base_task_id)
    else:
        existing = task.get_parameter("General/base_task_id")
        if existing:
            base_task_id = existing
            logger.info("reusing_base_task_from_param", task_id=base_task_id)
        else:
            base_task_id = _create_base_task(
                env, runner, training, args.queue
            )
            task.set_parameter("General/base_task_id", base_task_id)
    # ── Build objective metric ────────────────────────────────────
    # skrl's SequentialTrainer logs "Reward / Total reward (mean)" by default
    objective_title = args.objective_metric
    objective_series = args.objective_series or objective_title
    # ── Launch optimizer ──────────────────────────────────────────
    from src.hpo.smac3 import OptimizerSMAC
    optimizer = HyperParameterOptimizer(
        base_task_id=base_task_id,
        hyper_parameters=hyper_parameters,
        objective_metric_title=objective_title,
        objective_metric_series=objective_series,
        objective_metric_sign=objective_sign,
        optimizer_class=OptimizerSMAC,
        execution_queue=args.queue,
        max_number_of_concurrent_tasks=args.max_concurrent,
        total_max_jobs=args.total_trials,
        min_iteration_per_job=args.min_budget,
        max_iteration_per_job=args.max_budget,
        pool_period_min=1,
        time_limit_per_job=240,  # 4 hours per trial max
        eta=args.eta,
        budget_param_name="Hydra/training.total_timesteps",
        max_consecutive_failures=args.max_consecutive_failures,
    )
    # Send this HPO controller to a remote services worker
    # (a no-op when this script is already running on the worker).
    task.execute_remotely(queue_name="services", exit_process=True)
    # Reporting and time limits
    optimizer.set_report_period(1)
    optimizer.set_time_limit(in_minutes=int(args.time_limit_hours * 60))
    # Start and wait
    optimizer.start()
    optimizer.wait()
    # Get top experiments — retried with exponential backoff (5s, 10s, 20s, …)
    max_retries = 5
    for attempt in range(max_retries):
        try:
            top_exp = optimizer.get_top_experiments(top_k=10)
            logger.info("top_experiments_retrieved", count=len(top_exp))
            for i, t in enumerate(top_exp):
                logger.info("top_experiment", rank=i + 1, task_id=t.id, name=t.name)
            break
        except Exception as e:
            logger.warning("retry_get_top_experiments", attempt=attempt + 1, error=str(e))
            if attempt < max_retries - 1:
                time.sleep(5.0 * (2 ** attempt))
            else:
                logger.error("could_not_retrieve_top_experiments")
    optimizer.stop()
    logger.info("hpo_complete")
if __name__ == "__main__":
main()

64
scripts/motor_sysid.py Normal file
View File

@@ -0,0 +1,64 @@
"""Unified CLI for motor-only system identification.
Usage:
python scripts/motor_sysid.py capture --duration 20
python scripts/motor_sysid.py optimize --recording assets/motor/recordings/<file>.npz
python scripts/motor_sysid.py visualize --recording assets/motor/recordings/<file>.npz
python scripts/motor_sysid.py export --result assets/motor/motor_sysid_result.json
"""
from __future__ import annotations
import sys
from pathlib import Path
# Ensure project root is on sys.path
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
def main() -> None:
    """Dispatch a motor-sysid subcommand to its module's ``main()``."""
    argv = sys.argv
    if len(argv) < 2 or argv[1] in ("-h", "--help"):
        print(
            "Motor System Identification\n"
            "===========================\n"
            "Usage: python scripts/motor_sysid.py <command> [options]\n"
            "\n"
            "Commands:\n"
            " capture Record motor trajectory under PRBS excitation\n"
            " optimize Run CMA-ES to fit motor parameters\n"
            " visualize Plot real vs simulated motor response\n"
            " export Write tuned MJCF + robot.yaml files\n"
            "\n"
            "Workflow:\n"
            " 1. Flash sysid firmware to ESP32 (motor-only, no limits)\n"
            " 2. python scripts/motor_sysid.py capture --duration 20\n"
            " 3. python scripts/motor_sysid.py optimize --recording <file>.npz\n"
            " 4. python scripts/motor_sysid.py visualize --recording <file>.npz\n"
            "\n"
            "Run '<command> --help' for command-specific options."
        )
        sys.exit(0)
    command = argv[1]
    # Rewrite argv so the subcommand module's argparse sees a clean prog name.
    sys.argv = [f"motor_sysid {command}"] + argv[2:]
    modules = {
        "capture": "src.sysid.motor.capture",
        "optimize": "src.sysid.motor.optimize",
        "visualize": "src.sysid.motor.visualize",
        "export": "src.sysid.motor.export",
    }
    if command not in modules:
        print(f"Unknown command: {command}")
        print("Available commands: capture, optimize, visualize, export")
        sys.exit(1)
    # Deferred import — mirrors the original lazy loading so unused
    # subcommand dependencies are never imported.
    target = __import__(modules[command], fromlist=["main"])
    target.main()
if __name__ == "__main__":
main()

57
scripts/sysid.py Normal file
View File

@@ -0,0 +1,57 @@
"""Unified CLI for system identification tools.
Usage:
python scripts/sysid.py capture --robot-path assets/rotary_cartpole --duration 20
python scripts/sysid.py optimize --robot-path assets/rotary_cartpole --recording <file>.npz
python scripts/sysid.py visualize --recording <file>.npz
python scripts/sysid.py export --robot-path assets/rotary_cartpole --result <result>.json
"""
from __future__ import annotations
import sys
from pathlib import Path
# Ensure project root is on sys.path
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
def main() -> None:
    """Dispatch a sysid subcommand to its module's ``main()``."""
    argv = sys.argv
    if len(argv) < 2 or argv[1] in ("-h", "--help"):
        print(
            "Usage: python scripts/sysid.py <command> [options]\n"
            "\n"
            "Commands:\n"
            " capture Record real robot trajectory under PRBS excitation\n"
            " optimize Run CMA-ES parameter optimization\n"
            " visualize Plot real vs simulated trajectories\n"
            " export Write tuned URDF + robot.yaml files\n"
            "\n"
            "Run 'python scripts/sysid.py <command> --help' for command-specific options."
        )
        sys.exit(0)
    command = argv[1]
    # Remove the subcommand from argv so the module's argparse works normally
    sys.argv = [f"sysid {command}"] + argv[2:]
    modules = {
        "capture": "src.sysid.capture",
        "optimize": "src.sysid.optimize",
        "visualize": "src.sysid.visualize",
        "export": "src.sysid.export",
    }
    if command not in modules:
        print(f"Unknown command: {command}")
        print("Available commands: capture, optimize, visualize, export")
        sys.exit(1)
    # Deferred import keeps unused subcommand dependencies unloaded.
    target = __import__(modules[command], fromlist=["main"])
    target.main()
if __name__ == "__main__":
main()

128
scripts/train.py Normal file
View File

@@ -0,0 +1,128 @@
import os
import pathlib
import sys
# Ensure project root is on sys.path so `src.*` imports work
# regardless of which directory the script is invoked from.
_PROJECT_ROOT = str(pathlib.Path(__file__).resolve().parent.parent)
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
# Headless rendering: use OSMesa on Linux servers (must be set before mujoco import).
# Always default on Linux — Docker containers may have DISPLAY set without a real X server.
if sys.platform == "linux":
os.environ.setdefault("MUJOCO_GL", "osmesa")
import hydra
import hydra.utils as hydra_utils
import structlog
from clearml import Task
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf
from src.core.env import BaseEnv
from src.core.registry import build_env
from src.core.runner import BaseRunner
from src.training.trainer import Trainer, TrainerConfig
logger = structlog.get_logger()
# ── runner registry ───────────────────────────────────────────────────
# Maps Hydra config-group name → (RunnerClass, ConfigClass)
# Imports are deferred so JAX is only loaded when runner=mjx is chosen.
# Each value is (module path, runner class name, runner-config class name);
# the classes are resolved lazily by _build_runner via importlib.
RUNNER_REGISTRY: dict[str, tuple[str, str, str]] = {
    "mujoco": ("src.runners.mujoco", "MuJoCoRunner", "MuJoCoRunnerConfig"),
    # Same classes as "mujoco" — the Hydra config group selects a different YAML.
    "mujoco_single": ("src.runners.mujoco", "MuJoCoRunner", "MuJoCoRunnerConfig"),
    "mjx": ("src.runners.mjx", "MJXRunner", "MJXRunnerConfig"),
    "serial": ("src.runners.serial", "SerialRunner", "SerialRunnerConfig"),
}
def _build_runner(runner_name: str, env: BaseEnv, cfg: DictConfig) -> BaseRunner:
    """Instantiate the right runner from the Hydra config-group name.

    Looks up *runner_name* in RUNNER_REGISTRY, lazily imports its module,
    builds the runner-config dataclass from ``cfg.runner``, and returns the
    constructed runner.

    Raises:
        ValueError: If *runner_name* is not registered.
    """
    entry = RUNNER_REGISTRY.get(runner_name)
    if entry is None:
        raise ValueError(
            f"Unknown runner '{runner_name}'. Registered: {list(RUNNER_REGISTRY)}"
        )
    module_path, cls_name, cfg_cls_name = entry
    import importlib

    runner_module = importlib.import_module(module_path)
    cfg_kwargs = OmegaConf.to_container(cfg.runner, resolve=True)
    runner_config = getattr(runner_module, cfg_cls_name)(**cfg_kwargs)
    return getattr(runner_module, cls_name)(env=env, config=runner_config)
def _init_clearml(choices: dict[str, str], remote: bool = False) -> Task:
    """Initialize the ClearML task: naming, tags, docker image, packages.

    Args:
        choices: Hydra runtime choices (env / runner / training group names).
        remote: If True and running locally, re-launch this script on the
            remote "gpu-queue" worker and exit the local process.

    Returns:
        The initialized ClearML Task.
    """
    Task.ignore_requirements("torch")
    names = [
        choices.get("env", "cartpole"),
        choices.get("runner", "mujoco"),
        choices.get("training", "ppo"),
    ]
    task = Task.init(
        project_name="RL-Framework",
        task_name="-".join(names),
        tags=names,
    )
    # Headless-rendering deps + GPU JAX are installed on top of the worker image.
    setup_script = (
        "apt-get update && apt-get install -y --no-install-recommends "
        "libosmesa6-dev libgl1-mesa-glx libglfw3 && rm -rf /var/lib/apt/lists/* "
        "&& pip install 'jax[cuda12]' mujoco-mjx PyOpenGL PyOpenGL-accelerate"
    )
    task.set_base_docker(
        "registry.kube.optimize/worker-image:latest",
        docker_setup_bash_script=setup_script,
        docker_arguments=["-e", "MUJOCO_GL=osmesa"],
    )
    task.set_packages(
        str(pathlib.Path(hydra_utils.get_original_cwd()) / "requirements.txt")
    )
    # Hand off to a remote worker if requested and not already remote.
    if remote and task.running_locally():
        logger.info("executing_task_remotely", queue="gpu-queue")
        task.execute_remotely(queue_name="gpu-queue", exit_process=True)
    return task
@hydra.main(version_base=None, config_path="../configs", config_name="config")
def main(cfg: DictConfig) -> None:
    """Train entry point: build env + runner from the Hydra config and train."""
    choices = HydraConfig.get().runtime.choices
    # ClearML init — must happen before heavy work so remote execution
    # can take over early. The remote worker re-runs the full script;
    # execute_remotely() is a no-op on the worker side.
    training_dict = OmegaConf.to_container(cfg.training, resolve=True)
    remote = training_dict.pop("remote", False)
    training_dict.pop("hpo", None)  # HPO range metadata — not a TrainerConfig field
    task = _init_clearml(choices, remote=remote)
    # Drop keys not recognised by TrainerConfig (e.g. ClearML-injected
    # resume_from_task_id or any future additions)
    import dataclasses as _dc
    _valid_keys = {f.name for f in _dc.fields(TrainerConfig)}
    training_dict = {k: v for k, v in training_dict.items() if k in _valid_keys}
    env_name = choices.get("env", "cartpole")
    env = build_env(env_name, cfg)
    runner = _build_runner(choices.get("runner", "mujoco"), env, cfg)
    trainer_config = TrainerConfig(**training_dict)
    trainer = Trainer(runner=runner, config=trainer_config)
    try:
        trainer.train()
    finally:
        # Always release sim/hardware resources and close the ClearML task,
        # even when training raises.
        trainer.close()
        task.close()
if __name__ == "__main__":
main()

254
scripts/viz.py Normal file
View File

@@ -0,0 +1,254 @@
"""Interactive visualization — control any env with keyboard in MuJoCo viewer.
Usage (simulation):
mjpython scripts/viz.py env=rotary_cartpole
mjpython scripts/viz.py env=cartpole +com=true
Usage (real hardware — digital twin):
mjpython scripts/viz.py env=rotary_cartpole runner=serial
mjpython scripts/viz.py env=rotary_cartpole runner=serial runner.port=/dev/ttyUSB0
Controls:
Left/Right arrows — apply torque to first actuator
R — reset environment
Esc / close window — quit
"""
import math
import sys
import time
from pathlib import Path
# Ensure project root is on sys.path
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
import hydra
import mujoco
import mujoco.viewer
import numpy as np
import structlog
import torch
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf
from src.core.registry import build_env
from src.runners.mujoco import MuJoCoRunner, MuJoCoRunnerConfig
logger = structlog.get_logger()
# ── keyboard state ───────────────────────────────────────────────────
_action_val = [0.0] # mutable container shared with callback
_action_time = [0.0] # timestamp of last key press
_reset_flag = [False]
_ACTION_HOLD_S = 0.25 # seconds the action stays active after last key event
def _key_callback(keycode: int) -> None:
"""Called by MuJoCo on key press & repeat (not release)."""
if keycode == 263: # GLFW_KEY_LEFT
_action_val[0] = -1.0
_action_time[0] = time.time()
elif keycode == 262: # GLFW_KEY_RIGHT
_action_val[0] = 1.0
_action_time[0] = time.time()
elif keycode == 82: # GLFW_KEY_R
_reset_flag[0] = True
def _add_action_arrow(viewer, model, data, action_val: float) -> None:
    """Draw an arrow on the motor joint showing applied torque direction.

    Adds one mjGEOM_ARROW to viewer.user_scn (green for positive action,
    red for negative). Skipped for near-zero actions or actuator-less
    models. The caller is responsible for resetting user_scn.ngeom each
    frame.
    """
    if abs(action_val) < 0.01 or model.nu == 0:
        return
    # Get the body that the first actuator's joint belongs to
    # NOTE(review): assumes the first actuator uses a joint transmission,
    # so actuator_trnid[0, 0] is a joint id — confirm for tendon/site actuators.
    jnt_id = model.actuator_trnid[0, 0]
    body_id = model.jnt_bodyid[jnt_id]
    # Arrow origin: body position
    pos = data.xpos[body_id].copy()
    pos[2] += 0.02  # lift slightly above the body
    # Arrow direction: along joint axis in world frame, scaled by action
    axis = data.xmat[body_id].reshape(3, 3) @ model.jnt_axis[jnt_id]
    arrow_len = 0.08 * action_val
    direction = axis * np.sign(arrow_len)
    # Build rotation matrix: arrow rendered along local z-axis
    z = direction / (np.linalg.norm(direction) + 1e-8)
    # Pick a non-parallel "up" vector to complete the orthonormal frame.
    up = np.array([0, 0, 1]) if abs(z[2]) < 0.99 else np.array([0, 1, 0])
    x = np.cross(up, z)
    x /= np.linalg.norm(x) + 1e-8
    y = np.cross(z, x)
    mat = np.column_stack([x, y, z]).flatten()
    # Color: green = positive, red = negative
    rgba = np.array(
        [0.2, 0.8, 0.2, 0.8] if action_val > 0 else [0.8, 0.2, 0.2, 0.8],
        dtype=np.float32,
    )
    # Write into the next free user-scene slot, then claim it.
    geom = viewer.user_scn.geoms[viewer.user_scn.ngeom]
    mujoco.mjv_initGeom(
        geom,
        type=mujoco.mjtGeom.mjGEOM_ARROW,
        size=np.array([0.008, 0.008, abs(arrow_len)]),
        pos=pos,
        mat=mat,
        rgba=rgba,
    )
    viewer.user_scn.ngeom += 1
@hydra.main(version_base=None, config_path="../configs", config_name="config")
def main(cfg: DictConfig) -> None:
    """Route to the serial (digital-twin) or simulation viewer."""
    runtime = HydraConfig.get().runtime.choices
    env_name = runtime.get("env", "cartpole")
    if runtime.get("runner", "mujoco") == "serial":
        _main_serial(cfg, env_name)
        return
    _main_sim(cfg, env_name)
def _main_sim(cfg: DictConfig, env_name: str) -> None:
    """Simulation visualization — step MuJoCo physics with keyboard control.

    Runs a single-env MuJoCoRunner in real time inside a passive viewer:
    arrow keys drive the first actuator, R resets the episode.
    """
    # Build env + runner (single env for viz)
    env = build_env(env_name, cfg)
    runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
    runner_dict["num_envs"] = 1
    runner = MuJoCoRunner(env=env, config=MuJoCoRunnerConfig(**runner_dict))
    # NOTE(review): reads MuJoCoRunner privates (_model, _data) — keep in
    # sync with that class.
    model = runner._model
    data = runner._data[0]
    # Control period (physics dt × substeps per control step)
    dt_ctrl = runner.config.dt * runner.config.substeps
    # Launch viewer
    with mujoco.viewer.launch_passive(model, data, key_callback=_key_callback) as viewer:
        # Show CoM / inertia if requested via Hydra override: viz.py +com=true
        show_com = cfg.get("com", False)
        if show_com:
            viewer.opt.flags[mujoco.mjtVisFlag.mjVIS_COM] = True
            viewer.opt.flags[mujoco.mjtVisFlag.mjVIS_INERTIA] = True
        obs, _ = runner.reset()
        step = 0
        logger.info("viewer_started", env=env_name,
                    controls="Left/Right arrows = torque, R = reset")
        while viewer.is_running():
            # Read action from callback (expires after _ACTION_HOLD_S)
            if time.time() - _action_time[0] < _ACTION_HOLD_S:
                action_val = _action_val[0]
            else:
                action_val = 0.0
            # Reset on R press
            if _reset_flag[0]:
                _reset_flag[0] = False
                obs, _ = runner.reset()
                step = 0
                logger.info("reset")
            # Step through runner
            action = torch.tensor([[action_val]])
            obs, reward, terminated, truncated, info = runner.step(action)
            # Sync viewer with action arrow overlay
            mujoco.mj_forward(model, data)
            viewer.user_scn.ngeom = 0  # clear previous frame's overlays
            _add_action_arrow(viewer, model, data, action_val)
            viewer.sync()
            # Print state every 25 control steps
            # NOTE(review): indexes qpos by joint id — assumes all joints
            # are 1-DoF (qposadr == joint id); confirm for new robots.
            if step % 25 == 0:
                joints = {model.jnt(i).name: round(math.degrees(data.qpos[i]), 1)
                          for i in range(model.njnt)}
                logger.debug("step", n=step, reward=round(reward.item(), 3),
                             action=round(action_val, 1), **joints)
            # Real-time pacing
            time.sleep(dt_ctrl)
            step += 1
    runner.close()
def _main_serial(cfg: DictConfig, env_name: str) -> None:
    """Digital-twin visualization — mirror real hardware in MuJoCo viewer.

    The MuJoCo model is loaded for rendering only. Joint positions are
    read from the ESP32 over serial and applied to the model each frame.
    Keyboard arrows send motor commands to the real robot.
    """
    from src.runners.serial import SerialRunner, SerialRunnerConfig
    env = build_env(env_name, cfg)
    runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
    serial_runner = SerialRunner(
        env=env, config=SerialRunnerConfig(**runner_dict)
    )
    # Load MuJoCo model for visualisation (same URDF the sim uses).
    # NOTE(review): relies on SerialRunner private helpers (_ensure_viz_model,
    # _send, _sync_viz, _drive_to_center, ...) — keep in sync with that class.
    serial_runner._ensure_viz_model()
    model = serial_runner._viz_model
    data = serial_runner._viz_data
    with mujoco.viewer.launch_passive(
        model, data, key_callback=_key_callback
    ) as viewer:
        # Show CoM / inertia if requested.
        show_com = cfg.get("com", False)
        if show_com:
            viewer.opt.flags[mujoco.mjtVisFlag.mjVIS_COM] = True
            viewer.opt.flags[mujoco.mjtVisFlag.mjVIS_INERTIA] = True
        logger.info(
            "viewer_started",
            env=env_name,
            mode="serial (digital twin)",
            port=serial_runner.config.port,
            controls="Left/Right arrows = motor command, R = reset",
        )
        while viewer.is_running():
            # Read action from keyboard callback.
            if time.time() - _action_time[0] < _ACTION_HOLD_S:
                action_val = _action_val[0]
            else:
                action_val = 0.0
            # Reset on R press.
            if _reset_flag[0]:
                _reset_flag[0] = False
                serial_runner._send("M0")
                serial_runner._drive_to_center()
                serial_runner._wait_for_pendulum_still()
                logger.info("reset (drive-to-center + settle)")
            # Send motor command to real hardware.
            # Protocol: "M<speed>" with speed scaled to [-255, 255].
            motor_speed = int(np.clip(action_val, -1.0, 1.0) * 255)
            serial_runner._send(f"M{motor_speed}")
            # Sync MuJoCo model with real sensor data.
            serial_runner._sync_viz()
            # Render overlays and sync viewer.
            viewer.user_scn.ngeom = 0
            _add_action_arrow(viewer, model, data, action_val)
            viewer.sync()
            # Real-time pacing (~50 Hz, matches serial dt).
            time.sleep(serial_runner.config.dt)
    serial_runner.close()
if __name__ == "__main__":
main()

View File

@@ -3,31 +3,21 @@ import dataclasses
from typing import TypeVar, Generic, Any
from gymnasium import spaces
import torch
import pathlib
from src.core.robot import RobotConfig, load_robot_config
T = TypeVar("T")
@dataclasses.dataclass
class ActuatorConfig:
    """Actuator definition — maps a joint to a motor with gear ratio and control limits.

    Kept in the env config (not runner config) because actuators define what the robot
    can do, which determines action space — a task-level concept.
    This mirrors Isaac Lab's pattern of separating actuator config from the robot file.
    """

    joint: str = ""  # name of the joint this actuator drives
    gear: float = 1.0  # scale applied to the control signal — presumably torque gain; confirm in runner
    ctrl_range: tuple[float, float] = (-1.0, 1.0)  # (lower, upper) control bounds
@dataclasses.dataclass
class BaseEnvConfig:
    """Task-level configuration shared by all envs."""

    max_steps: int = 1000  # episode truncation horizon (see compute_truncations)
    model_path: pathlib.Path | None = None  # optional explicit model file
    actuators: list[ActuatorConfig] = dataclasses.field(default_factory=list)
    robot_path: str = ""  # directory containing robot.yaml + URDF
class BaseEnv(abc.ABC, Generic[T]):
def __init__(self, config: BaseEnvConfig):
self.config = config
self.robot: RobotConfig = load_robot_config(config.robot_path)
@property
@abc.abstractmethod
@@ -57,3 +47,11 @@ class BaseEnv(abc.ABC, Generic[T]):
    def compute_truncations(self, step_counts: torch.Tensor) -> torch.Tensor:
        """Per-env bool tensor: True once an episode's step count reaches max_steps."""
        return step_counts >= self.config.max_steps
    def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
        """Check whether the physical robot has settled enough to start an episode.

        Used by the SerialRunner after driving to center and waiting for the
        pendulum. Default: always ready (sim doesn't need settling).

        Args:
            qpos: Joint positions (ignored by the default implementation).
            qvel: Joint velocities (ignored by the default implementation).

        Returns:
            True — hardware-backed envs should override with a real check.
        """
        return True

24
src/core/registry.py Normal file
View File

@@ -0,0 +1,24 @@
"""Shared env registry and builder used by train.py and viz.py."""
from omegaconf import DictConfig, OmegaConf
from src.core.env import BaseEnv, BaseEnvConfig
from src.envs.cartpole import CartPoleEnv, CartPoleConfig
from src.envs.rotary_cartpole import RotaryCartPoleEnv, RotaryCartPoleConfig
# Maps Hydra config-group name → (EnvClass, ConfigClass)
# Consumed by build_env below; add new envs here to expose them to Hydra.
ENV_REGISTRY: dict[str, tuple[type[BaseEnv], type[BaseEnvConfig]]] = {
    "cartpole": (CartPoleEnv, CartPoleConfig),
    "rotary_cartpole": (RotaryCartPoleEnv, RotaryCartPoleConfig),
}
def build_env(env_name: str, cfg: DictConfig) -> BaseEnv:
    """Instantiate the right env + config from the Hydra config-group name.

    Args:
        env_name: Key into ENV_REGISTRY (the Hydra ``env`` group choice).
        cfg: Full Hydra config; only ``cfg.env`` is consumed here.

    Raises:
        ValueError: If *env_name* is not registered.
    """
    if env_name not in ENV_REGISTRY:
        raise ValueError(f"Unknown env '{env_name}'. Registered: {list(ENV_REGISTRY)}")
    env_cls, config_cls = ENV_REGISTRY[env_name]
    kwargs = OmegaConf.to_container(cfg.env, resolve=True)
    kwargs.pop("hpo", None)  # HPO range metadata — not an env config field
    return env_cls(config_cls(**kwargs))

197
src/core/robot.py Normal file
View File

@@ -0,0 +1,197 @@
"""Robot hardware configuration — loaded from robot.yaml next to the URDF.
Separates robot hardware (actuators, joint tuning) from task config
(rewards, episode length) and from the URDF (clean CAD export).
Usage:
robot = load_robot_config(Path("assets/rotary_cartpole"))
# robot.urdf_path → resolved absolute path to the URDF
# robot.actuators → list of ActuatorConfig
# robot.joints → dict of per-joint overrides
"""
import dataclasses
import math
from pathlib import Path
import structlog
import yaml
log = structlog.get_logger()
def _as_pair(val) -> tuple[float, float]:
"""Convert scalar or [pos, neg] list to (pos, neg) tuple."""
if isinstance(val, (list, tuple)) and len(val) == 2:
return (float(val[0]), float(val[1]))
return (float(val), float(val))
@dataclasses.dataclass
class ActuatorConfig:
    """Motor/actuator attached to a joint.

    Asymmetric fields hold (positive_dir, negative_dir) tuples; a scalar
    in YAML is expanded to a symmetric pair by the loader.

    type:
        motor — direct torque control (ctrl = normalised torque)
        position — PD position servo (ctrl = target angle, needs kp)
        velocity — P velocity servo (ctrl = target velocity, needs kp)
    """

    joint: str = ""
    type: str = "motor"
    gear: tuple[float, float] = (1.0, 1.0)  # torque constant (pos, neg)
    ctrl_range: tuple[float, float] = (-1.0, 1.0)  # (lower, upper) control bounds
    deadzone: tuple[float, float] = (0.0, 0.0)  # min |ctrl| for torque (pos, neg)
    damping: tuple[float, float] = (0.0, 0.0)  # viscous damping (pos, neg)
    frictionloss: tuple[float, float] = (0.0, 0.0)  # Coulomb friction (pos, neg)
    kp: float = 0.0  # proportional gain (position / velocity actuators)
    kv: float = 0.0  # derivative gain (position actuators)
    filter_tau: float = 0.0  # 1st-order filter time constant (s); 0 = no filter
    viscous_quadratic: float = 0.0  # velocity² drag coefficient
    back_emf_gain: float = 0.0  # back-EMF torque reduction

    @property
    def gear_avg(self) -> float:
        """Mean of the two directional gear values."""
        pos, neg = self.gear
        return (pos + neg) / 2.0

    @property
    def has_motor_model(self) -> bool:
        """True if this actuator needs the runtime motor model."""
        symmetric_gear = self.gear[0] == self.gear[1]
        no_friction = (
            self.deadzone == (0.0, 0.0)
            and self.damping == (0.0, 0.0)
            and self.frictionloss == (0.0, 0.0)
        )
        no_drag = self.viscous_quadratic <= 0 and self.back_emf_gain <= 0
        return not (symmetric_gear and no_friction and no_drag)

    def transform_ctrl(self, ctrl: float) -> float:
        """Apply asymmetric deadzone and gear compensation to a scalar ctrl."""
        dz_pos, dz_neg = self.deadzone
        # Commands inside the deadzone produce no torque at all.
        if 0 <= ctrl < dz_pos or -dz_neg < ctrl < 0:
            return 0.0
        # Gear compensation: rescale so ctrl × gear_avg ≈ ctrl × gear_dir.
        avg = self.gear_avg
        if avg > 1e-8:
            directional = self.gear[0] if ctrl >= 0 else self.gear[1]
            ctrl = ctrl * (directional / avg)
        return ctrl

    def compute_motor_force(self, vel: float, ctrl: float) -> float:
        """Asymmetric friction, damping, drag, back-EMF → applied torque."""
        total = 0.0
        # Coulomb friction opposes motion (direction-dependent magnitude).
        if abs(vel) > 1e-6:
            fl_pos, fl_neg = self.frictionloss
            total -= math.copysign(fl_pos if vel > 0 else fl_neg, vel)
        # Direction-dependent viscous damping.
        total -= (self.damping[0] if vel > 0 else self.damping[1]) * vel
        # Quadratic velocity drag.
        if self.viscous_quadratic > 0:
            total -= self.viscous_quadratic * vel * abs(vel)
        # Back-EMF torque reduction in the commanded direction.
        if self.back_emf_gain > 0 and abs(ctrl) > 1e-6:
            total -= self.back_emf_gain * vel * math.copysign(1.0, ctrl)
        # Clamp to a sane torque envelope.
        return min(10.0, max(-10.0, total))

    def transform_action(self, action):
        """Vectorised deadzone + gear compensation for a torch batch."""
        dz_pos, dz_neg = self.deadzone
        if dz_pos > 0 or dz_neg > 0:
            # Copy before zeroing the deadzone region.
            action = action.clone()
            dead = ((action >= 0) & (action < dz_pos)) | (
                (action < 0) & (action > -dz_neg)
            )
            action[dead] = 0.0
        avg = self.gear_avg
        if avg > 1e-8 and self.gear[0] != self.gear[1]:
            # Copy here only if the deadzone branch didn't already.
            if dz_pos == 0 and dz_neg == 0:
                action = action.clone()
            positive = action >= 0
            action[positive] *= self.gear[0] / avg
            action[~positive] *= self.gear[1] / avg
        return action
@dataclasses.dataclass
class JointConfig:
    """Per-joint overrides applied on top of the URDF values.

    A None field presumably means "keep the URDF value" — confirm where
    the overrides are applied (runner/model build).
    """

    damping: float | None = None  # viscous damping coefficient
    armature: float | None = None  # reflected rotor inertia (kg·m²)
    frictionloss: float | None = None  # Coulomb/dry friction torque (N·m)
@dataclasses.dataclass
class RobotConfig:
    """Complete robot hardware description.

    Attributes:
        urdf_path: Resolved path to the robot's URDF file.
        actuators: Motor definitions, in YAML order.
        joints: Per-joint overrides keyed by joint name.
    """

    # Path() takes no required args, so the class itself is a valid factory —
    # no lambda wrapper needed.
    urdf_path: Path = dataclasses.field(default_factory=Path)
    actuators: list[ActuatorConfig] = dataclasses.field(default_factory=list)
    joints: dict[str, JointConfig] = dataclasses.field(default_factory=dict)
def load_robot_config(robot_dir: str | Path) -> RobotConfig:
    """Load robot.yaml from a directory and resolve the URDF path.

    Expected layout:
        robot_dir/
            robot.yaml       ← hardware config
            some_robot.urdf  ← CAD export
            meshes/          ← optional mesh files

    Args:
        robot_dir: Directory containing ``robot.yaml``.

    Returns:
        Fully populated RobotConfig.

    Raises:
        FileNotFoundError: If ``robot.yaml`` or the referenced URDF is missing.
        ValueError: If ``robot.yaml`` does not name a ``urdf`` file.
    """
    robot_dir = Path(robot_dir).resolve()
    yaml_path = robot_dir / "robot.yaml"
    if not yaml_path.exists():
        raise FileNotFoundError(f"Robot config not found: {yaml_path}")
    # An empty or comment-only YAML file parses to None — normalise to {}
    # so the clear missing-'urdf' error below fires instead of an
    # AttributeError on raw.get().
    raw = yaml.safe_load(yaml_path.read_text()) or {}
    # Resolve URDF path relative to robot.yaml directory
    urdf_filename = raw.get("urdf", "")
    if not urdf_filename:
        raise ValueError(f"robot.yaml must specify 'urdf' filename: {yaml_path}")
    urdf_path = robot_dir / urdf_filename
    if not urdf_path.exists():
        raise FileNotFoundError(f"URDF not found: {urdf_path}")
    # Parse actuators: normalise scalar shorthands to (pos, neg) pairs.
    actuators = []
    for a in raw.get("actuators", []):
        if "ctrl_range" in a:
            a["ctrl_range"] = tuple(a["ctrl_range"])
        for key in ("gear", "deadzone", "damping", "frictionloss"):
            if key in a:
                a[key] = _as_pair(a[key])
        actuators.append(ActuatorConfig(**a))
    # Parse per-joint overrides.
    joints = {
        name: JointConfig(**jcfg) for name, jcfg in raw.get("joints", {}).items()
    }
    config = RobotConfig(
        urdf_path=urdf_path,
        actuators=actuators,
        joints=joints,
    )
    log.debug("robot_config_loaded", robot_dir=str(robot_dir),
              urdf=urdf_filename, num_actuators=len(actuators),
              joint_overrides=list(joints.keys()))
    return config

View File

@@ -1,9 +1,12 @@
import dataclasses
import abc
from typing import Any, Generic, TypeVar
from src.core.env import BaseEnv
import numpy as np
import torch
from src.core.env import BaseEnv
T = TypeVar("T")
@@ -17,6 +20,12 @@ class BaseRunner(abc.ABC, Generic[T]):
self.env = env
self.config = config
# Resolve "auto" device before anything uses it
if getattr(self.config, "device", None) == "auto":
self.config.device = "cuda" if torch.cuda.is_available() else "cpu"
self._last_actions: torch.Tensor | None = None
self._sim_initialize(config)
self.observation_space = self.env.observation_space
@@ -49,9 +58,10 @@ class BaseRunner(abc.ABC, Generic[T]):
def _sim_reset(self, env_ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
...
@abc.abstractmethod
def _sim_close(self) -> None:
...
"""Release simulator resources. Override for extra cleanup."""
if hasattr(self, "_offscreen_renderer") and self._offscreen_renderer is not None:
self._offscreen_renderer.close()
def reset(self) -> tuple[torch.Tensor, dict[str, Any]]:
all_ids = torch.arange(self.num_envs, device=self.device)
@@ -63,6 +73,7 @@ class BaseRunner(abc.ABC, Generic[T]):
return obs, {}
def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict[str, Any]]:
self._last_actions = actions
qpos, qvel = self._sim_step(actions)
self.step_counts += 1
@@ -90,8 +101,50 @@ class BaseRunner(abc.ABC, Generic[T]):
# skrl expects (num_envs, 1) for rewards/terminated/truncated
return obs, rewards.unsqueeze(-1), terminated.unsqueeze(-1), truncated.unsqueeze(-1), info
def render(self, env_idx: int = 0, mode: str = "human") -> torch.Tensor | None:
raise NotImplementedError("Render method not implemented for this runner.")
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
"""Return a raw RGB frame. Override in subclass."""
raise NotImplementedError("Render not implemented for this runner.")
def render(self, env_idx: int = 0) -> np.ndarray:
    """Render one environment's frame, annotated with the last action.

    If no action has been applied yet (``_last_actions`` is None) the
    raw frame is returned unmodified; otherwise the first action
    component is clamped to [-1, 1] and drawn as an overlay bar.
    """
    image = self._render_frame(env_idx)
    last = self._last_actions
    if last is None:
        return image
    value = float(last[env_idx, 0].clamp(-1.0, 1.0))
    _draw_action_overlay(image, value)
    return image
def close(self) -> None:
    """Shut down the runner by releasing simulator resources."""
    self._sim_close()
def _draw_action_overlay(frame: np.ndarray, action: float) -> None:
"""Draw an action bar on a rendered frame (no OpenCV needed).
Bar is centered horizontally: green to the right (+), red to the left (-).
"""
h, w = frame.shape[:2]
bar_y = h - 30
bar_h = 16
bar_x_center = w // 2
bar_half_w = w // 4
bar_x_left = bar_x_center - bar_half_w
bar_x_right = bar_x_center + bar_half_w
# Background (dark grey)
frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40]
# Filled bar
fill_len = int(abs(action) * bar_half_w)
if action > 0:
color = [60, 200, 60] # green
x0 = bar_x_center
x1 = min(bar_x_center + fill_len, bar_x_right)
else:
color = [200, 60, 60] # red
x1 = bar_x_center
x0 = max(bar_x_center - fill_len, bar_x_left)
frame[bar_y:bar_y + bar_h, x0:x1] = color
# Center tick mark (white)
frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255]

129
src/envs/rotary_cartpole.py Normal file
View File

@@ -0,0 +1,129 @@
import dataclasses
import math
import torch
from src.core.env import BaseEnv, BaseEnvConfig
from gymnasium import spaces
@dataclasses.dataclass
class RotaryCartPoleState:
    """Batched joint state for the Furuta pendulum (one entry per env)."""

    motor_angle: torch.Tensor  # (num_envs,) horizontal arm angle [rad]
    motor_vel: torch.Tensor  # (num_envs,) arm angular velocity [rad/s]
    pendulum_angle: torch.Tensor  # (num_envs,) pendulum angle [rad]; 0 = hanging down
    pendulum_vel: torch.Tensor  # (num_envs,) pendulum angular velocity [rad/s]
@dataclasses.dataclass
class RotaryCartPoleConfig(BaseEnvConfig):
    """Rotary inverted pendulum (Furuta pendulum) task config.

    The motor rotates the arm horizontally; the pendulum swings freely
    at the arm tip. Goal: swing the pendulum up and balance it upright.
    """

    # Reward shaping
    reward_upright_scale: float = 1.0  # cos(pendulum) when upright = +1
    motor_vel_penalty: float = 0.01  # penalise high motor angular velocity
    motor_angle_penalty: float = 0.05  # penalise deviation from centre
    action_penalty: float = 0.05  # penalise large actions (energy cost)
    # ── Software safety limit (env-level, on top of URDF + hardware) ──
    motor_angle_limit_deg: float = 90.0  # terminate episode if exceeded
class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
    """Furuta pendulum / rotary inverted pendulum environment.

    Kinematic chain:
        base_link ─(motor_joint, z)─► arm ─(pendulum_joint, y)─► pendulum

    Observations (6):
        [sin(motor), cos(motor), sin(pendulum), cos(pendulum),
         motor_vel, pendulum_vel]
    The sin/cos encoding avoids the ±π discontinuity of continuous joints.

    Actions (1):
        Motor torque normalised to [-1, 1].
    """

    def __init__(self, config: RotaryCartPoleConfig):
        super().__init__(config)

    # ── Spaces ───────────────────────────────────────────────────
    @property
    def observation_space(self) -> spaces.Space:
        return spaces.Box(low=-torch.inf, high=torch.inf, shape=(6,))

    @property
    def action_space(self) -> spaces.Space:
        return spaces.Box(low=-1.0, high=1.0, shape=(1,))

    # ── State building ───────────────────────────────────────────
    def build_state(self, qpos: torch.Tensor, qvel: torch.Tensor) -> RotaryCartPoleState:
        """Slice the batched joint arrays into a named state struct."""
        return RotaryCartPoleState(
            motor_angle=qpos[:, 0],
            motor_vel=qvel[:, 0],
            pendulum_angle=qpos[:, 1],
            pendulum_vel=qvel[:, 1],
        )

    # ── Observations ─────────────────────────────────────────────
    def compute_observations(self, state: RotaryCartPoleState) -> torch.Tensor:
        """Stack sin/cos angle encodings and raw velocities, shape (N, 6)."""
        components = (
            torch.sin(state.motor_angle),
            torch.cos(state.motor_angle),
            torch.sin(state.pendulum_angle),
            torch.cos(state.pendulum_angle),
            state.motor_vel,
            state.pendulum_vel,
        )
        return torch.stack(list(components), dim=-1)

    # ── Rewards ──────────────────────────────────────────────────
    def compute_rewards(self, state: RotaryCartPoleState, actions: torch.Tensor) -> torch.Tensor:
        """Shaped reward: upright bonus minus velocity/position/action costs.

        A large fixed penalty (-1000) replaces the shaped reward for any
        env whose motor angle exceeds the software limit; the episode
        also terminates there (see ``compute_terminations``).
        """
        cfg = self.config
        # Upright term: -cos(θ) is +1 with the pendulum up, -1 hanging down.
        reward = -torch.cos(state.pendulum_angle) * cfg.reward_upright_scale
        # Discourage violent arm oscillation.
        reward = reward - cfg.motor_vel_penalty * state.motor_vel.pow(2)
        # Keep the arm close to its centre position.
        reward = reward - cfg.motor_angle_penalty * state.motor_angle.pow(2)
        # Energy cost / smoother control.
        reward = reward - cfg.action_penalty * actions.squeeze(-1).pow(2)
        # Hard penalty wherever the software angle limit is exceeded.
        limit_rad = math.radians(cfg.motor_angle_limit_deg)
        over_limit = state.motor_angle.abs() >= limit_rad
        return torch.where(over_limit, torch.tensor(-1000.0, device=reward.device), reward)

    # ── Terminations ─────────────────────────────────────────────
    def compute_terminations(self, state: RotaryCartPoleState) -> torch.Tensor:
        """Terminate any env whose motor angle exceeds the software limit."""
        limit_rad = math.radians(self.config.motor_angle_limit_deg)
        return state.motor_angle.abs() >= limit_rad

    # ── Reset readiness (for SerialRunner) ───────────────────────
    def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
        """Hardware reset gate: pendulum hanging still, motor centred and slow."""
        checks = (
            # Pendulum near hanging-down (angle ~0) and slow
            (abs(float(qpos[0, 1])), math.radians(2.0)),
            (abs(float(qvel[0, 1])), math.radians(5.0)),
            # Motor near centre and slow
            (abs(float(qpos[0, 0])), math.radians(5.0)),
            (abs(float(qvel[0, 0])), math.radians(10.0)),
        )
        return all(value < limit for value, limit in checks)

1
src/hpo/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Hyperparameter optimization — SMAC3 + ClearML Successive Halving."""

684
src/hpo/smac3.py Normal file
View File

@@ -0,0 +1,684 @@
# Requires: pip install smac==2.0.0 ConfigSpace==0.4.20
import contextlib
import time
from collections.abc import Sequence
from functools import wraps
from typing import Any
from clearml import Task
from clearml.automation.optimization import Objective, SearchStrategy
from clearml.automation.parameters import Parameter
from clearml.backend_interface.session import SendError
from ConfigSpace import (
CategoricalHyperparameter,
ConfigurationSpace,
UniformFloatHyperparameter,
UniformIntegerHyperparameter,
)
from smac import MultiFidelityFacade
from smac.intensifier.successive_halving import SuccessiveHalving
from smac.runhistory.dataclasses import TrialInfo, TrialValue
from smac.scenario import Scenario
def retry_on_error(max_retries=5, initial_delay=2.0, backoff=2.0, exceptions=(Exception,)):
    """Decorator factory: retry the wrapped call with exponential backoff.

    The call is attempted up to *max_retries* times. After each failure
    (except the last) the wrapper sleeps, starting at *initial_delay*
    seconds and multiplying by *backoff* each time. Once all attempts
    are exhausted, ``None`` is returned instead of re-raising.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            wait = initial_delay
            attempts_left = max_retries
            while attempts_left > 0:
                attempts_left -= 1
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    if attempts_left == 0:
                        return None  # swallow the final failure
                    time.sleep(wait)
                    wait *= backoff
            return None
        return wrapper
    return decorator
def _encode_param_name(name: str) -> str:
"""Encode parameter name for ConfigSpace (replace / with __SLASH__)"""
return name.replace("/", "__SLASH__")
def _decode_param_name(name: str) -> str:
"""Decode parameter name back to original (replace __SLASH__ with /)"""
return name.replace("__SLASH__", "/")
def _convert_param_to_cs(param: Parameter):
    """Convert a ClearML ``Parameter`` into a ConfigSpace hyperparameter.

    Adapted to ConfigSpace>=1.x (no more ``q`` argument): integer ranges
    with a step size other than 1 are emulated by an explicit categorical
    list of the admissible values.

    Args:
        param: ClearML hyperparameter (discrete list or numeric range).

    Returns:
        A ConfigSpace ``Categorical``/``UniformInteger``/``UniformFloat``
        hyperparameter.

    Raises:
        ValueError: if the parameter exposes neither ``values`` nor a
            ``min_value``/``max_value`` range.
    """
    # Encode the name to avoid ConfigSpace issues with special chars like '/'
    name = _encode_param_name(param.name)
    # Categorical / discrete list
    if hasattr(param, "values"):
        return CategoricalHyperparameter(name=name, choices=list(param.values))
    # Numeric range (float or int)
    if hasattr(param, "min_value") and hasattr(param, "max_value"):
        min_val = param.min_value
        max_val = param.max_value
        # Check if this should be treated as integer
        if isinstance(min_val, int) and isinstance(max_val, int):
            log = getattr(param, "log_scale", False)
            # ClearML ranges may carry step_size=None (attribute present but
            # unset); int(None) would raise TypeError, so guard both cases.
            step = getattr(param, "step_size", None)
            if step is not None:
                sv = int(step)
                if sv != 1:
                    # Emulate quantization by explicit list of values
                    choices = list(range(min_val, max_val + 1, sv))
                    return CategoricalHyperparameter(name=name, choices=choices)
            # Simple uniform integer range
            return UniformIntegerHyperparameter(name=name, lower=min_val, upper=max_val, log=log)
        else:
            # Treat as float
            lower, upper = float(min_val), float(max_val)
            log = getattr(param, "log_scale", False)
            return UniformFloatHyperparameter(name=name, lower=lower, upper=upper, log=log)
    raise ValueError(f"Unsupported Parameter type: {type(param)}")
class OptimizerSMAC(SearchStrategy):
    """SMAC3-based hyperparameter optimizer, matching OptimizerBOHB interface.

    Implements manual Successive Halving on top of ClearML task cloning:
    every configuration starts at the minimum budget, and only the top
    1/eta of each rung is promoted — with checkpoint continuation — to
    the next, eta-times-larger budget.
    """

    def __init__(
        self,
        base_task_id: str,
        hyper_parameters: Sequence[Parameter],
        objective_metric: Objective,
        execution_queue: str,
        num_concurrent_workers: int,
        min_iteration_per_job: int,
        max_iteration_per_job: int,
        total_max_jobs: int,
        pool_period_min: float = 2.0,
        time_limit_per_job: float | None = None,
        compute_time_limit: float | None = None,
        **smac_kwargs: Any,
    ):
        # Initialize base SearchStrategy
        super().__init__(
            base_task_id=base_task_id,
            hyper_parameters=hyper_parameters,
            objective_metric=objective_metric,
            execution_queue=execution_queue,
            num_concurrent_workers=num_concurrent_workers,
            pool_period_min=pool_period_min,
            time_limit_per_job=time_limit_per_job,
            compute_time_limit=compute_time_limit,
            min_iteration_per_job=min_iteration_per_job,
            max_iteration_per_job=max_iteration_per_job,
            total_max_jobs=total_max_jobs,
        )
        # Expose for internal use (access private attributes from base class)
        self.execution_queue = self._execution_queue
        self.min_iterations = min_iteration_per_job
        self.max_iterations = max_iteration_per_job
        self.num_concurrent_workers = self._num_concurrent_workers  # Fix: access private attribute
        # Objective details
        # Handle both single objective (string) and multi-objective (list) cases
        if isinstance(self._objective_metric.title, list):
            self.metric_title = self._objective_metric.title[0]  # Use first objective
        else:
            self.metric_title = self._objective_metric.title
        if isinstance(self._objective_metric.series, list):
            self.metric_series = self._objective_metric.series[0]  # Use first series
        else:
            self.metric_series = self._objective_metric.series
        # ClearML Objective stores sign as a list, e.g., ['max'] or ['min']
        objective_sign = getattr(self._objective_metric, "sign", None) or getattr(self._objective_metric, "order", None)
        # Handle list case - extract first element
        if isinstance(objective_sign, list):
            objective_sign = objective_sign[0] if objective_sign else "max"
        # Default to max if nothing found
        if objective_sign is None:
            objective_sign = "max"
        self.maximize_metric = str(objective_sign).lower() in ("max", "max_global")
        # Build ConfigSpace
        self.config_space = ConfigurationSpace(seed=42)
        for p in self._hyper_parameters:  # Access private attribute correctly
            cs_hp = _convert_param_to_cs(p)
            self.config_space.add(cs_hp)
        # Configure SMAC Scenario
        scenario = Scenario(
            configspace=self.config_space,
            n_trials=self.total_max_jobs,
            min_budget=float(self.min_iterations),
            max_budget=float(self.max_iterations),
            walltime_limit=(self.compute_time_limit * 60) if self.compute_time_limit else None,
            deterministic=True,
        )
        # Configurable budget parameter name
        # Default: Hydra/training.total_timesteps (RL-Framework convention)
        self.budget_param_name = smac_kwargs.pop(
            "budget_param_name", "Hydra/training.total_timesteps"
        )
        # Pop our custom kwargs BEFORE passing smac_kwargs to SuccessiveHalving
        self.max_consecutive_failures = int(
            smac_kwargs.pop("max_consecutive_failures", 3)
        )
        self._consecutive_failures = 0
        # build the Successive Halving intensifier (NOT Hyperband!)
        # Hyperband runs multiple brackets with different starting budgets - wasteful
        # Successive Halving: ALL configs start at min_budget, only best get promoted
        # eta controls the reduction factor (default 3 means keep top 1/3 each round)
        # eta can be overridden via smac_kwargs from HyperParameterOptimizer
        eta = smac_kwargs.pop("eta", 3)  # Default to 3 if not specified
        intensifier = SuccessiveHalving(scenario=scenario, eta=eta, **smac_kwargs)
        # now pass that intensifier instance into the facade
        self.smac = MultiFidelityFacade(
            scenario=scenario,
            target_function=lambda config, budget, seed: 0.0,
            intensifier=intensifier,
            overwrite=True,
        )
        # Bookkeeping
        self.running_tasks = {}  # task_id -> trial info
        self.task_start_times = {}  # task_id -> start time (for timeout)
        self.completed_results = []
        self.best_score_so_far = float("-inf") if self.maximize_metric else float("inf")
        self.time_limit_per_job = time_limit_per_job  # Store time limit (minutes)
        # Checkpoint continuation tracking: config_key -> {budget: task_id}
        # Used to find the previous task's checkpoint when promoting a config
        self.config_to_tasks = {}  # config_key -> {budget: task_id}
        # Manual Successive Halving control
        self.eta = eta
        self.current_budget = float(self.min_iterations)
        self.configs_at_budget = {}  # budget -> list of (config, score, trial)
        self.pending_configs = []  # configs waiting at current_budget - list of (trial, prev_task_id)
        self.evaluated_at_budget = []  # (config, score, trial, task_id) for current budget
        self.smac_asked_configs = set()  # track which configs SMAC has given us
        # Calculate initial rung size for proper Successive Halving
        # With eta=3: rung sizes are n, n/3, n/9, ...
        # Total trials = n * (1 + 1/eta + 1/eta^2 + ...) = n * eta/(eta-1) for infinite series
        # For finite rungs, calculate exactly
        num_rungs = 1
        b = float(self.min_iterations)
        while b * eta <= self.max_iterations:
            num_rungs += 1
            b *= eta
        # Sum of geometric series: 1 + 1/eta + 1/eta^2 + ... (num_rungs terms)
        series_sum = sum(1.0 / (eta**i) for i in range(num_rungs))
        self.initial_rung_size = int(self.total_max_jobs / series_sum)
        self.initial_rung_size = max(self.initial_rung_size, self.num_concurrent_workers)  # at least num_workers
        self.configs_needed_for_rung = self.initial_rung_size  # configs still needed for current rung
        self.rung_closed = False  # whether we've collected all configs for current rung

    @retry_on_error(max_retries=3, initial_delay=5.0, exceptions=(ValueError, SendError, ConnectionError))
    def _get_task_safe(self, task_id: str):
        """Safely get a task with retry logic (returns None on repeated failure)."""
        return Task.get_task(task_id=task_id)

    @retry_on_error(max_retries=3, initial_delay=5.0, exceptions=(ValueError, SendError, ConnectionError))
    def _launch_task(self, config: dict, budget: float, prev_task_id: str | None = None):
        """Launch a task with retry logic for robustness.

        Args:
            config: Hyperparameter configuration dict
            budget: Number of epochs to train
            prev_task_id: Optional task ID from previous budget to continue from (checkpoint)

        Returns:
            The enqueued ClearML task clone, or None if cloning/lookup failed.
        """
        base = self._get_task_safe(task_id=self._base_task_id)
        if base is None:
            return None
        clone = Task.clone(
            source_task=base,
            name=f"HPO Trial - {base.name}",
            parent=Task.current_task().id,  # Set the current HPO task as parent
        )
        # Override hyperparameters
        for k, v in config.items():
            # Decode parameter name back to original (with slashes)
            original_name = _decode_param_name(k)
            # Convert numpy types to Python built-in types
            if hasattr(v, "item"):  # numpy scalar
                param_value = v.item()
            elif isinstance(v, int | float | str | bool):
                param_value = type(v)(v)  # Ensure it's the built-in type
            else:
                param_value = v
            clone.set_parameter(original_name, param_value)
        # Override budget parameter (e.g. total_timesteps) for multi-fidelity
        if self.max_iterations != self.min_iterations:
            clone.set_parameter(self.budget_param_name, int(budget))
        else:
            clone.set_parameter(self.budget_param_name, int(self.max_iterations))
        # If we have a previous task, pass its ID so the worker can download the checkpoint
        if prev_task_id:
            clone.set_parameter("Hydra/training.resume_from_task_id", prev_task_id)
        Task.enqueue(task=clone, queue_name=self.execution_queue)
        # Track start time for timeout enforcement
        self.task_start_times[clone.id] = time.time()
        return clone

    def start(self):
        """Main optimization loop: launch trials, poll for results, promote rungs.

        Blocks until the trial budget is exhausted, all rungs complete,
        the consecutive-failure limit aborts the run, or the compute time
        limit is hit. Returns the list of completed trial result dicts.
        """
        controller = Task.current_task()
        total_launched = 0
        # Keep launching & collecting until budget exhausted
        while total_launched < self.total_max_jobs:
            # Check if current budget rung is complete BEFORE asking for new trials
            # (no running tasks, no pending configs, and we have results for this budget)
            if not self.running_tasks and not self.pending_configs and self.evaluated_at_budget:
                # Rung complete! Promote top performers to next budget
                # Store results for this budget
                self.configs_at_budget[self.current_budget] = self.evaluated_at_budget.copy()
                # Sort by score (best first)
                sorted_configs = sorted(
                    self.evaluated_at_budget,
                    key=lambda x: x[1],  # score
                    reverse=self.maximize_metric,
                )
                # Move to next budget?
                next_budget = self.current_budget * self.eta
                if next_budget <= self.max_iterations:
                    # How many to promote (top 1/eta)
                    n_promote = max(1, len(sorted_configs) // self.eta)
                    promoted = sorted_configs[:n_promote]
                    # Update budget and reset for next rung
                    self.current_budget = next_budget
                    self.evaluated_at_budget = []
                    self.configs_needed_for_rung = 0  # promoted configs are all we need
                    self.rung_closed = True  # rung is pre-filled with promoted configs
                    # Re-queue promoted configs with new budget
                    # Include the previous task ID for checkpoint continuation
                    for _cfg, _score, old_trial, prev_task_id in promoted:
                        new_trial = TrialInfo(
                            config=old_trial.config,
                            instance=old_trial.instance,
                            seed=old_trial.seed,
                            budget=self.current_budget,
                        )
                        # Store as tuple: (trial, prev_task_id)
                        self.pending_configs.append((new_trial, prev_task_id))
                else:
                    # All budgets complete
                    break
            # Fill pending_configs with new trials ONLY if we haven't closed this rung yet
            # For the first rung: ask SMAC for initial_rung_size configs total
            # For subsequent rungs: only use promoted configs (rung is already closed)
            while (
                not self.rung_closed
                and len(self.pending_configs) + len(self.evaluated_at_budget) + len(self.running_tasks)
                < self.initial_rung_size
                and total_launched < self.total_max_jobs
            ):
                # NOTE(review): SMAC's ask() is documented to raise when the
                # search space is exhausted rather than return None — confirm.
                trial = self.smac.ask()
                if trial is None:
                    self.rung_closed = True
                    break
                # Create new trial with forced budget (TrialInfo is frozen, can't modify)
                trial_with_budget = TrialInfo(
                    config=trial.config,
                    instance=trial.instance,
                    seed=trial.seed,
                    budget=self.current_budget,
                )
                cfg_key = str(sorted(trial.config.items()))
                if cfg_key not in self.smac_asked_configs:
                    self.smac_asked_configs.add(cfg_key)
                    # Store as tuple: (trial, None) - no previous task for new configs
                    self.pending_configs.append((trial_with_budget, None))
            # Check if we've collected enough configs for this rung
            if (
                not self.rung_closed
                and len(self.pending_configs) + len(self.evaluated_at_budget) + len(self.running_tasks)
                >= self.initial_rung_size
            ):
                self.rung_closed = True
            # Launch pending configs up to concurrent limit
            while self.pending_configs and len(self.running_tasks) < self.num_concurrent_workers:
                # Unpack tuple: (trial, prev_task_id)
                trial, prev_task_id = self.pending_configs.pop(0)
                t = self._launch_task(trial.config, self.current_budget, prev_task_id=prev_task_id)
                if t is None:
                    # Launch failed, mark trial as failed and continue.
                    # SMAC always MINIMIZES cost, so the worst possible outcome
                    # is +inf regardless of metric direction. (Bug fix: the
                    # previous sign flip reported -inf cost when minimizing,
                    # which made failed launches look like incumbents.)
                    self.smac.tell(trial, TrialValue(cost=float("inf")))
                    total_launched += 1
                    continue
                self.running_tasks[t.id] = trial
                # Track which task ID was used for this config at this budget
                cfg_key = str(sorted(trial.config.items()))
                if cfg_key not in self.config_to_tasks:
                    self.config_to_tasks[cfg_key] = {}
                self.config_to_tasks[cfg_key][self.current_budget] = t.id
                total_launched += 1
            if not self.running_tasks and not self.pending_configs:
                break
            # Abort if too many consecutive trials failed (likely a config bug)
            if (
                self.max_consecutive_failures > 0
                and self._consecutive_failures >= self.max_consecutive_failures
            ):
                controller.get_logger().report_text(
                    f"ABORTING: {self._consecutive_failures} consecutive trial "
                    f"failures (limit: {self.max_consecutive_failures}). "
                    "Check the trial logs for errors."
                )
                # Stop any still-running tasks
                for tid in list(self.running_tasks):
                    with contextlib.suppress(Exception):
                        t = self._get_task_safe(task_id=tid)
                        if t:
                            t.mark_stopped(force=True)
                self.running_tasks.clear()
                break
            # Poll for finished or timed out
            done = []
            timed_out = []
            failed_to_check = []
            for tid, _tri in self.running_tasks.items():
                try:
                    task = self._get_task_safe(task_id=tid)
                    if task is None:
                        failed_to_check.append(tid)
                        continue
                    st = task.get_status()
                    # Check if task completed normally
                    if st == Task.TaskStatusEnum.completed or st in (
                        Task.TaskStatusEnum.failed,
                        Task.TaskStatusEnum.stopped,
                    ):
                        done.append(tid)
                    # Check for timeout (if time limit is set)
                    elif self.time_limit_per_job and tid in self.task_start_times:
                        elapsed_minutes = (time.time() - self.task_start_times[tid]) / 60.0
                        if elapsed_minutes > self.time_limit_per_job:
                            with contextlib.suppress(Exception):
                                task.mark_stopped(force=True)
                            timed_out.append(tid)
                except Exception:
                    # Don't mark as failed immediately, might be transient
                    # Only mark failed after multiple consecutive failures
                    if not hasattr(self, "_task_check_failures"):
                        self._task_check_failures = {}
                    self._task_check_failures[tid] = self._task_check_failures.get(tid, 0) + 1
                    if self._task_check_failures[tid] >= 5:  # 5 consecutive failures
                        failed_to_check.append(tid)
                        del self._task_check_failures[tid]
            # Process tasks that failed to check
            for tid in failed_to_check:
                tri = self.running_tasks.pop(tid)
                if tid in self.task_start_times:
                    del self.task_start_times[tid]
                # Tell SMAC this trial failed with worst possible score
                res = float("-inf") if self.maximize_metric else float("inf")
                cost = -res if self.maximize_metric else res
                self.smac.tell(tri, TrialValue(cost=cost))
                self.completed_results.append(
                    {
                        "task_id": tid,
                        "config": tri.config,
                        "budget": tri.budget,
                        "value": res,
                        "failed": True,
                    }
                )
                # Store result with task_id for checkpoint tracking
                self.evaluated_at_budget.append((tri.config, res, tri, tid))
            # Process completed tasks
            for tid in done:
                tri = self.running_tasks.pop(tid)
                if tid in self.task_start_times:
                    del self.task_start_times[tid]
                # Clear any accumulated failures for this task
                if hasattr(self, "_task_check_failures") and tid in self._task_check_failures:
                    del self._task_check_failures[tid]
                task = self._get_task_safe(task_id=tid)
                # Detect hard-failed tasks (crashed / errored) vs completed
                task_failed = False
                if task is not None:
                    st = task.get_status()
                    task_failed = st in (
                        Task.TaskStatusEnum.failed,
                        Task.TaskStatusEnum.stopped,
                    )
                if task is None:
                    res = float("-inf") if self.maximize_metric else float("inf")
                    task_failed = True
                else:
                    res = self._get_objective(task)
                    if res is None or res == float("-inf") or res == float("inf"):
                        res = float("-inf") if self.maximize_metric else float("inf")
                # Track consecutive failures for abort logic
                if task_failed:
                    self._consecutive_failures += 1
                else:
                    self._consecutive_failures = 0  # reset on any success
                cost = -res if self.maximize_metric else res
                self.smac.tell(tri, TrialValue(cost=cost))
                self.completed_results.append(
                    {
                        "task_id": tid,
                        "config": tri.config,
                        "budget": tri.budget,
                        "value": res,
                    }
                )
                # Store result for this budget rung with task_id for checkpoint tracking
                self.evaluated_at_budget.append((tri.config, res, tri, tid))
                iteration = len(self.completed_results)
                # Always report the trial score (even if it's bad)
                if res is not None and res != float("-inf") and res != float("inf"):
                    controller.get_logger().report_scalar(
                        title="Optimization", series="trial_score", value=res, iteration=iteration
                    )
                controller.get_logger().report_scalar(
                    title="Optimization",
                    series="trial_budget",
                    value=tri.budget or self.max_iterations,
                    iteration=iteration,
                )
                # Update best score tracking based on actual results
                if res is not None and res != float("-inf") and res != float("inf"):
                    if self.maximize_metric:
                        self.best_score_so_far = max(self.best_score_so_far, res)
                    elif res < self.best_score_so_far:
                        self.best_score_so_far = res
                # Always report best score so far (shows flat line when no improvement)
                if self.best_score_so_far != float("-inf") and self.best_score_so_far != float("inf"):
                    controller.get_logger().report_scalar(
                        title="Optimization", series="best_score", value=self.best_score_so_far, iteration=iteration
                    )
                # Report running statistics
                valid_scores = [
                    r["value"]
                    for r in self.completed_results
                    if r["value"] is not None and r["value"] != float("-inf") and r["value"] != float("inf")
                ]
                if valid_scores:
                    controller.get_logger().report_scalar(
                        title="Optimization",
                        series="mean_score",
                        value=sum(valid_scores) / len(valid_scores),
                        iteration=iteration,
                    )
                controller.get_logger().report_scalar(
                    title="Progress",
                    series="completed_trials",
                    value=len(self.completed_results),
                    iteration=iteration,
                )
                controller.get_logger().report_scalar(
                    title="Progress", series="running_tasks", value=len(self.running_tasks), iteration=iteration
                )
            # Process timed out tasks (treat as failed with current objective value)
            for tid in timed_out:
                tri = self.running_tasks.pop(tid)
                if tid in self.task_start_times:
                    del self.task_start_times[tid]
                # Clear any accumulated failures for this task
                if hasattr(self, "_task_check_failures") and tid in self._task_check_failures:
                    del self._task_check_failures[tid]
                # Try to get the last objective value before timeout
                task = self._get_task_safe(task_id=tid)
                if task is None:
                    res = float("-inf") if self.maximize_metric else float("inf")
                else:
                    res = self._get_objective(task)
                    if res is None:
                        res = float("-inf") if self.maximize_metric else float("inf")
                cost = -res if self.maximize_metric else res
                self.smac.tell(tri, TrialValue(cost=cost))
                self.completed_results.append(
                    {
                        "task_id": tid,
                        "config": tri.config,
                        "budget": tri.budget,
                        "value": res,
                        "timed_out": True,
                    }
                )
                # Store timed out result for this budget rung with task_id
                self.evaluated_at_budget.append((tri.config, res, tri, tid))
            time.sleep(self.pool_period_minutes * 60)  # Fix: use correct attribute name from base class
            if self.compute_time_limit and controller.get_runtime() > self.compute_time_limit * 60:
                break
        # Finalize
        self._finalize()
        return self.completed_results

    @retry_on_error(max_retries=3, initial_delay=2.0, exceptions=(SendError, ConnectionError, KeyError))
    def _get_objective(self, task: Task):
        """Get objective metric value with retry logic for robustness."""
        if task is None:
            return None
        try:
            m = task.get_last_scalar_metrics()
            if not m:
                return None
            metric_data = m[self.metric_title][self.metric_series]
            # ClearML returns dict with 'last', 'min', 'max' keys representing
            # the last/min/max values of this series over ALL logged iterations.
            # For snake_length/train_max: 'last' is the last logged train_max value,
            # 'max' is the highest train_max ever logged during training.
            # Use 'max' if maximizing (we want the best performance achieved),
            # 'min' if minimizing, fallback to 'last'
            if self.maximize_metric and "max" in metric_data:
                result = metric_data["max"]
            elif not self.maximize_metric and "min" in metric_data:
                result = metric_data["min"]
            else:
                result = metric_data["last"]
            return result
        except (KeyError, Exception):
            return None

    def _finalize(self):
        """Report the final best score and upload the incumbent config artifact."""
        controller = Task.current_task()
        # Report final best score
        controller.get_logger().report_text(f"Final best score: {self.best_score_so_far}")
        # Also try to get SMAC's incumbent for comparison
        try:
            incumbent = self.smac.intensifier.get_incumbent()
            if incumbent is not None:
                runhistory = self.smac.runhistory
                # Try different ways to get the cost
                incumbent_cost = None
                try:
                    incumbent_cost = runhistory.get_cost(incumbent)
                except Exception:
                    # Fallback: search through runhistory manually
                    for trial_key, trial_value in runhistory.items():
                        trial_config = runhistory.get_config(trial_key.config_id)
                        if trial_config == incumbent and (incumbent_cost is None or trial_value.cost < incumbent_cost):
                            incumbent_cost = trial_value.cost
                if incumbent_cost is not None:
                    score = -incumbent_cost if self.maximize_metric else incumbent_cost
                    controller.get_logger().report_text(f"SMAC incumbent: {incumbent}, score: {score}")
                    controller.upload_artifact(
                        "best_config",
                        {"config": dict(incumbent), "score": score, "our_best_score": self.best_score_so_far},
                    )
                else:
                    controller.upload_artifact("best_config", {"our_best_score": self.best_score_so_far})
        except Exception as e:
            controller.get_logger().report_text(f"Error getting SMAC incumbent: {e}")
            controller.upload_artifact("best_config", {"our_best_score": self.best_score_so_far})

View File

@@ -4,7 +4,7 @@ from gymnasium import spaces
from skrl.models.torch import Model, GaussianMixin, DeterministicMixin
class SharedMLP(GaussianMixin, DeterministicMixin, Model):
def __init__(self, observation_space: spaces.Space, action_space: spaces.Space, device: torch.device, hidden_sizes: tuple[int, ...] = (32, 32), clip_actions: bool = False, clip_log_std: bool = True, min_log_std: float = -20.0, max_log_std: float = 2.0, initial_log_std: float = 0.0):
def __init__(self, observation_space: spaces.Space, action_space: spaces.Space, device: torch.device, hidden_sizes: tuple[int, ...] = (32, 32), clip_actions: bool = False, clip_log_std: bool = True, min_log_std: float = -2.0, max_log_std: float = 2.0, initial_log_std: float = 0.0):
Model.__init__(self, observation_space, action_space, device)
GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std)
DeterministicMixin.__init__(self, clip_actions)

286
src/runners/mjx.py Normal file
View File

@@ -0,0 +1,286 @@
"""GPU-batched MuJoCo simulation using MJX (JAX backend).
MJX runs all environments in parallel on GPU via JAX, providing
~10-100x speedup over the CPU MuJoCoRunner for large env counts (1024+).
Requirements:
pip install 'jax[cuda12]' # NVIDIA GPU
pip install jax # CPU fallback
"""
import dataclasses
import structlog
import torch
try:
import jax
import jax.numpy as jnp
import mujoco
from mujoco import mjx
except ImportError as e:
raise ImportError(
"MJX runner requires JAX and MuJoCo MJX. Install with:\n"
" pip install 'jax[cuda12]' # GPU\n"
" pip install jax # CPU\n"
) from e
import numpy as np
from src.core.env import BaseEnv
from src.core.runner import BaseRunner, BaseRunnerConfig
from src.runners.mujoco import (
ActuatorLimits,
load_mujoco_model,
)
log = structlog.get_logger()
@dataclasses.dataclass
class MJXRunnerConfig(BaseRunnerConfig):
    """Configuration for the GPU-batched MJX runner."""

    num_envs: int = 1024  # number of parallel (batched) environments
    device: str = "cuda"  # torch device that receives observations
    dt: float = 0.002  # physics timestep passed to the MuJoCo model [s]
    substeps: int = 10  # physics substeps per control step (JIT-compiled)
class MJXRunner(BaseRunner[MJXRunnerConfig]):
"""GPU-batched MuJoCo runner using MJX (JAX).
Physics runs entirely on GPU via JAX; observations flow to
PyTorch through zero-copy DLPack transfers.
"""
def __init__(self, env: BaseEnv, config: MJXRunnerConfig):
    """Delegate to BaseRunner, which invokes ``_sim_initialize(config)``."""
    super().__init__(env, config)
@property
def num_envs(self) -> int:
    # Number of batched environments, fixed by the runner config.
    return self.config.num_envs
@property
def device(self) -> torch.device:
    # Torch device for observation/action tensors (e.g. "cuda" or "cpu").
    return torch.device(self.config.device)
# ── Initialization ───────────────────────────────────────────────
def _sim_initialize(self, config: MJXRunnerConfig) -> None:
    """Build the MJX model, batched state, motor metadata, and JIT functions.

    Loads the CPU MuJoCo model, uploads it to the accelerator, creates a
    batched default state with small random perturbations, collects motor
    (ctrl index, qvel dof index) pairs for actuators with a motor model,
    pre-compiles the JIT hot paths, and keeps a CPU MjData for rendering.
    """
    # Step 1: Load CPU model (reuses URDF → MJCF → actuator injection)
    self._mj_model = load_mujoco_model(self.env.robot)
    self._mj_model.opt.timestep = config.dt
    self._nq = self._mj_model.nq  # generalized position dimension
    self._nv = self._mj_model.nv  # generalized velocity dimension
    self._nu = self._mj_model.nu  # number of actuators
    # Step 2: Put model on GPU
    self._mjx_model = mjx.put_model(self._mj_model)
    # Step 3: Default reset state on GPU
    default_data = mujoco.MjData(self._mj_model)
    mujoco.mj_forward(self._mj_model, default_data)
    self._default_mjx_data = mjx.put_data(self._mj_model, default_data)
    # Step 4: Initialise all environments with small perturbations
    self._rng = jax.random.PRNGKey(42)
    self._batch_data = self._make_batched_data(config.num_envs)
    # Step 4b: Build motor model info (ctrl_idx, qvel_idx, ActuatorConfig)
    self._motor_info: list[tuple[int, int]] = []
    self._motor_acts: list = []
    for ctrl_idx, act in enumerate(self.env.robot.actuators):
        if act.has_motor_model:
            jnt_id = mujoco.mj_name2id(
                self._mj_model, mujoco.mjtObj.mjOBJ_JOINT, act.joint,
            )
            # jnt_dofadr maps the joint to its row in qvel
            qvel_idx = self._mj_model.jnt_dofadr[jnt_id]
            self._motor_info.append((ctrl_idx, qvel_idx))
            self._motor_acts.append(act)
    # Step 5: JIT-compile the hot-path functions
    self._compile_jit_fns(config.substeps)
    # Keep one CPU MjData for offscreen rendering
    self._render_data = mujoco.MjData(self._mj_model)
    log.info(
        "mjx_runner_ready",
        num_envs=config.num_envs,
        substeps=config.substeps,
        jax_devices=str(jax.devices()),
    )
def _make_batched_data(self, n: int):
"""Create *n* environments with small random perturbations."""
self._rng, k1, k2 = jax.random.split(self._rng, 3)
pq = jax.random.uniform(k1, (n, self._nq), minval=-0.05, maxval=0.05)
pv = jax.random.uniform(k2, (n, self._nv), minval=-0.05, maxval=0.05)
default = self._default_mjx_data
model = self._mjx_model
def _init_one(pq_i, pv_i):
d = default.replace(
qpos=default.qpos + pq_i,
qvel=default.qvel + pv_i,
)
return mjx.forward(model, d)
return jax.vmap(_init_one)(pq, pv)
def _compile_jit_fns(self, substeps: int) -> None:
"""Pre-compile the two hot-path functions so the first call is fast."""
model = self._mjx_model
default = self._default_mjx_data
lim = ActuatorLimits(self._mj_model)
act_jnt_ids = jnp.array(lim.jnt_ids)
act_limited = jnp.array(lim.limited)
act_lo = jnp.array(lim.lo)
act_hi = jnp.array(lim.hi)
act_gs = jnp.array(lim.gear_sign)
# ── Motor model params (JAX arrays for JIT) ─────────────────
_has_motor = len(self._motor_info) > 0
if _has_motor:
acts = self._motor_acts
_ctrl_ids = jnp.array([c for c, _ in self._motor_info])
_qvel_ids = jnp.array([q for _, q in self._motor_info])
_dz_pos = jnp.array([a.deadzone[0] for a in acts])
_dz_neg = jnp.array([a.deadzone[1] for a in acts])
_gear_pos = jnp.array([a.gear[0] for a in acts])
_gear_neg = jnp.array([a.gear[1] for a in acts])
_gear_avg = jnp.array([a.gear_avg for a in acts])
_fl_pos = jnp.array([a.frictionloss[0] for a in acts])
_fl_neg = jnp.array([a.frictionloss[1] for a in acts])
_damp_pos = jnp.array([a.damping[0] for a in acts])
_damp_neg = jnp.array([a.damping[1] for a in acts])
_visc_quad = jnp.array([a.viscous_quadratic for a in acts])
_back_emf = jnp.array([a.back_emf_gain for a in acts])
# ── Batched step (N substeps per call) ──────────────────────
@jax.jit
def step_fn(data):
# Software limit switch: clamp ctrl once before substeps.
pos = data.qpos[:, act_jnt_ids]
ctrl = data.ctrl
at_hi = act_limited & (pos >= act_hi) & (act_gs * ctrl > 0)
at_lo = act_limited & (pos <= act_lo) & (act_gs * ctrl < 0)
ctrl = jnp.where(at_hi | at_lo, 0.0, ctrl)
if _has_motor:
# Deadzone + asymmetric gear compensation
mc = ctrl[:, _ctrl_ids]
mc = jnp.where((mc >= 0) & (mc < _dz_pos), 0.0, mc)
mc = jnp.where((mc < 0) & (mc > -_dz_neg), 0.0, mc)
gear_dir = jnp.where(mc >= 0, _gear_pos, _gear_neg)
mc = mc * gear_dir / _gear_avg
ctrl = ctrl.at[:, _ctrl_ids].set(mc)
data = data.replace(ctrl=ctrl)
def body(_, d):
if _has_motor:
vel = d.qvel[:, _qvel_ids]
mc = d.ctrl[:, _ctrl_ids]
# Coulomb friction (direction-dependent)
fl = jnp.where(vel > 0, _fl_pos, _fl_neg)
torque = -jnp.where(
jnp.abs(vel) > 1e-6, jnp.sign(vel) * fl, 0.0,
)
# Viscous damping (direction-dependent)
damp = jnp.where(vel > 0, _damp_pos, _damp_neg)
torque = torque - damp * vel
# Quadratic velocity drag
torque = torque - _visc_quad * vel * jnp.abs(vel)
# Back-EMF torque reduction
bemf = _back_emf * vel * jnp.sign(mc)
torque = torque - jnp.where(
jnp.abs(mc) > 1e-6, bemf, 0.0,
)
torque = jnp.clip(torque, -10.0, 10.0)
d = d.replace(
qfrc_applied=d.qfrc_applied.at[:, _qvel_ids].set(torque),
)
return jax.vmap(mjx.step, in_axes=(None, 0))(model, d)
return jax.lax.fori_loop(0, substeps, body, data)
self._jit_step = step_fn
# ── Selective reset ─────────────────────────────────────────
@jax.jit
def reset_fn(data, mask, rng):
rng, k1, k2 = jax.random.split(rng, 3)
ne = data.qpos.shape[0]
pq = jax.random.uniform(
k1, (ne, default.qpos.shape[0]), minval=-0.05, maxval=0.05,
)
pv = jax.random.uniform(
k2, (ne, default.qvel.shape[0]), minval=-0.05, maxval=0.05,
)
m = mask[:, None] # (num_envs, 1) broadcast helper
new_qpos = jnp.where(m, default.qpos + pq, data.qpos)
new_qvel = jnp.where(m, default.qvel + pv, data.qvel)
new_ctrl = jnp.where(m, 0.0, data.ctrl)
new_data = data.replace(qpos=new_qpos, qvel=new_qvel, ctrl=new_ctrl)
new_data = jax.vmap(mjx.forward, in_axes=(None, 0))(model, new_data)
return new_data, rng
self._jit_reset = reset_fn
# ── Step / Reset ─────────────────────────────────────────────────
def _sim_step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
# PyTorch → JAX (zero-copy on GPU via DLPack)
actions_jax = jnp.from_dlpack(actions.detach().contiguous())
# Set ctrl & run N substeps for all environments
self._batch_data = self._batch_data.replace(ctrl=actions_jax)
self._batch_data = self._jit_step(self._batch_data)
# JAX → PyTorch (zero-copy on GPU via DLPack, cast to float32)
qpos = torch.from_dlpack(self._batch_data.qpos.astype(jnp.float32))
qvel = torch.from_dlpack(self._batch_data.qvel.astype(jnp.float32))
return qpos, qvel
def _sim_reset(self, env_ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
# Build boolean mask (fixed shape → no JIT recompilation)
mask = torch.zeros(
self.config.num_envs, dtype=torch.bool, device=self.device,
)
mask[env_ids] = True
mask_jax = jnp.from_dlpack(mask)
self._batch_data, self._rng = self._jit_reset(
self._batch_data, mask_jax, self._rng,
)
# Return only the reset environments' states
ids_np = env_ids.cpu().numpy()
rq = self._batch_data.qpos[ids_np].astype(jnp.float32)
rv = self._batch_data.qvel[ids_np].astype(jnp.float32)
return torch.from_dlpack(rq), torch.from_dlpack(rv)
# ── Rendering ────────────────────────────────────────────────────
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
"""Offscreen render — copies one env's state from GPU to CPU."""
self._render_data.qpos[:] = np.asarray(self._batch_data.qpos[env_idx])
self._render_data.qvel[:] = np.asarray(self._batch_data.qvel[env_idx])
self._render_data.ctrl[:] = np.asarray(self._batch_data.ctrl[env_idx])
mujoco.mj_forward(self._mj_model, self._render_data)
if not hasattr(self, "_offscreen_renderer"):
self._offscreen_renderer = mujoco.Renderer(
self._mj_model, width=640, height=480,
)
self._offscreen_renderer.update_scene(self._render_data)
return self._offscreen_renderer.render().copy()

View File

@@ -1,19 +1,206 @@
import dataclasses
import os
import tempfile
import xml.etree.ElementTree as ET
from src.core.env import BaseEnv, ActuatorConfig
from src.core.runner import BaseRunner, BaseRunnerConfig
import torch
import numpy as np
from pathlib import Path
import mujoco
import mujoco.viewer
import numpy as np
import torch
from src.core.env import BaseEnv
from src.core.robot import RobotConfig
from src.core.runner import BaseRunner, BaseRunnerConfig
@dataclasses.dataclass
class MuJoCoRunnerConfig(BaseRunnerConfig):
num_envs: int = 16
device: str = "cpu"
dt: float = 0.02
substeps: int = 2
dt: float = 0.002
substeps: int = 10
class ActuatorLimits:
    """Software limit-switch: cuts motor ctrl when a joint hits its range.

    The real robot has physical limit switches that kill motor current
    at the travel endpoints. MuJoCo's built-in joint limits only apply
    a spring force — they don't zero the actuator signal. This class
    replicates the hardware behavior.
    """

    def __init__(self, model: mujoco.MjModel) -> None:
        """Cache per-actuator limit data from *model* for fast enforcement."""
        # Joint driven by each actuator (first transmission target).
        joints = model.actuator_trnid[:model.nu, 0]
        self.jnt_ids = joints
        # Which of those joints have a configured range, and that range.
        self.limited = model.jnt_limited[joints].astype(bool)
        ranges = model.jnt_range[joints]
        self.lo = ranges[:, 0]
        self.hi = ranges[:, 1]
        # Sign of the gear's first component — orients the push direction.
        self.gear_sign = np.sign(model.actuator_gear[:model.nu, 0])

    def enforce(self, model: mujoco.MjModel, data: mujoco.MjData) -> None:
        """Zero ctrl that would push past joint limits (call every substep)."""
        if not self.limited.any():
            return
        pos = data.qpos[self.jnt_ids]
        # Ctrl expressed in joint direction (gear may flip the sign).
        push = self.gear_sign * data.ctrl[:model.nu]
        over_hi = (pos >= self.hi) & (push > 0)
        under_lo = (pos <= self.lo) & (push < 0)
        blocked = self.limited & (over_hi | under_lo)
        data.ctrl[blocked] = 0.0
# ── Public utilities ─────────────────────────────────────────────────
def load_mujoco_model(robot: RobotConfig) -> mujoco.MjModel:
    """Load a URDF (or MJCF) and apply robot.yaml settings.

    Single model-loading entry point for all MuJoCo-based code:
    training runners, MJX, and system identification.

    Two-step approach required because MuJoCo's URDF parser ignores
    ``<actuator>`` in the ``<mujoco>`` extension block:

      1. Load the URDF -> MuJoCo converts it to internal MJCF
      2. Export the MJCF XML, inject actuators + joint overrides, reload

    This keeps the URDF clean (re-exportable from CAD) -- all hardware
    tuning lives in ``robot.yaml``.
    """
    abs_path = robot.urdf_path.resolve()
    model_dir = abs_path.parent
    is_urdf = abs_path.suffix.lower() == ".urdf"
    # -- Step 1: Load URDF with meshdir injection --
    if is_urdf:
        tree = ET.parse(abs_path)
        root = tree.getroot()
        # MuJoCo's URDF parser strips directory prefixes from mesh
        # filenames, so we inject a <mujoco><compiler meshdir="..."/>
        # block. The original URDF stays clean and simulator-agnostic.
        meshdir = None
        for mesh_el in root.iter("mesh"):
            fn = mesh_el.get("filename", "")
            parent = str(Path(fn).parent)
            if parent and parent != ".":
                # First non-trivial directory prefix wins — assumes all
                # meshes live under the same directory.
                meshdir = parent
                break
        if meshdir:
            mj_ext = ET.SubElement(root, "mujoco")
            ET.SubElement(mj_ext, "compiler", attrib={
                "meshdir": meshdir,
                "balanceinertia": "true",
            })
        # Write to a temp file (unique name for multiprocessing safety).
        # Placed next to the URDF so relative mesh paths still resolve.
        fd, tmp_path = tempfile.mkstemp(
            suffix=".urdf", prefix="_mj_", dir=str(model_dir),
        )
        os.close(fd)
        try:
            tree.write(tmp_path, xml_declaration=True, encoding="unicode")
            model_raw = mujoco.MjModel.from_xml_path(tmp_path)
        finally:
            Path(tmp_path).unlink(missing_ok=True)
    else:
        model_raw = mujoco.MjModel.from_xml_path(str(abs_path))
    # If robot.yaml has no actuators/joints, we're done.
    if not robot.actuators and not robot.joints:
        return model_raw
    # -- Step 2: Export MJCF, inject actuators + joint overrides --
    fd, tmp_path = tempfile.mkstemp(
        suffix=".xml", prefix="_mj_", dir=str(model_dir),
    )
    os.close(fd)
    try:
        # Round-trip through MuJoCo's own MJCF export of the loaded model.
        mujoco.mj_saveLastXML(tmp_path, model_raw)
        mjcf_str = Path(tmp_path).read_text()
        root = ET.fromstring(mjcf_str)
        # -- Inject actuators --
        if robot.actuators:
            act_elem = ET.SubElement(root, "actuator")
            for act in robot.actuators:
                attribs = {
                    "name": f"{act.joint}_{act.type}",
                    "joint": act.joint,
                    "gear": str(act.gear_avg),
                    "ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
                }
                # dyntype is only available on <general>, not on
                # shortcut elements like <motor>/<position>/<velocity>.
                use_general = act.filter_tau > 0
                if use_general:
                    attribs["dyntype"] = "filter"
                    attribs["dynprm"] = str(act.filter_tau)
                    attribs["gaintype"] = "fixed"
                    # Reproduce each shortcut element's gain/bias semantics
                    # on the <general> element by hand.
                    if act.type == "position":
                        attribs["biastype"] = "affine"
                        attribs["gainprm"] = str(act.kp)
                        attribs["biasprm"] = f"0 -{act.kp} -{act.kv}"
                    elif act.type == "velocity":
                        attribs["biastype"] = "affine"
                        attribs["gainprm"] = str(act.kp)
                        attribs["biasprm"] = f"0 0 -{act.kp}"
                    else:  # motor
                        attribs["biastype"] = "none"
                    ET.SubElement(act_elem, "general", attrib=attribs)
                else:
                    if act.type == "position":
                        attribs["kp"] = str(act.kp)
                        if act.kv > 0:
                            attribs["kv"] = str(act.kv)
                    elif act.type == "velocity":
                        attribs["kp"] = str(act.kp)
                    ET.SubElement(act_elem, act.type, attrib=attribs)
        # -- Apply joint overrides from robot.yaml --
        # For actuated joints with a motor model, MuJoCo damping/frictionloss
        # are set to 0 — the motor model handles them via qfrc_applied.
        joint_damping: dict[str, float] = {}
        joint_frictionloss: dict[str, float] = {}
        for act in robot.actuators:
            if act.has_motor_model:
                joint_damping[act.joint] = 0.0
                joint_frictionloss[act.joint] = 0.0
        joint_armature: dict[str, float] = {}
        # Explicit per-joint robot.yaml values take precedence (applied last).
        for name, jcfg in robot.joints.items():
            if jcfg.damping is not None:
                joint_damping[name] = jcfg.damping
            if jcfg.armature is not None:
                joint_armature[name] = jcfg.armature
            if jcfg.frictionloss is not None:
                joint_frictionloss[name] = jcfg.frictionloss
        for body in root.iter("body"):
            for jnt in body.findall("joint"):
                name = jnt.get("name")
                if name in joint_damping:
                    jnt.set("damping", str(joint_damping[name]))
                if name in joint_armature:
                    jnt.set("armature", str(joint_armature[name]))
                if name in joint_frictionloss:
                    jnt.set("frictionloss", str(joint_frictionloss[name]))
        # Disable self-collision on all geoms.
        for geom in root.iter("geom"):
            geom.set("contype", "0")
            geom.set("conaffinity", "0")
        modified_xml = ET.tostring(root, encoding="unicode")
        Path(tmp_path).write_text(modified_xml)
        return mujoco.MjModel.from_xml_path(tmp_path)
    finally:
        Path(tmp_path).unlink(missing_ok=True)
# -- Runner -----------------------------------------------------------
class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
def __init__(self, env: BaseEnv, config: MuJoCoRunnerConfig):
@@ -27,71 +214,47 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
def device(self) -> torch.device:
return torch.device(self.config.device)
@staticmethod
def _load_model_with_actuators(model_path: str, actuators: list[ActuatorConfig]) -> mujoco.MjModel:
"""Load a URDF (or MJCF) file and programmatically inject actuators.
Two-step approach required because MuJoCo's URDF parser ignores
<actuator> in the <mujoco> extension block:
1. Load the URDF → MuJoCo converts it to internal MJCF
2. Export the MJCF XML, add <actuator> elements, reload
This keeps the URDF clean and standard — actuator config lives in
the env config (Isaac Lab pattern), not in the robot file.
"""
# Step 1: Load URDF/MJCF as-is (no actuators)
model_raw = mujoco.MjModel.from_xml_path(model_path)
if not actuators:
return model_raw
# Step 2: Export internal MJCF representation
tmp_mjcf = tempfile.mktemp(suffix=".xml")
try:
mujoco.mj_saveLastXML(tmp_mjcf, model_raw)
with open(tmp_mjcf) as f:
mjcf_str = f.read()
finally:
import os
os.unlink(tmp_mjcf)
# Step 3: Inject actuators into the MJCF XML
root = ET.fromstring(mjcf_str)
act_elem = ET.SubElement(root, "actuator")
for act in actuators:
ET.SubElement(act_elem, "motor", attrib={
"name": f"{act.joint}_motor",
"joint": act.joint,
"gear": str(act.gear),
"ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
})
# Step 4: Reload from modified MJCF
modified_xml = ET.tostring(root, encoding="unicode")
return mujoco.MjModel.from_xml_string(modified_xml)
def _sim_initialize(self, config: MuJoCoRunnerConfig) -> None:
model_path = self.env.config.model_path
if model_path is None:
raise ValueError("model_path must be specified in the environment config")
actuators = self.env.config.actuators
self._model = self._load_model_with_actuators(str(model_path), actuators)
self._model = load_mujoco_model(self.env.robot)
self._model.opt.timestep = config.dt
self._data: list[mujoco.MjData] = [mujoco.MjData(self._model) for _ in range(config.num_envs)]
self._data: list[mujoco.MjData] = [
mujoco.MjData(self._model) for _ in range(config.num_envs)
]
self._nq = self._model.nq
self._nv = self._model.nv
self._limits = ActuatorLimits(self._model)
# Build motor model: list of (actuator_config, joint_qvel_index) for
# actuators that have asymmetric motor dynamics.
self._motor_actuators: list[tuple] = []
for act in self.env.robot.actuators:
if act.has_motor_model:
jnt_id = mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_JOINT, act.joint)
qvel_idx = self._model.jnt_dofadr[jnt_id]
self._motor_actuators.append((act, qvel_idx))
def _sim_step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
actions_np: np.ndarray = actions.cpu().numpy()
# Apply per-actuator ctrl transform (deadzone + gear compensation)
for act_idx, (act, _) in enumerate(self._motor_actuators):
for env_idx in range(self.num_envs):
actions_np[env_idx, act_idx] = act.transform_ctrl(
float(actions_np[env_idx, act_idx])
)
qpos_batch = np.zeros((self.num_envs, self._nq), dtype=np.float32)
qvel_batch = np.zeros((self.num_envs, self._nv), dtype=np.float32)
for i, data in enumerate(self._data):
data.ctrl[:] = actions_np[i]
for _ in range(self.config.substeps):
# Apply asymmetric motor forces via qfrc_applied
for act, qvel_idx in self._motor_actuators:
vel = data.qvel[qvel_idx]
ctrl = data.ctrl[0] # TODO: generalise for multi-actuator
data.qfrc_applied[qvel_idx] = act.compute_motor_force(vel, ctrl)
self._limits.enforce(self._model, data)
mujoco.mj_step(self._model, data)
qpos_batch[i] = data.qpos
@@ -113,9 +276,10 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
data = self._data[env_id]
mujoco.mj_resetData(self._model, data)
# Add small random perturbation so the pole doesn't start perfectly upright
# Small random perturbation for exploration
data.qpos[:] += np.random.uniform(-0.05, 0.05, size=self._nq)
data.qvel[:] += np.random.uniform(-0.05, 0.05, size=self._nv)
data.ctrl[:] = 0.0
qpos_batch[i] = data.qpos
qvel_batch[i] = data.qvel
@@ -125,31 +289,12 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
torch.from_numpy(qvel_batch).to(self.device),
)
def _sim_close(self) -> None:
if hasattr(self, "_viewer") and self._viewer is not None:
self._viewer.close()
self._viewer = None
if hasattr(self, "_offscreen_renderer") and self._offscreen_renderer is not None:
self._offscreen_renderer.close()
self._offscreen_renderer = None
self._data.clear()
def render(self, env_idx: int = 0, mode: str = "human") -> torch.Tensor | None:
if mode == "human":
if not hasattr(self, "_viewer") or self._viewer is None:
self._viewer = mujoco.viewer.launch_passive(
self._model, self._data[env_idx]
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
"""Offscreen render of a single environment."""
if not hasattr(self, "_offscreen_renderer"):
self._offscreen_renderer = mujoco.Renderer(
self._model, width=640, height=480,
)
# Update visual geometry from current physics state
mujoco.mj_forward(self._model, self._data[env_idx])
self._viewer.sync()
return None
elif mode == "rgb_array":
# Cache the offscreen renderer to avoid create/destroy overhead
if not hasattr(self, "_offscreen_renderer") or self._offscreen_renderer is None:
self._offscreen_renderer = mujoco.Renderer(self._model, height=480, width=640)
self._offscreen_renderer.update_scene(self._data[env_idx])
pixels = self._offscreen_renderer.render().copy() # copy since buffer is reused
return torch.from_numpy(pixels)
return self._offscreen_renderer.render().copy()

494
src/runners/serial.py Normal file
View File

@@ -0,0 +1,494 @@
"""Serial runner — real hardware over USB/serial (ESP32).
Implements the BaseRunner interface for a single physical robot.
All physics come from the real world; the runner translates between
the ESP32 serial protocol and the qpos/qvel tensors that BaseRunner
and BaseEnv expect.
Serial protocol (ESP32 firmware):
Commands sent TO the ESP32:
G — start streaming state lines
H — stop streaming
M<int> — set motor PWM speed (-255 … 255)
State lines received FROM the ESP32 (firmware sends SI units):
S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
(7 comma-separated fields after the ``S`` prefix)
motor_rad — motor joint angle (radians)
motor_vel — motor joint velocity (rad/s)
pend_rad — pendulum angle (radians, 0 = hanging down)
pend_vel — pendulum angular velocity (rad/s)
motor_speed — applied PWM (-255..255, for action recording)
A daemon thread continuously reads the serial stream so the control
loop never blocks on I/O.
Usage:
python train.py env=rotary_cartpole runner=serial training=ppo_real
"""
from __future__ import annotations
import dataclasses
import logging
import threading
import time
from typing import Any
import numpy as np
import torch
from src.core.runner import BaseRunner, BaseRunnerConfig
logger = logging.getLogger(__name__)
@dataclasses.dataclass
class SerialRunnerConfig(BaseRunnerConfig):
    """Configuration for serial communication with the ESP32."""

    num_envs: int = 1  # always 1 — single physical robot
    device: str = "cpu"  # tensors stay on CPU for a hardware runner
    port: str = "/dev/cu.usbserial-0001"  # serial device path (macOS-style default)
    baud: int = 115200  # must match the ESP32 firmware's baud rate
    dt: float = 0.04  # control loop period (seconds), 25 Hz
    no_data_timeout: float = 2.0  # seconds of silence → disconnect
    # Physical reset procedure
    reset_drive_speed: int = 80  # PWM for bang-bang drive-to-center
    reset_deadband_rad: float = 0.01  # "centered" threshold (~0.6°)
    reset_drive_timeout: float = 3.0  # seconds to reach center
    reset_settle_timeout: float = 30.0  # seconds to wait for pendulum
class SerialRunner(BaseRunner[SerialRunnerConfig]):
    """BaseRunner implementation that talks to real hardware over serial.

    Maps the ESP32 serial protocol to qpos/qvel tensors so the existing
    RotaryCartPoleEnv (or any compatible env) works unchanged.

    qpos layout: [motor_angle_rad, pendulum_angle_rad]
    qvel layout: [motor_vel_rad_s, pendulum_vel_rad_s]
    """

    # ------------------------------------------------------------------
    # BaseRunner interface
    # ------------------------------------------------------------------
    @property
    def num_envs(self) -> int:
        # Exactly one physical robot — batching is impossible here.
        return 1

    @property
    def device(self) -> torch.device:
        # Hardware state always lives on CPU.
        return torch.device("cpu")

    def _sim_initialize(self, config: SerialRunnerConfig) -> None:
        """Open the serial port, start the reader thread, begin streaming."""
        # Joint dimensions for the rotary cartpole (motor + pendulum).
        self._nq = 2
        self._nv = 2
        # Import serial here so it's not a hard dependency for sim-only users.
        import serial as _serial
        self._serial_mod = _serial
        # Explicitly disable hardware flow control and exclusive mode to
        # avoid termios.error (errno 22) on macOS with CH340/CP2102 adapters.
        self.ser: _serial.Serial = _serial.Serial(
            port=config.port,
            baudrate=config.baud,
            timeout=0.05,
            xonxoff=False,
            rtscts=False,
            dsrdtr=False,
            exclusive=False,
        )
        time.sleep(2)  # Wait for ESP32 boot.
        self.ser.reset_input_buffer()
        # Internal state tracking.
        self._rebooted: bool = False
        self._serial_disconnected: bool = False
        self._last_esp_ms: int = 0
        self._last_data_time: float = time.monotonic()
        self._streaming: bool = False
        # Latest parsed state (updated by the reader thread).
        # Firmware sends SI units — values are used directly as qpos/qvel.
        self._latest_state: dict[str, Any] = {
            "timestamp_ms": 0,
            "motor_rad": 0.0,
            "motor_vel": 0.0,
            "pend_rad": 0.0,
            "pend_vel": 0.0,
            "motor_speed": 0,
        }
        self._state_lock = threading.Lock()
        self._state_event = threading.Event()
        # Start background serial reader.
        self._reader_running = True
        self._reader_thread = threading.Thread(
            target=self._serial_reader, daemon=True
        )
        self._reader_thread.start()
        # Start streaming.
        self._send("G")
        self._streaming = True
        self._last_data_time = time.monotonic()
        # Derive max PWM from actuator ctrl_range so the serial
        # command range matches what MuJoCo training sees.
        ctrl_hi = self.env.robot.actuators[0].ctrl_range[1]
        self._max_pwm: int = round(ctrl_hi * 255)
        # Track wall-clock time of last step for PPO-gap detection.
        self._last_step_time: float = time.monotonic()

    def _sim_step(
        self, actions: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """Send one motor command and return the next sensed (qpos, qvel)."""
        now = time.monotonic()
        # Detect PPO update gap: if more than 0.5s since last step,
        # the optimizer was running and no motor commands were sent.
        # Trigger a full reset so the robot starts from a clean state.
        gap = now - self._last_step_time
        if gap > 0.5:
            logger.info(
                "PPO update gap detected (%.1f s) — resetting before resuming.",
                gap,
            )
            self._send("M0")
            all_ids = torch.arange(self.num_envs, device=self.device)
            self._sim_reset(all_ids)
            self.step_counts.zero_()
        # Map normalised action [-1, 1] → PWM, scaled by ctrl_range.
        action_val = float(actions[0, 0].clamp(-1.0, 1.0))
        motor_speed = int(action_val * self._max_pwm)
        self._send(f"M{motor_speed}")
        # Stream-driven: block until the firmware sends the next state
        # line (~20 ms at 50 Hz).
        state = self._read_state_blocking(timeout=0.1)
        # Firmware sends SI units — use directly.
        qpos, qvel = self._state_to_tensors(state)
        # Cache for _sync_viz().
        self._last_sync_state = state
        self._last_step_time = time.monotonic()
        return qpos, qvel

    def _sim_reset(
        self, env_ids: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """Physically re-center the robot and wait for it to settle."""
        # If ESP32 rebooted or disconnected, we can't recover.
        if self._rebooted or self._serial_disconnected:
            raise RuntimeError(
                "ESP32 rebooted or disconnected during training! "
                "Encoder center is lost. "
                "Please re-center the motor manually and restart."
            )
        # Stop motor and restart streaming.
        self._send("M0")
        self._send("H")
        self._streaming = False
        time.sleep(0.05)
        self._state_event.clear()
        self._send("G")
        self._streaming = True
        self._last_data_time = time.monotonic()
        time.sleep(0.05)
        # Physically return the motor to the centre position.
        self._drive_to_center()
        # Wait until the env considers the robot settled.
        self._wait_for_settle()
        # Refresh data timer so health checks don't false-positive.
        self._last_data_time = time.monotonic()
        # Read settled state and return as qpos/qvel.
        state = self._read_state_blocking()
        qpos, qvel = self._state_to_tensors(state)
        self._last_sync_state = state
        return qpos, qvel

    def _sim_close(self) -> None:
        """Stop streaming and the motor, join the reader thread, close port."""
        self._reader_running = False
        self._streaming = False
        self._send("H")
        self._send("M0")
        time.sleep(0.1)
        self._reader_thread.join(timeout=1.0)
        self.ser.close()
        super()._sim_close()

    # ------------------------------------------------------------------
    # MuJoCo digital-twin rendering
    # ------------------------------------------------------------------
    def _ensure_viz_model(self) -> None:
        """Lazily load the MuJoCo model for visualisation (digital twin)."""
        if hasattr(self, "_viz_model"):
            return
        import mujoco
        from src.runners.mujoco import load_mujoco_model
        self._viz_model = load_mujoco_model(self.env.robot)
        self._viz_data = mujoco.MjData(self._viz_model)
        self._offscreen_renderer = None

    def _sync_viz(self) -> None:
        """Copy current serial sensor state into the MuJoCo viz model."""
        import mujoco
        self._ensure_viz_model()
        last_state = getattr(self, "_last_sync_state", None)
        if last_state is None:
            last_state = self._read_state()
        # Firmware sends radians — use directly.
        # Assumes viz model qpos/qvel layout matches [motor, pendulum].
        self._viz_data.qpos[0] = last_state["motor_rad"]
        self._viz_data.qpos[1] = last_state["pend_rad"]
        self._viz_data.qvel[0] = last_state["motor_vel"]
        self._viz_data.qvel[1] = last_state["pend_vel"]
        mujoco.mj_forward(self._viz_model, self._viz_data)

    def _render_frame(self, env_idx: int = 0) -> np.ndarray:
        """Offscreen render of the digital-twin MuJoCo model."""
        import mujoco
        self._sync_viz()
        if self._offscreen_renderer is None:
            self._offscreen_renderer = mujoco.Renderer(
                self._viz_model, width=640, height=480,
            )
        self._offscreen_renderer.update_scene(self._viz_data)
        return self._offscreen_renderer.render().copy()

    # ------------------------------------------------------------------
    # Override step() for runner-level safety
    # ------------------------------------------------------------------
    def step(
        self, actions: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, dict[str, Any]]:
        """Wrap BaseRunner.step with reboot/disconnect safety handling."""
        # Check for ESP32 reboot / disconnect BEFORE stepping.
        if self._rebooted or self._serial_disconnected:
            self._send("M0")
            qpos, qvel = self._make_current_state()
            state = self.env.build_state(qpos, qvel)
            obs = self.env.compute_observations(state)
            # Large negative reward discourages policies that crash the ESP32.
            reward = torch.tensor([[-100.0]])
            terminated = torch.tensor([[True]])
            truncated = torch.tensor([[False]])
            return obs, reward, terminated, truncated, {"reboot_detected": True}
        # Normal step via BaseRunner (calls _sim_step → env logic).
        obs, rewards, terminated, truncated, info = super().step(actions)
        # Check connection health after stepping.
        if not self._check_connection_health():
            self._send("M0")
            terminated = torch.tensor([[True]])
            rewards = torch.tensor([[-100.0]])
            info["reboot_detected"] = True
        # Always stop motor on episode end.
        if terminated.any() or truncated.any():
            self._send("M0")
        return obs, rewards, terminated, truncated, info

    # ------------------------------------------------------------------
    # Serial helpers
    # ------------------------------------------------------------------
    def _send(self, cmd: str) -> None:
        """Send a command to the ESP32."""
        try:
            self.ser.write(f"{cmd}\n".encode())
        except (OSError, self._serial_mod.SerialException):
            # Write failure means the port is gone; flag it for step()/reset().
            self._serial_disconnected = True

    def _serial_reader(self) -> None:
        """Background thread: continuously read and parse serial lines.

        Protocol: ``S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>``
        (7 comma-separated fields). Firmware sends SI units directly.
        """
        while self._reader_running:
            try:
                if self.ser.in_waiting:
                    line = (
                        self.ser.readline()
                        .decode("utf-8", errors="ignore")
                        .strip()
                    )
                    # Detect ESP32 reboot: it prints READY on startup.
                    if line.startswith("READY"):
                        self._rebooted = True
                        logger.critical("ESP32 reboot detected: %s", line)
                        continue
                    if line.startswith("S,"):
                        parts = line.split(",")
                        if len(parts) >= 7:
                            try:
                                esp_ms = int(parts[1])
                            except ValueError:
                                logger.debug(
                                    "Malformed state line (header): %s",
                                    line,
                                )
                                continue
                            # Detect reboot: timestamp jumped backwards.
                            if (
                                self._last_esp_ms > 5000
                                and esp_ms < self._last_esp_ms - 3000
                            ):
                                self._rebooted = True
                                logger.critical(
                                    "ESP32 reboot detected: timestamp"
                                    " %d -> %d",
                                    self._last_esp_ms,
                                    esp_ms,
                                )
                            self._last_esp_ms = esp_ms
                            self._last_data_time = time.monotonic()
                            try:
                                parsed: dict[str, Any] = {
                                    "timestamp_ms": esp_ms,
                                    "motor_rad": float(parts[2]),
                                    "motor_vel": float(parts[3]),
                                    "pend_rad": float(parts[4]),
                                    "pend_vel": float(parts[5]),
                                    "motor_speed": int(parts[6]),
                                }
                            except ValueError:
                                logger.debug(
                                    "Malformed state line (fields): %s",
                                    line,
                                )
                                continue
                            # Publish atomically; event wakes blocking readers.
                            with self._state_lock:
                                self._latest_state = parsed
                            self._state_event.set()
                else:
                    time.sleep(0.001)  # Avoid busy-spinning.
            except (OSError, self._serial_mod.SerialException) as exc:
                self._serial_disconnected = True
                logger.critical("Serial connection lost: %s", exc)
                break

    def _check_connection_health(self) -> bool:
        """Return True if the ESP32 connection appears healthy."""
        if self._serial_disconnected:
            logger.critical("ESP32 serial connection lost.")
            return False
        if (
            self._streaming
            and (time.monotonic() - self._last_data_time)
            > self.config.no_data_timeout
        ):
            logger.critical(
                "No data from ESP32 for %.1f s — possible crash/disconnect.",
                time.monotonic() - self._last_data_time,
            )
            self._rebooted = True
            return False
        return True

    def _read_state(self) -> dict[str, Any]:
        """Return the most recent state from the reader thread (non-blocking)."""
        with self._state_lock:
            return dict(self._latest_state)

    def _read_state_blocking(self, timeout: float = 0.05) -> dict[str, Any]:
        """Wait for a fresh sample, then return it.

        On timeout the last known state is returned (may be stale).
        """
        self._state_event.clear()
        self._state_event.wait(timeout=timeout)
        with self._state_lock:
            return dict(self._latest_state)

    def _state_to_tensors(
        self, state: dict[str, Any],
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """Convert a parsed state dict to (qpos, qvel) tensors."""
        qpos = torch.tensor(
            [[state["motor_rad"], state["pend_rad"]]], dtype=torch.float32
        )
        qvel = torch.tensor(
            [[state["motor_vel"], state["pend_vel"]]], dtype=torch.float32
        )
        return qpos, qvel

    def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]:
        """Build qpos/qvel from current sensor data (utility)."""
        return self._state_to_tensors(self._read_state_blocking())

    # ------------------------------------------------------------------
    # Physical reset helpers
    # ------------------------------------------------------------------
    def _drive_to_center(self) -> None:
        """Drive the motor back toward center using bang-bang control."""
        cfg = self.config
        # NOTE(review): uses time.time() while the rest of the class uses
        # time.monotonic() — consider unifying to monotonic.
        start = time.time()
        while time.time() - start < cfg.reset_drive_timeout:
            state = self._read_state_blocking()
            motor_rad = state["motor_rad"]
            if abs(motor_rad) < cfg.reset_deadband_rad:
                break
            speed = cfg.reset_drive_speed if motor_rad < 0 else -cfg.reset_drive_speed
            self._send(f"M{speed}")
            time.sleep(0.05)
        self._send("M0")
        time.sleep(0.2)

    def _wait_for_settle(self) -> None:
        """Block until the env considers the robot ready for a new episode."""
        cfg = self.config
        # Require 0.5 s of continuous "ready" before declaring settled.
        stable_since: float | None = None
        start = time.monotonic()
        while time.monotonic() - start < cfg.reset_settle_timeout:
            state = self._read_state_blocking()
            qpos, qvel = self._state_to_tensors(state)
            if self.env.is_reset_ready(qpos, qvel):
                if stable_since is None:
                    stable_since = time.monotonic()
                elif time.monotonic() - stable_since >= 0.5:
                    logger.info(
                        "Robot settled after %.2f s",
                        time.monotonic() - start,
                    )
                    return
            else:
                stable_since = None
            time.sleep(0.02)
        logger.warning(
            "Robot did not settle within %.1f s — proceeding anyway.",
            cfg.reset_settle_timeout,
        )

1
src/sysid/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""System identification — tune simulation parameters to match real hardware."""

79
src/sysid/_urdf.py Normal file
View File

@@ -0,0 +1,79 @@
"""URDF XML helpers shared by sysid rollout and export modules."""
from __future__ import annotations
import xml.etree.ElementTree as ET
def set_mass(inertial: ET.Element, mass: float | None) -> None:
    """Overwrite the ``<mass value="…">`` child of *inertial*; no-op if *mass* is None."""
    if mass is None:
        return
    element = inertial.find("mass")
    if element is None:
        return
    element.set("value", str(mass))
def set_com(
    inertial: ET.Element,
    x: float | None,
    y: float | None,
    z: float | None,
) -> None:
    """Update the xyz components of ``<origin>`` inside *inertial*.

    A ``None`` component keeps the existing value.  Silently does
    nothing when there is no ``<origin>`` child.
    """
    origin = inertial.find("origin")
    if origin is None:
        return
    parts = origin.get("xyz", "0 0 0").split()
    for idx, value in enumerate((x, y, z)):
        if value is not None:
            parts[idx] = str(value)
    origin.set("xyz", " ".join(parts))
def set_inertia(
    inertial: ET.Element,
    ixx: float | None = None,
    iyy: float | None = None,
    izz: float | None = None,
    ixy: float | None = None,
    iyz: float | None = None,
    ixz: float | None = None,
) -> None:
    """Overwrite individual ``<inertia>`` tensor entries; ``None`` leaves an entry as-is."""
    tensor = inertial.find("inertia")
    if tensor is None:
        return
    updates = {
        "ixx": ixx, "iyy": iyy, "izz": izz,
        "ixy": ixy, "iyz": iyz, "ixz": ixz,
    }
    for attr, value in updates.items():
        if value is not None:
            tensor.set(attr, str(value))
def patch_link_inertials(
    root: ET.Element,
    params: dict[str, float],
) -> None:
    """Patch arm and pendulum inertial parameters in a URDF ElementTree root.

    Only links named ``arm`` and ``pendulum`` are touched; links without
    an ``<inertial>`` child are skipped.  Missing keys in *params* leave
    the corresponding URDF values unchanged.
    """
    for link in root.iter("link"):
        inertial = link.find("inertial")
        if inertial is None:
            continue
        name = link.get("name", "")
        if name == "arm":
            set_mass(inertial, params.get("arm_mass"))
            set_com(
                inertial,
                params.get("arm_com_x"),
                params.get("arm_com_y"),
                params.get("arm_com_z"),
            )
        elif name == "pendulum":
            set_mass(inertial, params.get("pendulum_mass"))
            set_com(
                inertial,
                params.get("pendulum_com_x"),
                params.get("pendulum_com_y"),
                params.get("pendulum_com_z"),
            )
            set_inertia(
                inertial,
                ixx=params.get("pendulum_ixx"),
                iyy=params.get("pendulum_iyy"),
                izz=params.get("pendulum_izz"),
                ixy=params.get("pendulum_ixy"),
            )

430
src/sysid/capture.py Normal file
View File

@@ -0,0 +1,430 @@
"""Capture a real-robot trajectory under random excitation (PRBS-style).
Connects to the ESP32 over serial, sends random PWM commands to excite
the system, and records motor + pendulum angles and velocities at ~50 Hz.
Saves a compressed numpy archive (.npz) that the optimizer can replay
in simulation to fit physics parameters.
Serial protocol (same as SerialRunner):
S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
(7 comma-separated fields — firmware sends SI units)
Usage:
python -m src.sysid.capture \
--robot-path assets/rotary_cartpole \
--port /dev/cu.usbserial-0001 \
--duration 20
"""
from __future__ import annotations
import argparse
import math
import random
import threading
import time
from datetime import datetime
from pathlib import Path
from typing import Any
import numpy as np
import structlog
log = structlog.get_logger()
# ── Serial protocol helpers (mirrored from SerialRunner) ─────────────
def _parse_state_line(line: str) -> dict[str, Any] | None:
"""Parse an ``S,…`` state line from the ESP32.
Format: S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
(7 comma-separated fields, firmware sends SI units)
"""
if not line.startswith("S,"):
return None
parts = line.split(",")
if len(parts) < 7:
return None
try:
return {
"timestamp_ms": int(parts[1]),
"motor_rad": float(parts[2]),
"motor_vel": float(parts[3]),
"pend_rad": float(parts[4]),
"pend_vel": float(parts[5]),
"motor_speed": int(parts[6]),
}
except (ValueError, IndexError):
return None
# ── Background serial reader ─────────────────────────────────────────
class _SerialReader:
    """Minimal background reader for the ESP32 serial stream.

    Uses a sequence counter so ``read_blocking()`` guarantees it returns
    a *new* state line (not a stale repeat). This keeps the capture
    loop locked to the firmware's 50 Hz tick.

    A daemon thread parses incoming lines and publishes the latest one
    under a condition variable; readers wait on the sequence counter.
    """

    def __init__(self, port: str, baud: int = 115200):
        # Imported lazily so the module can be imported without pyserial installed.
        import serial as _serial
        self._serial_mod = _serial  # kept so except-clauses can reference SerialException
        self.ser = _serial.Serial(port, baud, timeout=0.05)
        time.sleep(2)  # Wait for ESP32 boot.
        self.ser.reset_input_buffer()  # drop stale boot chatter
        self._latest: dict[str, Any] = {}  # most recent parsed state line
        self._seq: int = 0  # incremented on every new state line
        self._lock = threading.Lock()
        self._cond = threading.Condition(self._lock)
        self._running = True
        self._thread = threading.Thread(target=self._reader_loop, daemon=True)
        self._thread.start()

    def _reader_loop(self) -> None:
        """Background thread: parse incoming lines, publish under the lock."""
        _debug_count = 0
        while self._running:
            try:
                if self.ser.in_waiting:
                    line = (
                        self.ser.readline()
                        .decode("utf-8", errors="ignore")
                        .strip()
                    )
                    # Debug: log first 10 raw lines so we can see what the firmware sends.
                    if _debug_count < 10 and line:
                        log.info("serial_raw_line", line=repr(line), count=_debug_count)
                        _debug_count += 1
                    parsed = _parse_state_line(line)
                    if parsed is not None:
                        with self._cond:
                            self._latest = parsed
                            self._seq += 1
                            self._cond.notify_all()  # wake any read_blocking() waiter
                else:
                    time.sleep(0.001)  # nothing buffered — yield briefly
            except (OSError, self._serial_mod.SerialException):
                log.critical("serial_lost")
                break

    def send(self, cmd: str) -> None:
        """Write one newline-terminated command; log (don't raise) on serial failure."""
        try:
            self.ser.write(f"{cmd}\n".encode())
        except (OSError, self._serial_mod.SerialException):
            log.critical("serial_send_failed", cmd=cmd)

    def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
        """Wait until a *new* state line arrives, then return it.

        Uses a sequence counter to guarantee the returned state is
        different from whatever was available before this call.
        Returns an empty dict if no new line arrives within *timeout*.
        """
        with self._cond:
            seq_before = self._seq
            if not self._cond.wait_for(
                lambda: self._seq > seq_before, timeout=timeout
            ):
                return {}  # timeout — no new data
            return dict(self._latest)  # copy so callers can't mutate shared state

    def close(self) -> None:
        """Stop the reader thread, command the robot, and release the port.

        NOTE(review): "H"/"M0" presumably mean halt-stream / motor-off —
        confirm against the firmware command set.
        """
        self._running = False
        self.send("H")
        self.send("M0")
        time.sleep(0.1)
        self._thread.join(timeout=1.0)
        self.ser.close()
# ── PRBS excitation signal ───────────────────────────────────────────
class _PRBSExcitation:
"""Random hold-value excitation with configurable amplitude and hold time.
At each call to ``__call__``, returns the current PWM value.
The value is held for a random duration (``hold_min````hold_max`` ms),
then a new random value is drawn uniformly from ``[-amplitude, +amplitude]``.
"""
def __init__(
self,
amplitude: int = 150,
hold_min_ms: int = 50,
hold_max_ms: int = 300,
):
self.amplitude = amplitude
self.hold_min_ms = hold_min_ms
self.hold_max_ms = hold_max_ms
self._current: int = 0
self._switch_time: float = 0.0
self._new_value()
def _new_value(self) -> None:
self._current = random.randint(-self.amplitude, self.amplitude)
hold_ms = random.randint(self.hold_min_ms, self.hold_max_ms)
self._switch_time = time.monotonic() + hold_ms / 1000.0
def __call__(self) -> int:
if time.monotonic() >= self._switch_time:
self._new_value()
return self._current
# ── Main capture loop ────────────────────────────────────────────────
def capture(
    robot_path: str | Path,
    port: str = "/dev/cu.usbserial-0001",
    baud: int = 115200,
    duration: float = 20.0,
    amplitude: int = 150,
    hold_min_ms: int = 50,
    hold_max_ms: int = 300,
    dt: float = 0.02,
    motor_angle_limit_deg: float = 90.0,
) -> Path:
    """Run the capture procedure and return the path to the saved .npz file.

    The capture loop is **stream-driven**: it blocks on each incoming
    state line from the firmware (which arrives at 50 Hz), sends the
    next motor command immediately, and records both.

    Parameters
    ----------
    robot_path : path to robot asset directory
    port : serial port for ESP32
    baud : baud rate
    duration : capture duration in seconds
    amplitude : max PWM magnitude for excitation
    hold_min_ms / hold_max_ms : random hold time range (ms)
    dt : nominal sample period for buffer sizing (seconds)
    motor_angle_limit_deg : safety limit for motor angle

    Returns
    -------
    Path to the compressed ``.npz`` recording written under
    ``<robot_path>/recordings/``.
    """
    robot_path = Path(robot_path).resolve()
    # A non-positive limit disables the software safety envelope below.
    max_motor_rad = math.radians(motor_angle_limit_deg) if motor_angle_limit_deg > 0 else 0.0
    # Connect.
    reader = _SerialReader(port, baud)
    excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)
    # Prepare recording buffers (generous headroom).
    max_samples = int(duration / dt) + 500
    rec_time = np.zeros(max_samples, dtype=np.float64)
    rec_action = np.zeros(max_samples, dtype=np.float64)
    rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
    rec_motor_vel = np.zeros(max_samples, dtype=np.float64)
    rec_pend_angle = np.zeros(max_samples, dtype=np.float64)
    rec_pend_vel = np.zeros(max_samples, dtype=np.float64)
    # Start streaming.  NOTE(review): "G" presumably starts the firmware
    # state stream — confirm against the firmware command set.
    reader.send("G")
    time.sleep(0.1)
    log.info(
        "capture_starting",
        port=port,
        duration=duration,
        amplitude=amplitude,
        hold_range_ms=f"{hold_min_ms}{hold_max_ms}",
        mode="stream-driven (firmware clock)",
    )
    idx = 0
    pwm = 0
    last_esp_ms = -1  # firmware timestamp of last recorded sample
    no_data_count = 0  # consecutive timeouts with no data
    t0 = time.monotonic()
    try:
        while True:
            # Block until the firmware sends the next state line (~20 ms).
            # Timeout at 100 ms prevents hanging if the ESP32 disconnects.
            state = reader.read_blocking(timeout=0.1)
            if not state:
                no_data_count += 1
                if no_data_count == 30:  # 3 seconds with no data
                    log.warning(
                        "no_data_received",
                        msg="No state lines from firmware after 3s. "
                        "Check: is the ESP32 powered? Is it running the right firmware? "
                        "Try pressing the RESET button.",
                    )
                if no_data_count == 100:  # 10 seconds
                    log.critical(
                        "no_data_timeout",
                        msg="No data for 10s — aborting capture.",
                    )
                    break
                continue  # no data yet — retry
            no_data_count = 0
            # Deduplicate: the firmware may send multiple state lines per
            # tick (e.g. M-command echo + tick). Only record one sample
            # per unique firmware timestamp.
            esp_ms = state.get("timestamp_ms", 0)
            if esp_ms == last_esp_ms:
                continue
            last_esp_ms = esp_ms
            elapsed = time.monotonic() - t0
            if elapsed >= duration:
                break
            # Get excitation PWM for the NEXT tick.
            pwm = excitation()
            # Safety: keep the arm well within its mechanical range.
            # Firmware sends motor angle in radians — use directly.
            motor_angle_rad = state.get("motor_rad", 0.0)
            if max_motor_rad > 0:
                ratio = motor_angle_rad / max_motor_rad  # signed, -1..+1
                abs_ratio = abs(ratio)
                if abs_ratio > 0.90:
                    # Deep in the danger zone — force a strong return.
                    brake_strength = min(1.0, (abs_ratio - 0.90) / 0.10)  # 0→1
                    brake_pwm = int(amplitude * (0.5 + 0.5 * brake_strength))
                    pwm = -brake_pwm if ratio > 0 else brake_pwm
                elif abs_ratio > 0.70:
                    # Soft zone — only allow actions pointing back to centre.
                    if ratio > 0 and pwm > 0:
                        pwm = -abs(pwm)
                    elif ratio < 0 and pwm < 0:
                        pwm = abs(pwm)
            # Send command immediately — it will take effect on the next tick.
            reader.send(f"M{pwm}")
            # Record this tick's state + the action the motor *actually*
            # received. Firmware sends SI units — use directly.
            motor_angle = state.get("motor_rad", 0.0)
            motor_vel = state.get("motor_vel", 0.0)
            pend_angle = state.get("pend_rad", 0.0)
            pend_vel = state.get("pend_vel", 0.0)
            # Firmware constrains to ±255; normalise to [-1, 1].
            applied = state.get("motor_speed", 0)
            action_norm = max(-255, min(255, applied)) / 255.0
            if idx < max_samples:
                rec_time[idx] = elapsed
                rec_action[idx] = action_norm
                rec_motor_angle[idx] = motor_angle
                rec_motor_vel[idx] = motor_vel
                rec_pend_angle[idx] = pend_angle
                rec_pend_vel[idx] = pend_vel
                idx += 1
            else:
                break  # buffer full
            # Progress (every 50 samples ≈ once per second at 50 Hz).
            if idx % 50 == 0:
                log.info(
                    "capture_progress",
                    elapsed=f"{elapsed:.1f}/{duration:.0f}s",
                    samples=idx,
                    pwm=pwm,
                )
    finally:
        # Always stop the motor and release the serial port.
        reader.send("M0")
        reader.close()
    # Trim to actual sample count.
    rec_time = rec_time[:idx]
    rec_action = rec_action[:idx]
    rec_motor_angle = rec_motor_angle[:idx]
    rec_motor_vel = rec_motor_vel[:idx]
    rec_pend_angle = rec_pend_angle[:idx]
    rec_pend_vel = rec_pend_vel[:idx]
    # Save.
    recordings_dir = robot_path / "recordings"
    recordings_dir.mkdir(exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = recordings_dir / f"capture_{stamp}.npz"
    np.savez_compressed(
        out_path,
        time=rec_time,
        action=rec_action,
        motor_angle=rec_motor_angle,
        motor_vel=rec_motor_vel,
        pendulum_angle=rec_pend_angle,
        pendulum_vel=rec_pend_vel,
    )
    log.info(
        "capture_saved",
        path=str(out_path),
        samples=idx,
        duration_actual=f"{rec_time[-1]:.2f}s" if idx > 0 else "0s",
    )
    return out_path
# ── CLI entry point ──────────────────────────────────────────────────
def main() -> None:
    """CLI wrapper: parse arguments and run :func:`capture`."""
    arg_parser = argparse.ArgumentParser(
        description="Capture a real-robot trajectory for system identification."
    )
    arg_parser.add_argument(
        "--robot-path",
        type=str,
        default="assets/rotary_cartpole",
        help="Path to robot asset directory",
    )
    arg_parser.add_argument(
        "--port",
        type=str,
        default="/dev/cu.usbserial-0001",
        help="Serial port for ESP32",
    )
    arg_parser.add_argument("--baud", type=int, default=115200)
    arg_parser.add_argument(
        "--duration", type=float, default=20.0, help="Capture duration (s)"
    )
    arg_parser.add_argument(
        "--amplitude", type=int, default=150,
        help="Max PWM magnitude (should not exceed firmware MAX_MOTOR_SPEED=150)",
    )
    arg_parser.add_argument(
        "--hold-min-ms", type=int, default=50, help="Min hold time (ms)"
    )
    arg_parser.add_argument(
        "--hold-max-ms", type=int, default=300, help="Max hold time (ms)"
    )
    arg_parser.add_argument(
        "--dt", type=float, default=0.02, help="Nominal sample period for buffer sizing (s)"
    )
    arg_parser.add_argument(
        "--motor-angle-limit", type=float, default=90.0,
        help="Motor angle safety limit in degrees (0 = disabled)",
    )
    opts = arg_parser.parse_args()
    capture(
        robot_path=opts.robot_path,
        port=opts.port,
        baud=opts.baud,
        duration=opts.duration,
        amplitude=opts.amplitude,
        hold_min_ms=opts.hold_min_ms,
        hold_max_ms=opts.hold_max_ms,
        dt=opts.dt,
        motor_angle_limit_deg=opts.motor_angle_limit,
    )


if __name__ == "__main__":
    main()

192
src/sysid/export.py Normal file
View File

@@ -0,0 +1,192 @@
"""Export tuned parameters to URDF and robot.yaml files.
Reads the original files, injects the optimised parameter values,
and writes ``rotary_cartpole_tuned.urdf`` + ``robot_tuned.yaml``
alongside the originals in the robot asset directory.
"""
from __future__ import annotations
import copy
import xml.etree.ElementTree as ET
from pathlib import Path
import structlog
import yaml
from src.sysid._urdf import patch_link_inertials
log = structlog.get_logger()
def export_tuned_files(
    robot_path: str | Path,
    params: dict[str, float],
    motor_params: dict[str, float] | None = None,
) -> tuple[Path, Path]:
    """Write tuned URDF and robot.yaml files.

    The URDF gets its link inertials patched; robot.yaml gets actuator
    and joint overrides.  With *motor_params* (asymmetric motor model),
    actuator entries are written as ``[pos, neg]`` pairs; otherwise the
    legacy symmetric values are taken from *params*.

    Parameters
    ----------
    robot_path : robot asset directory (contains robot.yaml + *.urdf)
    params : dict of parameter name → tuned value (from optimizer)
    motor_params : locked motor sysid params (asymmetric model).
        If provided, motor joint parameters come from here.

    Returns
    -------
    (tuned_urdf_path, tuned_robot_yaml_path)
    """
    robot_path = Path(robot_path).resolve()
    # ── Load originals ───────────────────────────────────────────
    robot_yaml_path = robot_path / "robot.yaml"
    robot_cfg = yaml.safe_load(robot_yaml_path.read_text())
    urdf_path = robot_path / robot_cfg["urdf"]
    # ── Tune URDF ────────────────────────────────────────────────
    tree = ET.parse(urdf_path)
    patch_link_inertials(tree.getroot(), params)
    # Write tuned URDF.
    tuned_urdf_name = urdf_path.stem + "_tuned" + urdf_path.suffix
    tuned_urdf_path = robot_path / tuned_urdf_name
    # Preserve the XML declaration and original formatting as much as possible.
    ET.indent(tree, space=" ")
    tree.write(str(tuned_urdf_path), xml_declaration=True, encoding="unicode")
    log.info("tuned_urdf_written", path=str(tuned_urdf_path))
    # ── Tune robot.yaml ──────────────────────────────────────────
    tuned_cfg = copy.deepcopy(robot_cfg)
    # Point to the tuned URDF.
    tuned_cfg["urdf"] = tuned_urdf_name
    # Update actuator parameters — full asymmetric motor model.
    if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
        act = tuned_cfg["actuators"][0]
        if motor_params:
            # Asymmetric gear, damping, deadzone, frictionloss as [pos, neg].
            # NOTE(review): the fallback constants below appear to be
            # previously identified motor values — confirm their provenance.
            gear_pos = motor_params.get("actuator_gear_pos", 0.424)
            gear_neg = motor_params.get("actuator_gear_neg", 0.425)
            act["gear"] = [round(gear_pos, 6), round(gear_neg, 6)]
            damp_pos = motor_params.get("motor_damping_pos", 0.002)
            damp_neg = motor_params.get("motor_damping_neg", 0.015)
            act["damping"] = [round(damp_pos, 6), round(damp_neg, 6)]
            dz_pos = motor_params.get("motor_deadzone_pos", 0.141)
            dz_neg = motor_params.get("motor_deadzone_neg", 0.078)
            act["deadzone"] = [round(dz_pos, 6), round(dz_neg, 6)]
            fl_pos = motor_params.get("motor_frictionloss_pos", 0.057)
            fl_neg = motor_params.get("motor_frictionloss_neg", 0.053)
            act["frictionloss"] = [round(fl_pos, 6), round(fl_neg, 6)]
            if "actuator_filter_tau" in motor_params:
                act["filter_tau"] = round(motor_params["actuator_filter_tau"], 6)
            if "viscous_quadratic" in motor_params:
                act["viscous_quadratic"] = round(motor_params["viscous_quadratic"], 6)
            if "back_emf_gain" in motor_params:
                act["back_emf_gain"] = round(motor_params["back_emf_gain"], 6)
        else:
            # Legacy symmetric motor model — scalars taken from `params`.
            if "actuator_gear" in params:
                act["gear"] = round(params["actuator_gear"], 6)
            if "actuator_filter_tau" in params:
                act["filter_tau"] = round(params["actuator_filter_tau"], 6)
            if "motor_damping" in params:
                act["damping"] = round(params["motor_damping"], 6)
            if "motor_deadzone" in params:
                act["deadzone"] = round(params["motor_deadzone"], 6)
        # ctrl_range from ctrl_limit parameter.
        if "ctrl_limit" in params:
            lim = round(params["ctrl_limit"], 6)
            act["ctrl_range"] = [-lim, lim]
    # Update joint overrides.
    if "joints" not in tuned_cfg:
        tuned_cfg["joints"] = {}
    if "motor_joint" not in tuned_cfg["joints"]:
        tuned_cfg["joints"]["motor_joint"] = {}
    mj = tuned_cfg["joints"]["motor_joint"]
    if motor_params:
        mj["armature"] = round(motor_params.get("motor_armature", 0.00277), 6)
        # Frictionloss/damping = 0 in MuJoCo (motor model handles via qfrc_applied).
        mj["frictionloss"] = 0.0
    else:
        if "motor_armature" in params:
            mj["armature"] = round(params["motor_armature"], 6)
        if "motor_frictionloss" in params:
            mj["frictionloss"] = round(params["motor_frictionloss"], 6)
    if "pendulum_joint" not in tuned_cfg["joints"]:
        tuned_cfg["joints"]["pendulum_joint"] = {}
    pj = tuned_cfg["joints"]["pendulum_joint"]
    if "pendulum_damping" in params:
        pj["damping"] = round(params["pendulum_damping"], 6)
    if "pendulum_frictionloss" in params:
        pj["frictionloss"] = round(params["pendulum_frictionloss"], 6)
    # Write tuned robot.yaml.
    tuned_yaml_path = robot_path / "robot_tuned.yaml"
    # Add a header comment.
    header = (
        "# Tuned robot config — generated by src.sysid.optimize\n"
        "# Original: robot.yaml\n"
        "# Run `python -m src.sysid.visualize` to compare real vs sim.\n\n"
    )
    tuned_yaml_path.write_text(
        header + yaml.dump(tuned_cfg, default_flow_style=False, sort_keys=False)
    )
    log.info("tuned_robot_yaml_written", path=str(tuned_yaml_path))
    return tuned_urdf_path, tuned_yaml_path
# ── CLI entry point ──────────────────────────────────────────────────
def main() -> None:
    """CLI wrapper: load a sysid result JSON and export tuned files."""
    import argparse
    import json
    from src.sysid.rollout import LOCKED_MOTOR_PARAMS

    cli = argparse.ArgumentParser(
        description="Export tuned URDF + robot.yaml from sysid results."
    )
    cli.add_argument(
        "--robot-path", type=str, default="assets/rotary_cartpole",
        help="Path to robot asset directory",
    )
    cli.add_argument(
        "--result", type=str, default=None,
        help="Path to sysid_result.json (auto-detected if omitted)",
    )
    opts = cli.parse_args()
    base_dir = Path(opts.robot_path).resolve()
    # Fall back to the conventional location next to the robot assets.
    result_path = Path(opts.result) if opts.result else base_dir / "sysid_result.json"
    if not result_path.exists():
        raise FileNotFoundError(f"Result file not found: {result_path}")
    payload = json.loads(result_path.read_text())
    export_tuned_files(
        robot_path=opts.robot_path,
        params=payload["best_params"],
        motor_params=payload.get("motor_params", dict(LOCKED_MOTOR_PARAMS)),
    )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,5 @@
"""Motor-only system identification subpackage.
Identifies JGB37-520 DC motor dynamics (no pendulum, no limits)
so the MuJoCo simulation matches the real hardware response.
"""

364
src/sysid/motor/capture.py Normal file
View File

@@ -0,0 +1,364 @@
"""Capture a motor-only trajectory under random excitation (PRBS-style).
Connects to the ESP32 running the simplified sysid firmware (no pendulum,
no limits), sends random PWM commands, and records motor angle + velocity
at ~ 50 Hz.
Firmware serial protocol (115200 baud):
Commands: M<speed>\\n R\\n S\\n G\\n H\\n P\\n
State: S,<millis>,<encoder_count>,<rpm>,<applied_speed>,<enc_vel_cps>\\n
Usage:
python -m src.sysid.motor.capture --duration 20
python -m src.sysid.motor.capture --duration 30 --amplitude 200
"""
from __future__ import annotations
import argparse
import math
import random
import threading
import time
from datetime import datetime
from pathlib import Path
from typing import Any
import numpy as np
import structlog
import yaml
log = structlog.get_logger()
# ── Default asset path ───────────────────────────────────────────────
_DEFAULT_ASSET = "assets/motor"
# ── Serial protocol helpers ──────────────────────────────────────────
def _parse_state_line(line: str) -> dict[str, Any] | None:
"""Parse an ``S,…`` state line from the motor sysid firmware.
Format: S,<millis>,<encoder_count>,<rpm>,<applied_speed>,<enc_vel_cps>
"""
if not line.startswith("S,"):
return None
parts = line.split(",")
if len(parts) < 6:
return None
try:
return {
"timestamp_ms": int(parts[1]),
"encoder_count": int(parts[2]),
"rpm": float(parts[3]),
"applied_speed": int(parts[4]),
"enc_vel_cps": float(parts[5]),
}
except (ValueError, IndexError):
return None
# ── Background serial reader ─────────────────────────────────────────
class _SerialReader:
    """Minimal background reader for the ESP32 serial stream.

    A daemon thread parses incoming lines; ``read_blocking`` waits on a
    condition variable guarded by a sequence counter, so each call
    yields a state line that arrived *after* the call began.
    """

    def __init__(self, port: str, baud: int = 115200):
        # Imported lazily so this module can be imported without pyserial installed.
        import serial as _serial
        self._serial_mod = _serial  # kept so except-clauses can reference SerialException
        self.ser = _serial.Serial(port, baud, timeout=0.05)
        time.sleep(2)  # Wait for ESP32 boot
        self.ser.reset_input_buffer()  # drop stale boot chatter
        self._latest: dict[str, Any] = {}  # most recent parsed state line
        self._seq: int = 0  # incremented on every new state line
        self._lock = threading.Lock()
        self._cond = threading.Condition(self._lock)
        self._running = True
        self._thread = threading.Thread(target=self._reader_loop, daemon=True)
        self._thread.start()

    def _reader_loop(self) -> None:
        """Background thread: parse incoming lines, publish under the lock."""
        while self._running:
            try:
                if self.ser.in_waiting:
                    line = (
                        self.ser.readline()
                        .decode("utf-8", errors="ignore")
                        .strip()
                    )
                    parsed = _parse_state_line(line)
                    if parsed is not None:
                        with self._cond:
                            self._latest = parsed
                            self._seq += 1
                            self._cond.notify_all()  # wake any read_blocking() waiter
                    elif line and not line.startswith("S,"):
                        # Log non-state lines (READY, PONG, WARN, etc.)
                        log.debug("serial_info", line=line)
                else:
                    time.sleep(0.001)  # nothing buffered — yield briefly
            except (OSError, self._serial_mod.SerialException):
                log.critical("serial_lost")
                break

    def send(self, cmd: str) -> None:
        """Write one newline-terminated command; log (don't raise) on serial failure."""
        try:
            self.ser.write(f"{cmd}\n".encode())
        except (OSError, self._serial_mod.SerialException):
            log.critical("serial_send_failed", cmd=cmd)

    def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
        """Wait until a *new* state line arrives, then return it.

        Returns an empty dict if no new line arrives within *timeout*.
        """
        with self._cond:
            seq_before = self._seq
            if not self._cond.wait_for(
                lambda: self._seq > seq_before, timeout=timeout
            ):
                return {}
            return dict(self._latest)  # copy so callers can't mutate shared state

    def close(self) -> None:
        """Stop the reader thread, command the motor, and release the port.

        NOTE(review): "H"/"M0" presumably mean halt-stream / motor-off —
        confirm against the firmware command set.
        """
        self._running = False
        self.send("H")
        self.send("M0")
        time.sleep(0.1)
        self._thread.join(timeout=1.0)
        self.ser.close()
# ── PRBS excitation signal ───────────────────────────────────────────
class _PRBSExcitation:
"""Random hold-value excitation with configurable amplitude and hold time."""
def __init__(
self,
amplitude: int = 200,
hold_min_ms: int = 50,
hold_max_ms: int = 400,
):
self.amplitude = amplitude
self.hold_min_ms = hold_min_ms
self.hold_max_ms = hold_max_ms
self._current: int = 0
self._switch_time: float = 0.0
self._new_value()
def _new_value(self) -> None:
self._current = random.randint(-self.amplitude, self.amplitude)
hold_ms = random.randint(self.hold_min_ms, self.hold_max_ms)
self._switch_time = time.monotonic() + hold_ms / 1000.0
def __call__(self) -> int:
if time.monotonic() >= self._switch_time:
self._new_value()
return self._current
# ── Main capture loop ────────────────────────────────────────────────
def capture(
    asset_path: str | Path = _DEFAULT_ASSET,
    port: str = "/dev/cu.usbserial-0001",
    baud: int = 115200,
    duration: float = 20.0,
    amplitude: int = 200,
    hold_min_ms: int = 50,
    hold_max_ms: int = 400,
    dt: float = 0.02,
) -> Path:
    """Run motor-only capture and return the path to the saved .npz file.

    Stream-driven: blocks on each firmware state line (~50 Hz),
    sends next motor command immediately, records both.
    No time.sleep pacing — locked to firmware clock.

    The recording stores:
      - time: wall-clock seconds since start
      - action: normalised action = applied_speed / 255
      - motor_angle: shaft angle in radians (from encoder)
      - motor_vel: shaft velocity in rad/s (from encoder velocity)

    Parameters
    ----------
    asset_path : motor asset directory (must contain hardware.yaml)
    port / baud : serial connection to the ESP32
    duration : capture duration in seconds
    amplitude : max PWM magnitude for PRBS excitation
    hold_min_ms / hold_max_ms : random hold time range (ms)
    dt : nominal sample period, used only for buffer sizing

    Raises
    ------
    FileNotFoundError
        If ``hardware.yaml`` is missing from *asset_path*.
    """
    asset_path = Path(asset_path).resolve()
    # Load hardware config for encoder conversion.
    hw_yaml = asset_path / "hardware.yaml"
    if not hw_yaml.exists():
        raise FileNotFoundError(f"hardware.yaml not found in {asset_path}")
    raw_hw = yaml.safe_load(hw_yaml.read_text())
    ppr = raw_hw.get("encoder", {}).get("ppr", 11)
    gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0)
    counts_per_rev: float = ppr * gear_ratio * 4.0  # x4: quadrature decoding
    max_pwm = raw_hw.get("motor", {}).get("max_pwm", 255)
    log.info(
        "hardware_config",
        ppr=ppr,
        gear_ratio=gear_ratio,
        counts_per_rev=counts_per_rev,
        max_pwm=max_pwm,
    )
    # Connect.
    reader = _SerialReader(port, baud)
    excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)
    # Prepare recording buffers (generous headroom over duration/dt).
    max_samples = int(duration / dt) + 500
    rec_time = np.zeros(max_samples, dtype=np.float64)
    rec_action = np.zeros(max_samples, dtype=np.float64)
    rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
    rec_motor_vel = np.zeros(max_samples, dtype=np.float64)
    # Reset encoder to zero.
    reader.send("R")
    time.sleep(0.1)
    # Start streaming.
    reader.send("G")
    time.sleep(0.1)
    log.info(
        "capture_starting",
        port=port,
        duration=duration,
        amplitude=amplitude,
        hold_range_ms=f"{hold_min_ms}{hold_max_ms}",
    )
    idx = 0
    pwm = 0
    last_esp_ms = -1  # firmware timestamp of last recorded sample
    no_data_count = 0  # consecutive read timeouts (dead-link watchdog)
    t0 = time.monotonic()
    try:
        while True:
            # Block until the next state line (~20 ms at 50 Hz).
            state = reader.read_blocking(timeout=0.1)
            if not state:
                # Bug fix: previously a silent firmware / dead serial link
                # made this loop spin forever, because the duration check
                # was only reached after a successful read.  Mirror the
                # pendulum capture: warn after 3 s, abort after 10 s.
                no_data_count += 1
                if no_data_count == 30:  # 3 seconds with no data
                    log.warning(
                        "no_data_received",
                        msg="No state lines from firmware after 3s. "
                        "Check power/firmware, or press the RESET button.",
                    )
                if no_data_count >= 100:  # 10 seconds
                    log.critical(
                        "no_data_timeout",
                        msg="No data for 10s — aborting capture.",
                    )
                    break
                continue
            no_data_count = 0
            # Deduplicate by firmware timestamp (M-command echoes can repeat).
            esp_ms = state.get("timestamp_ms", 0)
            if esp_ms == last_esp_ms:
                continue
            last_esp_ms = esp_ms
            elapsed = time.monotonic() - t0
            if elapsed >= duration:
                break
            # Generate next excitation PWM and send it immediately —
            # it takes effect on the firmware's next tick.
            pwm = excitation()
            reader.send(f"M{pwm}")
            # Convert encoder counts to shaft angle/velocity (radians).
            enc = state.get("encoder_count", 0)
            motor_angle = enc / counts_per_rev * 2.0 * math.pi
            motor_vel = (
                state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi
            )
            # Record the action the firmware *actually* applied, normalised.
            applied = state.get("applied_speed", 0)
            action_norm = applied / 255.0
            if idx < max_samples:
                rec_time[idx] = elapsed
                rec_action[idx] = action_norm
                rec_motor_angle[idx] = motor_angle
                rec_motor_vel[idx] = motor_vel
                idx += 1
            else:
                break  # buffer full
            # Progress (every 50 samples ≈ once per second at 50 Hz).
            if idx % 50 == 0:
                log.info(
                    "capture_progress",
                    elapsed=f"{elapsed:.1f}/{duration:.0f}s",
                    samples=idx,
                    pwm=pwm,
                    angle_deg=f"{math.degrees(motor_angle):.1f}",
                    vel_rps=f"{motor_vel / (2 * math.pi):.1f}",
                )
    finally:
        # Always stop the motor and release the serial port.
        reader.send("M0")
        reader.close()
    # Trim buffers to the samples actually recorded.
    rec_time = rec_time[:idx]
    rec_action = rec_action[:idx]
    rec_motor_angle = rec_motor_angle[:idx]
    rec_motor_vel = rec_motor_vel[:idx]
    # Save alongside the asset.
    recordings_dir = asset_path / "recordings"
    recordings_dir.mkdir(exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = recordings_dir / f"motor_capture_{stamp}.npz"
    np.savez_compressed(
        out_path,
        time=rec_time,
        action=rec_action,
        motor_angle=rec_motor_angle,
        motor_vel=rec_motor_vel,
    )
    log.info(
        "capture_saved",
        path=str(out_path),
        samples=idx,
        duration_actual=f"{rec_time[-1]:.2f}s" if idx > 0 else "0s",
    )
    return out_path
# ── CLI ──────────────────────────────────────────────────────────────
def main() -> None:
    """CLI wrapper: parse arguments and run :func:`capture`."""
    cli = argparse.ArgumentParser(
        description="Capture motor-only trajectory for system identification."
    )
    cli.add_argument(
        "--asset-path", type=str, default=_DEFAULT_ASSET,
        help="Path to motor asset directory (contains hardware.yaml)",
    )
    cli.add_argument(
        "--port", type=str, default="/dev/cu.usbserial-0001",
        help="Serial port for ESP32",
    )
    cli.add_argument("--baud", type=int, default=115200)
    cli.add_argument("--duration", type=float, default=20.0, help="Capture duration (s)")
    cli.add_argument(
        "--amplitude", type=int, default=200,
        help="Max PWM magnitude (0255, firmware allows full range)",
    )
    cli.add_argument("--hold-min-ms", type=int, default=50, help="PRBS min hold (ms)")
    cli.add_argument("--hold-max-ms", type=int, default=400, help="PRBS max hold (ms)")
    cli.add_argument("--dt", type=float, default=0.02, help="Nominal sample period (s)")
    ns = cli.parse_args()
    capture(
        asset_path=ns.asset_path,
        port=ns.port,
        baud=ns.baud,
        duration=ns.duration,
        amplitude=ns.amplitude,
        hold_min_ms=ns.hold_min_ms,
        hold_max_ms=ns.hold_max_ms,
        dt=ns.dt,
    )


if __name__ == "__main__":
    main()

186
src/sysid/motor/export.py Normal file
View File

@@ -0,0 +1,186 @@
"""Export tuned motor parameters to MJCF and robot.yaml files.
Reads the original motor.xml and robot.yaml, patches with optimised
parameter values, and writes motor_tuned.xml + robot_tuned.yaml.
Usage:
python -m src.sysid.motor.export \
--asset-path assets/motor \
--result assets/motor/motor_sysid_result.json
"""
from __future__ import annotations
import argparse
import copy
import json
import xml.etree.ElementTree as ET
from pathlib import Path
import structlog
import yaml
log = structlog.get_logger()
_DEFAULT_ASSET = "assets/motor"
def export_tuned_files(
    asset_path: str | Path,
    params: dict[str, float],
) -> tuple[Path, Path]:
    """Write tuned MJCF and robot.yaml files.

    Asymmetric (``*_pos`` / ``*_neg``) parameters are averaged when
    written into the MJCF, while the full asymmetric set is preserved
    in the YAML under ``hardware_realism`` for the runtime motor model.

    Returns (tuned_mjcf_path, tuned_robot_yaml_path).
    """
    asset_path = Path(asset_path).resolve()
    robot_yaml_path = asset_path / "robot.yaml"
    robot_cfg = yaml.safe_load(robot_yaml_path.read_text())
    mjcf_path = asset_path / robot_cfg["mjcf"]
    # ── Tune MJCF ────────────────────────────────────────────────
    tree = ET.parse(str(mjcf_path))
    root = tree.getroot()
    # Actuator — use average gear for the MJCF model.
    # Falls back to the symmetric "actuator_gear" when pos/neg are absent.
    gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear"))
    gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear"))
    gear_avg = None
    if gear_pos is not None and gear_neg is not None:
        gear_avg = (gear_pos + gear_neg) / 2.0
    elif gear_pos is not None:
        gear_avg = gear_pos
    filter_tau = params.get("actuator_filter_tau")
    for act_el in root.iter("general"):
        if act_el.get("name") == "motor":
            if gear_avg is not None:
                act_el.set("gear", str(gear_avg))
            if filter_tau is not None:
                # Positive tau enables a first-order actuator filter;
                # zero/negative disables actuator dynamics.
                if filter_tau > 0:
                    act_el.set("dyntype", "filter")
                    act_el.set("dynprm", str(filter_tau))
                else:
                    act_el.set("dyntype", "none")
    # Joint — average damping & friction for MJCF (asymmetry in runtime).
    fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss"))
    fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss"))
    fl_avg = None
    if fl_pos is not None and fl_neg is not None:
        fl_avg = (fl_pos + fl_neg) / 2.0
    elif fl_pos is not None:
        fl_avg = fl_pos
    damp_pos = params.get("motor_damping_pos", params.get("motor_damping"))
    damp_neg = params.get("motor_damping_neg", params.get("motor_damping"))
    damp_avg = None
    if damp_pos is not None and damp_neg is not None:
        damp_avg = (damp_pos + damp_neg) / 2.0
    elif damp_pos is not None:
        damp_avg = damp_pos
    for jnt in root.iter("joint"):
        if jnt.get("name") == "motor_joint":
            if damp_avg is not None:
                jnt.set("damping", str(damp_avg))
            if "motor_armature" in params:
                jnt.set("armature", str(params["motor_armature"]))
            if fl_avg is not None:
                jnt.set("frictionloss", str(fl_avg))
    # Rotor mass.
    if "rotor_mass" in params:
        for geom in root.iter("geom"):
            if geom.get("name") == "rotor_disk":
                geom.set("mass", str(params["rotor_mass"]))
    # Write tuned MJCF.
    tuned_mjcf_name = mjcf_path.stem + "_tuned" + mjcf_path.suffix
    tuned_mjcf_path = asset_path / tuned_mjcf_name
    ET.indent(tree, space=" ")
    tree.write(str(tuned_mjcf_path), xml_declaration=True, encoding="unicode")
    log.info("tuned_mjcf_written", path=str(tuned_mjcf_path))
    # ── Tune robot.yaml ──────────────────────────────────────────
    tuned_cfg = copy.deepcopy(robot_cfg)
    tuned_cfg["mjcf"] = tuned_mjcf_name
    if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
        act = tuned_cfg["actuators"][0]
        if gear_avg is not None:
            act["gear"] = round(gear_avg, 6)
        if "actuator_filter_tau" in params:
            act["filter_tau"] = round(params["actuator_filter_tau"], 6)
        if "motor_damping" in params:
            act["damping"] = round(params["motor_damping"], 6)
    if "joints" not in tuned_cfg:
        tuned_cfg["joints"] = {}
    if "motor_joint" not in tuned_cfg["joints"]:
        tuned_cfg["joints"]["motor_joint"] = {}
    mj = tuned_cfg["joints"]["motor_joint"]
    if "motor_armature" in params:
        mj["armature"] = round(params["motor_armature"], 6)
    if fl_avg is not None:
        mj["frictionloss"] = round(fl_avg, 6)
    # Asymmetric / hardware-realism / nonlinear parameters.
    # Collected verbatim (rounded) so the runtime motor model can apply them.
    realism = {}
    for key in [
        "actuator_gear_pos", "actuator_gear_neg",
        "motor_damping_pos", "motor_damping_neg",
        "motor_frictionloss_pos", "motor_frictionloss_neg",
        "motor_deadzone_pos", "motor_deadzone_neg",
        "action_bias",
        "viscous_quadratic", "back_emf_gain",
        "stribeck_friction_boost", "stribeck_vel",
        "gearbox_backlash",
    ]:
        if key in params:
            realism[key] = round(params[key], 6)
    if realism:
        tuned_cfg["hardware_realism"] = realism
    tuned_yaml_path = asset_path / "robot_tuned.yaml"
    header = (
        "# Tuned motor config — generated by src.sysid.motor.optimize\n"
        "# Original: robot.yaml\n\n"
    )
    tuned_yaml_path.write_text(
        header + yaml.dump(tuned_cfg, default_flow_style=False, sort_keys=False)
    )
    log.info("tuned_robot_yaml_written", path=str(tuned_yaml_path))
    return tuned_mjcf_path, tuned_yaml_path
# ── CLI ──────────────────────────────────────────────────────────────
def main() -> None:
    """CLI entry point: load a sysid result JSON and export tuned files."""
    parser = argparse.ArgumentParser(
        description="Export tuned motor parameters to MJCF + robot.yaml."
    )
    parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
    parser.add_argument(
        "--result", type=str, default=None,
        help="Path to motor_sysid_result.json (auto-detected if omitted)",
    )
    args = parser.parse_args()

    # Auto-detect the result file inside the asset directory when not given.
    asset_dir = Path(args.asset_path).resolve()
    result_file = Path(args.result) if args.result else asset_dir / "motor_sysid_result.json"
    if not result_file.exists():
        raise FileNotFoundError(f"Result file not found: {result_file}")

    best_params = json.loads(result_file.read_text())["best_params"]
    export_tuned_files(asset_path=args.asset_path, params=best_params)


if __name__ == "__main__":
    main()

367
src/sysid/motor/optimize.py Normal file
View File

@@ -0,0 +1,367 @@
"""CMA-ES optimiser — fit motor simulation parameters to a real recording.
Motor-only version: minimises trajectory-matching cost between MuJoCo
rollout and recorded motor angle + velocity.
Usage:
python -m src.sysid.motor.optimize \
--recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
# Quick test:
python -m src.sysid.motor.optimize --recording <file>.npz \
--max-generations 20 --population-size 10
"""
from __future__ import annotations
import argparse
import json
import time
from datetime import datetime
from pathlib import Path
import numpy as np
import structlog
from src.sysid.motor.preprocess import recompute_velocity
from src.sysid.motor.rollout import (
MOTOR_PARAMS,
ParamSpec,
bounds_arrays,
defaults_vector,
params_to_dict,
rollout,
windowed_rollout,
)
log = structlog.get_logger()
_DEFAULT_ASSET = "assets/motor"
# ── Cost function ────────────────────────────────────────────────────
def _compute_trajectory_cost(
sim: dict[str, np.ndarray],
recording: dict[str, np.ndarray],
pos_weight: float = 1.0,
vel_weight: float = 0.5,
acc_weight: float = 0.0,
dt: float = 0.02,
) -> float:
"""Weighted MSE between simulated and real motor trajectories.
Motor-only: angle, velocity, and optionally acceleration.
Acceleration is computed as finite-difference of velocity.
"""
angle_err = sim["motor_angle"] - recording["motor_angle"]
vel_err = sim["motor_vel"] - recording["motor_vel"]
# Reject NaN results (unstable simulation).
if np.any(~np.isfinite(angle_err)) or np.any(~np.isfinite(vel_err)):
return 1e6
cost = float(
pos_weight * np.mean(angle_err**2)
+ vel_weight * np.mean(vel_err**2)
)
# Optional acceleration term — penalises wrong dynamics (d(vel)/dt).
if acc_weight > 0 and len(vel_err) > 2:
sim_acc = np.diff(sim["motor_vel"]) / dt
real_acc = np.diff(recording["motor_vel"]) / dt
acc_err = sim_acc - real_acc
if np.any(~np.isfinite(acc_err)):
return 1e6
# Normalise by typical acceleration scale (~50 rad/s²) to keep
# the weight intuitive relative to vel/pos terms.
cost += acc_weight * np.mean(acc_err**2) / (50.0**2)
return cost
def cost_function(
    params_vec: np.ndarray,
    recording: dict[str, np.ndarray],
    asset_path: Path,
    specs: list[ParamSpec],
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.5,
    acc_weight: float = 0.0,
    window_duration: float = 0.5,
) -> float:
    """Compute trajectory-matching cost for a candidate parameter vector.

    Uses multiple-shooting (windowed rollout) by default; a plain
    open-loop replay is used when ``window_duration`` is 0.
    """
    candidate = params_to_dict(params_vec, specs)
    try:
        if window_duration > 0:
            sim = windowed_rollout(
                asset_path=asset_path,
                params=candidate,
                recording=recording,
                window_duration=window_duration,
                sim_dt=sim_dt,
                substeps=substeps,
            )
        else:
            sim = rollout(
                asset_path=asset_path,
                params=candidate,
                actions=recording["action"],
                sim_dt=sim_dt,
                substeps=substeps,
            )
    except Exception as exc:  # unstable candidate → fixed penalty cost
        log.warning("rollout_failed", error=str(exc))
        return 1e6
    t = recording["time"]
    step_dt = float(np.mean(np.diff(t))) if len(t) > 1 else 0.02
    return _compute_trajectory_cost(
        sim, recording, pos_weight, vel_weight, acc_weight, dt=step_dt,
    )
# ── CMA-ES optimisation loop ────────────────────────────────────────
def optimize(
    asset_path: str | Path = _DEFAULT_ASSET,
    recording_path: str | Path = "",
    specs: list[ParamSpec] | None = None,
    sigma0: float = 0.3,
    population_size: int = 30,
    max_generations: int = 300,
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.5,
    acc_weight: float = 0.0,
    window_duration: float = 0.5,
    seed: int = 42,
    preprocess_vel: bool = True,
    sg_window: int = 7,
    sg_polyorder: int = 3,
) -> dict:
    """Run CMA-ES optimisation and return results dict.

    Loads the recording, optionally recomputes velocity with a
    Savitzky-Golay derivative, then searches the parameter box with
    CMA-ES in [0, 1]-normalised coordinates, scoring each candidate
    via ``cost_function``.

    Returns
    -------
    dict with: best_params, best_cost, history [(gen, best_cost)],
    recording path, param_names, defaults, and an ISO timestamp.
    """
    # Imported locally so the cmaes package is only required when optimising.
    from cmaes import CMA
    asset_path = Path(asset_path).resolve()
    recording_path = Path(recording_path).resolve()
    if specs is None:
        specs = MOTOR_PARAMS
    # Load recording.
    recording = dict(np.load(recording_path))
    n_samples = len(recording["time"])
    duration = recording["time"][-1] - recording["time"][0]
    n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
    log.info(
        "recording_loaded",
        path=str(recording_path),
        samples=n_samples,
        duration=f"{duration:.1f}s",
        n_windows=n_windows,
    )
    # Preprocess velocity: replace noisy firmware finite-difference with
    # smooth Savitzky-Golay derivative of the angle signal.
    if preprocess_vel:
        recording = recompute_velocity(
            recording,
            window_length=sg_window,
            polyorder=sg_polyorder,
        )
    # Normalise to [0, 1] for CMA-ES.
    lo, hi = bounds_arrays(specs)
    x0 = defaults_vector(specs)
    span = hi - lo
    # Guard against division by zero for zero-width (fixed) bounds.
    span[span == 0] = 1.0
    def to_normed(x: np.ndarray) -> np.ndarray:
        return (x - lo) / span
    def from_normed(x_n: np.ndarray) -> np.ndarray:
        return x_n * span + lo
    x0_normed = to_normed(x0)
    bounds_normed = np.column_stack(
        [np.zeros(len(specs)), np.ones(len(specs))]
    )
    optimizer = CMA(
        mean=x0_normed,
        sigma=sigma0,
        bounds=bounds_normed,
        population_size=population_size,
        seed=seed,
    )
    # Best candidate is tracked in natural (un-normalised) units.
    best_cost = float("inf")
    best_params_vec = x0.copy()
    history: list[tuple[int, float]] = []
    log.info(
        "cmaes_starting",
        n_params=len(specs),
        population=population_size,
        max_gens=max_generations,
        sigma0=sigma0,
    )
    t0 = time.monotonic()
    for gen in range(max_generations):
        solutions = []
        for _ in range(optimizer.population_size):
            x_normed = optimizer.ask()
            x_natural = from_normed(x_normed)
            # Clip: ask() respects bounds, but clipping is cheap insurance.
            x_natural = np.clip(x_natural, lo, hi)
            c = cost_function(
                x_natural,
                recording,
                asset_path,
                specs,
                sim_dt=sim_dt,
                substeps=substeps,
                pos_weight=pos_weight,
                vel_weight=vel_weight,
                acc_weight=acc_weight,
                window_duration=window_duration,
            )
            # tell() expects normalised coordinates; keep natural for us.
            solutions.append((x_normed, c))
            if c < best_cost:
                best_cost = c
                best_params_vec = x_natural.copy()
        optimizer.tell(solutions)
        history.append((gen, best_cost))
        elapsed = time.monotonic() - t0
        if gen % 5 == 0 or gen == max_generations - 1:
            log.info(
                "cmaes_generation",
                gen=gen,
                best_cost=f"{best_cost:.6f}",
                elapsed=f"{elapsed:.1f}s",
                gen_best=f"{min(c for _, c in solutions):.6f}",
            )
    total_time = time.monotonic() - t0
    best_params = params_to_dict(best_params_vec, specs)
    log.info(
        "cmaes_finished",
        best_cost=f"{best_cost:.6f}",
        total_time=f"{total_time:.1f}s",
        evaluations=max_generations * population_size,
    )
    # Log parameter comparison (default vs tuned, percentage change).
    defaults = params_to_dict(defaults_vector(specs), specs)
    for name in best_params:
        d = defaults[name]
        b = best_params[name]
        change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
        log.info(
            "param_result",
            name=name,
            default=f"{d:.6g}",
            tuned=f"{b:.6g}",
            change=f"{change_pct:+.1f}%",
        )
    return {
        "best_params": best_params,
        "best_cost": best_cost,
        "history": history,
        "recording": str(recording_path),
        "param_names": [s.name for s in specs],
        "defaults": {s.name: s.default for s in specs},
        "timestamp": datetime.now().isoformat(),
    }
# ── CLI ──────────────────────────────────────────────────────────────
def main() -> None:
    """CLI entry point: run CMA-ES sysid on a recording, save and export."""
    parser = argparse.ArgumentParser(
        description="Fit motor simulation parameters to a real recording (CMA-ES)."
    )
    parser.add_argument(
        "--asset-path", type=str, default=_DEFAULT_ASSET,
        help="Path to motor asset directory",
    )
    parser.add_argument(
        "--recording", type=str, required=True,
        help="Path to .npz recording file",
    )
    parser.add_argument("--sigma0", type=float, default=0.3)
    parser.add_argument("--population-size", type=int, default=30)
    parser.add_argument("--max-generations", type=int, default=300)
    parser.add_argument("--sim-dt", type=float, default=0.002)
    parser.add_argument("--substeps", type=int, default=10)
    parser.add_argument("--pos-weight", type=float, default=1.0)
    parser.add_argument("--vel-weight", type=float, default=0.5)
    parser.add_argument(
        "--acc-weight", type=float, default=0.0,
        help="Weight for acceleration matching (0=off, try 0.1-0.5)",
    )
    parser.add_argument("--window-duration", type=float, default=0.5)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument(
        "--no-preprocess-vel", action="store_true",
        help="Disable Savitzky-Golay velocity preprocessing",
    )
    parser.add_argument(
        "--sg-window", type=int, default=7,
        help="Savitzky-Golay window length (odd, default 7 = 140ms)",
    )
    parser.add_argument(
        "--sg-polyorder", type=int, default=3,
        help="Savitzky-Golay polynomial order (default 3 = cubic)",
    )
    args = parser.parse_args()

    result = optimize(
        asset_path=args.asset_path,
        recording_path=args.recording,
        sigma0=args.sigma0,
        population_size=args.population_size,
        max_generations=args.max_generations,
        sim_dt=args.sim_dt,
        substeps=args.substeps,
        pos_weight=args.pos_weight,
        vel_weight=args.vel_weight,
        acc_weight=args.acc_weight,
        window_duration=args.window_duration,
        seed=args.seed,
        preprocess_vel=not args.no_preprocess_vel,
        sg_window=args.sg_window,
        sg_polyorder=args.sg_polyorder,
    )

    # Save results JSON — the full history is summarised to keep it small.
    out_dir = Path(args.asset_path).resolve()
    out_path = out_dir / "motor_sysid_result.json"
    history = result["history"]
    payload = {k: v for k, v in result.items() if k != "history"}
    payload["history_summary"] = {
        "first_cost": history[0][1] if history else None,
        "final_cost": history[-1][1] if history else None,
        "generations": len(history),
    }
    out_path.write_text(json.dumps(payload, indent=2, default=str))
    log.info("results_saved", path=str(out_path))

    # Export tuned files (deferred import keeps module load light).
    from src.sysid.motor.export import export_tuned_files
    export_tuned_files(asset_path=args.asset_path, params=result["best_params"])


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,114 @@
"""Recording preprocessing — clean velocity estimation from angle data.
The ESP32 firmware computes velocity as a raw finite-difference of encoder
counts at 50 Hz. With a 1320 CPR encoder that gives ~0.24 rad/s of
quantisation noise per count. This module replaces the noisy firmware
velocity with a smooth differentiation of the (much cleaner) angle signal.
Method: Savitzky-Golay filter applied to the angle signal, then
differentiated analytically. Zero phase lag, preserves transients well.
This is standard practice in robotics sysid — see e.g. MATLAB System ID
Toolbox, Drake's trajectory processing, or ETH's ANYmal sysid pipeline.
"""
from __future__ import annotations
import numpy as np
from scipy.signal import savgol_filter
import structlog
log = structlog.get_logger()
def recompute_velocity(
    recording: dict[str, np.ndarray],
    window_length: int = 7,
    polyorder: int = 3,
    deriv: int = 1,
) -> dict[str, np.ndarray]:
    """Recompute motor_vel from motor_angle using Savitzky-Golay differentiation.

    Parameters
    ----------
    recording : dict with at least 'time', 'motor_angle', 'motor_vel' keys.
    window_length : SG filter window (must be odd, > polyorder).
        7 samples at 50 Hz = 140ms window — good balance of smoothness
        and responsiveness. Captures dynamics up to ~7 Hz.
    polyorder : Polynomial order for the SG filter (3 = cubic).
    deriv : Derivative order (1 = first derivative = velocity).

    Returns
    -------
    New recording dict with 'motor_vel' replaced and 'motor_vel_raw' added.
    """
    out = dict(recording)  # shallow copy — arrays are shared, not duplicated
    dt = np.mean(np.diff(out["time"]))
    raw_vel = out["motor_vel"].copy()
    out["motor_vel_raw"] = raw_vel  # keep the original for diagnostics
    # SG derivative: fit a polynomial per window, differentiate it
    # analytically → smooth estimate with zero phase lag.
    smooth_vel = savgol_filter(
        out["motor_angle"],
        window_length=window_length,
        polyorder=polyorder,
        deriv=deriv,
        delta=dt,
    )
    residual = raw_vel - smooth_vel
    log.info(
        "velocity_recomputed",
        method="savgol",
        window=window_length,
        polyorder=polyorder,
        dt=f"{dt*1000:.1f}ms",
        noise_std=f"{np.std(residual):.3f} rad/s",
        max_diff=f"{np.max(np.abs(residual)):.3f} rad/s",
        raw_rms=f"{np.sqrt(np.mean(raw_vel**2)):.3f}",
        sg_rms=f"{np.sqrt(np.mean(smooth_vel**2)):.3f}",
    )
    out["motor_vel"] = smooth_vel
    return out
def smooth_velocity(
    recording: dict[str, np.ndarray],
    cutoff_hz: float = 10.0,
) -> dict[str, np.ndarray]:
    """Alternative: apply zero-phase Butterworth low-pass to motor_vel.

    Simpler than SG derivative but introduces slight edge effects.
    """
    from scipy.signal import butter, filtfilt

    out = dict(recording)
    out["motor_vel_raw"] = out["motor_vel"].copy()
    fs = 1.0 / np.mean(np.diff(out["time"]))
    # Normalised cutoff, clamped just below Nyquist for filter validity.
    wn = min(cutoff_hz / (fs / 2.0), 0.99)
    b, a = butter(2, wn, btype="low")
    out["motor_vel"] = filtfilt(b, a, out["motor_vel"])
    log.info(
        "velocity_smoothed",
        method="butterworth",
        cutoff_hz=cutoff_hz,
        fs=fs,
    )
    return out

460
src/sysid/motor/rollout.py Normal file
View File

@@ -0,0 +1,460 @@
"""Deterministic simulation replay — roll out recorded actions in MuJoCo.
Motor-only version: single hinge joint, no pendulum.
Given a parameter vector and recorded actions, builds a MuJoCo model
with overridden dynamics, replays the actions, and returns the simulated
motor angle + velocity for comparison with the real recording.
"""
from __future__ import annotations
import dataclasses
import xml.etree.ElementTree as ET
from pathlib import Path
import mujoco
import numpy as np
import yaml
# ── Tunable parameter specification ──────────────────────────────────
@dataclasses.dataclass
class ParamSpec:
    """Specification for a single tunable parameter.

    Defines one scalar of the sysid search space: its name, nominal
    (default) value, and box bounds.
    """
    name: str  # parameter name as used in the params dict
    default: float  # nominal / initial value
    lower: float  # lower box bound
    upper: float  # upper box bound
    log_scale: bool = False  # hint that the value spans orders of magnitude
    # NOTE(review): log_scale is not read by the linear [0,1] normalisation
    # in optimize() — appears informational only; confirm before relying on it.
log_scale: bool = False
# Motor-only parameters to identify.
# These capture the full transfer function: PWM → shaft angle/velocity.
#
# Asymmetric parameters (pos/neg suffix) capture real-world differences
# between CW and CCW rotation caused by gear mesh, brush contact,
# and H-bridge asymmetry.
#
# Positional args below are (name, default, lower, upper) — see ParamSpec.
# Treated as read-only; this list is the default `specs` throughout the module.
MOTOR_PARAMS: list[ParamSpec] = [
    # ── Actuator ─────────────────────────────────────────────────
    # gear_pos/neg: effective torque per unit ctrl, split by direction.
    # Real motors + L298N often have different drive strength per direction.
    ParamSpec("actuator_gear_pos", 0.064, 0.005, 0.5, log_scale=True),
    ParamSpec("actuator_gear_neg", 0.064, 0.005, 0.5, log_scale=True),
    # filter_tau: first-order electrical/driver time constant (s).
    # Lower bound 1ms — L298N PWM switching is very fast.
    ParamSpec("actuator_filter_tau", 0.03, 0.001, 0.20),
    # ── Joint dynamics ───────────────────────────────────────────
    # damping_pos/neg: viscous friction (back-EMF), split by direction.
    ParamSpec("motor_damping_pos", 0.003, 1e-5, 0.1, log_scale=True),
    ParamSpec("motor_damping_neg", 0.003, 1e-5, 0.1, log_scale=True),
    # armature: reflected rotor inertia (kg·m²).
    ParamSpec("motor_armature", 0.0001, 1e-6, 0.01, log_scale=True),
    # frictionloss_pos/neg: Coulomb friction, split by velocity direction.
    ParamSpec("motor_frictionloss_pos", 0.03, 0.001, 0.2, log_scale=True),
    ParamSpec("motor_frictionloss_neg", 0.03, 0.001, 0.2, log_scale=True),
    # ── Nonlinear dynamics ───────────────────────────────────────
    # viscous_quadratic: velocity-squared drag term (N·m·s²/rad²).
    # Captures nonlinear friction that increases at high speed (air drag,
    # grease viscosity, etc.). Opposes motion.
    ParamSpec("viscous_quadratic", 0.0, 0.0, 0.005),
    # back_emf_gain: torque reduction proportional to |vel × ctrl|.
    # Models the back-EMF effect: at high speed the motor produces less
    # torque because the voltage drop across the armature is smaller.
    ParamSpec("back_emf_gain", 0.0, 0.0, 0.05),
    # stribeck_friction_boost: extra Coulomb friction below the
    # characteristic stribeck_vel (Stribeck effect). 0 = standard Coulomb.
    ParamSpec("stribeck_friction_boost", 0.0, 0.0, 0.15),
    ParamSpec("stribeck_vel", 2.0, 0.1, 8.0),
    # ── Rotor load ───────────────────────────────────────────────
    ParamSpec("rotor_mass", 0.012, 0.002, 0.05, log_scale=True),
    # ── Hardware realism ─────────────────────────────────────────
    # deadzone_pos/neg: minimum |action| per direction.
    ParamSpec("motor_deadzone_pos", 0.08, 0.0, 0.30),
    ParamSpec("motor_deadzone_neg", 0.08, 0.0, 0.30),
    # action_bias: constant offset added to ctrl (H-bridge asymmetry).
    ParamSpec("action_bias", 0.0, -0.10, 0.10),
    # ── Gearbox backlash ─────────────────────────────────────────
    # backlash_halfwidth: half the angular deadband (rad) in the gearbox.
    # When the motor reverses, the shaft doesn't move until the backlash
    # gap is taken up. Typical for 30:1 plastic/metal spur gears.
    ParamSpec("gearbox_backlash", 0.0, 0.0, 0.15),
]
def params_to_dict(
    values: np.ndarray, specs: list[ParamSpec] | None = None
) -> dict[str, float]:
    """Map a flat value vector onto its specs → {name: value} dict."""
    chosen = MOTOR_PARAMS if specs is None else specs
    out: dict[str, float] = {}
    for idx, spec in enumerate(chosen):
        out[spec.name] = float(values[idx])
    return out
def defaults_vector(specs: list[ParamSpec] | None = None) -> np.ndarray:
    """Stack every spec's default value into a float64 vector."""
    chosen = MOTOR_PARAMS if specs is None else specs
    return np.array([spec.default for spec in chosen], dtype=np.float64)
def bounds_arrays(
    specs: list[ParamSpec] | None = None,
) -> tuple[np.ndarray, np.ndarray]:
    """Return (lower, upper) bound vectors for the given specs."""
    chosen = MOTOR_PARAMS if specs is None else specs
    lower = np.array([spec.lower for spec in chosen], dtype=np.float64)
    upper = np.array([spec.upper for spec in chosen], dtype=np.float64)
    return lower, upper
# ── MuJoCo model building with parameter overrides ──────────────────
def _build_model(
    asset_path: Path,
    params: dict[str, float],
) -> mujoco.MjModel:
    """Build a MuJoCo model from the robot's MJCF with parameter overrides.

    Reads robot.yaml for the MJCF filename, parses the XML, patches
    actuator/joint/geom attributes from ``params``, writes a temporary
    file next to the assets, loads it, and deletes the temp file.
    """
    asset_path = Path(asset_path).resolve()
    robot_cfg = yaml.safe_load((asset_path / "robot.yaml").read_text())
    mjcf_path = asset_path / robot_cfg["mjcf"]
    tree = ET.parse(str(mjcf_path))
    root = tree.getroot()
    # ── Actuator overrides ───────────────────────────────────────
    # Use average of pos/neg gear for MuJoCo (asymmetry handled in ctrl
    # by _transform_action, which rescales relative to this average).
    gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
    gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
    gear = (gear_pos + gear_neg) / 2.0
    filter_tau = params.get("actuator_filter_tau", 0.03)
    for act_el in root.iter("general"):
        if act_el.get("name") == "motor":
            act_el.set("gear", str(gear))
            # First-order actuator filter models the driver time constant.
            if filter_tau > 0:
                act_el.set("dyntype", "filter")
                act_el.set("dynprm", str(filter_tau))
            else:
                act_el.set("dyntype", "none")
                act_el.set("dynprm", "1")
    # ── Joint overrides ──────────────────────────────────────────
    # Damping and friction are asymmetric + nonlinear → applied manually.
    # Set MuJoCo damping & frictionloss to 0; we handle them in qfrc_applied
    # (see _apply_forces). Only armature stays in the model.
    armature = params.get("motor_armature", 0.0001)
    for jnt in root.iter("joint"):
        if jnt.get("name") == "motor_joint":
            jnt.set("damping", "0")
            jnt.set("armature", str(armature))
            jnt.set("frictionloss", "0")
    # ── Rotor mass override ──────────────────────────────────────
    rotor_mass = params.get("rotor_mass", 0.012)
    for geom in root.iter("geom"):
        if geom.get("name") == "rotor_disk":
            geom.set("mass", str(rotor_mass))
    # Write temp file and load; always clean up, even on load failure.
    tmp_path = asset_path / "_tmp_motor_sysid.xml"
    try:
        tree.write(str(tmp_path), xml_declaration=True, encoding="unicode")
        model = mujoco.MjModel.from_xml_path(str(tmp_path))
    finally:
        tmp_path.unlink(missing_ok=True)
    return model
# ── Action + asymmetry transforms ────────────────────────────────────
def _transform_action(
action: float,
params: dict[str, float],
) -> float:
"""Apply bias, direction-dependent deadzone, and gear scaling.
The MuJoCo actuator has the *average* gear ratio. We rescale the
control signal so that ``ctrl × gear_avg ≈ action × gear_dir``.
"""
# Constant bias (H-bridge asymmetry).
action = action + params.get("action_bias", 0.0)
# Direction-dependent deadzone.
dz_pos = params.get("motor_deadzone_pos", params.get("motor_deadzone", 0.08))
dz_neg = params.get("motor_deadzone_neg", params.get("motor_deadzone", 0.08))
if action >= 0 and action < dz_pos:
return 0.0
if action < 0 and action > -dz_neg:
return 0.0
# Direction-dependent gear scaling.
# MuJoCo model uses gear_avg; we rescale ctrl to get the right torque.
gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
gear_avg = (gear_pos + gear_neg) / 2.0
if gear_avg < 1e-8:
return 0.0
gear_dir = gear_pos if action >= 0 else gear_neg
return action * (gear_dir / gear_avg)
def _apply_forces(
data: mujoco.MjData,
vel: float,
ctrl: float,
params: dict[str, float],
backlash_state: list[float] | None = None,
) -> None:
"""Apply all manual forces: asymmetric friction, damping, and nonlinear terms.
Everything that MuJoCo can't represent with its symmetric joint model
is injected here via ``qfrc_applied``.
Forces applied (all oppose motion or reduce torque):
1. Asymmetric Coulomb friction (with Stribeck boost at low speed)
2. Asymmetric viscous damping
3. Quadratic velocity drag
4. Back-EMF torque reduction (proportional to |vel|)
Backlash:
If backlash_state is provided, it is a 1-element list [gap_pos].
gap_pos tracks the motor's position within the backlash deadband.
When inside the gap, no actuator torque is transmitted to the
output shaft — only friction forces act.
"""
torque = 0.0
# ── Gearbox backlash ──────────────────────────────────────────
# Model: the gear teeth have play of 2×halfwidth radians.
# We track where the motor is within that gap. When at the
# edge (contact), actuator torque passes through normally.
# When inside the gap, no actuator torque is transmitted.
backlash_hw = params.get("gearbox_backlash", 0.0)
actuator_torque_scale = 1.0 # 1.0 = full contact, 0.0 = in gap
if backlash_hw > 0 and backlash_state is not None:
# gap_pos: how far into the backlash gap we are.
# Range: [-backlash_hw, +backlash_hw]
# At ±backlash_hw, gears are in contact and torque transmits.
gap = backlash_state[0]
# Update gap position based on velocity.
dt_sub = data.model.opt.timestep
gap += vel * dt_sub
# Clamp to backlash range.
if gap > backlash_hw:
gap = backlash_hw
elif gap < -backlash_hw:
gap = -backlash_hw
backlash_state[0] = gap
# If not at contact edge, no torque transmitted.
if abs(gap) < backlash_hw - 1e-8:
actuator_torque_scale = 0.0
else:
actuator_torque_scale = 1.0
# ── 1. Coulomb friction (direction-dependent + Stribeck) ─────
fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss", 0.03))
fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss", 0.03))
stribeck_boost = params.get("stribeck_friction_boost", 0.0)
stribeck_vel = params.get("stribeck_vel", 2.0)
if abs(vel) > 1e-6:
fl = fl_pos if vel > 0 else fl_neg
# Stribeck: boost friction at low speed. Exponential decay.
if stribeck_boost > 0 and stribeck_vel > 0:
fl = fl * (1.0 + stribeck_boost * np.exp(-abs(vel) / stribeck_vel))
# Coulomb: constant magnitude, opposes motion.
torque -= np.sign(vel) * fl
# ── 2. Asymmetric viscous damping ────────────────────────────
damp_pos = params.get("motor_damping_pos", params.get("motor_damping", 0.003))
damp_neg = params.get("motor_damping_neg", params.get("motor_damping", 0.003))
damp = damp_pos if vel > 0 else damp_neg
torque -= damp * vel
# ── 3. Quadratic velocity drag ───────────────────────────────
visc_quad = params.get("viscous_quadratic", 0.0)
if visc_quad > 0:
torque -= visc_quad * vel * abs(vel)
# ── 4. Back-EMF torque reduction ─────────────────────────────
# At high speed, the motor's effective torque is reduced because
# back-EMF opposes the supply voltage. Modelled as a torque that
# opposes the control signal proportional to speed.
bemf = params.get("back_emf_gain", 0.0)
if bemf > 0 and abs(ctrl) > 1e-6:
# The reduction should oppose the actuator torque direction.
torque -= bemf * vel * np.sign(ctrl) * actuator_torque_scale
# ── 5. Scale actuator contribution by backlash state ─────────
# When in the backlash gap, MuJoCo's actuator force should not
# transmit. We cancel it by applying an opposing force.
if actuator_torque_scale < 1.0:
# The actuator_force from MuJoCo will be applied by mj_step.
# We need to counteract it. data.qfrc_actuator isn't set yet
# at this point (pre-step), so we zero the ctrl instead.
# This is handled in the rollout loop by zeroing ctrl.
pass
data.qfrc_applied[0] = max(-10.0, min(10.0, torque))
return actuator_torque_scale
# ── Simulation rollout ───────────────────────────────────────────────
def rollout(
    asset_path: str | Path,
    params: dict[str, float],
    actions: np.ndarray,
    sim_dt: float = 0.002,
    substeps: int = 10,
) -> dict[str, np.ndarray]:
    """Open-loop replay of recorded actions.

    Parameters
    ----------
    asset_path : motor asset directory
    params : named parameter overrides
    actions : (N,) normalised actions [-1, 1] from the recording
    sim_dt : MuJoCo physics timestep
    substeps : physics substeps per control step (ctrl_dt = sim_dt × substeps)

    Returns
    -------
    dict with motor_angle (N,) and motor_vel (N,); trailing entries are
    NaN if the simulation went unstable mid-replay.
    """
    asset_path = Path(asset_path).resolve()
    model = _build_model(asset_path, params)
    model.opt.timestep = sim_dt
    data = mujoco.MjData(model)
    mujoco.mj_resetData(model, data)
    n = len(actions)
    sim_motor_angle = np.zeros(n, dtype=np.float64)
    sim_motor_vel = np.zeros(n, dtype=np.float64)
    # Backlash state: [gap_position]. Starts at 0 (centered in gap).
    backlash_state = [0.0]
    for i in range(n):
        # Bias/deadzone/gear transform maps recorded action → MuJoCo ctrl.
        ctrl = _transform_action(actions[i], params)
        data.ctrl[0] = ctrl
        for _ in range(substeps):
            scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
            # If in backlash gap, zero ctrl so actuator torque doesn't transmit.
            if scale < 1.0:
                data.ctrl[0] = 0.0
            else:
                data.ctrl[0] = ctrl
            mujoco.mj_step(model, data)
        # Bail out on NaN/instability; the cost function rejects NaN traces.
        if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
            sim_motor_angle[i:] = np.nan
            sim_motor_vel[i:] = np.nan
            break
        # Sample the joint state once per control step (matches recording rate).
        sim_motor_angle[i] = data.qpos[0]
        sim_motor_vel[i] = data.qvel[0]
    return {
        "motor_angle": sim_motor_angle,
        "motor_vel": sim_motor_vel,
    }
def windowed_rollout(
    asset_path: str | Path,
    params: dict[str, float],
    recording: dict[str, np.ndarray],
    window_duration: float = 0.5,
    sim_dt: float = 0.002,
    substeps: int = 10,
) -> dict[str, np.ndarray]:
    """Multiple-shooting rollout for motor-only sysid.

    Splits the recording into short windows. Each window is initialised
    from the real motor state, preventing error accumulation.

    Returns
    -------
    dict with motor_angle (N,), motor_vel (N,), n_windows (int).
    (NOTE: "n_windows" is an int, so the declared value type is loose.)
    """
    asset_path = Path(asset_path).resolve()
    model = _build_model(asset_path, params)
    model.opt.timestep = sim_dt
    data = mujoco.MjData(model)
    times = recording["time"]
    actions = recording["action"]
    real_angle = recording["motor_angle"]
    real_vel = recording["motor_vel"]
    n = len(actions)
    sim_motor_angle = np.zeros(n, dtype=np.float64)
    sim_motor_vel = np.zeros(n, dtype=np.float64)
    # Compute window boundaries as sample indices via timestamp search.
    t0 = times[0]
    t_end = times[-1]
    window_starts: list[int] = []
    current_t = t0
    while current_t < t_end:
        idx = int(np.searchsorted(times, current_t))
        idx = min(idx, n - 1)
        window_starts.append(idx)
        current_t += window_duration
    n_windows = len(window_starts)
    for w, w_start in enumerate(window_starts):
        # Last window runs to the end of the recording.
        w_end = window_starts[w + 1] if w + 1 < n_windows else n
        # Init from real state at window start (the multiple-shooting trick).
        mujoco.mj_resetData(model, data)
        data.qpos[0] = real_angle[w_start]
        data.qvel[0] = real_vel[w_start]
        data.ctrl[:] = 0.0
        mujoco.mj_forward(model, data)
        # Backlash state resets each window (assume contact at start).
        backlash_state = [0.0]
        for i in range(w_start, w_end):
            ctrl = _transform_action(actions[i], params)
            data.ctrl[0] = ctrl
            for _ in range(substeps):
                scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
                # In the backlash gap, zero ctrl so no actuator torque transmits.
                if scale < 1.0:
                    data.ctrl[0] = 0.0
                else:
                    data.ctrl[0] = ctrl
                mujoco.mj_step(model, data)
            # Bail out on NaN/instability — fill rest of window with last good.
            if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
                sim_motor_angle[i:w_end] = sim_motor_angle[max(i-1, w_start)]
                sim_motor_vel[i:w_end] = 0.0
                break
            sim_motor_angle[i] = data.qpos[0]
            sim_motor_vel[i] = data.qvel[0]
    return {
        "motor_angle": sim_motor_angle,
        "motor_vel": sim_motor_vel,
        "n_windows": n_windows,
    }

View File

@@ -0,0 +1,204 @@
"""Visualise motor system identification — real vs simulated trajectories.
Usage:
python -m src.sysid.motor.visualize \
--recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
# With tuned result:
python -m src.sysid.motor.visualize \
--recording <file>.npz \
--result assets/motor/motor_sysid_result.json
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import numpy as np
import structlog
log = structlog.get_logger()
_DEFAULT_ASSET = "assets/motor"
def visualize(
    asset_path: str | Path = _DEFAULT_ASSET,
    recording_path: str | Path = "",
    result_path: str | Path | None = None,
    sim_dt: float = 0.002,
    substeps: int = 10,
    window_duration: float = 0.5,
    save_path: str | Path | None = None,
    show: bool = True,
) -> None:
    """Generate 3-panel comparison plot: angle, velocity, action.

    Simulates the recorded action sequence twice — with default and
    (if a result JSON is found) tuned parameters — and overlays both
    on the real trajectories.

    Parameters
    ----------
    asset_path : motor asset directory.
    recording_path : .npz recording to replay.
    result_path : sysid result JSON; auto-detected in asset_path if None.
    sim_dt, substeps, window_duration : passed through to the rollout.
    save_path : if given, the figure is written there at 150 dpi.
    show : call plt.show() when True; otherwise the figure is closed.
    """
    import matplotlib.pyplot as plt
    # Imported here to keep module import light (mujoco is loaded lazily).
    from src.sysid.motor.rollout import (
        MOTOR_PARAMS,
        defaults_vector,
        params_to_dict,
        rollout,
        windowed_rollout,
    )
    asset_path = Path(asset_path).resolve()
    recording = dict(np.load(recording_path))
    t = recording["time"]
    actions = recording["action"]
    # ── Simulate with default parameters ─────────────────────────
    default_params = params_to_dict(defaults_vector(MOTOR_PARAMS), MOTOR_PARAMS)
    log.info("simulating_default_params", windowed=window_duration > 0)
    if window_duration > 0:
        sim_default = windowed_rollout(
            asset_path=asset_path,
            params=default_params,
            recording=recording,
            window_duration=window_duration,
            sim_dt=sim_dt,
            substeps=substeps,
        )
    else:
        sim_default = rollout(
            asset_path=asset_path,
            params=default_params,
            actions=actions,
            sim_dt=sim_dt,
            substeps=substeps,
        )
    # ── Simulate with tuned parameters (if available) ────────────
    sim_tuned = None
    tuned_cost = None
    if result_path is not None:
        result_path = Path(result_path)
    else:
        # Auto-detect the result file next to the assets.
        auto = asset_path / "motor_sysid_result.json"
        if auto.exists():
            result_path = auto
    if result_path is not None and result_path.exists():
        result = json.loads(result_path.read_text())
        tuned_params = result.get("best_params", {})
        tuned_cost = result.get("best_cost")
        log.info("simulating_tuned_params", cost=tuned_cost)
        if window_duration > 0:
            sim_tuned = windowed_rollout(
                asset_path=asset_path,
                params=tuned_params,
                recording=recording,
                window_duration=window_duration,
                sim_dt=sim_dt,
                substeps=substeps,
            )
        else:
            sim_tuned = rollout(
                asset_path=asset_path,
                params=tuned_params,
                actions=actions,
                sim_dt=sim_dt,
                substeps=substeps,
            )
    # ── Plot ─────────────────────────────────────────────────────
    fig, axes = plt.subplots(3, 1, figsize=(14, 8), sharex=True)
    # Motor angle (degrees for readability; data is stored in radians).
    ax = axes[0]
    ax.plot(t, np.degrees(recording["motor_angle"]), "k-", lw=1.2, alpha=0.8, label="Real")
    ax.plot(t, np.degrees(sim_default["motor_angle"]), "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
    if sim_tuned is not None:
        ax.plot(t, np.degrees(sim_tuned["motor_angle"]), "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
    ax.set_ylabel("Motor Angle (°)")
    ax.legend(loc="upper right", fontsize=8)
    ax.grid(True, alpha=0.3)
    # Motor velocity.
    ax = axes[1]
    ax.plot(t, recording["motor_vel"], "k-", lw=1.2, alpha=0.8, label="Real")
    ax.plot(t, sim_default["motor_vel"], "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
    if sim_tuned is not None:
        ax.plot(t, sim_tuned["motor_vel"], "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
    ax.set_ylabel("Motor Velocity (rad/s)")
    ax.legend(loc="upper right", fontsize=8)
    ax.grid(True, alpha=0.3)
    # Action (normalised command, shared x-axis with the panels above).
    ax = axes[2]
    ax.plot(t, actions, "b-", lw=0.8, alpha=0.6)
    ax.set_ylabel("Action (norm)")
    ax.set_xlabel("Time (s)")
    ax.grid(True, alpha=0.3)
    ax.set_ylim(-1.1, 1.1)
    # Title — includes default-vs-tuned cost when a tuned result exists.
    title = "Motor System Identification — Real vs Simulated"
    if tuned_cost is not None:
        from src.sysid.motor.optimize import cost_function
        # Re-evaluate the default parameters to quote the improvement.
        orig_cost = cost_function(
            defaults_vector(MOTOR_PARAMS),
            recording,
            asset_path,
            MOTOR_PARAMS,
            sim_dt=sim_dt,
            substeps=substeps,
            window_duration=window_duration,
        )
        title += f"\nDefault cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}"
        improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0
        title += f" ({improvement:+.1f}%)"
    fig.suptitle(title, fontsize=12)
    plt.tight_layout()
    if save_path:
        fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
        log.info("figure_saved", path=str(save_path))
    if show:
        plt.show()
    else:
        plt.close(fig)
# ── CLI ──────────────────────────────────────────────────────────────
def main() -> None:
    """CLI wrapper: parse flags and hand off to ``visualize``."""
    ap = argparse.ArgumentParser(
        description="Visualise motor system identification results."
    )
    ap.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
    ap.add_argument("--recording", type=str, required=True, help=".npz file")
    ap.add_argument("--result", type=str, default=None, help="sysid result JSON")
    ap.add_argument("--sim-dt", type=float, default=0.002)
    ap.add_argument("--substeps", type=int, default=10)
    ap.add_argument("--window-duration", type=float, default=0.5)
    ap.add_argument("--save", type=str, default=None, help="Save figure path")
    ap.add_argument("--no-show", action="store_true")
    ns = ap.parse_args()
    visualize(
        asset_path=ns.asset_path,
        recording_path=ns.recording,
        result_path=ns.result,
        sim_dt=ns.sim_dt,
        substeps=ns.substeps,
        window_duration=ns.window_duration,
        save_path=ns.save,
        show=not ns.no_show,
    )


if __name__ == "__main__":
    main()

580
src/sysid/optimize.py Normal file
View File

@@ -0,0 +1,580 @@
"""CMA-ES optimiser — fit simulation parameters to a real-robot recording.
Minimises the trajectory-matching cost between a MuJoCo rollout and a
recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python
CMA-ES with native box-constraint support).
Motor parameters are **locked** from the motor-only sysid — only
pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
optimised. Velocities are optionally preprocessed with Savitzky-Golay
differentiation for cleaner targets.
Usage:
python -m src.sysid.optimize \
--robot-path assets/rotary_cartpole \
--recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz
# Shorter run for testing:
python -m src.sysid.optimize \
--robot-path assets/rotary_cartpole \
--recording <file>.npz \
--max-generations 10 --population-size 8
"""
from __future__ import annotations
import argparse
import json
import time
from datetime import datetime
from pathlib import Path
import numpy as np
import structlog
from src.sysid.rollout import (
LOCKED_MOTOR_PARAMS,
PARAM_SETS,
ROTARY_CARTPOLE_PARAMS,
ParamSpec,
bounds_arrays,
defaults_vector,
params_to_dict,
rollout,
windowed_rollout,
)
log = structlog.get_logger()
# ── Velocity preprocessing ───────────────────────────────────────────
def _preprocess_recording(
recording: dict[str, np.ndarray],
preprocess_vel: bool = True,
sg_window: int = 7,
sg_polyorder: int = 3,
) -> dict[str, np.ndarray]:
"""Optionally recompute velocities using Savitzky-Golay differentiation.
Applies SG filtering to both motor_vel and pendulum_vel, replacing
the noisy firmware finite-difference velocities with smooth
analytical derivatives of the (clean) angle signals.
"""
if not preprocess_vel:
return recording
from scipy.signal import savgol_filter
rec = dict(recording)
times = rec["time"]
dt = float(np.mean(np.diff(times)))
# Motor velocity.
rec["motor_vel_raw"] = rec["motor_vel"].copy()
rec["motor_vel"] = savgol_filter(
rec["motor_angle"],
window_length=sg_window,
polyorder=sg_polyorder,
deriv=1,
delta=dt,
)
# Pendulum velocity.
rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
rec["pendulum_vel"] = savgol_filter(
rec["pendulum_angle"],
window_length=sg_window,
polyorder=sg_polyorder,
deriv=1,
delta=dt,
)
motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
log.info(
"velocity_preprocessed",
method="savgol",
sg_window=sg_window,
sg_polyorder=sg_polyorder,
dt_ms=f"{dt*1000:.1f}",
motor_noise_std=f"{motor_noise:.3f} rad/s",
pend_noise_std=f"{pend_noise:.3f} rad/s",
)
return rec
# ── Cost function ────────────────────────────────────────────────────
def _angle_diff(a: np.ndarray, b: np.ndarray) -> np.ndarray:
"""Shortest signed angle difference, handling wrapping."""
return np.arctan2(np.sin(a - b), np.cos(a - b))
def _check_inertia_valid(params: dict[str, float]) -> bool:
"""Quick reject: pendulum inertia tensor must be positive-definite."""
ixx = params.get("pendulum_ixx", 6.16e-06)
iyy = params.get("pendulum_iyy", 6.16e-06)
izz = params.get("pendulum_izz", 1.23e-05)
ixy = params.get("pendulum_ixy", 6.10e-06)
det_xy = ixx * iyy - ixy * ixy
return det_xy > 0 and ixx > 0 and iyy > 0 and izz > 0
def _compute_trajectory_cost(
    sim: dict[str, np.ndarray],
    recording: dict[str, np.ndarray],
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    vel_outlier_threshold: float = 20.0,
) -> float:
    """Weighted MSE between sim and real trajectories.

    ``pendulum_scale`` multiplies the pendulum terms relative to the motor
    terms.  Samples where the *real* pendulum velocity magnitude reaches
    ``vel_outlier_threshold`` (rad/s) are excluded from the velocity terms:
    those are encoder-wrap artefacts (pendulum swinging past vertical) that
    no simulator can reproduce.
    """
    err_motor_pos = _angle_diff(sim["motor_angle"], recording["motor_angle"])
    err_pend_pos = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"])
    err_motor_vel = sim["motor_vel"] - recording["motor_vel"]
    err_pend_vel = sim["pendulum_vel"] - recording["pendulum_vel"]

    # Drop encoder-wrap velocity spikes so the optimizer doesn't waste
    # capacity fitting artefacts.
    keep = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold
    if keep.sum() < keep.size:
        err_motor_vel = err_motor_vel[keep]
        err_pend_vel = err_pend_vel[keep]

    cost = pos_weight * np.mean(err_motor_pos**2)
    cost += pos_weight * pendulum_scale * np.mean(err_pend_pos**2)
    cost += vel_weight * np.mean(err_motor_vel**2)
    cost += vel_weight * pendulum_scale * np.mean(err_pend_vel**2)
    return float(cost)
def cost_function(
    params_vec: np.ndarray,
    recording: dict[str, np.ndarray],
    robot_path: Path,
    specs: list[ParamSpec],
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
    motor_params: dict[str, float] | None = None,
) -> float:
    """Trajectory-matching cost for one candidate parameter vector.

    Uses **multiple-shooting** (windowed rollout): the recording is split
    into short windows (default 0.5 s), each initialised from the real
    qpos/qvel, so early errors don't compound across the full trajectory.
    That gives a much smoother cost landscape for CMA-ES.  Passing
    ``window_duration=0`` falls back to the original open-loop single-shot
    rollout (not recommended).

    Returns ``1e6`` as a penalty for invalid inertia tensors, failed
    rollouts, or non-finite simulation output.
    """
    penalty = 1e6
    params = params_to_dict(params_vec, specs)
    if not _check_inertia_valid(params):
        return penalty
    try:
        if window_duration > 0:
            sim = windowed_rollout(
                robot_path=robot_path,
                params=params,
                recording=recording,
                window_duration=window_duration,
                sim_dt=sim_dt,
                substeps=substeps,
                motor_params=motor_params,
            )
        else:
            sim = rollout(
                robot_path=robot_path,
                params=params,
                actions=recording["action"],
                sim_dt=sim_dt,
                substeps=substeps,
                motor_params=motor_params,
            )
    except Exception as exc:
        log.warning("rollout_failed", error=str(exc))
        return penalty
    # Reject candidates whose simulation diverged (NaN/inf anywhere).
    channels = ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel")
    if any(not np.all(np.isfinite(sim[ch])) for ch in channels):
        return penalty
    return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
# ── Parallel evaluation helper (module-level for pickling) ───────────
# Shared state set by the parent process before spawning workers.
# ``multiprocessing.Pool`` can only dispatch module-level (picklable)
# callables, so the per-run evaluation context lives in these globals:
# every worker gets its own copy once via ``_init_worker`` (wired up as
# the pool's ``initializer``), which avoids re-pickling the recording
# for each candidate evaluation.
_par_recording: dict[str, np.ndarray] = {}
_par_robot_path: Path = Path(".")
_par_specs: list[ParamSpec] = []
_par_kwargs: dict = {}


def _init_worker(
    recording: dict[str, np.ndarray],
    robot_path: Path,
    specs: list[ParamSpec],
    kwargs: dict,
) -> None:
    """Initialiser run once per worker process.

    Stores the evaluation context in module globals so that
    ``_eval_candidate`` needs only the candidate vector as an argument.
    """
    global _par_recording, _par_robot_path, _par_specs, _par_kwargs
    _par_recording = recording
    _par_robot_path = robot_path
    _par_specs = specs
    _par_kwargs = kwargs
def _eval_candidate(x_natural: np.ndarray) -> float:
    """Evaluate a single candidate — called in worker processes.

    Reads the recording/robot/spec context from the ``_par_*`` module
    globals populated by ``_init_worker``.  ``x_natural`` is the candidate
    parameter vector in natural (un-normalised) units.
    """
    return cost_function(
        x_natural,
        _par_recording,
        _par_robot_path,
        _par_specs,
        **_par_kwargs,
    )
# ── CMA-ES optimisation loop ────────────────────────────────────────
def optimize(
    robot_path: str | Path,
    recording_path: str | Path,
    specs: list[ParamSpec] | None = None,
    sigma0: float = 0.3,
    population_size: int = 20,
    max_generations: int = 1000,
    sim_dt: float = 0.002,
    substeps: int = 10,
    pos_weight: float = 1.0,
    vel_weight: float = 0.1,
    pendulum_scale: float = 3.0,
    window_duration: float = 0.5,
    seed: int = 42,
    preprocess_vel: bool = True,
    sg_window: int = 7,
    sg_polyorder: int = 3,
) -> dict:
    """Run CMA-ES optimisation and return results.

    Motor parameters are locked from the motor-only sysid; only
    pendulum/arm parameters are optimised.  Candidates are evaluated in
    parallel with a persistent process pool (forked once per run, not per
    generation).  The pool is always released via ``try``/``finally`` —
    previously a failing evaluation or a Ctrl-C leaked worker processes.

    Returns a dict with:
        best_params: dict[str, float]
        best_cost: float
        history: list of (generation, best_cost) tuples
        recording: str (path used)
        param_names / defaults: spec metadata
        motor_params: dict of locked motor params
    """
    from cmaes import CMA

    robot_path = Path(robot_path).resolve()
    recording_path = Path(recording_path).resolve()
    if specs is None:
        specs = ROTARY_CARTPOLE_PARAMS
    motor_params = dict(LOCKED_MOTOR_PARAMS)
    log.info(
        "motor_params_locked",
        n_params=len(motor_params),
        gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
    )
    # Load recording.
    recording = dict(np.load(recording_path))
    # Preprocessing: SG velocity recomputation.
    recording = _preprocess_recording(
        recording,
        preprocess_vel=preprocess_vel,
        sg_window=sg_window,
        sg_polyorder=sg_polyorder,
    )
    n_samples = len(recording["time"])
    duration = recording["time"][-1] - recording["time"][0]
    n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
    log.info(
        "recording_loaded",
        path=str(recording_path),
        samples=n_samples,
        duration=f"{duration:.1f}s",
        window_duration=f"{window_duration}s",
        n_windows=n_windows,
    )
    # Initial point (defaults) — normalised to [0, 1] for CMA-ES.
    lo, hi = bounds_arrays(specs)
    x0 = defaults_vector(specs)
    # Normalise to [0, 1] for the optimizer (better conditioned).
    span = hi - lo
    span[span == 0] = 1.0  # avoid division by zero

    def to_normed(x: np.ndarray) -> np.ndarray:
        return (x - lo) / span

    def from_normed(x_n: np.ndarray) -> np.ndarray:
        return x_n * span + lo

    x0_normed = to_normed(x0)
    bounds_normed = np.column_stack(
        [np.zeros(len(specs)), np.ones(len(specs))]
    )
    optimizer = CMA(
        mean=x0_normed,
        sigma=sigma0,
        bounds=bounds_normed,
        population_size=population_size,
        seed=seed,
    )
    best_cost = float("inf")
    best_params_vec = x0.copy()
    history: list[tuple[int, float]] = []
    log.info(
        "cmaes_starting",
        n_params=len(specs),
        population=population_size,
        max_gens=max_generations,
        sigma0=sigma0,
    )
    t0 = time.monotonic()
    # ── Parallel evaluation setup ────────────────────────────────
    # Each candidate is independent — evaluate them in parallel using
    # a process pool. Falls back to sequential if n_workers=1.
    import multiprocessing as mp

    n_workers = max(1, mp.cpu_count() - 1)  # leave 1 core free
    eval_kwargs = dict(
        sim_dt=sim_dt,
        substeps=substeps,
        pos_weight=pos_weight,
        vel_weight=vel_weight,
        pendulum_scale=pendulum_scale,
        window_duration=window_duration,
        motor_params=motor_params,
    )
    log.info("parallel_workers", n_workers=n_workers)
    # Create a persistent pool (avoids per-generation fork overhead).
    pool = None
    if n_workers > 1:
        pool = mp.Pool(
            n_workers,
            initializer=_init_worker,
            initargs=(recording, robot_path, specs, eval_kwargs),
        )
    try:
        for gen in range(max_generations):
            # Ask all candidates first.
            candidates_normed = []
            candidates_natural = []
            for _ in range(optimizer.population_size):
                x_normed = optimizer.ask()
                x_natural = from_normed(x_normed)
                x_natural = np.clip(x_natural, lo, hi)
                candidates_normed.append(x_normed)
                candidates_natural.append(x_natural)
            # Evaluate in parallel.
            if pool is not None:
                costs = pool.map(_eval_candidate, candidates_natural)
            else:
                costs = [cost_function(
                    x, recording, robot_path, specs, **eval_kwargs
                ) for x in candidates_natural]
            solutions = list(zip(candidates_normed, costs))
            for x_natural, c in zip(candidates_natural, costs):
                if c < best_cost:
                    best_cost = c
                    best_params_vec = x_natural.copy()
            optimizer.tell(solutions)
            history.append((gen, best_cost))
            elapsed = time.monotonic() - t0
            if gen % 5 == 0 or gen == max_generations - 1:
                log.info(
                    "cmaes_generation",
                    gen=gen,
                    best_cost=f"{best_cost:.6f}",
                    elapsed=f"{elapsed:.1f}s",
                    gen_best=f"{min(c for _, c in solutions):.6f}",
                )
    finally:
        # Always release worker processes, even on an exception or Ctrl-C.
        if pool is not None:
            pool.close()
            pool.join()
    total_time = time.monotonic() - t0
    best_params = params_to_dict(best_params_vec, specs)
    log.info(
        "cmaes_finished",
        best_cost=f"{best_cost:.6f}",
        total_time=f"{total_time:.1f}s",
        evaluations=max_generations * population_size,
    )
    # Log parameter comparison.
    defaults = params_to_dict(defaults_vector(specs), specs)
    for name in best_params:
        d = defaults[name]
        b = best_params[name]
        change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
        log.info(
            "param_result",
            name=name,
            default=f"{d:.6g}",
            tuned=f"{b:.6g}",
            change=f"{change_pct:+.1f}%",
        )
    return {
        "best_params": best_params,
        "best_cost": best_cost,
        "history": history,
        "recording": str(recording_path),
        "param_names": [s.name for s in specs],
        "defaults": {s.name: s.default for s in specs},
        "motor_params": motor_params,
        "preprocess_vel": preprocess_vel,
        "timestamp": datetime.now().isoformat(),
    }
# ── CLI entry point ──────────────────────────────────────────────────
def main() -> None:
    """CLI entry point: run CMA-ES fitting, save results, export tuned files."""
    ap = argparse.ArgumentParser(
        description="Fit simulation parameters to a real-robot recording (CMA-ES)."
    )
    ap.add_argument(
        "--robot-path",
        type=str,
        default="assets/rotary_cartpole",
        help="Path to robot asset directory",
    )
    ap.add_argument(
        "--recording",
        type=str,
        required=True,
        help="Path to .npz recording file",
    )
    ap.add_argument("--sigma0", type=float, default=0.3)
    ap.add_argument("--population-size", type=int, default=20)
    ap.add_argument("--max-generations", type=int, default=200)
    ap.add_argument("--sim-dt", type=float, default=0.002)
    ap.add_argument("--substeps", type=int, default=10)
    ap.add_argument("--pos-weight", type=float, default=1.0)
    ap.add_argument("--vel-weight", type=float, default=0.1)
    ap.add_argument("--pendulum-scale", type=float, default=3.0,
                    help="Multiplier for pendulum terms relative to motor (default 3.0)")
    ap.add_argument(
        "--window-duration",
        type=float,
        default=0.5,
        help="Shooting window length in seconds (0 = open-loop, default 0.5)",
    )
    ap.add_argument("--seed", type=int, default=42)
    ap.add_argument(
        "--no-export",
        action="store_true",
        help="Skip exporting tuned files (results JSON only)",
    )
    ap.add_argument(
        "--no-preprocess-vel",
        action="store_true",
        help="Skip Savitzky-Golay velocity preprocessing",
    )
    ap.add_argument("--sg-window", type=int, default=7,
                    help="Savitzky-Golay window length (odd, default 7)")
    ap.add_argument("--sg-polyorder", type=int, default=3,
                    help="Savitzky-Golay polynomial order (default 3)")
    ap.add_argument(
        "--param-set",
        type=str,
        default="full",
        choices=list(PARAM_SETS.keys()),
        help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
    )
    opts = ap.parse_args()

    res = optimize(
        robot_path=opts.robot_path,
        recording_path=opts.recording,
        specs=PARAM_SETS[opts.param_set],
        sigma0=opts.sigma0,
        population_size=opts.population_size,
        max_generations=opts.max_generations,
        sim_dt=opts.sim_dt,
        substeps=opts.substeps,
        pos_weight=opts.pos_weight,
        vel_weight=opts.vel_weight,
        pendulum_scale=opts.pendulum_scale,
        window_duration=opts.window_duration,
        seed=opts.seed,
        preprocess_vel=not opts.no_preprocess_vel,
        sg_window=opts.sg_window,
        sg_polyorder=opts.sg_polyorder,
    )

    # Persist a JSON summary next to the robot assets.  The per-generation
    # history is replaced by a compact first/final-cost summary so the file
    # stays small and diff-friendly.
    asset_dir = Path(opts.robot_path).resolve()
    out_file = asset_dir / "sysid_result.json"
    hist = res["history"]
    payload = {k: v for k, v in res.items() if k != "history"}
    payload["history_summary"] = {
        "first_cost": hist[0][1] if hist else None,
        "final_cost": hist[-1][1] if hist else None,
        "generations": len(hist),
    }
    out_file.write_text(json.dumps(payload, indent=2, default=str))
    log.info("results_saved", path=str(out_file))

    # Export tuned files unless --no-export.
    if not opts.no_export:
        from src.sysid.export import export_tuned_files

        export_tuned_files(
            robot_path=opts.robot_path,
            params=res["best_params"],
            motor_params=res.get("motor_params"),
        )


if __name__ == "__main__":
    main()

416
src/sysid/rollout.py Normal file
View File

@@ -0,0 +1,416 @@
"""Deterministic simulation replay — roll out recorded actions in MuJoCo.
Given a parameter vector and a recorded action sequence, builds a MuJoCo
model with overridden physics parameters, replays the actions, and returns
the simulated trajectory for comparison with the real recording.
This module is the inner loop of the CMA-ES optimizer: it is called once
per candidate parameter vector per generation.
Motor parameters are **locked** from the motor-only sysid result.
The optimizer only fits
pendulum/arm inertial parameters, pendulum joint dynamics, and
``ctrl_limit``. The asymmetric motor model (deadzone, gear compensation,
Coulomb friction, viscous damping, quadratic drag, back-EMF) is applied
via ``ActuatorConfig.transform_ctrl()`` and ``compute_motor_force()``.
"""
from __future__ import annotations
import dataclasses
import os
import tempfile
import xml.etree.ElementTree as ET
from pathlib import Path
import mujoco
import numpy as np
import yaml
from src.core.robot import ActuatorConfig, JointConfig, RobotConfig
from src.runners.mujoco import ActuatorLimits, load_mujoco_model
from src.sysid._urdf import patch_link_inertials
# ── Locked motor parameters (from motor-only sysid) ─────────────────
# These are FIXED and not optimised. They come from the 12-param model
# in robot.yaml (from motor-only sysid, cost 0.862).
# The *_pos/*_neg pairs capture the asymmetry between the two drive
# directions; the terms correspond to the actuator model described in
# the module docstring (deadzone, gear, Coulomb friction, viscous
# damping, quadratic drag, back-EMF) — units per ActuatorConfig
# (presumably SI / rad-based; confirm against src.core.robot).
LOCKED_MOTOR_PARAMS: dict[str, float] = {
    "actuator_gear_pos": 0.424182,
    "actuator_gear_neg": 0.425031,
    "actuator_filter_tau": 0.00503506,
    "motor_damping_pos": 0.00202682,
    "motor_damping_neg": 0.0146651,
    "motor_armature": 0.00277342,
    "motor_frictionloss_pos": 0.0573282,
    "motor_frictionloss_neg": 0.0533549,
    "viscous_quadratic": 0.000285329,
    "back_emf_gain": 0.00675809,
    "motor_deadzone_pos": 0.141291,
    "motor_deadzone_neg": 0.0780148,
}
# ── Tunable parameter specification ──────────────────────────────────
@dataclasses.dataclass
class ParamSpec:
    """Specification for a single tunable parameter.

    ``name`` is the key consumed by ``params_to_dict`` / URDF patching;
    ``default`` is the starting point (natural units) and ``lower``/
    ``upper`` the box constraints.

    NOTE(review): ``log_scale`` is not read anywhere in this module, and
    the CMA-ES normalisation in optimize.py is linear — confirm whether
    log-space optimisation was meant to be wired in.
    """
    name: str
    default: float
    lower: float
    upper: float
    log_scale: bool = False  # optimise in log-space (masses, inertias)
# Pendulum sysid parameters — motor params are LOCKED (not here).
# Order matters: the optimizer maps a flat vector to these specs.
# Defaults are from the URDF exported by Fusion 360.
ROTARY_CARTPOLE_PARAMS: list[ParamSpec] = [
    # ── Arm link (URDF) ──────────────────────────────────────────
    ParamSpec("arm_mass", 0.02110, 0.005, 0.08, log_scale=True),
    ParamSpec("arm_com_x", -0.00710, -0.03, 0.03),
    ParamSpec("arm_com_y", 0.00085, -0.02, 0.02),
    ParamSpec("arm_com_z", 0.00795, -0.02, 0.02),
    # ── Pendulum link (URDF) ─────────────────────────────────────
    ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
    ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
    ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
    ParamSpec("pendulum_com_z", -0.00346, -0.05, 0.05),
    ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
    ParamSpec("pendulum_iyy", 3.70e-05, 1e-07, 1e-03, log_scale=True),
    ParamSpec("pendulum_izz", 7.83e-05, 1e-07, 1e-03, log_scale=True),
    ParamSpec("pendulum_ixy", -6.93e-06, -1e-03, 1e-03),
    # ── Pendulum joint dynamics ──────────────────────────────────
    ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
    ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
    # ── Hardware realism (control pipeline) ────────────────────
    ParamSpec("ctrl_limit", 0.588, 0.45, 0.70),  # MAX_MOTOR_SPEED / 255
]

# Extended set: full params + motor armature (compensates for the
# motor-only sysid having captured arm/pendulum loading in armature).
EXTENDED_PARAMS: list[ParamSpec] = ROTARY_CARTPOLE_PARAMS + [
    ParamSpec("motor_armature", 0.00277, 0.0005, 0.02, log_scale=True),
]

# Reduced set: only the 6 most impactful pendulum parameters.
# Good for a fast first pass — converges in ~50 generations.
REDUCED_PARAMS: list[ParamSpec] = [
    ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
    ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
    ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
    ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
    ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
    ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
]

# Name → spec-list registry, consumed by the --param-set CLI flag.
PARAM_SETS: dict[str, list[ParamSpec]] = {
    "full": ROTARY_CARTPOLE_PARAMS,
    "extended": EXTENDED_PARAMS,
    "reduced": REDUCED_PARAMS,
}
def params_to_dict(
    values: np.ndarray, specs: list[ParamSpec] | None = None
) -> dict[str, float]:
    """Convert a flat parameter vector to a named dict (spec order)."""
    chosen = ROTARY_CARTPOLE_PARAMS if specs is None else specs
    out: dict[str, float] = {}
    for idx, spec in enumerate(chosen):
        out[spec.name] = float(values[idx])
    return out
def defaults_vector(specs: list[ParamSpec] | None = None) -> np.ndarray:
    """Return the default parameter vector (in natural space)."""
    chosen = ROTARY_CARTPOLE_PARAMS if specs is None else specs
    defaults = [spec.default for spec in chosen]
    return np.asarray(defaults, dtype=np.float64)
def bounds_arrays(
    specs: list[ParamSpec] | None = None,
) -> tuple[np.ndarray, np.ndarray]:
    """Return (lower, upper) bound arrays in spec order."""
    chosen = ROTARY_CARTPOLE_PARAMS if specs is None else specs
    lower = np.fromiter((s.lower for s in chosen), dtype=np.float64, count=len(chosen))
    upper = np.fromiter((s.upper for s in chosen), dtype=np.float64, count=len(chosen))
    return lower, upper
# ── MuJoCo model building with parameter overrides ──────────────────
def _build_model(
    robot_path: Path,
    params: dict[str, float],
    motor_params: dict[str, float] | None = None,
) -> tuple[mujoco.MjModel, ActuatorConfig]:
    """Build a MuJoCo model with sysid overrides.

    Patches the URDF inertials with the candidate ``params`` (written to a
    temporary file beside the assets), then assembles a ``RobotConfig``
    whose actuator carries the locked motor-sysid values.

    Parameters
    ----------
    robot_path : asset directory containing ``robot.yaml`` and the URDF
    params : pendulum/arm overrides (inertials, joint dynamics, ...)
    motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)

    Returns
    -------
    (model, actuator) — use ``actuator.transform_ctrl()`` and
    ``actuator.compute_motor_force()`` in the rollout loop.
    """
    if motor_params is None:
        motor_params = LOCKED_MOTOR_PARAMS
    robot_path = Path(robot_path).resolve()

    # ── Patch URDF inertial parameters to a temp file ────────────
    robot_yaml = yaml.safe_load((robot_path / "robot.yaml").read_text())
    urdf_path = robot_path / robot_yaml["urdf"]
    tree = ET.parse(urdf_path)
    patch_link_inertials(tree.getroot(), params)
    fd, tmp_urdf = tempfile.mkstemp(
        suffix=".urdf", prefix="_sysid_", dir=str(robot_path),
    )
    os.close(fd)
    tmp_urdf_path = Path(tmp_urdf)
    try:
        # Everything from here on may raise; the finally guarantees the
        # temp URDF never leaks into the asset directory (previously a
        # failure in tree.write() or config assembly left it behind).
        tree.write(str(tmp_urdf_path), xml_declaration=True, encoding="unicode")

        # ── Build RobotConfig with full motor sysid values ───────────
        gear_pos = motor_params.get("actuator_gear_pos", 0.424182)
        gear_neg = motor_params.get("actuator_gear_neg", 0.425031)
        # motor_armature may itself be optimised (extended param set);
        # otherwise fall back to the locked motor-sysid value.
        motor_armature = params.get(
            "motor_armature",
            motor_params.get("motor_armature", 0.00277342),
        )
        pend_damping = params.get("pendulum_damping", 0.0001)
        pend_frictionloss = params.get("pendulum_frictionloss", 0.0001)
        act_cfg = robot_yaml["actuators"][0]
        ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
        actuator = ActuatorConfig(
            joint=act_cfg["joint"],
            type="motor",
            gear=(gear_pos, gear_neg),
            ctrl_range=(ctrl_lo, ctrl_hi),
            deadzone=(
                motor_params.get("motor_deadzone_pos", 0.141),
                motor_params.get("motor_deadzone_neg", 0.078),
            ),
            damping=(
                motor_params.get("motor_damping_pos", 0.002),
                motor_params.get("motor_damping_neg", 0.015),
            ),
            frictionloss=(
                motor_params.get("motor_frictionloss_pos", 0.057),
                motor_params.get("motor_frictionloss_neg", 0.053),
            ),
            filter_tau=motor_params.get("actuator_filter_tau", 0.005),
            viscous_quadratic=motor_params.get("viscous_quadratic", 0.000285),
            back_emf_gain=motor_params.get("back_emf_gain", 0.00676),
        )
        robot = RobotConfig(
            urdf_path=tmp_urdf_path,
            actuators=[actuator],
            joints={
                # Motor-joint friction/damping are zeroed here because the
                # asymmetric motor model is applied via qfrc_applied instead.
                "motor_joint": JointConfig(
                    damping=0.0,
                    armature=motor_armature,
                    frictionloss=0.0,
                ),
                "pendulum_joint": JointConfig(
                    damping=pend_damping,
                    frictionloss=pend_frictionloss,
                ),
            },
        )
        model = load_mujoco_model(robot)
    finally:
        tmp_urdf_path.unlink(missing_ok=True)
    return model, actuator
# ── Simulation rollout ───────────────────────────────────────────────
def rollout(
    robot_path: str | Path,
    params: dict[str, float],
    actions: np.ndarray,
    sim_dt: float = 0.002,
    substeps: int = 10,
    motor_params: dict[str, float] | None = None,
) -> dict[str, np.ndarray]:
    """Open-loop replay of a recorded action sequence in MuJoCo.

    Builds a model with ``params`` overriding the pendulum/arm values,
    then advances the simulation once per recorded action (``substeps``
    physics steps each), applying the locked motor model via
    ``transform_ctrl()`` / ``compute_motor_force()``.

    Parameters
    ----------
    robot_path : asset directory
    params : named parameter overrides (pendulum/arm only)
    actions : (N,) normalised actions [-1, 1] from the recording
    sim_dt : MuJoCo physics timestep
    substeps : physics substeps per control step
    motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)

    Returns
    -------
    dict with keys motor_angle, motor_vel, pendulum_angle, pendulum_vel —
    each an (N,) numpy array of simulated values.
    """
    if motor_params is None:
        motor_params = LOCKED_MOTOR_PARAMS
    model, actuator = _build_model(Path(robot_path).resolve(), params, motor_params)
    model.opt.timestep = sim_dt
    data = mujoco.MjData(model)
    mujoco.mj_resetData(model, data)

    n_steps = len(actions)
    ctrl_limit = params.get("ctrl_limit", 0.588)
    out = {
        key: np.zeros(n_steps, dtype=np.float64)
        for key in ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel")
    }
    limits = ActuatorLimits(model)
    for step in range(n_steps):
        # Clamp the recorded action to the hardware ctrl limit, then map
        # through the asymmetric actuator model.
        clipped = min(ctrl_limit, max(-ctrl_limit, float(actions[step])))
        ctrl = actuator.transform_ctrl(clipped)
        data.ctrl[0] = ctrl
        for _ in range(substeps):
            limits.enforce(model, data)
            data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
            mujoco.mj_step(model, data)
        out["motor_angle"][step] = data.qpos[0]
        out["pendulum_angle"][step] = data.qpos[1]
        out["motor_vel"][step] = data.qvel[0]
        out["pendulum_vel"][step] = data.qvel[1]
    return out
def windowed_rollout(
    robot_path: str | Path,
    params: dict[str, float],
    recording: dict[str, np.ndarray],
    window_duration: float = 0.5,
    sim_dt: float = 0.002,
    substeps: int = 10,
    motor_params: dict[str, float] | None = None,
) -> dict[str, np.ndarray | float]:
    """Multiple-shooting rollout — split recording into short windows.

    For each window:
        1. Initialize MuJoCo state from the real qpos/qvel at the window start.
        2. Replay the recorded actions within the window.
        3. Record the simulated output.

    Motor dynamics (asymmetric friction, damping, back-EMF, etc.) are
    applied via qfrc_applied using the locked motor sysid parameters.

    Parameters
    ----------
    robot_path : asset directory
    params : named parameter overrides (pendulum/arm only)
    recording : dict with keys time, action, motor_angle, motor_vel,
        pendulum_angle, pendulum_vel (all 1D arrays of length N)
    window_duration : length of each shooting window in seconds
    sim_dt : MuJoCo physics timestep
    substeps : physics substeps per control step
    motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)

    Returns
    -------
    dict with:
        motor_angle, motor_vel, pendulum_angle, pendulum_vel — (N,) arrays
        (stitched from per-window simulations)
        n_windows — number of windows used
    """
    if motor_params is None:
        motor_params = LOCKED_MOTOR_PARAMS
    robot_path = Path(robot_path).resolve()
    model, actuator = _build_model(robot_path, params, motor_params)
    model.opt.timestep = sim_dt
    data = mujoco.MjData(model)
    times = recording["time"]
    actions = recording["action"]
    real_motor = recording["motor_angle"]
    real_motor_vel = recording["motor_vel"]
    real_pend = recording["pendulum_angle"]
    real_pend_vel = recording["pendulum_vel"]
    n = len(actions)
    # Output buffers — filled window by window, so together they cover
    # the full recording length exactly once.
    sim_motor_angle = np.zeros(n, dtype=np.float64)
    sim_motor_vel = np.zeros(n, dtype=np.float64)
    sim_pend_angle = np.zeros(n, dtype=np.float64)
    sim_pend_vel = np.zeros(n, dtype=np.float64)
    limits = ActuatorLimits(model)
    t0 = times[0]
    t_end = times[-1]
    # Window start indices: walk the recording timeline in
    # window_duration increments and map each boundary time to a sample
    # index (clamped to the last sample).
    window_starts: list[int] = []
    current_t = t0
    while current_t < t_end:
        idx = int(np.searchsorted(times, current_t))
        idx = min(idx, n - 1)
        window_starts.append(idx)
        current_t += window_duration
    ctrl_limit = params.get("ctrl_limit", 0.588)
    n_windows = len(window_starts)
    for w, w_start in enumerate(window_starts):
        # Each window ends where the next begins; the last runs to N.
        w_end = window_starts[w + 1] if w + 1 < n_windows else n
        # Re-initialise the sim from the *real* trajectory at the window
        # start so errors don't compound across windows (the point of
        # multiple shooting).
        mujoco.mj_resetData(model, data)
        data.qpos[0] = real_motor[w_start]
        data.qpos[1] = real_pend[w_start]
        data.qvel[0] = real_motor_vel[w_start]
        data.qvel[1] = real_pend_vel[w_start]
        data.ctrl[:] = 0.0
        # Recompute derived quantities after the manual state overwrite.
        mujoco.mj_forward(model, data)
        for i in range(w_start, w_end):
            # Clamp the recorded action to the hardware ctrl limit before
            # mapping it through the asymmetric actuator model.
            action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
            ctrl = actuator.transform_ctrl(action)
            data.ctrl[0] = ctrl
            for _ in range(substeps):
                limits.enforce(model, data)
                data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
                mujoco.mj_step(model, data)
            sim_motor_angle[i] = data.qpos[0]
            sim_pend_angle[i] = data.qpos[1]
            sim_motor_vel[i] = data.qvel[0]
            sim_pend_vel[i] = data.qvel[1]
    return {
        "motor_angle": sim_motor_angle,
        "motor_vel": sim_motor_vel,
        "pendulum_angle": sim_pend_angle,
        "pendulum_vel": sim_pend_vel,
        "n_windows": n_windows,
    }

253
src/sysid/visualize.py Normal file
View File

@@ -0,0 +1,253 @@
"""Visualise system identification results — real vs simulated trajectories.
Loads a recording and runs simulation with both the original and tuned
parameters, then plots a 4-panel comparison (motor angle, motor vel,
pendulum angle, pendulum vel) over time.
Usage:
python -m src.sysid.visualize \
--robot-path assets/rotary_cartpole \
--recording assets/rotary_cartpole/recordings/capture_20260311_120000.npz
# Also compare with tuned parameters:
python -m src.sysid.visualize \
--robot-path assets/rotary_cartpole \
--recording <file>.npz \
--result assets/rotary_cartpole/sysid_result.json
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import numpy as np
import structlog
log = structlog.get_logger()
def _run_sim(
    robot_path: Path,
    params: dict[str, float],
    recording: dict[str, np.ndarray],
    window_duration: float,
    sim_dt: float,
    substeps: int,
    motor_params: dict[str, float],
) -> dict[str, np.ndarray]:
    """Run windowed or open-loop rollout depending on window_duration.

    ``window_duration > 0`` selects the multiple-shooting rollout;
    ``window_duration == 0`` falls back to a single open-loop replay of
    the recorded actions.
    """
    from src.sysid.rollout import rollout, windowed_rollout
    if window_duration > 0:
        return windowed_rollout(
            robot_path=robot_path, params=params, recording=recording,
            window_duration=window_duration, sim_dt=sim_dt,
            substeps=substeps, motor_params=motor_params,
        )
    return rollout(
        robot_path=robot_path, params=params, actions=recording["action"],
        # Fix: sim_dt was previously dropped here, so the open-loop branch
        # silently ran at rollout()'s default timestep regardless of the
        # caller's --sim-dt.
        sim_dt=sim_dt,
        substeps=substeps, motor_params=motor_params,
    )
def visualize(
    robot_path: str | Path,
    recording_path: str | Path,
    result_path: str | Path | None = None,
    sim_dt: float = 0.002,
    substeps: int = 10,
    window_duration: float = 0.5,
    save_path: str | Path | None = None,
    show: bool = True,
) -> None:
    """Plot real vs simulated trajectories for a sysid recording.

    Runs the recorded action sequence through the simulator twice — once
    with the default (untuned) parameters and, when a result file is
    available, once with the tuned parameters — and overlays both on the
    real signals, one panel per state channel plus one for the action.

    Args:
        robot_path: Directory holding the robot assets (and, by default,
            ``sysid_result.json``).
        recording_path: ``.npz`` file with ``time``, ``action`` and the
            four state channels (motor/pendulum angle and velocity).
        result_path: Path to ``sysid_result.json`` containing
            ``best_params``/``best_cost``; auto-detected inside
            ``robot_path`` when ``None``.
        sim_dt: Simulation timestep in seconds.
        substeps: Physics substeps per control step.
        window_duration: Shooting window length in seconds (0 = open-loop).
        save_path: If given, save the figure here (PNG, PDF, …).
        show: Open an interactive window; otherwise the figure is closed.
    """
    # Heavy imports are local so importing this module stays cheap.
    import matplotlib.pyplot as plt
    from src.sysid.rollout import (
        LOCKED_MOTOR_PARAMS,
        ROTARY_CARTPOLE_PARAMS,
        defaults_vector,
        params_to_dict,
    )
    robot_path = Path(robot_path).resolve()
    recording = dict(np.load(recording_path))
    # Motor parameters come from the locked set and are identical for both runs.
    motor_params = LOCKED_MOTOR_PARAMS
    # Keyword arguments shared by the default and tuned simulation runs.
    sim_kwargs = dict(
        robot_path=robot_path, recording=recording,
        window_duration=window_duration, sim_dt=sim_dt,
        substeps=substeps, motor_params=motor_params,
    )
    t = recording["time"]
    actions = recording["action"]
    # ── Simulate with default parameters ─────────────────────────
    default_params = params_to_dict(
        defaults_vector(ROTARY_CARTPOLE_PARAMS), ROTARY_CARTPOLE_PARAMS
    )
    log.info("simulating_default_params", windowed=window_duration > 0)
    sim_default = _run_sim(params=default_params, **sim_kwargs)
    # ── Simulate with tuned parameters (if available) ────────────
    # Resolve result path (explicit or auto-detect).
    if result_path is None:
        auto = robot_path / "sysid_result.json"
        if auto.exists():
            result_path = auto
    sim_tuned = None
    tuned_cost = None
    if result_path is not None:
        result_path = Path(result_path)
        if result_path.exists():
            result = json.loads(result_path.read_text())
            tuned_params = result.get("best_params", {})
            tuned_cost = result.get("best_cost")
            log.info("simulating_tuned_params", cost=tuned_cost)
            sim_tuned = _run_sim(params=tuned_params, **sim_kwargs)
        else:
            # A missing result file is not fatal: the plot is still
            # produced with the default-parameter run only.
            log.warning("result_file_not_found", path=str(result_path))
    # ── Plot ─────────────────────────────────────────────────────
    # Four state panels + one action panel, sharing the time axis.
    fig, axes = plt.subplots(5, 1, figsize=(14, 12), sharex=True)
    channels = [
        ("motor_angle", "Motor Angle (rad)"),
        ("motor_vel", "Motor Velocity (rad/s)"),
        ("pendulum_angle", "Pendulum Angle (rad)"),
        ("pendulum_vel", "Pendulum Velocity (rad/s)"),
    ]
    for ax, (key, ylabel) in zip(axes[:4], channels):
        real = recording[key]
        ax.plot(t, real, "k-", linewidth=1.2, alpha=0.8, label="Real")
        ax.plot(
            t,
            sim_default[key],
            "--",
            color="#d62728",
            linewidth=1.0,
            alpha=0.7,
            label="Sim (original)",
        )
        if sim_tuned is not None:
            ax.plot(
                t,
                sim_tuned[key],
                "--",
                color="#2ca02c",
                linewidth=1.0,
                alpha=0.7,
                label="Sim (tuned)",
            )
        ax.set_ylabel(ylabel)
        ax.legend(loc="upper right", fontsize=8)
        ax.grid(True, alpha=0.3)
    # Action plot (bottom panel).
    axes[4].plot(t, actions, "b-", linewidth=0.8, alpha=0.6)
    axes[4].set_ylabel("Action (norm)")
    axes[4].set_xlabel("Time (s)")
    axes[4].grid(True, alpha=0.3)
    axes[4].set_ylim(-1.1, 1.1)
    # Title with cost info.
    title = "System Identification — Real vs Simulated Trajectories"
    if tuned_cost is not None:
        # Compute original cost for comparison.
        from src.sysid.optimize import cost_function
        orig_cost = cost_function(
            defaults_vector(ROTARY_CARTPOLE_PARAMS),
            recording,
            robot_path,
            ROTARY_CARTPOLE_PARAMS,
            sim_dt=sim_dt,
            substeps=substeps,
            window_duration=window_duration,
            motor_params=motor_params,
        )
        title += f"\nOriginal cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}"
        # Positive percentage = tuned parameters reduced the cost.
        improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0
        title += f" ({improvement:+.1f}%)"
    fig.suptitle(title, fontsize=12)
    plt.tight_layout()
    if save_path:
        save_path = Path(save_path)
        fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
        log.info("figure_saved", path=str(save_path))
    if show:
        plt.show()
    else:
        # Free the figure in headless / batch usage.
        plt.close(fig)
# ── CLI entry point ──────────────────────────────────────────────────
def main() -> None:
    """Command-line wrapper around :func:`visualize`."""
    p = argparse.ArgumentParser(
        description="Visualise system identification results."
    )
    p.add_argument("--robot-path", type=str, default="assets/rotary_cartpole")
    p.add_argument(
        "--recording", type=str, required=True,
        help="Path to .npz recording file",
    )
    p.add_argument(
        "--result", type=str, default=None,
        help="Path to sysid_result.json (auto-detected if omitted)",
    )
    p.add_argument("--sim-dt", type=float, default=0.002)
    p.add_argument("--substeps", type=int, default=10)
    p.add_argument(
        "--window-duration", type=float, default=0.5,
        help="Shooting window length in seconds (0 = open-loop)",
    )
    p.add_argument(
        "--save", type=str, default=None,
        help="Save figure to this path (PNG, PDF, …)",
    )
    p.add_argument(
        "--no-show", action="store_true",
        help="Don't show interactive window (useful for CI)",
    )
    ns = p.parse_args()
    visualize(
        robot_path=ns.robot_path,
        recording_path=ns.recording,
        result_path=ns.result,
        sim_dt=ns.sim_dt,
        substeps=ns.substeps,
        window_duration=ns.window_duration,
        save_path=ns.save,
        show=not ns.no_show,
    )
# Script entry point: delegate to the CLI parser.
if __name__ == "__main__":
    main()

View File

@@ -4,19 +4,24 @@ import tempfile
from pathlib import Path
import numpy as np
import structlog
import torch
import tqdm
from clearml import Logger
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
from skrl.memories.torch import RandomMemory
from skrl.resources.preprocessors.torch import RunningStandardScaler
from skrl.trainers.torch import SequentialTrainer
from src.core.runner import BaseRunner
from clearml import Task, Logger
import torch
from gymnasium import spaces
from skrl.memories.torch import RandomMemory
from src.models.mlp import SharedMLP
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
from skrl.trainers.torch import SequentialTrainer
log = structlog.get_logger()
@dataclasses.dataclass
class TrainerConfig:
# PPO
rollout_steps: int = 2048
learning_epochs: int = 8
mini_batches: int = 4
@@ -29,30 +34,33 @@ class TrainerConfig:
hidden_sizes: tuple[int, ...] = (64, 64)
# Policy
initial_log_std: float = 0.5 # initial exploration noise
min_log_std: float = -2.0 # minimum exploration noise
max_log_std: float = 2.0 # maximum exploration noise (2.0 ≈ σ=7.4)
# Training
total_timesteps: int = 1_000_000
log_interval: int = 10
checkpoint_interval: int = 50_000
# Video recording
record_video_every: int = 10000 # record a video every N timesteps (0 = disabled)
record_video_min_seconds: float = 10.0 # minimum video duration in seconds
record_video_fps: int = 0 # 0 = auto-derive from simulation rate
# Video recording (uploaded to ClearML)
record_video_every: int = 10_000 # 0 = disabled
record_video_fps: int = 0 # 0 = derive from sim dt×substeps
clearml_project: str | None = None
clearml_task: str | None = None
# ── Video-recording trainer ──────────────────────────────────────────
class VideoRecordingTrainer(SequentialTrainer):
"""Subclass of skrl's SequentialTrainer that records videos periodically."""
"""SequentialTrainer with periodic evaluation videos uploaded to ClearML."""
def __init__(self, env, agents, cfg=None, trainer_config: TrainerConfig | None = None):
super().__init__(env=env, agents=agents, cfg=cfg)
self._trainer_config = trainer_config
self._tcfg = trainer_config
self._video_dir = Path(tempfile.mkdtemp(prefix="rl_videos_"))
def single_agent_train(self) -> None:
"""Override to add periodic video recording."""
assert self.num_simultaneous_agents == 1
assert self.env.num_agents == 1
assert self.num_simultaneous_agents == 1 and self.env.num_agents == 1
states, infos = self.env.reset()
@@ -61,26 +69,17 @@ class VideoRecordingTrainer(SequentialTrainer):
disable=self.disable_progressbar,
file=sys.stdout,
):
# Pre-interaction
self.agents.pre_interaction(timestep=timestep, timesteps=self.timesteps)
with torch.no_grad():
actions = self.agents.act(states, timestep=timestep, timesteps=self.timesteps)[0]
next_states, rewards, terminated, truncated, infos = self.env.step(actions)
if not self.headless:
self.env.render()
self.agents.record_transition(
states=states,
actions=actions,
rewards=rewards,
next_states=next_states,
terminated=terminated,
truncated=truncated,
infos=infos,
timestep=timestep,
timesteps=self.timesteps,
states=states, actions=actions, rewards=rewards,
next_states=next_states, terminated=terminated,
truncated=truncated, infos=infos,
timestep=timestep, timesteps=self.timesteps,
)
if self.environment_info in infos:
@@ -90,7 +89,7 @@ class VideoRecordingTrainer(SequentialTrainer):
self.agents.post_interaction(timestep=timestep, timesteps=self.timesteps)
# Reset environments
# Auto-reset for multi-env; single-env resets manually
if self.env.num_envs > 1:
states = next_states
else:
@@ -100,111 +99,91 @@ class VideoRecordingTrainer(SequentialTrainer):
else:
states = next_states
# Record video at intervals
cfg = self._trainer_config
# Periodic video recording
if (
cfg
and cfg.record_video_every > 0
and (timestep + 1) % cfg.record_video_every == 0
self._tcfg
and self._tcfg.record_video_every > 0
and (timestep + 1) % self._tcfg.record_video_every == 0
):
self._record_video(timestep + 1)
def _get_video_fps(self) -> int:
"""Derive video fps from the simulation rate, or use configured value."""
cfg = self._trainer_config
if cfg.record_video_fps > 0:
return cfg.record_video_fps
# Auto-derive from runner's simulation parameters
runner = self.env
dt = getattr(runner.config, "dt", 0.02)
substeps = getattr(runner.config, "substeps", 1)
# ── helpers ───────────────────────────────────────────────────────
def _get_fps(self) -> int:
if self._tcfg and self._tcfg.record_video_fps > 0:
return self._tcfg.record_video_fps
dt = getattr(self.env.config, "dt", 0.02)
substeps = getattr(self.env.config, "substeps", 1)
# SerialRunner has dt but no substeps — dt *is* the control period.
return max(1, int(round(1.0 / (dt * substeps))))
def _record_video(self, timestep: int) -> None:
"""Record evaluation episodes and upload to ClearML."""
try:
import imageio.v3 as iio
except ImportError:
try:
import imageio as iio
except ImportError:
return
cfg = self._trainer_config
fps = self._get_video_fps()
min_frames = int(cfg.record_video_min_seconds * fps)
max_frames = min_frames * 3 # hard cap to prevent runaway recording
fps = self._get_fps()
max_steps = getattr(self.env.env.config, "max_steps", 500)
frames: list[np.ndarray] = []
while len(frames) < min_frames and len(frames) < max_frames:
obs, _ = self.env.reset()
done = False
steps = 0
max_episode_steps = getattr(self.env.env.config, "max_steps", 500)
while not done and steps < max_episode_steps:
with torch.no_grad():
for _ in range(max_steps):
action = self.agents.act(obs, timestep=timestep, timesteps=self.timesteps)[0]
obs, _, terminated, truncated, _ = self.env.step(action)
frame = self.env.render(mode="rgb_array")
frame = self.env.render()
if frame is not None:
frames.append(frame.cpu().numpy() if isinstance(frame, torch.Tensor) else frame)
done = (terminated | truncated).any().item()
steps += 1
if len(frames) >= max_frames:
frames.append(frame)
if (terminated | truncated).any().item():
break
if frames:
video_path = str(self._video_dir / f"step_{timestep}.mp4")
iio.imwrite(video_path, frames, fps=fps)
path = str(self._video_dir / f"step_{timestep}.mp4")
iio.imwrite(path, frames, fps=fps)
logger = Logger.current_logger()
if logger:
logger.report_media(
title="Training Video",
series=f"step_{timestep}",
local_path=video_path,
iteration=timestep,
"Training Video", f"step_{timestep}",
local_path=path, iteration=timestep,
)
# Reset back to training state after recording
self.env.reset()
# ── Main trainer ─────────────────────────────────────────────────────
class Trainer:
def __init__(self, runner: BaseRunner, config: TrainerConfig):
self.runner = runner
self.config = config
self._init_clearml()
self._init_agent()
def _init_clearml(self) -> None:
if self.config.clearml_project and self.config.clearml_task:
self.clearml_task = Task.init(
project_name=self.config.clearml_project,
task_name=self.config.clearml_task,
)
else:
self.clearml_task = None
def _init_agent(self) -> None:
device: torch.device = self.runner.device
obs_space: spaces.Space = self.runner.observation_space
act_space: spaces.Space = self.runner.action_space
num_envs: int = self.runner.num_envs
device = self.runner.device
obs_space = self.runner.observation_space
act_space = self.runner.action_space
self.memory: RandomMemory = RandomMemory(memory_size=self.config.rollout_steps, num_envs=num_envs, device=device)
self.memory = RandomMemory(
memory_size=self.config.rollout_steps,
num_envs=self.runner.num_envs,
device=device,
)
self.model: SharedMLP = SharedMLP(
self.model = SharedMLP(
observation_space=obs_space,
action_space=act_space,
device=device,
hidden_sizes=self.config.hidden_sizes,
initial_log_std=self.config.initial_log_std,
min_log_std=self.config.min_log_std,
max_log_std=self.config.max_log_std,
)
models = {
"policy": self.model,
"value": self.model,
}
models = {"policy": self.model, "value": self.model}
agent_cfg = PPO_DEFAULT_CONFIG.copy()
agent_cfg.update({
@@ -217,9 +196,19 @@ class Trainer:
"ratio_clip": self.config.clip_ratio,
"value_loss_scale": self.config.value_loss_scale,
"entropy_loss_scale": self.config.entropy_loss_scale,
"state_preprocessor": RunningStandardScaler,
"state_preprocessor_kwargs": {"size": obs_space, "device": device},
"value_preprocessor": RunningStandardScaler,
"value_preprocessor_kwargs": {"size": 1, "device": device},
})
# Wire up logging frequency: write_interval is in timesteps.
# log_interval=1 → log every PPO update (= every rollout_steps timesteps).
agent_cfg["experiment"]["write_interval"] = self.config.log_interval
agent_cfg["experiment"]["checkpoint_interval"] = max(
self.config.checkpoint_interval, self.config.rollout_steps
)
self.agent: PPO = PPO(
self.agent = PPO(
models=models,
memory=self.memory,
observation_space=obs_space,
@@ -239,5 +228,3 @@ class Trainer:
def close(self) -> None:
self.runner.close()
if self.clearml_task:
self.clearml_task.close()

View File

@@ -1,47 +0,0 @@
import hydra
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf
from src.envs.cartpole import CartPoleEnv, CartPoleConfig
from src.runners.mujoco import MuJoCoRunner, MuJoCoRunnerConfig
from src.training.trainer import Trainer, TrainerConfig
from src.core.env import ActuatorConfig
def _build_env_config(cfg: DictConfig) -> CartPoleConfig:
    """Convert the Hydra ``env`` config group into a typed CartPoleConfig.

    Actuator entries are promoted from plain dicts to ActuatorConfig
    instances before constructing the environment config.
    """
    env_dict = OmegaConf.to_container(cfg.env, resolve=True)
    if "actuators" in env_dict:
        for a in env_dict["actuators"]:
            if "ctrl_range" in a:
                # YAML yields a list; coerce to tuple — presumably what
                # ActuatorConfig expects (TODO confirm against its definition).
                a["ctrl_range"] = tuple(a["ctrl_range"])
        env_dict["actuators"] = [ActuatorConfig(**a) for a in env_dict["actuators"]]
    return CartPoleConfig(**env_dict)
@hydra.main(version_base=None, config_path="configs", config_name="config")
def main(cfg: DictConfig) -> None:
    """Hydra entry point: build env, runner and trainer configs, then train."""
    env_config = _build_env_config(cfg)
    runner_config = MuJoCoRunnerConfig(**OmegaConf.to_container(cfg.runner, resolve=True))
    training_dict = OmegaConf.to_container(cfg.training, resolve=True)
    # Build ClearML task name dynamically from Hydra config group choices
    # (only when the user hasn't set one explicitly in the training config).
    if not training_dict.get("clearml_task"):
        choices = HydraConfig.get().runtime.choices
        env_name = choices.get("env", "env")
        runner_name = choices.get("runner", "runner")
        training_name = choices.get("training", "algo")
        training_dict["clearml_task"] = f"{env_name}-{runner_name}-{training_name}"
    trainer_config = TrainerConfig(**training_dict)
    env = CartPoleEnv(env_config)
    runner = MuJoCoRunner(env=env, config=runner_config)
    trainer = Trainer(runner=runner, config=trainer_config)
    try:
        trainer.train()
    finally:
        # Always release runner/task resources, even if training raises.
        trainer.close()
if __name__ == "__main__":
main()