✨ clean up lot of stuff
This commit is contained in:
24
.gitignore
vendored
24
.gitignore
vendored
@@ -1,3 +1,23 @@
|
||||
outputs/
|
||||
# IDE / editor
|
||||
.vscode/
|
||||
runs/
|
||||
|
||||
# Training & HPO outputs
|
||||
outputs/
|
||||
runs/
|
||||
smac3_output/
|
||||
|
||||
# MuJoCo
|
||||
MUJOCO_LOG.TXT
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
dist/
|
||||
build/
|
||||
|
||||
# Temp files
|
||||
*.stl
|
||||
*.scad
|
||||
@@ -10,7 +10,7 @@ encoder:
|
||||
# counts_per_rev = ppr × gear_ratio × 4 (quadrature) = 1320
|
||||
|
||||
safety:
|
||||
max_motor_angle_deg: 90.0 # hard termination limit (0 = disabled)
|
||||
max_motor_angle_deg: 90.0 # hard termination limit (physical endstop ~70-80°)
|
||||
soft_limit_deg: 40.0 # progressive penalty ramp starts here
|
||||
|
||||
reset:
|
||||
|
||||
Binary file not shown.
@@ -1,20 +1,21 @@
|
||||
# Tuned robot config — generated by src.sysid.optimize
|
||||
# Original: robot.yaml
|
||||
# Run `python -m src.sysid.visualize` to compare real vs sim.
|
||||
|
||||
urdf: rotary_cartpole.urdf
|
||||
actuators:
|
||||
- joint: motor_joint
|
||||
type: motor
|
||||
gear: 0.176692
|
||||
ctrl_range:
|
||||
- -1.0
|
||||
- 1.0
|
||||
damping: 0.009505
|
||||
filter_tau: 0.040906
|
||||
gear: [0.424182, 0.425031] # torque constant [pos, neg] (motor sysid)
|
||||
ctrl_range: [-0.592, 0.592] # effective control bound (sysid-tuned)
|
||||
deadzone: [0.141291, 0.078015] # L298N min |ctrl| for torque [pos, neg]
|
||||
damping: [0.002027, 0.014665] # viscous damping [pos, neg]
|
||||
frictionloss: [0.057328, 0.053355] # Coulomb friction [pos, neg]
|
||||
filter_tau: 0.005035 # 1st-order actuator filter (motor sysid)
|
||||
viscous_quadratic: 0.000285 # velocity² drag
|
||||
back_emf_gain: 0.006758 # back-EMF torque reduction
|
||||
joints:
|
||||
motor_joint:
|
||||
armature: 0.001389
|
||||
frictionloss: 0.002179
|
||||
armature: 0.002773 # reflected rotor inertia (motor sysid)
|
||||
frictionloss: 0.0 # handled by motor model via qfrc_applied
|
||||
pendulum_joint:
|
||||
damping: 6.1e-05
|
||||
damping: 0.000119
|
||||
frictionloss: 1.0e-05
|
||||
|
||||
@@ -26,8 +26,8 @@
|
||||
</joint>
|
||||
<link name="arm">
|
||||
<inertial>
|
||||
<origin xyz="0.014950488360794875 0.006089886527968399 0.004470745447817278" rpy="0 0 0" />
|
||||
<mass value="0.012391951282440451" />
|
||||
<origin xyz="-0.0071030505291264975 0.0008511826488989179 0.007952020186701035" rpy="0 0 0" />
|
||||
<mass value="0.02110029934220782" />
|
||||
<inertia ixx="2.70e-06" iyy="7.80e-07" izz="2.44e-06" ixy="0.0" iyz="7.20e-08" ixz="0.0" />
|
||||
</inertial>
|
||||
<visual>
|
||||
@@ -53,9 +53,9 @@
|
||||
</joint>
|
||||
<link name="pendulum">
|
||||
<inertial>
|
||||
<origin xyz="0.06432778588634695 -0.05999895841669392 0.0008769789937631209" rpy="0 0 0" />
|
||||
<mass value="0.035508993892747365" />
|
||||
<inertia ixx="3.139576982078822e-05" iyy="9.431951659638859e-06" izz="4.07315891863556e-05" ixy="-1.8892943833253423e-06" iyz="0.0" ixz="0.0" />
|
||||
<origin xyz="0.060245187591695615 -0.07601707109312682 -0.0034636702158137786" rpy="0 0 0" />
|
||||
<mass value="0.03936742845036306" />
|
||||
<inertia ixx="6.202768755990066e-05" iyy="3.70078470430685e-05" izz="7.827356811788924e-05" ixy="-6.925117819616428e-06" iyz="0.0" ixz="0.0" />
|
||||
</inertial>
|
||||
<visual>
|
||||
<origin xyz="0.006895 -0.023224 -0.162953" rpy="0 0 0" />
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
{
|
||||
"best_params": {
|
||||
"arm_mass": 0.012391951282440451,
|
||||
"arm_com_x": 0.014950488360794875,
|
||||
"arm_com_y": 0.006089886527968399,
|
||||
"arm_com_z": 0.004470745447817278,
|
||||
"pendulum_mass": 0.035508993892747365,
|
||||
"pendulum_com_x": 0.06432778588634695,
|
||||
"pendulum_com_y": -0.05999895841669392,
|
||||
"pendulum_com_z": 0.0008769789937631209,
|
||||
"pendulum_ixx": 3.139576982078822e-05,
|
||||
"pendulum_iyy": 9.431951659638859e-06,
|
||||
"pendulum_izz": 4.07315891863556e-05,
|
||||
"pendulum_ixy": -1.8892943833253423e-06,
|
||||
"actuator_gear": 0.17669161390939517,
|
||||
"actuator_filter_tau": 0.040905643692382504,
|
||||
"motor_damping": 0.009504542103348917,
|
||||
"pendulum_damping": 6.128535042404019e-05,
|
||||
"motor_armature": 0.0013894759540138252,
|
||||
"motor_frictionloss": 0.002179448047511452
|
||||
},
|
||||
"best_cost": 0.7471380533090072,
|
||||
"recording": "/Users/victormylle/Library/CloudStorage/SeaDrive-VictorMylle(cloud.optimize-it.be)/My Libraries/Projects/AI/RL-Framework/assets/rotary_cartpole/recordings/capture_20260311_215608.npz",
|
||||
"param_names": [
|
||||
"arm_mass",
|
||||
"arm_com_x",
|
||||
"arm_com_y",
|
||||
"arm_com_z",
|
||||
"pendulum_mass",
|
||||
"pendulum_com_x",
|
||||
"pendulum_com_y",
|
||||
"pendulum_com_z",
|
||||
"pendulum_ixx",
|
||||
"pendulum_iyy",
|
||||
"pendulum_izz",
|
||||
"pendulum_ixy",
|
||||
"actuator_gear",
|
||||
"actuator_filter_tau",
|
||||
"motor_damping",
|
||||
"pendulum_damping",
|
||||
"motor_armature",
|
||||
"motor_frictionloss"
|
||||
],
|
||||
"defaults": {
|
||||
"arm_mass": 0.01,
|
||||
"arm_com_x": 5e-05,
|
||||
"arm_com_y": 0.0065,
|
||||
"arm_com_z": 0.00563,
|
||||
"pendulum_mass": 0.015,
|
||||
"pendulum_com_x": 0.1583,
|
||||
"pendulum_com_y": -0.0983,
|
||||
"pendulum_com_z": 0.0,
|
||||
"pendulum_ixx": 6.16e-06,
|
||||
"pendulum_iyy": 6.16e-06,
|
||||
"pendulum_izz": 1.23e-05,
|
||||
"pendulum_ixy": 6.1e-06,
|
||||
"actuator_gear": 0.064,
|
||||
"actuator_filter_tau": 0.03,
|
||||
"motor_damping": 0.003,
|
||||
"pendulum_damping": 0.0001,
|
||||
"motor_armature": 0.0001,
|
||||
"motor_frictionloss": 0.03
|
||||
},
|
||||
"timestamp": "2026-03-11T22:08:04.782736",
|
||||
"history_summary": {
|
||||
"first_cost": 3.909456214944022,
|
||||
"final_cost": 0.7471380533090072,
|
||||
"generations": 200
|
||||
}
|
||||
}
|
||||
13
configs/env/rotary_cartpole.yaml
vendored
13
configs/env/rotary_cartpole.yaml
vendored
@@ -1,10 +1,19 @@
|
||||
max_steps: 1000
|
||||
robot_path: assets/rotary_cartpole
|
||||
reward_upright_scale: 1.0
|
||||
speed_penalty_scale: 0.1
|
||||
|
||||
# ── Regularisation penalties (prevent fast spinning) ─────────────────
|
||||
motor_vel_penalty: 0.01 # penalise high motor angular velocity
|
||||
motor_angle_penalty: 0.05 # penalise deviation from centre
|
||||
action_penalty: 0.05 # penalise large actions (energy cost)
|
||||
|
||||
# ── Software safety limit (env-level, always applied) ────────────────
|
||||
motor_angle_limit_deg: 90.0 # terminate episode if motor exceeds ±90°
|
||||
|
||||
# ── HPO search ranges ────────────────────────────────────────────────
|
||||
hpo:
|
||||
reward_upright_scale: {min: 0.5, max: 5.0}
|
||||
speed_penalty_scale: {min: 0.01, max: 1.0}
|
||||
motor_vel_penalty: {min: 0.001, max: 0.1}
|
||||
motor_angle_penalty: {min: 0.01, max: 0.2}
|
||||
action_penalty: {min: 0.01, max: 0.2}
|
||||
max_steps: {values: [500, 1000, 2000]}
|
||||
@@ -1,4 +1,4 @@
|
||||
num_envs: 1024 # MJX shines with many parallel envs
|
||||
device: auto # auto = cuda if available, else cpu
|
||||
dt: 0.002
|
||||
substeps: 20
|
||||
substeps: 10
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
num_envs: 64
|
||||
device: auto # auto = cuda if available, else cpu
|
||||
dt: 0.002
|
||||
substeps: 20
|
||||
substeps: 10
|
||||
|
||||
@@ -6,6 +6,5 @@ num_envs: 1
|
||||
device: cpu
|
||||
port: /dev/cu.usbserial-0001
|
||||
baud: 115200
|
||||
dt: 0.02 # control loop period (50 Hz)
|
||||
dt: 0.02 # control loop period (50 Hz, matches training)
|
||||
no_data_timeout: 2.0 # seconds of silence before declaring disconnect
|
||||
encoder_jump_threshold: 200 # encoder tick jump → reboot detection
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# System identification defaults.
|
||||
# Override via CLI: python -m src.sysid.optimize sysid.max_generations=50
|
||||
# Override via CLI: python scripts/sysid.py optimize --max-generations 50
|
||||
#
|
||||
# These are NOT Hydra config groups — the sysid scripts use argparse.
|
||||
# This file serves as documentation and can be loaded by custom wrappers.
|
||||
@@ -8,18 +8,25 @@ capture:
|
||||
port: /dev/cu.usbserial-0001
|
||||
baud: 115200
|
||||
duration: 20.0 # seconds
|
||||
amplitude: 180 # max PWM magnitude (0–255)
|
||||
amplitude: 150 # max PWM magnitude — must match firmware MAX_MOTOR_SPEED
|
||||
hold_min_ms: 50 # PRBS min hold time
|
||||
hold_max_ms: 300 # PRBS max hold time
|
||||
dt: 0.02 # sample period (50 Hz)
|
||||
|
||||
optimize:
|
||||
sigma0: 0.3 # CMA-ES initial step size (in [0,1] normalised space)
|
||||
population_size: 20 # candidates per generation
|
||||
max_generations: 200 # total generations (~4000 evaluations)
|
||||
population_size: 50 # candidates per generation
|
||||
max_generations: 1000 # total generations (~4000 evaluations)
|
||||
sim_dt: 0.002 # MuJoCo physics timestep
|
||||
substeps: 10 # physics substeps per control step (ctrl_dt = 0.02s)
|
||||
pos_weight: 1.0 # MSE weight for angle errors
|
||||
vel_weight: 0.1 # MSE weight for velocity errors
|
||||
window_duration: 0.5 # multiple-shooting window length (s); 0 = open-loop
|
||||
seed: 42
|
||||
|
||||
# Tunable hardware-realism params (added to ROTARY_CARTPOLE_PARAMS):
|
||||
# ctrl_limit — effective motor range → exported as ctrl_range in robot.yaml
|
||||
# motor_deadzone — L298N minimum |action| for torque → exported as deadzone in robot.yaml
|
||||
# Firmware sends raw (unfiltered) sensor data; EMA filtering is
|
||||
# handled on the Python side (env transforms) and is NOT part of
|
||||
# the sysid parameter search.
|
||||
|
||||
@@ -8,15 +8,17 @@ defaults:
|
||||
- _self_
|
||||
|
||||
hidden_sizes: [256, 256]
|
||||
total_timesteps: 100000
|
||||
learning_epochs: 5
|
||||
learning_rate: 0.001 # conservative — can't undo real-world damage
|
||||
entropy_loss_scale: 0.0001
|
||||
log_interval: 1024
|
||||
total_timesteps: 2000000
|
||||
learning_epochs: 10
|
||||
learning_rate: 0.0005 # conservative — can't undo real-world damage
|
||||
entropy_loss_scale: 0.01
|
||||
rollout_steps: 2048
|
||||
mini_batches: 8
|
||||
log_interval: 2048
|
||||
checkpoint_interval: 5000 # frequent saves — can't rewind real hardware
|
||||
initial_log_std: -0.5 # moderate initial exploration
|
||||
min_log_std: -4.0
|
||||
max_log_std: 0.0 # cap σ at 1.0
|
||||
max_log_std: 2.0 # cap σ at 1.0
|
||||
|
||||
# Never run real-hardware training remotely
|
||||
remote: false
|
||||
|
||||
@@ -8,7 +8,7 @@ defaults:
|
||||
- _self_
|
||||
|
||||
hidden_sizes: [256, 256]
|
||||
total_timesteps: 1000000
|
||||
total_timesteps: 2000000
|
||||
learning_epochs: 10
|
||||
learning_rate: 0.0003
|
||||
entropy_loss_scale: 0.01
|
||||
|
||||
379
scripts/eval.py
Normal file
379
scripts/eval.py
Normal file
@@ -0,0 +1,379 @@
|
||||
"""Evaluate a trained policy on real hardware (or in simulation).
|
||||
|
||||
Loads a checkpoint and runs the policy in a closed loop. For real
|
||||
hardware the serial runner talks to the ESP32; for sim it uses the
|
||||
MuJoCo runner. A digital-twin MuJoCo viewer mirrors the robot state
|
||||
in both modes.
|
||||
|
||||
Usage (real hardware):
|
||||
mjpython scripts/eval.py env=rotary_cartpole runner=serial \
|
||||
checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
|
||||
|
||||
Usage (simulation):
|
||||
mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \
|
||||
checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt
|
||||
|
||||
Controls:
|
||||
Space — pause / resume policy (motor stops while paused)
|
||||
R — reset environment
|
||||
Esc — quit
|
||||
"""
|
||||
|
||||
import math
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure project root is on sys.path
|
||||
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
import hydra
|
||||
import mujoco
|
||||
import mujoco.viewer
|
||||
import numpy as np
|
||||
import structlog
|
||||
import torch
|
||||
from gymnasium import spaces
|
||||
from hydra.core.hydra_config import HydraConfig
|
||||
from omegaconf import DictConfig, OmegaConf
|
||||
from skrl.resources.preprocessors.torch import RunningStandardScaler
|
||||
|
||||
from src.core.registry import build_env
|
||||
from src.models.mlp import SharedMLP
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
# ── keyboard state ───────────────────────────────────────────────────
|
||||
_reset_flag = [False]
|
||||
_paused = [False]
|
||||
_quit_flag = [False]
|
||||
|
||||
|
||||
def _key_callback(keycode: int) -> None:
|
||||
"""Called by MuJoCo viewer on key press."""
|
||||
if keycode == 32: # GLFW_KEY_SPACE
|
||||
_paused[0] = not _paused[0]
|
||||
elif keycode == 82: # GLFW_KEY_R
|
||||
_reset_flag[0] = True
|
||||
elif keycode == 256: # GLFW_KEY_ESCAPE
|
||||
_quit_flag[0] = True
|
||||
|
||||
|
||||
# ── checkpoint loading ───────────────────────────────────────────────
|
||||
|
||||
def _infer_hidden_sizes(state_dict: dict[str, torch.Tensor]) -> tuple[int, ...]:
|
||||
"""Infer hidden layer sizes from a SharedMLP state dict."""
|
||||
sizes = []
|
||||
i = 0
|
||||
while f"net.{i}.weight" in state_dict:
|
||||
sizes.append(state_dict[f"net.{i}.weight"].shape[0])
|
||||
i += 2 # skip activation layers (ELU)
|
||||
return tuple(sizes)
|
||||
|
||||
|
||||
def load_policy(
|
||||
checkpoint_path: str,
|
||||
observation_space: spaces.Space,
|
||||
action_space: spaces.Space,
|
||||
device: torch.device = torch.device("cpu"),
|
||||
) -> tuple[SharedMLP, RunningStandardScaler]:
|
||||
"""Load a trained SharedMLP + observation normalizer from a checkpoint.
|
||||
|
||||
Returns:
|
||||
(model, state_preprocessor) ready for inference.
|
||||
"""
|
||||
ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
|
||||
|
||||
# Infer architecture from saved weights.
|
||||
hidden_sizes = _infer_hidden_sizes(ckpt["policy"])
|
||||
|
||||
# Reconstruct model.
|
||||
model = SharedMLP(
|
||||
observation_space=observation_space,
|
||||
action_space=action_space,
|
||||
device=device,
|
||||
hidden_sizes=hidden_sizes,
|
||||
)
|
||||
model.load_state_dict(ckpt["policy"])
|
||||
model.eval()
|
||||
|
||||
# Reconstruct observation normalizer.
|
||||
state_preprocessor = RunningStandardScaler(size=observation_space, device=device)
|
||||
state_preprocessor.running_mean = ckpt["state_preprocessor"]["running_mean"].to(device)
|
||||
state_preprocessor.running_variance = ckpt["state_preprocessor"]["running_variance"].to(device)
|
||||
state_preprocessor.current_count = ckpt["state_preprocessor"]["current_count"]
|
||||
# Freeze the normalizer — don't update stats during eval.
|
||||
state_preprocessor.training = False
|
||||
|
||||
logger.info(
|
||||
"checkpoint_loaded",
|
||||
path=checkpoint_path,
|
||||
hidden_sizes=hidden_sizes,
|
||||
obs_mean=[round(x, 3) for x in state_preprocessor.running_mean.tolist()],
|
||||
obs_std=[round(x, 3) for x in state_preprocessor.running_variance.sqrt().tolist()],
|
||||
)
|
||||
return model, state_preprocessor
|
||||
|
||||
|
||||
# ── action arrow overlay ─────────────────────────────────────────────
|
||||
|
||||
def _add_action_arrow(viewer, model, data, action_val: float) -> None:
|
||||
"""Draw an arrow showing applied torque direction."""
|
||||
if abs(action_val) < 0.01 or model.nu == 0:
|
||||
return
|
||||
|
||||
jnt_id = model.actuator_trnid[0, 0]
|
||||
body_id = model.jnt_bodyid[jnt_id]
|
||||
pos = data.xpos[body_id].copy()
|
||||
pos[2] += 0.02
|
||||
|
||||
axis = data.xmat[body_id].reshape(3, 3) @ model.jnt_axis[jnt_id]
|
||||
arrow_len = 0.08 * action_val
|
||||
direction = axis * np.sign(arrow_len)
|
||||
|
||||
z = direction / (np.linalg.norm(direction) + 1e-8)
|
||||
up = np.array([0, 0, 1]) if abs(z[2]) < 0.99 else np.array([0, 1, 0])
|
||||
x = np.cross(up, z)
|
||||
x /= np.linalg.norm(x) + 1e-8
|
||||
y = np.cross(z, x)
|
||||
mat = np.column_stack([x, y, z]).flatten()
|
||||
|
||||
rgba = np.array(
|
||||
[0.2, 0.8, 0.2, 0.8] if action_val > 0 else [0.8, 0.2, 0.2, 0.8],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
geom = viewer.user_scn.geoms[viewer.user_scn.ngeom]
|
||||
mujoco.mjv_initGeom(
|
||||
geom,
|
||||
type=mujoco.mjtGeom.mjGEOM_ARROW,
|
||||
size=np.array([0.008, 0.008, abs(arrow_len)]),
|
||||
pos=pos,
|
||||
mat=mat,
|
||||
rgba=rgba,
|
||||
)
|
||||
viewer.user_scn.ngeom += 1
|
||||
|
||||
|
||||
# ── main loops ───────────────────────────────────────────────────────
|
||||
|
||||
@hydra.main(version_base=None, config_path="../configs", config_name="config")
|
||||
def main(cfg: DictConfig) -> None:
|
||||
choices = HydraConfig.get().runtime.choices
|
||||
env_name = choices.get("env", "cartpole")
|
||||
runner_name = choices.get("runner", "mujoco_single")
|
||||
|
||||
checkpoint_path = cfg.get("checkpoint", None)
|
||||
if checkpoint_path is None:
|
||||
logger.error("No checkpoint specified. Use: +checkpoint=path/to/agent.pt")
|
||||
sys.exit(1)
|
||||
|
||||
# Resolve relative paths against original working directory.
|
||||
checkpoint_path = str(Path(hydra.utils.get_original_cwd()) / checkpoint_path)
|
||||
if not Path(checkpoint_path).exists():
|
||||
logger.error("checkpoint_not_found", path=checkpoint_path)
|
||||
sys.exit(1)
|
||||
|
||||
if runner_name == "serial":
|
||||
_eval_serial(cfg, env_name, checkpoint_path)
|
||||
else:
|
||||
_eval_sim(cfg, env_name, checkpoint_path)
|
||||
|
||||
|
||||
def _eval_sim(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
|
||||
"""Evaluate policy in MuJoCo simulation with viewer."""
|
||||
from src.runners.mujoco import MuJoCoRunner, MuJoCoRunnerConfig
|
||||
|
||||
env = build_env(env_name, cfg)
|
||||
runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
|
||||
runner_dict["num_envs"] = 1
|
||||
runner = MuJoCoRunner(env=env, config=MuJoCoRunnerConfig(**runner_dict))
|
||||
|
||||
device = runner.device
|
||||
model, preprocessor = load_policy(
|
||||
checkpoint_path, runner.observation_space, runner.action_space, device
|
||||
)
|
||||
|
||||
mj_model = runner._model
|
||||
mj_data = runner._data[0]
|
||||
dt_ctrl = runner.config.dt * runner.config.substeps
|
||||
|
||||
with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
|
||||
obs, _ = runner.reset()
|
||||
step = 0
|
||||
episode = 0
|
||||
episode_reward = 0.0
|
||||
|
||||
logger.info(
|
||||
"eval_started",
|
||||
env=env_name,
|
||||
mode="simulation",
|
||||
checkpoint=Path(checkpoint_path).name,
|
||||
controls="Space=pause, R=reset, Esc=quit",
|
||||
)
|
||||
|
||||
while viewer.is_running() and not _quit_flag[0]:
|
||||
if _reset_flag[0]:
|
||||
_reset_flag[0] = False
|
||||
obs, _ = runner.reset()
|
||||
step = 0
|
||||
episode += 1
|
||||
episode_reward = 0.0
|
||||
logger.info("reset", episode=episode)
|
||||
|
||||
if _paused[0]:
|
||||
viewer.sync()
|
||||
time.sleep(0.05)
|
||||
continue
|
||||
|
||||
# Policy inference
|
||||
with torch.no_grad():
|
||||
normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
|
||||
action = model.act({"states": normalized_obs}, role="policy")[0]
|
||||
action = action.clamp(-1.0, 1.0)
|
||||
|
||||
obs, reward, terminated, truncated, info = runner.step(action)
|
||||
episode_reward += reward.item()
|
||||
step += 1
|
||||
|
||||
# Sync viewer
|
||||
mujoco.mj_forward(mj_model, mj_data)
|
||||
viewer.user_scn.ngeom = 0
|
||||
_add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
|
||||
viewer.sync()
|
||||
|
||||
if step % 50 == 0:
|
||||
joints = {mj_model.jnt(i).name: round(math.degrees(mj_data.qpos[i]), 1)
|
||||
for i in range(mj_model.njnt)}
|
||||
logger.debug(
|
||||
"step", n=step, reward=round(reward.item(), 3),
|
||||
action=round(action[0, 0].item(), 2),
|
||||
ep_reward=round(episode_reward, 1), **joints,
|
||||
)
|
||||
|
||||
if terminated.any() or truncated.any():
|
||||
logger.info(
|
||||
"episode_done", episode=episode, steps=step,
|
||||
total_reward=round(episode_reward, 2),
|
||||
reason="terminated" if terminated.any() else "truncated",
|
||||
)
|
||||
obs, _ = runner.reset()
|
||||
step = 0
|
||||
episode += 1
|
||||
episode_reward = 0.0
|
||||
|
||||
time.sleep(dt_ctrl)
|
||||
|
||||
runner.close()
|
||||
|
||||
|
||||
def _eval_serial(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None:
|
||||
"""Evaluate policy on real hardware via serial, with digital-twin viewer."""
|
||||
from src.runners.serial import SerialRunner, SerialRunnerConfig
|
||||
|
||||
env = build_env(env_name, cfg)
|
||||
runner_dict = OmegaConf.to_container(cfg.runner, resolve=True)
|
||||
serial_runner = SerialRunner(env=env, config=SerialRunnerConfig(**runner_dict))
|
||||
|
||||
device = serial_runner.device
|
||||
model, preprocessor = load_policy(
|
||||
checkpoint_path, serial_runner.observation_space, serial_runner.action_space, device
|
||||
)
|
||||
|
||||
# Set up digital-twin MuJoCo model for visualization.
|
||||
serial_runner._ensure_viz_model()
|
||||
mj_model = serial_runner._viz_model
|
||||
mj_data = serial_runner._viz_data
|
||||
|
||||
with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer:
|
||||
obs, _ = serial_runner.reset()
|
||||
step = 0
|
||||
episode = 0
|
||||
episode_reward = 0.0
|
||||
|
||||
logger.info(
|
||||
"eval_started",
|
||||
env=env_name,
|
||||
mode="real hardware (serial)",
|
||||
port=serial_runner.config.port,
|
||||
checkpoint=Path(checkpoint_path).name,
|
||||
controls="Space=pause, R=reset, Esc=quit",
|
||||
)
|
||||
|
||||
while viewer.is_running() and not _quit_flag[0]:
|
||||
if _reset_flag[0]:
|
||||
_reset_flag[0] = False
|
||||
serial_runner._send("M0")
|
||||
serial_runner._drive_to_center()
|
||||
serial_runner._wait_for_pendulum_still()
|
||||
obs, _ = serial_runner.reset()
|
||||
step = 0
|
||||
episode += 1
|
||||
episode_reward = 0.0
|
||||
logger.info("reset", episode=episode)
|
||||
|
||||
if _paused[0]:
|
||||
serial_runner._send("M0") # safety: stop motor while paused
|
||||
serial_runner._sync_viz()
|
||||
viewer.sync()
|
||||
time.sleep(0.05)
|
||||
continue
|
||||
|
||||
# Policy inference
|
||||
with torch.no_grad():
|
||||
normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs)
|
||||
action = model.act({"states": normalized_obs}, role="policy")[0]
|
||||
action = action.clamp(-1.0, 1.0)
|
||||
|
||||
obs, reward, terminated, truncated, info = serial_runner.step(action)
|
||||
episode_reward += reward.item()
|
||||
step += 1
|
||||
|
||||
# Sync digital twin with real sensor data.
|
||||
serial_runner._sync_viz()
|
||||
viewer.user_scn.ngeom = 0
|
||||
_add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item())
|
||||
viewer.sync()
|
||||
|
||||
if step % 25 == 0:
|
||||
state = serial_runner._read_state()
|
||||
logger.debug(
|
||||
"step", n=step, reward=round(reward.item(), 3),
|
||||
action=round(action[0, 0].item(), 2),
|
||||
ep_reward=round(episode_reward, 1),
|
||||
motor_enc=state["encoder_count"],
|
||||
pend_deg=round(state["pendulum_angle"], 1),
|
||||
)
|
||||
|
||||
# Check for safety / disconnection.
|
||||
if info.get("reboot_detected") or info.get("motor_limit_exceeded"):
|
||||
logger.error(
|
||||
"safety_stop",
|
||||
reboot=info.get("reboot_detected", False),
|
||||
motor_limit=info.get("motor_limit_exceeded", False),
|
||||
)
|
||||
serial_runner._send("M0")
|
||||
break
|
||||
|
||||
if terminated.any() or truncated.any():
|
||||
logger.info(
|
||||
"episode_done", episode=episode, steps=step,
|
||||
total_reward=round(episode_reward, 2),
|
||||
reason="terminated" if terminated.any() else "truncated",
|
||||
)
|
||||
# Auto-reset for next episode.
|
||||
obs, _ = serial_runner.reset()
|
||||
step = 0
|
||||
episode += 1
|
||||
episode_reward = 0.0
|
||||
|
||||
# Real-time pacing is handled by serial_runner.step() (dt sleep).
|
||||
|
||||
serial_runner.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
64
scripts/motor_sysid.py
Normal file
64
scripts/motor_sysid.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""Unified CLI for motor-only system identification.
|
||||
|
||||
Usage:
|
||||
python scripts/motor_sysid.py capture --duration 20
|
||||
python scripts/motor_sysid.py optimize --recording assets/motor/recordings/<file>.npz
|
||||
python scripts/motor_sysid.py visualize --recording assets/motor/recordings/<file>.npz
|
||||
python scripts/motor_sysid.py export --result assets/motor/motor_sysid_result.json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure project root is on sys.path
|
||||
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
|
||||
print(
|
||||
"Motor System Identification\n"
|
||||
"===========================\n"
|
||||
"Usage: python scripts/motor_sysid.py <command> [options]\n"
|
||||
"\n"
|
||||
"Commands:\n"
|
||||
" capture Record motor trajectory under PRBS excitation\n"
|
||||
" optimize Run CMA-ES to fit motor parameters\n"
|
||||
" visualize Plot real vs simulated motor response\n"
|
||||
" export Write tuned MJCF + robot.yaml files\n"
|
||||
"\n"
|
||||
"Workflow:\n"
|
||||
" 1. Flash sysid firmware to ESP32 (motor-only, no limits)\n"
|
||||
" 2. python scripts/motor_sysid.py capture --duration 20\n"
|
||||
" 3. python scripts/motor_sysid.py optimize --recording <file>.npz\n"
|
||||
" 4. python scripts/motor_sysid.py visualize --recording <file>.npz\n"
|
||||
"\n"
|
||||
"Run '<command> --help' for command-specific options."
|
||||
)
|
||||
sys.exit(0)
|
||||
|
||||
command = sys.argv[1]
|
||||
sys.argv = [f"motor_sysid {command}"] + sys.argv[2:]
|
||||
|
||||
if command == "capture":
|
||||
from src.sysid.motor.capture import main as cmd_main
|
||||
elif command == "optimize":
|
||||
from src.sysid.motor.optimize import main as cmd_main
|
||||
elif command == "visualize":
|
||||
from src.sysid.motor.visualize import main as cmd_main
|
||||
elif command == "export":
|
||||
from src.sysid.motor.export import main as cmd_main
|
||||
else:
|
||||
print(f"Unknown command: {command}")
|
||||
print("Available commands: capture, optimize, visualize, export")
|
||||
sys.exit(1)
|
||||
|
||||
cmd_main()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -48,9 +48,10 @@ class BaseEnv(abc.ABC, Generic[T]):
|
||||
def compute_truncations(self, step_counts: torch.Tensor) -> torch.Tensor:
|
||||
return step_counts >= self.config.max_steps
|
||||
|
||||
def get_default_qpos(self, nq: int) -> list[float] | None:
|
||||
"""Return the default joint positions for reset.
|
||||
Override in subclass if the URDF zero pose doesn't match
|
||||
the desired initial state (e.g. pendulum hanging down).
|
||||
Returns None to use the URDF default (all zeros)."""
|
||||
return None
|
||||
def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
|
||||
"""Check whether the physical robot has settled enough to start an episode.
|
||||
|
||||
Used by the SerialRunner after driving to center and waiting for the
|
||||
pendulum. Default: always ready (sim doesn't need settling).
|
||||
"""
|
||||
return True
|
||||
|
||||
@@ -1,91 +0,0 @@
|
||||
"""Real-hardware configuration — loaded from hardware.yaml next to robot.yaml.
|
||||
|
||||
Provides robot-specific constants for the SerialRunner: encoder specs,
|
||||
safety limits, and reset behaviour. Simulation-only robots simply don't
|
||||
have a hardware.yaml (the loader returns None).
|
||||
|
||||
Usage:
|
||||
hw = load_hardware_config("assets/rotary_cartpole")
|
||||
if hw is not None:
|
||||
counts_per_rev = hw.encoder.ppr * hw.encoder.gear_ratio * 4.0
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
from pathlib import Path
|
||||
|
||||
import structlog
|
||||
import yaml
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class EncoderConfig:
|
||||
"""Rotary encoder parameters."""
|
||||
|
||||
ppr: int = 11 # pulses per revolution (before quadrature)
|
||||
gear_ratio: float = 30.0 # gearbox ratio
|
||||
|
||||
@property
|
||||
def counts_per_rev(self) -> float:
|
||||
"""Total encoder counts per output-shaft revolution (quadrature)."""
|
||||
return self.ppr * self.gear_ratio * 4.0
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class SafetyConfig:
|
||||
"""Safety limits enforced by the runner (not the env)."""
|
||||
|
||||
max_motor_angle_deg: float = 90.0 # hard termination (0 = disabled)
|
||||
soft_limit_deg: float = 40.0 # progressive penalty ramp start
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ResetConfig:
|
||||
"""Parameters for the physical reset procedure."""
|
||||
|
||||
drive_speed: int = 80 # PWM for bang-bang drive-to-center
|
||||
deadband: int = 15 # encoder count threshold for "centered"
|
||||
drive_timeout: float = 3.0 # seconds
|
||||
|
||||
settle_angle_deg: float = 2.0 # pendulum angle threshold (degrees)
|
||||
settle_vel_dps: float = 5.0 # pendulum velocity threshold (deg/s)
|
||||
settle_duration: float = 0.5 # seconds the pendulum must stay still
|
||||
settle_timeout: float = 30.0 # give up after this many seconds
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class HardwareConfig:
|
||||
"""Complete real-hardware description for a robot."""
|
||||
|
||||
encoder: EncoderConfig = dataclasses.field(default_factory=EncoderConfig)
|
||||
safety: SafetyConfig = dataclasses.field(default_factory=SafetyConfig)
|
||||
reset: ResetConfig = dataclasses.field(default_factory=ResetConfig)
|
||||
|
||||
|
||||
def load_hardware_config(robot_dir: str | Path) -> HardwareConfig | None:
|
||||
"""Load hardware.yaml from a directory.
|
||||
|
||||
Returns None if the file doesn't exist (simulation-only robot).
|
||||
"""
|
||||
robot_dir = Path(robot_dir).resolve()
|
||||
yaml_path = robot_dir / "hardware.yaml"
|
||||
|
||||
if not yaml_path.exists():
|
||||
return None
|
||||
|
||||
raw = yaml.safe_load(yaml_path.read_text()) or {}
|
||||
|
||||
encoder = EncoderConfig(**raw.get("encoder", {}))
|
||||
safety = SafetyConfig(**raw.get("safety", {}))
|
||||
reset = ResetConfig(**raw.get("reset", {}))
|
||||
|
||||
config = HardwareConfig(encoder=encoder, safety=safety, reset=reset)
|
||||
|
||||
log.debug(
|
||||
"hardware_config_loaded",
|
||||
robot_dir=str(robot_dir),
|
||||
counts_per_rev=encoder.counts_per_rev,
|
||||
max_motor_angle_deg=safety.max_motor_angle_deg,
|
||||
)
|
||||
return config
|
||||
@@ -11,6 +11,7 @@ Usage:
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import structlog
|
||||
@@ -19,10 +20,20 @@ import yaml
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
def _as_pair(val) -> tuple[float, float]:
|
||||
"""Convert scalar or [pos, neg] list to (pos, neg) tuple."""
|
||||
if isinstance(val, (list, tuple)) and len(val) == 2:
|
||||
return (float(val[0]), float(val[1]))
|
||||
return (float(val), float(val))
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ActuatorConfig:
|
||||
"""Motor/actuator attached to a joint.
|
||||
|
||||
Asymmetric fields use (positive_dir, negative_dir) tuples.
|
||||
A scalar in YAML is expanded to a symmetric pair.
|
||||
|
||||
type:
|
||||
motor — direct torque control (ctrl = normalised torque)
|
||||
position — PD position servo (ctrl = target angle, needs kp)
|
||||
@@ -30,12 +41,91 @@ class ActuatorConfig:
|
||||
"""
|
||||
joint: str = ""
|
||||
type: str = "motor"
|
||||
gear: float = 1.0
|
||||
ctrl_range: tuple[float, float] = (-1.0, 1.0)
|
||||
damping: float = 0.05
|
||||
kp: float = 0.0 # proportional gain (position / velocity actuators)
|
||||
kv: float = 0.0 # derivative gain (position actuators)
|
||||
filter_tau: float = 0.0 # 1st-order filter time constant (s); 0 = no filter
|
||||
gear: tuple[float, float] = (1.0, 1.0) # torque constant (pos, neg)
|
||||
ctrl_range: tuple[float, float] = (-1.0, 1.0) # (lower, upper) control bounds
|
||||
deadzone: tuple[float, float] = (0.0, 0.0) # min |ctrl| for torque (pos, neg)
|
||||
damping: tuple[float, float] = (0.0, 0.0) # viscous damping (pos, neg)
|
||||
frictionloss: tuple[float, float] = (0.0, 0.0) # Coulomb friction (pos, neg)
|
||||
kp: float = 0.0 # proportional gain (position / velocity actuators)
|
||||
kv: float = 0.0 # derivative gain (position actuators)
|
||||
filter_tau: float = 0.0 # 1st-order filter time constant (s); 0 = no filter
|
||||
viscous_quadratic: float = 0.0 # velocity² drag coefficient
|
||||
back_emf_gain: float = 0.0 # back-EMF torque reduction
|
||||
|
||||
@property
|
||||
def gear_avg(self) -> float:
|
||||
return (self.gear[0] + self.gear[1]) / 2.0
|
||||
|
||||
@property
|
||||
def has_motor_model(self) -> bool:
|
||||
"""True if this actuator needs the runtime motor model."""
|
||||
return (
|
||||
self.gear[0] != self.gear[1]
|
||||
or self.deadzone != (0.0, 0.0)
|
||||
or self.damping != (0.0, 0.0)
|
||||
or self.frictionloss != (0.0, 0.0)
|
||||
or self.viscous_quadratic > 0
|
||||
or self.back_emf_gain > 0
|
||||
)
|
||||
|
||||
def transform_ctrl(self, ctrl: float) -> float:
|
||||
"""Apply asymmetric deadzone and gear compensation to a scalar ctrl."""
|
||||
# Deadzone
|
||||
dz_pos, dz_neg = self.deadzone
|
||||
if ctrl >= 0 and ctrl < dz_pos:
|
||||
return 0.0
|
||||
if ctrl < 0 and ctrl > -dz_neg:
|
||||
return 0.0
|
||||
|
||||
# Gear compensation: rescale so ctrl × gear_avg ≈ action × gear_dir
|
||||
gear_avg = self.gear_avg
|
||||
if gear_avg > 1e-8:
|
||||
gear_dir = self.gear[0] if ctrl >= 0 else self.gear[1]
|
||||
ctrl *= gear_dir / gear_avg
|
||||
|
||||
return ctrl
|
||||
|
||||
def compute_motor_force(self, vel: float, ctrl: float) -> float:
|
||||
"""Asymmetric friction, damping, drag, back-EMF → applied torque."""
|
||||
torque = 0.0
|
||||
|
||||
# Coulomb friction (direction-dependent)
|
||||
fl_pos, fl_neg = self.frictionloss
|
||||
if abs(vel) > 1e-6:
|
||||
fl = fl_pos if vel > 0 else fl_neg
|
||||
torque -= math.copysign(fl, vel)
|
||||
|
||||
# Viscous damping (direction-dependent)
|
||||
damp = self.damping[0] if vel > 0 else self.damping[1]
|
||||
torque -= damp * vel
|
||||
|
||||
# Quadratic velocity drag
|
||||
if self.viscous_quadratic > 0:
|
||||
torque -= self.viscous_quadratic * vel * abs(vel)
|
||||
|
||||
# Back-EMF torque reduction
|
||||
if self.back_emf_gain > 0 and abs(ctrl) > 1e-6:
|
||||
torque -= self.back_emf_gain * vel * math.copysign(1.0, ctrl)
|
||||
|
||||
return max(-10.0, min(10.0, torque))
|
||||
|
||||
def transform_action(self, action):
|
||||
"""Vectorised deadzone + gear compensation for a torch batch."""
|
||||
dz_pos, dz_neg = self.deadzone
|
||||
if dz_pos > 0 or dz_neg > 0:
|
||||
action = action.clone()
|
||||
pos_dead = (action >= 0) & (action < dz_pos)
|
||||
neg_dead = (action < 0) & (action > -dz_neg)
|
||||
action[pos_dead | neg_dead] = 0.0
|
||||
|
||||
gear_avg = self.gear_avg
|
||||
if gear_avg > 1e-8 and self.gear[0] != self.gear[1]:
|
||||
action = action.clone() if dz_pos == 0 and dz_neg == 0 else action
|
||||
pos = action >= 0
|
||||
action[pos] *= self.gear[0] / gear_avg
|
||||
action[~pos] *= self.gear[1] / gear_avg
|
||||
|
||||
return action
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
@@ -84,6 +174,9 @@ def load_robot_config(robot_dir: str | Path) -> RobotConfig:
|
||||
for a in raw.get("actuators", []):
|
||||
if "ctrl_range" in a:
|
||||
a["ctrl_range"] = tuple(a["ctrl_range"])
|
||||
for key in ("gear", "deadzone", "damping", "frictionloss"):
|
||||
if key in a:
|
||||
a[key] = _as_pair(a[key])
|
||||
actuators.append(ActuatorConfig(**a))
|
||||
|
||||
# Parse joint overrides
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import dataclasses
|
||||
import abc
|
||||
from typing import Any, Generic, TypeVar
|
||||
from src.core.env import BaseEnv
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from src.core.env import BaseEnv
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
@@ -21,6 +24,8 @@ class BaseRunner(abc.ABC, Generic[T]):
|
||||
if getattr(self.config, "device", None) == "auto":
|
||||
self.config.device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
|
||||
self._last_actions: torch.Tensor | None = None
|
||||
|
||||
self._sim_initialize(config)
|
||||
|
||||
self.observation_space = self.env.observation_space
|
||||
@@ -68,6 +73,7 @@ class BaseRunner(abc.ABC, Generic[T]):
|
||||
return obs, {}
|
||||
|
||||
def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict[str, Any]]:
|
||||
self._last_actions = actions
|
||||
qpos, qvel = self._sim_step(actions)
|
||||
self.step_counts += 1
|
||||
|
||||
@@ -95,9 +101,50 @@ class BaseRunner(abc.ABC, Generic[T]):
|
||||
# skrl expects (num_envs, 1) for rewards/terminated/truncated
|
||||
return obs, rewards.unsqueeze(-1), terminated.unsqueeze(-1), truncated.unsqueeze(-1), info
|
||||
|
||||
def render(self, env_idx: int = 0):
|
||||
"""Offscreen render → RGB numpy array. Override in subclass."""
|
||||
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
|
||||
"""Return a raw RGB frame. Override in subclass."""
|
||||
raise NotImplementedError("Render not implemented for this runner.")
|
||||
|
||||
|
||||
def render(self, env_idx: int = 0) -> np.ndarray:
|
||||
"""Render frame with action overlay."""
|
||||
frame = self._render_frame(env_idx)
|
||||
if self._last_actions is not None:
|
||||
ctrl = float(self._last_actions[env_idx, 0].clamp(-1.0, 1.0))
|
||||
_draw_action_overlay(frame, ctrl)
|
||||
return frame
|
||||
|
||||
def close(self) -> None:
|
||||
self._sim_close()
|
||||
self._sim_close()
|
||||
|
||||
|
||||
def _draw_action_overlay(frame: np.ndarray, action: float) -> None:
|
||||
"""Draw an action bar on a rendered frame (no OpenCV needed).
|
||||
|
||||
Bar is centered horizontally: green to the right (+), red to the left (-).
|
||||
"""
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
bar_y = h - 30
|
||||
bar_h = 16
|
||||
bar_x_center = w // 2
|
||||
bar_half_w = w // 4
|
||||
bar_x_left = bar_x_center - bar_half_w
|
||||
bar_x_right = bar_x_center + bar_half_w
|
||||
|
||||
# Background (dark grey)
|
||||
frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40]
|
||||
|
||||
# Filled bar
|
||||
fill_len = int(abs(action) * bar_half_w)
|
||||
if action > 0:
|
||||
color = [60, 200, 60] # green
|
||||
x0 = bar_x_center
|
||||
x1 = min(bar_x_center + fill_len, bar_x_right)
|
||||
else:
|
||||
color = [200, 60, 60] # red
|
||||
x1 = bar_x_center
|
||||
x0 = max(bar_x_center - fill_len, bar_x_left)
|
||||
frame[bar_y:bar_y + bar_h, x0:x1] = color
|
||||
|
||||
# Center tick mark (white)
|
||||
frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255]
|
||||
@@ -1,4 +1,5 @@
|
||||
import dataclasses
|
||||
import math
|
||||
import torch
|
||||
from src.core.env import BaseEnv, BaseEnvConfig
|
||||
from gymnasium import spaces
|
||||
@@ -21,7 +22,12 @@ class RotaryCartPoleConfig(BaseEnvConfig):
|
||||
"""
|
||||
# Reward shaping
|
||||
reward_upright_scale: float = 1.0 # cos(pendulum) when upright = +1
|
||||
speed_penalty_scale: float = 0.1 # penalty for high pendulum velocity near top
|
||||
motor_vel_penalty: float = 0.01 # penalise high motor angular velocity
|
||||
motor_angle_penalty: float = 0.05 # penalise deviation from centre
|
||||
action_penalty: float = 0.05 # penalise large actions (energy cost)
|
||||
|
||||
# ── Software safety limit (env-level, on top of URDF + hardware) ──
|
||||
motor_angle_limit_deg: float = 90.0 # terminate episode if exceeded
|
||||
|
||||
|
||||
class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
|
||||
@@ -34,12 +40,14 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
|
||||
Using sin/cos avoids discontinuities at ±π for continuous joints.
|
||||
|
||||
Actions (1):
|
||||
Torque applied to the motor_joint.
|
||||
Torque applied to the motor_joint (normalised to [-1, 1]).
|
||||
"""
|
||||
|
||||
def __init__(self, config: RotaryCartPoleConfig):
|
||||
super().__init__(config)
|
||||
|
||||
# ── Spaces ───────────────────────────────────────────────────
|
||||
|
||||
@property
|
||||
def observation_space(self) -> spaces.Space:
|
||||
return spaces.Box(low=-torch.inf, high=torch.inf, shape=(6,))
|
||||
@@ -48,6 +56,8 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
|
||||
def action_space(self) -> spaces.Space:
|
||||
return spaces.Box(low=-1.0, high=1.0, shape=(1,))
|
||||
|
||||
# ── State building ───────────────────────────────────────────
|
||||
|
||||
def build_state(self, qpos: torch.Tensor, qvel: torch.Tensor) -> RotaryCartPoleState:
|
||||
return RotaryCartPoleState(
|
||||
motor_angle=qpos[:, 0],
|
||||
@@ -56,32 +66,64 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]):
|
||||
pendulum_vel=qvel[:, 1],
|
||||
)
|
||||
|
||||
# ── Observations ─────────────────────────────────────────────
|
||||
|
||||
def compute_observations(self, state: RotaryCartPoleState) -> torch.Tensor:
|
||||
return torch.stack([
|
||||
obs = [
|
||||
torch.sin(state.motor_angle),
|
||||
torch.cos(state.motor_angle),
|
||||
torch.sin(state.pendulum_angle),
|
||||
torch.cos(state.pendulum_angle),
|
||||
state.motor_vel,
|
||||
state.pendulum_vel,
|
||||
], dim=-1)
|
||||
]
|
||||
return torch.stack(obs, dim=-1)
|
||||
|
||||
# ── Rewards ──────────────────────────────────────────────────
|
||||
|
||||
def compute_rewards(self, state: RotaryCartPoleState, actions: torch.Tensor) -> torch.Tensor:
|
||||
# Upright reward: -cos(θ) ∈ [-1, +1]
|
||||
upright = -torch.cos(state.pendulum_angle)
|
||||
reward = -torch.cos(state.pendulum_angle) * self.config.reward_upright_scale
|
||||
|
||||
# Penalise high pendulum velocity when near the top (upright).
|
||||
# "nearness" is weighted by how upright the pendulum is (0 at bottom, 1 at top).
|
||||
near_top = torch.clamp(-torch.cos(state.pendulum_angle), min=0.0) # 0‥1
|
||||
speed_penalty = self.config.speed_penalty_scale * near_top * state.pendulum_vel.abs()
|
||||
# Penalise fast motor spinning (discourages violent oscillation)
|
||||
reward = reward - self.config.motor_vel_penalty * state.motor_vel.pow(2)
|
||||
|
||||
return upright * self.config.reward_upright_scale - speed_penalty
|
||||
# Penalise motor deviation from centre (keep arm near zero)
|
||||
reward = reward - self.config.motor_angle_penalty * state.motor_angle.pow(2)
|
||||
|
||||
# Penalise large actions (energy efficiency / smoother control)
|
||||
reward = reward - self.config.action_penalty * actions.squeeze(-1).pow(2)
|
||||
|
||||
# Penalty for exceeding motor angle limit (episode also terminates)
|
||||
limit_rad = math.radians(self.config.motor_angle_limit_deg)
|
||||
exceeded = state.motor_angle.abs() >= limit_rad
|
||||
reward = torch.where(exceeded, torch.tensor(-1000.0, device=reward.device), reward)
|
||||
|
||||
return reward
|
||||
|
||||
# ── Terminations ─────────────────────────────────────────────
|
||||
|
||||
def compute_terminations(self, state: RotaryCartPoleState) -> torch.Tensor:
|
||||
# No early termination — episode runs for max_steps (truncation only).
|
||||
# The agent must learn to swing up AND balance continuously.
|
||||
return torch.zeros_like(state.motor_angle, dtype=torch.bool)
|
||||
# Software safety: terminate if motor angle exceeds limit.
|
||||
limit_rad = math.radians(self.config.motor_angle_limit_deg)
|
||||
exceeded = state.motor_angle.abs() >= limit_rad
|
||||
return exceeded
|
||||
|
||||
# ── Reset readiness (for SerialRunner) ───────────────────────
|
||||
|
||||
def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool:
|
||||
"""Pendulum must be hanging still and motor near center."""
|
||||
motor_angle = float(qpos[0, 0])
|
||||
pend_angle = float(qpos[0, 1])
|
||||
motor_vel = float(qvel[0, 0])
|
||||
pend_vel = float(qvel[0, 1])
|
||||
|
||||
# Pendulum near hanging-down (angle ~0) and slow
|
||||
angle_ok = abs(pend_angle) < math.radians(2.0)
|
||||
vel_ok = abs(pend_vel) < math.radians(5.0)
|
||||
# Motor near center and slow
|
||||
motor_ok = abs(motor_angle) < math.radians(5.0)
|
||||
motor_vel_ok = abs(motor_vel) < math.radians(10.0)
|
||||
return angle_ok and vel_ok and motor_ok and motor_vel_ok
|
||||
|
||||
|
||||
def get_default_qpos(self, nq: int) -> list[float] | None:
|
||||
# qpos=0 = pendulum hanging down (joint frame rotated in URDF).
|
||||
return None
|
||||
|
||||
@@ -29,7 +29,10 @@ import numpy as np
|
||||
|
||||
from src.core.env import BaseEnv
|
||||
from src.core.runner import BaseRunner, BaseRunnerConfig
|
||||
from src.runners.mujoco import MuJoCoRunner # reuse _load_model
|
||||
from src.runners.mujoco import (
|
||||
ActuatorLimits,
|
||||
load_mujoco_model,
|
||||
)
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
@@ -39,7 +42,7 @@ class MJXRunnerConfig(BaseRunnerConfig):
|
||||
num_envs: int = 1024
|
||||
device: str = "cuda"
|
||||
dt: float = 0.002
|
||||
substeps: int = 20
|
||||
substeps: int = 10
|
||||
|
||||
|
||||
class MJXRunner(BaseRunner[MJXRunnerConfig]):
|
||||
@@ -64,7 +67,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
|
||||
|
||||
def _sim_initialize(self, config: MJXRunnerConfig) -> None:
|
||||
# Step 1: Load CPU model (reuses URDF → MJCF → actuator injection)
|
||||
self._mj_model = MuJoCoRunner._load_model(self.env.robot)
|
||||
self._mj_model = load_mujoco_model(self.env.robot)
|
||||
self._mj_model.opt.timestep = config.dt
|
||||
self._nq = self._mj_model.nq
|
||||
self._nv = self._mj_model.nv
|
||||
@@ -75,9 +78,6 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
|
||||
|
||||
# Step 3: Default reset state on GPU
|
||||
default_data = mujoco.MjData(self._mj_model)
|
||||
default_qpos = self.env.get_default_qpos(self._nq)
|
||||
if default_qpos is not None:
|
||||
default_data.qpos[:] = default_qpos
|
||||
mujoco.mj_forward(self._mj_model, default_data)
|
||||
self._default_mjx_data = mjx.put_data(self._mj_model, default_data)
|
||||
|
||||
@@ -85,6 +85,18 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
|
||||
self._rng = jax.random.PRNGKey(42)
|
||||
self._batch_data = self._make_batched_data(config.num_envs)
|
||||
|
||||
# Step 4b: Build motor model info (ctrl_idx, qvel_idx, ActuatorConfig)
|
||||
self._motor_info: list[tuple[int, int]] = []
|
||||
self._motor_acts: list = []
|
||||
for ctrl_idx, act in enumerate(self.env.robot.actuators):
|
||||
if act.has_motor_model:
|
||||
jnt_id = mujoco.mj_name2id(
|
||||
self._mj_model, mujoco.mjtObj.mjOBJ_JOINT, act.joint,
|
||||
)
|
||||
qvel_idx = self._mj_model.jnt_dofadr[jnt_id]
|
||||
self._motor_info.append((ctrl_idx, qvel_idx))
|
||||
self._motor_acts.append(act)
|
||||
|
||||
# Step 5: JIT-compile the hot-path functions
|
||||
self._compile_jit_fns(config.substeps)
|
||||
|
||||
@@ -121,28 +133,77 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
|
||||
model = self._mjx_model
|
||||
default = self._default_mjx_data
|
||||
|
||||
# ── Per-actuator limit info (shared helper on MuJoCoRunner) ──
|
||||
lim = MuJoCoRunner._extract_actuator_limits(self._mj_model)
|
||||
lim = ActuatorLimits(self._mj_model)
|
||||
act_jnt_ids = jnp.array(lim.jnt_ids)
|
||||
act_limited = jnp.array(lim.limited)
|
||||
act_lo = jnp.array(lim.lo)
|
||||
act_hi = jnp.array(lim.hi)
|
||||
act_gs = jnp.array(lim.gear_sign)
|
||||
|
||||
# ── Motor model params (JAX arrays for JIT) ─────────────────
|
||||
_has_motor = len(self._motor_info) > 0
|
||||
if _has_motor:
|
||||
acts = self._motor_acts
|
||||
_ctrl_ids = jnp.array([c for c, _ in self._motor_info])
|
||||
_qvel_ids = jnp.array([q for _, q in self._motor_info])
|
||||
_dz_pos = jnp.array([a.deadzone[0] for a in acts])
|
||||
_dz_neg = jnp.array([a.deadzone[1] for a in acts])
|
||||
_gear_pos = jnp.array([a.gear[0] for a in acts])
|
||||
_gear_neg = jnp.array([a.gear[1] for a in acts])
|
||||
_gear_avg = jnp.array([a.gear_avg for a in acts])
|
||||
_fl_pos = jnp.array([a.frictionloss[0] for a in acts])
|
||||
_fl_neg = jnp.array([a.frictionloss[1] for a in acts])
|
||||
_damp_pos = jnp.array([a.damping[0] for a in acts])
|
||||
_damp_neg = jnp.array([a.damping[1] for a in acts])
|
||||
_visc_quad = jnp.array([a.viscous_quadratic for a in acts])
|
||||
_back_emf = jnp.array([a.back_emf_gain for a in acts])
|
||||
|
||||
# ── Batched step (N substeps per call) ──────────────────────
|
||||
@jax.jit
|
||||
def step_fn(data):
|
||||
# Software limit switch: clamp ctrl once before substeps.
|
||||
# Armature + default joint limits prevent significant overshoot
|
||||
# within the substep window, so per-substep clamping isn't needed.
|
||||
pos = data.qpos[:, act_jnt_ids]
|
||||
ctrl = data.ctrl
|
||||
at_hi = act_limited & (pos >= act_hi) & (act_gs * ctrl > 0)
|
||||
at_lo = act_limited & (pos <= act_lo) & (act_gs * ctrl < 0)
|
||||
clamped = jnp.where(at_hi | at_lo, 0.0, ctrl)
|
||||
data = data.replace(ctrl=clamped)
|
||||
ctrl = jnp.where(at_hi | at_lo, 0.0, ctrl)
|
||||
|
||||
if _has_motor:
|
||||
# Deadzone + asymmetric gear compensation
|
||||
mc = ctrl[:, _ctrl_ids]
|
||||
mc = jnp.where((mc >= 0) & (mc < _dz_pos), 0.0, mc)
|
||||
mc = jnp.where((mc < 0) & (mc > -_dz_neg), 0.0, mc)
|
||||
gear_dir = jnp.where(mc >= 0, _gear_pos, _gear_neg)
|
||||
mc = mc * gear_dir / _gear_avg
|
||||
ctrl = ctrl.at[:, _ctrl_ids].set(mc)
|
||||
|
||||
data = data.replace(ctrl=ctrl)
|
||||
|
||||
def body(_, d):
|
||||
if _has_motor:
|
||||
vel = d.qvel[:, _qvel_ids]
|
||||
mc = d.ctrl[:, _ctrl_ids]
|
||||
|
||||
# Coulomb friction (direction-dependent)
|
||||
fl = jnp.where(vel > 0, _fl_pos, _fl_neg)
|
||||
torque = -jnp.where(
|
||||
jnp.abs(vel) > 1e-6, jnp.sign(vel) * fl, 0.0,
|
||||
)
|
||||
# Viscous damping (direction-dependent)
|
||||
damp = jnp.where(vel > 0, _damp_pos, _damp_neg)
|
||||
torque = torque - damp * vel
|
||||
# Quadratic velocity drag
|
||||
torque = torque - _visc_quad * vel * jnp.abs(vel)
|
||||
# Back-EMF torque reduction
|
||||
bemf = _back_emf * vel * jnp.sign(mc)
|
||||
torque = torque - jnp.where(
|
||||
jnp.abs(mc) > 1e-6, bemf, 0.0,
|
||||
)
|
||||
torque = jnp.clip(torque, -10.0, 10.0)
|
||||
d = d.replace(
|
||||
qfrc_applied=d.qfrc_applied.at[:, _qvel_ids].set(torque),
|
||||
)
|
||||
|
||||
return jax.vmap(mjx.step, in_axes=(None, 0))(model, d)
|
||||
|
||||
return jax.lax.fori_loop(0, substeps, body, data)
|
||||
@@ -210,7 +271,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
|
||||
|
||||
# ── Rendering ────────────────────────────────────────────────────
|
||||
|
||||
def render(self, env_idx: int = 0) -> np.ndarray:
|
||||
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
|
||||
"""Offscreen render — copies one env's state from GPU to CPU."""
|
||||
self._render_data.qpos[:] = np.asarray(self._batch_data.qpos[env_idx])
|
||||
self._render_data.qvel[:] = np.asarray(self._batch_data.qvel[env_idx])
|
||||
@@ -222,10 +283,4 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]):
|
||||
self._mj_model, width=640, height=480,
|
||||
)
|
||||
self._offscreen_renderer.update_scene(self._render_data)
|
||||
frame = self._offscreen_renderer.render().copy()
|
||||
|
||||
# Import shared overlay helper from mujoco runner
|
||||
from src.runners.mujoco import _draw_action_overlay
|
||||
ctrl_val = float(self._render_data.ctrl[0]) if self._mj_model.nu > 0 else 0.0
|
||||
_draw_action_overlay(frame, ctrl_val)
|
||||
return frame
|
||||
return self._offscreen_renderer.render().copy()
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import dataclasses
|
||||
import os
|
||||
import tempfile
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
@@ -10,26 +12,195 @@ from src.core.env import BaseEnv
|
||||
from src.core.robot import RobotConfig
|
||||
from src.core.runner import BaseRunner, BaseRunnerConfig
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class MuJoCoRunnerConfig(BaseRunnerConfig):
|
||||
num_envs: int = 16
|
||||
device: str = "cpu"
|
||||
dt: float = 0.02
|
||||
substeps: int = 2
|
||||
dt: float = 0.002
|
||||
substeps: int = 10
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ActuatorLimits:
|
||||
"""Per-actuator joint-limit info used for software ctrl clamping.
|
||||
"""Software limit-switch: cuts motor ctrl when a joint hits its range.
|
||||
|
||||
Real motor controllers have position limits that prevent the motor
|
||||
from driving into the mechanical hard stop.
|
||||
The real robot has physical limit switches that kill motor current
|
||||
at the travel endpoints. MuJoCo's built-in joint limits only apply
|
||||
a spring force — they don't zero the actuator signal. This class
|
||||
replicates the hardware behavior.
|
||||
"""
|
||||
jnt_ids: np.ndarray # (nu,) joint index for each actuator
|
||||
limited: np.ndarray # (nu,) bool — whether that joint is limited
|
||||
lo: np.ndarray # (nu,) lower position bound
|
||||
hi: np.ndarray # (nu,) upper position bound
|
||||
gear_sign: np.ndarray # (nu,) sign of gear ratio
|
||||
|
||||
def __init__(self, model: mujoco.MjModel) -> None:
|
||||
jnt_ids = model.actuator_trnid[:model.nu, 0]
|
||||
self.jnt_ids = jnt_ids
|
||||
self.limited = model.jnt_limited[jnt_ids].astype(bool)
|
||||
self.lo = model.jnt_range[jnt_ids, 0]
|
||||
self.hi = model.jnt_range[jnt_ids, 1]
|
||||
self.gear_sign = np.sign(model.actuator_gear[:model.nu, 0])
|
||||
|
||||
def enforce(self, model: mujoco.MjModel, data: mujoco.MjData) -> None:
|
||||
"""Zero ctrl that would push past joint limits (call every substep)."""
|
||||
if not self.limited.any():
|
||||
return
|
||||
pos = data.qpos[self.jnt_ids]
|
||||
signed_ctrl = self.gear_sign * data.ctrl[:model.nu]
|
||||
at_hi = self.limited & (pos >= self.hi) & (signed_ctrl > 0)
|
||||
at_lo = self.limited & (pos <= self.lo) & (signed_ctrl < 0)
|
||||
data.ctrl[at_hi | at_lo] = 0.0
|
||||
|
||||
|
||||
# ── Public utilities ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def load_mujoco_model(robot: RobotConfig) -> mujoco.MjModel:
|
||||
"""Load a URDF (or MJCF) and apply robot.yaml settings.
|
||||
|
||||
Single model-loading entry point for all MuJoCo-based code:
|
||||
training runners, MJX, and system identification.
|
||||
|
||||
Two-step approach required because MuJoCo's URDF parser ignores
|
||||
``<actuator>`` in the ``<mujoco>`` extension block:
|
||||
|
||||
1. Load the URDF -> MuJoCo converts it to internal MJCF
|
||||
2. Export the MJCF XML, inject actuators + joint overrides, reload
|
||||
|
||||
This keeps the URDF clean (re-exportable from CAD) -- all hardware
|
||||
tuning lives in ``robot.yaml``.
|
||||
"""
|
||||
abs_path = robot.urdf_path.resolve()
|
||||
model_dir = abs_path.parent
|
||||
is_urdf = abs_path.suffix.lower() == ".urdf"
|
||||
|
||||
# -- Step 1: Load URDF with meshdir injection --
|
||||
if is_urdf:
|
||||
tree = ET.parse(abs_path)
|
||||
root = tree.getroot()
|
||||
|
||||
# MuJoCo's URDF parser strips directory prefixes from mesh
|
||||
# filenames, so we inject a <mujoco><compiler meshdir="..."/>
|
||||
# block. The original URDF stays clean and simulator-agnostic.
|
||||
meshdir = None
|
||||
for mesh_el in root.iter("mesh"):
|
||||
fn = mesh_el.get("filename", "")
|
||||
parent = str(Path(fn).parent)
|
||||
if parent and parent != ".":
|
||||
meshdir = parent
|
||||
break
|
||||
if meshdir:
|
||||
mj_ext = ET.SubElement(root, "mujoco")
|
||||
ET.SubElement(mj_ext, "compiler", attrib={
|
||||
"meshdir": meshdir,
|
||||
"balanceinertia": "true",
|
||||
})
|
||||
|
||||
# Write to a temp file (unique name for multiprocessing safety).
|
||||
fd, tmp_path = tempfile.mkstemp(
|
||||
suffix=".urdf", prefix="_mj_", dir=str(model_dir),
|
||||
)
|
||||
os.close(fd)
|
||||
try:
|
||||
tree.write(tmp_path, xml_declaration=True, encoding="unicode")
|
||||
model_raw = mujoco.MjModel.from_xml_path(tmp_path)
|
||||
finally:
|
||||
Path(tmp_path).unlink(missing_ok=True)
|
||||
else:
|
||||
model_raw = mujoco.MjModel.from_xml_path(str(abs_path))
|
||||
|
||||
# If robot.yaml has no actuators/joints, we're done.
|
||||
if not robot.actuators and not robot.joints:
|
||||
return model_raw
|
||||
|
||||
# -- Step 2: Export MJCF, inject actuators + joint overrides --
|
||||
fd, tmp_path = tempfile.mkstemp(
|
||||
suffix=".xml", prefix="_mj_", dir=str(model_dir),
|
||||
)
|
||||
os.close(fd)
|
||||
try:
|
||||
mujoco.mj_saveLastXML(tmp_path, model_raw)
|
||||
mjcf_str = Path(tmp_path).read_text()
|
||||
root = ET.fromstring(mjcf_str)
|
||||
|
||||
# -- Inject actuators --
|
||||
if robot.actuators:
|
||||
act_elem = ET.SubElement(root, "actuator")
|
||||
for act in robot.actuators:
|
||||
attribs = {
|
||||
"name": f"{act.joint}_{act.type}",
|
||||
"joint": act.joint,
|
||||
"gear": str(act.gear_avg),
|
||||
"ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
|
||||
}
|
||||
|
||||
# dyntype is only available on <general>, not on
|
||||
# shortcut elements like <motor>/<position>/<velocity>.
|
||||
use_general = act.filter_tau > 0
|
||||
|
||||
if use_general:
|
||||
attribs["dyntype"] = "filter"
|
||||
attribs["dynprm"] = str(act.filter_tau)
|
||||
attribs["gaintype"] = "fixed"
|
||||
if act.type == "position":
|
||||
attribs["biastype"] = "affine"
|
||||
attribs["gainprm"] = str(act.kp)
|
||||
attribs["biasprm"] = f"0 -{act.kp} -{act.kv}"
|
||||
elif act.type == "velocity":
|
||||
attribs["biastype"] = "affine"
|
||||
attribs["gainprm"] = str(act.kp)
|
||||
attribs["biasprm"] = f"0 0 -{act.kp}"
|
||||
else: # motor
|
||||
attribs["biastype"] = "none"
|
||||
ET.SubElement(act_elem, "general", attrib=attribs)
|
||||
else:
|
||||
if act.type == "position":
|
||||
attribs["kp"] = str(act.kp)
|
||||
if act.kv > 0:
|
||||
attribs["kv"] = str(act.kv)
|
||||
elif act.type == "velocity":
|
||||
attribs["kp"] = str(act.kp)
|
||||
ET.SubElement(act_elem, act.type, attrib=attribs)
|
||||
|
||||
# -- Apply joint overrides from robot.yaml --
|
||||
# For actuated joints with a motor model, MuJoCo damping/frictionloss
|
||||
# are set to 0 — the motor model handles them via qfrc_applied.
|
||||
joint_damping: dict[str, float] = {}
|
||||
joint_frictionloss: dict[str, float] = {}
|
||||
for act in robot.actuators:
|
||||
if act.has_motor_model:
|
||||
joint_damping[act.joint] = 0.0
|
||||
joint_frictionloss[act.joint] = 0.0
|
||||
joint_armature: dict[str, float] = {}
|
||||
for name, jcfg in robot.joints.items():
|
||||
if jcfg.damping is not None:
|
||||
joint_damping[name] = jcfg.damping
|
||||
if jcfg.armature is not None:
|
||||
joint_armature[name] = jcfg.armature
|
||||
if jcfg.frictionloss is not None:
|
||||
joint_frictionloss[name] = jcfg.frictionloss
|
||||
|
||||
for body in root.iter("body"):
|
||||
for jnt in body.findall("joint"):
|
||||
name = jnt.get("name")
|
||||
if name in joint_damping:
|
||||
jnt.set("damping", str(joint_damping[name]))
|
||||
if name in joint_armature:
|
||||
jnt.set("armature", str(joint_armature[name]))
|
||||
if name in joint_frictionloss:
|
||||
jnt.set("frictionloss", str(joint_frictionloss[name]))
|
||||
|
||||
# Disable self-collision on all geoms.
|
||||
for geom in root.iter("geom"):
|
||||
geom.set("contype", "0")
|
||||
geom.set("conaffinity", "0")
|
||||
|
||||
modified_xml = ET.tostring(root, encoding="unicode")
|
||||
Path(tmp_path).write_text(modified_xml)
|
||||
return mujoco.MjModel.from_xml_path(tmp_path)
|
||||
finally:
|
||||
Path(tmp_path).unlink(missing_ok=True)
|
||||
|
||||
|
||||
# -- Runner -----------------------------------------------------------
|
||||
|
||||
|
||||
class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
|
||||
def __init__(self, env: BaseEnv, config: MuJoCoRunnerConfig):
|
||||
@@ -38,201 +209,52 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
|
||||
@property
|
||||
def num_envs(self) -> int:
|
||||
return self.config.num_envs
|
||||
|
||||
|
||||
@property
|
||||
def device(self) -> torch.device:
|
||||
return torch.device(self.config.device)
|
||||
|
||||
@staticmethod
|
||||
def _load_model(robot: RobotConfig) -> mujoco.MjModel:
|
||||
"""Load a URDF (or MJCF) and apply robot.yaml settings.
|
||||
|
||||
Two-step approach required because MuJoCo's URDF parser ignores
|
||||
<actuator> in the <mujoco> extension block:
|
||||
1. Load the URDF → MuJoCo converts it to internal MJCF
|
||||
2. Export the MJCF XML, inject actuators + joint overrides, reload
|
||||
|
||||
This keeps the URDF clean (re-exportable from CAD) — all hardware
|
||||
tuning lives in robot.yaml.
|
||||
"""
|
||||
abs_path = robot.urdf_path.resolve()
|
||||
model_dir = abs_path.parent
|
||||
is_urdf = abs_path.suffix.lower() == ".urdf"
|
||||
|
||||
# MuJoCo's URDF parser strips directory prefixes from mesh filenames,
|
||||
# so we inject a <mujoco><compiler meshdir="..."/> block into a
|
||||
# temporary copy. The original URDF stays clean and simulator-agnostic.
|
||||
if is_urdf:
|
||||
tree = ET.parse(abs_path)
|
||||
root = tree.getroot()
|
||||
# Detect the mesh subdirectory from the first mesh filename
|
||||
meshdir = None
|
||||
for mesh_el in root.iter("mesh"):
|
||||
fn = mesh_el.get("filename", "")
|
||||
parent = str(Path(fn).parent)
|
||||
if parent and parent != ".":
|
||||
meshdir = parent
|
||||
break
|
||||
if meshdir:
|
||||
mj_ext = ET.SubElement(root, "mujoco")
|
||||
ET.SubElement(mj_ext, "compiler", attrib={
|
||||
"meshdir": meshdir,
|
||||
"balanceinertia": "true",
|
||||
})
|
||||
tmp_urdf = model_dir / "_tmp_mujoco_load.urdf"
|
||||
tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
|
||||
try:
|
||||
model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
|
||||
finally:
|
||||
tmp_urdf.unlink()
|
||||
else:
|
||||
model_raw = mujoco.MjModel.from_xml_path(str(abs_path))
|
||||
|
||||
if not robot.actuators and not robot.joints:
|
||||
return model_raw
|
||||
|
||||
# Step 2: Export internal MJCF, inject actuators + joint overrides, reload
|
||||
tmp_mjcf = model_dir / "_tmp_actuator_inject.xml"
|
||||
try:
|
||||
mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
|
||||
mjcf_str = tmp_mjcf.read_text()
|
||||
|
||||
root = ET.fromstring(mjcf_str)
|
||||
|
||||
# ── Inject actuators ────────────────────────────────────
|
||||
if robot.actuators:
|
||||
act_elem = ET.SubElement(root, "actuator")
|
||||
for act in robot.actuators:
|
||||
attribs = {
|
||||
"name": f"{act.joint}_{act.type}",
|
||||
"joint": act.joint,
|
||||
"gear": str(act.gear),
|
||||
"ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}",
|
||||
}
|
||||
|
||||
# Actuator dynamics: 1st-order low-pass filter on ctrl.
|
||||
# MuJoCo applies this every physics substep, which is
|
||||
# more physically accurate than an external EMA.
|
||||
# dyntype is only available on <general>, not on
|
||||
# shortcut elements like <motor>/<position>/<velocity>.
|
||||
use_general = act.filter_tau > 0
|
||||
|
||||
if use_general:
|
||||
attribs["dyntype"] = "filter"
|
||||
attribs["dynprm"] = str(act.filter_tau)
|
||||
|
||||
if use_general:
|
||||
attribs["gaintype"] = "fixed"
|
||||
if act.type == "position":
|
||||
attribs["biastype"] = "affine"
|
||||
attribs["gainprm"] = str(act.kp)
|
||||
attribs["biasprm"] = f"0 -{act.kp} -{act.kv}"
|
||||
elif act.type == "velocity":
|
||||
attribs["biastype"] = "affine"
|
||||
attribs["gainprm"] = str(act.kp)
|
||||
attribs["biasprm"] = f"0 0 -{act.kp}"
|
||||
else: # motor
|
||||
attribs["biastype"] = "none"
|
||||
ET.SubElement(act_elem, "general", attrib=attribs)
|
||||
else:
|
||||
if act.type == "position":
|
||||
attribs["kp"] = str(act.kp)
|
||||
if act.kv > 0:
|
||||
attribs["kv"] = str(act.kv)
|
||||
elif act.type == "velocity":
|
||||
attribs["kp"] = str(act.kp)
|
||||
ET.SubElement(act_elem, act.type, attrib=attribs)
|
||||
|
||||
# ── Apply joint overrides from robot.yaml ───────────────
|
||||
# Merge actuator damping + explicit joint overrides
|
||||
joint_damping = {a.joint: a.damping for a in robot.actuators}
|
||||
joint_armature: dict[str, float] = {}
|
||||
joint_frictionloss: dict[str, float] = {}
|
||||
for name, jcfg in robot.joints.items():
|
||||
if jcfg.damping is not None:
|
||||
joint_damping[name] = jcfg.damping
|
||||
if jcfg.armature is not None:
|
||||
joint_armature[name] = jcfg.armature
|
||||
if jcfg.frictionloss is not None:
|
||||
joint_frictionloss[name] = jcfg.frictionloss
|
||||
|
||||
for body in root.iter("body"):
|
||||
for jnt in body.findall("joint"):
|
||||
name = jnt.get("name")
|
||||
if name in joint_damping:
|
||||
jnt.set("damping", str(joint_damping[name]))
|
||||
if name in joint_armature:
|
||||
jnt.set("armature", str(joint_armature[name]))
|
||||
if name in joint_frictionloss:
|
||||
jnt.set("frictionloss", str(joint_frictionloss[name]))
|
||||
|
||||
# Disable self-collision on all geoms.
|
||||
# URDF mesh convex hulls often overlap at joints (especially
|
||||
# grandparent↔grandchild bodies that MuJoCo does NOT auto-exclude),
|
||||
# causing phantom contact forces.
|
||||
for geom in root.iter("geom"):
|
||||
geom.set("contype", "0")
|
||||
geom.set("conaffinity", "0")
|
||||
|
||||
# Joint limits use MuJoCo's default solver settings.
|
||||
# The software limit switch (zeroing ctrl at limits) + armature
|
||||
# prevent overshoot without needing ultra-stiff solref that
|
||||
# kills MJX GPU solver performance.
|
||||
|
||||
# Step 4: Write modified MJCF and reload from file path
|
||||
# (from_xml_path resolves mesh paths relative to the file location)
|
||||
modified_xml = ET.tostring(root, encoding="unicode")
|
||||
tmp_mjcf.write_text(modified_xml)
|
||||
return mujoco.MjModel.from_xml_path(str(tmp_mjcf))
|
||||
finally:
|
||||
tmp_mjcf.unlink(missing_ok=True)
|
||||
|
||||
@staticmethod
|
||||
def _extract_actuator_limits(model: mujoco.MjModel) -> ActuatorLimits:
|
||||
"""Extract per-actuator joint-limit arrays from a loaded MjModel.
|
||||
|
||||
Shared by MuJoCoRunner and MJXRunner so the logic isn't duplicated.
|
||||
"""
|
||||
nu = model.nu
|
||||
jnt_ids = np.array([model.actuator_trnid[i, 0] for i in range(nu)])
|
||||
return ActuatorLimits(
|
||||
jnt_ids=jnt_ids,
|
||||
limited=np.array([model.jnt_limited[j] for j in jnt_ids], dtype=bool),
|
||||
lo=np.array([model.jnt_range[j, 0] for j in jnt_ids]),
|
||||
hi=np.array([model.jnt_range[j, 1] for j in jnt_ids]),
|
||||
gear_sign=np.sign(np.array([model.actuator_gear[i, 0] for i in range(nu)])),
|
||||
)
|
||||
|
||||
def _sim_initialize(self, config: MuJoCoRunnerConfig) -> None:
|
||||
self._model = self._load_model(self.env.robot)
|
||||
self._model = load_mujoco_model(self.env.robot)
|
||||
self._model.opt.timestep = config.dt
|
||||
self._data: list[mujoco.MjData] = [mujoco.MjData(self._model) for _ in range(config.num_envs)]
|
||||
|
||||
self._data: list[mujoco.MjData] = [
|
||||
mujoco.MjData(self._model) for _ in range(config.num_envs)
|
||||
]
|
||||
self._nq = self._model.nq
|
||||
self._nv = self._model.nv
|
||||
self._limits = ActuatorLimits(self._model)
|
||||
|
||||
self._limits = self._extract_actuator_limits(self._model)
|
||||
# Build motor model: list of (actuator_config, joint_qvel_index) for
|
||||
# actuators that have asymmetric motor dynamics.
|
||||
self._motor_actuators: list[tuple] = []
|
||||
for act in self.env.robot.actuators:
|
||||
if act.has_motor_model:
|
||||
jnt_id = mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_JOINT, act.joint)
|
||||
qvel_idx = self._model.jnt_dofadr[jnt_id]
|
||||
self._motor_actuators.append((act, qvel_idx))
|
||||
|
||||
def _sim_step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
actions_np: np.ndarray = actions.cpu().numpy()
|
||||
|
||||
# Apply per-actuator ctrl transform (deadzone + gear compensation)
|
||||
for act_idx, (act, _) in enumerate(self._motor_actuators):
|
||||
for env_idx in range(self.num_envs):
|
||||
actions_np[env_idx, act_idx] = act.transform_ctrl(
|
||||
float(actions_np[env_idx, act_idx])
|
||||
)
|
||||
|
||||
qpos_batch = np.zeros((self.num_envs, self._nq), dtype=np.float32)
|
||||
qvel_batch = np.zeros((self.num_envs, self._nv), dtype=np.float32)
|
||||
|
||||
for i, data in enumerate(self._data):
|
||||
data.ctrl[:] = actions_np[i]
|
||||
for _ in range(self.config.substeps):
|
||||
# Software limit switch: zero out ctrl that would push
|
||||
# a joint past its position limit (like a real controller).
|
||||
lim = self._limits
|
||||
for a in range(self._model.nu):
|
||||
if lim.limited[a]:
|
||||
pos = data.qpos[lim.jnt_ids[a]]
|
||||
gs = lim.gear_sign[a]
|
||||
if pos >= lim.hi[a] and gs * data.ctrl[a] > 0:
|
||||
data.ctrl[a] = 0.0
|
||||
elif pos <= lim.lo[a] and gs * data.ctrl[a] < 0:
|
||||
data.ctrl[a] = 0.0
|
||||
# Apply asymmetric motor forces via qfrc_applied
|
||||
for act, qvel_idx in self._motor_actuators:
|
||||
vel = data.qvel[qvel_idx]
|
||||
ctrl = data.ctrl[0] # TODO: generalise for multi-actuator
|
||||
data.qfrc_applied[qvel_idx] = act.compute_motor_force(vel, ctrl)
|
||||
self._limits.enforce(self._model, data)
|
||||
mujoco.mj_step(self._model, data)
|
||||
|
||||
qpos_batch[i] = data.qpos
|
||||
@@ -242,7 +264,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
|
||||
torch.from_numpy(qpos_batch).to(self.device),
|
||||
torch.from_numpy(qvel_batch).to(self.device),
|
||||
)
|
||||
|
||||
|
||||
def _sim_reset(self, env_ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
ids = env_ids.cpu().numpy()
|
||||
n = len(ids)
|
||||
@@ -250,21 +272,13 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
|
||||
qpos_batch = np.zeros((n, self._nq), dtype=np.float32)
|
||||
qvel_batch = np.zeros((n, self._nv), dtype=np.float32)
|
||||
|
||||
default_qpos = self.env.get_default_qpos(self._nq)
|
||||
|
||||
for i, env_id in enumerate(ids):
|
||||
data = self._data[env_id]
|
||||
mujoco.mj_resetData(self._model, data)
|
||||
|
||||
# Set initial pose (env-specific, e.g. pendulum hanging down)
|
||||
if default_qpos is not None:
|
||||
data.qpos[:] = default_qpos
|
||||
|
||||
# Add small random perturbation for exploration
|
||||
# Small random perturbation for exploration
|
||||
data.qpos[:] += np.random.uniform(-0.05, 0.05, size=self._nq)
|
||||
data.qvel[:] += np.random.uniform(-0.05, 0.05, size=self._nv)
|
||||
|
||||
# Reset smoothed ctrl so motor starts from rest
|
||||
data.ctrl[:] = 0.0
|
||||
|
||||
qpos_batch[i] = data.qpos
|
||||
@@ -275,7 +289,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
|
||||
torch.from_numpy(qvel_batch).to(self.device),
|
||||
)
|
||||
|
||||
def render(self, env_idx: int = 0) -> np.ndarray:
|
||||
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
|
||||
"""Offscreen render of a single environment."""
|
||||
if not hasattr(self, "_offscreen_renderer"):
|
||||
self._offscreen_renderer = mujoco.Renderer(
|
||||
@@ -283,43 +297,4 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]):
|
||||
)
|
||||
mujoco.mj_forward(self._model, self._data[env_idx])
|
||||
self._offscreen_renderer.update_scene(self._data[env_idx])
|
||||
frame = self._offscreen_renderer.render().copy()
|
||||
|
||||
# Draw action bar overlay — shows ctrl[0] as a horizontal bar
|
||||
ctrl_val = float(self._data[env_idx].ctrl[0]) if self._model.nu > 0 else 0.0
|
||||
_draw_action_overlay(frame, ctrl_val)
|
||||
return frame
|
||||
|
||||
|
||||
def _draw_action_overlay(frame: np.ndarray, action: float) -> None:
|
||||
"""Draw an action bar + text on a rendered frame (no OpenCV needed).
|
||||
|
||||
Bar is centered horizontally: green to the right (+), red to the left (-).
|
||||
"""
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
# Bar geometry
|
||||
bar_y = h - 30
|
||||
bar_h = 16
|
||||
bar_x_center = w // 2
|
||||
bar_half_w = w // 4 # max half-width of the bar
|
||||
bar_x_left = bar_x_center - bar_half_w
|
||||
bar_x_right = bar_x_center + bar_half_w
|
||||
|
||||
# Background (dark grey)
|
||||
frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40]
|
||||
|
||||
# Filled bar
|
||||
fill_len = int(abs(action) * bar_half_w)
|
||||
if action > 0:
|
||||
color = [60, 200, 60] # green
|
||||
x0 = bar_x_center
|
||||
x1 = min(bar_x_center + fill_len, bar_x_right)
|
||||
else:
|
||||
color = [200, 60, 60] # red
|
||||
x1 = bar_x_center
|
||||
x0 = max(bar_x_center - fill_len, bar_x_left)
|
||||
frame[bar_y:bar_y + bar_h, x0:x1] = color
|
||||
|
||||
# Center tick mark (white)
|
||||
frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255]
|
||||
return self._offscreen_renderer.render().copy()
|
||||
|
||||
@@ -11,11 +11,15 @@ Serial protocol (ESP32 firmware):
|
||||
H — stop streaming
|
||||
M<int> — set motor PWM speed (-255 … 255)
|
||||
|
||||
State lines received FROM the ESP32:
|
||||
S,<ms>,<enc>,<rpm>,<motor_speed>,<at_limit>,
|
||||
<pend_deg>,<pend_vel>,<target_speed>,<braking>,
|
||||
<enc_vel_cps>,<pendulum_ok>
|
||||
(12 comma-separated fields after the ``S`` prefix)
|
||||
State lines received FROM the ESP32 (firmware sends SI units):
|
||||
S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
|
||||
(7 comma-separated fields after the ``S`` prefix)
|
||||
|
||||
motor_rad — motor joint angle (radians)
|
||||
motor_vel — motor joint velocity (rad/s)
|
||||
pend_rad — pendulum angle (radians, 0 = hanging down)
|
||||
pend_vel — pendulum angular velocity (rad/s)
|
||||
motor_speed — applied PWM (-255..255, for action recording)
|
||||
|
||||
A daemon thread continuously reads the serial stream so the control
|
||||
loop never blocks on I/O.
|
||||
@@ -28,7 +32,6 @@ from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import logging
|
||||
import math
|
||||
import threading
|
||||
import time
|
||||
from typing import Any
|
||||
@@ -36,8 +39,6 @@ from typing import Any
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from src.core.env import BaseEnv
|
||||
from src.core.hardware import HardwareConfig, load_hardware_config
|
||||
from src.core.runner import BaseRunner, BaseRunnerConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -52,9 +53,14 @@ class SerialRunnerConfig(BaseRunnerConfig):
|
||||
|
||||
port: str = "/dev/cu.usbserial-0001"
|
||||
baud: int = 115200
|
||||
dt: float = 0.02 # control loop period (seconds), 50 Hz
|
||||
dt: float = 0.04 # control loop period (seconds), 25 Hz
|
||||
no_data_timeout: float = 2.0 # seconds of silence → disconnect
|
||||
encoder_jump_threshold: int = 200 # encoder tick jump → reboot
|
||||
|
||||
# Physical reset procedure
|
||||
reset_drive_speed: int = 80 # PWM for bang-bang drive-to-center
|
||||
reset_deadband_rad: float = 0.01 # "centered" threshold (~0.6°)
|
||||
reset_drive_timeout: float = 3.0 # seconds to reach center
|
||||
reset_settle_timeout: float = 30.0 # seconds to wait for pendulum
|
||||
|
||||
|
||||
class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
@@ -80,22 +86,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
return torch.device("cpu")
|
||||
|
||||
def _sim_initialize(self, config: SerialRunnerConfig) -> None:
|
||||
# Load hardware description (encoder, safety, reset params).
|
||||
hw = load_hardware_config(self.env.config.robot_path)
|
||||
if hw is None:
|
||||
raise FileNotFoundError(
|
||||
f"hardware.yaml not found in {self.env.config.robot_path}. "
|
||||
"The serial runner requires a hardware config for encoder, "
|
||||
"safety, and reset parameters."
|
||||
)
|
||||
self._hw: HardwareConfig = hw
|
||||
self._counts_per_rev: float = hw.encoder.counts_per_rev
|
||||
self._max_motor_angle_rad: float = (
|
||||
math.radians(hw.safety.max_motor_angle_deg)
|
||||
if hw.safety.max_motor_angle_deg > 0
|
||||
else 0.0
|
||||
)
|
||||
|
||||
# Joint dimensions for the rotary cartpole (motor + pendulum).
|
||||
self._nq = 2
|
||||
self._nv = 2
|
||||
@@ -105,8 +95,16 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
|
||||
self._serial_mod = _serial
|
||||
|
||||
# Explicitly disable hardware flow control and exclusive mode to
|
||||
# avoid termios.error (errno 22) on macOS with CH340/CP2102 adapters.
|
||||
self.ser: _serial.Serial = _serial.Serial(
|
||||
config.port, config.baud, timeout=0.05
|
||||
port=config.port,
|
||||
baudrate=config.baud,
|
||||
timeout=0.05,
|
||||
xonxoff=False,
|
||||
rtscts=False,
|
||||
dsrdtr=False,
|
||||
exclusive=False,
|
||||
)
|
||||
time.sleep(2) # Wait for ESP32 boot.
|
||||
self.ser.reset_input_buffer()
|
||||
@@ -116,22 +114,17 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
self._serial_disconnected: bool = False
|
||||
self._last_esp_ms: int = 0
|
||||
self._last_data_time: float = time.monotonic()
|
||||
self._last_encoder_count: int = 0
|
||||
self._streaming: bool = False
|
||||
|
||||
# Latest parsed state (updated by the reader thread).
|
||||
# Firmware sends SI units — values are used directly as qpos/qvel.
|
||||
self._latest_state: dict[str, Any] = {
|
||||
"timestamp_ms": 0,
|
||||
"encoder_count": 0,
|
||||
"rpm": 0.0,
|
||||
"motor_rad": 0.0,
|
||||
"motor_vel": 0.0,
|
||||
"pend_rad": 0.0,
|
||||
"pend_vel": 0.0,
|
||||
"motor_speed": 0,
|
||||
"at_limit": False,
|
||||
"pendulum_angle": 0.0,
|
||||
"pendulum_velocity": 0.0,
|
||||
"target_speed": 0,
|
||||
"braking": False,
|
||||
"enc_vel_cps": 0.0,
|
||||
"pendulum_ok": False,
|
||||
}
|
||||
self._state_lock = threading.Lock()
|
||||
self._state_event = threading.Event()
|
||||
@@ -148,6 +141,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
self._streaming = True
|
||||
self._last_data_time = time.monotonic()
|
||||
|
||||
# Derive max PWM from actuator ctrl_range so the serial
|
||||
# command range matches what MuJoCo training sees.
|
||||
ctrl_hi = self.env.robot.actuators[0].ctrl_range[1]
|
||||
self._max_pwm: int = round(ctrl_hi * 255)
|
||||
|
||||
# Track wall-clock time of last step for PPO-gap detection.
|
||||
self._last_step_time: float = time.monotonic()
|
||||
|
||||
@@ -170,41 +168,22 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
self._sim_reset(all_ids)
|
||||
self.step_counts.zero_()
|
||||
|
||||
step_start = time.monotonic()
|
||||
|
||||
# Map normalised action [-1, 1] → PWM [-255, 255].
|
||||
# Map normalised action [-1, 1] → PWM, scaled by ctrl_range.
|
||||
action_val = float(actions[0, 0].clamp(-1.0, 1.0))
|
||||
motor_speed = int(action_val * 255)
|
||||
motor_speed = int(action_val * self._max_pwm)
|
||||
self._send(f"M{motor_speed}")
|
||||
|
||||
# Enforce dt wall-clock timing.
|
||||
elapsed = time.monotonic() - step_start
|
||||
remaining = self.config.dt - elapsed
|
||||
if remaining > 0:
|
||||
time.sleep(remaining)
|
||||
# Stream-driven: block until the firmware sends the next state
|
||||
# line (~20 ms at 50 Hz).
|
||||
state = self._read_state_blocking(timeout=0.1)
|
||||
|
||||
# Read latest sensor data (non-blocking — dt sleep ensures freshness).
|
||||
state = self._read_state()
|
||||
# Firmware sends SI units — use directly.
|
||||
qpos, qvel = self._state_to_tensors(state)
|
||||
|
||||
motor_angle = (
|
||||
state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
motor_vel = (
|
||||
state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
pendulum_angle = math.radians(state["pendulum_angle"])
|
||||
pendulum_vel = math.radians(state["pendulum_velocity"])
|
||||
|
||||
# Cache motor angle for safety check in step() — avoids a second read.
|
||||
self._last_motor_angle_rad = motor_angle
|
||||
# Cache for _sync_viz().
|
||||
self._last_sync_state = state
|
||||
self._last_step_time = time.monotonic()
|
||||
|
||||
qpos = torch.tensor(
|
||||
[[motor_angle, pendulum_angle]], dtype=torch.float32
|
||||
)
|
||||
qvel = torch.tensor(
|
||||
[[motor_vel, pendulum_vel]], dtype=torch.float32
|
||||
)
|
||||
return qpos, qvel
|
||||
|
||||
def _sim_reset(
|
||||
@@ -232,59 +211,42 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
# Physically return the motor to the centre position.
|
||||
self._drive_to_center()
|
||||
|
||||
# Wait until the pendulum settles.
|
||||
self._wait_for_pendulum_still()
|
||||
# Wait until the env considers the robot settled.
|
||||
self._wait_for_settle()
|
||||
|
||||
# Refresh data timer so health checks don't false-positive.
|
||||
self._last_data_time = time.monotonic()
|
||||
|
||||
# Read settled state and return as qpos/qvel.
|
||||
state = self._read_state_blocking()
|
||||
motor_angle = (
|
||||
state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
motor_vel = (
|
||||
state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
pendulum_angle = math.radians(state["pendulum_angle"])
|
||||
pendulum_vel = math.radians(state["pendulum_velocity"])
|
||||
qpos, qvel = self._state_to_tensors(state)
|
||||
|
||||
qpos = torch.tensor(
|
||||
[[motor_angle, pendulum_angle]], dtype=torch.float32
|
||||
)
|
||||
qvel = torch.tensor(
|
||||
[[motor_vel, pendulum_vel]], dtype=torch.float32
|
||||
)
|
||||
self._last_sync_state = state
|
||||
return qpos, qvel
|
||||
|
||||
def _sim_close(self) -> None:
|
||||
self._reader_running = False
|
||||
self._streaming = False
|
||||
self._send("H") # Stop streaming.
|
||||
self._send("M0") # Stop motor.
|
||||
self._send("H")
|
||||
self._send("M0")
|
||||
time.sleep(0.1)
|
||||
self._reader_thread.join(timeout=1.0)
|
||||
self.ser.close()
|
||||
if hasattr(self, "_offscreen_renderer") and self._offscreen_renderer is not None:
|
||||
self._offscreen_renderer.close()
|
||||
super()._sim_close()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# MuJoCo digital-twin rendering
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _ensure_viz_model(self) -> None:
|
||||
"""Lazily load the MuJoCo model for visualisation (digital twin).
|
||||
|
||||
Reuses the same URDF + robot.yaml that the MuJoCoRunner would use,
|
||||
but only for rendering — no physics stepping.
|
||||
"""
|
||||
"""Lazily load the MuJoCo model for visualisation (digital twin)."""
|
||||
if hasattr(self, "_viz_model"):
|
||||
return
|
||||
|
||||
import mujoco
|
||||
from src.runners.mujoco import MuJoCoRunner
|
||||
from src.runners.mujoco import load_mujoco_model
|
||||
|
||||
self._viz_model = MuJoCoRunner._load_model(self.env.robot)
|
||||
self._viz_model = load_mujoco_model(self.env.robot)
|
||||
self._viz_data = mujoco.MjData(self._viz_model)
|
||||
self._offscreen_renderer = None
|
||||
|
||||
@@ -293,32 +255,21 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
import mujoco
|
||||
|
||||
self._ensure_viz_model()
|
||||
state = self._read_state()
|
||||
|
||||
# Set joint positions from serial data.
|
||||
motor_angle = (
|
||||
state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
pendulum_angle = math.radians(state["pendulum_angle"])
|
||||
self._viz_data.qpos[0] = motor_angle
|
||||
self._viz_data.qpos[1] = pendulum_angle
|
||||
last_state = getattr(self, "_last_sync_state", None)
|
||||
if last_state is None:
|
||||
last_state = self._read_state()
|
||||
|
||||
# Set joint velocities (for any velocity-dependent visuals).
|
||||
motor_vel = (
|
||||
state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
pendulum_vel = math.radians(state["pendulum_velocity"])
|
||||
self._viz_data.qvel[0] = motor_vel
|
||||
self._viz_data.qvel[1] = pendulum_vel
|
||||
# Firmware sends radians — use directly.
|
||||
self._viz_data.qpos[0] = last_state["motor_rad"]
|
||||
self._viz_data.qpos[1] = last_state["pend_rad"]
|
||||
self._viz_data.qvel[0] = last_state["motor_vel"]
|
||||
self._viz_data.qvel[1] = last_state["pend_vel"]
|
||||
|
||||
# Forward kinematics (updates body positions for rendering).
|
||||
mujoco.mj_forward(self._viz_model, self._viz_data)
|
||||
|
||||
def render(self, env_idx: int = 0) -> np.ndarray:
|
||||
"""Offscreen render of the digital-twin MuJoCo model.
|
||||
|
||||
Called by VideoRecordingTrainer during training to capture frames.
|
||||
"""
|
||||
def _render_frame(self, env_idx: int = 0) -> np.ndarray:
|
||||
"""Offscreen render of the digital-twin MuJoCo model."""
|
||||
import mujoco
|
||||
|
||||
self._sync_viz()
|
||||
@@ -340,7 +291,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
# Check for ESP32 reboot / disconnect BEFORE stepping.
|
||||
if self._rebooted or self._serial_disconnected:
|
||||
self._send("M0")
|
||||
# Return a terminal observation with penalty.
|
||||
qpos, qvel = self._make_current_state()
|
||||
state = self.env.build_state(qpos, qvel)
|
||||
obs = self.env.compute_observations(state)
|
||||
@@ -359,16 +309,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
rewards = torch.tensor([[-100.0]])
|
||||
info["reboot_detected"] = True
|
||||
|
||||
# Check motor angle against hard safety limit.
|
||||
# Uses the cached value from _sim_step — no extra serial read.
|
||||
if self._max_motor_angle_rad > 0:
|
||||
motor_angle = abs(getattr(self, "_last_motor_angle_rad", 0.0))
|
||||
if motor_angle >= self._max_motor_angle_rad:
|
||||
self._send("M0")
|
||||
terminated = torch.tensor([[True]])
|
||||
rewards = torch.tensor([[-100.0]])
|
||||
info["motor_limit_exceeded"] = True
|
||||
|
||||
# Always stop motor on episode end.
|
||||
if terminated.any() or truncated.any():
|
||||
self._send("M0")
|
||||
@@ -387,7 +327,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
self._serial_disconnected = True
|
||||
|
||||
def _serial_reader(self) -> None:
|
||||
"""Background thread: continuously read and parse serial lines."""
|
||||
"""Background thread: continuously read and parse serial lines.
|
||||
|
||||
Protocol: ``S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>``
|
||||
(7 comma-separated fields). Firmware sends SI units directly.
|
||||
"""
|
||||
while self._reader_running:
|
||||
try:
|
||||
if self.ser.in_waiting:
|
||||
@@ -405,9 +349,15 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
|
||||
if line.startswith("S,"):
|
||||
parts = line.split(",")
|
||||
if len(parts) >= 12:
|
||||
esp_ms = int(parts[1])
|
||||
enc = int(parts[2])
|
||||
if len(parts) >= 7:
|
||||
try:
|
||||
esp_ms = int(parts[1])
|
||||
except ValueError:
|
||||
logger.debug(
|
||||
"Malformed state line (header): %s",
|
||||
line,
|
||||
)
|
||||
continue
|
||||
|
||||
# Detect reboot: timestamp jumped backwards.
|
||||
if (
|
||||
@@ -422,38 +372,24 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
esp_ms,
|
||||
)
|
||||
|
||||
# Detect reboot: encoder snapped to 0 from
|
||||
# a far position.
|
||||
if (
|
||||
abs(self._last_encoder_count)
|
||||
> self.config.encoder_jump_threshold
|
||||
and abs(enc) < 5
|
||||
):
|
||||
self._rebooted = True
|
||||
logger.critical(
|
||||
"ESP32 reboot detected: encoder"
|
||||
" jumped %d -> %d",
|
||||
self._last_encoder_count,
|
||||
enc,
|
||||
)
|
||||
|
||||
self._last_esp_ms = esp_ms
|
||||
self._last_encoder_count = enc
|
||||
self._last_data_time = time.monotonic()
|
||||
|
||||
parsed: dict[str, Any] = {
|
||||
"timestamp_ms": esp_ms,
|
||||
"encoder_count": enc,
|
||||
"rpm": float(parts[3]),
|
||||
"motor_speed": int(parts[4]),
|
||||
"at_limit": bool(int(parts[5])),
|
||||
"pendulum_angle": float(parts[6]),
|
||||
"pendulum_velocity": float(parts[7]),
|
||||
"target_speed": int(parts[8]),
|
||||
"braking": bool(int(parts[9])),
|
||||
"enc_vel_cps": float(parts[10]),
|
||||
"pendulum_ok": bool(int(parts[11])),
|
||||
}
|
||||
try:
|
||||
parsed: dict[str, Any] = {
|
||||
"timestamp_ms": esp_ms,
|
||||
"motor_rad": float(parts[2]),
|
||||
"motor_vel": float(parts[3]),
|
||||
"pend_rad": float(parts[4]),
|
||||
"pend_vel": float(parts[5]),
|
||||
"motor_speed": int(parts[6]),
|
||||
}
|
||||
except ValueError:
|
||||
logger.debug(
|
||||
"Malformed state line (fields): %s",
|
||||
line,
|
||||
)
|
||||
continue
|
||||
with self._state_lock:
|
||||
self._latest_state = parsed
|
||||
self._state_event.set()
|
||||
@@ -483,81 +419,68 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
return True
|
||||
|
||||
def _read_state(self) -> dict[str, Any]:
|
||||
"""Return the most recent state from the reader thread (non-blocking).
|
||||
|
||||
The background thread updates at ~50 Hz and `_sim_step` already
|
||||
sleeps for `dt` before calling this, so the data is always fresh.
|
||||
"""
|
||||
"""Return the most recent state from the reader thread (non-blocking)."""
|
||||
with self._state_lock:
|
||||
return dict(self._latest_state)
|
||||
|
||||
def _read_state_blocking(self, timeout: float = 0.05) -> dict[str, Any]:
|
||||
"""Wait for a fresh sample, then return it.
|
||||
|
||||
Used during reset / settling where we need to guarantee we have
|
||||
a new reading (no prior dt sleep).
|
||||
"""
|
||||
"""Wait for a fresh sample, then return it."""
|
||||
self._state_event.clear()
|
||||
self._state_event.wait(timeout=timeout)
|
||||
with self._state_lock:
|
||||
return dict(self._latest_state)
|
||||
|
||||
def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""Build qpos/qvel from current sensor data (utility)."""
|
||||
state = self._read_state_blocking()
|
||||
motor_angle = (
|
||||
state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
motor_vel = (
|
||||
state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
pendulum_angle = math.radians(state["pendulum_angle"])
|
||||
pendulum_vel = math.radians(state["pendulum_velocity"])
|
||||
|
||||
def _state_to_tensors(
|
||||
self, state: dict[str, Any],
|
||||
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""Convert a parsed state dict to (qpos, qvel) tensors."""
|
||||
qpos = torch.tensor(
|
||||
[[motor_angle, pendulum_angle]], dtype=torch.float32
|
||||
[[state["motor_rad"], state["pend_rad"]]], dtype=torch.float32
|
||||
)
|
||||
qvel = torch.tensor(
|
||||
[[motor_vel, pendulum_vel]], dtype=torch.float32
|
||||
[[state["motor_vel"], state["pend_vel"]]], dtype=torch.float32
|
||||
)
|
||||
return qpos, qvel
|
||||
|
||||
def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""Build qpos/qvel from current sensor data (utility)."""
|
||||
return self._state_to_tensors(self._read_state_blocking())
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Physical reset helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _drive_to_center(self) -> None:
|
||||
"""Drive the motor back toward encoder=0 using bang-bang control."""
|
||||
rc = self._hw.reset
|
||||
"""Drive the motor back toward center using bang-bang control."""
|
||||
cfg = self.config
|
||||
start = time.time()
|
||||
while time.time() - start < rc.drive_timeout:
|
||||
while time.time() - start < cfg.reset_drive_timeout:
|
||||
state = self._read_state_blocking()
|
||||
enc = state["encoder_count"]
|
||||
if abs(enc) < rc.deadband:
|
||||
motor_rad = state["motor_rad"]
|
||||
if abs(motor_rad) < cfg.reset_deadband_rad:
|
||||
break
|
||||
speed = rc.drive_speed if enc < 0 else -rc.drive_speed
|
||||
speed = cfg.reset_drive_speed if motor_rad < 0 else -cfg.reset_drive_speed
|
||||
self._send(f"M{speed}")
|
||||
time.sleep(0.05)
|
||||
self._send("M0")
|
||||
time.sleep(0.2)
|
||||
|
||||
def _wait_for_pendulum_still(self) -> None:
|
||||
"""Block until the pendulum has settled (angle and velocity near zero)."""
|
||||
rc = self._hw.reset
|
||||
def _wait_for_settle(self) -> None:
|
||||
"""Block until the env considers the robot ready for a new episode."""
|
||||
cfg = self.config
|
||||
stable_since: float | None = None
|
||||
start = time.monotonic()
|
||||
|
||||
while time.monotonic() - start < rc.settle_timeout:
|
||||
while time.monotonic() - start < cfg.reset_settle_timeout:
|
||||
state = self._read_state_blocking()
|
||||
angle_ok = abs(state["pendulum_angle"]) < rc.settle_angle_deg
|
||||
vel_ok = abs(state["pendulum_velocity"]) < rc.settle_vel_dps
|
||||
qpos, qvel = self._state_to_tensors(state)
|
||||
|
||||
if angle_ok and vel_ok:
|
||||
if self.env.is_reset_ready(qpos, qvel):
|
||||
if stable_since is None:
|
||||
stable_since = time.monotonic()
|
||||
elif time.monotonic() - stable_since >= rc.settle_duration:
|
||||
elif time.monotonic() - stable_since >= 0.5:
|
||||
logger.info(
|
||||
"Pendulum settled after %.2f s",
|
||||
"Robot settled after %.2f s",
|
||||
time.monotonic() - start,
|
||||
)
|
||||
return
|
||||
@@ -566,6 +489,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]):
|
||||
time.sleep(0.02)
|
||||
|
||||
logger.warning(
|
||||
"Pendulum did not fully settle within %.1f s — proceeding anyway.",
|
||||
rc.settle_timeout,
|
||||
"Robot did not settle within %.1f s — proceeding anyway.",
|
||||
cfg.reset_settle_timeout,
|
||||
)
|
||||
|
||||
79
src/sysid/_urdf.py
Normal file
79
src/sysid/_urdf.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""URDF XML helpers shared by sysid rollout and export modules."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
|
||||
def set_mass(inertial: ET.Element, mass: float | None) -> None:
|
||||
if mass is None:
|
||||
return
|
||||
mass_el = inertial.find("mass")
|
||||
if mass_el is not None:
|
||||
mass_el.set("value", str(mass))
|
||||
|
||||
|
||||
def set_com(
|
||||
inertial: ET.Element,
|
||||
x: float | None,
|
||||
y: float | None,
|
||||
z: float | None,
|
||||
) -> None:
|
||||
origin = inertial.find("origin")
|
||||
if origin is None:
|
||||
return
|
||||
xyz = origin.get("xyz", "0 0 0").split()
|
||||
if x is not None:
|
||||
xyz[0] = str(x)
|
||||
if y is not None:
|
||||
xyz[1] = str(y)
|
||||
if z is not None:
|
||||
xyz[2] = str(z)
|
||||
origin.set("xyz", " ".join(xyz))
|
||||
|
||||
|
||||
def set_inertia(
|
||||
inertial: ET.Element,
|
||||
ixx: float | None = None,
|
||||
iyy: float | None = None,
|
||||
izz: float | None = None,
|
||||
ixy: float | None = None,
|
||||
iyz: float | None = None,
|
||||
ixz: float | None = None,
|
||||
) -> None:
|
||||
ine = inertial.find("inertia")
|
||||
if ine is None:
|
||||
return
|
||||
for attr, val in [
|
||||
("ixx", ixx), ("iyy", iyy), ("izz", izz),
|
||||
("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
|
||||
]:
|
||||
if val is not None:
|
||||
ine.set(attr, str(val))
|
||||
|
||||
|
||||
def patch_link_inertials(
|
||||
root: ET.Element,
|
||||
params: dict[str, float],
|
||||
) -> None:
|
||||
"""Patch arm and pendulum inertial parameters in a URDF ElementTree root."""
|
||||
for link in root.iter("link"):
|
||||
link_name = link.get("name", "")
|
||||
inertial = link.find("inertial")
|
||||
if inertial is None:
|
||||
continue
|
||||
|
||||
if link_name == "arm":
|
||||
set_mass(inertial, params.get("arm_mass"))
|
||||
set_com(inertial, params.get("arm_com_x"),
|
||||
params.get("arm_com_y"), params.get("arm_com_z"))
|
||||
|
||||
elif link_name == "pendulum":
|
||||
set_mass(inertial, params.get("pendulum_mass"))
|
||||
set_com(inertial, params.get("pendulum_com_x"),
|
||||
params.get("pendulum_com_y"), params.get("pendulum_com_z"))
|
||||
set_inertia(inertial,
|
||||
ixx=params.get("pendulum_ixx"),
|
||||
iyy=params.get("pendulum_iyy"),
|
||||
izz=params.get("pendulum_izz"),
|
||||
ixy=params.get("pendulum_ixy"))
|
||||
@@ -6,6 +6,10 @@ the system, and records motor + pendulum angles and velocities at ~50 Hz.
|
||||
Saves a compressed numpy archive (.npz) that the optimizer can replay
|
||||
in simulation to fit physics parameters.
|
||||
|
||||
Serial protocol (same as SerialRunner):
|
||||
S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
|
||||
(7 comma-separated fields — firmware sends SI units)
|
||||
|
||||
Usage:
|
||||
python -m src.sysid.capture \
|
||||
--robot-path assets/rotary_cartpole \
|
||||
@@ -17,7 +21,6 @@ from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
@@ -27,7 +30,6 @@ from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import structlog
|
||||
import yaml
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
@@ -36,25 +38,24 @@ log = structlog.get_logger()
|
||||
|
||||
|
||||
def _parse_state_line(line: str) -> dict[str, Any] | None:
|
||||
"""Parse an ``S,…`` state line from the ESP32."""
|
||||
"""Parse an ``S,…`` state line from the ESP32.
|
||||
|
||||
Format: S,<ms>,<motor_rad>,<motor_vel>,<pend_rad>,<pend_vel>,<motor_speed>
|
||||
(7 comma-separated fields, firmware sends SI units)
|
||||
"""
|
||||
if not line.startswith("S,"):
|
||||
return None
|
||||
parts = line.split(",")
|
||||
if len(parts) < 12:
|
||||
if len(parts) < 7:
|
||||
return None
|
||||
try:
|
||||
return {
|
||||
"timestamp_ms": int(parts[1]),
|
||||
"encoder_count": int(parts[2]),
|
||||
"rpm": float(parts[3]),
|
||||
"motor_speed": int(parts[4]),
|
||||
"at_limit": bool(int(parts[5])),
|
||||
"pendulum_angle": float(parts[6]),
|
||||
"pendulum_velocity": float(parts[7]),
|
||||
"target_speed": int(parts[8]),
|
||||
"braking": bool(int(parts[9])),
|
||||
"enc_vel_cps": float(parts[10]),
|
||||
"pendulum_ok": bool(int(parts[11])),
|
||||
"motor_rad": float(parts[2]),
|
||||
"motor_vel": float(parts[3]),
|
||||
"pend_rad": float(parts[4]),
|
||||
"pend_vel": float(parts[5]),
|
||||
"motor_speed": int(parts[6]),
|
||||
}
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
@@ -64,7 +65,12 @@ def _parse_state_line(line: str) -> dict[str, Any] | None:
|
||||
|
||||
|
||||
class _SerialReader:
|
||||
"""Minimal background reader for the ESP32 serial stream."""
|
||||
"""Minimal background reader for the ESP32 serial stream.
|
||||
|
||||
Uses a sequence counter so ``read_blocking()`` guarantees it returns
|
||||
a *new* state line (not a stale repeat). This keeps the capture
|
||||
loop locked to the firmware's 50 Hz tick.
|
||||
"""
|
||||
|
||||
def __init__(self, port: str, baud: int = 115200):
|
||||
import serial as _serial
|
||||
@@ -75,14 +81,16 @@ class _SerialReader:
|
||||
self.ser.reset_input_buffer()
|
||||
|
||||
self._latest: dict[str, Any] = {}
|
||||
self._seq: int = 0 # incremented on every new state line
|
||||
self._lock = threading.Lock()
|
||||
self._event = threading.Event()
|
||||
self._cond = threading.Condition(self._lock)
|
||||
self._running = True
|
||||
|
||||
self._thread = threading.Thread(target=self._reader_loop, daemon=True)
|
||||
self._thread.start()
|
||||
|
||||
def _reader_loop(self) -> None:
|
||||
_debug_count = 0
|
||||
while self._running:
|
||||
try:
|
||||
if self.ser.in_waiting:
|
||||
@@ -91,11 +99,16 @@ class _SerialReader:
|
||||
.decode("utf-8", errors="ignore")
|
||||
.strip()
|
||||
)
|
||||
# Debug: log first 10 raw lines so we can see what the firmware sends.
|
||||
if _debug_count < 10 and line:
|
||||
log.info("serial_raw_line", line=repr(line), count=_debug_count)
|
||||
_debug_count += 1
|
||||
parsed = _parse_state_line(line)
|
||||
if parsed is not None:
|
||||
with self._lock:
|
||||
with self._cond:
|
||||
self._latest = parsed
|
||||
self._event.set()
|
||||
self._seq += 1
|
||||
self._cond.notify_all()
|
||||
else:
|
||||
time.sleep(0.001)
|
||||
except (OSError, self._serial_mod.SerialException):
|
||||
@@ -108,14 +121,19 @@ class _SerialReader:
|
||||
except (OSError, self._serial_mod.SerialException):
|
||||
log.critical("serial_send_failed", cmd=cmd)
|
||||
|
||||
def read(self) -> dict[str, Any]:
|
||||
with self._lock:
|
||||
return dict(self._latest)
|
||||
|
||||
def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
|
||||
self._event.clear()
|
||||
self._event.wait(timeout=timeout)
|
||||
return self.read()
|
||||
"""Wait until a *new* state line arrives, then return it.
|
||||
|
||||
Uses a sequence counter to guarantee the returned state is
|
||||
different from whatever was available before this call.
|
||||
"""
|
||||
with self._cond:
|
||||
seq_before = self._seq
|
||||
if not self._cond.wait_for(
|
||||
lambda: self._seq > seq_before, timeout=timeout
|
||||
):
|
||||
return {} # timeout — no new data
|
||||
return dict(self._latest)
|
||||
|
||||
def close(self) -> None:
|
||||
self._running = False
|
||||
@@ -139,7 +157,7 @@ class _PRBSExcitation:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
amplitude: int = 180,
|
||||
amplitude: int = 150,
|
||||
hold_min_ms: int = 50,
|
||||
hold_max_ms: int = 300,
|
||||
):
|
||||
@@ -169,42 +187,39 @@ def capture(
|
||||
port: str = "/dev/cu.usbserial-0001",
|
||||
baud: int = 115200,
|
||||
duration: float = 20.0,
|
||||
amplitude: int = 180,
|
||||
amplitude: int = 150,
|
||||
hold_min_ms: int = 50,
|
||||
hold_max_ms: int = 300,
|
||||
dt: float = 0.02,
|
||||
motor_angle_limit_deg: float = 90.0,
|
||||
) -> Path:
|
||||
"""Run the capture procedure and return the path to the saved .npz file.
|
||||
|
||||
The capture loop is **stream-driven**: it blocks on each incoming
|
||||
state line from the firmware (which arrives at 50 Hz), sends the
|
||||
next motor command immediately, and records both.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
robot_path : path to robot asset directory (contains hardware.yaml)
|
||||
robot_path : path to robot asset directory
|
||||
port : serial port for ESP32
|
||||
baud : baud rate
|
||||
duration : capture duration in seconds
|
||||
amplitude : max PWM magnitude for excitation (0–255)
|
||||
amplitude : max PWM magnitude for excitation
|
||||
hold_min_ms / hold_max_ms : random hold time range (ms)
|
||||
dt : target sampling period (seconds), default 50 Hz
|
||||
dt : nominal sample period for buffer sizing (seconds)
|
||||
motor_angle_limit_deg : safety limit for motor angle
|
||||
"""
|
||||
robot_path = Path(robot_path).resolve()
|
||||
|
||||
# Load hardware config for encoder conversion + safety.
|
||||
hw_yaml = robot_path / "hardware.yaml"
|
||||
if not hw_yaml.exists():
|
||||
raise FileNotFoundError(f"hardware.yaml not found in {robot_path}")
|
||||
raw_hw = yaml.safe_load(hw_yaml.read_text())
|
||||
ppr = raw_hw.get("encoder", {}).get("ppr", 11)
|
||||
gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0)
|
||||
counts_per_rev: float = ppr * gear_ratio * 4.0
|
||||
max_motor_deg = raw_hw.get("safety", {}).get("max_motor_angle_deg", 90.0)
|
||||
max_motor_rad = math.radians(max_motor_deg) if max_motor_deg > 0 else 0.0
|
||||
max_motor_rad = math.radians(motor_angle_limit_deg) if motor_angle_limit_deg > 0 else 0.0
|
||||
|
||||
# Connect.
|
||||
reader = _SerialReader(port, baud)
|
||||
excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)
|
||||
|
||||
# Prepare recording buffers.
|
||||
max_samples = int(duration / dt) + 500 # headroom
|
||||
# Prepare recording buffers (generous headroom).
|
||||
max_samples = int(duration / dt) + 500
|
||||
rec_time = np.zeros(max_samples, dtype=np.float64)
|
||||
rec_action = np.zeros(max_samples, dtype=np.float64)
|
||||
rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
|
||||
@@ -222,63 +237,96 @@ def capture(
|
||||
duration=duration,
|
||||
amplitude=amplitude,
|
||||
hold_range_ms=f"{hold_min_ms}–{hold_max_ms}",
|
||||
dt=dt,
|
||||
mode="stream-driven (firmware clock)",
|
||||
)
|
||||
|
||||
idx = 0
|
||||
pwm = 0
|
||||
last_esp_ms = -1 # firmware timestamp of last recorded sample
|
||||
no_data_count = 0 # consecutive timeouts with no data
|
||||
t0 = time.monotonic()
|
||||
try:
|
||||
while True:
|
||||
loop_start = time.monotonic()
|
||||
elapsed = loop_start - t0
|
||||
# Block until the firmware sends the next state line (~20 ms).
|
||||
# Timeout at 100 ms prevents hanging if the ESP32 disconnects.
|
||||
state = reader.read_blocking(timeout=0.1)
|
||||
if not state:
|
||||
no_data_count += 1
|
||||
if no_data_count == 30: # 3 seconds with no data
|
||||
log.warning(
|
||||
"no_data_received",
|
||||
msg="No state lines from firmware after 3s. "
|
||||
"Check: is the ESP32 powered? Is it running the right firmware? "
|
||||
"Try pressing the RESET button.",
|
||||
)
|
||||
if no_data_count == 100: # 10 seconds
|
||||
log.critical(
|
||||
"no_data_timeout",
|
||||
msg="No data for 10s — aborting capture.",
|
||||
)
|
||||
break
|
||||
continue # no data yet — retry
|
||||
no_data_count = 0
|
||||
|
||||
# Deduplicate: the firmware may send multiple state lines per
|
||||
# tick (e.g. M-command echo + tick). Only record one sample
|
||||
# per unique firmware timestamp.
|
||||
esp_ms = state.get("timestamp_ms", 0)
|
||||
if esp_ms == last_esp_ms:
|
||||
continue
|
||||
last_esp_ms = esp_ms
|
||||
|
||||
elapsed = time.monotonic() - t0
|
||||
if elapsed >= duration:
|
||||
break
|
||||
|
||||
# Get excitation PWM.
|
||||
# Get excitation PWM for the NEXT tick.
|
||||
pwm = excitation()
|
||||
|
||||
# Safety: reverse/zero if near motor limit.
|
||||
state = reader.read()
|
||||
if state:
|
||||
motor_angle_rad = (
|
||||
state.get("encoder_count", 0) / counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
if max_motor_rad > 0:
|
||||
margin = max_motor_rad * 0.85 # start braking at 85%
|
||||
if motor_angle_rad > margin and pwm > 0:
|
||||
pwm = -abs(pwm) # reverse
|
||||
elif motor_angle_rad < -margin and pwm < 0:
|
||||
pwm = abs(pwm) # reverse
|
||||
# Safety: keep the arm well within its mechanical range.
|
||||
# Firmware sends motor angle in radians — use directly.
|
||||
motor_angle_rad = state.get("motor_rad", 0.0)
|
||||
if max_motor_rad > 0:
|
||||
ratio = motor_angle_rad / max_motor_rad # signed, -1..+1
|
||||
abs_ratio = abs(ratio)
|
||||
|
||||
# Send command.
|
||||
if abs_ratio > 0.90:
|
||||
# Deep in the danger zone — force a strong return.
|
||||
brake_strength = min(1.0, (abs_ratio - 0.90) / 0.10) # 0→1
|
||||
brake_pwm = int(amplitude * (0.5 + 0.5 * brake_strength))
|
||||
pwm = -brake_pwm if ratio > 0 else brake_pwm
|
||||
elif abs_ratio > 0.70:
|
||||
# Soft zone — only allow actions pointing back to centre.
|
||||
if ratio > 0 and pwm > 0:
|
||||
pwm = -abs(pwm)
|
||||
elif ratio < 0 and pwm < 0:
|
||||
pwm = abs(pwm)
|
||||
|
||||
# Send command immediately — it will take effect on the next tick.
|
||||
reader.send(f"M{pwm}")
|
||||
|
||||
# Wait for fresh data.
|
||||
time.sleep(max(0, dt - (time.monotonic() - loop_start) - 0.005))
|
||||
state = reader.read_blocking(timeout=dt)
|
||||
# Record this tick's state + the action the motor *actually*
|
||||
# received. Firmware sends SI units — use directly.
|
||||
motor_angle = state.get("motor_rad", 0.0)
|
||||
motor_vel = state.get("motor_vel", 0.0)
|
||||
pend_angle = state.get("pend_rad", 0.0)
|
||||
pend_vel = state.get("pend_vel", 0.0)
|
||||
# Firmware constrains to ±255; normalise to [-1, 1].
|
||||
applied = state.get("motor_speed", 0)
|
||||
action_norm = max(-255, min(255, applied)) / 255.0
|
||||
|
||||
if state:
|
||||
enc = state.get("encoder_count", 0)
|
||||
motor_angle = enc / counts_per_rev * 2.0 * math.pi
|
||||
motor_vel = (
|
||||
state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
pend_angle = math.radians(state.get("pendulum_angle", 0.0))
|
||||
pend_vel = math.radians(state.get("pendulum_velocity", 0.0))
|
||||
if idx < max_samples:
|
||||
rec_time[idx] = elapsed
|
||||
rec_action[idx] = action_norm
|
||||
rec_motor_angle[idx] = motor_angle
|
||||
rec_motor_vel[idx] = motor_vel
|
||||
rec_pend_angle[idx] = pend_angle
|
||||
rec_pend_vel[idx] = pend_vel
|
||||
idx += 1
|
||||
else:
|
||||
break # buffer full
|
||||
|
||||
# Normalised action: PWM / 255 → [-1, 1]
|
||||
action_norm = pwm / 255.0
|
||||
|
||||
if idx < max_samples:
|
||||
rec_time[idx] = elapsed
|
||||
rec_action[idx] = action_norm
|
||||
rec_motor_angle[idx] = motor_angle
|
||||
rec_motor_vel[idx] = motor_vel
|
||||
rec_pend_angle[idx] = pend_angle
|
||||
rec_pend_vel[idx] = pend_vel
|
||||
idx += 1
|
||||
|
||||
# Progress.
|
||||
# Progress (every 50 samples ≈ once per second at 50 Hz).
|
||||
if idx % 50 == 0:
|
||||
log.info(
|
||||
"capture_progress",
|
||||
@@ -287,11 +335,6 @@ def capture(
|
||||
pwm=pwm,
|
||||
)
|
||||
|
||||
# Pace to dt.
|
||||
remaining = dt - (time.monotonic() - loop_start)
|
||||
if remaining > 0:
|
||||
time.sleep(remaining)
|
||||
|
||||
finally:
|
||||
reader.send("M0")
|
||||
reader.close()
|
||||
@@ -339,7 +382,7 @@ def main() -> None:
|
||||
"--robot-path",
|
||||
type=str,
|
||||
default="assets/rotary_cartpole",
|
||||
help="Path to robot asset directory (contains hardware.yaml)",
|
||||
help="Path to robot asset directory",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
@@ -352,7 +395,8 @@ def main() -> None:
|
||||
"--duration", type=float, default=20.0, help="Capture duration (s)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--amplitude", type=int, default=180, help="Max PWM magnitude (0–255)"
|
||||
"--amplitude", type=int, default=150,
|
||||
help="Max PWM magnitude (should not exceed firmware MAX_MOTOR_SPEED=150)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hold-min-ms", type=int, default=50, help="Min hold time (ms)"
|
||||
@@ -361,7 +405,11 @@ def main() -> None:
|
||||
"--hold-max-ms", type=int, default=300, help="Max hold time (ms)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dt", type=float, default=0.02, help="Sample period (s)"
|
||||
"--dt", type=float, default=0.02, help="Nominal sample period for buffer sizing (s)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--motor-angle-limit", type=float, default=90.0,
|
||||
help="Motor angle safety limit in degrees (0 = disabled)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -374,6 +422,7 @@ def main() -> None:
|
||||
hold_min_ms=args.hold_min_ms,
|
||||
hold_max_ms=args.hold_max_ms,
|
||||
dt=args.dt,
|
||||
motor_angle_limit_deg=args.motor_angle_limit,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -8,20 +8,21 @@ alongside the originals in the robot asset directory.
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import json
|
||||
import shutil
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
import structlog
|
||||
import yaml
|
||||
|
||||
from src.sysid._urdf import patch_link_inertials
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
def export_tuned_files(
|
||||
robot_path: str | Path,
|
||||
params: dict[str, float],
|
||||
motor_params: dict[str, float] | None = None,
|
||||
) -> tuple[Path, Path]:
|
||||
"""Write tuned URDF and robot.yaml files.
|
||||
|
||||
@@ -29,6 +30,8 @@ def export_tuned_files(
|
||||
----------
|
||||
robot_path : robot asset directory (contains robot.yaml + *.urdf)
|
||||
params : dict of parameter name → tuned value (from optimizer)
|
||||
motor_params : locked motor sysid params (asymmetric model).
|
||||
If provided, motor joint parameters come from here.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -43,38 +46,7 @@ def export_tuned_files(
|
||||
|
||||
# ── Tune URDF ────────────────────────────────────────────────
|
||||
tree = ET.parse(urdf_path)
|
||||
root = tree.getroot()
|
||||
|
||||
for link in root.iter("link"):
|
||||
link_name = link.get("name", "")
|
||||
inertial = link.find("inertial")
|
||||
if inertial is None:
|
||||
continue
|
||||
|
||||
if link_name == "arm":
|
||||
_set_mass(inertial, params.get("arm_mass"))
|
||||
_set_com(
|
||||
inertial,
|
||||
params.get("arm_com_x"),
|
||||
params.get("arm_com_y"),
|
||||
params.get("arm_com_z"),
|
||||
)
|
||||
|
||||
elif link_name == "pendulum":
|
||||
_set_mass(inertial, params.get("pendulum_mass"))
|
||||
_set_com(
|
||||
inertial,
|
||||
params.get("pendulum_com_x"),
|
||||
params.get("pendulum_com_y"),
|
||||
params.get("pendulum_com_z"),
|
||||
)
|
||||
_set_inertia(
|
||||
inertial,
|
||||
ixx=params.get("pendulum_ixx"),
|
||||
iyy=params.get("pendulum_iyy"),
|
||||
izz=params.get("pendulum_izz"),
|
||||
ixy=params.get("pendulum_ixy"),
|
||||
)
|
||||
patch_link_inertials(tree.getroot(), params)
|
||||
|
||||
# Write tuned URDF.
|
||||
tuned_urdf_name = urdf_path.stem + "_tuned" + urdf_path.suffix
|
||||
@@ -91,15 +63,47 @@ def export_tuned_files(
|
||||
# Point to the tuned URDF.
|
||||
tuned_cfg["urdf"] = tuned_urdf_name
|
||||
|
||||
# Update actuator parameters.
|
||||
# Update actuator parameters — full asymmetric motor model.
|
||||
if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
|
||||
act = tuned_cfg["actuators"][0]
|
||||
if "actuator_gear" in params:
|
||||
act["gear"] = round(params["actuator_gear"], 6)
|
||||
if "actuator_filter_tau" in params:
|
||||
act["filter_tau"] = round(params["actuator_filter_tau"], 6)
|
||||
if "motor_damping" in params:
|
||||
act["damping"] = round(params["motor_damping"], 6)
|
||||
if motor_params:
|
||||
# Asymmetric gear, damping, deadzone, frictionloss as [pos, neg].
|
||||
gear_pos = motor_params.get("actuator_gear_pos", 0.424)
|
||||
gear_neg = motor_params.get("actuator_gear_neg", 0.425)
|
||||
act["gear"] = [round(gear_pos, 6), round(gear_neg, 6)]
|
||||
|
||||
damp_pos = motor_params.get("motor_damping_pos", 0.002)
|
||||
damp_neg = motor_params.get("motor_damping_neg", 0.015)
|
||||
act["damping"] = [round(damp_pos, 6), round(damp_neg, 6)]
|
||||
|
||||
dz_pos = motor_params.get("motor_deadzone_pos", 0.141)
|
||||
dz_neg = motor_params.get("motor_deadzone_neg", 0.078)
|
||||
act["deadzone"] = [round(dz_pos, 6), round(dz_neg, 6)]
|
||||
|
||||
fl_pos = motor_params.get("motor_frictionloss_pos", 0.057)
|
||||
fl_neg = motor_params.get("motor_frictionloss_neg", 0.053)
|
||||
act["frictionloss"] = [round(fl_pos, 6), round(fl_neg, 6)]
|
||||
|
||||
if "actuator_filter_tau" in motor_params:
|
||||
act["filter_tau"] = round(motor_params["actuator_filter_tau"], 6)
|
||||
if "viscous_quadratic" in motor_params:
|
||||
act["viscous_quadratic"] = round(motor_params["viscous_quadratic"], 6)
|
||||
if "back_emf_gain" in motor_params:
|
||||
act["back_emf_gain"] = round(motor_params["back_emf_gain"], 6)
|
||||
else:
|
||||
if "actuator_gear" in params:
|
||||
act["gear"] = round(params["actuator_gear"], 6)
|
||||
if "actuator_filter_tau" in params:
|
||||
act["filter_tau"] = round(params["actuator_filter_tau"], 6)
|
||||
if "motor_damping" in params:
|
||||
act["damping"] = round(params["motor_damping"], 6)
|
||||
if "motor_deadzone" in params:
|
||||
act["deadzone"] = round(params["motor_deadzone"], 6)
|
||||
|
||||
# ctrl_range from ctrl_limit parameter.
|
||||
if "ctrl_limit" in params:
|
||||
lim = round(params["ctrl_limit"], 6)
|
||||
act["ctrl_range"] = [-lim, lim]
|
||||
|
||||
# Update joint overrides.
|
||||
if "joints" not in tuned_cfg:
|
||||
@@ -108,16 +112,23 @@ def export_tuned_files(
|
||||
if "motor_joint" not in tuned_cfg["joints"]:
|
||||
tuned_cfg["joints"]["motor_joint"] = {}
|
||||
mj = tuned_cfg["joints"]["motor_joint"]
|
||||
if "motor_armature" in params:
|
||||
mj["armature"] = round(params["motor_armature"], 6)
|
||||
if "motor_frictionloss" in params:
|
||||
mj["frictionloss"] = round(params["motor_frictionloss"], 6)
|
||||
if motor_params:
|
||||
mj["armature"] = round(motor_params.get("motor_armature", 0.00277), 6)
|
||||
# Frictionloss/damping = 0 in MuJoCo (motor model handles via qfrc_applied).
|
||||
mj["frictionloss"] = 0.0
|
||||
else:
|
||||
if "motor_armature" in params:
|
||||
mj["armature"] = round(params["motor_armature"], 6)
|
||||
if "motor_frictionloss" in params:
|
||||
mj["frictionloss"] = round(params["motor_frictionloss"], 6)
|
||||
|
||||
if "pendulum_joint" not in tuned_cfg["joints"]:
|
||||
tuned_cfg["joints"]["pendulum_joint"] = {}
|
||||
pj = tuned_cfg["joints"]["pendulum_joint"]
|
||||
if "pendulum_damping" in params:
|
||||
pj["damping"] = round(params["pendulum_damping"], 6)
|
||||
if "pendulum_frictionloss" in params:
|
||||
pj["frictionloss"] = round(params["pendulum_frictionloss"], 6)
|
||||
|
||||
# Write tuned robot.yaml.
|
||||
tuned_yaml_path = robot_path / "robot_tuned.yaml"
|
||||
@@ -136,51 +147,46 @@ def export_tuned_files(
|
||||
return tuned_urdf_path, tuned_yaml_path
|
||||
|
||||
|
||||
# ── XML helpers (shared with rollout.py) ─────────────────────────────
|
||||
# ── CLI entry point ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _set_mass(inertial: ET.Element, mass: float | None) -> None:
|
||||
if mass is None:
|
||||
return
|
||||
mass_el = inertial.find("mass")
|
||||
if mass_el is not None:
|
||||
mass_el.set("value", str(mass))
|
||||
def main() -> None:
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from src.sysid.rollout import LOCKED_MOTOR_PARAMS
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Export tuned URDF + robot.yaml from sysid results."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--robot-path", type=str, default="assets/rotary_cartpole",
|
||||
help="Path to robot asset directory",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--result", type=str, default=None,
|
||||
help="Path to sysid_result.json (auto-detected if omitted)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
robot_path = Path(args.robot_path).resolve()
|
||||
if args.result:
|
||||
result_path = Path(args.result)
|
||||
else:
|
||||
result_path = robot_path / "sysid_result.json"
|
||||
|
||||
if not result_path.exists():
|
||||
raise FileNotFoundError(f"Result file not found: {result_path}")
|
||||
|
||||
result = json.loads(result_path.read_text())
|
||||
|
||||
export_tuned_files(
|
||||
robot_path=args.robot_path,
|
||||
params=result["best_params"],
|
||||
motor_params=result.get("motor_params", dict(LOCKED_MOTOR_PARAMS)),
|
||||
)
|
||||
|
||||
|
||||
def _set_com(
|
||||
inertial: ET.Element,
|
||||
x: float | None,
|
||||
y: float | None,
|
||||
z: float | None,
|
||||
) -> None:
|
||||
origin = inertial.find("origin")
|
||||
if origin is None:
|
||||
return
|
||||
xyz = origin.get("xyz", "0 0 0").split()
|
||||
if x is not None:
|
||||
xyz[0] = str(x)
|
||||
if y is not None:
|
||||
xyz[1] = str(y)
|
||||
if z is not None:
|
||||
xyz[2] = str(z)
|
||||
origin.set("xyz", " ".join(xyz))
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
||||
def _set_inertia(
|
||||
inertial: ET.Element,
|
||||
ixx: float | None = None,
|
||||
iyy: float | None = None,
|
||||
izz: float | None = None,
|
||||
ixy: float | None = None,
|
||||
iyz: float | None = None,
|
||||
ixz: float | None = None,
|
||||
) -> None:
|
||||
ine = inertial.find("inertia")
|
||||
if ine is None:
|
||||
return
|
||||
for attr, val in [
|
||||
("ixx", ixx), ("iyy", iyy), ("izz", izz),
|
||||
("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
|
||||
]:
|
||||
if val is not None:
|
||||
ine.set(attr, str(val))
|
||||
|
||||
5
src/sysid/motor/__init__.py
Normal file
5
src/sysid/motor/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Motor-only system identification subpackage.
|
||||
|
||||
Identifies JGB37-520 DC motor dynamics (no pendulum, no limits)
|
||||
so the MuJoCo simulation matches the real hardware response.
|
||||
"""
|
||||
364
src/sysid/motor/capture.py
Normal file
364
src/sysid/motor/capture.py
Normal file
@@ -0,0 +1,364 @@
|
||||
"""Capture a motor-only trajectory under random excitation (PRBS-style).
|
||||
|
||||
Connects to the ESP32 running the simplified sysid firmware (no pendulum,
|
||||
no limits), sends random PWM commands, and records motor angle + velocity
|
||||
at ~ 50 Hz.
|
||||
|
||||
Firmware serial protocol (115200 baud):
|
||||
Commands: M<speed>\\n R\\n S\\n G\\n H\\n P\\n
|
||||
State: S,<millis>,<encoder_count>,<rpm>,<applied_speed>,<enc_vel_cps>\\n
|
||||
|
||||
Usage:
|
||||
python -m src.sysid.motor.capture --duration 20
|
||||
python -m src.sysid.motor.capture --duration 30 --amplitude 200
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import structlog
|
||||
import yaml
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
# ── Default asset path ───────────────────────────────────────────────
|
||||
_DEFAULT_ASSET = "assets/motor"
|
||||
|
||||
|
||||
# ── Serial protocol helpers ──────────────────────────────────────────
|
||||
|
||||
|
||||
def _parse_state_line(line: str) -> dict[str, Any] | None:
|
||||
"""Parse an ``S,…`` state line from the motor sysid firmware.
|
||||
|
||||
Format: S,<millis>,<encoder_count>,<rpm>,<applied_speed>,<enc_vel_cps>
|
||||
"""
|
||||
if not line.startswith("S,"):
|
||||
return None
|
||||
parts = line.split(",")
|
||||
if len(parts) < 6:
|
||||
return None
|
||||
try:
|
||||
return {
|
||||
"timestamp_ms": int(parts[1]),
|
||||
"encoder_count": int(parts[2]),
|
||||
"rpm": float(parts[3]),
|
||||
"applied_speed": int(parts[4]),
|
||||
"enc_vel_cps": float(parts[5]),
|
||||
}
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
|
||||
|
||||
# ── Background serial reader ─────────────────────────────────────────
|
||||
|
||||
|
||||
class _SerialReader:
|
||||
"""Minimal background reader for the ESP32 serial stream."""
|
||||
|
||||
def __init__(self, port: str, baud: int = 115200):
|
||||
import serial as _serial
|
||||
|
||||
self._serial_mod = _serial
|
||||
self.ser = _serial.Serial(port, baud, timeout=0.05)
|
||||
time.sleep(2) # Wait for ESP32 boot
|
||||
self.ser.reset_input_buffer()
|
||||
|
||||
self._latest: dict[str, Any] = {}
|
||||
self._seq: int = 0
|
||||
self._lock = threading.Lock()
|
||||
self._cond = threading.Condition(self._lock)
|
||||
self._running = True
|
||||
|
||||
self._thread = threading.Thread(target=self._reader_loop, daemon=True)
|
||||
self._thread.start()
|
||||
|
||||
def _reader_loop(self) -> None:
|
||||
while self._running:
|
||||
try:
|
||||
if self.ser.in_waiting:
|
||||
line = (
|
||||
self.ser.readline()
|
||||
.decode("utf-8", errors="ignore")
|
||||
.strip()
|
||||
)
|
||||
parsed = _parse_state_line(line)
|
||||
if parsed is not None:
|
||||
with self._cond:
|
||||
self._latest = parsed
|
||||
self._seq += 1
|
||||
self._cond.notify_all()
|
||||
elif line and not line.startswith("S,"):
|
||||
# Log non-state lines (READY, PONG, WARN, etc.)
|
||||
log.debug("serial_info", line=line)
|
||||
else:
|
||||
time.sleep(0.001)
|
||||
except (OSError, self._serial_mod.SerialException):
|
||||
log.critical("serial_lost")
|
||||
break
|
||||
|
||||
def send(self, cmd: str) -> None:
|
||||
try:
|
||||
self.ser.write(f"{cmd}\n".encode())
|
||||
except (OSError, self._serial_mod.SerialException):
|
||||
log.critical("serial_send_failed", cmd=cmd)
|
||||
|
||||
def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]:
|
||||
"""Wait until a *new* state line arrives, then return it."""
|
||||
with self._cond:
|
||||
seq_before = self._seq
|
||||
if not self._cond.wait_for(
|
||||
lambda: self._seq > seq_before, timeout=timeout
|
||||
):
|
||||
return {}
|
||||
return dict(self._latest)
|
||||
|
||||
def close(self) -> None:
|
||||
self._running = False
|
||||
self.send("H")
|
||||
self.send("M0")
|
||||
time.sleep(0.1)
|
||||
self._thread.join(timeout=1.0)
|
||||
self.ser.close()
|
||||
|
||||
|
||||
# ── PRBS excitation signal ───────────────────────────────────────────
|
||||
|
||||
|
||||
class _PRBSExcitation:
|
||||
"""Random hold-value excitation with configurable amplitude and hold time."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
amplitude: int = 200,
|
||||
hold_min_ms: int = 50,
|
||||
hold_max_ms: int = 400,
|
||||
):
|
||||
self.amplitude = amplitude
|
||||
self.hold_min_ms = hold_min_ms
|
||||
self.hold_max_ms = hold_max_ms
|
||||
self._current: int = 0
|
||||
self._switch_time: float = 0.0
|
||||
self._new_value()
|
||||
|
||||
def _new_value(self) -> None:
|
||||
self._current = random.randint(-self.amplitude, self.amplitude)
|
||||
hold_ms = random.randint(self.hold_min_ms, self.hold_max_ms)
|
||||
self._switch_time = time.monotonic() + hold_ms / 1000.0
|
||||
|
||||
def __call__(self) -> int:
|
||||
if time.monotonic() >= self._switch_time:
|
||||
self._new_value()
|
||||
return self._current
|
||||
|
||||
|
||||
# ── Main capture loop ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def capture(
|
||||
asset_path: str | Path = _DEFAULT_ASSET,
|
||||
port: str = "/dev/cu.usbserial-0001",
|
||||
baud: int = 115200,
|
||||
duration: float = 20.0,
|
||||
amplitude: int = 200,
|
||||
hold_min_ms: int = 50,
|
||||
hold_max_ms: int = 400,
|
||||
dt: float = 0.02,
|
||||
) -> Path:
|
||||
"""Run motor-only capture and return the path to the saved .npz file.
|
||||
|
||||
Stream-driven: blocks on each firmware state line (~50 Hz),
|
||||
sends next motor command immediately, records both.
|
||||
No time.sleep pacing — locked to firmware clock.
|
||||
|
||||
The recording stores:
|
||||
- time: wall-clock seconds since start
|
||||
- action: normalised action = applied_speed / 255
|
||||
- motor_angle: shaft angle in radians (from encoder)
|
||||
- motor_vel: shaft velocity in rad/s (from encoder velocity)
|
||||
"""
|
||||
asset_path = Path(asset_path).resolve()
|
||||
|
||||
# Load hardware config for encoder conversion.
|
||||
hw_yaml = asset_path / "hardware.yaml"
|
||||
if not hw_yaml.exists():
|
||||
raise FileNotFoundError(f"hardware.yaml not found in {asset_path}")
|
||||
raw_hw = yaml.safe_load(hw_yaml.read_text())
|
||||
ppr = raw_hw.get("encoder", {}).get("ppr", 11)
|
||||
gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0)
|
||||
counts_per_rev: float = ppr * gear_ratio * 4.0
|
||||
max_pwm = raw_hw.get("motor", {}).get("max_pwm", 255)
|
||||
|
||||
log.info(
|
||||
"hardware_config",
|
||||
ppr=ppr,
|
||||
gear_ratio=gear_ratio,
|
||||
counts_per_rev=counts_per_rev,
|
||||
max_pwm=max_pwm,
|
||||
)
|
||||
|
||||
# Connect.
|
||||
reader = _SerialReader(port, baud)
|
||||
excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms)
|
||||
|
||||
# Prepare recording buffers.
|
||||
max_samples = int(duration / dt) + 500
|
||||
rec_time = np.zeros(max_samples, dtype=np.float64)
|
||||
rec_action = np.zeros(max_samples, dtype=np.float64)
|
||||
rec_motor_angle = np.zeros(max_samples, dtype=np.float64)
|
||||
rec_motor_vel = np.zeros(max_samples, dtype=np.float64)
|
||||
|
||||
# Reset encoder to zero.
|
||||
reader.send("R")
|
||||
time.sleep(0.1)
|
||||
|
||||
# Start streaming.
|
||||
reader.send("G")
|
||||
time.sleep(0.1)
|
||||
|
||||
log.info(
|
||||
"capture_starting",
|
||||
port=port,
|
||||
duration=duration,
|
||||
amplitude=amplitude,
|
||||
hold_range_ms=f"{hold_min_ms}–{hold_max_ms}",
|
||||
)
|
||||
|
||||
idx = 0
|
||||
pwm = 0
|
||||
last_esp_ms = -1
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
while True:
|
||||
state = reader.read_blocking(timeout=0.1)
|
||||
if not state:
|
||||
continue
|
||||
|
||||
# Deduplicate by firmware timestamp.
|
||||
esp_ms = state.get("timestamp_ms", 0)
|
||||
if esp_ms == last_esp_ms:
|
||||
continue
|
||||
last_esp_ms = esp_ms
|
||||
|
||||
elapsed = time.monotonic() - t0
|
||||
if elapsed >= duration:
|
||||
break
|
||||
|
||||
# Generate next excitation PWM.
|
||||
pwm = excitation()
|
||||
|
||||
# Send command.
|
||||
reader.send(f"M{pwm}")
|
||||
|
||||
# Convert encoder to angle/velocity.
|
||||
enc = state.get("encoder_count", 0)
|
||||
motor_angle = enc / counts_per_rev * 2.0 * math.pi
|
||||
motor_vel = (
|
||||
state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi
|
||||
)
|
||||
|
||||
# Use firmware-applied speed for the action.
|
||||
applied = state.get("applied_speed", 0)
|
||||
action_norm = applied / 255.0
|
||||
|
||||
if idx < max_samples:
|
||||
rec_time[idx] = elapsed
|
||||
rec_action[idx] = action_norm
|
||||
rec_motor_angle[idx] = motor_angle
|
||||
rec_motor_vel[idx] = motor_vel
|
||||
idx += 1
|
||||
else:
|
||||
break
|
||||
|
||||
if idx % 50 == 0:
|
||||
log.info(
|
||||
"capture_progress",
|
||||
elapsed=f"{elapsed:.1f}/{duration:.0f}s",
|
||||
samples=idx,
|
||||
pwm=pwm,
|
||||
angle_deg=f"{math.degrees(motor_angle):.1f}",
|
||||
vel_rps=f"{motor_vel / (2 * math.pi):.1f}",
|
||||
)
|
||||
|
||||
finally:
|
||||
reader.send("M0")
|
||||
reader.close()
|
||||
|
||||
# Trim.
|
||||
rec_time = rec_time[:idx]
|
||||
rec_action = rec_action[:idx]
|
||||
rec_motor_angle = rec_motor_angle[:idx]
|
||||
rec_motor_vel = rec_motor_vel[:idx]
|
||||
|
||||
# Save.
|
||||
recordings_dir = asset_path / "recordings"
|
||||
recordings_dir.mkdir(exist_ok=True)
|
||||
stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
out_path = recordings_dir / f"motor_capture_{stamp}.npz"
|
||||
np.savez_compressed(
|
||||
out_path,
|
||||
time=rec_time,
|
||||
action=rec_action,
|
||||
motor_angle=rec_motor_angle,
|
||||
motor_vel=rec_motor_vel,
|
||||
)
|
||||
|
||||
log.info(
|
||||
"capture_saved",
|
||||
path=str(out_path),
|
||||
samples=idx,
|
||||
duration_actual=f"{rec_time[-1]:.2f}s" if idx > 0 else "0s",
|
||||
)
|
||||
return out_path
|
||||
|
||||
|
||||
# ── CLI ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Capture motor-only trajectory for system identification."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--asset-path", type=str, default=_DEFAULT_ASSET,
|
||||
help="Path to motor asset directory (contains hardware.yaml)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port", type=str, default="/dev/cu.usbserial-0001",
|
||||
help="Serial port for ESP32",
|
||||
)
|
||||
parser.add_argument("--baud", type=int, default=115200)
|
||||
parser.add_argument("--duration", type=float, default=20.0, help="Capture duration (s)")
|
||||
parser.add_argument(
|
||||
"--amplitude", type=int, default=200,
|
||||
help="Max PWM magnitude (0–255, firmware allows full range)",
|
||||
)
|
||||
parser.add_argument("--hold-min-ms", type=int, default=50, help="PRBS min hold (ms)")
|
||||
parser.add_argument("--hold-max-ms", type=int, default=400, help="PRBS max hold (ms)")
|
||||
parser.add_argument("--dt", type=float, default=0.02, help="Nominal sample period (s)")
|
||||
args = parser.parse_args()
|
||||
|
||||
capture(
|
||||
asset_path=args.asset_path,
|
||||
port=args.port,
|
||||
baud=args.baud,
|
||||
duration=args.duration,
|
||||
amplitude=args.amplitude,
|
||||
hold_min_ms=args.hold_min_ms,
|
||||
hold_max_ms=args.hold_max_ms,
|
||||
dt=args.dt,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
186
src/sysid/motor/export.py
Normal file
186
src/sysid/motor/export.py
Normal file
@@ -0,0 +1,186 @@
|
||||
"""Export tuned motor parameters to MJCF and robot.yaml files.
|
||||
|
||||
Reads the original motor.xml and robot.yaml, patches with optimised
|
||||
parameter values, and writes motor_tuned.xml + robot_tuned.yaml.
|
||||
|
||||
Usage:
|
||||
python -m src.sysid.motor.export \
|
||||
--asset-path assets/motor \
|
||||
--result assets/motor/motor_sysid_result.json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import copy
|
||||
import json
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
import structlog
|
||||
import yaml
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
_DEFAULT_ASSET = "assets/motor"
|
||||
|
||||
|
||||
def export_tuned_files(
|
||||
asset_path: str | Path,
|
||||
params: dict[str, float],
|
||||
) -> tuple[Path, Path]:
|
||||
"""Write tuned MJCF and robot.yaml files.
|
||||
|
||||
Returns (tuned_mjcf_path, tuned_robot_yaml_path).
|
||||
"""
|
||||
asset_path = Path(asset_path).resolve()
|
||||
|
||||
robot_yaml_path = asset_path / "robot.yaml"
|
||||
robot_cfg = yaml.safe_load(robot_yaml_path.read_text())
|
||||
mjcf_path = asset_path / robot_cfg["mjcf"]
|
||||
|
||||
# ── Tune MJCF ────────────────────────────────────────────────
|
||||
tree = ET.parse(str(mjcf_path))
|
||||
root = tree.getroot()
|
||||
|
||||
# Actuator — use average gear for the MJCF model.
|
||||
gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear"))
|
||||
gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear"))
|
||||
gear_avg = None
|
||||
if gear_pos is not None and gear_neg is not None:
|
||||
gear_avg = (gear_pos + gear_neg) / 2.0
|
||||
elif gear_pos is not None:
|
||||
gear_avg = gear_pos
|
||||
filter_tau = params.get("actuator_filter_tau")
|
||||
for act_el in root.iter("general"):
|
||||
if act_el.get("name") == "motor":
|
||||
if gear_avg is not None:
|
||||
act_el.set("gear", str(gear_avg))
|
||||
if filter_tau is not None:
|
||||
if filter_tau > 0:
|
||||
act_el.set("dyntype", "filter")
|
||||
act_el.set("dynprm", str(filter_tau))
|
||||
else:
|
||||
act_el.set("dyntype", "none")
|
||||
|
||||
# Joint — average damping & friction for MJCF (asymmetry in runtime).
|
||||
fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss"))
|
||||
fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss"))
|
||||
fl_avg = None
|
||||
if fl_pos is not None and fl_neg is not None:
|
||||
fl_avg = (fl_pos + fl_neg) / 2.0
|
||||
elif fl_pos is not None:
|
||||
fl_avg = fl_pos
|
||||
damp_pos = params.get("motor_damping_pos", params.get("motor_damping"))
|
||||
damp_neg = params.get("motor_damping_neg", params.get("motor_damping"))
|
||||
damp_avg = None
|
||||
if damp_pos is not None and damp_neg is not None:
|
||||
damp_avg = (damp_pos + damp_neg) / 2.0
|
||||
elif damp_pos is not None:
|
||||
damp_avg = damp_pos
|
||||
for jnt in root.iter("joint"):
|
||||
if jnt.get("name") == "motor_joint":
|
||||
if damp_avg is not None:
|
||||
jnt.set("damping", str(damp_avg))
|
||||
if "motor_armature" in params:
|
||||
jnt.set("armature", str(params["motor_armature"]))
|
||||
if fl_avg is not None:
|
||||
jnt.set("frictionloss", str(fl_avg))
|
||||
|
||||
# Rotor mass.
|
||||
if "rotor_mass" in params:
|
||||
for geom in root.iter("geom"):
|
||||
if geom.get("name") == "rotor_disk":
|
||||
geom.set("mass", str(params["rotor_mass"]))
|
||||
|
||||
# Write tuned MJCF.
|
||||
tuned_mjcf_name = mjcf_path.stem + "_tuned" + mjcf_path.suffix
|
||||
tuned_mjcf_path = asset_path / tuned_mjcf_name
|
||||
ET.indent(tree, space=" ")
|
||||
tree.write(str(tuned_mjcf_path), xml_declaration=True, encoding="unicode")
|
||||
log.info("tuned_mjcf_written", path=str(tuned_mjcf_path))
|
||||
|
||||
# ── Tune robot.yaml ──────────────────────────────────────────
|
||||
tuned_cfg = copy.deepcopy(robot_cfg)
|
||||
tuned_cfg["mjcf"] = tuned_mjcf_name
|
||||
|
||||
if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0:
|
||||
act = tuned_cfg["actuators"][0]
|
||||
if gear_avg is not None:
|
||||
act["gear"] = round(gear_avg, 6)
|
||||
if "actuator_filter_tau" in params:
|
||||
act["filter_tau"] = round(params["actuator_filter_tau"], 6)
|
||||
if "motor_damping" in params:
|
||||
act["damping"] = round(params["motor_damping"], 6)
|
||||
|
||||
if "joints" not in tuned_cfg:
|
||||
tuned_cfg["joints"] = {}
|
||||
if "motor_joint" not in tuned_cfg["joints"]:
|
||||
tuned_cfg["joints"]["motor_joint"] = {}
|
||||
mj = tuned_cfg["joints"]["motor_joint"]
|
||||
if "motor_armature" in params:
|
||||
mj["armature"] = round(params["motor_armature"], 6)
|
||||
if fl_avg is not None:
|
||||
mj["frictionloss"] = round(fl_avg, 6)
|
||||
|
||||
# Asymmetric / hardware-realism / nonlinear parameters.
|
||||
realism = {}
|
||||
for key in [
|
||||
"actuator_gear_pos", "actuator_gear_neg",
|
||||
"motor_damping_pos", "motor_damping_neg",
|
||||
"motor_frictionloss_pos", "motor_frictionloss_neg",
|
||||
"motor_deadzone_pos", "motor_deadzone_neg",
|
||||
"action_bias",
|
||||
"viscous_quadratic", "back_emf_gain",
|
||||
"stribeck_friction_boost", "stribeck_vel",
|
||||
"gearbox_backlash",
|
||||
]:
|
||||
if key in params:
|
||||
realism[key] = round(params[key], 6)
|
||||
if realism:
|
||||
tuned_cfg["hardware_realism"] = realism
|
||||
|
||||
tuned_yaml_path = asset_path / "robot_tuned.yaml"
|
||||
header = (
|
||||
"# Tuned motor config — generated by src.sysid.motor.optimize\n"
|
||||
"# Original: robot.yaml\n\n"
|
||||
)
|
||||
tuned_yaml_path.write_text(
|
||||
header + yaml.dump(tuned_cfg, default_flow_style=False, sort_keys=False)
|
||||
)
|
||||
log.info("tuned_robot_yaml_written", path=str(tuned_yaml_path))
|
||||
|
||||
return tuned_mjcf_path, tuned_yaml_path
|
||||
|
||||
|
||||
# ── CLI ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Export tuned motor parameters to MJCF + robot.yaml."
|
||||
)
|
||||
parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
|
||||
parser.add_argument(
|
||||
"--result", type=str, default=None,
|
||||
help="Path to motor_sysid_result.json (auto-detected if omitted)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
asset_path = Path(args.asset_path).resolve()
|
||||
if args.result:
|
||||
result_path = Path(args.result)
|
||||
else:
|
||||
result_path = asset_path / "motor_sysid_result.json"
|
||||
|
||||
if not result_path.exists():
|
||||
raise FileNotFoundError(f"Result file not found: {result_path}")
|
||||
|
||||
result = json.loads(result_path.read_text())
|
||||
params = result["best_params"]
|
||||
|
||||
export_tuned_files(asset_path=args.asset_path, params=params)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
367
src/sysid/motor/optimize.py
Normal file
367
src/sysid/motor/optimize.py
Normal file
@@ -0,0 +1,367 @@
|
||||
"""CMA-ES optimiser — fit motor simulation parameters to a real recording.
|
||||
|
||||
Motor-only version: minimises trajectory-matching cost between MuJoCo
|
||||
rollout and recorded motor angle + velocity.
|
||||
|
||||
Usage:
|
||||
python -m src.sysid.motor.optimize \
|
||||
--recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
|
||||
|
||||
# Quick test:
|
||||
python -m src.sysid.motor.optimize --recording <file>.npz \
|
||||
--max-generations 20 --population-size 10
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import structlog
|
||||
|
||||
from src.sysid.motor.preprocess import recompute_velocity
|
||||
from src.sysid.motor.rollout import (
|
||||
MOTOR_PARAMS,
|
||||
ParamSpec,
|
||||
bounds_arrays,
|
||||
defaults_vector,
|
||||
params_to_dict,
|
||||
rollout,
|
||||
windowed_rollout,
|
||||
)
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
_DEFAULT_ASSET = "assets/motor"
|
||||
|
||||
|
||||
# ── Cost function ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _compute_trajectory_cost(
|
||||
sim: dict[str, np.ndarray],
|
||||
recording: dict[str, np.ndarray],
|
||||
pos_weight: float = 1.0,
|
||||
vel_weight: float = 0.5,
|
||||
acc_weight: float = 0.0,
|
||||
dt: float = 0.02,
|
||||
) -> float:
|
||||
"""Weighted MSE between simulated and real motor trajectories.
|
||||
|
||||
Motor-only: angle, velocity, and optionally acceleration.
|
||||
Acceleration is computed as finite-difference of velocity.
|
||||
"""
|
||||
angle_err = sim["motor_angle"] - recording["motor_angle"]
|
||||
vel_err = sim["motor_vel"] - recording["motor_vel"]
|
||||
|
||||
# Reject NaN results (unstable simulation).
|
||||
if np.any(~np.isfinite(angle_err)) or np.any(~np.isfinite(vel_err)):
|
||||
return 1e6
|
||||
|
||||
cost = float(
|
||||
pos_weight * np.mean(angle_err**2)
|
||||
+ vel_weight * np.mean(vel_err**2)
|
||||
)
|
||||
|
||||
# Optional acceleration term — penalises wrong dynamics (d(vel)/dt).
|
||||
if acc_weight > 0 and len(vel_err) > 2:
|
||||
sim_acc = np.diff(sim["motor_vel"]) / dt
|
||||
real_acc = np.diff(recording["motor_vel"]) / dt
|
||||
acc_err = sim_acc - real_acc
|
||||
if np.any(~np.isfinite(acc_err)):
|
||||
return 1e6
|
||||
# Normalise by typical acceleration scale (~50 rad/s²) to keep
|
||||
# the weight intuitive relative to vel/pos terms.
|
||||
cost += acc_weight * np.mean(acc_err**2) / (50.0**2)
|
||||
|
||||
return cost
|
||||
|
||||
|
||||
def cost_function(
|
||||
params_vec: np.ndarray,
|
||||
recording: dict[str, np.ndarray],
|
||||
asset_path: Path,
|
||||
specs: list[ParamSpec],
|
||||
sim_dt: float = 0.002,
|
||||
substeps: int = 10,
|
||||
pos_weight: float = 1.0,
|
||||
vel_weight: float = 0.5,
|
||||
acc_weight: float = 0.0,
|
||||
window_duration: float = 0.5,
|
||||
) -> float:
|
||||
"""Compute trajectory-matching cost for a candidate parameter vector.
|
||||
|
||||
Uses multiple-shooting (windowed rollout) by default.
|
||||
"""
|
||||
params = params_to_dict(params_vec, specs)
|
||||
|
||||
try:
|
||||
if window_duration > 0:
|
||||
sim = windowed_rollout(
|
||||
asset_path=asset_path,
|
||||
params=params,
|
||||
recording=recording,
|
||||
window_duration=window_duration,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
else:
|
||||
sim = rollout(
|
||||
asset_path=asset_path,
|
||||
params=params,
|
||||
actions=recording["action"],
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
except Exception as exc:
|
||||
log.warning("rollout_failed", error=str(exc))
|
||||
return 1e6
|
||||
|
||||
return _compute_trajectory_cost(
|
||||
sim, recording, pos_weight, vel_weight, acc_weight,
|
||||
dt=np.mean(np.diff(recording["time"])) if len(recording["time"]) > 1 else 0.02,
|
||||
)
|
||||
|
||||
|
||||
# ── CMA-ES optimisation loop ────────────────────────────────────────
|
||||
|
||||
|
||||
def optimize(
|
||||
asset_path: str | Path = _DEFAULT_ASSET,
|
||||
recording_path: str | Path = "",
|
||||
specs: list[ParamSpec] | None = None,
|
||||
sigma0: float = 0.3,
|
||||
population_size: int = 30,
|
||||
max_generations: int = 300,
|
||||
sim_dt: float = 0.002,
|
||||
substeps: int = 10,
|
||||
pos_weight: float = 1.0,
|
||||
vel_weight: float = 0.5,
|
||||
acc_weight: float = 0.0,
|
||||
window_duration: float = 0.5,
|
||||
seed: int = 42,
|
||||
preprocess_vel: bool = True,
|
||||
sg_window: int = 7,
|
||||
sg_polyorder: int = 3,
|
||||
) -> dict:
|
||||
"""Run CMA-ES optimisation and return results dict."""
|
||||
from cmaes import CMA
|
||||
|
||||
asset_path = Path(asset_path).resolve()
|
||||
recording_path = Path(recording_path).resolve()
|
||||
|
||||
if specs is None:
|
||||
specs = MOTOR_PARAMS
|
||||
|
||||
# Load recording.
|
||||
recording = dict(np.load(recording_path))
|
||||
n_samples = len(recording["time"])
|
||||
duration = recording["time"][-1] - recording["time"][0]
|
||||
n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
|
||||
log.info(
|
||||
"recording_loaded",
|
||||
path=str(recording_path),
|
||||
samples=n_samples,
|
||||
duration=f"{duration:.1f}s",
|
||||
n_windows=n_windows,
|
||||
)
|
||||
|
||||
# Preprocess velocity: replace noisy firmware finite-difference with
|
||||
# smooth Savitzky-Golay derivative of the angle signal.
|
||||
if preprocess_vel:
|
||||
recording = recompute_velocity(
|
||||
recording,
|
||||
window_length=sg_window,
|
||||
polyorder=sg_polyorder,
|
||||
)
|
||||
|
||||
# Normalise to [0, 1] for CMA-ES.
|
||||
lo, hi = bounds_arrays(specs)
|
||||
x0 = defaults_vector(specs)
|
||||
span = hi - lo
|
||||
span[span == 0] = 1.0
|
||||
|
||||
def to_normed(x: np.ndarray) -> np.ndarray:
|
||||
return (x - lo) / span
|
||||
|
||||
def from_normed(x_n: np.ndarray) -> np.ndarray:
|
||||
return x_n * span + lo
|
||||
|
||||
x0_normed = to_normed(x0)
|
||||
bounds_normed = np.column_stack(
|
||||
[np.zeros(len(specs)), np.ones(len(specs))]
|
||||
)
|
||||
|
||||
optimizer = CMA(
|
||||
mean=x0_normed,
|
||||
sigma=sigma0,
|
||||
bounds=bounds_normed,
|
||||
population_size=population_size,
|
||||
seed=seed,
|
||||
)
|
||||
|
||||
best_cost = float("inf")
|
||||
best_params_vec = x0.copy()
|
||||
history: list[tuple[int, float]] = []
|
||||
|
||||
log.info(
|
||||
"cmaes_starting",
|
||||
n_params=len(specs),
|
||||
population=population_size,
|
||||
max_gens=max_generations,
|
||||
sigma0=sigma0,
|
||||
)
|
||||
|
||||
t0 = time.monotonic()
|
||||
|
||||
for gen in range(max_generations):
|
||||
solutions = []
|
||||
for _ in range(optimizer.population_size):
|
||||
x_normed = optimizer.ask()
|
||||
x_natural = from_normed(x_normed)
|
||||
x_natural = np.clip(x_natural, lo, hi)
|
||||
|
||||
c = cost_function(
|
||||
x_natural,
|
||||
recording,
|
||||
asset_path,
|
||||
specs,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
pos_weight=pos_weight,
|
||||
vel_weight=vel_weight,
|
||||
acc_weight=acc_weight,
|
||||
window_duration=window_duration,
|
||||
)
|
||||
solutions.append((x_normed, c))
|
||||
|
||||
if c < best_cost:
|
||||
best_cost = c
|
||||
best_params_vec = x_natural.copy()
|
||||
|
||||
optimizer.tell(solutions)
|
||||
history.append((gen, best_cost))
|
||||
|
||||
elapsed = time.monotonic() - t0
|
||||
if gen % 5 == 0 or gen == max_generations - 1:
|
||||
log.info(
|
||||
"cmaes_generation",
|
||||
gen=gen,
|
||||
best_cost=f"{best_cost:.6f}",
|
||||
elapsed=f"{elapsed:.1f}s",
|
||||
gen_best=f"{min(c for _, c in solutions):.6f}",
|
||||
)
|
||||
|
||||
total_time = time.monotonic() - t0
|
||||
best_params = params_to_dict(best_params_vec, specs)
|
||||
|
||||
log.info(
|
||||
"cmaes_finished",
|
||||
best_cost=f"{best_cost:.6f}",
|
||||
total_time=f"{total_time:.1f}s",
|
||||
evaluations=max_generations * population_size,
|
||||
)
|
||||
|
||||
# Log parameter comparison.
|
||||
defaults = params_to_dict(defaults_vector(specs), specs)
|
||||
for name in best_params:
|
||||
d = defaults[name]
|
||||
b = best_params[name]
|
||||
change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0
|
||||
log.info(
|
||||
"param_result",
|
||||
name=name,
|
||||
default=f"{d:.6g}",
|
||||
tuned=f"{b:.6g}",
|
||||
change=f"{change_pct:+.1f}%",
|
||||
)
|
||||
|
||||
return {
|
||||
"best_params": best_params,
|
||||
"best_cost": best_cost,
|
||||
"history": history,
|
||||
"recording": str(recording_path),
|
||||
"param_names": [s.name for s in specs],
|
||||
"defaults": {s.name: s.default for s in specs},
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
}
|
||||
|
||||
|
||||
# ── CLI ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Fit motor simulation parameters to a real recording (CMA-ES)."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--asset-path", type=str, default=_DEFAULT_ASSET,
|
||||
help="Path to motor asset directory",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--recording", type=str, required=True,
|
||||
help="Path to .npz recording file",
|
||||
)
|
||||
parser.add_argument("--sigma0", type=float, default=0.3)
|
||||
parser.add_argument("--population-size", type=int, default=30)
|
||||
parser.add_argument("--max-generations", type=int, default=300)
|
||||
parser.add_argument("--sim-dt", type=float, default=0.002)
|
||||
parser.add_argument("--substeps", type=int, default=10)
|
||||
parser.add_argument("--pos-weight", type=float, default=1.0)
|
||||
parser.add_argument("--vel-weight", type=float, default=0.5)
|
||||
parser.add_argument("--acc-weight", type=float, default=0.0,
|
||||
help="Weight for acceleration matching (0=off, try 0.1-0.5)")
|
||||
parser.add_argument("--window-duration", type=float, default=0.5)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument(
|
||||
"--no-preprocess-vel", action="store_true",
|
||||
help="Disable Savitzky-Golay velocity preprocessing",
|
||||
)
|
||||
parser.add_argument("--sg-window", type=int, default=7,
|
||||
help="Savitzky-Golay window length (odd, default 7 = 140ms)")
|
||||
parser.add_argument("--sg-polyorder", type=int, default=3,
|
||||
help="Savitzky-Golay polynomial order (default 3 = cubic)")
|
||||
args = parser.parse_args()
|
||||
|
||||
result = optimize(
|
||||
asset_path=args.asset_path,
|
||||
recording_path=args.recording,
|
||||
sigma0=args.sigma0,
|
||||
population_size=args.population_size,
|
||||
max_generations=args.max_generations,
|
||||
sim_dt=args.sim_dt,
|
||||
substeps=args.substeps,
|
||||
pos_weight=args.pos_weight,
|
||||
vel_weight=args.vel_weight,
|
||||
acc_weight=args.acc_weight,
|
||||
window_duration=args.window_duration,
|
||||
seed=args.seed,
|
||||
preprocess_vel=not args.no_preprocess_vel,
|
||||
sg_window=args.sg_window,
|
||||
sg_polyorder=args.sg_polyorder,
|
||||
)
|
||||
|
||||
# Save results JSON.
|
||||
asset_path = Path(args.asset_path).resolve()
|
||||
result_path = asset_path / "motor_sysid_result.json"
|
||||
result_json = {k: v for k, v in result.items() if k != "history"}
|
||||
result_json["history_summary"] = {
|
||||
"first_cost": result["history"][0][1] if result["history"] else None,
|
||||
"final_cost": result["history"][-1][1] if result["history"] else None,
|
||||
"generations": len(result["history"]),
|
||||
}
|
||||
result_path.write_text(json.dumps(result_json, indent=2, default=str))
|
||||
log.info("results_saved", path=str(result_path))
|
||||
|
||||
# Export tuned files.
|
||||
from src.sysid.motor.export import export_tuned_files
|
||||
|
||||
export_tuned_files(asset_path=args.asset_path, params=result["best_params"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
114
src/sysid/motor/preprocess.py
Normal file
114
src/sysid/motor/preprocess.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""Recording preprocessing — clean velocity estimation from angle data.
|
||||
|
||||
The ESP32 firmware computes velocity as a raw finite-difference of encoder
|
||||
counts at 50 Hz. With a 1320 CPR encoder that gives ~0.24 rad/s of
|
||||
quantisation noise per count. This module replaces the noisy firmware
|
||||
velocity with a smooth differentiation of the (much cleaner) angle signal.
|
||||
|
||||
Method: Savitzky-Golay filter applied to the angle signal, then
|
||||
differentiated analytically. Zero phase lag, preserves transients well.
|
||||
|
||||
This is standard practice in robotics sysid — see e.g. MATLAB System ID
|
||||
Toolbox, Drake's trajectory processing, or ETH's ANYmal sysid pipeline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
from scipy.signal import savgol_filter
|
||||
|
||||
import structlog
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
def recompute_velocity(
|
||||
recording: dict[str, np.ndarray],
|
||||
window_length: int = 7,
|
||||
polyorder: int = 3,
|
||||
deriv: int = 1,
|
||||
) -> dict[str, np.ndarray]:
|
||||
"""Recompute motor_vel from motor_angle using Savitzky-Golay differentiation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
recording : dict with at least 'time', 'motor_angle', 'motor_vel' keys.
|
||||
window_length : SG filter window (must be odd, > polyorder).
|
||||
7 samples at 50 Hz = 140ms window — good balance of smoothness
|
||||
and responsiveness. Captures dynamics up to ~7 Hz.
|
||||
polyorder : Polynomial order for the SG filter (3 = cubic).
|
||||
deriv : Derivative order (1 = first derivative = velocity).
|
||||
|
||||
Returns
|
||||
-------
|
||||
New recording dict with 'motor_vel' replaced and 'motor_vel_raw' added.
|
||||
"""
|
||||
rec = dict(recording) # shallow copy
|
||||
|
||||
times = rec["time"]
|
||||
angles = rec["motor_angle"]
|
||||
dt = np.mean(np.diff(times))
|
||||
|
||||
# Keep original for diagnostics.
|
||||
rec["motor_vel_raw"] = rec["motor_vel"].copy()
|
||||
|
||||
# Savitzky-Golay derivative: fits a polynomial to each window,
|
||||
# then takes the analytical derivative → smooth, zero phase lag.
|
||||
vel_sg = savgol_filter(
|
||||
angles,
|
||||
window_length=window_length,
|
||||
polyorder=polyorder,
|
||||
deriv=deriv,
|
||||
delta=dt,
|
||||
)
|
||||
|
||||
# Compute stats for logging.
|
||||
raw_vel = rec["motor_vel_raw"]
|
||||
noise_estimate = np.std(raw_vel - vel_sg)
|
||||
max_diff = np.max(np.abs(raw_vel - vel_sg))
|
||||
|
||||
log.info(
|
||||
"velocity_recomputed",
|
||||
method="savgol",
|
||||
window=window_length,
|
||||
polyorder=polyorder,
|
||||
dt=f"{dt*1000:.1f}ms",
|
||||
noise_std=f"{noise_estimate:.3f} rad/s",
|
||||
max_diff=f"{max_diff:.3f} rad/s",
|
||||
raw_rms=f"{np.sqrt(np.mean(raw_vel**2)):.3f}",
|
||||
sg_rms=f"{np.sqrt(np.mean(vel_sg**2)):.3f}",
|
||||
)
|
||||
|
||||
rec["motor_vel"] = vel_sg
|
||||
return rec
|
||||
|
||||
|
||||
def smooth_velocity(
|
||||
recording: dict[str, np.ndarray],
|
||||
cutoff_hz: float = 10.0,
|
||||
) -> dict[str, np.ndarray]:
|
||||
"""Alternative: apply zero-phase Butterworth low-pass to motor_vel.
|
||||
|
||||
Simpler than SG derivative but introduces slight edge effects.
|
||||
"""
|
||||
from scipy.signal import butter, filtfilt
|
||||
|
||||
rec = dict(recording)
|
||||
rec["motor_vel_raw"] = rec["motor_vel"].copy()
|
||||
|
||||
dt = np.mean(np.diff(rec["time"]))
|
||||
fs = 1.0 / dt
|
||||
nyq = fs / 2.0
|
||||
norm_cutoff = min(cutoff_hz / nyq, 0.99)
|
||||
|
||||
b, a = butter(2, norm_cutoff, btype="low")
|
||||
rec["motor_vel"] = filtfilt(b, a, rec["motor_vel"])
|
||||
|
||||
log.info(
|
||||
"velocity_smoothed",
|
||||
method="butterworth",
|
||||
cutoff_hz=cutoff_hz,
|
||||
fs=fs,
|
||||
)
|
||||
|
||||
return rec
|
||||
460
src/sysid/motor/rollout.py
Normal file
460
src/sysid/motor/rollout.py
Normal file
@@ -0,0 +1,460 @@
|
||||
"""Deterministic simulation replay — roll out recorded actions in MuJoCo.
|
||||
|
||||
Motor-only version: single hinge joint, no pendulum.
|
||||
Given a parameter vector and recorded actions, builds a MuJoCo model
|
||||
with overridden dynamics, replays the actions, and returns the simulated
|
||||
motor angle + velocity for comparison with the real recording.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
import mujoco
|
||||
import numpy as np
|
||||
import yaml
|
||||
|
||||
|
||||
# ── Tunable parameter specification ──────────────────────────────────
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ParamSpec:
|
||||
"""Specification for a single tunable parameter."""
|
||||
|
||||
name: str
|
||||
default: float
|
||||
lower: float
|
||||
upper: float
|
||||
log_scale: bool = False
|
||||
|
||||
|
||||
# Motor-only parameters to identify.
|
||||
# These capture the full transfer function: PWM → shaft angle/velocity.
|
||||
#
|
||||
# Asymmetric parameters (pos/neg suffix) capture real-world differences
|
||||
# between CW and CCW rotation caused by gear mesh, brush contact,
|
||||
# and H-bridge asymmetry.
|
||||
MOTOR_PARAMS: list[ParamSpec] = [
|
||||
# ── Actuator ─────────────────────────────────────────────────
|
||||
# gear_pos/neg: effective torque per unit ctrl, split by direction.
|
||||
# Real motors + L298N often have different drive strength per direction.
|
||||
ParamSpec("actuator_gear_pos", 0.064, 0.005, 0.5, log_scale=True),
|
||||
ParamSpec("actuator_gear_neg", 0.064, 0.005, 0.5, log_scale=True),
|
||||
# filter_tau: first-order electrical/driver time constant (s).
|
||||
# Lower bound 1ms — L298N PWM switching is very fast.
|
||||
ParamSpec("actuator_filter_tau", 0.03, 0.001, 0.20),
|
||||
# ── Joint dynamics ───────────────────────────────────────────
|
||||
# damping_pos/neg: viscous friction (back-EMF), split by direction.
|
||||
ParamSpec("motor_damping_pos", 0.003, 1e-5, 0.1, log_scale=True),
|
||||
ParamSpec("motor_damping_neg", 0.003, 1e-5, 0.1, log_scale=True),
|
||||
# armature: reflected rotor inertia (kg·m²).
|
||||
ParamSpec("motor_armature", 0.0001, 1e-6, 0.01, log_scale=True),
|
||||
# frictionloss_pos/neg: Coulomb friction, split by velocity direction.
|
||||
ParamSpec("motor_frictionloss_pos", 0.03, 0.001, 0.2, log_scale=True),
|
||||
ParamSpec("motor_frictionloss_neg", 0.03, 0.001, 0.2, log_scale=True),
|
||||
# ── Nonlinear dynamics ───────────────────────────────────────
|
||||
# viscous_quadratic: velocity-squared drag term (N·m·s²/rad²).
|
||||
# Captures nonlinear friction that increases at high speed (air drag,
|
||||
# grease viscosity, etc.). Opposes motion.
|
||||
ParamSpec("viscous_quadratic", 0.0, 0.0, 0.005),
|
||||
# back_emf_gain: torque reduction proportional to |vel × ctrl|.
|
||||
# Models the back-EMF effect: at high speed the motor produces less
|
||||
# torque because the voltage drop across the armature is smaller.
|
||||
ParamSpec("back_emf_gain", 0.0, 0.0, 0.05),
|
||||
# stribeck_vel: characteristic velocity below which Coulomb friction
|
||||
# is boosted (Stribeck effect). 0 = standard Coulomb only.
|
||||
ParamSpec("stribeck_friction_boost", 0.0, 0.0, 0.15),
|
||||
ParamSpec("stribeck_vel", 2.0, 0.1, 8.0),
|
||||
# ── Rotor load ───────────────────────────────────────────────
|
||||
ParamSpec("rotor_mass", 0.012, 0.002, 0.05, log_scale=True),
|
||||
# ── Hardware realism ─────────────────────────────────────────
|
||||
# deadzone_pos/neg: minimum |action| per direction.
|
||||
ParamSpec("motor_deadzone_pos", 0.08, 0.0, 0.30),
|
||||
ParamSpec("motor_deadzone_neg", 0.08, 0.0, 0.30),
|
||||
# action_bias: constant offset added to ctrl (H-bridge asymmetry).
|
||||
ParamSpec("action_bias", 0.0, -0.10, 0.10),
|
||||
# ── Gearbox backlash ─────────────────────────────────────────
|
||||
# backlash_halfwidth: half the angular deadband (rad) in the gearbox.
|
||||
# When the motor reverses, the shaft doesn't move until the backlash
|
||||
# gap is taken up. Typical for 30:1 plastic/metal spur gears.
|
||||
ParamSpec("gearbox_backlash", 0.0, 0.0, 0.15),
|
||||
]
|
||||
|
||||
|
||||
def params_to_dict(
|
||||
values: np.ndarray, specs: list[ParamSpec] | None = None
|
||||
) -> dict[str, float]:
|
||||
if specs is None:
|
||||
specs = MOTOR_PARAMS
|
||||
return {s.name: float(values[i]) for i, s in enumerate(specs)}
|
||||
|
||||
|
||||
def defaults_vector(specs: list[ParamSpec] | None = None) -> np.ndarray:
|
||||
if specs is None:
|
||||
specs = MOTOR_PARAMS
|
||||
return np.array([s.default for s in specs], dtype=np.float64)
|
||||
|
||||
|
||||
def bounds_arrays(
|
||||
specs: list[ParamSpec] | None = None,
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
if specs is None:
|
||||
specs = MOTOR_PARAMS
|
||||
lo = np.array([s.lower for s in specs], dtype=np.float64)
|
||||
hi = np.array([s.upper for s in specs], dtype=np.float64)
|
||||
return lo, hi
|
||||
|
||||
|
||||
# ── MuJoCo model building with parameter overrides ──────────────────
|
||||
|
||||
|
||||
def _build_model(
|
||||
asset_path: Path,
|
||||
params: dict[str, float],
|
||||
) -> mujoco.MjModel:
|
||||
"""Build a MuJoCo model from motor.xml with parameter overrides.
|
||||
|
||||
Parses the MJCF, patches actuator/joint/body parameters, reloads.
|
||||
"""
|
||||
asset_path = Path(asset_path).resolve()
|
||||
robot_cfg = yaml.safe_load((asset_path / "robot.yaml").read_text())
|
||||
mjcf_path = asset_path / robot_cfg["mjcf"]
|
||||
|
||||
tree = ET.parse(str(mjcf_path))
|
||||
root = tree.getroot()
|
||||
|
||||
# ── Actuator overrides ───────────────────────────────────────
|
||||
# Use average of pos/neg gear for MuJoCo (asymmetry handled in ctrl).
|
||||
gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
|
||||
gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
|
||||
gear = (gear_pos + gear_neg) / 2.0
|
||||
filter_tau = params.get("actuator_filter_tau", 0.03)
|
||||
|
||||
for act_el in root.iter("general"):
|
||||
if act_el.get("name") == "motor":
|
||||
act_el.set("gear", str(gear))
|
||||
if filter_tau > 0:
|
||||
act_el.set("dyntype", "filter")
|
||||
act_el.set("dynprm", str(filter_tau))
|
||||
else:
|
||||
act_el.set("dyntype", "none")
|
||||
act_el.set("dynprm", "1")
|
||||
|
||||
# ── Joint overrides ──────────────────────────────────────────
|
||||
# Damping and friction are asymmetric + nonlinear → applied manually.
|
||||
# Set MuJoCo damping & frictionloss to 0; we handle them in qfrc_applied.
|
||||
armature = params.get("motor_armature", 0.0001)
|
||||
|
||||
for jnt in root.iter("joint"):
|
||||
if jnt.get("name") == "motor_joint":
|
||||
jnt.set("damping", "0")
|
||||
jnt.set("armature", str(armature))
|
||||
jnt.set("frictionloss", "0")
|
||||
|
||||
# ── Rotor mass override ──────────────────────────────────────
|
||||
rotor_mass = params.get("rotor_mass", 0.012)
|
||||
for geom in root.iter("geom"):
|
||||
if geom.get("name") == "rotor_disk":
|
||||
geom.set("mass", str(rotor_mass))
|
||||
|
||||
# Write temp file and load.
|
||||
tmp_path = asset_path / "_tmp_motor_sysid.xml"
|
||||
try:
|
||||
tree.write(str(tmp_path), xml_declaration=True, encoding="unicode")
|
||||
model = mujoco.MjModel.from_xml_path(str(tmp_path))
|
||||
finally:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
|
||||
return model
|
||||
|
||||
|
||||
# ── Action + asymmetry transforms ────────────────────────────────────
|
||||
|
||||
|
||||
def _transform_action(
|
||||
action: float,
|
||||
params: dict[str, float],
|
||||
) -> float:
|
||||
"""Apply bias, direction-dependent deadzone, and gear scaling.
|
||||
|
||||
The MuJoCo actuator has the *average* gear ratio. We rescale the
|
||||
control signal so that ``ctrl × gear_avg ≈ action × gear_dir``.
|
||||
"""
|
||||
# Constant bias (H-bridge asymmetry).
|
||||
action = action + params.get("action_bias", 0.0)
|
||||
|
||||
# Direction-dependent deadzone.
|
||||
dz_pos = params.get("motor_deadzone_pos", params.get("motor_deadzone", 0.08))
|
||||
dz_neg = params.get("motor_deadzone_neg", params.get("motor_deadzone", 0.08))
|
||||
if action >= 0 and action < dz_pos:
|
||||
return 0.0
|
||||
if action < 0 and action > -dz_neg:
|
||||
return 0.0
|
||||
|
||||
# Direction-dependent gear scaling.
|
||||
# MuJoCo model uses gear_avg; we rescale ctrl to get the right torque.
|
||||
gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064))
|
||||
gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064))
|
||||
gear_avg = (gear_pos + gear_neg) / 2.0
|
||||
if gear_avg < 1e-8:
|
||||
return 0.0
|
||||
gear_dir = gear_pos if action >= 0 else gear_neg
|
||||
return action * (gear_dir / gear_avg)
|
||||
|
||||
|
||||
def _apply_forces(
|
||||
data: mujoco.MjData,
|
||||
vel: float,
|
||||
ctrl: float,
|
||||
params: dict[str, float],
|
||||
backlash_state: list[float] | None = None,
|
||||
) -> None:
|
||||
"""Apply all manual forces: asymmetric friction, damping, and nonlinear terms.
|
||||
|
||||
Everything that MuJoCo can't represent with its symmetric joint model
|
||||
is injected here via ``qfrc_applied``.
|
||||
|
||||
Forces applied (all oppose motion or reduce torque):
|
||||
1. Asymmetric Coulomb friction (with Stribeck boost at low speed)
|
||||
2. Asymmetric viscous damping
|
||||
3. Quadratic velocity drag
|
||||
4. Back-EMF torque reduction (proportional to |vel|)
|
||||
|
||||
Backlash:
|
||||
If backlash_state is provided, it is a 1-element list [gap_pos].
|
||||
gap_pos tracks the motor's position within the backlash deadband.
|
||||
When inside the gap, no actuator torque is transmitted to the
|
||||
output shaft — only friction forces act.
|
||||
"""
|
||||
torque = 0.0
|
||||
|
||||
# ── Gearbox backlash ──────────────────────────────────────────
|
||||
# Model: the gear teeth have play of 2×halfwidth radians.
|
||||
# We track where the motor is within that gap. When at the
|
||||
# edge (contact), actuator torque passes through normally.
|
||||
# When inside the gap, no actuator torque is transmitted.
|
||||
backlash_hw = params.get("gearbox_backlash", 0.0)
|
||||
actuator_torque_scale = 1.0 # 1.0 = full contact, 0.0 = in gap
|
||||
|
||||
if backlash_hw > 0 and backlash_state is not None:
|
||||
# gap_pos: how far into the backlash gap we are.
|
||||
# Range: [-backlash_hw, +backlash_hw]
|
||||
# At ±backlash_hw, gears are in contact and torque transmits.
|
||||
gap = backlash_state[0]
|
||||
# Update gap position based on velocity.
|
||||
dt_sub = data.model.opt.timestep
|
||||
gap += vel * dt_sub
|
||||
# Clamp to backlash range.
|
||||
if gap > backlash_hw:
|
||||
gap = backlash_hw
|
||||
elif gap < -backlash_hw:
|
||||
gap = -backlash_hw
|
||||
|
||||
backlash_state[0] = gap
|
||||
|
||||
# If not at contact edge, no torque transmitted.
|
||||
if abs(gap) < backlash_hw - 1e-8:
|
||||
actuator_torque_scale = 0.0
|
||||
else:
|
||||
actuator_torque_scale = 1.0
|
||||
|
||||
# ── 1. Coulomb friction (direction-dependent + Stribeck) ─────
|
||||
fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss", 0.03))
|
||||
fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss", 0.03))
|
||||
stribeck_boost = params.get("stribeck_friction_boost", 0.0)
|
||||
stribeck_vel = params.get("stribeck_vel", 2.0)
|
||||
|
||||
if abs(vel) > 1e-6:
|
||||
fl = fl_pos if vel > 0 else fl_neg
|
||||
# Stribeck: boost friction at low speed. Exponential decay.
|
||||
if stribeck_boost > 0 and stribeck_vel > 0:
|
||||
fl = fl * (1.0 + stribeck_boost * np.exp(-abs(vel) / stribeck_vel))
|
||||
# Coulomb: constant magnitude, opposes motion.
|
||||
torque -= np.sign(vel) * fl
|
||||
|
||||
# ── 2. Asymmetric viscous damping ────────────────────────────
|
||||
damp_pos = params.get("motor_damping_pos", params.get("motor_damping", 0.003))
|
||||
damp_neg = params.get("motor_damping_neg", params.get("motor_damping", 0.003))
|
||||
damp = damp_pos if vel > 0 else damp_neg
|
||||
torque -= damp * vel
|
||||
|
||||
# ── 3. Quadratic velocity drag ───────────────────────────────
|
||||
visc_quad = params.get("viscous_quadratic", 0.0)
|
||||
if visc_quad > 0:
|
||||
torque -= visc_quad * vel * abs(vel)
|
||||
|
||||
# ── 4. Back-EMF torque reduction ─────────────────────────────
|
||||
# At high speed, the motor's effective torque is reduced because
|
||||
# back-EMF opposes the supply voltage. Modelled as a torque that
|
||||
# opposes the control signal proportional to speed.
|
||||
bemf = params.get("back_emf_gain", 0.0)
|
||||
if bemf > 0 and abs(ctrl) > 1e-6:
|
||||
# The reduction should oppose the actuator torque direction.
|
||||
torque -= bemf * vel * np.sign(ctrl) * actuator_torque_scale
|
||||
|
||||
# ── 5. Scale actuator contribution by backlash state ─────────
|
||||
# When in the backlash gap, MuJoCo's actuator force should not
|
||||
# transmit. We cancel it by applying an opposing force.
|
||||
if actuator_torque_scale < 1.0:
|
||||
# The actuator_force from MuJoCo will be applied by mj_step.
|
||||
# We need to counteract it. data.qfrc_actuator isn't set yet
|
||||
# at this point (pre-step), so we zero the ctrl instead.
|
||||
# This is handled in the rollout loop by zeroing ctrl.
|
||||
pass
|
||||
|
||||
data.qfrc_applied[0] = max(-10.0, min(10.0, torque))
|
||||
return actuator_torque_scale
|
||||
|
||||
|
||||
# ── Simulation rollout ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def rollout(
|
||||
asset_path: str | Path,
|
||||
params: dict[str, float],
|
||||
actions: np.ndarray,
|
||||
sim_dt: float = 0.002,
|
||||
substeps: int = 10,
|
||||
) -> dict[str, np.ndarray]:
|
||||
"""Open-loop replay of recorded actions.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
asset_path : motor asset directory
|
||||
params : named parameter overrides
|
||||
actions : (N,) normalised actions [-1, 1] from the recording
|
||||
sim_dt : MuJoCo physics timestep
|
||||
substeps : physics substeps per control step (ctrl_dt = sim_dt × substeps)
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict with motor_angle (N,) and motor_vel (N,).
|
||||
"""
|
||||
asset_path = Path(asset_path).resolve()
|
||||
model = _build_model(asset_path, params)
|
||||
model.opt.timestep = sim_dt
|
||||
data = mujoco.MjData(model)
|
||||
mujoco.mj_resetData(model, data)
|
||||
|
||||
n = len(actions)
|
||||
|
||||
sim_motor_angle = np.zeros(n, dtype=np.float64)
|
||||
sim_motor_vel = np.zeros(n, dtype=np.float64)
|
||||
|
||||
# Backlash state: [gap_position]. Starts at 0 (centered in gap).
|
||||
backlash_state = [0.0]
|
||||
|
||||
for i in range(n):
|
||||
ctrl = _transform_action(actions[i], params)
|
||||
data.ctrl[0] = ctrl
|
||||
|
||||
for _ in range(substeps):
|
||||
scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
|
||||
# If in backlash gap, zero ctrl so actuator torque doesn't transmit.
|
||||
if scale < 1.0:
|
||||
data.ctrl[0] = 0.0
|
||||
else:
|
||||
data.ctrl[0] = ctrl
|
||||
mujoco.mj_step(model, data)
|
||||
|
||||
# Bail out on NaN/instability.
|
||||
if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
|
||||
sim_motor_angle[i:] = np.nan
|
||||
sim_motor_vel[i:] = np.nan
|
||||
break
|
||||
|
||||
sim_motor_angle[i] = data.qpos[0]
|
||||
sim_motor_vel[i] = data.qvel[0]
|
||||
|
||||
return {
|
||||
"motor_angle": sim_motor_angle,
|
||||
"motor_vel": sim_motor_vel,
|
||||
}
|
||||
|
||||
|
||||
def windowed_rollout(
|
||||
asset_path: str | Path,
|
||||
params: dict[str, float],
|
||||
recording: dict[str, np.ndarray],
|
||||
window_duration: float = 0.5,
|
||||
sim_dt: float = 0.002,
|
||||
substeps: int = 10,
|
||||
) -> dict[str, np.ndarray]:
|
||||
"""Multiple-shooting rollout for motor-only sysid.
|
||||
|
||||
Splits the recording into short windows. Each window is initialised
|
||||
from the real motor state, preventing error accumulation.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict with motor_angle (N,), motor_vel (N,), n_windows (int).
|
||||
"""
|
||||
asset_path = Path(asset_path).resolve()
|
||||
model = _build_model(asset_path, params)
|
||||
model.opt.timestep = sim_dt
|
||||
data = mujoco.MjData(model)
|
||||
|
||||
times = recording["time"]
|
||||
actions = recording["action"]
|
||||
real_angle = recording["motor_angle"]
|
||||
real_vel = recording["motor_vel"]
|
||||
n = len(actions)
|
||||
|
||||
sim_motor_angle = np.zeros(n, dtype=np.float64)
|
||||
sim_motor_vel = np.zeros(n, dtype=np.float64)
|
||||
|
||||
# Compute window boundaries.
|
||||
t0 = times[0]
|
||||
t_end = times[-1]
|
||||
window_starts: list[int] = []
|
||||
current_t = t0
|
||||
while current_t < t_end:
|
||||
idx = int(np.searchsorted(times, current_t))
|
||||
idx = min(idx, n - 1)
|
||||
window_starts.append(idx)
|
||||
current_t += window_duration
|
||||
|
||||
n_windows = len(window_starts)
|
||||
|
||||
for w, w_start in enumerate(window_starts):
|
||||
w_end = window_starts[w + 1] if w + 1 < n_windows else n
|
||||
|
||||
# Init from real state at window start.
|
||||
mujoco.mj_resetData(model, data)
|
||||
data.qpos[0] = real_angle[w_start]
|
||||
data.qvel[0] = real_vel[w_start]
|
||||
data.ctrl[:] = 0.0
|
||||
mujoco.mj_forward(model, data)
|
||||
|
||||
# Backlash state resets each window (assume contact at start).
|
||||
backlash_state = [0.0]
|
||||
|
||||
for i in range(w_start, w_end):
|
||||
ctrl = _transform_action(actions[i], params)
|
||||
data.ctrl[0] = ctrl
|
||||
|
||||
for _ in range(substeps):
|
||||
scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state)
|
||||
if scale < 1.0:
|
||||
data.ctrl[0] = 0.0
|
||||
else:
|
||||
data.ctrl[0] = ctrl
|
||||
mujoco.mj_step(model, data)
|
||||
|
||||
# Bail out on NaN/instability — fill rest of window with last good.
|
||||
if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4:
|
||||
sim_motor_angle[i:w_end] = sim_motor_angle[max(i-1, w_start)]
|
||||
sim_motor_vel[i:w_end] = 0.0
|
||||
break
|
||||
|
||||
sim_motor_angle[i] = data.qpos[0]
|
||||
sim_motor_vel[i] = data.qvel[0]
|
||||
|
||||
return {
|
||||
"motor_angle": sim_motor_angle,
|
||||
"motor_vel": sim_motor_vel,
|
||||
"n_windows": n_windows,
|
||||
}
|
||||
204
src/sysid/motor/visualize.py
Normal file
204
src/sysid/motor/visualize.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""Visualise motor system identification — real vs simulated trajectories.
|
||||
|
||||
Usage:
|
||||
python -m src.sysid.motor.visualize \
|
||||
--recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz
|
||||
|
||||
# With tuned result:
|
||||
python -m src.sysid.motor.visualize \
|
||||
--recording <file>.npz \
|
||||
--result assets/motor/motor_sysid_result.json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import structlog
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
_DEFAULT_ASSET = "assets/motor"
|
||||
|
||||
|
||||
def visualize(
|
||||
asset_path: str | Path = _DEFAULT_ASSET,
|
||||
recording_path: str | Path = "",
|
||||
result_path: str | Path | None = None,
|
||||
sim_dt: float = 0.002,
|
||||
substeps: int = 10,
|
||||
window_duration: float = 0.5,
|
||||
save_path: str | Path | None = None,
|
||||
show: bool = True,
|
||||
) -> None:
|
||||
"""Generate 3-panel comparison plot: angle, velocity, action."""
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from src.sysid.motor.rollout import (
|
||||
MOTOR_PARAMS,
|
||||
defaults_vector,
|
||||
params_to_dict,
|
||||
rollout,
|
||||
windowed_rollout,
|
||||
)
|
||||
|
||||
asset_path = Path(asset_path).resolve()
|
||||
recording = dict(np.load(recording_path))
|
||||
|
||||
t = recording["time"]
|
||||
actions = recording["action"]
|
||||
|
||||
# ── Simulate with default parameters ─────────────────────────
|
||||
default_params = params_to_dict(defaults_vector(MOTOR_PARAMS), MOTOR_PARAMS)
|
||||
log.info("simulating_default_params", windowed=window_duration > 0)
|
||||
|
||||
if window_duration > 0:
|
||||
sim_default = windowed_rollout(
|
||||
asset_path=asset_path,
|
||||
params=default_params,
|
||||
recording=recording,
|
||||
window_duration=window_duration,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
else:
|
||||
sim_default = rollout(
|
||||
asset_path=asset_path,
|
||||
params=default_params,
|
||||
actions=actions,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
|
||||
# ── Simulate with tuned parameters (if available) ────────────
|
||||
sim_tuned = None
|
||||
tuned_cost = None
|
||||
|
||||
if result_path is not None:
|
||||
result_path = Path(result_path)
|
||||
else:
|
||||
# Auto-detect.
|
||||
auto = asset_path / "motor_sysid_result.json"
|
||||
if auto.exists():
|
||||
result_path = auto
|
||||
|
||||
if result_path is not None and result_path.exists():
|
||||
result = json.loads(result_path.read_text())
|
||||
tuned_params = result.get("best_params", {})
|
||||
tuned_cost = result.get("best_cost")
|
||||
log.info("simulating_tuned_params", cost=tuned_cost)
|
||||
|
||||
if window_duration > 0:
|
||||
sim_tuned = windowed_rollout(
|
||||
asset_path=asset_path,
|
||||
params=tuned_params,
|
||||
recording=recording,
|
||||
window_duration=window_duration,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
else:
|
||||
sim_tuned = rollout(
|
||||
asset_path=asset_path,
|
||||
params=tuned_params,
|
||||
actions=actions,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
|
||||
# ── Plot ─────────────────────────────────────────────────────
|
||||
fig, axes = plt.subplots(3, 1, figsize=(14, 8), sharex=True)
|
||||
|
||||
# Motor angle.
|
||||
ax = axes[0]
|
||||
ax.plot(t, np.degrees(recording["motor_angle"]), "k-", lw=1.2, alpha=0.8, label="Real")
|
||||
ax.plot(t, np.degrees(sim_default["motor_angle"]), "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
|
||||
if sim_tuned is not None:
|
||||
ax.plot(t, np.degrees(sim_tuned["motor_angle"]), "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
|
||||
ax.set_ylabel("Motor Angle (°)")
|
||||
ax.legend(loc="upper right", fontsize=8)
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
# Motor velocity.
|
||||
ax = axes[1]
|
||||
ax.plot(t, recording["motor_vel"], "k-", lw=1.2, alpha=0.8, label="Real")
|
||||
ax.plot(t, sim_default["motor_vel"], "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)")
|
||||
if sim_tuned is not None:
|
||||
ax.plot(t, sim_tuned["motor_vel"], "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)")
|
||||
ax.set_ylabel("Motor Velocity (rad/s)")
|
||||
ax.legend(loc="upper right", fontsize=8)
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
# Action.
|
||||
ax = axes[2]
|
||||
ax.plot(t, actions, "b-", lw=0.8, alpha=0.6)
|
||||
ax.set_ylabel("Action (norm)")
|
||||
ax.set_xlabel("Time (s)")
|
||||
ax.grid(True, alpha=0.3)
|
||||
ax.set_ylim(-1.1, 1.1)
|
||||
|
||||
# Title.
|
||||
title = "Motor System Identification — Real vs Simulated"
|
||||
if tuned_cost is not None:
|
||||
from src.sysid.motor.optimize import cost_function
|
||||
|
||||
orig_cost = cost_function(
|
||||
defaults_vector(MOTOR_PARAMS),
|
||||
recording,
|
||||
asset_path,
|
||||
MOTOR_PARAMS,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
window_duration=window_duration,
|
||||
)
|
||||
title += f"\nDefault cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}"
|
||||
improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0
|
||||
title += f" ({improvement:+.1f}%)"
|
||||
|
||||
fig.suptitle(title, fontsize=12)
|
||||
plt.tight_layout()
|
||||
|
||||
if save_path:
|
||||
fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
|
||||
log.info("figure_saved", path=str(save_path))
|
||||
|
||||
if show:
|
||||
plt.show()
|
||||
else:
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
# ── CLI ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Visualise motor system identification results."
|
||||
)
|
||||
parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET)
|
||||
parser.add_argument("--recording", type=str, required=True, help=".npz file")
|
||||
parser.add_argument("--result", type=str, default=None, help="sysid result JSON")
|
||||
parser.add_argument("--sim-dt", type=float, default=0.002)
|
||||
parser.add_argument("--substeps", type=int, default=10)
|
||||
parser.add_argument("--window-duration", type=float, default=0.5)
|
||||
parser.add_argument("--save", type=str, default=None, help="Save figure path")
|
||||
parser.add_argument("--no-show", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
visualize(
|
||||
asset_path=args.asset_path,
|
||||
recording_path=args.recording,
|
||||
result_path=args.result,
|
||||
sim_dt=args.sim_dt,
|
||||
substeps=args.substeps,
|
||||
window_duration=args.window_duration,
|
||||
save_path=args.save,
|
||||
show=not args.no_show,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -4,10 +4,15 @@ Minimises the trajectory-matching cost between a MuJoCo rollout and a
|
||||
recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python
|
||||
CMA-ES with native box-constraint support).
|
||||
|
||||
Motor parameters are **locked** from the motor-only sysid — only
|
||||
pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are
|
||||
optimised. Velocities are optionally preprocessed with Savitzky-Golay
|
||||
differentiation for cleaner targets.
|
||||
|
||||
Usage:
|
||||
python -m src.sysid.optimize \
|
||||
--robot-path assets/rotary_cartpole \
|
||||
--recording assets/rotary_cartpole/recordings/capture_20260311_120000.npz
|
||||
--recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz
|
||||
|
||||
# Shorter run for testing:
|
||||
python -m src.sysid.optimize \
|
||||
@@ -28,6 +33,8 @@ import numpy as np
|
||||
import structlog
|
||||
|
||||
from src.sysid.rollout import (
|
||||
LOCKED_MOTOR_PARAMS,
|
||||
PARAM_SETS,
|
||||
ROTARY_CARTPOLE_PARAMS,
|
||||
ParamSpec,
|
||||
bounds_arrays,
|
||||
@@ -40,6 +47,65 @@ from src.sysid.rollout import (
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
# ── Velocity preprocessing ───────────────────────────────────────────
|
||||
|
||||
|
||||
def _preprocess_recording(
|
||||
recording: dict[str, np.ndarray],
|
||||
preprocess_vel: bool = True,
|
||||
sg_window: int = 7,
|
||||
sg_polyorder: int = 3,
|
||||
) -> dict[str, np.ndarray]:
|
||||
"""Optionally recompute velocities using Savitzky-Golay differentiation.
|
||||
|
||||
Applies SG filtering to both motor_vel and pendulum_vel, replacing
|
||||
the noisy firmware finite-difference velocities with smooth
|
||||
analytical derivatives of the (clean) angle signals.
|
||||
"""
|
||||
if not preprocess_vel:
|
||||
return recording
|
||||
|
||||
from scipy.signal import savgol_filter
|
||||
|
||||
rec = dict(recording)
|
||||
times = rec["time"]
|
||||
dt = float(np.mean(np.diff(times)))
|
||||
|
||||
# Motor velocity.
|
||||
rec["motor_vel_raw"] = rec["motor_vel"].copy()
|
||||
rec["motor_vel"] = savgol_filter(
|
||||
rec["motor_angle"],
|
||||
window_length=sg_window,
|
||||
polyorder=sg_polyorder,
|
||||
deriv=1,
|
||||
delta=dt,
|
||||
)
|
||||
|
||||
# Pendulum velocity.
|
||||
rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy()
|
||||
rec["pendulum_vel"] = savgol_filter(
|
||||
rec["pendulum_angle"],
|
||||
window_length=sg_window,
|
||||
polyorder=sg_polyorder,
|
||||
deriv=1,
|
||||
delta=dt,
|
||||
)
|
||||
|
||||
motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"])
|
||||
pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"])
|
||||
log.info(
|
||||
"velocity_preprocessed",
|
||||
method="savgol",
|
||||
sg_window=sg_window,
|
||||
sg_polyorder=sg_polyorder,
|
||||
dt_ms=f"{dt*1000:.1f}",
|
||||
motor_noise_std=f"{motor_noise:.3f} rad/s",
|
||||
pend_noise_std=f"{pend_noise:.3f} rad/s",
|
||||
)
|
||||
|
||||
return rec
|
||||
|
||||
|
||||
# ── Cost function ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -63,18 +129,35 @@ def _compute_trajectory_cost(
|
||||
recording: dict[str, np.ndarray],
|
||||
pos_weight: float = 1.0,
|
||||
vel_weight: float = 0.1,
|
||||
pendulum_scale: float = 3.0,
|
||||
vel_outlier_threshold: float = 20.0,
|
||||
) -> float:
|
||||
"""Weighted MSE between sim and real trajectories."""
|
||||
"""Weighted MSE between sim and real trajectories.
|
||||
|
||||
pendulum_scale multiplies the pendulum terms relative to motor terms.
|
||||
|
||||
Samples where the *real* pendulum velocity exceeds
|
||||
``vel_outlier_threshold`` (rad/s) are excluded from the velocity
|
||||
terms. These are encoder-wrap artefacts (pendulum swinging past
|
||||
vertical) that no simulator can reproduce.
|
||||
"""
|
||||
motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"])
|
||||
pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"])
|
||||
motor_vel_err = sim["motor_vel"] - recording["motor_vel"]
|
||||
pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"]
|
||||
|
||||
# Mask out encoder-wrap velocity spikes so the optimizer doesn't
|
||||
# waste capacity fitting artefacts.
|
||||
valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold
|
||||
if valid.sum() < len(valid):
|
||||
motor_vel_err = motor_vel_err[valid]
|
||||
pend_vel_err = pend_vel_err[valid]
|
||||
|
||||
return float(
|
||||
pos_weight * np.mean(motor_err**2)
|
||||
+ pos_weight * np.mean(pend_err**2)
|
||||
+ pos_weight * pendulum_scale * np.mean(pend_err**2)
|
||||
+ vel_weight * np.mean(motor_vel_err**2)
|
||||
+ vel_weight * np.mean(pend_vel_err**2)
|
||||
+ vel_weight * pendulum_scale * np.mean(pend_vel_err**2)
|
||||
)
|
||||
|
||||
|
||||
@@ -87,7 +170,9 @@ def cost_function(
|
||||
substeps: int = 10,
|
||||
pos_weight: float = 1.0,
|
||||
vel_weight: float = 0.1,
|
||||
pendulum_scale: float = 3.0,
|
||||
window_duration: float = 0.5,
|
||||
motor_params: dict[str, float] | None = None,
|
||||
) -> float:
|
||||
"""Compute trajectory-matching cost for a candidate parameter vector.
|
||||
|
||||
@@ -113,21 +198,56 @@ def cost_function(
|
||||
window_duration=window_duration,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
motor_params=motor_params,
|
||||
)
|
||||
else:
|
||||
sim = rollout(
|
||||
robot_path=robot_path,
|
||||
params=params,
|
||||
actions=recording["action"],
|
||||
timesteps=recording["time"],
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
motor_params=motor_params,
|
||||
)
|
||||
except Exception as exc:
|
||||
log.warning("rollout_failed", error=str(exc))
|
||||
return 1e6
|
||||
|
||||
return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight)
|
||||
# Check for NaN in sim output.
|
||||
for key in ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel"):
|
||||
if np.any(~np.isfinite(sim[key])):
|
||||
return 1e6
|
||||
|
||||
return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale)
|
||||
|
||||
|
||||
# ── Parallel evaluation helper (module-level for pickling) ───────────
|
||||
|
||||
# Shared state set by the parent process before spawning workers.
|
||||
_par_recording: dict[str, np.ndarray] = {}
|
||||
_par_robot_path: Path = Path(".")
|
||||
_par_specs: list[ParamSpec] = []
|
||||
_par_kwargs: dict = {}
|
||||
|
||||
|
||||
def _init_worker(recording, robot_path, specs, kwargs):
|
||||
"""Initialiser run once per worker process."""
|
||||
global _par_recording, _par_robot_path, _par_specs, _par_kwargs
|
||||
_par_recording = recording
|
||||
_par_robot_path = robot_path
|
||||
_par_specs = specs
|
||||
_par_kwargs = kwargs
|
||||
|
||||
|
||||
def _eval_candidate(x_natural: np.ndarray) -> float:
|
||||
"""Evaluate a single candidate — called in worker processes."""
|
||||
return cost_function(
|
||||
x_natural,
|
||||
_par_recording,
|
||||
_par_robot_path,
|
||||
_par_specs,
|
||||
**_par_kwargs,
|
||||
)
|
||||
|
||||
|
||||
# ── CMA-ES optimisation loop ────────────────────────────────────────
|
||||
@@ -144,17 +264,25 @@ def optimize(
|
||||
substeps: int = 10,
|
||||
pos_weight: float = 1.0,
|
||||
vel_weight: float = 0.1,
|
||||
pendulum_scale: float = 3.0,
|
||||
window_duration: float = 0.5,
|
||||
seed: int = 42,
|
||||
preprocess_vel: bool = True,
|
||||
sg_window: int = 7,
|
||||
sg_polyorder: int = 3,
|
||||
) -> dict:
|
||||
"""Run CMA-ES optimisation and return results.
|
||||
|
||||
Motor parameters are locked from the motor-only sysid.
|
||||
Only pendulum/arm parameters are optimised.
|
||||
|
||||
Returns a dict with:
|
||||
best_params: dict[str, float]
|
||||
best_cost: float
|
||||
history: list of (generation, best_cost) tuples
|
||||
recording: str (path used)
|
||||
specs: list of param names
|
||||
motor_params: dict of locked motor params
|
||||
"""
|
||||
from cmaes import CMA
|
||||
|
||||
@@ -164,8 +292,24 @@ def optimize(
|
||||
if specs is None:
|
||||
specs = ROTARY_CARTPOLE_PARAMS
|
||||
|
||||
motor_params = dict(LOCKED_MOTOR_PARAMS)
|
||||
log.info(
|
||||
"motor_params_locked",
|
||||
n_params=len(motor_params),
|
||||
gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}",
|
||||
)
|
||||
|
||||
# Load recording.
|
||||
recording = dict(np.load(recording_path))
|
||||
|
||||
# Preprocessing: SG velocity recomputation.
|
||||
recording = _preprocess_recording(
|
||||
recording,
|
||||
preprocess_vel=preprocess_vel,
|
||||
sg_window=sg_window,
|
||||
sg_polyorder=sg_polyorder,
|
||||
)
|
||||
|
||||
n_samples = len(recording["time"])
|
||||
duration = recording["time"][-1] - recording["time"][0]
|
||||
n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1
|
||||
@@ -219,28 +363,54 @@ def optimize(
|
||||
|
||||
t0 = time.monotonic()
|
||||
|
||||
# ── Parallel evaluation setup ────────────────────────────────
|
||||
# Each candidate is independent — evaluate them in parallel using
|
||||
# a process pool. Falls back to sequential if n_workers=1.
|
||||
import multiprocessing as mp
|
||||
n_workers = max(1, mp.cpu_count() - 1) # leave 1 core free
|
||||
|
||||
eval_kwargs = dict(
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
pos_weight=pos_weight,
|
||||
vel_weight=vel_weight,
|
||||
pendulum_scale=pendulum_scale,
|
||||
window_duration=window_duration,
|
||||
motor_params=motor_params,
|
||||
)
|
||||
|
||||
log.info("parallel_workers", n_workers=n_workers)
|
||||
|
||||
# Create a persistent pool (avoids per-generation fork overhead).
|
||||
pool = None
|
||||
if n_workers > 1:
|
||||
pool = mp.Pool(
|
||||
n_workers,
|
||||
initializer=_init_worker,
|
||||
initargs=(recording, robot_path, specs, eval_kwargs),
|
||||
)
|
||||
|
||||
for gen in range(max_generations):
|
||||
solutions = []
|
||||
# Ask all candidates first.
|
||||
candidates_normed = []
|
||||
candidates_natural = []
|
||||
for _ in range(optimizer.population_size):
|
||||
x_normed = optimizer.ask()
|
||||
x_natural = from_normed(x_normed)
|
||||
|
||||
# Clip to bounds (CMA-ES can slightly exceed with sampling noise).
|
||||
x_natural = np.clip(x_natural, lo, hi)
|
||||
candidates_normed.append(x_normed)
|
||||
candidates_natural.append(x_natural)
|
||||
|
||||
c = cost_function(
|
||||
x_natural,
|
||||
recording,
|
||||
robot_path,
|
||||
specs,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
pos_weight=pos_weight,
|
||||
vel_weight=vel_weight,
|
||||
window_duration=window_duration,
|
||||
)
|
||||
solutions.append((x_normed, c))
|
||||
# Evaluate in parallel.
|
||||
if pool is not None:
|
||||
costs = pool.map(_eval_candidate, candidates_natural)
|
||||
else:
|
||||
costs = [cost_function(
|
||||
x, recording, robot_path, specs, **eval_kwargs
|
||||
) for x in candidates_natural]
|
||||
|
||||
solutions = list(zip(candidates_normed, costs))
|
||||
for x_natural, c in zip(candidates_natural, costs):
|
||||
if c < best_cost:
|
||||
best_cost = c
|
||||
best_params_vec = x_natural.copy()
|
||||
@@ -259,6 +429,12 @@ def optimize(
|
||||
)
|
||||
|
||||
total_time = time.monotonic() - t0
|
||||
|
||||
# Clean up process pool.
|
||||
if pool is not None:
|
||||
pool.close()
|
||||
pool.join()
|
||||
|
||||
best_params = params_to_dict(best_params_vec, specs)
|
||||
|
||||
log.info(
|
||||
@@ -289,6 +465,8 @@ def optimize(
|
||||
"recording": str(recording_path),
|
||||
"param_names": [s.name for s in specs],
|
||||
"defaults": {s.name: s.default for s in specs},
|
||||
"motor_params": motor_params,
|
||||
"preprocess_vel": preprocess_vel,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
}
|
||||
|
||||
@@ -319,6 +497,8 @@ def main() -> None:
|
||||
parser.add_argument("--substeps", type=int, default=10)
|
||||
parser.add_argument("--pos-weight", type=float, default=1.0)
|
||||
parser.add_argument("--vel-weight", type=float, default=0.1)
|
||||
parser.add_argument("--pendulum-scale", type=float, default=3.0,
|
||||
help="Multiplier for pendulum terms relative to motor (default 3.0)")
|
||||
parser.add_argument(
|
||||
"--window-duration",
|
||||
type=float,
|
||||
@@ -331,11 +511,30 @@ def main() -> None:
|
||||
action="store_true",
|
||||
help="Skip exporting tuned files (results JSON only)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-preprocess-vel",
|
||||
action="store_true",
|
||||
help="Skip Savitzky-Golay velocity preprocessing",
|
||||
)
|
||||
parser.add_argument("--sg-window", type=int, default=7,
|
||||
help="Savitzky-Golay window length (odd, default 7)")
|
||||
parser.add_argument("--sg-polyorder", type=int, default=3,
|
||||
help="Savitzky-Golay polynomial order (default 3)")
|
||||
parser.add_argument(
|
||||
"--param-set",
|
||||
type=str,
|
||||
default="full",
|
||||
choices=list(PARAM_SETS.keys()),
|
||||
help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
specs = PARAM_SETS[args.param_set]
|
||||
|
||||
result = optimize(
|
||||
robot_path=args.robot_path,
|
||||
recording_path=args.recording,
|
||||
specs=specs,
|
||||
sigma0=args.sigma0,
|
||||
population_size=args.population_size,
|
||||
max_generations=args.max_generations,
|
||||
@@ -343,8 +542,12 @@ def main() -> None:
|
||||
substeps=args.substeps,
|
||||
pos_weight=args.pos_weight,
|
||||
vel_weight=args.vel_weight,
|
||||
pendulum_scale=args.pendulum_scale,
|
||||
window_duration=args.window_duration,
|
||||
seed=args.seed,
|
||||
preprocess_vel=not args.no_preprocess_vel,
|
||||
sg_window=args.sg_window,
|
||||
sg_polyorder=args.sg_polyorder,
|
||||
)
|
||||
|
||||
# Save results JSON.
|
||||
@@ -369,6 +572,7 @@ def main() -> None:
|
||||
export_tuned_files(
|
||||
robot_path=args.robot_path,
|
||||
params=result["best_params"],
|
||||
motor_params=result.get("motor_params"),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -6,22 +6,51 @@ the simulated trajectory for comparison with the real recording.
|
||||
|
||||
This module is the inner loop of the CMA-ES optimizer: it is called once
|
||||
per candidate parameter vector per generation.
|
||||
|
||||
Motor parameters are **locked** from the motor-only sysid result.
|
||||
The optimizer only fits
|
||||
pendulum/arm inertial parameters, pendulum joint dynamics, and
|
||||
``ctrl_limit``. The asymmetric motor model (deadzone, gear compensation,
|
||||
Coulomb friction, viscous damping, quadratic drag, back-EMF) is applied
|
||||
via ``ActuatorConfig.transform_ctrl()`` and ``compute_motor_force()``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import dataclasses
|
||||
import math
|
||||
import os
|
||||
import tempfile
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import mujoco
|
||||
import numpy as np
|
||||
import yaml
|
||||
|
||||
from src.core.robot import ActuatorConfig, JointConfig, RobotConfig
|
||||
from src.runners.mujoco import ActuatorLimits, load_mujoco_model
|
||||
from src.sysid._urdf import patch_link_inertials
|
||||
|
||||
|
||||
# ── Locked motor parameters (from motor-only sysid) ─────────────────
|
||||
# These are FIXED and not optimised. They come from the 12-param model
|
||||
# in robot.yaml (from motor-only sysid, cost 0.862).
|
||||
|
||||
LOCKED_MOTOR_PARAMS: dict[str, float] = {
|
||||
"actuator_gear_pos": 0.424182,
|
||||
"actuator_gear_neg": 0.425031,
|
||||
"actuator_filter_tau": 0.00503506,
|
||||
"motor_damping_pos": 0.00202682,
|
||||
"motor_damping_neg": 0.0146651,
|
||||
"motor_armature": 0.00277342,
|
||||
"motor_frictionloss_pos": 0.0573282,
|
||||
"motor_frictionloss_neg": 0.0533549,
|
||||
"viscous_quadratic": 0.000285329,
|
||||
"back_emf_gain": 0.00675809,
|
||||
"motor_deadzone_pos": 0.141291,
|
||||
"motor_deadzone_neg": 0.0780148,
|
||||
}
|
||||
|
||||
|
||||
# ── Tunable parameter specification ──────────────────────────────────
|
||||
|
||||
@@ -37,33 +66,58 @@ class ParamSpec:
|
||||
log_scale: bool = False # optimise in log-space (masses, inertias)
|
||||
|
||||
|
||||
# Default parameter specs for the rotary cartpole.
|
||||
# Pendulum sysid parameters — motor params are LOCKED (not here).
|
||||
# Order matters: the optimizer maps a flat vector to these specs.
|
||||
# Defaults are from the URDF exported by Fusion 360.
|
||||
ROTARY_CARTPOLE_PARAMS: list[ParamSpec] = [
|
||||
# ── Arm link (URDF) ──────────────────────────────────────────
|
||||
ParamSpec("arm_mass", 0.010, 0.003, 0.05, log_scale=True),
|
||||
ParamSpec("arm_com_x", 0.00005, -0.02, 0.02),
|
||||
ParamSpec("arm_com_y", 0.0065, -0.01, 0.02),
|
||||
ParamSpec("arm_com_z", 0.00563, -0.01, 0.02),
|
||||
ParamSpec("arm_mass", 0.02110, 0.005, 0.08, log_scale=True),
|
||||
ParamSpec("arm_com_x", -0.00710, -0.03, 0.03),
|
||||
ParamSpec("arm_com_y", 0.00085, -0.02, 0.02),
|
||||
ParamSpec("arm_com_z", 0.00795, -0.02, 0.02),
|
||||
# ── Pendulum link (URDF) ─────────────────────────────────────
|
||||
ParamSpec("pendulum_mass", 0.015, 0.005, 0.05, log_scale=True),
|
||||
ParamSpec("pendulum_com_x", 0.1583, 0.05, 0.25),
|
||||
ParamSpec("pendulum_com_y", -0.0983, -0.20, 0.0),
|
||||
ParamSpec("pendulum_com_z", 0.0, -0.05, 0.05),
|
||||
ParamSpec("pendulum_ixx", 6.16e-06, 1e-07, 1e-04, log_scale=True),
|
||||
ParamSpec("pendulum_iyy", 6.16e-06, 1e-07, 1e-04, log_scale=True),
|
||||
ParamSpec("pendulum_izz", 1.23e-05, 1e-07, 1e-04, log_scale=True),
|
||||
ParamSpec("pendulum_ixy", 6.10e-06, -1e-04, 1e-04),
|
||||
# ── Actuator / joint dynamics (robot.yaml) ───────────────────
|
||||
ParamSpec("actuator_gear", 0.064, 0.01, 0.2, log_scale=True),
|
||||
ParamSpec("actuator_filter_tau", 0.03, 0.005, 0.15),
|
||||
ParamSpec("motor_damping", 0.003, 1e-4, 0.05, log_scale=True),
|
||||
ParamSpec("pendulum_damping", 0.0001, 1e-5, 0.01, log_scale=True),
|
||||
ParamSpec("motor_armature", 0.0001, 1e-5, 0.01, log_scale=True),
|
||||
ParamSpec("motor_frictionloss", 0.03, 0.001, 0.1, log_scale=True),
|
||||
ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
|
||||
ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
|
||||
ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
|
||||
ParamSpec("pendulum_com_z", -0.00346, -0.05, 0.05),
|
||||
ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
|
||||
ParamSpec("pendulum_iyy", 3.70e-05, 1e-07, 1e-03, log_scale=True),
|
||||
ParamSpec("pendulum_izz", 7.83e-05, 1e-07, 1e-03, log_scale=True),
|
||||
ParamSpec("pendulum_ixy", -6.93e-06, -1e-03, 1e-03),
|
||||
# ── Pendulum joint dynamics ──────────────────────────────────
|
||||
ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
|
||||
ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
|
||||
# ── Hardware realism (control pipeline) ────────────────────
|
||||
ParamSpec("ctrl_limit", 0.588, 0.45, 0.70), # MAX_MOTOR_SPEED / 255
|
||||
]
|
||||
|
||||
|
||||
# Extended set: full params + motor armature (compensates for the
|
||||
# motor-only sysid having captured arm/pendulum loading in armature).
|
||||
EXTENDED_PARAMS: list[ParamSpec] = ROTARY_CARTPOLE_PARAMS + [
|
||||
ParamSpec("motor_armature", 0.00277, 0.0005, 0.02, log_scale=True),
|
||||
]
|
||||
|
||||
|
||||
# Reduced set: only the 6 most impactful pendulum parameters.
|
||||
# Good for a fast first pass — converges in ~50 generations.
|
||||
REDUCED_PARAMS: list[ParamSpec] = [
|
||||
ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True),
|
||||
ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15),
|
||||
ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0),
|
||||
ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True),
|
||||
ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True),
|
||||
ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True),
|
||||
]
|
||||
|
||||
|
||||
PARAM_SETS: dict[str, list[ParamSpec]] = {
|
||||
"full": ROTARY_CARTPOLE_PARAMS,
|
||||
"extended": EXTENDED_PARAMS,
|
||||
"reduced": REDUCED_PARAMS,
|
||||
}
|
||||
|
||||
|
||||
def params_to_dict(
|
||||
values: np.ndarray, specs: list[ParamSpec] | None = None
|
||||
) -> dict[str, float]:
|
||||
@@ -97,182 +151,93 @@ def bounds_arrays(
|
||||
def _build_model(
|
||||
robot_path: Path,
|
||||
params: dict[str, float],
|
||||
) -> mujoco.MjModel:
|
||||
"""Build a MuJoCo model from URDF + robot.yaml with parameter overrides.
|
||||
motor_params: dict[str, float] | None = None,
|
||||
) -> tuple[mujoco.MjModel, ActuatorConfig]:
|
||||
"""Build a MuJoCo model with sysid overrides.
|
||||
|
||||
Follows the same two-step approach as ``MuJoCoRunner._load_model()``:
|
||||
1. Parse URDF, inject meshdir, load into MuJoCo
|
||||
2. Export MJCF, inject actuators + joint overrides + param overrides, reload
|
||||
Returns (model, actuator) — use ``actuator.transform_ctrl()`` and
|
||||
``actuator.compute_motor_force()`` in the rollout loop.
|
||||
"""
|
||||
if motor_params is None:
|
||||
motor_params = LOCKED_MOTOR_PARAMS
|
||||
|
||||
robot_path = Path(robot_path).resolve()
|
||||
|
||||
# ── Patch URDF inertial parameters to a temp file ────────────
|
||||
robot_yaml = yaml.safe_load((robot_path / "robot.yaml").read_text())
|
||||
urdf_path = robot_path / robot_yaml["urdf"]
|
||||
|
||||
# ── Step 1: Load URDF ────────────────────────────────────────
|
||||
tree = ET.parse(urdf_path)
|
||||
root = tree.getroot()
|
||||
patch_link_inertials(tree.getroot(), params)
|
||||
|
||||
# Inject meshdir compiler directive.
|
||||
meshdir = None
|
||||
for mesh_el in root.iter("mesh"):
|
||||
fn = mesh_el.get("filename", "")
|
||||
parent = str(Path(fn).parent)
|
||||
if parent and parent != ".":
|
||||
meshdir = parent
|
||||
break
|
||||
if meshdir:
|
||||
mj_ext = ET.SubElement(root, "mujoco")
|
||||
ET.SubElement(
|
||||
mj_ext, "compiler", attrib={"meshdir": meshdir, "balanceinertia": "true"}
|
||||
)
|
||||
fd, tmp_urdf = tempfile.mkstemp(
|
||||
suffix=".urdf", prefix="_sysid_", dir=str(robot_path),
|
||||
)
|
||||
os.close(fd)
|
||||
tmp_urdf_path = Path(tmp_urdf)
|
||||
tree.write(str(tmp_urdf_path), xml_declaration=True, encoding="unicode")
|
||||
|
||||
# Override URDF inertial parameters BEFORE MuJoCo loading.
|
||||
for link in root.iter("link"):
|
||||
link_name = link.get("name", "")
|
||||
inertial = link.find("inertial")
|
||||
if inertial is None:
|
||||
continue
|
||||
# ── Build RobotConfig with full motor sysid values ───────────
|
||||
gear_pos = motor_params.get("actuator_gear_pos", 0.424182)
|
||||
gear_neg = motor_params.get("actuator_gear_neg", 0.425031)
|
||||
motor_armature = params.get(
|
||||
"motor_armature",
|
||||
motor_params.get("motor_armature", 0.00277342),
|
||||
)
|
||||
pend_damping = params.get("pendulum_damping", 0.0001)
|
||||
pend_frictionloss = params.get("pendulum_frictionloss", 0.0001)
|
||||
|
||||
if link_name == "arm":
|
||||
_set_mass(inertial, params.get("arm_mass"))
|
||||
_set_com(
|
||||
inertial,
|
||||
params.get("arm_com_x"),
|
||||
params.get("arm_com_y"),
|
||||
params.get("arm_com_z"),
|
||||
)
|
||||
act_cfg = robot_yaml["actuators"][0]
|
||||
ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
|
||||
|
||||
elif link_name == "pendulum":
|
||||
_set_mass(inertial, params.get("pendulum_mass"))
|
||||
_set_com(
|
||||
inertial,
|
||||
params.get("pendulum_com_x"),
|
||||
params.get("pendulum_com_y"),
|
||||
params.get("pendulum_com_z"),
|
||||
)
|
||||
_set_inertia(
|
||||
inertial,
|
||||
ixx=params.get("pendulum_ixx"),
|
||||
iyy=params.get("pendulum_iyy"),
|
||||
izz=params.get("pendulum_izz"),
|
||||
ixy=params.get("pendulum_ixy"),
|
||||
)
|
||||
actuator = ActuatorConfig(
|
||||
joint=act_cfg["joint"],
|
||||
type="motor",
|
||||
gear=(gear_pos, gear_neg),
|
||||
ctrl_range=(ctrl_lo, ctrl_hi),
|
||||
deadzone=(
|
||||
motor_params.get("motor_deadzone_pos", 0.141),
|
||||
motor_params.get("motor_deadzone_neg", 0.078),
|
||||
),
|
||||
damping=(
|
||||
motor_params.get("motor_damping_pos", 0.002),
|
||||
motor_params.get("motor_damping_neg", 0.015),
|
||||
),
|
||||
frictionloss=(
|
||||
motor_params.get("motor_frictionloss_pos", 0.057),
|
||||
motor_params.get("motor_frictionloss_neg", 0.053),
|
||||
),
|
||||
filter_tau=motor_params.get("actuator_filter_tau", 0.005),
|
||||
viscous_quadratic=motor_params.get("viscous_quadratic", 0.000285),
|
||||
back_emf_gain=motor_params.get("back_emf_gain", 0.00676),
|
||||
)
|
||||
|
||||
robot = RobotConfig(
|
||||
urdf_path=tmp_urdf_path,
|
||||
actuators=[actuator],
|
||||
joints={
|
||||
"motor_joint": JointConfig(
|
||||
damping=0.0,
|
||||
armature=motor_armature,
|
||||
frictionloss=0.0,
|
||||
),
|
||||
"pendulum_joint": JointConfig(
|
||||
damping=pend_damping,
|
||||
frictionloss=pend_frictionloss,
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
# Write temp URDF and load.
|
||||
tmp_urdf = robot_path / "_tmp_sysid_load.urdf"
|
||||
tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode")
|
||||
try:
|
||||
model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf))
|
||||
model = load_mujoco_model(robot)
|
||||
finally:
|
||||
tmp_urdf.unlink(missing_ok=True)
|
||||
tmp_urdf_path.unlink(missing_ok=True)
|
||||
|
||||
# ── Step 2: Export MJCF, inject actuators + overrides ────────
|
||||
tmp_mjcf = robot_path / "_tmp_sysid_inject.xml"
|
||||
try:
|
||||
mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw)
|
||||
mjcf_root = ET.fromstring(tmp_mjcf.read_text())
|
||||
|
||||
# Actuator.
|
||||
gear = params.get("actuator_gear", robot_yaml["actuators"][0].get("gear", 0.064))
|
||||
filter_tau = params.get(
|
||||
"actuator_filter_tau",
|
||||
robot_yaml["actuators"][0].get("filter_tau", 0.03),
|
||||
)
|
||||
act_cfg = robot_yaml["actuators"][0]
|
||||
ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0])
|
||||
|
||||
act_elem = ET.SubElement(mjcf_root, "actuator")
|
||||
attribs: dict[str, str] = {
|
||||
"name": f"{act_cfg['joint']}_motor",
|
||||
"joint": act_cfg["joint"],
|
||||
"gear": str(gear),
|
||||
"ctrlrange": f"{ctrl_lo} {ctrl_hi}",
|
||||
}
|
||||
if filter_tau > 0:
|
||||
attribs["dyntype"] = "filter"
|
||||
attribs["dynprm"] = str(filter_tau)
|
||||
attribs["gaintype"] = "fixed"
|
||||
attribs["biastype"] = "none"
|
||||
ET.SubElement(act_elem, "general", attrib=attribs)
|
||||
else:
|
||||
ET.SubElement(act_elem, "motor", attrib=attribs)
|
||||
|
||||
# Joint overrides.
|
||||
motor_damping = params.get("motor_damping", 0.003)
|
||||
pend_damping = params.get("pendulum_damping", 0.0001)
|
||||
motor_armature = params.get("motor_armature", 0.0001)
|
||||
motor_frictionloss = params.get("motor_frictionloss", 0.03)
|
||||
|
||||
for body in mjcf_root.iter("body"):
|
||||
for jnt in body.findall("joint"):
|
||||
name = jnt.get("name")
|
||||
if name == "motor_joint":
|
||||
jnt.set("damping", str(motor_damping))
|
||||
jnt.set("armature", str(motor_armature))
|
||||
jnt.set("frictionloss", str(motor_frictionloss))
|
||||
elif name == "pendulum_joint":
|
||||
jnt.set("damping", str(pend_damping))
|
||||
|
||||
# Disable self-collision.
|
||||
for geom in mjcf_root.iter("geom"):
|
||||
geom.set("contype", "0")
|
||||
geom.set("conaffinity", "0")
|
||||
|
||||
modified_xml = ET.tostring(mjcf_root, encoding="unicode")
|
||||
tmp_mjcf.write_text(modified_xml)
|
||||
model = mujoco.MjModel.from_xml_path(str(tmp_mjcf))
|
||||
finally:
|
||||
tmp_mjcf.unlink(missing_ok=True)
|
||||
|
||||
return model
|
||||
return model, actuator
|
||||
|
||||
|
||||
def _set_mass(inertial: ET.Element, mass: float | None) -> None:
|
||||
if mass is None:
|
||||
return
|
||||
mass_el = inertial.find("mass")
|
||||
if mass_el is not None:
|
||||
mass_el.set("value", str(mass))
|
||||
|
||||
|
||||
def _set_com(
|
||||
inertial: ET.Element,
|
||||
x: float | None,
|
||||
y: float | None,
|
||||
z: float | None,
|
||||
) -> None:
|
||||
origin = inertial.find("origin")
|
||||
if origin is None:
|
||||
return
|
||||
xyz = origin.get("xyz", "0 0 0").split()
|
||||
if x is not None:
|
||||
xyz[0] = str(x)
|
||||
if y is not None:
|
||||
xyz[1] = str(y)
|
||||
if z is not None:
|
||||
xyz[2] = str(z)
|
||||
origin.set("xyz", " ".join(xyz))
|
||||
|
||||
|
||||
def _set_inertia(
|
||||
inertial: ET.Element,
|
||||
ixx: float | None = None,
|
||||
iyy: float | None = None,
|
||||
izz: float | None = None,
|
||||
ixy: float | None = None,
|
||||
iyz: float | None = None,
|
||||
ixz: float | None = None,
|
||||
) -> None:
|
||||
ine = inertial.find("inertia")
|
||||
if ine is None:
|
||||
return
|
||||
for attr, val in [
|
||||
("ixx", ixx), ("iyy", iyy), ("izz", izz),
|
||||
("ixy", ixy), ("iyz", iyz), ("ixz", ixz),
|
||||
]:
|
||||
if val is not None:
|
||||
ine.set(attr, str(val))
|
||||
|
||||
|
||||
# ── Simulation rollout ───────────────────────────────────────────────
|
||||
|
||||
@@ -281,72 +246,58 @@ def rollout(
|
||||
robot_path: str | Path,
|
||||
params: dict[str, float],
|
||||
actions: np.ndarray,
|
||||
timesteps: np.ndarray,
|
||||
sim_dt: float = 0.002,
|
||||
substeps: int = 10,
|
||||
motor_params: dict[str, float] | None = None,
|
||||
) -> dict[str, np.ndarray]:
|
||||
"""Replay recorded actions in MuJoCo with overridden parameters.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
robot_path : asset directory
|
||||
params : named parameter overrides
|
||||
params : named parameter overrides (pendulum/arm only)
|
||||
actions : (N,) normalised actions [-1, 1] from the recording
|
||||
timesteps : (N,) wall-clock times (seconds) from the recording
|
||||
sim_dt : MuJoCo physics timestep
|
||||
substeps : physics substeps per control step
|
||||
motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict with keys: motor_angle, motor_vel, pendulum_angle, pendulum_vel
|
||||
Each is an (N,) numpy array of simulated values.
|
||||
"""
|
||||
if motor_params is None:
|
||||
motor_params = LOCKED_MOTOR_PARAMS
|
||||
|
||||
robot_path = Path(robot_path).resolve()
|
||||
model = _build_model(robot_path, params)
|
||||
model, actuator = _build_model(robot_path, params, motor_params)
|
||||
model.opt.timestep = sim_dt
|
||||
data = mujoco.MjData(model)
|
||||
|
||||
# Start from pendulum hanging down (qpos=0 is down per URDF convention).
|
||||
mujoco.mj_resetData(model, data)
|
||||
|
||||
# Control dt derived from actual recording sample rate.
|
||||
n = len(actions)
|
||||
ctrl_dt = sim_dt * substeps
|
||||
ctrl_limit = params.get("ctrl_limit", 0.588)
|
||||
|
||||
# Pre-allocate output.
|
||||
sim_motor_angle = np.zeros(n, dtype=np.float64)
|
||||
sim_motor_vel = np.zeros(n, dtype=np.float64)
|
||||
sim_pend_angle = np.zeros(n, dtype=np.float64)
|
||||
sim_pend_vel = np.zeros(n, dtype=np.float64)
|
||||
|
||||
# Extract actuator limit info for software limit switch.
|
||||
nu = model.nu
|
||||
if nu > 0:
|
||||
jnt_id = model.actuator_trnid[0, 0]
|
||||
jnt_limited = bool(model.jnt_limited[jnt_id])
|
||||
jnt_lo = model.jnt_range[jnt_id, 0]
|
||||
jnt_hi = model.jnt_range[jnt_id, 1]
|
||||
gear_sign = float(np.sign(model.actuator_gear[0, 0]))
|
||||
else:
|
||||
jnt_limited = False
|
||||
jnt_lo = jnt_hi = gear_sign = 0.0
|
||||
limits = ActuatorLimits(model)
|
||||
|
||||
for i in range(n):
|
||||
data.ctrl[0] = actions[i]
|
||||
action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
|
||||
ctrl = actuator.transform_ctrl(action)
|
||||
data.ctrl[0] = ctrl
|
||||
|
||||
for _ in range(substeps):
|
||||
# Software limit switch (mirrors MuJoCoRunner).
|
||||
if jnt_limited and nu > 0:
|
||||
pos = data.qpos[jnt_id]
|
||||
if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
|
||||
data.ctrl[0] = 0.0
|
||||
elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
|
||||
data.ctrl[0] = 0.0
|
||||
limits.enforce(model, data)
|
||||
data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
|
||||
mujoco.mj_step(model, data)
|
||||
|
||||
sim_motor_angle[i] = data.qpos[0]
|
||||
sim_motor_vel[i] = data.qvel[0]
|
||||
sim_pend_angle[i] = data.qpos[1]
|
||||
sim_motor_vel[i] = data.qvel[0]
|
||||
sim_pend_vel[i] = data.qvel[1]
|
||||
|
||||
return {
|
||||
@@ -364,6 +315,7 @@ def windowed_rollout(
|
||||
window_duration: float = 0.5,
|
||||
sim_dt: float = 0.002,
|
||||
substeps: int = 10,
|
||||
motor_params: dict[str, float] | None = None,
|
||||
) -> dict[str, np.ndarray | float]:
|
||||
"""Multiple-shooting rollout — split recording into short windows.
|
||||
|
||||
@@ -372,18 +324,19 @@ def windowed_rollout(
|
||||
2. Replay the recorded actions within the window.
|
||||
3. Record the simulated output.
|
||||
|
||||
This prevents error accumulation across the full trajectory, giving
|
||||
a much smoother cost landscape for the optimizer.
|
||||
Motor dynamics (asymmetric friction, damping, back-EMF, etc.) are
|
||||
applied via qfrc_applied using the locked motor sysid parameters.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
robot_path : asset directory
|
||||
params : named parameter overrides
|
||||
params : named parameter overrides (pendulum/arm only)
|
||||
recording : dict with keys time, action, motor_angle, motor_vel,
|
||||
pendulum_angle, pendulum_vel (all 1D arrays of length N)
|
||||
window_duration : length of each shooting window in seconds
|
||||
sim_dt : MuJoCo physics timestep
|
||||
substeps : physics substeps per control step
|
||||
motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS)
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -392,8 +345,11 @@ def windowed_rollout(
|
||||
(stitched from per-window simulations)
|
||||
n_windows — number of windows used
|
||||
"""
|
||||
if motor_params is None:
|
||||
motor_params = LOCKED_MOTOR_PARAMS
|
||||
|
||||
robot_path = Path(robot_path).resolve()
|
||||
model = _build_model(robot_path, params)
|
||||
model, actuator = _build_model(robot_path, params, motor_params)
|
||||
model.opt.timestep = sim_dt
|
||||
data = mujoco.MjData(model)
|
||||
|
||||
@@ -405,67 +361,50 @@ def windowed_rollout(
|
||||
real_pend_vel = recording["pendulum_vel"]
|
||||
n = len(actions)
|
||||
|
||||
# Pre-allocate output (stitched from all windows).
|
||||
sim_motor_angle = np.zeros(n, dtype=np.float64)
|
||||
sim_motor_vel = np.zeros(n, dtype=np.float64)
|
||||
sim_pend_angle = np.zeros(n, dtype=np.float64)
|
||||
sim_pend_vel = np.zeros(n, dtype=np.float64)
|
||||
|
||||
# Extract actuator limit info.
|
||||
nu = model.nu
|
||||
if nu > 0:
|
||||
jnt_id = model.actuator_trnid[0, 0]
|
||||
jnt_limited = bool(model.jnt_limited[jnt_id])
|
||||
jnt_lo = model.jnt_range[jnt_id, 0]
|
||||
jnt_hi = model.jnt_range[jnt_id, 1]
|
||||
gear_sign = float(np.sign(model.actuator_gear[0, 0]))
|
||||
else:
|
||||
jnt_limited = False
|
||||
jnt_lo = jnt_hi = gear_sign = 0.0
|
||||
limits = ActuatorLimits(model)
|
||||
|
||||
# Compute window boundaries from recording timestamps.
|
||||
t0 = times[0]
|
||||
t_end = times[-1]
|
||||
window_starts: list[int] = [] # indices into the recording
|
||||
window_starts: list[int] = []
|
||||
current_t = t0
|
||||
while current_t < t_end:
|
||||
# Find the index closest to current_t.
|
||||
idx = int(np.searchsorted(times, current_t))
|
||||
idx = min(idx, n - 1)
|
||||
window_starts.append(idx)
|
||||
current_t += window_duration
|
||||
|
||||
ctrl_limit = params.get("ctrl_limit", 0.588)
|
||||
n_windows = len(window_starts)
|
||||
|
||||
for w, w_start in enumerate(window_starts):
|
||||
# Window end: next window start, or end of recording.
|
||||
w_end = window_starts[w + 1] if w + 1 < n_windows else n
|
||||
|
||||
# Initialize MuJoCo state from real data at window start.
|
||||
mujoco.mj_resetData(model, data)
|
||||
data.qpos[0] = real_motor[w_start]
|
||||
data.qpos[1] = real_pend[w_start]
|
||||
data.qvel[0] = real_motor_vel[w_start]
|
||||
data.qvel[1] = real_pend_vel[w_start]
|
||||
data.ctrl[:] = 0.0
|
||||
# Forward kinematics to make state consistent.
|
||||
mujoco.mj_forward(model, data)
|
||||
|
||||
for i in range(w_start, w_end):
|
||||
data.ctrl[0] = actions[i]
|
||||
action = max(-ctrl_limit, min(ctrl_limit, float(actions[i])))
|
||||
ctrl = actuator.transform_ctrl(action)
|
||||
data.ctrl[0] = ctrl
|
||||
|
||||
for _ in range(substeps):
|
||||
if jnt_limited and nu > 0:
|
||||
pos = data.qpos[jnt_id]
|
||||
if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0:
|
||||
data.ctrl[0] = 0.0
|
||||
elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0:
|
||||
data.ctrl[0] = 0.0
|
||||
limits.enforce(model, data)
|
||||
data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl)
|
||||
mujoco.mj_step(model, data)
|
||||
|
||||
sim_motor_angle[i] = data.qpos[0]
|
||||
sim_motor_vel[i] = data.qvel[0]
|
||||
sim_pend_angle[i] = data.qpos[1]
|
||||
sim_motor_vel[i] = data.qvel[0]
|
||||
sim_pend_vel[i] = data.qvel[1]
|
||||
|
||||
return {
|
||||
|
||||
@@ -20,7 +20,6 @@ from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
@@ -29,6 +28,30 @@ import structlog
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
def _run_sim(
|
||||
robot_path: Path,
|
||||
params: dict[str, float],
|
||||
recording: dict[str, np.ndarray],
|
||||
window_duration: float,
|
||||
sim_dt: float,
|
||||
substeps: int,
|
||||
motor_params: dict[str, float],
|
||||
) -> dict[str, np.ndarray]:
|
||||
"""Run windowed or open-loop rollout depending on window_duration."""
|
||||
from src.sysid.rollout import rollout, windowed_rollout
|
||||
|
||||
if window_duration > 0:
|
||||
return windowed_rollout(
|
||||
robot_path=robot_path, params=params, recording=recording,
|
||||
window_duration=window_duration, sim_dt=sim_dt,
|
||||
substeps=substeps, motor_params=motor_params,
|
||||
)
|
||||
return rollout(
|
||||
robot_path=robot_path, params=params, actions=recording["action"],
|
||||
substeps=substeps, motor_params=motor_params,
|
||||
)
|
||||
|
||||
|
||||
def visualize(
|
||||
robot_path: str | Path,
|
||||
recording_path: str | Path,
|
||||
@@ -39,31 +62,27 @@ def visualize(
|
||||
save_path: str | Path | None = None,
|
||||
show: bool = True,
|
||||
) -> None:
|
||||
"""Generate comparison plot.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
robot_path : robot asset directory
|
||||
recording_path : .npz file from capture
|
||||
result_path : sysid_result.json with best_params (optional)
|
||||
sim_dt / substeps : physics settings for rollout
|
||||
window_duration : shooting window length (s); 0 = open-loop
|
||||
save_path : if provided, save figure to this path (PNG, PDF, …)
|
||||
show : if True, display interactive matplotlib window
|
||||
"""
|
||||
"""Generate comparison plot."""
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from src.sysid.rollout import (
|
||||
LOCKED_MOTOR_PARAMS,
|
||||
ROTARY_CARTPOLE_PARAMS,
|
||||
defaults_vector,
|
||||
params_to_dict,
|
||||
rollout,
|
||||
windowed_rollout,
|
||||
)
|
||||
|
||||
robot_path = Path(robot_path).resolve()
|
||||
recording = dict(np.load(recording_path))
|
||||
|
||||
motor_params = LOCKED_MOTOR_PARAMS
|
||||
|
||||
sim_kwargs = dict(
|
||||
robot_path=robot_path, recording=recording,
|
||||
window_duration=window_duration, sim_dt=sim_dt,
|
||||
substeps=substeps, motor_params=motor_params,
|
||||
)
|
||||
|
||||
t = recording["time"]
|
||||
actions = recording["action"]
|
||||
|
||||
@@ -72,26 +91,15 @@ def visualize(
|
||||
defaults_vector(ROTARY_CARTPOLE_PARAMS), ROTARY_CARTPOLE_PARAMS
|
||||
)
|
||||
log.info("simulating_default_params", windowed=window_duration > 0)
|
||||
if window_duration > 0:
|
||||
sim_default = windowed_rollout(
|
||||
robot_path=robot_path,
|
||||
params=default_params,
|
||||
recording=recording,
|
||||
window_duration=window_duration,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
else:
|
||||
sim_default = rollout(
|
||||
robot_path=robot_path,
|
||||
params=default_params,
|
||||
actions=actions,
|
||||
timesteps=t,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
sim_default = _run_sim(params=default_params, **sim_kwargs)
|
||||
|
||||
# ── Simulate with tuned parameters (if available) ────────────
|
||||
# Resolve result path (explicit or auto-detect).
|
||||
if result_path is None:
|
||||
auto = robot_path / "sysid_result.json"
|
||||
if auto.exists():
|
||||
result_path = auto
|
||||
|
||||
sim_tuned = None
|
||||
tuned_cost = None
|
||||
if result_path is not None:
|
||||
@@ -101,64 +109,21 @@ def visualize(
|
||||
tuned_params = result.get("best_params", {})
|
||||
tuned_cost = result.get("best_cost")
|
||||
log.info("simulating_tuned_params", cost=tuned_cost)
|
||||
if window_duration > 0:
|
||||
sim_tuned = windowed_rollout(
|
||||
robot_path=robot_path,
|
||||
params=tuned_params,
|
||||
recording=recording,
|
||||
window_duration=window_duration,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
else:
|
||||
sim_tuned = rollout(
|
||||
robot_path=robot_path,
|
||||
params=tuned_params,
|
||||
actions=actions,
|
||||
timesteps=t,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
sim_tuned = _run_sim(params=tuned_params, **sim_kwargs)
|
||||
else:
|
||||
log.warning("result_file_not_found", path=str(result_path))
|
||||
else:
|
||||
# Auto-detect sysid_result.json in robot_path.
|
||||
auto_result = robot_path / "sysid_result.json"
|
||||
if auto_result.exists():
|
||||
result = json.loads(auto_result.read_text())
|
||||
tuned_params = result.get("best_params", {})
|
||||
tuned_cost = result.get("best_cost")
|
||||
log.info("auto_detected_tuned_params", cost=tuned_cost)
|
||||
if window_duration > 0:
|
||||
sim_tuned = windowed_rollout(
|
||||
robot_path=robot_path,
|
||||
params=tuned_params,
|
||||
recording=recording,
|
||||
window_duration=window_duration,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
else:
|
||||
sim_tuned = rollout(
|
||||
robot_path=robot_path,
|
||||
params=tuned_params,
|
||||
actions=actions,
|
||||
timesteps=t,
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
)
|
||||
|
||||
# ── Plot ─────────────────────────────────────────────────────
|
||||
fig, axes = plt.subplots(5, 1, figsize=(14, 12), sharex=True)
|
||||
|
||||
channels = [
|
||||
("motor_angle", "Motor Angle (rad)", True),
|
||||
("motor_vel", "Motor Velocity (rad/s)", False),
|
||||
("pendulum_angle", "Pendulum Angle (rad)", True),
|
||||
("pendulum_vel", "Pendulum Velocity (rad/s)", False),
|
||||
("motor_angle", "Motor Angle (rad)"),
|
||||
("motor_vel", "Motor Velocity (rad/s)"),
|
||||
("pendulum_angle", "Pendulum Angle (rad)"),
|
||||
("pendulum_vel", "Pendulum Velocity (rad/s)"),
|
||||
]
|
||||
|
||||
for ax, (key, ylabel, is_angle) in zip(axes[:4], channels):
|
||||
for ax, (key, ylabel) in zip(axes[:4], channels):
|
||||
real = recording[key]
|
||||
|
||||
ax.plot(t, real, "k-", linewidth=1.2, alpha=0.8, label="Real")
|
||||
@@ -207,6 +172,7 @@ def visualize(
|
||||
sim_dt=sim_dt,
|
||||
substeps=substeps,
|
||||
window_duration=window_duration,
|
||||
motor_params=motor_params,
|
||||
)
|
||||
title += f"\nOriginal cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}"
|
||||
improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0
|
||||
|
||||
@@ -8,7 +8,6 @@ import structlog
|
||||
import torch
|
||||
import tqdm
|
||||
from clearml import Logger
|
||||
from gymnasium import spaces
|
||||
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
|
||||
from skrl.memories.torch import RandomMemory
|
||||
from skrl.resources.preprocessors.torch import RunningStandardScaler
|
||||
@@ -129,24 +128,17 @@ class VideoRecordingTrainer(SequentialTrainer):
|
||||
frames: list[np.ndarray] = []
|
||||
|
||||
obs, _ = self.env.reset()
|
||||
for _ in range(max_steps):
|
||||
with torch.no_grad():
|
||||
with torch.no_grad():
|
||||
for _ in range(max_steps):
|
||||
action = self.agents.act(obs, timestep=timestep, timesteps=self.timesteps)[0]
|
||||
obs, _, terminated, truncated, _ = self.env.step(action)
|
||||
obs, _, terminated, truncated, _ = self.env.step(action)
|
||||
|
||||
try:
|
||||
frame = self.env.render()
|
||||
except Exception:
|
||||
# Headless environment without OpenGL — skip video recording
|
||||
log.warning("video_recording_disabled", reason="render failed (headless?)")
|
||||
self.env.reset()
|
||||
return
|
||||
if frame is not None:
|
||||
frames.append(frame)
|
||||
|
||||
if frame is not None:
|
||||
frames.append(frame)
|
||||
|
||||
if (terminated | truncated).any().item():
|
||||
break
|
||||
if (terminated | truncated).any().item():
|
||||
break
|
||||
|
||||
if frames:
|
||||
path = str(self._video_dir / f"step_{timestep}.mp4")
|
||||
@@ -159,7 +151,6 @@ class VideoRecordingTrainer(SequentialTrainer):
|
||||
local_path=path, iteration=timestep,
|
||||
)
|
||||
|
||||
# Restore training state
|
||||
self.env.reset()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user