From ca0e7b8b0355964eabddbc072fe37de192ce2d09 Mon Sep 17 00:00:00 2001 From: Victor Mylle Date: Sun, 22 Mar 2026 15:49:13 +0100 Subject: [PATCH] :sparkles: clean up lot of stuff --- .gitignore | 24 +- assets/rotary_cartpole/hardware.yaml | 2 +- .../recordings/capture_20260311_215608.npz | Bin 27620 -> 0 bytes assets/rotary_cartpole/robot.yaml | 23 +- assets/rotary_cartpole/rotary_cartpole.urdf | 10 +- assets/rotary_cartpole/sysid_result.json | 70 --- configs/env/rotary_cartpole.yaml | 13 +- configs/runner/mjx.yaml | 2 +- configs/runner/mujoco.yaml | 2 +- configs/runner/serial.yaml | 3 +- configs/sysid.yaml | 15 +- configs/training/ppo_real.yaml | 14 +- configs/training/ppo_single.yaml | 2 +- scripts/eval.py | 379 +++++++++++++++ scripts/motor_sysid.py | 64 +++ src/core/env.py | 13 +- src/core/hardware.py | 91 ---- src/core/robot.py | 105 +++- src/core/runner.py | 57 ++- src/envs/rotary_cartpole.py | 74 ++- src/runners/mjx.py | 95 +++- src/runners/mujoco.py | 451 ++++++++--------- src/runners/serial.py | 319 +++++------- src/sysid/_urdf.py | 79 +++ src/sysid/capture.py | 231 +++++---- src/sysid/export.py | 182 +++---- src/sysid/motor/__init__.py | 5 + src/sysid/motor/capture.py | 364 ++++++++++++++ src/sysid/motor/export.py | 186 +++++++ src/sysid/motor/optimize.py | 367 ++++++++++++++ src/sysid/motor/preprocess.py | 114 +++++ src/sysid/motor/rollout.py | 460 ++++++++++++++++++ src/sysid/motor/visualize.py | 204 ++++++++ src/sysid/optimize.py | 246 +++++++++- src/sysid/rollout.py | 417 +++++++--------- src/sysid/visualize.py | 130 ++--- src/training/trainer.py | 23 +- 37 files changed, 3613 insertions(+), 1223 deletions(-) delete mode 100644 assets/rotary_cartpole/recordings/capture_20260311_215608.npz delete mode 100644 assets/rotary_cartpole/sysid_result.json create mode 100644 scripts/eval.py create mode 100644 scripts/motor_sysid.py delete mode 100644 src/core/hardware.py create mode 100644 src/sysid/_urdf.py create mode 100644 src/sysid/motor/__init__.py create mode 100644 src/sysid/motor/capture.py create mode 100644 src/sysid/motor/export.py create mode 100644 src/sysid/motor/optimize.py create mode 100644 src/sysid/motor/preprocess.py create mode 100644 src/sysid/motor/rollout.py create mode 100644 src/sysid/motor/visualize.py diff --git a/.gitignore b/.gitignore index 25335e9..3fe1d68 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,23 @@ -outputs/ +# IDE / editor .vscode/ -runs/ \ No newline at end of file + +# Training & HPO outputs +outputs/ +runs/ +smac3_output/ + +# MuJoCo +MUJOCO_LOG.TXT + +# Python +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +.eggs/ +dist/ +build/ + +# Temp files +*.stl +*.scad \ No newline at end of file diff --git a/assets/rotary_cartpole/hardware.yaml b/assets/rotary_cartpole/hardware.yaml index a968c91..1c5ccf7 100644 --- a/assets/rotary_cartpole/hardware.yaml +++ b/assets/rotary_cartpole/hardware.yaml @@ -10,7 +10,7 @@ encoder: # counts_per_rev = ppr × gear_ratio × 4 (quadrature) = 1320 safety: - max_motor_angle_deg: 90.0 # hard termination limit (0 = disabled) + max_motor_angle_deg: 90.0 # hard termination limit (physical endstop ~70-80°) soft_limit_deg: 40.0 # progressive penalty ramp starts here reset: diff --git a/assets/rotary_cartpole/recordings/capture_20260311_215608.npz b/assets/rotary_cartpole/recordings/capture_20260311_215608.npz deleted file mode 100644 index 850edaeba3bf6b5a306a76f487d7f54d1071534e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 27620 zcmZs?V~i$D^zGX=rfu6co@txYc2CskY6A$1pgiRuW(TRXU|@F z10-E-K-+I7T(+a2lC9AM|I%y-qQ($gD^y7sPN2^>4mXay9NLt`Rb~-0sjjjxY!DSO zreVOAd73oq`RQlt?b6RZ;Ge$gU<62RS((`!^o^sAqLSY1ow-F(_XZNzy6I@^1rq~5 z>F07!j}IJEPWQq)(lc9gsS2-8j*c(JM&Wq1msHVJVGM(QUScDI?;=-~i)Z}4UXo?O zLuSI2$gzFLvk{DdVuA+PO7Sj{U^Bip?h)_(<4%YOxnI+UR;K2sg(=Gs^hLBUaNiLX z77{j5|~zJvZ<}39qI?rT=}s_zhkuz?Eadlv@*TwWES5dMS40ns zc=@K1P@ikmaE5*(Y)#Z1I?eE5U&NI&&)ENEq=A6KNe^S%{k}-;%74bb_#?^m?#7U9 zSQ2*0!tEf{9g^+T%YKHUY#DgRkhRKJbqhVS*PHTdl%{hYw7FIj?VI&7 z(NjL?d4VyDK7^hgRy?#-7}6(~%zi)r#{~QF>o1QEj*QO;K0xLhypu-cz6!x-%6Egg zn+^*>)mLIGQ>C@ke{7gfZ4=|x_)yPoXxGP4b(%g#~aFLCI;wFd-4PGJ*XHf zrikn)ed<1t#~Ukp%7p&|)8YDrBr@WMsB@{Y;+@TF44}P$nHp@>@fWLnqBHRL^4A+p za1SxTxTeY|g0Z^K#rK0a2>x|?WvPQ8cX`2^pA*hV&;pN~?#R6PXgLb~EKoKPfLbbx zYbZ1lYN2uMEMd%XZI!eS>hnXXA9&f&j%+xlawVbR+!C6I_CNu&4i+=vmDszy9;Hr; zS)v&P1&AqInHB+xc+K1 zcp=(>GUvW#gG|Id4r;?$Ket&9z0_=9$r}ca5caPvZVW1#pIqV+h7iJqE|GL!M3Xg1 zaWaP1Fi_s?GU;9b>W&AMJoG{eN(ZcW(UiX-(~Mc1;gPUtn+C)CS9vt9+NsAvKjhyt zMKWihxrlYU&T%GXYk$C+pYR~x>IvILu#VB?-TT5Xs={X{+#krwd`8T$*FVB*Pz_G; zKjaaf**$TDgx=>wX6bAmEU4ccsPQQ;C!_=C7#NFhN8Ev2S;!7Vf%r*pVrA&~|5`C` zGL0O_s$!Am0|bsP)oxV{8|WtKmEimeFUI$GXEiJNN1NZQFtTLFZB3qK){y(I(i1KN z+<{G@1#!`CowP-j0G0r(7hHmXkR#nvVwP=oFnRaM;z7XL#dV*eN&iaSrSdiQXCG}N z#Kc&#_YZ4Vx)m+B)3x_8MIS7BZZ6UeMXFFE^da z529LmdL29qxtLqTfW{v$Rd;X0MVJppzTTQ-(w;+6W(_h{}N(q4v%^#g0M4*Ph1_3+hwtI5?xNBI&dP z`InsEA~IJibpo9ZQ9Ql_wy^RU)3@PmiU1@*a*h+> zo{R|Ewh4Cyx+c4id1U=F-Rxj|9zOMOBfta4{%ld(B8;b*oghSR+f!~pBckA*;yp1C z7oDk++8;ll#>M`uK?C{Hkl+m=5II{bgs2Jgb>*O|y0qC~`a9RlfSO|#LwnNd z0bGelkktlTN+fC2jeCQ2zMqdeDgOt^$2B%y5Gg8%H42BCWOgO@WFKjan2g+2$SI#; zq<)Nn`UU2~_dne9I1>ti*YH#_dkG`!E0b>$P9u3=b@!;w-gjvVa4#^8ss19UGFpnt zt^eS*y${ZY*{!UIosqDjLi?*DRO4Q&3Lw|LHI)ANFo}QHb$^KmUK4x9c@YQ^u#7vK_qVv`M0WFstAv-jsiHe zOubX4*G}455EBxy@rHe`0&q@SCgG+U)(6{=Ks9v5J@*5B+()0f7dq?klG#0dN4!|} z9Fc(%7yGARZi~}Q+iKVixJ1M}1KnIm1l3goc&g}Fp22z!6Fp|GcPx}~SDav>*tl@NOZ)hQtQ|2yxw1!VPHb2L~ZcO_FeI;$`_oX?U2;I&jNMzj%QsE8C zDQ0YAOdRkZZ?8-Q0SQ${gMnRD)Wh;u$&h_I%-DSUF%>Fr%rf!d?S^l5^PZbIghge2 zEx#-iQ)^c4nGtlt*Nf*c7}*=8)DWBQyYbm1)?(O~Vi)b-ON5}>uRP|xY*qU*PXT(Y z-J;w$)F@DPy8M{n61iJaA&)1KYs^&>SrDy6Q&em#Tiinajb2ZghVBGJ7&(et3W0$; zj;6aFmHm)qjb7jzl-{N7G5mElGUYIvJ(ij5^m}XlTOD<0#n89Ra7K7NM7p z0`E7&G9i4KIkho-XZvOL-flY7>CJPZTlM5qvIsvAPpXt_+tPulHqAD~%}+a*P@PQmr%2x-#+nz4TXDgnhMcw zb~A|tC>l+tVrI~})i6oF;jIWHUp9?LUe@Z`~mo%X~=g5x^TJr=^H z`%lu|em)oh{y~Ojb{DxD_TG1W1v2v{MKOPD_gv|iSNpPOKwM9XQ!J#X!Ir;{;}d{l z?(=~Mis?PC7P3KEz)muC-3^-6UiB9iUSW7`0R0Td%)PNAY1G-17G1AwHrju#9A#{l z@!W$`#iKO>B6C?I-q6b(7qTYy+AiQPLxk?_;~l)p>*}ijqV3e|C!EW6t;wIi+@!!D zXxoTUO#c#>K?qYH&q7vbSe_bK5qdtv-9=zSN*tr_7^W-XsZ+?hIw9KLQGc@HtD_DV zlwN_ZY{*{;+{Qi6r6qFgj1#Gld&n1RW}z!*EPl&%@!+IVNVZwyjZ?5m;?X*qgU)A{ z6aR^;A=23m_Xv<3gfbctW%)45#hxWgkVelHEn%LFP=-ZX-~CBvjZ5+(BAEL?q0Z7l zgBmz1*3S}~48%Z}l1P$+A9B{H({6G9Ik)oc*~e0R3~@)}1%sP~a-1^tB8AY5y>x3@ zgdGQl`@V^cYzXS5R(TxXD5u#VL#}%S1K=bTynB=M7^d>VG@poJxhYfIi+>mq{j4Lr z(elK_-@N$!cODY?4QCKQc zxEPXk1wy+m)WCJuCBw}^&>2TKl;Y7GG<@3wXg{X>CEp$b8wXuCvD_CE z%J!dCG^j@YMkC_RomyjRHoH{tL068ryt_)DcX<}0riu(U0( z&i!!XD71^u-?30`Mc%poEU=wt2xN@mH)g6vhp2xcp!wf-n?S665~wGuh^o*-ttPgm z`+ZT~Dci0Sf>0lO4I3cgT;9$5Ab+e0UaKnQat(vdGJA)ai(30|pH|ybz~fk{Y>@#e zr@TVF&5YOfyu42kn<>gC(H;jP`qLyg4!`(f@rQ=6!o9!xQp~Zb%+O*xbPn3d(-adk zkR(Qzxb|eSx4j=#bMzPkNb3_!%1VpUHbr`PuXUrVlrlBp8*Q{NNNjeL$v*euR{VQj zR)EjHA56*x7l~Zf5C$d#P8FDiYYg!Z5WVPn$vW4cCkJC^PEwQrm-OQu$?*rQkK8T$ zos%vn;)0umbbkD4nluQx`nl*5y5!PHAOf{oV*_xIYYev#C$@->PdSXULOq{1Rz7Wc z-R4p6$e7aPJ>I2i)brY=n5a$EFt=;J-`CZ)57p*?JgvFPXO(XUc-Z$O2ueLHsXm_2?D1v8s85C@meh z+h6MCnBU0_HK9*dK6rC0rH%)X_sB8tNQb7K^@Q9BE>*~;5M~H}y-#Xl6mI#6ULeln z6IFNeJ9jrc7G>CvQ{v%IL^WDja&wWO*$?dZOh*&0+%!R!eP}U4Vs-Q)MH*}AlxGvP zr7*ud16-v?^(6kb5<_oqmj%k|z%7S@<~~I2wcM2+-=^$`aYX6w3Bb&(SgeYu4i?HY z`*4mEJktOK{GYsYLUKfMs=U!)nyrh4l~OC}qk<-^`{&9cQMwc3lDVi#2G3FM*2l)@GETU=~Y(HW)Wj_Ux1%6!X%h=Ab#u2#}l~x3%qpMbP zsQu{8w@4BG{M6Nkewn$V?rz_5Yd+#&O4|xe1G*HzuRHcC!H=rh9$5(6@4{Iek1wnz_`*$)#7wqWelWR_W9O&U_J^N8y{@6k z_7$l3e;}ndid58b+~-j{KK`p*w6h*8uNIW;Tc4{TEqFKo|g`o*HJVP!88>>f^F9=jw*5 zQ{b}Udh$}XClwjCIOy_nnrv+WOCh_xfrb;cGx1abS$Bp!W(182Duy!Q?q8pQDR*MW z5?9)p!ommDAFj~(iyP!E$u0TXUP0%oQZ5f}oL;;Y+lN(ZRS8Wddy0jDADcIN3?BBQ zLkJG!4%3k`C|uNltegTtNY7|(j$=$)D7g=tV76D(G zj?9N!v}Ga9i{lJ&MQ2;7;E=mUf?Cvz7oK27Nd>jlr1D7{R2pmV@1>%zuSmyo;OW(8^HS>)zN?Rqt&|=ZVTqxQ^+b03PW%6>$7)>UNUt{y6OHitAPPN5|T6U|-Rd!faI8VNae=a39zal&M z+E86(SZa@pU6w1CF&fZc8WB2T>o1@wirW422z4J2QF+E3!=PZIv-q>E^;XY>Xj@UEa9JOnoIwR4Itrn}MV2`!W9 zfEVJn)TZ(Qv*(sI6$&d92>(>(N9@?72rZRekf<*?In?J&T3mS7_Ur_HgZ`gdANonu z{@+?h{!i;RcFuN=`i8b<*8d-hiV6Qef?v|~a-LGhlvj*!FEZ~MVnD2BHis37#kj*I zQ>sl^CL1(~!kPbvEtBTSan^)AuPhcdN?iTlo9KQB1Dim4FAve~LG>c;xYLd`AzN~? z@iATZKBDn(dYIcdedFfndve*`YX|h&Q2;lSPv;%kvPsqsnjy7$9bnFl-xe&y{CGpS zeb($EwKmZhy?>({z_j@J_VymF9c+%C83f{>R@h{{;IX)$`hA0Ut#|-l^eYqe`d4;7 ziFLNM%eH2_zvCuzjdx6TlBsWcPynY#3N_;q1%0kUSFA_b{5uQ`wTBaZ%Qtp>t$7;X z`%$@LcJ}F)dIzno2V>H2Z~L#pf2_JE@zI~2u38C4%SNb7dUBOGtx&AnQ;ZW*U5}!8 z<_2hoY_n%pS=VD8*n4v0Zq;r$ zI6d;@6!)z*svABp2(YLKImS4@^v3wJwP$uV-uC+pF^mxJvlI0u7>+m^8twX`B)YAP z`_F^UN02K$db~=XT}C2s1?_4`29%v-n9LrRs+Y4lMpHivXRzLu@6#HWe9pU%n?HLF zK7?$f-LMZQaahihw-6mq>_w9H!t^{%in7Zx3r|i6cn=w;M-O62 z;#6=AE5Jf{S$|^>`x zyc7xPlu&}8p~aZwGjR(%5?-qpF5|92z0EWS^1hzCyzp+{?BzL z*;k?;*S)<)Qn{d<^@1dh3v~^ zJmME|UaD~y8KAj48J9fd|N3Mpy2}mo*uqsw@NZ$rAf5j(ch({~6r<5HMv5(W`B8a5 z-$aOIqYC1kOrpOOa;#;Xl?Oblwh4^fRUNw(HE7f1Y7YSg9zuRLXCJ}^=uy8-d`Z67 zD#Pz=yK9A}QH0<#y?FibLW6)FqFK_fIM3D~cI!tsW}c_dxea5zu93(w3atT54D-N9 zyG$9pN#(5xa;}?~i*Mbs@HZj6>kBT1HR=(!InFUb(oc4`CnHG~7D^VmKkmbV8<8urIHssm%{-lsq|sm= z=-(As>IzXl0(L#|EAd-lZeu;;H<IyvoPH|FeLf>0 z;_DY0k*N``p6@%%*Y^cR$>3dJ?%EuGf^G7*?l|Zlt8U`G#9|e#BzMQ6-3XU%|HeP@ z8;LC1??gR4gIg|_rdpivdwBOyr<1+!qmEq;tj2`Z9eCz$RpYxjIAH9g#_znxIrF-O zx^S3B?p6!7eQDR9=0$Ha>&{yyN-R?E1D{1q51Zj9%s0vpjL-n|N=hZ1QaHUGiD%4)<8#~$Y}GJJ&?|o1=se)s+Pwem zp5o;)Ju7ISBMMCZrTa^1;tlNy@%}HzwGwPriVzXzpzm8%D>YMPo7671a5wZO;mAW{ z6xY0N6Vaa1omaS@e!g2}KHv%LT`13YpXYY$U3w zM@d6BhON9m6{|nd{u}*at9G9R?eM~`zrtFq2J~0r#9Pv(_%p;dq_d#28pnCEWlUkI zgr|ZF(OTvw)c4;kCH(o9q@0@F*SrTs0jW2cwfvwf=$?K4(nmu=`)tIl9Vny)`<;o` zYYQQlOTdE^>xD`K+g-uFS@u5|a4)0!u7w)uDZ)pX`W75dc333`@9IF0v?ve^)}?Y#kQiD7h`y#WuM6_L)Y3k(9> zU(*6LFDg1%dBB8`x7bTb_d=yNmP{8)cf=bRMP#+nD}m+VBB&*)Q`76gJf{99k~3)B zvo{4kDDOIsf2zpB`u#tQU%#J-*1|tU-f%oFe_V?`_-`b?*dp}LCula8@)Xp`r~c|GNX6Km1`NPB8wK>~$tt z0(OTM$+j*={F|cZ=u_6A?;XWM>IqHfE%6-nNL~H=&yx|x1<`n>v3dv95|=UZZ9NFG zBSFoy73PJ^$gyu_<8LO7c196`2U&cjor4CBx=-O@z$1@u-gN1Eq%+l-952?H{5&6f z>C-Mn4B)(U_GJV?RwEJlT~ShFvOk&iPMm zKSp>`cFH?JpU>(PYfTr z?=Q&L_%LnX?;TtvpyVn_m&x(Nf{!GTA~3~w28^j*uRu`XX=YS@v6tej%wC6Hm4E9e zHx>x~f7)tXnrN{9ZL1OgUt8^JV*P)2kq6lSW2%ciJouCJyM7RcSEra|pNk=rrM1i* zVuDmsih@KV;PUC?rD=<(W#W4*NKhEaDK>cE^2s%YjdxlNvH4!S^x0f~+G;;KdXL^t zx&&v~X5M*Dy5*Y2K#kbwNc%%~YaKV|9#%Dz86s#mvYJ%X=qk9|bDwy<7N@4aE*(aG zHtp;emlp1JzH+EyJ-NTND=n?eYt*!>-IeZsJkW^kTQa%=s0gO@XWOgq(0W2YW}(Wn zldjqmC7<{a8SI_*TK!`L)<3gp7&o@ zSgk5+JRQ7`v(`ZwTyUqmS95q(>0%0rR(bRi(FZBcJS)#HsoUb{;-$B!qf#T@;!kew zjrP*2ZJTr5nK=N8g;Sm=EIUnE@$x1GCvJg9wmeORWR0HXa0(GZArEKiWD_v6GKz045u!0lV^_=H2aV@k zNe$$_B-LC{7P(PaQXTJL?lWR)r?}owDBbb->e`n5g#_yuH9@Ak6_VEUYwFB4^u!K`ATw z<4MVfeLdZftQB6;;~L;?(X1yzc{v}h==`u>we#^ktP>+OfWTY)ha14YDSIrSLXV)Z z(Tzc*RPg&MOoY?O7Jc>lE1}LY4J^3H1)Y;FvcLXnD7nLPzpk;KwzN4F%46k{mH{<6 zh$%f(5Ac#V=zM*cCvIZl-gsfuwGM}~nsvSIWtUE$)XwYZn=g;4*V0|C+n|r66(M~n z0W~r3Nt6`1H)66IWr)ynn-V~KLnrGND%p3ejCfEW>Oh^8yq7mGs45j)!QQW!3{T=RcFe+?6w(1tN5C{bqsct=E;Fg%U2`*#v`|k7thaE?uL&;?L zUFq!mpBFVUF5)eAsXC9n|DF?{>Zg-_ew#74&uZqG)~foYsZxrNaaNb`fLZIvp>93=f4-nz;)aWGav zaDSC2e7Ngvi*HvzS@7a&-BME<@DaSgi0s6;rL!2cnt?O1AMZa$8ljewjnn?ta!d-~ zszu*gN1uxP%Bn^LzeS(N*y7ogy$0n<7;M;Lnhe0=h<_jI;kQqZw~3$h1zml4CZ{15 zMt}a7^vle9$YAzTJoV3CcJs_78%%s#ds5T?D?wJL^@IY2rXR;q+k_E!Oe^_ zR0g=HZ81Ad*ey~JYVre7tlcjgfH{) zDFavOZfcewe4(5@OPA<6Zdq!i{!3eo$wcNr>wGKkPkMufhRpe(g05y2P7<3doe-`< z8)t+T5<~xm{Ds-+i1a>RX5m3b%)GtQ*4`&L-w?Cv?UHa#C^2ImX7|MU?_t~2PbM`P zhl8wl#>T8<_|X-i#<8=1PF%bp9%X(gy6UIb1;-BT%++D+lJ4@n*5f6_o~N}|I&F&_+eyx#u? zOd~K?!wcR_82BH1CP!}-AjFB$0o`?5A?aznm_LWx;QXQ*wp8=b*_X>h?|`1jU}5Q| zBuZzySQdGjDik=E#XLEhmUr0p_VALGvxRY@5ZpK;o}2|JAl@Pqf?8QmPzj9VUOA9o zAl`^3fT&>T-!`E`Pqa4{ySI8)Dl&n1a2@5A?_#ihZ9Y0HZI0A+HNpq2CtciK!srkz zPYH24$@YH_v}w{lj;;DJ!8N%@ujrMSD3Jl-(a)#P)O}%GbjcNNG(wbb!#7^Z(#&mY zE%^$%I3>MmaHBSA0kzv6ltlFEK>)Lf@gne}-Mbw+X->U-k(-^6HT^E?xQt1>lP5ns zy%))J4v`~}US^v(1`DAf|9-^eMg-@}sAC(uTAC!FrsAV?_c%3 z7rOF3kxpAb=kT4#vHm$oD6Sp4m1LoOw#MHA ztgU;z0;&7&^oS0Cg^H#^iJ9Lpv<;?F%`C%;??_hvhAxGzeZ7!TX$1tKst7@Cf!0bS;ATR z)+`BqeA(vT%sByE|9%#k`1(V8ZDH1Y-?sS+j5Sk}P(x8N6&ilM)~McFH=)t9k^Rsj zX+fxlc$GW7@z*7BxSadI;Sgq0(~#2MiO6xC-NkNx-ilV%RwqC(HEFwyT~5=IRxQZ) z4`tKezMHS5yqDVjmZHDAY{{ZjY34i}a?3V-Jai}t)wf_QBTdpyYA$+FW1zkJ#<35ji%5Vpo8XF7z@|Chz@z?}a7A@m4z5x3Do83_t7i zq0L?dC4p(2AlX?xz@@nosI#CF5_9e(A-tYsm@aC)TiKq48ebyDOP2UygW^V+0nfE3%Xf6UM^=jc^i*_ zIwbP+$@>8r-A(&KWt(MkZ^u$pp>>-&nKfwgNDwAwp|8;;u ztClCo0H;`Z;NkWc@ZQ?ntkYnmS!!|ziFXwwP9J+R0)QxfquGD0&Xi#jNwT+AdaI7N zH?d~_nBDvXr15=rcr=SRR_^LsSkxRW4Ws*!@}%FU4L(ro7+%neG%TfqCo&&4_GYq6 zS@-cph23I^<3-Vrg@T?S5AMqFtIYIt9Ds#_me95i+V9eJ2sM(fD(2sP6i^S`;H}D~NR{o78)mlfWQV z1V)Ua|K)pq#^k)HNKunn*7#DH!lJ9)PoT&qg&4m4OFgkk@b@j!{susr>@Gl{<`zb? zTW7Pw*fFOBkf@!J3P%5(6YJCM2S~ zi~M_YT}9IPP&hp1XO+sk&~N@+SYEHY6`2e^cXAMts|T<9OBoKo&SnAtr&GX3Ktq%~ zPT~T-a7;eU4m{X#oYJyIFS-QkvtG)-ms@;{+OmyjF(GY)NR1ZS6N|`w;In3KG3w|m zvceUn6AcHk0!hoX0clOxD|WFD8H}~LE{8II*uAx8wB5H^X$JsrfAlX z(`1vgK{d8fQ-cCe_cZt!#4O)R*f*9&ui9beKj;@qaBi2nWHB$p@0k~z^L4sc;}o$E z{~Q*aPGVBn;ndDYr~X>N7;J%yn<_+val5Vm+fa=TAJ}iQ9iOe7eWv=>iyfrui_-Ku zTv3Nut+=tsc8P)@(^1aK<%auhquX%qrH^uoUve9rE9L*_8BQlZr{eCN8Q)8Z4C(v! z>*K8}668?r1%XxeFhqnr|3*r=8lUhpykwsYvW4F`4|V-_&}RruQZd#*TXK_M1y^su z%MfFpJ>CT$VS1vuV-vuZ=vo+Yj8MDnssU7cBo-vrmn;!6#A&zAi@Ns*usIv;SjFL# z4C9@zL;QS*2}!v5ncaJY09ivZBUUwq7GUkYAB;Ko2Q|nT3>_7wN<*C)s>j$J-}RRj zi0z89-)RJ85#U*_gw7B_wS4u$)NSf}DO&_Zyg}{{xwrQkhg1)-;HNjO&y-XIj3AZ= zGCpj9L3oZf>O7)~H$$Z!ubK+JdZp&ej;6vOXJ^{@9?H*q{PiW#MgaMpg}t9yt_90Ht0gntYi}fG?m7 z{e7D5v@C2UE4oc0F71EinB&+)@Dl4DNJQPedQe}WPbR_COe$ABnfbSrZ!UER+LE_I z7v@I#t_tT$D8=s7c5+&<=|*pSDWVSO04M}*gg+IUQ`i;jlKya`zGWz484E;P4uqRd zbvc>4dBa`(ec@j}=9|^*bU=aZIjTvOCg1hl$rvspAwBMbHqD(0RL8eRzngit6#6V_=`-7*HT*nUo!;no5`G;J7mJWf{3ZC)cKf6y zW4KvA#vkdZ^9y>3zvEcpcnJ7vpDkZmAHMTz|FjnH2^lhi<3B-r47!6H-1Sc!RhGp( zXG#0+rpu0H)D@O$P-v}$I3N7$l6J;i1f<#(Np}L z9vW6SI5z|jOJNum?%HiazX4XFbnvd;zs?I^Hc!J08$u1YR@ zw0DNFus4(vl&wu?Ch1=8qb%oxdv@c7gYLOv(~%m)bt9up3mgpuz+dFNkUAQ*=$-CYX*jzBP_3^*|Z zIJ_r^eo}<5Q4yMwa>|#IJiFwAaO$goyT;5R4d|OOcI)xMta{91n`iwvyEgm!Z zq#2_JDaA-tG{XS()y$XDtR6$mc1a83nBvFxJq%X&Noj2dWxa%NWABiQM<|>i;u+P7 zGl)9+Z$cuqou?-oW?^J$M2%em+@xPR zA9I;aDzmSjtloV^`j2_z2WIcU0qRIqO6PUSGrxy+R1^@c!BUmRN`5Lz-@I}SK5(bI z0;aJ*?CD$>??=Gp?KAJRlbmb(de|6$6mIEj?x^WB()`=kJ*$}N$WSuiyoS)Wm~KR< z;N8z?)txBLNqO3^+7kNZnztsy(GCO)tywRJsdoR6GI$qR-t~(Wg#%*>L=S`GTYDo> z=$P4XtM6sxcTjoq%^CK@T_j4@9jn<-d#`VxZtm*^*J|_8PwfrJ<;@+wu_#37*Rc!; zuX<3)1aJ%7Yx_xEGDOTV=ZCix>GMO!t@0@etr8FbH|Hf+^E2@gj2X|b<7}RZwa4kfVHIthX8uiB4Vc}&$ykW-V&*laQsqLfz4eq(V ze##FYd~!oZ6r?HsjVF?8cR&3Z_oU>rk~7$454>qT%R?c}iVgqzc63C&+_B)zIoh1? z$fepnL+|%~Y34+C;T8Iv&tz%pYP(#%GFvz`?=dP_dFClvtZ{nm*adSOcU57$PW!5i z#b5|WJ+3UBJ-ttYKa@c`1re}TSp8P|*KY-ZMRm%A>|8MR@4vpN+{gS_zDrv5eDo)c zji8?XKkmR6+I7{;VJFE%8v85wYS{r|(r)~RZZsfl>e=g`CZChx!~uBhS@MHq{|K4z z(F^x_K|0R?@pd@3FGl^xD$;j&b7lD$9^_L`dbbnXAL97A+3NZT5dSWtM+{b!>r)>o z(@8mh$&RyH#coDr-E2dB@;efyEAN^-jy>O?=2YWAvi)RiG&!a z#M>T03X*s<;V{!i&3|P?k`h^dOS|%9o+HL`TC3l8GfaWB@#;7(->hsXiE?*5#;G)# zIZIK$EDKVCl@#=j9k=V}an0!93ntrfHcFJr1xaen(1M4Co%e#u^1a~w^ zI~^qKD;@AGYH5NdCy@6baw$W0zYf!t*(U_ASgs)BsVwQQ0ns~OxoqxPlpoTYw}6^3y?U9X6o#cSg9$041d5Ix`Z&xjwRLXG*Qa zct`WfJDAXw52aib0e1SPF9kxsCZp=B1y5i9T4-#Em5~_#Yl{$#sR+ov4z?z~KRI~Y zoyjt6Yt!IcfoPu4)AzsEHp^e(^)}<`69t!=Qs6D2TfCQ*$m}1V(4m3M1xfA;u9?hn zkq&bh!(*CFHScQ4Q<`9UKXg`^D_Ep0U%1|qmVEH->6ld&qvyXbeN76MHrPx6Dd_&l zHIEZ&4;+5colE%MZ=gucOL=FyCV3BH#@{tf!o+nRrLitH_$Th@rA+z2yAa)vFQWaO z9|5FqyY?syiU?_U&4^RSs>q}3ElE|jh+1I>5Q5dV;iPlcs6}&`L8`5gxlimbnTIt) z<4P;?MZ<~T7#5x{>bI=F_!J0)m;wlVKJY!`y*eF3C+(3k-uIY*LPfEO4^si!PoC+= z=bQFFUqMUdh~(HU5@jI2q~gAQ5V;Od8E-E>Cq|(LmHM7YqaO>^j`ogMn8Zr|2!{g)W4SyekW)Bp05Me$nMntd|?OSNu+c=M_`4{n7*=YvCY!FONroPkVy}*e%`IvR#2yK57AHe$1h) zyK46iMKE?p;m~Z(?igV(nt{|Nr-h2kU=LaHd=EC)=zH<0UC?k#0s5 z%wdz#6pIxhlou(}rYAxv;m#UFDTgN&$jXV5H)a187p`|{D8PPnHWQtFx6o9?Q3XaZV8(?J zU$%U`{k$M?vxOF<`jTl>(=d}EgZ=f@fU|UdkkCIGF+(X}J~P{YaR#N`D|XGFCsM&Y zT+PIb_i6f!yd<43eJE}PyRK^rl%ck@=ymrFAniL=xx^I6pvL!)SGspTP`T`EI_=G5 zD3O}y@K7-v>nlc2_q@vbs=jD_>F>0hr#>e?&*#EcG83gy;k0i{rqbPDJ#5-q)S@{k zHvN}zG87m$twu(cJk^zxHte%M_F`r?#o294GS`U82HtvrP?Q8t_9X|4V$Y)@qo`WX zE(?l+ue1wzfr*y+bC31y3V-pE#NJ`$J6-!DFvv(2P!YmnP%cn<2ZQ1te8a;taqQjP zR=UK_bZ<8vOcM6?U01DU8JRrtkTAsvJo!B8L53%lkM%wFz`nx;y+mK{>(%)@ZCy+e zp_=}?XrSeAI8h0~^UQkvF&H0{GXaLeS!iz^!)0I80_WNu##Px-u}(*SD+W)Dwa&hK}k8Mm2`q>?@}=L>J{E(+$dqFRvA8E}D+Tj88b{#ix__FbCE z02dP3?i~i2UEs{(Gr0@ie(GYo9g)w!eIZDP<=!**x0Xh$sc{|l_9k|Vvu0s-nU`(t z?-J~l>=|dpNSfi)9lS0PYxww`CtCgWMrY@3B1c{3ONNjR@f+T+PagIkG!NxxS-45A z8no4#bZBU;7ks)T8t|IYzC_;Gl`DftCh3-S~4iYDnX)#;%S8SbRZI^-T54+&^h<-O*@HYg$FStH9e_TPBKX)|ItR%Sd-pBw7VgKjD^g}c)@Yfn1g zx&*BOT3ow2+mVxo2bU4)wuf79eeLau{;Hfr@Fq6A4)V8MjVq83_Dux~(r*r6)-SIm zlk{a!Eq)I8B1v$|L&ui;O9;&HW?Xu=*N|*9gGpxvpwt-8i}lj@L4yPKV+ehV#Rn8nJVUoJ6wUDI&ULyT1Uo!Zt$t&gsP7d$6NpY zsqQ~V9Og4_8bNMfJDw3&`F2ES1Ab`1J=2+K*p@Vs>Frz5ZdtBf#~G(WA69k~x$chz z?K^zh>SubBzCL>v^C^VJ(V@Ul70euDCZ0l)$-ZQxukzlr?6yBA8>KZo)sl}Af;Fbk z<~vIWgYSYT@i`IB(;cG7YbIqBi~UYq@wI?PZG*m+^DI9>jFQK0xoNDu;d86Y_rZhDHvZbY$b&lI){ay= z4MbMf)mCp;7#fV|uFPhQMiV;cTr#!2Rd-K%xV5@XHI^r$B|NE^nwF+;;l>bFBYdz? zn)aIkT@Nn#U=zBeoM(Gv`(0%-V{VgIi~xg5LFF2k-SV&m!3Nie`S6p z-+EfNak*$OS0QN!C&zyOQIEV(Q=a}4yKE4G-x$*z9?*3${L`+gplKYAN78?G2kfQz7Tchq3FYqP^ z3%qFGgWWNrv(ID!6AjuG@3QXyY3nS5;^@|P9o*f5y9678yCt~01PSi$HUx*@?he5v zxI4k!-CYKUfdS5&Q+uED?Ok8@kM6GOYptsOwdz@YUr!?QLsSLSw?RG_NRZC7iJRDt zss$?8Kw)@+;B`=?8^$UZFot?jVuIgUv4X7gQK#d#ubUzU_H8*q{}2JNqjWaNnU*eb zrB&V)oekVnRf5vYyz#`M<*V<$hii*y#0s;A+q?Fd3?83neMW*I@eT`R{hZ31 zY}FlTrM2CIHW5YkFs@m8b4&i+X_Ym4Md%^CBa!n|;4baUJ1ePiDk7k5R-%K*Xisrl zEPT|4PS%^b7tisn(64-xQR>LCH33ImT~4PEU5+lZEfRmXHf)?kz^DXg(Qea&089?j zPF}stORb_PnQa&7yW(C|s27W$iBu%^kTV?e=CzVsrZ}yDLwJXH@}qXg;WNkN1ST<+w@^V9J{d7U z#bIGIlotO?=~t&WPlb#0(@$oa(=J?sE(jS5=S;ZEqk&+zcrj^XH-o!h)Y(9C)zH0R zs@u*~tAy}W-wE$RRg3OQSir)4jDUO&+f|5t&J_TIAG>WOM3%c%iJwzIdDv%XR!fdp z1!8yD@SMy{b9YFmZ8gqw@1Fo81t8=1|C~qk$NpqZYp_{V=a8Q#F;B5puiFWg<7D60 ztk31p%;xBRL)m}wXSb~gjsd|++s3b1$}aye&x@>#dREU~VYxaOMZ&!gq}>R&dWl!7 zQM_Zvx@ZQrx2i?dcy{YAQUIIZwHju9LeCZ=l3i`9w^Z@bD0uTt*;jjPR}z&nWcM4Dc_Ji*anz;`z;o3Qvskc!xF9eF7`sCV?kyZvOxOMK60!Z+Q37byw* zbsi;n%Avcsv?*rHtJObd7rd|;j*J2vwty-wPV&Iw&K{@B%1Y8SbTZ%ChI zJ&;8#UV}5}yF&|(6hu9>HTE+V3a70&nbWt~VGB@%^>Kuj6dtt3&R=>f4dU(tMr)c^ zu0GxE0jK&=lrF#TT^_nGaqXIs}@Py2Q z-nAhB3(r3JMx28Ypm8%lcLEX9W_pm*obMoC?T1bl4{7VAcbo>S7NOiwC6YN1SyH$O z5M+?jrTj6XdS*5^LwWMZi;d|6lD~Dk9a0=_&@2Vnwa5LKaPG(t&rMy@xuh zTFL#H$y?zKpRbaeem2U>19bm*&VPiMQ|375mU$Exs-#COUy!QfqWZX*cz=O`7m13t zSuuzr%YhafD&B-r+46s#BwCo}Lf;U5+9I#Lv|Mg*oMlT}>r$;>A`OTYWUpzqC4u{r zN?qmmO{!P+2K(F2AH=IljR_lbw0;j24f@-3@_oGvDDh^(tFp!T_la`N@KxWl$;^9y zzl-3Ty{{+mmFmmff^Ly9^u(`4vTe^Qz(4h)cCNT+dbWX}``E&L=R?5dVB)dAGFay7 zNCsD@aOS|-1KMo=#x_HDC{B+6nePss9|&X6U2Kq=zTK(YgHTd9 zmyGWq7wSAJlW=U~TTc&VyUVo)9b}~Bg87a#z>Hpxnv*pqNPaH;_Ea;mR+@+EIqY=) z9h{?pROiB;`$Gh03>IX8`Xy|iA-B?r{3Nw4&z&%GX{wm0A?QoZ32b_yEw4xMFH^j} z0Jo?YcXf5rYaoDa;H#I@yJ4)&i?T~e;*oE(m>%P5Q_I6CCfMi8FwYTGyMZYt*H%)g zr{tXxQ{vEpC@#*?{nb}i1BpSzYK5W2O^h#n7`V=N0`F#02p66RoLyGuZ;i?D-e#A* z8DG@jv*c`Dviic>dUhMxQ3Ib zmqfARMi^4C;xfB&f@NBnhFwwodO#8R&|0W7I*vG(^nRZ&jW1Yg7JaE7vx=cAZI`@04~_opdXpJC%lCmJwh7y|ds4RLYgASYF4 z($8v5RWqVHJGeASZY=7C!1I7&u5O#n3XH#A%C7Eu<4!lauQN}Fs1FV7A@sG&Y_SL1 z8QcupaYaFOa`Yhp(e|Kb!m0#J!e*|U6jCz4nAOT){RT@F5o9)SokW}S4uQozkTD4(6fMPeWI&MOt99j7hoVu{d7E;DDZXq=E7tgk))InMc0gP z^h98b$PEvY?5M394NqeDx@(R^;ZiR537fKi0F_#*ssdb>hPa*AeD3@2I1IfD&=7m z9uRu;(PVV5>1;I}(O26uBe9w&QR(mLK6swO+Al?Ucc0CDM33E?$DTGaVK?j+w=fk! zc0!YB_nw*7cJEBnbLTp7DU|MB{!m2zNVTUTqP**sSzpyO_yA zU5dUpo!yVK+y&cnk~4QzDf0hJvvazag+ZS7LR;=9aJK90l;E1NPOJulJawZE zGh7L>%Xtr7;h$QQ&m`?v;nDkoe`v=^1&9|PcE;T5%57w=q@NKW_dv0GZD(w3zEZ?M zuUoQr$R>Pnta%@a_rn^2MW|8yv135hIijA8@qWv2bfEFY2LV+#TtO6WhqMYVXr$5= z#cnoKO_w=h2`_}ar)oF7Qw;bk9rv$OHCS9`4hf~JC%j76lNR{8a2 zj~RVQSrH2|0!7v$>A^ZaAdT?)>FLjHta(FxMkkuo=oq>>N=>T%WoqupXLr*ddk(hu zIoXc;zY+usv2loq-5$MF^N!jty%j7NQP2DrPxyNS`|Ujp?#){3=?=+_)K+4E)d%e*Uxo7&af z(*kLZC|t1UJ>tPU1ayObYR$Mwz0s>Lky-5JBVOMyXh4B`-cc~LQRcNvu4Gk$v!mD9 z&Wr1-r-u{U%u}5-Bkg3NUS2rW85&MctLN|3&d%JELL4={*WdYo3m~P!wZ-p6AA-nJ zIP>H+uWQg>>9QK;z;?rHQJd_c7oXaRYxf&dY8@2O>XIHvVxs|43Di!9AL9`q0mA`X zz4xQa+C7u1p8gD3E8t?HQE+r;@#a$hLtCtMMuEb1CVz5DU46v9?7cFFH?Qhq9lYzy z1O5dI03m@1YD?BhExF1`bi9hKA+J$yb}eGx_mgCSVud3?E*Ck+W4E58w{y#gqZQKw z*M?VpWgh9P;EROmT$!msUV+)N2sGAO>2VwLK7yf(fYi|eIxnZ&al`l301Q1c?`Thl zS=QGUTb3b_1ivW#%D_P2l$PlJk>E!p886R!d9*SImMbd7mOJi5g%A;%# zvwA}xh9W}AbcsmA{oVmb$KSC13XMCGT{fWwD-cw3Q{w$HQ6bq9t>rkN+IL{jt>+8X zti?oi1s+E`;H3=Kqo8!HzGo|&lT-JoTg)&uU=&IR#4I}WizQv@wek6kuzB%G@lYBwGo|{7&Ga-EJ9>b^JhvqVexez zEErqv64X#8H>mvPD$)HIYlusR4c|2R{J^XredY z<8oLZ3a-_5I#xRW)a9X5_*yer0C6CVf%~~28M3Sh8bk4$+qiiZJyV?SyIw9vXMri+ zt@2+5W5?pn!RP3T8<^9it0*aTl%c zF?w+G`_f6vpAHiBc$53P5#O*PXhf(%I5*OlI)RrMoKY}IOisJmF^|9bk=yot;1wz> z#pgbQkbf@tgSVms7M4zJ>ORjn7Du+I(M>+v?&{@;CRrt2>lD^(JA(ft6!O&Z35N}L z7mMbgth2P^>P+a%98NhqZ^;h7B>RRZTA(y=U_l5YsH%t}M4uikOsiv#99s>o$x52f zO!KFkH1yil>f-m=NkoLtYNXL8`TU)8*&YN18gA8~Se=*Ee-&sBlqJH4_NKIC*=8La z40H~CTbFOmeXF=ZlLC#r260Euq~6$+^L3<_Cc-PHbh9ls*2a(1a`m_ju-_%K!6A=> zY<_FvO-$-jv{D~+89fMxrxBCS;2|Sdcp1kp*J#r`8KPR~*LNfjJD_LXdcK8K{4zUo znS8}=&6OS>vCW#1$87h$bJ&6a{L5+hl)11;3RC*&3)fto`9+qD*GEId$c8>;ktohf zE!uWcIe~D*Z$?llJT0K+z2z*k+O@@Q;&oe!{CR(Nm*6A5siPE$z$Fks_d+2Dc!C`E z{iN7nCB0O{S?LpQc~HJ+vm;gDC%Jeo?9|LGwbwM{b#l(V+)Q#uHI(b(IX9v4C~6ax z`dRj-Iy{D-^RI`~G=JIcxZ!&gAb(7@_o;6SzC&xwYL#@pQHY)1va^3*IO5> zPxH&urQ`eiQfmA8wadH5hC}n1@T30`KZ?TP$77<>;8IAgNMF!DRpkdm0r};RTC2w> zJlF~VgOir42b{%Q@Nv885Mq7P{Eq67%&xtXMty`Z+CD+MlhA#!n7NGR+{LVuc$?0R z-_5PqF4LE1o3|5pDlhUiolo^?$9smuTrV!Pe1E!@x;gS<)%EM0Zr%*-IgzL&jp1Og zH=<44xvQv3Wy>#Ls_A|Z=r^WY?9(Wd9lqNNPzN+!w}E97c3XB-Fgz53DQ>zJCdTB8 z@laTF-a?)bIPTdp@|Mci|l8DB#y@ zk;iV!;WmwSW7ldyPv^lQ4<=!C!hg)cz-d!tou zj-w)XnS}^meEDQp=7~d>l*DJ{nq{if8(KP9H)f%0dO1Ro$tp*8JdMFYsqCP$#WG{w zbGN6@snPVAZA*0Lnxgt8G;$k_?4=euh62|bsQ!X?5RFs_h>ulH&%eGSEveq84J7G= zVN^?_@cD%rux{tuc_rOQktK3jvVcPkE3#{MNX&~Qwgra#MD#ZZAI;buts$Y$xEzER zQ6#3Wd+hWwuAD@h=Wp5GUA&jmoUs&LaLbaGBij^U*pwUK&tb4YYQkxcOdycRo}%;! zCU0}7UYC;c1e8V)r><_u0_@V<4@VvSO^3+RIcsdPlJUNWokhw-0J4`hk zh%enf3X0!J%ACoETqZ8HkDQ(8C1;NPlpdTuW=ceR9av6=y_l6!`#Pi93Rn~p#5~k( ze7B?)@vY9iH+_T>t5r@;TK5sdtiu(2oOH(dQQ-Ds*423wuz?;)yLL3wqxWtGoNLG0 z^@}d;YBPAe^F|UgteQeeX^4B)W>c|vNkz5U=&Ukw1Ak?>F)EDi%kT7uRi?_) z_?|@_W#7=0AOgj{eCVPpy#t5hLXNKwwyloRd-w8T$7d*B9q2h_y(#~(V*c9`Y`JS;J zzws#5r&N)vRZ&SSPAM?T;PqHAnGXW+QU+tt$aFSfQ4IA&R80&@11aNEy4Z=V(~NrE z_#hob&rhjG{EZcq8ax`V86w`^wK8yWsD}(_QB2!!yKZ17fZ1n{V(*B~FdA@3l=>?}ADz-^bYHVGxP7tV%hZ-F^_Y>|v4}PQk$eVB z;t6@17s@MhbmIE(N<%RN7K07^ixI6|hCQ}_eFzbJo%)(4ug$PB_Zr25!ac^hS?ey- z`_`C)VaS37)wK7O|65oXhnC+!WZM3xt_HH%Gab4wv6%!IZf<1w*ZAT!0!{)LML&0S zz#s&=z~i)Xf(z{(DJdqKmpC*a)e{Hf$uJ5G<@)>CVPO3%^hZ)HK#kR8(7cO$LN@^} z3aOBQJUjWs%?qK3D1Rdc@a*(7^W^al70!^$Ch0!72jGL@-HiB()e1d-cM*AhWA#5@mdQOGP+~Jzt=<#!4 z`5c3h=S1*?n^*ADljopLFKJM)jc;|Oj3eN8#-*LBw;VC$Jj>>!FF0}<`;C3~rXYZK zhLnXFN9{|vuM9Ik#a+;Ls*f^TT}{;XI1ZM$@AwNNBxd+i&DyKfiNJ3*ql@eVx8kqk zxS-~YK|n+moQ2_P+EN^DwQE9_TY`jUZWF)$C4G1KB%gOnd2FSnuH6mI%U0IAl3)8U zQ$&eu^$Ucmtd()k*HCs9DX{q>1AbOo(&z**12yYmrU(d+jems_1WpF+nJS5Q2wl4d z*8A@j8Kg&fT=#d=EVa2fd+h`jJ=;DaFRw}A0~Ed7w+e#R$HRT!4tfCz-fv5=HL6|% z>u~^*b!&QK?@Ug&)e^+|*F%YC!M^V_zi02?C}0wpzaG4IsRb|7Z)F)PXi)IwIb0uN zbD!5uX?l)+51ba=lM17P*XyFy-0P6VZBu>E%Ri>sx#Sshz3`#wUXHFcv5!eCNY88v zD+%A8Jzd2#!y~-Q^kJKS<3m{kTG^u#EB_fbkw0!N3{)D^JxVhB&HGdt%->CAMcbOc2EDVr?HCF&trkO@{l{33H;eA{w3nE{mxoYx_v+8YN|}W}8@51+)plxw zzh*R9gSHrXw;S&j#-t>YaLb=+SH=wRhE@C7C@jpxY3a(RZcwQt< zN${&zw%$K!LXpC|=Tymgl@If2!2_RD;!RMJC?GBoKR_Q)tP{KiKsiRvS9 zO19qgG3Q$81EtspYY#`TRd<45%K>Z(0wi=H`30feVHTg+n8e2V&A)rU7uTNFtGFCn z%VZJZC-S{YCfq_2IJ!aXXi$TK|4OVM%XPp@x9Dc1R6-9}rFT)GUoUa=zOVldr|{97TmTz75USElUhSQ}x?=`WhY+du7) zNZ*sgDME-z|M{P;-hFAP8|`X}e}VbvsR13AgE^=_hn8 zk>E;!!+l%dezOa#x((QFwhnrhc2qvW;@IT76lfV0Qpkt}SZC!M5*UO`9>bX;x)F0a zt3t|p=R;k~w-QgDGGA=(u15iLv?jF0Evz1>KPy7`tz)5QXr5oiF`iD@uL_zXny+usXRVOUuMXt9<(p4{^TT$lA&WtV=5jC`X;*x_NAa*PCsAbEBd`D z1qDcxVwc+@c`bY4kW8-_HVNj6dgWXm&4sJ44U8kjoHP;oOb%H zVVYY74^{qbXI>3WnzvND&!H5?u#zj4FPnRNy93LVgzvGyyX8vpqSuTm4IHliZ9iRQ9YlFYirh>NMnht?; z!Q=I%47k;FZq=;|lL5(jk***-N7UZexCl!`1hrfAPo6r`tz#s!@sk!Kv-0OFi4a*q zIpy;kDrynKzW>^auvSHARKLWqHe3;9V$t;F9SFuksiCCwf|q~vTI;PlmCNoNn7AeL z57O2*oh%6{6W;JWwrJmr6#3xs8c1;<&XaTu)+Fv=d70;?yDZyPy^B&@BtKGuKQHPKW0PS%#~S&Gw*H zx6e@X5ixMvFcdcjMQ`~@AhW=j{q|MnS}_y<_{s1olf;?dMi@kb5oOrK2~c?FA!qR( zwOP~1^AdQqGpT}K|0U#Z@4OW9=)=s8VEO#L!8+y0vV%;}QDnK8{TIs7mO^!H!))m2 zanCmcz`PjX(L1>hoi1%5@<$;QX@`<9#XuuJ;+K$QPHvaLqGx?<@+AYy<`dI=i<&G` z^L)nYqYWFJ?3$2N@?!mRRBq9rYOgLSa^ju3Anxd<)ac)xr^gDvV?(7ca>o%pat4`la5V#|=*ues{43 z17bv6N8j}eOKd>R*>;W-Yk<%dN0Id9Mp$%O4_QJJVIq#JCL-%eCo^n8QrFkW*& z^bDVq-RlmtOjQ|h1%?0)d;L%evZ=+tBz(ow)08;?5u%vw&3rJ z+f^C1YyIUe51PfcMxWUBehw2u$ivP1UXomHw(doX)@pAA$Uh;hhL&nex^C_s8&T#$ zAh)uSzsJ0xAKUGX8b=60c$j+aMe#rfwh+ey&=T9teJs>J!JKJ65qmhGAzrahZrN49 zq!%ng2jzRSm5Wzwo|IuwyVzB6@1F$uxvXS_dAL&6%RbSneTAV4ZhEWBC_zm^w-ANh zNJFj9reVmPQ-VhW==tsDz`{FNCu9f~1Vgcmi!72aIJrnK%lcMDgFL_x--Yt9q#W)Z zl?jIvkC-zP+zwKQo9W)tsZpgMH?FH{pHF9P^|Mi|YXe99L#v#Ms?U|F+uP`{OWSGx(9RN&RoMlP(kcT!BI6v82M`yX$5pkBIW&dv?zjzn7!@{Z_od<-=k~ zNkq?`mOk{PcBoAb$8Xe88==K+*d@V^x?!wfKj+Pb1z&B7r|Wg(y>6NV4uO&WM&=xa zdb#02YT5{+k%NI2)2AEGX!kylSy5)J6;C{!x5s2x3X=HQ05+GH3NQ|n+pA$!m*mb4 zEe9@K@iA4fWnj4+-QHMi468c0pqp9yrJrqaAQy>WxO3_t_n%e2Mc`By3+;K}Z^C!` zw|^EKV@xon z1$)!mZ`rCp)7rzeUPA>@ralfW9Y>Akoj)tDoth)*>A1*#9n$@vCNs9W<)b>%vf&{f z$?XJ_{3sJR$^&X!FLZX?!X!s*W#|js+2zhCKsL=<2CBICi;y#>m3GwX5c1D>`4zs3 z$8g_y37L?G3>G_Mor_%v@rPfbtY|$)mcm_&)>!T{BUTL`V zqDXa1mujy@`I;fzVX3i0A&7m4mn*5Gh2yd}n30vGGs(F|2Z^r>)+-mW{%Hxt>F|^% zef(g0!ObX2jlk*S@mYA?RXjpWR{)Wj7T*~>Z5)ZuC*rBG!|@RI$`d4Pbr!QeU`Z6k zp6DlSizB&q#T)35{)w-)FHFrbtCX|$>Top*NJcurIZ;aO%)+Xdoa=dmJ>|?1A%i&b zid$^gn(8K6q3NiX5qd(t)}0JZUcaWw+l6JVbKW)FTxUF})O$t{OuZ=h7_?ZQV|m$f zXV25DW--lpK*r~4UY?B-_p5;nUTK~6?{|e#&fh&CD+^sO?8M*7?H$^X+#6<88 zQEX{~{gELABPMUeBo z{CsJWZutnp5}SPGb;A>nrA1I3Minda48kD)taGNlE0|_Vo|4ta(W|3HE_1S_n~+hW z8aSHyhMfc;-Z?M)5DrXB$}Y!;I&^!M(kK4BOg!MnZ>|l?%MV5Ol|%`C)CDFCAp)+9 zhvq2r9!njlF11|bMyRb1kybskqyHqrGd^-nXYTCDShFwQ7UON@4aNm4A3iLu#(ZXX z^4X<3x`)~CCTNPpW;Wr*UDQJwkT$`4JQ7Wx`)Pc*G9U&<5^a@|Sae6aI=<%K{%r^_?0u zS0!UMT}gmX)^~;`#90c_#RR1{`~&q)>pWh2A!L(2Q4U4@##N=bOHjtEJXb?zLJ6e! zu!=eGBz~j4D*W0s){hdy;ZVsnC{PPhdL(4$)YYVyr+%rM($nr5IipV-Usb^uAL$9U z#mfOt@pJE<<*Y9SWB8x;_gzbAffdXMny9T{7?GOxtE?oDK%dh--oo)_zdZT($EBX&sL_9U_K!=Y>}EJ@<@dy5)V7w ziQrriw?KDLr)Bxdxzi~XZ<2>oyhp<|FS1)@t>4pw-~UN0KtZ#>{AaJJzrCdX_LchE zed^!G52(L)|L#2X-_&27@LyT*Ukdr($UoN~P-_2-_5bC<|IPhXv;K9i{^c8~ z@ZX}=|AzkxsQ$X8{=#Vg0jE&>8~)d3&wrzTlNo>W7Jt!yHO~6K1N?w$;rsBv - - + + @@ -53,9 +53,9 @@ - - - + + + diff --git a/assets/rotary_cartpole/sysid_result.json b/assets/rotary_cartpole/sysid_result.json deleted file mode 100644 index 7673ddc..0000000 --- a/assets/rotary_cartpole/sysid_result.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "best_params": { - "arm_mass": 0.012391951282440451, - "arm_com_x": 0.014950488360794875, - "arm_com_y": 0.006089886527968399, - "arm_com_z": 0.004470745447817278, - "pendulum_mass": 0.035508993892747365, - "pendulum_com_x": 0.06432778588634695, - "pendulum_com_y": -0.05999895841669392, - "pendulum_com_z": 0.0008769789937631209, - "pendulum_ixx": 3.139576982078822e-05, - "pendulum_iyy": 9.431951659638859e-06, - "pendulum_izz": 4.07315891863556e-05, - "pendulum_ixy": -1.8892943833253423e-06, - "actuator_gear": 0.17669161390939517, - "actuator_filter_tau": 0.040905643692382504, - "motor_damping": 0.009504542103348917, - "pendulum_damping": 6.128535042404019e-05, - "motor_armature": 0.0013894759540138252, - "motor_frictionloss": 0.002179448047511452 - }, - "best_cost": 0.7471380533090072, - "recording": "/Users/victormylle/Library/CloudStorage/SeaDrive-VictorMylle(cloud.optimize-it.be)/My Libraries/Projects/AI/RL-Framework/assets/rotary_cartpole/recordings/capture_20260311_215608.npz", - "param_names": [ - "arm_mass", - "arm_com_x", - "arm_com_y", - "arm_com_z", - "pendulum_mass", - "pendulum_com_x", - "pendulum_com_y", - "pendulum_com_z", - "pendulum_ixx", - "pendulum_iyy", - "pendulum_izz", - "pendulum_ixy", - "actuator_gear", - "actuator_filter_tau", - "motor_damping", - "pendulum_damping", - "motor_armature", - "motor_frictionloss" - ], - "defaults": { - "arm_mass": 0.01, - "arm_com_x": 5e-05, - "arm_com_y": 0.0065, - "arm_com_z": 0.00563, - "pendulum_mass": 0.015, - "pendulum_com_x": 0.1583, - "pendulum_com_y": -0.0983, - "pendulum_com_z": 0.0, - "pendulum_ixx": 6.16e-06, - "pendulum_iyy": 6.16e-06, - "pendulum_izz": 1.23e-05, - "pendulum_ixy": 6.1e-06, - "actuator_gear": 0.064, - "actuator_filter_tau": 0.03, - "motor_damping": 0.003, - "pendulum_damping": 0.0001, - "motor_armature": 0.0001, - "motor_frictionloss": 0.03 - }, - "timestamp": "2026-03-11T22:08:04.782736", - "history_summary": { - "first_cost": 3.909456214944022, - "final_cost": 0.7471380533090072, - "generations": 200 - } -} \ No newline at end of file diff --git a/configs/env/rotary_cartpole.yaml b/configs/env/rotary_cartpole.yaml index 65049af..23bb616 100644 --- a/configs/env/rotary_cartpole.yaml +++ b/configs/env/rotary_cartpole.yaml @@ -1,10 +1,19 @@ max_steps: 1000 robot_path: assets/rotary_cartpole reward_upright_scale: 1.0 -speed_penalty_scale: 0.1 + +# ── Regularisation penalties (prevent fast spinning) ───────────────── +motor_vel_penalty: 0.01 # penalise high motor angular velocity +motor_angle_penalty: 0.05 # penalise deviation from centre +action_penalty: 0.05 # penalise large actions (energy cost) + +# ── Software safety limit (env-level, always applied) ──────────────── +motor_angle_limit_deg: 90.0 # terminate episode if motor exceeds ±90° # ── HPO search ranges ──────────────────────────────────────────────── hpo: reward_upright_scale: {min: 0.5, max: 5.0} - speed_penalty_scale: {min: 0.01, max: 1.0} + motor_vel_penalty: {min: 0.001, max: 0.1} + motor_angle_penalty: {min: 0.01, max: 0.2} + action_penalty: {min: 0.01, max: 0.2} max_steps: {values: [500, 1000, 2000]} \ No newline at end of file diff --git a/configs/runner/mjx.yaml b/configs/runner/mjx.yaml index 87ccd3f..5c35dbf 100644 --- a/configs/runner/mjx.yaml +++ b/configs/runner/mjx.yaml @@ -1,4 +1,4 @@ num_envs: 1024 # MJX shines with many parallel envs device: auto # auto = cuda if available, else cpu dt: 0.002 -substeps: 20 +substeps: 10 diff --git a/configs/runner/mujoco.yaml b/configs/runner/mujoco.yaml index 7d23949..f99ff9b 100644 --- a/configs/runner/mujoco.yaml +++ b/configs/runner/mujoco.yaml @@ -1,4 +1,4 @@ num_envs: 64 device: auto # auto = cuda if available, else cpu dt: 0.002 -substeps: 20 +substeps: 10 diff --git a/configs/runner/serial.yaml b/configs/runner/serial.yaml index c187c11..44f19ff 100644 --- a/configs/runner/serial.yaml +++ b/configs/runner/serial.yaml @@ -6,6 +6,5 @@ num_envs: 1 device: cpu port: /dev/cu.usbserial-0001 baud: 115200 -dt: 0.02 # control loop period (50 Hz) +dt: 0.02 # control loop period (50 Hz, matches training) no_data_timeout: 2.0 # seconds of silence before declaring disconnect -encoder_jump_threshold: 200 # encoder tick jump → reboot detection diff --git a/configs/sysid.yaml b/configs/sysid.yaml index 492c3c0..d61489a 100644 --- a/configs/sysid.yaml +++ b/configs/sysid.yaml @@ -1,5 +1,5 @@ # System identification defaults. -# Override via CLI: python -m src.sysid.optimize sysid.max_generations=50 +# Override via CLI: python scripts/sysid.py optimize --max-generations 50 # # These are NOT Hydra config groups — the sysid scripts use argparse. # This file serves as documentation and can be loaded by custom wrappers. @@ -8,18 +8,25 @@ capture: port: /dev/cu.usbserial-0001 baud: 115200 duration: 20.0 # seconds - amplitude: 180 # max PWM magnitude (0–255) + amplitude: 150 # max PWM magnitude — must match firmware MAX_MOTOR_SPEED hold_min_ms: 50 # PRBS min hold time hold_max_ms: 300 # PRBS max hold time dt: 0.02 # sample period (50 Hz) optimize: sigma0: 0.3 # CMA-ES initial step size (in [0,1] normalised space) - population_size: 20 # candidates per generation - max_generations: 200 # total generations (~4000 evaluations) + population_size: 50 # candidates per generation + max_generations: 1000 # total generations (~4000 evaluations) sim_dt: 0.002 # MuJoCo physics timestep substeps: 10 # physics substeps per control step (ctrl_dt = 0.02s) pos_weight: 1.0 # MSE weight for angle errors vel_weight: 0.1 # MSE weight for velocity errors window_duration: 0.5 # multiple-shooting window length (s); 0 = open-loop seed: 42 + +# Tunable hardware-realism params (added to ROTARY_CARTPOLE_PARAMS): +# ctrl_limit — effective motor range → exported as ctrl_range in robot.yaml +# motor_deadzone — L298N minimum |action| for torque → exported as deadzone in robot.yaml +# Firmware sends raw (unfiltered) sensor data; EMA filtering is +# handled on the Python side (env transforms) and is NOT part of +# the sysid parameter search. diff --git a/configs/training/ppo_real.yaml b/configs/training/ppo_real.yaml index b165e56..835845e 100644 --- a/configs/training/ppo_real.yaml +++ b/configs/training/ppo_real.yaml @@ -8,15 +8,17 @@ defaults: - _self_ hidden_sizes: [256, 256] -total_timesteps: 100000 -learning_epochs: 5 -learning_rate: 0.001 # conservative — can't undo real-world damage -entropy_loss_scale: 0.0001 -log_interval: 1024 +total_timesteps: 2000000 +learning_epochs: 10 +learning_rate: 0.0005 # conservative — can't undo real-world damage +entropy_loss_scale: 0.01 +rollout_steps: 2048 +mini_batches: 8 +log_interval: 2048 checkpoint_interval: 5000 # frequent saves — can't rewind real hardware initial_log_std: -0.5 # moderate initial exploration min_log_std: -4.0 -max_log_std: 0.0 # cap σ at 1.0 +max_log_std: 2.0 # cap σ at 1.0 # Never run real-hardware training remotely remote: false diff --git a/configs/training/ppo_single.yaml b/configs/training/ppo_single.yaml index 7534433..21d521c 100644 --- a/configs/training/ppo_single.yaml +++ b/configs/training/ppo_single.yaml @@ -8,7 +8,7 @@ defaults: - _self_ hidden_sizes: [256, 256] -total_timesteps: 1000000 +total_timesteps: 2000000 learning_epochs: 10 learning_rate: 0.0003 entropy_loss_scale: 0.01 diff --git a/scripts/eval.py b/scripts/eval.py new file mode 100644 index 0000000..2f01266 --- /dev/null +++ b/scripts/eval.py @@ -0,0 +1,379 @@ +"""Evaluate a trained policy on real hardware (or in simulation). + +Loads a checkpoint and runs the policy in a closed loop. For real +hardware the serial runner talks to the ESP32; for sim it uses the +MuJoCo runner. A digital-twin MuJoCo viewer mirrors the robot state +in both modes. + +Usage (real hardware): + mjpython scripts/eval.py env=rotary_cartpole runner=serial \ + checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt + +Usage (simulation): + mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \ + checkpoint=runs/26-03-12_00-16-43-308420_PPO/checkpoints/agent_1000000.pt + +Controls: + Space — pause / resume policy (motor stops while paused) + R — reset environment + Esc — quit +""" + +import math +import sys +import time +from pathlib import Path + +# Ensure project root is on sys.path +_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent) +if _PROJECT_ROOT not in sys.path: + sys.path.insert(0, _PROJECT_ROOT) + +import hydra +import mujoco +import mujoco.viewer +import numpy as np +import structlog +import torch +from gymnasium import spaces +from hydra.core.hydra_config import HydraConfig +from omegaconf import DictConfig, OmegaConf +from skrl.resources.preprocessors.torch import RunningStandardScaler + +from src.core.registry import build_env +from src.models.mlp import SharedMLP + +logger = structlog.get_logger() + + +# ── keyboard state ─────────────────────────────────────────────────── +_reset_flag = [False] +_paused = [False] +_quit_flag = [False] + + +def _key_callback(keycode: int) -> None: + """Called by MuJoCo viewer on key press.""" + if keycode == 32: # GLFW_KEY_SPACE + _paused[0] = not _paused[0] + elif keycode == 82: # GLFW_KEY_R + _reset_flag[0] = True + elif keycode == 256: # GLFW_KEY_ESCAPE + _quit_flag[0] = True + + +# ── checkpoint loading ─────────────────────────────────────────────── + +def _infer_hidden_sizes(state_dict: dict[str, torch.Tensor]) -> tuple[int, ...]: + """Infer hidden layer sizes from a SharedMLP state dict.""" + sizes = [] + i = 0 + while f"net.{i}.weight" in state_dict: + sizes.append(state_dict[f"net.{i}.weight"].shape[0]) + i += 2 # skip activation layers (ELU) + return tuple(sizes) + + +def load_policy( + checkpoint_path: str, + observation_space: spaces.Space, + action_space: spaces.Space, + device: torch.device = torch.device("cpu"), +) -> tuple[SharedMLP, RunningStandardScaler]: + """Load a trained SharedMLP + observation normalizer from a checkpoint. + + Returns: + (model, state_preprocessor) ready for inference. + """ + ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False) + + # Infer architecture from saved weights. + hidden_sizes = _infer_hidden_sizes(ckpt["policy"]) + + # Reconstruct model. + model = SharedMLP( + observation_space=observation_space, + action_space=action_space, + device=device, + hidden_sizes=hidden_sizes, + ) + model.load_state_dict(ckpt["policy"]) + model.eval() + + # Reconstruct observation normalizer. + state_preprocessor = RunningStandardScaler(size=observation_space, device=device) + state_preprocessor.running_mean = ckpt["state_preprocessor"]["running_mean"].to(device) + state_preprocessor.running_variance = ckpt["state_preprocessor"]["running_variance"].to(device) + state_preprocessor.current_count = ckpt["state_preprocessor"]["current_count"] + # Freeze the normalizer — don't update stats during eval. + state_preprocessor.training = False + + logger.info( + "checkpoint_loaded", + path=checkpoint_path, + hidden_sizes=hidden_sizes, + obs_mean=[round(x, 3) for x in state_preprocessor.running_mean.tolist()], + obs_std=[round(x, 3) for x in state_preprocessor.running_variance.sqrt().tolist()], + ) + return model, state_preprocessor + + +# ── action arrow overlay ───────────────────────────────────────────── + +def _add_action_arrow(viewer, model, data, action_val: float) -> None: + """Draw an arrow showing applied torque direction.""" + if abs(action_val) < 0.01 or model.nu == 0: + return + + jnt_id = model.actuator_trnid[0, 0] + body_id = model.jnt_bodyid[jnt_id] + pos = data.xpos[body_id].copy() + pos[2] += 0.02 + + axis = data.xmat[body_id].reshape(3, 3) @ model.jnt_axis[jnt_id] + arrow_len = 0.08 * action_val + direction = axis * np.sign(arrow_len) + + z = direction / (np.linalg.norm(direction) + 1e-8) + up = np.array([0, 0, 1]) if abs(z[2]) < 0.99 else np.array([0, 1, 0]) + x = np.cross(up, z) + x /= np.linalg.norm(x) + 1e-8 + y = np.cross(z, x) + mat = np.column_stack([x, y, z]).flatten() + + rgba = np.array( + [0.2, 0.8, 0.2, 0.8] if action_val > 0 else [0.8, 0.2, 0.2, 0.8], + dtype=np.float32, + ) + + geom = viewer.user_scn.geoms[viewer.user_scn.ngeom] + mujoco.mjv_initGeom( + geom, + type=mujoco.mjtGeom.mjGEOM_ARROW, + size=np.array([0.008, 0.008, abs(arrow_len)]), + pos=pos, + mat=mat, + rgba=rgba, + ) + viewer.user_scn.ngeom += 1 + + +# ── main loops ─────────────────────────────────────────────────────── + +@hydra.main(version_base=None, config_path="../configs", config_name="config") +def main(cfg: DictConfig) -> None: + choices = HydraConfig.get().runtime.choices + env_name = choices.get("env", "cartpole") + runner_name = choices.get("runner", "mujoco_single") + + checkpoint_path = cfg.get("checkpoint", None) + if checkpoint_path is None: + logger.error("No checkpoint specified. Use: +checkpoint=path/to/agent.pt") + sys.exit(1) + + # Resolve relative paths against original working directory. + checkpoint_path = str(Path(hydra.utils.get_original_cwd()) / checkpoint_path) + if not Path(checkpoint_path).exists(): + logger.error("checkpoint_not_found", path=checkpoint_path) + sys.exit(1) + + if runner_name == "serial": + _eval_serial(cfg, env_name, checkpoint_path) + else: + _eval_sim(cfg, env_name, checkpoint_path) + + +def _eval_sim(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None: + """Evaluate policy in MuJoCo simulation with viewer.""" + from src.runners.mujoco import MuJoCoRunner, MuJoCoRunnerConfig + + env = build_env(env_name, cfg) + runner_dict = OmegaConf.to_container(cfg.runner, resolve=True) + runner_dict["num_envs"] = 1 + runner = MuJoCoRunner(env=env, config=MuJoCoRunnerConfig(**runner_dict)) + + device = runner.device + model, preprocessor = load_policy( + checkpoint_path, runner.observation_space, runner.action_space, device + ) + + mj_model = runner._model + mj_data = runner._data[0] + dt_ctrl = runner.config.dt * runner.config.substeps + + with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer: + obs, _ = runner.reset() + step = 0 + episode = 0 + episode_reward = 0.0 + + logger.info( + "eval_started", + env=env_name, + mode="simulation", + checkpoint=Path(checkpoint_path).name, + controls="Space=pause, R=reset, Esc=quit", + ) + + while viewer.is_running() and not _quit_flag[0]: + if _reset_flag[0]: + _reset_flag[0] = False + obs, _ = runner.reset() + step = 0 + episode += 1 + episode_reward = 0.0 + logger.info("reset", episode=episode) + + if _paused[0]: + viewer.sync() + time.sleep(0.05) + continue + + # Policy inference + with torch.no_grad(): + normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs) + action = model.act({"states": normalized_obs}, role="policy")[0] + action = action.clamp(-1.0, 1.0) + + obs, reward, terminated, truncated, info = runner.step(action) + episode_reward += reward.item() + step += 1 + + # Sync viewer + mujoco.mj_forward(mj_model, mj_data) + viewer.user_scn.ngeom = 0 + _add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item()) + viewer.sync() + + if step % 50 == 0: + joints = {mj_model.jnt(i).name: round(math.degrees(mj_data.qpos[i]), 1) + for i in range(mj_model.njnt)} + logger.debug( + "step", n=step, reward=round(reward.item(), 3), + action=round(action[0, 0].item(), 2), + ep_reward=round(episode_reward, 1), **joints, + ) + + if terminated.any() or truncated.any(): + logger.info( + "episode_done", episode=episode, steps=step, + total_reward=round(episode_reward, 2), + reason="terminated" if terminated.any() else "truncated", + ) + obs, _ = runner.reset() + step = 0 + episode += 1 + episode_reward = 0.0 + + time.sleep(dt_ctrl) + + runner.close() + + +def _eval_serial(cfg: DictConfig, env_name: str, checkpoint_path: str) -> None: + """Evaluate policy on real hardware via serial, with digital-twin viewer.""" + from src.runners.serial import SerialRunner, SerialRunnerConfig + + env = build_env(env_name, cfg) + runner_dict = OmegaConf.to_container(cfg.runner, resolve=True) + serial_runner = SerialRunner(env=env, config=SerialRunnerConfig(**runner_dict)) + + device = serial_runner.device + model, preprocessor = load_policy( + checkpoint_path, serial_runner.observation_space, serial_runner.action_space, device + ) + + # Set up digital-twin MuJoCo model for visualization. + serial_runner._ensure_viz_model() + mj_model = serial_runner._viz_model + mj_data = serial_runner._viz_data + + with mujoco.viewer.launch_passive(mj_model, mj_data, key_callback=_key_callback) as viewer: + obs, _ = serial_runner.reset() + step = 0 + episode = 0 + episode_reward = 0.0 + + logger.info( + "eval_started", + env=env_name, + mode="real hardware (serial)", + port=serial_runner.config.port, + checkpoint=Path(checkpoint_path).name, + controls="Space=pause, R=reset, Esc=quit", + ) + + while viewer.is_running() and not _quit_flag[0]: + if _reset_flag[0]: + _reset_flag[0] = False + serial_runner._send("M0") + serial_runner._drive_to_center() + serial_runner._wait_for_pendulum_still() + obs, _ = serial_runner.reset() + step = 0 + episode += 1 + episode_reward = 0.0 + logger.info("reset", episode=episode) + + if _paused[0]: + serial_runner._send("M0") # safety: stop motor while paused + serial_runner._sync_viz() + viewer.sync() + time.sleep(0.05) + continue + + # Policy inference + with torch.no_grad(): + normalized_obs = preprocessor(obs.unsqueeze(0) if obs.dim() == 1 else obs) + action = model.act({"states": normalized_obs}, role="policy")[0] + action = action.clamp(-1.0, 1.0) + + obs, reward, terminated, truncated, info = serial_runner.step(action) + episode_reward += reward.item() + step += 1 + + # Sync digital twin with real sensor data. + serial_runner._sync_viz() + viewer.user_scn.ngeom = 0 + _add_action_arrow(viewer, mj_model, mj_data, action[0, 0].item()) + viewer.sync() + + if step % 25 == 0: + state = serial_runner._read_state() + logger.debug( + "step", n=step, reward=round(reward.item(), 3), + action=round(action[0, 0].item(), 2), + ep_reward=round(episode_reward, 1), + motor_enc=state["encoder_count"], + pend_deg=round(state["pendulum_angle"], 1), + ) + + # Check for safety / disconnection. + if info.get("reboot_detected") or info.get("motor_limit_exceeded"): + logger.error( + "safety_stop", + reboot=info.get("reboot_detected", False), + motor_limit=info.get("motor_limit_exceeded", False), + ) + serial_runner._send("M0") + break + + if terminated.any() or truncated.any(): + logger.info( + "episode_done", episode=episode, steps=step, + total_reward=round(episode_reward, 2), + reason="terminated" if terminated.any() else "truncated", + ) + # Auto-reset for next episode. + obs, _ = serial_runner.reset() + step = 0 + episode += 1 + episode_reward = 0.0 + + # Real-time pacing is handled by serial_runner.step() (dt sleep). + + serial_runner.close() + + +if __name__ == "__main__": + main() diff --git a/scripts/motor_sysid.py b/scripts/motor_sysid.py new file mode 100644 index 0000000..8ef354d --- /dev/null +++ b/scripts/motor_sysid.py @@ -0,0 +1,64 @@ +"""Unified CLI for motor-only system identification. + +Usage: + python scripts/motor_sysid.py capture --duration 20 + python scripts/motor_sysid.py optimize --recording assets/motor/recordings/.npz + python scripts/motor_sysid.py visualize --recording assets/motor/recordings/.npz + python scripts/motor_sysid.py export --result assets/motor/motor_sysid_result.json +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +# Ensure project root is on sys.path +_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent) +if _PROJECT_ROOT not in sys.path: + sys.path.insert(0, _PROJECT_ROOT) + + +def main() -> None: + if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"): + print( + "Motor System Identification\n" + "===========================\n" + "Usage: python scripts/motor_sysid.py [options]\n" + "\n" + "Commands:\n" + " capture Record motor trajectory under PRBS excitation\n" + " optimize Run CMA-ES to fit motor parameters\n" + " visualize Plot real vs simulated motor response\n" + " export Write tuned MJCF + robot.yaml files\n" + "\n" + "Workflow:\n" + " 1. Flash sysid firmware to ESP32 (motor-only, no limits)\n" + " 2. python scripts/motor_sysid.py capture --duration 20\n" + " 3. python scripts/motor_sysid.py optimize --recording .npz\n" + " 4. python scripts/motor_sysid.py visualize --recording .npz\n" + "\n" + "Run ' --help' for command-specific options." + ) + sys.exit(0) + + command = sys.argv[1] + sys.argv = [f"motor_sysid {command}"] + sys.argv[2:] + + if command == "capture": + from src.sysid.motor.capture import main as cmd_main + elif command == "optimize": + from src.sysid.motor.optimize import main as cmd_main + elif command == "visualize": + from src.sysid.motor.visualize import main as cmd_main + elif command == "export": + from src.sysid.motor.export import main as cmd_main + else: + print(f"Unknown command: {command}") + print("Available commands: capture, optimize, visualize, export") + sys.exit(1) + + cmd_main() + + +if __name__ == "__main__": + main() diff --git a/src/core/env.py b/src/core/env.py index 75d9e81..19f122d 100644 --- a/src/core/env.py +++ b/src/core/env.py @@ -48,9 +48,10 @@ class BaseEnv(abc.ABC, Generic[T]): def compute_truncations(self, step_counts: torch.Tensor) -> torch.Tensor: return step_counts >= self.config.max_steps - def get_default_qpos(self, nq: int) -> list[float] | None: - """Return the default joint positions for reset. - Override in subclass if the URDF zero pose doesn't match - the desired initial state (e.g. pendulum hanging down). - Returns None to use the URDF default (all zeros).""" - return None + def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool: + """Check whether the physical robot has settled enough to start an episode. + + Used by the SerialRunner after driving to center and waiting for the + pendulum. Default: always ready (sim doesn't need settling). + """ + return True diff --git a/src/core/hardware.py b/src/core/hardware.py deleted file mode 100644 index 2d49569..0000000 --- a/src/core/hardware.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Real-hardware configuration — loaded from hardware.yaml next to robot.yaml. - -Provides robot-specific constants for the SerialRunner: encoder specs, -safety limits, and reset behaviour. Simulation-only robots simply don't -have a hardware.yaml (the loader returns None). - -Usage: - hw = load_hardware_config("assets/rotary_cartpole") - if hw is not None: - counts_per_rev = hw.encoder.ppr * hw.encoder.gear_ratio * 4.0 -""" - -import dataclasses -from pathlib import Path - -import structlog -import yaml - -log = structlog.get_logger() - - -@dataclasses.dataclass -class EncoderConfig: - """Rotary encoder parameters.""" - - ppr: int = 11 # pulses per revolution (before quadrature) - gear_ratio: float = 30.0 # gearbox ratio - - @property - def counts_per_rev(self) -> float: - """Total encoder counts per output-shaft revolution (quadrature).""" - return self.ppr * self.gear_ratio * 4.0 - - -@dataclasses.dataclass -class SafetyConfig: - """Safety limits enforced by the runner (not the env).""" - - max_motor_angle_deg: float = 90.0 # hard termination (0 = disabled) - soft_limit_deg: float = 40.0 # progressive penalty ramp start - - -@dataclasses.dataclass -class ResetConfig: - """Parameters for the physical reset procedure.""" - - drive_speed: int = 80 # PWM for bang-bang drive-to-center - deadband: int = 15 # encoder count threshold for "centered" - drive_timeout: float = 3.0 # seconds - - settle_angle_deg: float = 2.0 # pendulum angle threshold (degrees) - settle_vel_dps: float = 5.0 # pendulum velocity threshold (deg/s) - settle_duration: float = 0.5 # seconds the pendulum must stay still - settle_timeout: float = 30.0 # give up after this many seconds - - -@dataclasses.dataclass -class HardwareConfig: - """Complete real-hardware description for a robot.""" - - encoder: EncoderConfig = dataclasses.field(default_factory=EncoderConfig) - safety: SafetyConfig = dataclasses.field(default_factory=SafetyConfig) - reset: ResetConfig = dataclasses.field(default_factory=ResetConfig) - - -def load_hardware_config(robot_dir: str | Path) -> HardwareConfig | None: - """Load hardware.yaml from a directory. - - Returns None if the file doesn't exist (simulation-only robot). - """ - robot_dir = Path(robot_dir).resolve() - yaml_path = robot_dir / "hardware.yaml" - - if not yaml_path.exists(): - return None - - raw = yaml.safe_load(yaml_path.read_text()) or {} - - encoder = EncoderConfig(**raw.get("encoder", {})) - safety = SafetyConfig(**raw.get("safety", {})) - reset = ResetConfig(**raw.get("reset", {})) - - config = HardwareConfig(encoder=encoder, safety=safety, reset=reset) - - log.debug( - "hardware_config_loaded", - robot_dir=str(robot_dir), - counts_per_rev=encoder.counts_per_rev, - max_motor_angle_deg=safety.max_motor_angle_deg, - ) - return config diff --git a/src/core/robot.py b/src/core/robot.py index e8ef93a..c52dacd 100644 --- a/src/core/robot.py +++ b/src/core/robot.py @@ -11,6 +11,7 @@ Usage: """ import dataclasses +import math from pathlib import Path import structlog @@ -19,10 +20,20 @@ import yaml log = structlog.get_logger() +def _as_pair(val) -> tuple[float, float]: + """Convert scalar or [pos, neg] list to (pos, neg) tuple.""" + if isinstance(val, (list, tuple)) and len(val) == 2: + return (float(val[0]), float(val[1])) + return (float(val), float(val)) + + @dataclasses.dataclass class ActuatorConfig: """Motor/actuator attached to a joint. + Asymmetric fields use (positive_dir, negative_dir) tuples. + A scalar in YAML is expanded to a symmetric pair. + type: motor — direct torque control (ctrl = normalised torque) position — PD position servo (ctrl = target angle, needs kp) @@ -30,12 +41,91 @@ class ActuatorConfig: """ joint: str = "" type: str = "motor" - gear: float = 1.0 - ctrl_range: tuple[float, float] = (-1.0, 1.0) - damping: float = 0.05 - kp: float = 0.0 # proportional gain (position / velocity actuators) - kv: float = 0.0 # derivative gain (position actuators) - filter_tau: float = 0.0 # 1st-order filter time constant (s); 0 = no filter + gear: tuple[float, float] = (1.0, 1.0) # torque constant (pos, neg) + ctrl_range: tuple[float, float] = (-1.0, 1.0) # (lower, upper) control bounds + deadzone: tuple[float, float] = (0.0, 0.0) # min |ctrl| for torque (pos, neg) + damping: tuple[float, float] = (0.0, 0.0) # viscous damping (pos, neg) + frictionloss: tuple[float, float] = (0.0, 0.0) # Coulomb friction (pos, neg) + kp: float = 0.0 # proportional gain (position / velocity actuators) + kv: float = 0.0 # derivative gain (position actuators) + filter_tau: float = 0.0 # 1st-order filter time constant (s); 0 = no filter + viscous_quadratic: float = 0.0 # velocity² drag coefficient + back_emf_gain: float = 0.0 # back-EMF torque reduction + + @property + def gear_avg(self) -> float: + return (self.gear[0] + self.gear[1]) / 2.0 + + @property + def has_motor_model(self) -> bool: + """True if this actuator needs the runtime motor model.""" + return ( + self.gear[0] != self.gear[1] + or self.deadzone != (0.0, 0.0) + or self.damping != (0.0, 0.0) + or self.frictionloss != (0.0, 0.0) + or self.viscous_quadratic > 0 + or self.back_emf_gain > 0 + ) + + def transform_ctrl(self, ctrl: float) -> float: + """Apply asymmetric deadzone and gear compensation to a scalar ctrl.""" + # Deadzone + dz_pos, dz_neg = self.deadzone + if ctrl >= 0 and ctrl < dz_pos: + return 0.0 + if ctrl < 0 and ctrl > -dz_neg: + return 0.0 + + # Gear compensation: rescale so ctrl × gear_avg ≈ action × gear_dir + gear_avg = self.gear_avg + if gear_avg > 1e-8: + gear_dir = self.gear[0] if ctrl >= 0 else self.gear[1] + ctrl *= gear_dir / gear_avg + + return ctrl + + def compute_motor_force(self, vel: float, ctrl: float) -> float: + """Asymmetric friction, damping, drag, back-EMF → applied torque.""" + torque = 0.0 + + # Coulomb friction (direction-dependent) + fl_pos, fl_neg = self.frictionloss + if abs(vel) > 1e-6: + fl = fl_pos if vel > 0 else fl_neg + torque -= math.copysign(fl, vel) + + # Viscous damping (direction-dependent) + damp = self.damping[0] if vel > 0 else self.damping[1] + torque -= damp * vel + + # Quadratic velocity drag + if self.viscous_quadratic > 0: + torque -= self.viscous_quadratic * vel * abs(vel) + + # Back-EMF torque reduction + if self.back_emf_gain > 0 and abs(ctrl) > 1e-6: + torque -= self.back_emf_gain * vel * math.copysign(1.0, ctrl) + + return max(-10.0, min(10.0, torque)) + + def transform_action(self, action): + """Vectorised deadzone + gear compensation for a torch batch.""" + dz_pos, dz_neg = self.deadzone + if dz_pos > 0 or dz_neg > 0: + action = action.clone() + pos_dead = (action >= 0) & (action < dz_pos) + neg_dead = (action < 0) & (action > -dz_neg) + action[pos_dead | neg_dead] = 0.0 + + gear_avg = self.gear_avg + if gear_avg > 1e-8 and self.gear[0] != self.gear[1]: + action = action.clone() if dz_pos == 0 and dz_neg == 0 else action + pos = action >= 0 + action[pos] *= self.gear[0] / gear_avg + action[~pos] *= self.gear[1] / gear_avg + + return action @dataclasses.dataclass @@ -84,6 +174,9 @@ def load_robot_config(robot_dir: str | Path) -> RobotConfig: for a in raw.get("actuators", []): if "ctrl_range" in a: a["ctrl_range"] = tuple(a["ctrl_range"]) + for key in ("gear", "deadzone", "damping", "frictionloss"): + if key in a: + a[key] = _as_pair(a[key]) actuators.append(ActuatorConfig(**a)) # Parse joint overrides diff --git a/src/core/runner.py b/src/core/runner.py index f543ff1..2b064e2 100644 --- a/src/core/runner.py +++ b/src/core/runner.py @@ -1,9 +1,12 @@ import dataclasses import abc from typing import Any, Generic, TypeVar -from src.core.env import BaseEnv + +import numpy as np import torch +from src.core.env import BaseEnv + T = TypeVar("T") @@ -21,6 +24,8 @@ class BaseRunner(abc.ABC, Generic[T]): if getattr(self.config, "device", None) == "auto": self.config.device = "cuda" if torch.cuda.is_available() else "cpu" + self._last_actions: torch.Tensor | None = None + self._sim_initialize(config) self.observation_space = self.env.observation_space @@ -68,6 +73,7 @@ class BaseRunner(abc.ABC, Generic[T]): return obs, {} def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict[str, Any]]: + self._last_actions = actions qpos, qvel = self._sim_step(actions) self.step_counts += 1 @@ -95,9 +101,50 @@ class BaseRunner(abc.ABC, Generic[T]): # skrl expects (num_envs, 1) for rewards/terminated/truncated return obs, rewards.unsqueeze(-1), terminated.unsqueeze(-1), truncated.unsqueeze(-1), info - def render(self, env_idx: int = 0): - """Offscreen render → RGB numpy array. Override in subclass.""" + def _render_frame(self, env_idx: int = 0) -> np.ndarray: + """Return a raw RGB frame. Override in subclass.""" raise NotImplementedError("Render not implemented for this runner.") - + + def render(self, env_idx: int = 0) -> np.ndarray: + """Render frame with action overlay.""" + frame = self._render_frame(env_idx) + if self._last_actions is not None: + ctrl = float(self._last_actions[env_idx, 0].clamp(-1.0, 1.0)) + _draw_action_overlay(frame, ctrl) + return frame + def close(self) -> None: - self._sim_close() \ No newline at end of file + self._sim_close() + + +def _draw_action_overlay(frame: np.ndarray, action: float) -> None: + """Draw an action bar on a rendered frame (no OpenCV needed). + + Bar is centered horizontally: green to the right (+), red to the left (-). + """ + h, w = frame.shape[:2] + + bar_y = h - 30 + bar_h = 16 + bar_x_center = w // 2 + bar_half_w = w // 4 + bar_x_left = bar_x_center - bar_half_w + bar_x_right = bar_x_center + bar_half_w + + # Background (dark grey) + frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40] + + # Filled bar + fill_len = int(abs(action) * bar_half_w) + if action > 0: + color = [60, 200, 60] # green + x0 = bar_x_center + x1 = min(bar_x_center + fill_len, bar_x_right) + else: + color = [200, 60, 60] # red + x1 = bar_x_center + x0 = max(bar_x_center - fill_len, bar_x_left) + frame[bar_y:bar_y + bar_h, x0:x1] = color + + # Center tick mark (white) + frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255] \ No newline at end of file diff --git a/src/envs/rotary_cartpole.py b/src/envs/rotary_cartpole.py index 84eccf3..ebecca2 100644 --- a/src/envs/rotary_cartpole.py +++ b/src/envs/rotary_cartpole.py @@ -1,4 +1,5 @@ import dataclasses +import math import torch from src.core.env import BaseEnv, BaseEnvConfig from gymnasium import spaces @@ -21,7 +22,12 @@ class RotaryCartPoleConfig(BaseEnvConfig): """ # Reward shaping reward_upright_scale: float = 1.0 # cos(pendulum) when upright = +1 - speed_penalty_scale: float = 0.1 # penalty for high pendulum velocity near top + motor_vel_penalty: float = 0.01 # penalise high motor angular velocity + motor_angle_penalty: float = 0.05 # penalise deviation from centre + action_penalty: float = 0.05 # penalise large actions (energy cost) + + # ── Software safety limit (env-level, on top of URDF + hardware) ── + motor_angle_limit_deg: float = 90.0 # terminate episode if exceeded class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]): @@ -34,12 +40,14 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]): Using sin/cos avoids discontinuities at ±π for continuous joints. Actions (1): - Torque applied to the motor_joint. + Torque applied to the motor_joint (normalised to [-1, 1]). """ def __init__(self, config: RotaryCartPoleConfig): super().__init__(config) + # ── Spaces ─────────────────────────────────────────────────── + @property def observation_space(self) -> spaces.Space: return spaces.Box(low=-torch.inf, high=torch.inf, shape=(6,)) @@ -48,6 +56,8 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]): def action_space(self) -> spaces.Space: return spaces.Box(low=-1.0, high=1.0, shape=(1,)) + # ── State building ─────────────────────────────────────────── + def build_state(self, qpos: torch.Tensor, qvel: torch.Tensor) -> RotaryCartPoleState: return RotaryCartPoleState( motor_angle=qpos[:, 0], @@ -56,32 +66,64 @@ class RotaryCartPoleEnv(BaseEnv[RotaryCartPoleConfig]): pendulum_vel=qvel[:, 1], ) + # ── Observations ───────────────────────────────────────────── + def compute_observations(self, state: RotaryCartPoleState) -> torch.Tensor: - return torch.stack([ + obs = [ torch.sin(state.motor_angle), torch.cos(state.motor_angle), torch.sin(state.pendulum_angle), torch.cos(state.pendulum_angle), state.motor_vel, state.pendulum_vel, - ], dim=-1) + ] + return torch.stack(obs, dim=-1) + + # ── Rewards ────────────────────────────────────────────────── def compute_rewards(self, state: RotaryCartPoleState, actions: torch.Tensor) -> torch.Tensor: # Upright reward: -cos(θ) ∈ [-1, +1] - upright = -torch.cos(state.pendulum_angle) + reward = -torch.cos(state.pendulum_angle) * self.config.reward_upright_scale - # Penalise high pendulum velocity when near the top (upright). - # "nearness" is weighted by how upright the pendulum is (0 at bottom, 1 at top). - near_top = torch.clamp(-torch.cos(state.pendulum_angle), min=0.0) # 0‥1 - speed_penalty = self.config.speed_penalty_scale * near_top * state.pendulum_vel.abs() + # Penalise fast motor spinning (discourages violent oscillation) + reward = reward - self.config.motor_vel_penalty * state.motor_vel.pow(2) - return upright * self.config.reward_upright_scale - speed_penalty + # Penalise motor deviation from centre (keep arm near zero) + reward = reward - self.config.motor_angle_penalty * state.motor_angle.pow(2) + + # Penalise large actions (energy efficiency / smoother control) + reward = reward - self.config.action_penalty * actions.squeeze(-1).pow(2) + + # Penalty for exceeding motor angle limit (episode also terminates) + limit_rad = math.radians(self.config.motor_angle_limit_deg) + exceeded = state.motor_angle.abs() >= limit_rad + reward = torch.where(exceeded, torch.tensor(-1000.0, device=reward.device), reward) + + return reward + + # ── Terminations ───────────────────────────────────────────── def compute_terminations(self, state: RotaryCartPoleState) -> torch.Tensor: - # No early termination — episode runs for max_steps (truncation only). - # The agent must learn to swing up AND balance continuously. - return torch.zeros_like(state.motor_angle, dtype=torch.bool) + # Software safety: terminate if motor angle exceeds limit. + limit_rad = math.radians(self.config.motor_angle_limit_deg) + exceeded = state.motor_angle.abs() >= limit_rad + return exceeded + + # ── Reset readiness (for SerialRunner) ─────────────────────── + + def is_reset_ready(self, qpos: torch.Tensor, qvel: torch.Tensor) -> bool: + """Pendulum must be hanging still and motor near center.""" + motor_angle = float(qpos[0, 0]) + pend_angle = float(qpos[0, 1]) + motor_vel = float(qvel[0, 0]) + pend_vel = float(qvel[0, 1]) + + # Pendulum near hanging-down (angle ~0) and slow + angle_ok = abs(pend_angle) < math.radians(2.0) + vel_ok = abs(pend_vel) < math.radians(5.0) + # Motor near center and slow + motor_ok = abs(motor_angle) < math.radians(5.0) + motor_vel_ok = abs(motor_vel) < math.radians(10.0) + return angle_ok and vel_ok and motor_ok and motor_vel_ok + - def get_default_qpos(self, nq: int) -> list[float] | None: - # qpos=0 = pendulum hanging down (joint frame rotated in URDF). - return None diff --git a/src/runners/mjx.py b/src/runners/mjx.py index a92ac2d..b67ace9 100644 --- a/src/runners/mjx.py +++ b/src/runners/mjx.py @@ -29,7 +29,10 @@ import numpy as np from src.core.env import BaseEnv from src.core.runner import BaseRunner, BaseRunnerConfig -from src.runners.mujoco import MuJoCoRunner # reuse _load_model +from src.runners.mujoco import ( + ActuatorLimits, + load_mujoco_model, +) log = structlog.get_logger() @@ -39,7 +42,7 @@ class MJXRunnerConfig(BaseRunnerConfig): num_envs: int = 1024 device: str = "cuda" dt: float = 0.002 - substeps: int = 20 + substeps: int = 10 class MJXRunner(BaseRunner[MJXRunnerConfig]): @@ -64,7 +67,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]): def _sim_initialize(self, config: MJXRunnerConfig) -> None: # Step 1: Load CPU model (reuses URDF → MJCF → actuator injection) - self._mj_model = MuJoCoRunner._load_model(self.env.robot) + self._mj_model = load_mujoco_model(self.env.robot) self._mj_model.opt.timestep = config.dt self._nq = self._mj_model.nq self._nv = self._mj_model.nv @@ -75,9 +78,6 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]): # Step 3: Default reset state on GPU default_data = mujoco.MjData(self._mj_model) - default_qpos = self.env.get_default_qpos(self._nq) - if default_qpos is not None: - default_data.qpos[:] = default_qpos mujoco.mj_forward(self._mj_model, default_data) self._default_mjx_data = mjx.put_data(self._mj_model, default_data) @@ -85,6 +85,18 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]): self._rng = jax.random.PRNGKey(42) self._batch_data = self._make_batched_data(config.num_envs) + # Step 4b: Build motor model info (ctrl_idx, qvel_idx, ActuatorConfig) + self._motor_info: list[tuple[int, int]] = [] + self._motor_acts: list = [] + for ctrl_idx, act in enumerate(self.env.robot.actuators): + if act.has_motor_model: + jnt_id = mujoco.mj_name2id( + self._mj_model, mujoco.mjtObj.mjOBJ_JOINT, act.joint, + ) + qvel_idx = self._mj_model.jnt_dofadr[jnt_id] + self._motor_info.append((ctrl_idx, qvel_idx)) + self._motor_acts.append(act) + # Step 5: JIT-compile the hot-path functions self._compile_jit_fns(config.substeps) @@ -121,28 +133,77 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]): model = self._mjx_model default = self._default_mjx_data - # ── Per-actuator limit info (shared helper on MuJoCoRunner) ── - lim = MuJoCoRunner._extract_actuator_limits(self._mj_model) + lim = ActuatorLimits(self._mj_model) act_jnt_ids = jnp.array(lim.jnt_ids) act_limited = jnp.array(lim.limited) act_lo = jnp.array(lim.lo) act_hi = jnp.array(lim.hi) act_gs = jnp.array(lim.gear_sign) + # ── Motor model params (JAX arrays for JIT) ───────────────── + _has_motor = len(self._motor_info) > 0 + if _has_motor: + acts = self._motor_acts + _ctrl_ids = jnp.array([c for c, _ in self._motor_info]) + _qvel_ids = jnp.array([q for _, q in self._motor_info]) + _dz_pos = jnp.array([a.deadzone[0] for a in acts]) + _dz_neg = jnp.array([a.deadzone[1] for a in acts]) + _gear_pos = jnp.array([a.gear[0] for a in acts]) + _gear_neg = jnp.array([a.gear[1] for a in acts]) + _gear_avg = jnp.array([a.gear_avg for a in acts]) + _fl_pos = jnp.array([a.frictionloss[0] for a in acts]) + _fl_neg = jnp.array([a.frictionloss[1] for a in acts]) + _damp_pos = jnp.array([a.damping[0] for a in acts]) + _damp_neg = jnp.array([a.damping[1] for a in acts]) + _visc_quad = jnp.array([a.viscous_quadratic for a in acts]) + _back_emf = jnp.array([a.back_emf_gain for a in acts]) + # ── Batched step (N substeps per call) ────────────────────── @jax.jit def step_fn(data): # Software limit switch: clamp ctrl once before substeps. - # Armature + default joint limits prevent significant overshoot - # within the substep window, so per-substep clamping isn't needed. pos = data.qpos[:, act_jnt_ids] ctrl = data.ctrl at_hi = act_limited & (pos >= act_hi) & (act_gs * ctrl > 0) at_lo = act_limited & (pos <= act_lo) & (act_gs * ctrl < 0) - clamped = jnp.where(at_hi | at_lo, 0.0, ctrl) - data = data.replace(ctrl=clamped) + ctrl = jnp.where(at_hi | at_lo, 0.0, ctrl) + + if _has_motor: + # Deadzone + asymmetric gear compensation + mc = ctrl[:, _ctrl_ids] + mc = jnp.where((mc >= 0) & (mc < _dz_pos), 0.0, mc) + mc = jnp.where((mc < 0) & (mc > -_dz_neg), 0.0, mc) + gear_dir = jnp.where(mc >= 0, _gear_pos, _gear_neg) + mc = mc * gear_dir / _gear_avg + ctrl = ctrl.at[:, _ctrl_ids].set(mc) + + data = data.replace(ctrl=ctrl) def body(_, d): + if _has_motor: + vel = d.qvel[:, _qvel_ids] + mc = d.ctrl[:, _ctrl_ids] + + # Coulomb friction (direction-dependent) + fl = jnp.where(vel > 0, _fl_pos, _fl_neg) + torque = -jnp.where( + jnp.abs(vel) > 1e-6, jnp.sign(vel) * fl, 0.0, + ) + # Viscous damping (direction-dependent) + damp = jnp.where(vel > 0, _damp_pos, _damp_neg) + torque = torque - damp * vel + # Quadratic velocity drag + torque = torque - _visc_quad * vel * jnp.abs(vel) + # Back-EMF torque reduction + bemf = _back_emf * vel * jnp.sign(mc) + torque = torque - jnp.where( + jnp.abs(mc) > 1e-6, bemf, 0.0, + ) + torque = jnp.clip(torque, -10.0, 10.0) + d = d.replace( + qfrc_applied=d.qfrc_applied.at[:, _qvel_ids].set(torque), + ) + return jax.vmap(mjx.step, in_axes=(None, 0))(model, d) return jax.lax.fori_loop(0, substeps, body, data) @@ -210,7 +271,7 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]): # ── Rendering ──────────────────────────────────────────────────── - def render(self, env_idx: int = 0) -> np.ndarray: + def _render_frame(self, env_idx: int = 0) -> np.ndarray: """Offscreen render — copies one env's state from GPU to CPU.""" self._render_data.qpos[:] = np.asarray(self._batch_data.qpos[env_idx]) self._render_data.qvel[:] = np.asarray(self._batch_data.qvel[env_idx]) @@ -222,10 +283,4 @@ class MJXRunner(BaseRunner[MJXRunnerConfig]): self._mj_model, width=640, height=480, ) self._offscreen_renderer.update_scene(self._render_data) - frame = self._offscreen_renderer.render().copy() - - # Import shared overlay helper from mujoco runner - from src.runners.mujoco import _draw_action_overlay - ctrl_val = float(self._render_data.ctrl[0]) if self._mj_model.nu > 0 else 0.0 - _draw_action_overlay(frame, ctrl_val) - return frame + return self._offscreen_renderer.render().copy() diff --git a/src/runners/mujoco.py b/src/runners/mujoco.py index 73f071b..811f057 100644 --- a/src/runners/mujoco.py +++ b/src/runners/mujoco.py @@ -1,4 +1,6 @@ import dataclasses +import os +import tempfile import xml.etree.ElementTree as ET from pathlib import Path @@ -10,26 +12,195 @@ from src.core.env import BaseEnv from src.core.robot import RobotConfig from src.core.runner import BaseRunner, BaseRunnerConfig + @dataclasses.dataclass class MuJoCoRunnerConfig(BaseRunnerConfig): num_envs: int = 16 device: str = "cpu" - dt: float = 0.02 - substeps: int = 2 + dt: float = 0.002 + substeps: int = 10 -@dataclasses.dataclass class ActuatorLimits: - """Per-actuator joint-limit info used for software ctrl clamping. + """Software limit-switch: cuts motor ctrl when a joint hits its range. - Real motor controllers have position limits that prevent the motor - from driving into the mechanical hard stop. + The real robot has physical limit switches that kill motor current + at the travel endpoints. MuJoCo's built-in joint limits only apply + a spring force — they don't zero the actuator signal. This class + replicates the hardware behavior. """ - jnt_ids: np.ndarray # (nu,) joint index for each actuator - limited: np.ndarray # (nu,) bool — whether that joint is limited - lo: np.ndarray # (nu,) lower position bound - hi: np.ndarray # (nu,) upper position bound - gear_sign: np.ndarray # (nu,) sign of gear ratio + + def __init__(self, model: mujoco.MjModel) -> None: + jnt_ids = model.actuator_trnid[:model.nu, 0] + self.jnt_ids = jnt_ids + self.limited = model.jnt_limited[jnt_ids].astype(bool) + self.lo = model.jnt_range[jnt_ids, 0] + self.hi = model.jnt_range[jnt_ids, 1] + self.gear_sign = np.sign(model.actuator_gear[:model.nu, 0]) + + def enforce(self, model: mujoco.MjModel, data: mujoco.MjData) -> None: + """Zero ctrl that would push past joint limits (call every substep).""" + if not self.limited.any(): + return + pos = data.qpos[self.jnt_ids] + signed_ctrl = self.gear_sign * data.ctrl[:model.nu] + at_hi = self.limited & (pos >= self.hi) & (signed_ctrl > 0) + at_lo = self.limited & (pos <= self.lo) & (signed_ctrl < 0) + data.ctrl[at_hi | at_lo] = 0.0 + + +# ── Public utilities ───────────────────────────────────────────────── + + +def load_mujoco_model(robot: RobotConfig) -> mujoco.MjModel: + """Load a URDF (or MJCF) and apply robot.yaml settings. + + Single model-loading entry point for all MuJoCo-based code: + training runners, MJX, and system identification. + + Two-step approach required because MuJoCo's URDF parser ignores + ```` in the ```` extension block: + + 1. Load the URDF -> MuJoCo converts it to internal MJCF + 2. Export the MJCF XML, inject actuators + joint overrides, reload + + This keeps the URDF clean (re-exportable from CAD) -- all hardware + tuning lives in ``robot.yaml``. + """ + abs_path = robot.urdf_path.resolve() + model_dir = abs_path.parent + is_urdf = abs_path.suffix.lower() == ".urdf" + + # -- Step 1: Load URDF with meshdir injection -- + if is_urdf: + tree = ET.parse(abs_path) + root = tree.getroot() + + # MuJoCo's URDF parser strips directory prefixes from mesh + # filenames, so we inject a + # block. The original URDF stays clean and simulator-agnostic. + meshdir = None + for mesh_el in root.iter("mesh"): + fn = mesh_el.get("filename", "") + parent = str(Path(fn).parent) + if parent and parent != ".": + meshdir = parent + break + if meshdir: + mj_ext = ET.SubElement(root, "mujoco") + ET.SubElement(mj_ext, "compiler", attrib={ + "meshdir": meshdir, + "balanceinertia": "true", + }) + + # Write to a temp file (unique name for multiprocessing safety). + fd, tmp_path = tempfile.mkstemp( + suffix=".urdf", prefix="_mj_", dir=str(model_dir), + ) + os.close(fd) + try: + tree.write(tmp_path, xml_declaration=True, encoding="unicode") + model_raw = mujoco.MjModel.from_xml_path(tmp_path) + finally: + Path(tmp_path).unlink(missing_ok=True) + else: + model_raw = mujoco.MjModel.from_xml_path(str(abs_path)) + + # If robot.yaml has no actuators/joints, we're done. + if not robot.actuators and not robot.joints: + return model_raw + + # -- Step 2: Export MJCF, inject actuators + joint overrides -- + fd, tmp_path = tempfile.mkstemp( + suffix=".xml", prefix="_mj_", dir=str(model_dir), + ) + os.close(fd) + try: + mujoco.mj_saveLastXML(tmp_path, model_raw) + mjcf_str = Path(tmp_path).read_text() + root = ET.fromstring(mjcf_str) + + # -- Inject actuators -- + if robot.actuators: + act_elem = ET.SubElement(root, "actuator") + for act in robot.actuators: + attribs = { + "name": f"{act.joint}_{act.type}", + "joint": act.joint, + "gear": str(act.gear_avg), + "ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}", + } + + # dyntype is only available on , not on + # shortcut elements like //. + use_general = act.filter_tau > 0 + + if use_general: + attribs["dyntype"] = "filter" + attribs["dynprm"] = str(act.filter_tau) + attribs["gaintype"] = "fixed" + if act.type == "position": + attribs["biastype"] = "affine" + attribs["gainprm"] = str(act.kp) + attribs["biasprm"] = f"0 -{act.kp} -{act.kv}" + elif act.type == "velocity": + attribs["biastype"] = "affine" + attribs["gainprm"] = str(act.kp) + attribs["biasprm"] = f"0 0 -{act.kp}" + else: # motor + attribs["biastype"] = "none" + ET.SubElement(act_elem, "general", attrib=attribs) + else: + if act.type == "position": + attribs["kp"] = str(act.kp) + if act.kv > 0: + attribs["kv"] = str(act.kv) + elif act.type == "velocity": + attribs["kp"] = str(act.kp) + ET.SubElement(act_elem, act.type, attrib=attribs) + + # -- Apply joint overrides from robot.yaml -- + # For actuated joints with a motor model, MuJoCo damping/frictionloss + # are set to 0 — the motor model handles them via qfrc_applied. + joint_damping: dict[str, float] = {} + joint_frictionloss: dict[str, float] = {} + for act in robot.actuators: + if act.has_motor_model: + joint_damping[act.joint] = 0.0 + joint_frictionloss[act.joint] = 0.0 + joint_armature: dict[str, float] = {} + for name, jcfg in robot.joints.items(): + if jcfg.damping is not None: + joint_damping[name] = jcfg.damping + if jcfg.armature is not None: + joint_armature[name] = jcfg.armature + if jcfg.frictionloss is not None: + joint_frictionloss[name] = jcfg.frictionloss + + for body in root.iter("body"): + for jnt in body.findall("joint"): + name = jnt.get("name") + if name in joint_damping: + jnt.set("damping", str(joint_damping[name])) + if name in joint_armature: + jnt.set("armature", str(joint_armature[name])) + if name in joint_frictionloss: + jnt.set("frictionloss", str(joint_frictionloss[name])) + + # Disable self-collision on all geoms. + for geom in root.iter("geom"): + geom.set("contype", "0") + geom.set("conaffinity", "0") + + modified_xml = ET.tostring(root, encoding="unicode") + Path(tmp_path).write_text(modified_xml) + return mujoco.MjModel.from_xml_path(tmp_path) + finally: + Path(tmp_path).unlink(missing_ok=True) + + +# -- Runner ----------------------------------------------------------- + class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]): def __init__(self, env: BaseEnv, config: MuJoCoRunnerConfig): @@ -38,201 +209,52 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]): @property def num_envs(self) -> int: return self.config.num_envs - + @property def device(self) -> torch.device: return torch.device(self.config.device) - - @staticmethod - def _load_model(robot: RobotConfig) -> mujoco.MjModel: - """Load a URDF (or MJCF) and apply robot.yaml settings. - - Two-step approach required because MuJoCo's URDF parser ignores - in the extension block: - 1. Load the URDF → MuJoCo converts it to internal MJCF - 2. Export the MJCF XML, inject actuators + joint overrides, reload - - This keeps the URDF clean (re-exportable from CAD) — all hardware - tuning lives in robot.yaml. - """ - abs_path = robot.urdf_path.resolve() - model_dir = abs_path.parent - is_urdf = abs_path.suffix.lower() == ".urdf" - - # MuJoCo's URDF parser strips directory prefixes from mesh filenames, - # so we inject a block into a - # temporary copy. The original URDF stays clean and simulator-agnostic. - if is_urdf: - tree = ET.parse(abs_path) - root = tree.getroot() - # Detect the mesh subdirectory from the first mesh filename - meshdir = None - for mesh_el in root.iter("mesh"): - fn = mesh_el.get("filename", "") - parent = str(Path(fn).parent) - if parent and parent != ".": - meshdir = parent - break - if meshdir: - mj_ext = ET.SubElement(root, "mujoco") - ET.SubElement(mj_ext, "compiler", attrib={ - "meshdir": meshdir, - "balanceinertia": "true", - }) - tmp_urdf = model_dir / "_tmp_mujoco_load.urdf" - tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode") - try: - model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf)) - finally: - tmp_urdf.unlink() - else: - model_raw = mujoco.MjModel.from_xml_path(str(abs_path)) - - if not robot.actuators and not robot.joints: - return model_raw - - # Step 2: Export internal MJCF, inject actuators + joint overrides, reload - tmp_mjcf = model_dir / "_tmp_actuator_inject.xml" - try: - mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw) - mjcf_str = tmp_mjcf.read_text() - - root = ET.fromstring(mjcf_str) - - # ── Inject actuators ──────────────────────────────────── - if robot.actuators: - act_elem = ET.SubElement(root, "actuator") - for act in robot.actuators: - attribs = { - "name": f"{act.joint}_{act.type}", - "joint": act.joint, - "gear": str(act.gear), - "ctrlrange": f"{act.ctrl_range[0]} {act.ctrl_range[1]}", - } - - # Actuator dynamics: 1st-order low-pass filter on ctrl. - # MuJoCo applies this every physics substep, which is - # more physically accurate than an external EMA. - # dyntype is only available on , not on - # shortcut elements like //. - use_general = act.filter_tau > 0 - - if use_general: - attribs["dyntype"] = "filter" - attribs["dynprm"] = str(act.filter_tau) - - if use_general: - attribs["gaintype"] = "fixed" - if act.type == "position": - attribs["biastype"] = "affine" - attribs["gainprm"] = str(act.kp) - attribs["biasprm"] = f"0 -{act.kp} -{act.kv}" - elif act.type == "velocity": - attribs["biastype"] = "affine" - attribs["gainprm"] = str(act.kp) - attribs["biasprm"] = f"0 0 -{act.kp}" - else: # motor - attribs["biastype"] = "none" - ET.SubElement(act_elem, "general", attrib=attribs) - else: - if act.type == "position": - attribs["kp"] = str(act.kp) - if act.kv > 0: - attribs["kv"] = str(act.kv) - elif act.type == "velocity": - attribs["kp"] = str(act.kp) - ET.SubElement(act_elem, act.type, attrib=attribs) - - # ── Apply joint overrides from robot.yaml ─────────────── - # Merge actuator damping + explicit joint overrides - joint_damping = {a.joint: a.damping for a in robot.actuators} - joint_armature: dict[str, float] = {} - joint_frictionloss: dict[str, float] = {} - for name, jcfg in robot.joints.items(): - if jcfg.damping is not None: - joint_damping[name] = jcfg.damping - if jcfg.armature is not None: - joint_armature[name] = jcfg.armature - if jcfg.frictionloss is not None: - joint_frictionloss[name] = jcfg.frictionloss - - for body in root.iter("body"): - for jnt in body.findall("joint"): - name = jnt.get("name") - if name in joint_damping: - jnt.set("damping", str(joint_damping[name])) - if name in joint_armature: - jnt.set("armature", str(joint_armature[name])) - if name in joint_frictionloss: - jnt.set("frictionloss", str(joint_frictionloss[name])) - - # Disable self-collision on all geoms. - # URDF mesh convex hulls often overlap at joints (especially - # grandparent↔grandchild bodies that MuJoCo does NOT auto-exclude), - # causing phantom contact forces. - for geom in root.iter("geom"): - geom.set("contype", "0") - geom.set("conaffinity", "0") - - # Joint limits use MuJoCo's default solver settings. - # The software limit switch (zeroing ctrl at limits) + armature - # prevent overshoot without needing ultra-stiff solref that - # kills MJX GPU solver performance. - - # Step 4: Write modified MJCF and reload from file path - # (from_xml_path resolves mesh paths relative to the file location) - modified_xml = ET.tostring(root, encoding="unicode") - tmp_mjcf.write_text(modified_xml) - return mujoco.MjModel.from_xml_path(str(tmp_mjcf)) - finally: - tmp_mjcf.unlink(missing_ok=True) - - @staticmethod - def _extract_actuator_limits(model: mujoco.MjModel) -> ActuatorLimits: - """Extract per-actuator joint-limit arrays from a loaded MjModel. - - Shared by MuJoCoRunner and MJXRunner so the logic isn't duplicated. - """ - nu = model.nu - jnt_ids = np.array([model.actuator_trnid[i, 0] for i in range(nu)]) - return ActuatorLimits( - jnt_ids=jnt_ids, - limited=np.array([model.jnt_limited[j] for j in jnt_ids], dtype=bool), - lo=np.array([model.jnt_range[j, 0] for j in jnt_ids]), - hi=np.array([model.jnt_range[j, 1] for j in jnt_ids]), - gear_sign=np.sign(np.array([model.actuator_gear[i, 0] for i in range(nu)])), - ) def _sim_initialize(self, config: MuJoCoRunnerConfig) -> None: - self._model = self._load_model(self.env.robot) + self._model = load_mujoco_model(self.env.robot) self._model.opt.timestep = config.dt - self._data: list[mujoco.MjData] = [mujoco.MjData(self._model) for _ in range(config.num_envs)] - + self._data: list[mujoco.MjData] = [ + mujoco.MjData(self._model) for _ in range(config.num_envs) + ] self._nq = self._model.nq self._nv = self._model.nv + self._limits = ActuatorLimits(self._model) - self._limits = self._extract_actuator_limits(self._model) + # Build motor model: list of (actuator_config, joint_qvel_index) for + # actuators that have asymmetric motor dynamics. + self._motor_actuators: list[tuple] = [] + for act in self.env.robot.actuators: + if act.has_motor_model: + jnt_id = mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_JOINT, act.joint) + qvel_idx = self._model.jnt_dofadr[jnt_id] + self._motor_actuators.append((act, qvel_idx)) def _sim_step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: actions_np: np.ndarray = actions.cpu().numpy() + # Apply per-actuator ctrl transform (deadzone + gear compensation) + for act_idx, (act, _) in enumerate(self._motor_actuators): + for env_idx in range(self.num_envs): + actions_np[env_idx, act_idx] = act.transform_ctrl( + float(actions_np[env_idx, act_idx]) + ) + qpos_batch = np.zeros((self.num_envs, self._nq), dtype=np.float32) qvel_batch = np.zeros((self.num_envs, self._nv), dtype=np.float32) for i, data in enumerate(self._data): data.ctrl[:] = actions_np[i] for _ in range(self.config.substeps): - # Software limit switch: zero out ctrl that would push - # a joint past its position limit (like a real controller). - lim = self._limits - for a in range(self._model.nu): - if lim.limited[a]: - pos = data.qpos[lim.jnt_ids[a]] - gs = lim.gear_sign[a] - if pos >= lim.hi[a] and gs * data.ctrl[a] > 0: - data.ctrl[a] = 0.0 - elif pos <= lim.lo[a] and gs * data.ctrl[a] < 0: - data.ctrl[a] = 0.0 + # Apply asymmetric motor forces via qfrc_applied + for act, qvel_idx in self._motor_actuators: + vel = data.qvel[qvel_idx] + ctrl = data.ctrl[0] # TODO: generalise for multi-actuator + data.qfrc_applied[qvel_idx] = act.compute_motor_force(vel, ctrl) + self._limits.enforce(self._model, data) mujoco.mj_step(self._model, data) qpos_batch[i] = data.qpos @@ -242,7 +264,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]): torch.from_numpy(qpos_batch).to(self.device), torch.from_numpy(qvel_batch).to(self.device), ) - + def _sim_reset(self, env_ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: ids = env_ids.cpu().numpy() n = len(ids) @@ -250,21 +272,13 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]): qpos_batch = np.zeros((n, self._nq), dtype=np.float32) qvel_batch = np.zeros((n, self._nv), dtype=np.float32) - default_qpos = self.env.get_default_qpos(self._nq) - for i, env_id in enumerate(ids): data = self._data[env_id] mujoco.mj_resetData(self._model, data) - # Set initial pose (env-specific, e.g. pendulum hanging down) - if default_qpos is not None: - data.qpos[:] = default_qpos - - # Add small random perturbation for exploration + # Small random perturbation for exploration data.qpos[:] += np.random.uniform(-0.05, 0.05, size=self._nq) data.qvel[:] += np.random.uniform(-0.05, 0.05, size=self._nv) - - # Reset smoothed ctrl so motor starts from rest data.ctrl[:] = 0.0 qpos_batch[i] = data.qpos @@ -275,7 +289,7 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]): torch.from_numpy(qvel_batch).to(self.device), ) - def render(self, env_idx: int = 0) -> np.ndarray: + def _render_frame(self, env_idx: int = 0) -> np.ndarray: """Offscreen render of a single environment.""" if not hasattr(self, "_offscreen_renderer"): self._offscreen_renderer = mujoco.Renderer( @@ -283,43 +297,4 @@ class MuJoCoRunner(BaseRunner[MuJoCoRunnerConfig]): ) mujoco.mj_forward(self._model, self._data[env_idx]) self._offscreen_renderer.update_scene(self._data[env_idx]) - frame = self._offscreen_renderer.render().copy() - - # Draw action bar overlay — shows ctrl[0] as a horizontal bar - ctrl_val = float(self._data[env_idx].ctrl[0]) if self._model.nu > 0 else 0.0 - _draw_action_overlay(frame, ctrl_val) - return frame - - -def _draw_action_overlay(frame: np.ndarray, action: float) -> None: - """Draw an action bar + text on a rendered frame (no OpenCV needed). - - Bar is centered horizontally: green to the right (+), red to the left (-). - """ - h, w = frame.shape[:2] - - # Bar geometry - bar_y = h - 30 - bar_h = 16 - bar_x_center = w // 2 - bar_half_w = w // 4 # max half-width of the bar - bar_x_left = bar_x_center - bar_half_w - bar_x_right = bar_x_center + bar_half_w - - # Background (dark grey) - frame[bar_y:bar_y + bar_h, bar_x_left:bar_x_right] = [40, 40, 40] - - # Filled bar - fill_len = int(abs(action) * bar_half_w) - if action > 0: - color = [60, 200, 60] # green - x0 = bar_x_center - x1 = min(bar_x_center + fill_len, bar_x_right) - else: - color = [200, 60, 60] # red - x1 = bar_x_center - x0 = max(bar_x_center - fill_len, bar_x_left) - frame[bar_y:bar_y + bar_h, x0:x1] = color - - # Center tick mark (white) - frame[bar_y:bar_y + bar_h, bar_x_center - 1:bar_x_center + 1] = [255, 255, 255] \ No newline at end of file + return self._offscreen_renderer.render().copy() diff --git a/src/runners/serial.py b/src/runners/serial.py index 1c38725..396881a 100644 --- a/src/runners/serial.py +++ b/src/runners/serial.py @@ -11,11 +11,15 @@ Serial protocol (ESP32 firmware): H — stop streaming M — set motor PWM speed (-255 … 255) - State lines received FROM the ESP32: - S,,,,,, - ,,,, - , - (12 comma-separated fields after the ``S`` prefix) + State lines received FROM the ESP32 (firmware sends SI units): + S,,,,,, + (7 comma-separated fields after the ``S`` prefix) + + motor_rad — motor joint angle (radians) + motor_vel — motor joint velocity (rad/s) + pend_rad — pendulum angle (radians, 0 = hanging down) + pend_vel — pendulum angular velocity (rad/s) + motor_speed — applied PWM (-255..255, for action recording) A daemon thread continuously reads the serial stream so the control loop never blocks on I/O. @@ -28,7 +32,6 @@ from __future__ import annotations import dataclasses import logging -import math import threading import time from typing import Any @@ -36,8 +39,6 @@ from typing import Any import numpy as np import torch -from src.core.env import BaseEnv -from src.core.hardware import HardwareConfig, load_hardware_config from src.core.runner import BaseRunner, BaseRunnerConfig logger = logging.getLogger(__name__) @@ -52,9 +53,14 @@ class SerialRunnerConfig(BaseRunnerConfig): port: str = "/dev/cu.usbserial-0001" baud: int = 115200 - dt: float = 0.02 # control loop period (seconds), 50 Hz + dt: float = 0.04 # control loop period (seconds), 25 Hz no_data_timeout: float = 2.0 # seconds of silence → disconnect - encoder_jump_threshold: int = 200 # encoder tick jump → reboot + + # Physical reset procedure + reset_drive_speed: int = 80 # PWM for bang-bang drive-to-center + reset_deadband_rad: float = 0.01 # "centered" threshold (~0.6°) + reset_drive_timeout: float = 3.0 # seconds to reach center + reset_settle_timeout: float = 30.0 # seconds to wait for pendulum class SerialRunner(BaseRunner[SerialRunnerConfig]): @@ -80,22 +86,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): return torch.device("cpu") def _sim_initialize(self, config: SerialRunnerConfig) -> None: - # Load hardware description (encoder, safety, reset params). - hw = load_hardware_config(self.env.config.robot_path) - if hw is None: - raise FileNotFoundError( - f"hardware.yaml not found in {self.env.config.robot_path}. " - "The serial runner requires a hardware config for encoder, " - "safety, and reset parameters." - ) - self._hw: HardwareConfig = hw - self._counts_per_rev: float = hw.encoder.counts_per_rev - self._max_motor_angle_rad: float = ( - math.radians(hw.safety.max_motor_angle_deg) - if hw.safety.max_motor_angle_deg > 0 - else 0.0 - ) - # Joint dimensions for the rotary cartpole (motor + pendulum). self._nq = 2 self._nv = 2 @@ -105,8 +95,16 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): self._serial_mod = _serial + # Explicitly disable hardware flow control and exclusive mode to + # avoid termios.error (errno 22) on macOS with CH340/CP2102 adapters. self.ser: _serial.Serial = _serial.Serial( - config.port, config.baud, timeout=0.05 + port=config.port, + baudrate=config.baud, + timeout=0.05, + xonxoff=False, + rtscts=False, + dsrdtr=False, + exclusive=False, ) time.sleep(2) # Wait for ESP32 boot. self.ser.reset_input_buffer() @@ -116,22 +114,17 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): self._serial_disconnected: bool = False self._last_esp_ms: int = 0 self._last_data_time: float = time.monotonic() - self._last_encoder_count: int = 0 self._streaming: bool = False # Latest parsed state (updated by the reader thread). + # Firmware sends SI units — values are used directly as qpos/qvel. self._latest_state: dict[str, Any] = { "timestamp_ms": 0, - "encoder_count": 0, - "rpm": 0.0, + "motor_rad": 0.0, + "motor_vel": 0.0, + "pend_rad": 0.0, + "pend_vel": 0.0, "motor_speed": 0, - "at_limit": False, - "pendulum_angle": 0.0, - "pendulum_velocity": 0.0, - "target_speed": 0, - "braking": False, - "enc_vel_cps": 0.0, - "pendulum_ok": False, } self._state_lock = threading.Lock() self._state_event = threading.Event() @@ -148,6 +141,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): self._streaming = True self._last_data_time = time.monotonic() + # Derive max PWM from actuator ctrl_range so the serial + # command range matches what MuJoCo training sees. + ctrl_hi = self.env.robot.actuators[0].ctrl_range[1] + self._max_pwm: int = round(ctrl_hi * 255) + # Track wall-clock time of last step for PPO-gap detection. self._last_step_time: float = time.monotonic() @@ -170,41 +168,22 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): self._sim_reset(all_ids) self.step_counts.zero_() - step_start = time.monotonic() - - # Map normalised action [-1, 1] → PWM [-255, 255]. + # Map normalised action [-1, 1] → PWM, scaled by ctrl_range. action_val = float(actions[0, 0].clamp(-1.0, 1.0)) - motor_speed = int(action_val * 255) + motor_speed = int(action_val * self._max_pwm) self._send(f"M{motor_speed}") - # Enforce dt wall-clock timing. - elapsed = time.monotonic() - step_start - remaining = self.config.dt - elapsed - if remaining > 0: - time.sleep(remaining) + # Stream-driven: block until the firmware sends the next state + # line (~20 ms at 50 Hz). + state = self._read_state_blocking(timeout=0.1) - # Read latest sensor data (non-blocking — dt sleep ensures freshness). - state = self._read_state() + # Firmware sends SI units — use directly. + qpos, qvel = self._state_to_tensors(state) - motor_angle = ( - state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi - ) - motor_vel = ( - state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi - ) - pendulum_angle = math.radians(state["pendulum_angle"]) - pendulum_vel = math.radians(state["pendulum_velocity"]) - - # Cache motor angle for safety check in step() — avoids a second read. - self._last_motor_angle_rad = motor_angle + # Cache for _sync_viz(). + self._last_sync_state = state self._last_step_time = time.monotonic() - qpos = torch.tensor( - [[motor_angle, pendulum_angle]], dtype=torch.float32 - ) - qvel = torch.tensor( - [[motor_vel, pendulum_vel]], dtype=torch.float32 - ) return qpos, qvel def _sim_reset( @@ -232,59 +211,42 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): # Physically return the motor to the centre position. self._drive_to_center() - # Wait until the pendulum settles. - self._wait_for_pendulum_still() + # Wait until the env considers the robot settled. + self._wait_for_settle() # Refresh data timer so health checks don't false-positive. self._last_data_time = time.monotonic() # Read settled state and return as qpos/qvel. state = self._read_state_blocking() - motor_angle = ( - state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi - ) - motor_vel = ( - state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi - ) - pendulum_angle = math.radians(state["pendulum_angle"]) - pendulum_vel = math.radians(state["pendulum_velocity"]) + qpos, qvel = self._state_to_tensors(state) - qpos = torch.tensor( - [[motor_angle, pendulum_angle]], dtype=torch.float32 - ) - qvel = torch.tensor( - [[motor_vel, pendulum_vel]], dtype=torch.float32 - ) + self._last_sync_state = state return qpos, qvel def _sim_close(self) -> None: self._reader_running = False self._streaming = False - self._send("H") # Stop streaming. - self._send("M0") # Stop motor. + self._send("H") + self._send("M0") time.sleep(0.1) self._reader_thread.join(timeout=1.0) self.ser.close() - if hasattr(self, "_offscreen_renderer") and self._offscreen_renderer is not None: - self._offscreen_renderer.close() + super()._sim_close() # ------------------------------------------------------------------ # MuJoCo digital-twin rendering # ------------------------------------------------------------------ def _ensure_viz_model(self) -> None: - """Lazily load the MuJoCo model for visualisation (digital twin). - - Reuses the same URDF + robot.yaml that the MuJoCoRunner would use, - but only for rendering — no physics stepping. - """ + """Lazily load the MuJoCo model for visualisation (digital twin).""" if hasattr(self, "_viz_model"): return import mujoco - from src.runners.mujoco import MuJoCoRunner + from src.runners.mujoco import load_mujoco_model - self._viz_model = MuJoCoRunner._load_model(self.env.robot) + self._viz_model = load_mujoco_model(self.env.robot) self._viz_data = mujoco.MjData(self._viz_model) self._offscreen_renderer = None @@ -293,32 +255,21 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): import mujoco self._ensure_viz_model() - state = self._read_state() - # Set joint positions from serial data. - motor_angle = ( - state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi - ) - pendulum_angle = math.radians(state["pendulum_angle"]) - self._viz_data.qpos[0] = motor_angle - self._viz_data.qpos[1] = pendulum_angle + last_state = getattr(self, "_last_sync_state", None) + if last_state is None: + last_state = self._read_state() - # Set joint velocities (for any velocity-dependent visuals). - motor_vel = ( - state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi - ) - pendulum_vel = math.radians(state["pendulum_velocity"]) - self._viz_data.qvel[0] = motor_vel - self._viz_data.qvel[1] = pendulum_vel + # Firmware sends radians — use directly. + self._viz_data.qpos[0] = last_state["motor_rad"] + self._viz_data.qpos[1] = last_state["pend_rad"] + self._viz_data.qvel[0] = last_state["motor_vel"] + self._viz_data.qvel[1] = last_state["pend_vel"] - # Forward kinematics (updates body positions for rendering). mujoco.mj_forward(self._viz_model, self._viz_data) - def render(self, env_idx: int = 0) -> np.ndarray: - """Offscreen render of the digital-twin MuJoCo model. - - Called by VideoRecordingTrainer during training to capture frames. - """ + def _render_frame(self, env_idx: int = 0) -> np.ndarray: + """Offscreen render of the digital-twin MuJoCo model.""" import mujoco self._sync_viz() @@ -340,7 +291,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): # Check for ESP32 reboot / disconnect BEFORE stepping. if self._rebooted or self._serial_disconnected: self._send("M0") - # Return a terminal observation with penalty. qpos, qvel = self._make_current_state() state = self.env.build_state(qpos, qvel) obs = self.env.compute_observations(state) @@ -359,16 +309,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): rewards = torch.tensor([[-100.0]]) info["reboot_detected"] = True - # Check motor angle against hard safety limit. - # Uses the cached value from _sim_step — no extra serial read. - if self._max_motor_angle_rad > 0: - motor_angle = abs(getattr(self, "_last_motor_angle_rad", 0.0)) - if motor_angle >= self._max_motor_angle_rad: - self._send("M0") - terminated = torch.tensor([[True]]) - rewards = torch.tensor([[-100.0]]) - info["motor_limit_exceeded"] = True - # Always stop motor on episode end. if terminated.any() or truncated.any(): self._send("M0") @@ -387,7 +327,11 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): self._serial_disconnected = True def _serial_reader(self) -> None: - """Background thread: continuously read and parse serial lines.""" + """Background thread: continuously read and parse serial lines. + + Protocol: ``S,,,,,,`` + (7 comma-separated fields). Firmware sends SI units directly. + """ while self._reader_running: try: if self.ser.in_waiting: @@ -405,9 +349,15 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): if line.startswith("S,"): parts = line.split(",") - if len(parts) >= 12: - esp_ms = int(parts[1]) - enc = int(parts[2]) + if len(parts) >= 7: + try: + esp_ms = int(parts[1]) + except ValueError: + logger.debug( + "Malformed state line (header): %s", + line, + ) + continue # Detect reboot: timestamp jumped backwards. if ( @@ -422,38 +372,24 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): esp_ms, ) - # Detect reboot: encoder snapped to 0 from - # a far position. - if ( - abs(self._last_encoder_count) - > self.config.encoder_jump_threshold - and abs(enc) < 5 - ): - self._rebooted = True - logger.critical( - "ESP32 reboot detected: encoder" - " jumped %d -> %d", - self._last_encoder_count, - enc, - ) - self._last_esp_ms = esp_ms - self._last_encoder_count = enc self._last_data_time = time.monotonic() - parsed: dict[str, Any] = { - "timestamp_ms": esp_ms, - "encoder_count": enc, - "rpm": float(parts[3]), - "motor_speed": int(parts[4]), - "at_limit": bool(int(parts[5])), - "pendulum_angle": float(parts[6]), - "pendulum_velocity": float(parts[7]), - "target_speed": int(parts[8]), - "braking": bool(int(parts[9])), - "enc_vel_cps": float(parts[10]), - "pendulum_ok": bool(int(parts[11])), - } + try: + parsed: dict[str, Any] = { + "timestamp_ms": esp_ms, + "motor_rad": float(parts[2]), + "motor_vel": float(parts[3]), + "pend_rad": float(parts[4]), + "pend_vel": float(parts[5]), + "motor_speed": int(parts[6]), + } + except ValueError: + logger.debug( + "Malformed state line (fields): %s", + line, + ) + continue with self._state_lock: self._latest_state = parsed self._state_event.set() @@ -483,81 +419,68 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): return True def _read_state(self) -> dict[str, Any]: - """Return the most recent state from the reader thread (non-blocking). - - The background thread updates at ~50 Hz and `_sim_step` already - sleeps for `dt` before calling this, so the data is always fresh. - """ + """Return the most recent state from the reader thread (non-blocking).""" with self._state_lock: return dict(self._latest_state) def _read_state_blocking(self, timeout: float = 0.05) -> dict[str, Any]: - """Wait for a fresh sample, then return it. - - Used during reset / settling where we need to guarantee we have - a new reading (no prior dt sleep). - """ + """Wait for a fresh sample, then return it.""" self._state_event.clear() self._state_event.wait(timeout=timeout) with self._state_lock: return dict(self._latest_state) - def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]: - """Build qpos/qvel from current sensor data (utility).""" - state = self._read_state_blocking() - motor_angle = ( - state["encoder_count"] / self._counts_per_rev * 2.0 * math.pi - ) - motor_vel = ( - state["enc_vel_cps"] / self._counts_per_rev * 2.0 * math.pi - ) - pendulum_angle = math.radians(state["pendulum_angle"]) - pendulum_vel = math.radians(state["pendulum_velocity"]) - + def _state_to_tensors( + self, state: dict[str, Any], + ) -> tuple[torch.Tensor, torch.Tensor]: + """Convert a parsed state dict to (qpos, qvel) tensors.""" qpos = torch.tensor( - [[motor_angle, pendulum_angle]], dtype=torch.float32 + [[state["motor_rad"], state["pend_rad"]]], dtype=torch.float32 ) qvel = torch.tensor( - [[motor_vel, pendulum_vel]], dtype=torch.float32 + [[state["motor_vel"], state["pend_vel"]]], dtype=torch.float32 ) return qpos, qvel + def _make_current_state(self) -> tuple[torch.Tensor, torch.Tensor]: + """Build qpos/qvel from current sensor data (utility).""" + return self._state_to_tensors(self._read_state_blocking()) + # ------------------------------------------------------------------ # Physical reset helpers # ------------------------------------------------------------------ def _drive_to_center(self) -> None: - """Drive the motor back toward encoder=0 using bang-bang control.""" - rc = self._hw.reset + """Drive the motor back toward center using bang-bang control.""" + cfg = self.config start = time.time() - while time.time() - start < rc.drive_timeout: + while time.time() - start < cfg.reset_drive_timeout: state = self._read_state_blocking() - enc = state["encoder_count"] - if abs(enc) < rc.deadband: + motor_rad = state["motor_rad"] + if abs(motor_rad) < cfg.reset_deadband_rad: break - speed = rc.drive_speed if enc < 0 else -rc.drive_speed + speed = cfg.reset_drive_speed if motor_rad < 0 else -cfg.reset_drive_speed self._send(f"M{speed}") time.sleep(0.05) self._send("M0") time.sleep(0.2) - def _wait_for_pendulum_still(self) -> None: - """Block until the pendulum has settled (angle and velocity near zero).""" - rc = self._hw.reset + def _wait_for_settle(self) -> None: + """Block until the env considers the robot ready for a new episode.""" + cfg = self.config stable_since: float | None = None start = time.monotonic() - while time.monotonic() - start < rc.settle_timeout: + while time.monotonic() - start < cfg.reset_settle_timeout: state = self._read_state_blocking() - angle_ok = abs(state["pendulum_angle"]) < rc.settle_angle_deg - vel_ok = abs(state["pendulum_velocity"]) < rc.settle_vel_dps + qpos, qvel = self._state_to_tensors(state) - if angle_ok and vel_ok: + if self.env.is_reset_ready(qpos, qvel): if stable_since is None: stable_since = time.monotonic() - elif time.monotonic() - stable_since >= rc.settle_duration: + elif time.monotonic() - stable_since >= 0.5: logger.info( - "Pendulum settled after %.2f s", + "Robot settled after %.2f s", time.monotonic() - start, ) return @@ -566,6 +489,6 @@ class SerialRunner(BaseRunner[SerialRunnerConfig]): time.sleep(0.02) logger.warning( - "Pendulum did not fully settle within %.1f s — proceeding anyway.", - rc.settle_timeout, + "Robot did not settle within %.1f s — proceeding anyway.", + cfg.reset_settle_timeout, ) diff --git a/src/sysid/_urdf.py b/src/sysid/_urdf.py new file mode 100644 index 0000000..d703aca --- /dev/null +++ b/src/sysid/_urdf.py @@ -0,0 +1,79 @@ +"""URDF XML helpers shared by sysid rollout and export modules.""" + +from __future__ import annotations + +import xml.etree.ElementTree as ET + + +def set_mass(inertial: ET.Element, mass: float | None) -> None: + if mass is None: + return + mass_el = inertial.find("mass") + if mass_el is not None: + mass_el.set("value", str(mass)) + + +def set_com( + inertial: ET.Element, + x: float | None, + y: float | None, + z: float | None, +) -> None: + origin = inertial.find("origin") + if origin is None: + return + xyz = origin.get("xyz", "0 0 0").split() + if x is not None: + xyz[0] = str(x) + if y is not None: + xyz[1] = str(y) + if z is not None: + xyz[2] = str(z) + origin.set("xyz", " ".join(xyz)) + + +def set_inertia( + inertial: ET.Element, + ixx: float | None = None, + iyy: float | None = None, + izz: float | None = None, + ixy: float | None = None, + iyz: float | None = None, + ixz: float | None = None, +) -> None: + ine = inertial.find("inertia") + if ine is None: + return + for attr, val in [ + ("ixx", ixx), ("iyy", iyy), ("izz", izz), + ("ixy", ixy), ("iyz", iyz), ("ixz", ixz), + ]: + if val is not None: + ine.set(attr, str(val)) + + +def patch_link_inertials( + root: ET.Element, + params: dict[str, float], +) -> None: + """Patch arm and pendulum inertial parameters in a URDF ElementTree root.""" + for link in root.iter("link"): + link_name = link.get("name", "") + inertial = link.find("inertial") + if inertial is None: + continue + + if link_name == "arm": + set_mass(inertial, params.get("arm_mass")) + set_com(inertial, params.get("arm_com_x"), + params.get("arm_com_y"), params.get("arm_com_z")) + + elif link_name == "pendulum": + set_mass(inertial, params.get("pendulum_mass")) + set_com(inertial, params.get("pendulum_com_x"), + params.get("pendulum_com_y"), params.get("pendulum_com_z")) + set_inertia(inertial, + ixx=params.get("pendulum_ixx"), + iyy=params.get("pendulum_iyy"), + izz=params.get("pendulum_izz"), + ixy=params.get("pendulum_ixy")) diff --git a/src/sysid/capture.py b/src/sysid/capture.py index 9c54ea4..f202c74 100644 --- a/src/sysid/capture.py +++ b/src/sysid/capture.py @@ -6,6 +6,10 @@ the system, and records motor + pendulum angles and velocities at ~50 Hz. Saves a compressed numpy archive (.npz) that the optimizer can replay in simulation to fit physics parameters. +Serial protocol (same as SerialRunner): + S,,,,,, + (7 comma-separated fields — firmware sends SI units) + Usage: python -m src.sysid.capture \ --robot-path assets/rotary_cartpole \ @@ -17,7 +21,6 @@ from __future__ import annotations import argparse import math -import os import random import threading import time @@ -27,7 +30,6 @@ from typing import Any import numpy as np import structlog -import yaml log = structlog.get_logger() @@ -36,25 +38,24 @@ log = structlog.get_logger() def _parse_state_line(line: str) -> dict[str, Any] | None: - """Parse an ``S,…`` state line from the ESP32.""" + """Parse an ``S,…`` state line from the ESP32. + + Format: S,,,,,, + (7 comma-separated fields, firmware sends SI units) + """ if not line.startswith("S,"): return None parts = line.split(",") - if len(parts) < 12: + if len(parts) < 7: return None try: return { "timestamp_ms": int(parts[1]), - "encoder_count": int(parts[2]), - "rpm": float(parts[3]), - "motor_speed": int(parts[4]), - "at_limit": bool(int(parts[5])), - "pendulum_angle": float(parts[6]), - "pendulum_velocity": float(parts[7]), - "target_speed": int(parts[8]), - "braking": bool(int(parts[9])), - "enc_vel_cps": float(parts[10]), - "pendulum_ok": bool(int(parts[11])), + "motor_rad": float(parts[2]), + "motor_vel": float(parts[3]), + "pend_rad": float(parts[4]), + "pend_vel": float(parts[5]), + "motor_speed": int(parts[6]), } except (ValueError, IndexError): return None @@ -64,7 +65,12 @@ def _parse_state_line(line: str) -> dict[str, Any] | None: class _SerialReader: - """Minimal background reader for the ESP32 serial stream.""" + """Minimal background reader for the ESP32 serial stream. + + Uses a sequence counter so ``read_blocking()`` guarantees it returns + a *new* state line (not a stale repeat). This keeps the capture + loop locked to the firmware's 50 Hz tick. + """ def __init__(self, port: str, baud: int = 115200): import serial as _serial @@ -75,14 +81,16 @@ class _SerialReader: self.ser.reset_input_buffer() self._latest: dict[str, Any] = {} + self._seq: int = 0 # incremented on every new state line self._lock = threading.Lock() - self._event = threading.Event() + self._cond = threading.Condition(self._lock) self._running = True self._thread = threading.Thread(target=self._reader_loop, daemon=True) self._thread.start() def _reader_loop(self) -> None: + _debug_count = 0 while self._running: try: if self.ser.in_waiting: @@ -91,11 +99,16 @@ class _SerialReader: .decode("utf-8", errors="ignore") .strip() ) + # Debug: log first 10 raw lines so we can see what the firmware sends. + if _debug_count < 10 and line: + log.info("serial_raw_line", line=repr(line), count=_debug_count) + _debug_count += 1 parsed = _parse_state_line(line) if parsed is not None: - with self._lock: + with self._cond: self._latest = parsed - self._event.set() + self._seq += 1 + self._cond.notify_all() else: time.sleep(0.001) except (OSError, self._serial_mod.SerialException): @@ -108,14 +121,19 @@ class _SerialReader: except (OSError, self._serial_mod.SerialException): log.critical("serial_send_failed", cmd=cmd) - def read(self) -> dict[str, Any]: - with self._lock: - return dict(self._latest) - def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]: - self._event.clear() - self._event.wait(timeout=timeout) - return self.read() + """Wait until a *new* state line arrives, then return it. + + Uses a sequence counter to guarantee the returned state is + different from whatever was available before this call. + """ + with self._cond: + seq_before = self._seq + if not self._cond.wait_for( + lambda: self._seq > seq_before, timeout=timeout + ): + return {} # timeout — no new data + return dict(self._latest) def close(self) -> None: self._running = False @@ -139,7 +157,7 @@ class _PRBSExcitation: def __init__( self, - amplitude: int = 180, + amplitude: int = 150, hold_min_ms: int = 50, hold_max_ms: int = 300, ): @@ -169,42 +187,39 @@ def capture( port: str = "/dev/cu.usbserial-0001", baud: int = 115200, duration: float = 20.0, - amplitude: int = 180, + amplitude: int = 150, hold_min_ms: int = 50, hold_max_ms: int = 300, dt: float = 0.02, + motor_angle_limit_deg: float = 90.0, ) -> Path: """Run the capture procedure and return the path to the saved .npz file. + The capture loop is **stream-driven**: it blocks on each incoming + state line from the firmware (which arrives at 50 Hz), sends the + next motor command immediately, and records both. + Parameters ---------- - robot_path : path to robot asset directory (contains hardware.yaml) + robot_path : path to robot asset directory port : serial port for ESP32 baud : baud rate duration : capture duration in seconds - amplitude : max PWM magnitude for excitation (0–255) + amplitude : max PWM magnitude for excitation hold_min_ms / hold_max_ms : random hold time range (ms) - dt : target sampling period (seconds), default 50 Hz + dt : nominal sample period for buffer sizing (seconds) + motor_angle_limit_deg : safety limit for motor angle """ robot_path = Path(robot_path).resolve() - # Load hardware config for encoder conversion + safety. - hw_yaml = robot_path / "hardware.yaml" - if not hw_yaml.exists(): - raise FileNotFoundError(f"hardware.yaml not found in {robot_path}") - raw_hw = yaml.safe_load(hw_yaml.read_text()) - ppr = raw_hw.get("encoder", {}).get("ppr", 11) - gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0) - counts_per_rev: float = ppr * gear_ratio * 4.0 - max_motor_deg = raw_hw.get("safety", {}).get("max_motor_angle_deg", 90.0) - max_motor_rad = math.radians(max_motor_deg) if max_motor_deg > 0 else 0.0 + max_motor_rad = math.radians(motor_angle_limit_deg) if motor_angle_limit_deg > 0 else 0.0 # Connect. reader = _SerialReader(port, baud) excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms) - # Prepare recording buffers. - max_samples = int(duration / dt) + 500 # headroom + # Prepare recording buffers (generous headroom). + max_samples = int(duration / dt) + 500 rec_time = np.zeros(max_samples, dtype=np.float64) rec_action = np.zeros(max_samples, dtype=np.float64) rec_motor_angle = np.zeros(max_samples, dtype=np.float64) @@ -222,63 +237,96 @@ def capture( duration=duration, amplitude=amplitude, hold_range_ms=f"{hold_min_ms}–{hold_max_ms}", - dt=dt, + mode="stream-driven (firmware clock)", ) idx = 0 + pwm = 0 + last_esp_ms = -1 # firmware timestamp of last recorded sample + no_data_count = 0 # consecutive timeouts with no data t0 = time.monotonic() try: while True: - loop_start = time.monotonic() - elapsed = loop_start - t0 + # Block until the firmware sends the next state line (~20 ms). + # Timeout at 100 ms prevents hanging if the ESP32 disconnects. + state = reader.read_blocking(timeout=0.1) + if not state: + no_data_count += 1 + if no_data_count == 30: # 3 seconds with no data + log.warning( + "no_data_received", + msg="No state lines from firmware after 3s. " + "Check: is the ESP32 powered? Is it running the right firmware? " + "Try pressing the RESET button.", + ) + if no_data_count == 100: # 10 seconds + log.critical( + "no_data_timeout", + msg="No data for 10s — aborting capture.", + ) + break + continue # no data yet — retry + no_data_count = 0 + + # Deduplicate: the firmware may send multiple state lines per + # tick (e.g. M-command echo + tick). Only record one sample + # per unique firmware timestamp. + esp_ms = state.get("timestamp_ms", 0) + if esp_ms == last_esp_ms: + continue + last_esp_ms = esp_ms + + elapsed = time.monotonic() - t0 if elapsed >= duration: break - # Get excitation PWM. + # Get excitation PWM for the NEXT tick. pwm = excitation() - # Safety: reverse/zero if near motor limit. - state = reader.read() - if state: - motor_angle_rad = ( - state.get("encoder_count", 0) / counts_per_rev * 2.0 * math.pi - ) - if max_motor_rad > 0: - margin = max_motor_rad * 0.85 # start braking at 85% - if motor_angle_rad > margin and pwm > 0: - pwm = -abs(pwm) # reverse - elif motor_angle_rad < -margin and pwm < 0: - pwm = abs(pwm) # reverse + # Safety: keep the arm well within its mechanical range. + # Firmware sends motor angle in radians — use directly. + motor_angle_rad = state.get("motor_rad", 0.0) + if max_motor_rad > 0: + ratio = motor_angle_rad / max_motor_rad # signed, -1..+1 + abs_ratio = abs(ratio) - # Send command. + if abs_ratio > 0.90: + # Deep in the danger zone — force a strong return. + brake_strength = min(1.0, (abs_ratio - 0.90) / 0.10) # 0→1 + brake_pwm = int(amplitude * (0.5 + 0.5 * brake_strength)) + pwm = -brake_pwm if ratio > 0 else brake_pwm + elif abs_ratio > 0.70: + # Soft zone — only allow actions pointing back to centre. + if ratio > 0 and pwm > 0: + pwm = -abs(pwm) + elif ratio < 0 and pwm < 0: + pwm = abs(pwm) + + # Send command immediately — it will take effect on the next tick. reader.send(f"M{pwm}") - # Wait for fresh data. - time.sleep(max(0, dt - (time.monotonic() - loop_start) - 0.005)) - state = reader.read_blocking(timeout=dt) + # Record this tick's state + the action the motor *actually* + # received. Firmware sends SI units — use directly. + motor_angle = state.get("motor_rad", 0.0) + motor_vel = state.get("motor_vel", 0.0) + pend_angle = state.get("pend_rad", 0.0) + pend_vel = state.get("pend_vel", 0.0) + # Firmware constrains to ±255; normalise to [-1, 1]. + applied = state.get("motor_speed", 0) + action_norm = max(-255, min(255, applied)) / 255.0 - if state: - enc = state.get("encoder_count", 0) - motor_angle = enc / counts_per_rev * 2.0 * math.pi - motor_vel = ( - state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi - ) - pend_angle = math.radians(state.get("pendulum_angle", 0.0)) - pend_vel = math.radians(state.get("pendulum_velocity", 0.0)) + if idx < max_samples: + rec_time[idx] = elapsed + rec_action[idx] = action_norm + rec_motor_angle[idx] = motor_angle + rec_motor_vel[idx] = motor_vel + rec_pend_angle[idx] = pend_angle + rec_pend_vel[idx] = pend_vel + idx += 1 + else: + break # buffer full - # Normalised action: PWM / 255 → [-1, 1] - action_norm = pwm / 255.0 - - if idx < max_samples: - rec_time[idx] = elapsed - rec_action[idx] = action_norm - rec_motor_angle[idx] = motor_angle - rec_motor_vel[idx] = motor_vel - rec_pend_angle[idx] = pend_angle - rec_pend_vel[idx] = pend_vel - idx += 1 - - # Progress. + # Progress (every 50 samples ≈ once per second at 50 Hz). if idx % 50 == 0: log.info( "capture_progress", @@ -287,11 +335,6 @@ def capture( pwm=pwm, ) - # Pace to dt. - remaining = dt - (time.monotonic() - loop_start) - if remaining > 0: - time.sleep(remaining) - finally: reader.send("M0") reader.close() @@ -339,7 +382,7 @@ def main() -> None: "--robot-path", type=str, default="assets/rotary_cartpole", - help="Path to robot asset directory (contains hardware.yaml)", + help="Path to robot asset directory", ) parser.add_argument( "--port", @@ -352,7 +395,8 @@ def main() -> None: "--duration", type=float, default=20.0, help="Capture duration (s)" ) parser.add_argument( - "--amplitude", type=int, default=180, help="Max PWM magnitude (0–255)" + "--amplitude", type=int, default=150, + help="Max PWM magnitude (should not exceed firmware MAX_MOTOR_SPEED=150)", ) parser.add_argument( "--hold-min-ms", type=int, default=50, help="Min hold time (ms)" @@ -361,7 +405,11 @@ def main() -> None: "--hold-max-ms", type=int, default=300, help="Max hold time (ms)" ) parser.add_argument( - "--dt", type=float, default=0.02, help="Sample period (s)" + "--dt", type=float, default=0.02, help="Nominal sample period for buffer sizing (s)" + ) + parser.add_argument( + "--motor-angle-limit", type=float, default=90.0, + help="Motor angle safety limit in degrees (0 = disabled)", ) args = parser.parse_args() @@ -374,6 +422,7 @@ def main() -> None: hold_min_ms=args.hold_min_ms, hold_max_ms=args.hold_max_ms, dt=args.dt, + motor_angle_limit_deg=args.motor_angle_limit, ) diff --git a/src/sysid/export.py b/src/sysid/export.py index 4dc02a9..ae33855 100644 --- a/src/sysid/export.py +++ b/src/sysid/export.py @@ -8,20 +8,21 @@ alongside the originals in the robot asset directory. from __future__ import annotations import copy -import json -import shutil import xml.etree.ElementTree as ET from pathlib import Path import structlog import yaml +from src.sysid._urdf import patch_link_inertials + log = structlog.get_logger() def export_tuned_files( robot_path: str | Path, params: dict[str, float], + motor_params: dict[str, float] | None = None, ) -> tuple[Path, Path]: """Write tuned URDF and robot.yaml files. @@ -29,6 +30,8 @@ def export_tuned_files( ---------- robot_path : robot asset directory (contains robot.yaml + *.urdf) params : dict of parameter name → tuned value (from optimizer) + motor_params : locked motor sysid params (asymmetric model). + If provided, motor joint parameters come from here. Returns ------- @@ -43,38 +46,7 @@ def export_tuned_files( # ── Tune URDF ──────────────────────────────────────────────── tree = ET.parse(urdf_path) - root = tree.getroot() - - for link in root.iter("link"): - link_name = link.get("name", "") - inertial = link.find("inertial") - if inertial is None: - continue - - if link_name == "arm": - _set_mass(inertial, params.get("arm_mass")) - _set_com( - inertial, - params.get("arm_com_x"), - params.get("arm_com_y"), - params.get("arm_com_z"), - ) - - elif link_name == "pendulum": - _set_mass(inertial, params.get("pendulum_mass")) - _set_com( - inertial, - params.get("pendulum_com_x"), - params.get("pendulum_com_y"), - params.get("pendulum_com_z"), - ) - _set_inertia( - inertial, - ixx=params.get("pendulum_ixx"), - iyy=params.get("pendulum_iyy"), - izz=params.get("pendulum_izz"), - ixy=params.get("pendulum_ixy"), - ) + patch_link_inertials(tree.getroot(), params) # Write tuned URDF. tuned_urdf_name = urdf_path.stem + "_tuned" + urdf_path.suffix @@ -91,15 +63,47 @@ def export_tuned_files( # Point to the tuned URDF. tuned_cfg["urdf"] = tuned_urdf_name - # Update actuator parameters. + # Update actuator parameters — full asymmetric motor model. if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0: act = tuned_cfg["actuators"][0] - if "actuator_gear" in params: - act["gear"] = round(params["actuator_gear"], 6) - if "actuator_filter_tau" in params: - act["filter_tau"] = round(params["actuator_filter_tau"], 6) - if "motor_damping" in params: - act["damping"] = round(params["motor_damping"], 6) + if motor_params: + # Asymmetric gear, damping, deadzone, frictionloss as [pos, neg]. + gear_pos = motor_params.get("actuator_gear_pos", 0.424) + gear_neg = motor_params.get("actuator_gear_neg", 0.425) + act["gear"] = [round(gear_pos, 6), round(gear_neg, 6)] + + damp_pos = motor_params.get("motor_damping_pos", 0.002) + damp_neg = motor_params.get("motor_damping_neg", 0.015) + act["damping"] = [round(damp_pos, 6), round(damp_neg, 6)] + + dz_pos = motor_params.get("motor_deadzone_pos", 0.141) + dz_neg = motor_params.get("motor_deadzone_neg", 0.078) + act["deadzone"] = [round(dz_pos, 6), round(dz_neg, 6)] + + fl_pos = motor_params.get("motor_frictionloss_pos", 0.057) + fl_neg = motor_params.get("motor_frictionloss_neg", 0.053) + act["frictionloss"] = [round(fl_pos, 6), round(fl_neg, 6)] + + if "actuator_filter_tau" in motor_params: + act["filter_tau"] = round(motor_params["actuator_filter_tau"], 6) + if "viscous_quadratic" in motor_params: + act["viscous_quadratic"] = round(motor_params["viscous_quadratic"], 6) + if "back_emf_gain" in motor_params: + act["back_emf_gain"] = round(motor_params["back_emf_gain"], 6) + else: + if "actuator_gear" in params: + act["gear"] = round(params["actuator_gear"], 6) + if "actuator_filter_tau" in params: + act["filter_tau"] = round(params["actuator_filter_tau"], 6) + if "motor_damping" in params: + act["damping"] = round(params["motor_damping"], 6) + if "motor_deadzone" in params: + act["deadzone"] = round(params["motor_deadzone"], 6) + + # ctrl_range from ctrl_limit parameter. + if "ctrl_limit" in params: + lim = round(params["ctrl_limit"], 6) + act["ctrl_range"] = [-lim, lim] # Update joint overrides. if "joints" not in tuned_cfg: @@ -108,16 +112,23 @@ def export_tuned_files( if "motor_joint" not in tuned_cfg["joints"]: tuned_cfg["joints"]["motor_joint"] = {} mj = tuned_cfg["joints"]["motor_joint"] - if "motor_armature" in params: - mj["armature"] = round(params["motor_armature"], 6) - if "motor_frictionloss" in params: - mj["frictionloss"] = round(params["motor_frictionloss"], 6) + if motor_params: + mj["armature"] = round(motor_params.get("motor_armature", 0.00277), 6) + # Frictionloss/damping = 0 in MuJoCo (motor model handles via qfrc_applied). + mj["frictionloss"] = 0.0 + else: + if "motor_armature" in params: + mj["armature"] = round(params["motor_armature"], 6) + if "motor_frictionloss" in params: + mj["frictionloss"] = round(params["motor_frictionloss"], 6) if "pendulum_joint" not in tuned_cfg["joints"]: tuned_cfg["joints"]["pendulum_joint"] = {} pj = tuned_cfg["joints"]["pendulum_joint"] if "pendulum_damping" in params: pj["damping"] = round(params["pendulum_damping"], 6) + if "pendulum_frictionloss" in params: + pj["frictionloss"] = round(params["pendulum_frictionloss"], 6) # Write tuned robot.yaml. tuned_yaml_path = robot_path / "robot_tuned.yaml" @@ -136,51 +147,46 @@ def export_tuned_files( return tuned_urdf_path, tuned_yaml_path -# ── XML helpers (shared with rollout.py) ───────────────────────────── +# ── CLI entry point ────────────────────────────────────────────────── -def _set_mass(inertial: ET.Element, mass: float | None) -> None: - if mass is None: - return - mass_el = inertial.find("mass") - if mass_el is not None: - mass_el.set("value", str(mass)) +def main() -> None: + import argparse + import json + + from src.sysid.rollout import LOCKED_MOTOR_PARAMS + + parser = argparse.ArgumentParser( + description="Export tuned URDF + robot.yaml from sysid results." + ) + parser.add_argument( + "--robot-path", type=str, default="assets/rotary_cartpole", + help="Path to robot asset directory", + ) + parser.add_argument( + "--result", type=str, default=None, + help="Path to sysid_result.json (auto-detected if omitted)", + ) + args = parser.parse_args() + + robot_path = Path(args.robot_path).resolve() + if args.result: + result_path = Path(args.result) + else: + result_path = robot_path / "sysid_result.json" + + if not result_path.exists(): + raise FileNotFoundError(f"Result file not found: {result_path}") + + result = json.loads(result_path.read_text()) + + export_tuned_files( + robot_path=args.robot_path, + params=result["best_params"], + motor_params=result.get("motor_params", dict(LOCKED_MOTOR_PARAMS)), + ) -def _set_com( - inertial: ET.Element, - x: float | None, - y: float | None, - z: float | None, -) -> None: - origin = inertial.find("origin") - if origin is None: - return - xyz = origin.get("xyz", "0 0 0").split() - if x is not None: - xyz[0] = str(x) - if y is not None: - xyz[1] = str(y) - if z is not None: - xyz[2] = str(z) - origin.set("xyz", " ".join(xyz)) +if __name__ == "__main__": + main() - -def _set_inertia( - inertial: ET.Element, - ixx: float | None = None, - iyy: float | None = None, - izz: float | None = None, - ixy: float | None = None, - iyz: float | None = None, - ixz: float | None = None, -) -> None: - ine = inertial.find("inertia") - if ine is None: - return - for attr, val in [ - ("ixx", ixx), ("iyy", iyy), ("izz", izz), - ("ixy", ixy), ("iyz", iyz), ("ixz", ixz), - ]: - if val is not None: - ine.set(attr, str(val)) diff --git a/src/sysid/motor/__init__.py b/src/sysid/motor/__init__.py new file mode 100644 index 0000000..1c05f16 --- /dev/null +++ b/src/sysid/motor/__init__.py @@ -0,0 +1,5 @@ +"""Motor-only system identification subpackage. + +Identifies JGB37-520 DC motor dynamics (no pendulum, no limits) +so the MuJoCo simulation matches the real hardware response. +""" diff --git a/src/sysid/motor/capture.py b/src/sysid/motor/capture.py new file mode 100644 index 0000000..f7eb7ea --- /dev/null +++ b/src/sysid/motor/capture.py @@ -0,0 +1,364 @@ +"""Capture a motor-only trajectory under random excitation (PRBS-style). + +Connects to the ESP32 running the simplified sysid firmware (no pendulum, +no limits), sends random PWM commands, and records motor angle + velocity +at ~ 50 Hz. + +Firmware serial protocol (115200 baud): + Commands: M\\n R\\n S\\n G\\n H\\n P\\n + State: S,,,,,\\n + +Usage: + python -m src.sysid.motor.capture --duration 20 + python -m src.sysid.motor.capture --duration 30 --amplitude 200 +""" + +from __future__ import annotations + +import argparse +import math +import random +import threading +import time +from datetime import datetime +from pathlib import Path +from typing import Any + +import numpy as np +import structlog +import yaml + +log = structlog.get_logger() + +# ── Default asset path ─────────────────────────────────────────────── +_DEFAULT_ASSET = "assets/motor" + + +# ── Serial protocol helpers ────────────────────────────────────────── + + +def _parse_state_line(line: str) -> dict[str, Any] | None: + """Parse an ``S,…`` state line from the motor sysid firmware. + + Format: S,,,,, + """ + if not line.startswith("S,"): + return None + parts = line.split(",") + if len(parts) < 6: + return None + try: + return { + "timestamp_ms": int(parts[1]), + "encoder_count": int(parts[2]), + "rpm": float(parts[3]), + "applied_speed": int(parts[4]), + "enc_vel_cps": float(parts[5]), + } + except (ValueError, IndexError): + return None + + +# ── Background serial reader ───────────────────────────────────────── + + +class _SerialReader: + """Minimal background reader for the ESP32 serial stream.""" + + def __init__(self, port: str, baud: int = 115200): + import serial as _serial + + self._serial_mod = _serial + self.ser = _serial.Serial(port, baud, timeout=0.05) + time.sleep(2) # Wait for ESP32 boot + self.ser.reset_input_buffer() + + self._latest: dict[str, Any] = {} + self._seq: int = 0 + self._lock = threading.Lock() + self._cond = threading.Condition(self._lock) + self._running = True + + self._thread = threading.Thread(target=self._reader_loop, daemon=True) + self._thread.start() + + def _reader_loop(self) -> None: + while self._running: + try: + if self.ser.in_waiting: + line = ( + self.ser.readline() + .decode("utf-8", errors="ignore") + .strip() + ) + parsed = _parse_state_line(line) + if parsed is not None: + with self._cond: + self._latest = parsed + self._seq += 1 + self._cond.notify_all() + elif line and not line.startswith("S,"): + # Log non-state lines (READY, PONG, WARN, etc.) + log.debug("serial_info", line=line) + else: + time.sleep(0.001) + except (OSError, self._serial_mod.SerialException): + log.critical("serial_lost") + break + + def send(self, cmd: str) -> None: + try: + self.ser.write(f"{cmd}\n".encode()) + except (OSError, self._serial_mod.SerialException): + log.critical("serial_send_failed", cmd=cmd) + + def read_blocking(self, timeout: float = 0.1) -> dict[str, Any]: + """Wait until a *new* state line arrives, then return it.""" + with self._cond: + seq_before = self._seq + if not self._cond.wait_for( + lambda: self._seq > seq_before, timeout=timeout + ): + return {} + return dict(self._latest) + + def close(self) -> None: + self._running = False + self.send("H") + self.send("M0") + time.sleep(0.1) + self._thread.join(timeout=1.0) + self.ser.close() + + +# ── PRBS excitation signal ─────────────────────────────────────────── + + +class _PRBSExcitation: + """Random hold-value excitation with configurable amplitude and hold time.""" + + def __init__( + self, + amplitude: int = 200, + hold_min_ms: int = 50, + hold_max_ms: int = 400, + ): + self.amplitude = amplitude + self.hold_min_ms = hold_min_ms + self.hold_max_ms = hold_max_ms + self._current: int = 0 + self._switch_time: float = 0.0 + self._new_value() + + def _new_value(self) -> None: + self._current = random.randint(-self.amplitude, self.amplitude) + hold_ms = random.randint(self.hold_min_ms, self.hold_max_ms) + self._switch_time = time.monotonic() + hold_ms / 1000.0 + + def __call__(self) -> int: + if time.monotonic() >= self._switch_time: + self._new_value() + return self._current + + +# ── Main capture loop ──────────────────────────────────────────────── + + +def capture( + asset_path: str | Path = _DEFAULT_ASSET, + port: str = "/dev/cu.usbserial-0001", + baud: int = 115200, + duration: float = 20.0, + amplitude: int = 200, + hold_min_ms: int = 50, + hold_max_ms: int = 400, + dt: float = 0.02, +) -> Path: + """Run motor-only capture and return the path to the saved .npz file. + + Stream-driven: blocks on each firmware state line (~50 Hz), + sends next motor command immediately, records both. + No time.sleep pacing — locked to firmware clock. + + The recording stores: + - time: wall-clock seconds since start + - action: normalised action = applied_speed / 255 + - motor_angle: shaft angle in radians (from encoder) + - motor_vel: shaft velocity in rad/s (from encoder velocity) + """ + asset_path = Path(asset_path).resolve() + + # Load hardware config for encoder conversion. + hw_yaml = asset_path / "hardware.yaml" + if not hw_yaml.exists(): + raise FileNotFoundError(f"hardware.yaml not found in {asset_path}") + raw_hw = yaml.safe_load(hw_yaml.read_text()) + ppr = raw_hw.get("encoder", {}).get("ppr", 11) + gear_ratio = raw_hw.get("encoder", {}).get("gear_ratio", 30.0) + counts_per_rev: float = ppr * gear_ratio * 4.0 + max_pwm = raw_hw.get("motor", {}).get("max_pwm", 255) + + log.info( + "hardware_config", + ppr=ppr, + gear_ratio=gear_ratio, + counts_per_rev=counts_per_rev, + max_pwm=max_pwm, + ) + + # Connect. + reader = _SerialReader(port, baud) + excitation = _PRBSExcitation(amplitude, hold_min_ms, hold_max_ms) + + # Prepare recording buffers. + max_samples = int(duration / dt) + 500 + rec_time = np.zeros(max_samples, dtype=np.float64) + rec_action = np.zeros(max_samples, dtype=np.float64) + rec_motor_angle = np.zeros(max_samples, dtype=np.float64) + rec_motor_vel = np.zeros(max_samples, dtype=np.float64) + + # Reset encoder to zero. + reader.send("R") + time.sleep(0.1) + + # Start streaming. + reader.send("G") + time.sleep(0.1) + + log.info( + "capture_starting", + port=port, + duration=duration, + amplitude=amplitude, + hold_range_ms=f"{hold_min_ms}–{hold_max_ms}", + ) + + idx = 0 + pwm = 0 + last_esp_ms = -1 + t0 = time.monotonic() + + try: + while True: + state = reader.read_blocking(timeout=0.1) + if not state: + continue + + # Deduplicate by firmware timestamp. + esp_ms = state.get("timestamp_ms", 0) + if esp_ms == last_esp_ms: + continue + last_esp_ms = esp_ms + + elapsed = time.monotonic() - t0 + if elapsed >= duration: + break + + # Generate next excitation PWM. + pwm = excitation() + + # Send command. + reader.send(f"M{pwm}") + + # Convert encoder to angle/velocity. + enc = state.get("encoder_count", 0) + motor_angle = enc / counts_per_rev * 2.0 * math.pi + motor_vel = ( + state.get("enc_vel_cps", 0.0) / counts_per_rev * 2.0 * math.pi + ) + + # Use firmware-applied speed for the action. + applied = state.get("applied_speed", 0) + action_norm = applied / 255.0 + + if idx < max_samples: + rec_time[idx] = elapsed + rec_action[idx] = action_norm + rec_motor_angle[idx] = motor_angle + rec_motor_vel[idx] = motor_vel + idx += 1 + else: + break + + if idx % 50 == 0: + log.info( + "capture_progress", + elapsed=f"{elapsed:.1f}/{duration:.0f}s", + samples=idx, + pwm=pwm, + angle_deg=f"{math.degrees(motor_angle):.1f}", + vel_rps=f"{motor_vel / (2 * math.pi):.1f}", + ) + + finally: + reader.send("M0") + reader.close() + + # Trim. + rec_time = rec_time[:idx] + rec_action = rec_action[:idx] + rec_motor_angle = rec_motor_angle[:idx] + rec_motor_vel = rec_motor_vel[:idx] + + # Save. + recordings_dir = asset_path / "recordings" + recordings_dir.mkdir(exist_ok=True) + stamp = datetime.now().strftime("%Y%m%d_%H%M%S") + out_path = recordings_dir / f"motor_capture_{stamp}.npz" + np.savez_compressed( + out_path, + time=rec_time, + action=rec_action, + motor_angle=rec_motor_angle, + motor_vel=rec_motor_vel, + ) + + log.info( + "capture_saved", + path=str(out_path), + samples=idx, + duration_actual=f"{rec_time[-1]:.2f}s" if idx > 0 else "0s", + ) + return out_path + + +# ── CLI ────────────────────────────────────────────────────────────── + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Capture motor-only trajectory for system identification." + ) + parser.add_argument( + "--asset-path", type=str, default=_DEFAULT_ASSET, + help="Path to motor asset directory (contains hardware.yaml)", + ) + parser.add_argument( + "--port", type=str, default="/dev/cu.usbserial-0001", + help="Serial port for ESP32", + ) + parser.add_argument("--baud", type=int, default=115200) + parser.add_argument("--duration", type=float, default=20.0, help="Capture duration (s)") + parser.add_argument( + "--amplitude", type=int, default=200, + help="Max PWM magnitude (0–255, firmware allows full range)", + ) + parser.add_argument("--hold-min-ms", type=int, default=50, help="PRBS min hold (ms)") + parser.add_argument("--hold-max-ms", type=int, default=400, help="PRBS max hold (ms)") + parser.add_argument("--dt", type=float, default=0.02, help="Nominal sample period (s)") + args = parser.parse_args() + + capture( + asset_path=args.asset_path, + port=args.port, + baud=args.baud, + duration=args.duration, + amplitude=args.amplitude, + hold_min_ms=args.hold_min_ms, + hold_max_ms=args.hold_max_ms, + dt=args.dt, + ) + + +if __name__ == "__main__": + main() diff --git a/src/sysid/motor/export.py b/src/sysid/motor/export.py new file mode 100644 index 0000000..b717ce1 --- /dev/null +++ b/src/sysid/motor/export.py @@ -0,0 +1,186 @@ +"""Export tuned motor parameters to MJCF and robot.yaml files. + +Reads the original motor.xml and robot.yaml, patches with optimised +parameter values, and writes motor_tuned.xml + robot_tuned.yaml. + +Usage: + python -m src.sysid.motor.export \ + --asset-path assets/motor \ + --result assets/motor/motor_sysid_result.json +""" + +from __future__ import annotations + +import argparse +import copy +import json +import xml.etree.ElementTree as ET +from pathlib import Path + +import structlog +import yaml + +log = structlog.get_logger() + +_DEFAULT_ASSET = "assets/motor" + + +def export_tuned_files( + asset_path: str | Path, + params: dict[str, float], +) -> tuple[Path, Path]: + """Write tuned MJCF and robot.yaml files. + + Returns (tuned_mjcf_path, tuned_robot_yaml_path). + """ + asset_path = Path(asset_path).resolve() + + robot_yaml_path = asset_path / "robot.yaml" + robot_cfg = yaml.safe_load(robot_yaml_path.read_text()) + mjcf_path = asset_path / robot_cfg["mjcf"] + + # ── Tune MJCF ──────────────────────────────────────────────── + tree = ET.parse(str(mjcf_path)) + root = tree.getroot() + + # Actuator — use average gear for the MJCF model. + gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear")) + gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear")) + gear_avg = None + if gear_pos is not None and gear_neg is not None: + gear_avg = (gear_pos + gear_neg) / 2.0 + elif gear_pos is not None: + gear_avg = gear_pos + filter_tau = params.get("actuator_filter_tau") + for act_el in root.iter("general"): + if act_el.get("name") == "motor": + if gear_avg is not None: + act_el.set("gear", str(gear_avg)) + if filter_tau is not None: + if filter_tau > 0: + act_el.set("dyntype", "filter") + act_el.set("dynprm", str(filter_tau)) + else: + act_el.set("dyntype", "none") + + # Joint — average damping & friction for MJCF (asymmetry in runtime). + fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss")) + fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss")) + fl_avg = None + if fl_pos is not None and fl_neg is not None: + fl_avg = (fl_pos + fl_neg) / 2.0 + elif fl_pos is not None: + fl_avg = fl_pos + damp_pos = params.get("motor_damping_pos", params.get("motor_damping")) + damp_neg = params.get("motor_damping_neg", params.get("motor_damping")) + damp_avg = None + if damp_pos is not None and damp_neg is not None: + damp_avg = (damp_pos + damp_neg) / 2.0 + elif damp_pos is not None: + damp_avg = damp_pos + for jnt in root.iter("joint"): + if jnt.get("name") == "motor_joint": + if damp_avg is not None: + jnt.set("damping", str(damp_avg)) + if "motor_armature" in params: + jnt.set("armature", str(params["motor_armature"])) + if fl_avg is not None: + jnt.set("frictionloss", str(fl_avg)) + + # Rotor mass. + if "rotor_mass" in params: + for geom in root.iter("geom"): + if geom.get("name") == "rotor_disk": + geom.set("mass", str(params["rotor_mass"])) + + # Write tuned MJCF. + tuned_mjcf_name = mjcf_path.stem + "_tuned" + mjcf_path.suffix + tuned_mjcf_path = asset_path / tuned_mjcf_name + ET.indent(tree, space=" ") + tree.write(str(tuned_mjcf_path), xml_declaration=True, encoding="unicode") + log.info("tuned_mjcf_written", path=str(tuned_mjcf_path)) + + # ── Tune robot.yaml ────────────────────────────────────────── + tuned_cfg = copy.deepcopy(robot_cfg) + tuned_cfg["mjcf"] = tuned_mjcf_name + + if tuned_cfg.get("actuators") and len(tuned_cfg["actuators"]) > 0: + act = tuned_cfg["actuators"][0] + if gear_avg is not None: + act["gear"] = round(gear_avg, 6) + if "actuator_filter_tau" in params: + act["filter_tau"] = round(params["actuator_filter_tau"], 6) + if "motor_damping" in params: + act["damping"] = round(params["motor_damping"], 6) + + if "joints" not in tuned_cfg: + tuned_cfg["joints"] = {} + if "motor_joint" not in tuned_cfg["joints"]: + tuned_cfg["joints"]["motor_joint"] = {} + mj = tuned_cfg["joints"]["motor_joint"] + if "motor_armature" in params: + mj["armature"] = round(params["motor_armature"], 6) + if fl_avg is not None: + mj["frictionloss"] = round(fl_avg, 6) + + # Asymmetric / hardware-realism / nonlinear parameters. + realism = {} + for key in [ + "actuator_gear_pos", "actuator_gear_neg", + "motor_damping_pos", "motor_damping_neg", + "motor_frictionloss_pos", "motor_frictionloss_neg", + "motor_deadzone_pos", "motor_deadzone_neg", + "action_bias", + "viscous_quadratic", "back_emf_gain", + "stribeck_friction_boost", "stribeck_vel", + "gearbox_backlash", + ]: + if key in params: + realism[key] = round(params[key], 6) + if realism: + tuned_cfg["hardware_realism"] = realism + + tuned_yaml_path = asset_path / "robot_tuned.yaml" + header = ( + "# Tuned motor config — generated by src.sysid.motor.optimize\n" + "# Original: robot.yaml\n\n" + ) + tuned_yaml_path.write_text( + header + yaml.dump(tuned_cfg, default_flow_style=False, sort_keys=False) + ) + log.info("tuned_robot_yaml_written", path=str(tuned_yaml_path)) + + return tuned_mjcf_path, tuned_yaml_path + + +# ── CLI ────────────────────────────────────────────────────────────── + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Export tuned motor parameters to MJCF + robot.yaml." + ) + parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET) + parser.add_argument( + "--result", type=str, default=None, + help="Path to motor_sysid_result.json (auto-detected if omitted)", + ) + args = parser.parse_args() + + asset_path = Path(args.asset_path).resolve() + if args.result: + result_path = Path(args.result) + else: + result_path = asset_path / "motor_sysid_result.json" + + if not result_path.exists(): + raise FileNotFoundError(f"Result file not found: {result_path}") + + result = json.loads(result_path.read_text()) + params = result["best_params"] + + export_tuned_files(asset_path=args.asset_path, params=params) + + +if __name__ == "__main__": + main() diff --git a/src/sysid/motor/optimize.py b/src/sysid/motor/optimize.py new file mode 100644 index 0000000..d736498 --- /dev/null +++ b/src/sysid/motor/optimize.py @@ -0,0 +1,367 @@ +"""CMA-ES optimiser — fit motor simulation parameters to a real recording. + +Motor-only version: minimises trajectory-matching cost between MuJoCo +rollout and recorded motor angle + velocity. + +Usage: + python -m src.sysid.motor.optimize \ + --recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz + + # Quick test: + python -m src.sysid.motor.optimize --recording .npz \ + --max-generations 20 --population-size 10 +""" + +from __future__ import annotations + +import argparse +import json +import time +from datetime import datetime +from pathlib import Path + +import numpy as np +import structlog + +from src.sysid.motor.preprocess import recompute_velocity +from src.sysid.motor.rollout import ( + MOTOR_PARAMS, + ParamSpec, + bounds_arrays, + defaults_vector, + params_to_dict, + rollout, + windowed_rollout, +) + +log = structlog.get_logger() + +_DEFAULT_ASSET = "assets/motor" + + +# ── Cost function ──────────────────────────────────────────────────── + + +def _compute_trajectory_cost( + sim: dict[str, np.ndarray], + recording: dict[str, np.ndarray], + pos_weight: float = 1.0, + vel_weight: float = 0.5, + acc_weight: float = 0.0, + dt: float = 0.02, +) -> float: + """Weighted MSE between simulated and real motor trajectories. + + Motor-only: angle, velocity, and optionally acceleration. + Acceleration is computed as finite-difference of velocity. + """ + angle_err = sim["motor_angle"] - recording["motor_angle"] + vel_err = sim["motor_vel"] - recording["motor_vel"] + + # Reject NaN results (unstable simulation). + if np.any(~np.isfinite(angle_err)) or np.any(~np.isfinite(vel_err)): + return 1e6 + + cost = float( + pos_weight * np.mean(angle_err**2) + + vel_weight * np.mean(vel_err**2) + ) + + # Optional acceleration term — penalises wrong dynamics (d(vel)/dt). + if acc_weight > 0 and len(vel_err) > 2: + sim_acc = np.diff(sim["motor_vel"]) / dt + real_acc = np.diff(recording["motor_vel"]) / dt + acc_err = sim_acc - real_acc + if np.any(~np.isfinite(acc_err)): + return 1e6 + # Normalise by typical acceleration scale (~50 rad/s²) to keep + # the weight intuitive relative to vel/pos terms. + cost += acc_weight * np.mean(acc_err**2) / (50.0**2) + + return cost + + +def cost_function( + params_vec: np.ndarray, + recording: dict[str, np.ndarray], + asset_path: Path, + specs: list[ParamSpec], + sim_dt: float = 0.002, + substeps: int = 10, + pos_weight: float = 1.0, + vel_weight: float = 0.5, + acc_weight: float = 0.0, + window_duration: float = 0.5, +) -> float: + """Compute trajectory-matching cost for a candidate parameter vector. + + Uses multiple-shooting (windowed rollout) by default. + """ + params = params_to_dict(params_vec, specs) + + try: + if window_duration > 0: + sim = windowed_rollout( + asset_path=asset_path, + params=params, + recording=recording, + window_duration=window_duration, + sim_dt=sim_dt, + substeps=substeps, + ) + else: + sim = rollout( + asset_path=asset_path, + params=params, + actions=recording["action"], + sim_dt=sim_dt, + substeps=substeps, + ) + except Exception as exc: + log.warning("rollout_failed", error=str(exc)) + return 1e6 + + return _compute_trajectory_cost( + sim, recording, pos_weight, vel_weight, acc_weight, + dt=np.mean(np.diff(recording["time"])) if len(recording["time"]) > 1 else 0.02, + ) + + +# ── CMA-ES optimisation loop ──────────────────────────────────────── + + +def optimize( + asset_path: str | Path = _DEFAULT_ASSET, + recording_path: str | Path = "", + specs: list[ParamSpec] | None = None, + sigma0: float = 0.3, + population_size: int = 30, + max_generations: int = 300, + sim_dt: float = 0.002, + substeps: int = 10, + pos_weight: float = 1.0, + vel_weight: float = 0.5, + acc_weight: float = 0.0, + window_duration: float = 0.5, + seed: int = 42, + preprocess_vel: bool = True, + sg_window: int = 7, + sg_polyorder: int = 3, +) -> dict: + """Run CMA-ES optimisation and return results dict.""" + from cmaes import CMA + + asset_path = Path(asset_path).resolve() + recording_path = Path(recording_path).resolve() + + if specs is None: + specs = MOTOR_PARAMS + + # Load recording. + recording = dict(np.load(recording_path)) + n_samples = len(recording["time"]) + duration = recording["time"][-1] - recording["time"][0] + n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1 + log.info( + "recording_loaded", + path=str(recording_path), + samples=n_samples, + duration=f"{duration:.1f}s", + n_windows=n_windows, + ) + + # Preprocess velocity: replace noisy firmware finite-difference with + # smooth Savitzky-Golay derivative of the angle signal. + if preprocess_vel: + recording = recompute_velocity( + recording, + window_length=sg_window, + polyorder=sg_polyorder, + ) + + # Normalise to [0, 1] for CMA-ES. + lo, hi = bounds_arrays(specs) + x0 = defaults_vector(specs) + span = hi - lo + span[span == 0] = 1.0 + + def to_normed(x: np.ndarray) -> np.ndarray: + return (x - lo) / span + + def from_normed(x_n: np.ndarray) -> np.ndarray: + return x_n * span + lo + + x0_normed = to_normed(x0) + bounds_normed = np.column_stack( + [np.zeros(len(specs)), np.ones(len(specs))] + ) + + optimizer = CMA( + mean=x0_normed, + sigma=sigma0, + bounds=bounds_normed, + population_size=population_size, + seed=seed, + ) + + best_cost = float("inf") + best_params_vec = x0.copy() + history: list[tuple[int, float]] = [] + + log.info( + "cmaes_starting", + n_params=len(specs), + population=population_size, + max_gens=max_generations, + sigma0=sigma0, + ) + + t0 = time.monotonic() + + for gen in range(max_generations): + solutions = [] + for _ in range(optimizer.population_size): + x_normed = optimizer.ask() + x_natural = from_normed(x_normed) + x_natural = np.clip(x_natural, lo, hi) + + c = cost_function( + x_natural, + recording, + asset_path, + specs, + sim_dt=sim_dt, + substeps=substeps, + pos_weight=pos_weight, + vel_weight=vel_weight, + acc_weight=acc_weight, + window_duration=window_duration, + ) + solutions.append((x_normed, c)) + + if c < best_cost: + best_cost = c + best_params_vec = x_natural.copy() + + optimizer.tell(solutions) + history.append((gen, best_cost)) + + elapsed = time.monotonic() - t0 + if gen % 5 == 0 or gen == max_generations - 1: + log.info( + "cmaes_generation", + gen=gen, + best_cost=f"{best_cost:.6f}", + elapsed=f"{elapsed:.1f}s", + gen_best=f"{min(c for _, c in solutions):.6f}", + ) + + total_time = time.monotonic() - t0 + best_params = params_to_dict(best_params_vec, specs) + + log.info( + "cmaes_finished", + best_cost=f"{best_cost:.6f}", + total_time=f"{total_time:.1f}s", + evaluations=max_generations * population_size, + ) + + # Log parameter comparison. + defaults = params_to_dict(defaults_vector(specs), specs) + for name in best_params: + d = defaults[name] + b = best_params[name] + change_pct = ((b - d) / abs(d) * 100) if abs(d) > 1e-12 else 0.0 + log.info( + "param_result", + name=name, + default=f"{d:.6g}", + tuned=f"{b:.6g}", + change=f"{change_pct:+.1f}%", + ) + + return { + "best_params": best_params, + "best_cost": best_cost, + "history": history, + "recording": str(recording_path), + "param_names": [s.name for s in specs], + "defaults": {s.name: s.default for s in specs}, + "timestamp": datetime.now().isoformat(), + } + + +# ── CLI ────────────────────────────────────────────────────────────── + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Fit motor simulation parameters to a real recording (CMA-ES)." + ) + parser.add_argument( + "--asset-path", type=str, default=_DEFAULT_ASSET, + help="Path to motor asset directory", + ) + parser.add_argument( + "--recording", type=str, required=True, + help="Path to .npz recording file", + ) + parser.add_argument("--sigma0", type=float, default=0.3) + parser.add_argument("--population-size", type=int, default=30) + parser.add_argument("--max-generations", type=int, default=300) + parser.add_argument("--sim-dt", type=float, default=0.002) + parser.add_argument("--substeps", type=int, default=10) + parser.add_argument("--pos-weight", type=float, default=1.0) + parser.add_argument("--vel-weight", type=float, default=0.5) + parser.add_argument("--acc-weight", type=float, default=0.0, + help="Weight for acceleration matching (0=off, try 0.1-0.5)") + parser.add_argument("--window-duration", type=float, default=0.5) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument( + "--no-preprocess-vel", action="store_true", + help="Disable Savitzky-Golay velocity preprocessing", + ) + parser.add_argument("--sg-window", type=int, default=7, + help="Savitzky-Golay window length (odd, default 7 = 140ms)") + parser.add_argument("--sg-polyorder", type=int, default=3, + help="Savitzky-Golay polynomial order (default 3 = cubic)") + args = parser.parse_args() + + result = optimize( + asset_path=args.asset_path, + recording_path=args.recording, + sigma0=args.sigma0, + population_size=args.population_size, + max_generations=args.max_generations, + sim_dt=args.sim_dt, + substeps=args.substeps, + pos_weight=args.pos_weight, + vel_weight=args.vel_weight, + acc_weight=args.acc_weight, + window_duration=args.window_duration, + seed=args.seed, + preprocess_vel=not args.no_preprocess_vel, + sg_window=args.sg_window, + sg_polyorder=args.sg_polyorder, + ) + + # Save results JSON. + asset_path = Path(args.asset_path).resolve() + result_path = asset_path / "motor_sysid_result.json" + result_json = {k: v for k, v in result.items() if k != "history"} + result_json["history_summary"] = { + "first_cost": result["history"][0][1] if result["history"] else None, + "final_cost": result["history"][-1][1] if result["history"] else None, + "generations": len(result["history"]), + } + result_path.write_text(json.dumps(result_json, indent=2, default=str)) + log.info("results_saved", path=str(result_path)) + + # Export tuned files. + from src.sysid.motor.export import export_tuned_files + + export_tuned_files(asset_path=args.asset_path, params=result["best_params"]) + + +if __name__ == "__main__": + main() diff --git a/src/sysid/motor/preprocess.py b/src/sysid/motor/preprocess.py new file mode 100644 index 0000000..9aa8cf1 --- /dev/null +++ b/src/sysid/motor/preprocess.py @@ -0,0 +1,114 @@ +"""Recording preprocessing — clean velocity estimation from angle data. + +The ESP32 firmware computes velocity as a raw finite-difference of encoder +counts at 50 Hz. With a 1320 CPR encoder that gives ~0.24 rad/s of +quantisation noise per count. This module replaces the noisy firmware +velocity with a smooth differentiation of the (much cleaner) angle signal. + +Method: Savitzky-Golay filter applied to the angle signal, then +differentiated analytically. Zero phase lag, preserves transients well. + +This is standard practice in robotics sysid — see e.g. MATLAB System ID +Toolbox, Drake's trajectory processing, or ETH's ANYmal sysid pipeline. +""" + +from __future__ import annotations + +import numpy as np +from scipy.signal import savgol_filter + +import structlog + +log = structlog.get_logger() + + +def recompute_velocity( + recording: dict[str, np.ndarray], + window_length: int = 7, + polyorder: int = 3, + deriv: int = 1, +) -> dict[str, np.ndarray]: + """Recompute motor_vel from motor_angle using Savitzky-Golay differentiation. + + Parameters + ---------- + recording : dict with at least 'time', 'motor_angle', 'motor_vel' keys. + window_length : SG filter window (must be odd, > polyorder). + 7 samples at 50 Hz = 140ms window — good balance of smoothness + and responsiveness. Captures dynamics up to ~7 Hz. + polyorder : Polynomial order for the SG filter (3 = cubic). + deriv : Derivative order (1 = first derivative = velocity). + + Returns + ------- + New recording dict with 'motor_vel' replaced and 'motor_vel_raw' added. + """ + rec = dict(recording) # shallow copy + + times = rec["time"] + angles = rec["motor_angle"] + dt = np.mean(np.diff(times)) + + # Keep original for diagnostics. + rec["motor_vel_raw"] = rec["motor_vel"].copy() + + # Savitzky-Golay derivative: fits a polynomial to each window, + # then takes the analytical derivative → smooth, zero phase lag. + vel_sg = savgol_filter( + angles, + window_length=window_length, + polyorder=polyorder, + deriv=deriv, + delta=dt, + ) + + # Compute stats for logging. + raw_vel = rec["motor_vel_raw"] + noise_estimate = np.std(raw_vel - vel_sg) + max_diff = np.max(np.abs(raw_vel - vel_sg)) + + log.info( + "velocity_recomputed", + method="savgol", + window=window_length, + polyorder=polyorder, + dt=f"{dt*1000:.1f}ms", + noise_std=f"{noise_estimate:.3f} rad/s", + max_diff=f"{max_diff:.3f} rad/s", + raw_rms=f"{np.sqrt(np.mean(raw_vel**2)):.3f}", + sg_rms=f"{np.sqrt(np.mean(vel_sg**2)):.3f}", + ) + + rec["motor_vel"] = vel_sg + return rec + + +def smooth_velocity( + recording: dict[str, np.ndarray], + cutoff_hz: float = 10.0, +) -> dict[str, np.ndarray]: + """Alternative: apply zero-phase Butterworth low-pass to motor_vel. + + Simpler than SG derivative but introduces slight edge effects. + """ + from scipy.signal import butter, filtfilt + + rec = dict(recording) + rec["motor_vel_raw"] = rec["motor_vel"].copy() + + dt = np.mean(np.diff(rec["time"])) + fs = 1.0 / dt + nyq = fs / 2.0 + norm_cutoff = min(cutoff_hz / nyq, 0.99) + + b, a = butter(2, norm_cutoff, btype="low") + rec["motor_vel"] = filtfilt(b, a, rec["motor_vel"]) + + log.info( + "velocity_smoothed", + method="butterworth", + cutoff_hz=cutoff_hz, + fs=fs, + ) + + return rec diff --git a/src/sysid/motor/rollout.py b/src/sysid/motor/rollout.py new file mode 100644 index 0000000..691703c --- /dev/null +++ b/src/sysid/motor/rollout.py @@ -0,0 +1,460 @@ +"""Deterministic simulation replay — roll out recorded actions in MuJoCo. + +Motor-only version: single hinge joint, no pendulum. +Given a parameter vector and recorded actions, builds a MuJoCo model +with overridden dynamics, replays the actions, and returns the simulated +motor angle + velocity for comparison with the real recording. +""" + +from __future__ import annotations + +import dataclasses +import xml.etree.ElementTree as ET +from pathlib import Path + +import mujoco +import numpy as np +import yaml + + +# ── Tunable parameter specification ────────────────────────────────── + + +@dataclasses.dataclass +class ParamSpec: + """Specification for a single tunable parameter.""" + + name: str + default: float + lower: float + upper: float + log_scale: bool = False + + +# Motor-only parameters to identify. +# These capture the full transfer function: PWM → shaft angle/velocity. +# +# Asymmetric parameters (pos/neg suffix) capture real-world differences +# between CW and CCW rotation caused by gear mesh, brush contact, +# and H-bridge asymmetry. +MOTOR_PARAMS: list[ParamSpec] = [ + # ── Actuator ───────────────────────────────────────────────── + # gear_pos/neg: effective torque per unit ctrl, split by direction. + # Real motors + L298N often have different drive strength per direction. + ParamSpec("actuator_gear_pos", 0.064, 0.005, 0.5, log_scale=True), + ParamSpec("actuator_gear_neg", 0.064, 0.005, 0.5, log_scale=True), + # filter_tau: first-order electrical/driver time constant (s). + # Lower bound 1ms — L298N PWM switching is very fast. + ParamSpec("actuator_filter_tau", 0.03, 0.001, 0.20), + # ── Joint dynamics ─────────────────────────────────────────── + # damping_pos/neg: viscous friction (back-EMF), split by direction. + ParamSpec("motor_damping_pos", 0.003, 1e-5, 0.1, log_scale=True), + ParamSpec("motor_damping_neg", 0.003, 1e-5, 0.1, log_scale=True), + # armature: reflected rotor inertia (kg·m²). + ParamSpec("motor_armature", 0.0001, 1e-6, 0.01, log_scale=True), + # frictionloss_pos/neg: Coulomb friction, split by velocity direction. + ParamSpec("motor_frictionloss_pos", 0.03, 0.001, 0.2, log_scale=True), + ParamSpec("motor_frictionloss_neg", 0.03, 0.001, 0.2, log_scale=True), + # ── Nonlinear dynamics ─────────────────────────────────────── + # viscous_quadratic: velocity-squared drag term (N·m·s²/rad²). + # Captures nonlinear friction that increases at high speed (air drag, + # grease viscosity, etc.). Opposes motion. + ParamSpec("viscous_quadratic", 0.0, 0.0, 0.005), + # back_emf_gain: torque reduction proportional to |vel × ctrl|. + # Models the back-EMF effect: at high speed the motor produces less + # torque because the voltage drop across the armature is smaller. + ParamSpec("back_emf_gain", 0.0, 0.0, 0.05), + # stribeck_vel: characteristic velocity below which Coulomb friction + # is boosted (Stribeck effect). 0 = standard Coulomb only. + ParamSpec("stribeck_friction_boost", 0.0, 0.0, 0.15), + ParamSpec("stribeck_vel", 2.0, 0.1, 8.0), + # ── Rotor load ─────────────────────────────────────────────── + ParamSpec("rotor_mass", 0.012, 0.002, 0.05, log_scale=True), + # ── Hardware realism ───────────────────────────────────────── + # deadzone_pos/neg: minimum |action| per direction. + ParamSpec("motor_deadzone_pos", 0.08, 0.0, 0.30), + ParamSpec("motor_deadzone_neg", 0.08, 0.0, 0.30), + # action_bias: constant offset added to ctrl (H-bridge asymmetry). + ParamSpec("action_bias", 0.0, -0.10, 0.10), + # ── Gearbox backlash ───────────────────────────────────────── + # backlash_halfwidth: half the angular deadband (rad) in the gearbox. + # When the motor reverses, the shaft doesn't move until the backlash + # gap is taken up. Typical for 30:1 plastic/metal spur gears. + ParamSpec("gearbox_backlash", 0.0, 0.0, 0.15), +] + + +def params_to_dict( + values: np.ndarray, specs: list[ParamSpec] | None = None +) -> dict[str, float]: + if specs is None: + specs = MOTOR_PARAMS + return {s.name: float(values[i]) for i, s in enumerate(specs)} + + +def defaults_vector(specs: list[ParamSpec] | None = None) -> np.ndarray: + if specs is None: + specs = MOTOR_PARAMS + return np.array([s.default for s in specs], dtype=np.float64) + + +def bounds_arrays( + specs: list[ParamSpec] | None = None, +) -> tuple[np.ndarray, np.ndarray]: + if specs is None: + specs = MOTOR_PARAMS + lo = np.array([s.lower for s in specs], dtype=np.float64) + hi = np.array([s.upper for s in specs], dtype=np.float64) + return lo, hi + + +# ── MuJoCo model building with parameter overrides ────────────────── + + +def _build_model( + asset_path: Path, + params: dict[str, float], +) -> mujoco.MjModel: + """Build a MuJoCo model from motor.xml with parameter overrides. + + Parses the MJCF, patches actuator/joint/body parameters, reloads. + """ + asset_path = Path(asset_path).resolve() + robot_cfg = yaml.safe_load((asset_path / "robot.yaml").read_text()) + mjcf_path = asset_path / robot_cfg["mjcf"] + + tree = ET.parse(str(mjcf_path)) + root = tree.getroot() + + # ── Actuator overrides ─────────────────────────────────────── + # Use average of pos/neg gear for MuJoCo (asymmetry handled in ctrl). + gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064)) + gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064)) + gear = (gear_pos + gear_neg) / 2.0 + filter_tau = params.get("actuator_filter_tau", 0.03) + + for act_el in root.iter("general"): + if act_el.get("name") == "motor": + act_el.set("gear", str(gear)) + if filter_tau > 0: + act_el.set("dyntype", "filter") + act_el.set("dynprm", str(filter_tau)) + else: + act_el.set("dyntype", "none") + act_el.set("dynprm", "1") + + # ── Joint overrides ────────────────────────────────────────── + # Damping and friction are asymmetric + nonlinear → applied manually. + # Set MuJoCo damping & frictionloss to 0; we handle them in qfrc_applied. + armature = params.get("motor_armature", 0.0001) + + for jnt in root.iter("joint"): + if jnt.get("name") == "motor_joint": + jnt.set("damping", "0") + jnt.set("armature", str(armature)) + jnt.set("frictionloss", "0") + + # ── Rotor mass override ────────────────────────────────────── + rotor_mass = params.get("rotor_mass", 0.012) + for geom in root.iter("geom"): + if geom.get("name") == "rotor_disk": + geom.set("mass", str(rotor_mass)) + + # Write temp file and load. + tmp_path = asset_path / "_tmp_motor_sysid.xml" + try: + tree.write(str(tmp_path), xml_declaration=True, encoding="unicode") + model = mujoco.MjModel.from_xml_path(str(tmp_path)) + finally: + tmp_path.unlink(missing_ok=True) + + return model + + +# ── Action + asymmetry transforms ──────────────────────────────────── + + +def _transform_action( + action: float, + params: dict[str, float], +) -> float: + """Apply bias, direction-dependent deadzone, and gear scaling. + + The MuJoCo actuator has the *average* gear ratio. We rescale the + control signal so that ``ctrl × gear_avg ≈ action × gear_dir``. + """ + # Constant bias (H-bridge asymmetry). + action = action + params.get("action_bias", 0.0) + + # Direction-dependent deadzone. + dz_pos = params.get("motor_deadzone_pos", params.get("motor_deadzone", 0.08)) + dz_neg = params.get("motor_deadzone_neg", params.get("motor_deadzone", 0.08)) + if action >= 0 and action < dz_pos: + return 0.0 + if action < 0 and action > -dz_neg: + return 0.0 + + # Direction-dependent gear scaling. + # MuJoCo model uses gear_avg; we rescale ctrl to get the right torque. + gear_pos = params.get("actuator_gear_pos", params.get("actuator_gear", 0.064)) + gear_neg = params.get("actuator_gear_neg", params.get("actuator_gear", 0.064)) + gear_avg = (gear_pos + gear_neg) / 2.0 + if gear_avg < 1e-8: + return 0.0 + gear_dir = gear_pos if action >= 0 else gear_neg + return action * (gear_dir / gear_avg) + + +def _apply_forces( + data: mujoco.MjData, + vel: float, + ctrl: float, + params: dict[str, float], + backlash_state: list[float] | None = None, +) -> None: + """Apply all manual forces: asymmetric friction, damping, and nonlinear terms. + + Everything that MuJoCo can't represent with its symmetric joint model + is injected here via ``qfrc_applied``. + + Forces applied (all oppose motion or reduce torque): + 1. Asymmetric Coulomb friction (with Stribeck boost at low speed) + 2. Asymmetric viscous damping + 3. Quadratic velocity drag + 4. Back-EMF torque reduction (proportional to |vel|) + + Backlash: + If backlash_state is provided, it is a 1-element list [gap_pos]. + gap_pos tracks the motor's position within the backlash deadband. + When inside the gap, no actuator torque is transmitted to the + output shaft — only friction forces act. + """ + torque = 0.0 + + # ── Gearbox backlash ────────────────────────────────────────── + # Model: the gear teeth have play of 2×halfwidth radians. + # We track where the motor is within that gap. When at the + # edge (contact), actuator torque passes through normally. + # When inside the gap, no actuator torque is transmitted. + backlash_hw = params.get("gearbox_backlash", 0.0) + actuator_torque_scale = 1.0 # 1.0 = full contact, 0.0 = in gap + + if backlash_hw > 0 and backlash_state is not None: + # gap_pos: how far into the backlash gap we are. + # Range: [-backlash_hw, +backlash_hw] + # At ±backlash_hw, gears are in contact and torque transmits. + gap = backlash_state[0] + # Update gap position based on velocity. + dt_sub = data.model.opt.timestep + gap += vel * dt_sub + # Clamp to backlash range. + if gap > backlash_hw: + gap = backlash_hw + elif gap < -backlash_hw: + gap = -backlash_hw + + backlash_state[0] = gap + + # If not at contact edge, no torque transmitted. + if abs(gap) < backlash_hw - 1e-8: + actuator_torque_scale = 0.0 + else: + actuator_torque_scale = 1.0 + + # ── 1. Coulomb friction (direction-dependent + Stribeck) ───── + fl_pos = params.get("motor_frictionloss_pos", params.get("motor_frictionloss", 0.03)) + fl_neg = params.get("motor_frictionloss_neg", params.get("motor_frictionloss", 0.03)) + stribeck_boost = params.get("stribeck_friction_boost", 0.0) + stribeck_vel = params.get("stribeck_vel", 2.0) + + if abs(vel) > 1e-6: + fl = fl_pos if vel > 0 else fl_neg + # Stribeck: boost friction at low speed. Exponential decay. + if stribeck_boost > 0 and stribeck_vel > 0: + fl = fl * (1.0 + stribeck_boost * np.exp(-abs(vel) / stribeck_vel)) + # Coulomb: constant magnitude, opposes motion. + torque -= np.sign(vel) * fl + + # ── 2. Asymmetric viscous damping ──────────────────────────── + damp_pos = params.get("motor_damping_pos", params.get("motor_damping", 0.003)) + damp_neg = params.get("motor_damping_neg", params.get("motor_damping", 0.003)) + damp = damp_pos if vel > 0 else damp_neg + torque -= damp * vel + + # ── 3. Quadratic velocity drag ─────────────────────────────── + visc_quad = params.get("viscous_quadratic", 0.0) + if visc_quad > 0: + torque -= visc_quad * vel * abs(vel) + + # ── 4. Back-EMF torque reduction ───────────────────────────── + # At high speed, the motor's effective torque is reduced because + # back-EMF opposes the supply voltage. Modelled as a torque that + # opposes the control signal proportional to speed. + bemf = params.get("back_emf_gain", 0.0) + if bemf > 0 and abs(ctrl) > 1e-6: + # The reduction should oppose the actuator torque direction. + torque -= bemf * vel * np.sign(ctrl) * actuator_torque_scale + + # ── 5. Scale actuator contribution by backlash state ───────── + # When in the backlash gap, MuJoCo's actuator force should not + # transmit. We cancel it by applying an opposing force. + if actuator_torque_scale < 1.0: + # The actuator_force from MuJoCo will be applied by mj_step. + # We need to counteract it. data.qfrc_actuator isn't set yet + # at this point (pre-step), so we zero the ctrl instead. + # This is handled in the rollout loop by zeroing ctrl. + pass + + data.qfrc_applied[0] = max(-10.0, min(10.0, torque)) + return actuator_torque_scale + + +# ── Simulation rollout ─────────────────────────────────────────────── + + +def rollout( + asset_path: str | Path, + params: dict[str, float], + actions: np.ndarray, + sim_dt: float = 0.002, + substeps: int = 10, +) -> dict[str, np.ndarray]: + """Open-loop replay of recorded actions. + + Parameters + ---------- + asset_path : motor asset directory + params : named parameter overrides + actions : (N,) normalised actions [-1, 1] from the recording + sim_dt : MuJoCo physics timestep + substeps : physics substeps per control step (ctrl_dt = sim_dt × substeps) + + Returns + ------- + dict with motor_angle (N,) and motor_vel (N,). + """ + asset_path = Path(asset_path).resolve() + model = _build_model(asset_path, params) + model.opt.timestep = sim_dt + data = mujoco.MjData(model) + mujoco.mj_resetData(model, data) + + n = len(actions) + + sim_motor_angle = np.zeros(n, dtype=np.float64) + sim_motor_vel = np.zeros(n, dtype=np.float64) + + # Backlash state: [gap_position]. Starts at 0 (centered in gap). + backlash_state = [0.0] + + for i in range(n): + ctrl = _transform_action(actions[i], params) + data.ctrl[0] = ctrl + + for _ in range(substeps): + scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state) + # If in backlash gap, zero ctrl so actuator torque doesn't transmit. + if scale < 1.0: + data.ctrl[0] = 0.0 + else: + data.ctrl[0] = ctrl + mujoco.mj_step(model, data) + + # Bail out on NaN/instability. + if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4: + sim_motor_angle[i:] = np.nan + sim_motor_vel[i:] = np.nan + break + + sim_motor_angle[i] = data.qpos[0] + sim_motor_vel[i] = data.qvel[0] + + return { + "motor_angle": sim_motor_angle, + "motor_vel": sim_motor_vel, + } + + +def windowed_rollout( + asset_path: str | Path, + params: dict[str, float], + recording: dict[str, np.ndarray], + window_duration: float = 0.5, + sim_dt: float = 0.002, + substeps: int = 10, +) -> dict[str, np.ndarray]: + """Multiple-shooting rollout for motor-only sysid. + + Splits the recording into short windows. Each window is initialised + from the real motor state, preventing error accumulation. + + Returns + ------- + dict with motor_angle (N,), motor_vel (N,), n_windows (int). + """ + asset_path = Path(asset_path).resolve() + model = _build_model(asset_path, params) + model.opt.timestep = sim_dt + data = mujoco.MjData(model) + + times = recording["time"] + actions = recording["action"] + real_angle = recording["motor_angle"] + real_vel = recording["motor_vel"] + n = len(actions) + + sim_motor_angle = np.zeros(n, dtype=np.float64) + sim_motor_vel = np.zeros(n, dtype=np.float64) + + # Compute window boundaries. + t0 = times[0] + t_end = times[-1] + window_starts: list[int] = [] + current_t = t0 + while current_t < t_end: + idx = int(np.searchsorted(times, current_t)) + idx = min(idx, n - 1) + window_starts.append(idx) + current_t += window_duration + + n_windows = len(window_starts) + + for w, w_start in enumerate(window_starts): + w_end = window_starts[w + 1] if w + 1 < n_windows else n + + # Init from real state at window start. + mujoco.mj_resetData(model, data) + data.qpos[0] = real_angle[w_start] + data.qvel[0] = real_vel[w_start] + data.ctrl[:] = 0.0 + mujoco.mj_forward(model, data) + + # Backlash state resets each window (assume contact at start). + backlash_state = [0.0] + + for i in range(w_start, w_end): + ctrl = _transform_action(actions[i], params) + data.ctrl[0] = ctrl + + for _ in range(substeps): + scale = _apply_forces(data, data.qvel[0], ctrl, params, backlash_state) + if scale < 1.0: + data.ctrl[0] = 0.0 + else: + data.ctrl[0] = ctrl + mujoco.mj_step(model, data) + + # Bail out on NaN/instability — fill rest of window with last good. + if not np.isfinite(data.qpos[0]) or abs(data.qvel[0]) > 1e4: + sim_motor_angle[i:w_end] = sim_motor_angle[max(i-1, w_start)] + sim_motor_vel[i:w_end] = 0.0 + break + + sim_motor_angle[i] = data.qpos[0] + sim_motor_vel[i] = data.qvel[0] + + return { + "motor_angle": sim_motor_angle, + "motor_vel": sim_motor_vel, + "n_windows": n_windows, + } diff --git a/src/sysid/motor/visualize.py b/src/sysid/motor/visualize.py new file mode 100644 index 0000000..69fa107 --- /dev/null +++ b/src/sysid/motor/visualize.py @@ -0,0 +1,204 @@ +"""Visualise motor system identification — real vs simulated trajectories. + +Usage: + python -m src.sysid.motor.visualize \ + --recording assets/motor/recordings/motor_capture_YYYYMMDD_HHMMSS.npz + + # With tuned result: + python -m src.sysid.motor.visualize \ + --recording .npz \ + --result assets/motor/motor_sysid_result.json +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +import numpy as np +import structlog + +log = structlog.get_logger() + +_DEFAULT_ASSET = "assets/motor" + + +def visualize( + asset_path: str | Path = _DEFAULT_ASSET, + recording_path: str | Path = "", + result_path: str | Path | None = None, + sim_dt: float = 0.002, + substeps: int = 10, + window_duration: float = 0.5, + save_path: str | Path | None = None, + show: bool = True, +) -> None: + """Generate 3-panel comparison plot: angle, velocity, action.""" + import matplotlib.pyplot as plt + + from src.sysid.motor.rollout import ( + MOTOR_PARAMS, + defaults_vector, + params_to_dict, + rollout, + windowed_rollout, + ) + + asset_path = Path(asset_path).resolve() + recording = dict(np.load(recording_path)) + + t = recording["time"] + actions = recording["action"] + + # ── Simulate with default parameters ───────────────────────── + default_params = params_to_dict(defaults_vector(MOTOR_PARAMS), MOTOR_PARAMS) + log.info("simulating_default_params", windowed=window_duration > 0) + + if window_duration > 0: + sim_default = windowed_rollout( + asset_path=asset_path, + params=default_params, + recording=recording, + window_duration=window_duration, + sim_dt=sim_dt, + substeps=substeps, + ) + else: + sim_default = rollout( + asset_path=asset_path, + params=default_params, + actions=actions, + sim_dt=sim_dt, + substeps=substeps, + ) + + # ── Simulate with tuned parameters (if available) ──────────── + sim_tuned = None + tuned_cost = None + + if result_path is not None: + result_path = Path(result_path) + else: + # Auto-detect. + auto = asset_path / "motor_sysid_result.json" + if auto.exists(): + result_path = auto + + if result_path is not None and result_path.exists(): + result = json.loads(result_path.read_text()) + tuned_params = result.get("best_params", {}) + tuned_cost = result.get("best_cost") + log.info("simulating_tuned_params", cost=tuned_cost) + + if window_duration > 0: + sim_tuned = windowed_rollout( + asset_path=asset_path, + params=tuned_params, + recording=recording, + window_duration=window_duration, + sim_dt=sim_dt, + substeps=substeps, + ) + else: + sim_tuned = rollout( + asset_path=asset_path, + params=tuned_params, + actions=actions, + sim_dt=sim_dt, + substeps=substeps, + ) + + # ── Plot ───────────────────────────────────────────────────── + fig, axes = plt.subplots(3, 1, figsize=(14, 8), sharex=True) + + # Motor angle. + ax = axes[0] + ax.plot(t, np.degrees(recording["motor_angle"]), "k-", lw=1.2, alpha=0.8, label="Real") + ax.plot(t, np.degrees(sim_default["motor_angle"]), "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)") + if sim_tuned is not None: + ax.plot(t, np.degrees(sim_tuned["motor_angle"]), "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)") + ax.set_ylabel("Motor Angle (°)") + ax.legend(loc="upper right", fontsize=8) + ax.grid(True, alpha=0.3) + + # Motor velocity. + ax = axes[1] + ax.plot(t, recording["motor_vel"], "k-", lw=1.2, alpha=0.8, label="Real") + ax.plot(t, sim_default["motor_vel"], "--", color="#d62728", lw=1.0, alpha=0.7, label="Sim (default)") + if sim_tuned is not None: + ax.plot(t, sim_tuned["motor_vel"], "--", color="#2ca02c", lw=1.0, alpha=0.7, label="Sim (tuned)") + ax.set_ylabel("Motor Velocity (rad/s)") + ax.legend(loc="upper right", fontsize=8) + ax.grid(True, alpha=0.3) + + # Action. + ax = axes[2] + ax.plot(t, actions, "b-", lw=0.8, alpha=0.6) + ax.set_ylabel("Action (norm)") + ax.set_xlabel("Time (s)") + ax.grid(True, alpha=0.3) + ax.set_ylim(-1.1, 1.1) + + # Title. + title = "Motor System Identification — Real vs Simulated" + if tuned_cost is not None: + from src.sysid.motor.optimize import cost_function + + orig_cost = cost_function( + defaults_vector(MOTOR_PARAMS), + recording, + asset_path, + MOTOR_PARAMS, + sim_dt=sim_dt, + substeps=substeps, + window_duration=window_duration, + ) + title += f"\nDefault cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}" + improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0 + title += f" ({improvement:+.1f}%)" + + fig.suptitle(title, fontsize=12) + plt.tight_layout() + + if save_path: + fig.savefig(str(save_path), dpi=150, bbox_inches="tight") + log.info("figure_saved", path=str(save_path)) + + if show: + plt.show() + else: + plt.close(fig) + + +# ── CLI ────────────────────────────────────────────────────────────── + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Visualise motor system identification results." + ) + parser.add_argument("--asset-path", type=str, default=_DEFAULT_ASSET) + parser.add_argument("--recording", type=str, required=True, help=".npz file") + parser.add_argument("--result", type=str, default=None, help="sysid result JSON") + parser.add_argument("--sim-dt", type=float, default=0.002) + parser.add_argument("--substeps", type=int, default=10) + parser.add_argument("--window-duration", type=float, default=0.5) + parser.add_argument("--save", type=str, default=None, help="Save figure path") + parser.add_argument("--no-show", action="store_true") + args = parser.parse_args() + + visualize( + asset_path=args.asset_path, + recording_path=args.recording, + result_path=args.result, + sim_dt=args.sim_dt, + substeps=args.substeps, + window_duration=args.window_duration, + save_path=args.save, + show=not args.no_show, + ) + + +if __name__ == "__main__": + main() diff --git a/src/sysid/optimize.py b/src/sysid/optimize.py index 1ffc03f..4a4f931 100644 --- a/src/sysid/optimize.py +++ b/src/sysid/optimize.py @@ -4,10 +4,15 @@ Minimises the trajectory-matching cost between a MuJoCo rollout and a recorded real-robot sequence. Uses the ``cmaes`` package (pure-Python CMA-ES with native box-constraint support). +Motor parameters are **locked** from the motor-only sysid — only +pendulum/arm inertial parameters, joint dynamics, and ctrl_limit are +optimised. Velocities are optionally preprocessed with Savitzky-Golay +differentiation for cleaner targets. + Usage: python -m src.sysid.optimize \ --robot-path assets/rotary_cartpole \ - --recording assets/rotary_cartpole/recordings/capture_20260311_120000.npz + --recording assets/rotary_cartpole/recordings/capture_20260314_000435.npz # Shorter run for testing: python -m src.sysid.optimize \ @@ -28,6 +33,8 @@ import numpy as np import structlog from src.sysid.rollout import ( + LOCKED_MOTOR_PARAMS, + PARAM_SETS, ROTARY_CARTPOLE_PARAMS, ParamSpec, bounds_arrays, @@ -40,6 +47,65 @@ from src.sysid.rollout import ( log = structlog.get_logger() +# ── Velocity preprocessing ─────────────────────────────────────────── + + +def _preprocess_recording( + recording: dict[str, np.ndarray], + preprocess_vel: bool = True, + sg_window: int = 7, + sg_polyorder: int = 3, +) -> dict[str, np.ndarray]: + """Optionally recompute velocities using Savitzky-Golay differentiation. + + Applies SG filtering to both motor_vel and pendulum_vel, replacing + the noisy firmware finite-difference velocities with smooth + analytical derivatives of the (clean) angle signals. + """ + if not preprocess_vel: + return recording + + from scipy.signal import savgol_filter + + rec = dict(recording) + times = rec["time"] + dt = float(np.mean(np.diff(times))) + + # Motor velocity. + rec["motor_vel_raw"] = rec["motor_vel"].copy() + rec["motor_vel"] = savgol_filter( + rec["motor_angle"], + window_length=sg_window, + polyorder=sg_polyorder, + deriv=1, + delta=dt, + ) + + # Pendulum velocity. + rec["pendulum_vel_raw"] = rec["pendulum_vel"].copy() + rec["pendulum_vel"] = savgol_filter( + rec["pendulum_angle"], + window_length=sg_window, + polyorder=sg_polyorder, + deriv=1, + delta=dt, + ) + + motor_noise = np.std(rec["motor_vel_raw"] - rec["motor_vel"]) + pend_noise = np.std(rec["pendulum_vel_raw"] - rec["pendulum_vel"]) + log.info( + "velocity_preprocessed", + method="savgol", + sg_window=sg_window, + sg_polyorder=sg_polyorder, + dt_ms=f"{dt*1000:.1f}", + motor_noise_std=f"{motor_noise:.3f} rad/s", + pend_noise_std=f"{pend_noise:.3f} rad/s", + ) + + return rec + + # ── Cost function ──────────────────────────────────────────────────── @@ -63,18 +129,35 @@ def _compute_trajectory_cost( recording: dict[str, np.ndarray], pos_weight: float = 1.0, vel_weight: float = 0.1, + pendulum_scale: float = 3.0, + vel_outlier_threshold: float = 20.0, ) -> float: - """Weighted MSE between sim and real trajectories.""" + """Weighted MSE between sim and real trajectories. + + pendulum_scale multiplies the pendulum terms relative to motor terms. + + Samples where the *real* pendulum velocity exceeds + ``vel_outlier_threshold`` (rad/s) are excluded from the velocity + terms. These are encoder-wrap artefacts (pendulum swinging past + vertical) that no simulator can reproduce. + """ motor_err = _angle_diff(sim["motor_angle"], recording["motor_angle"]) pend_err = _angle_diff(sim["pendulum_angle"], recording["pendulum_angle"]) motor_vel_err = sim["motor_vel"] - recording["motor_vel"] pend_vel_err = sim["pendulum_vel"] - recording["pendulum_vel"] + # Mask out encoder-wrap velocity spikes so the optimizer doesn't + # waste capacity fitting artefacts. + valid = np.abs(recording["pendulum_vel"]) < vel_outlier_threshold + if valid.sum() < len(valid): + motor_vel_err = motor_vel_err[valid] + pend_vel_err = pend_vel_err[valid] + return float( pos_weight * np.mean(motor_err**2) - + pos_weight * np.mean(pend_err**2) + + pos_weight * pendulum_scale * np.mean(pend_err**2) + vel_weight * np.mean(motor_vel_err**2) - + vel_weight * np.mean(pend_vel_err**2) + + vel_weight * pendulum_scale * np.mean(pend_vel_err**2) ) @@ -87,7 +170,9 @@ def cost_function( substeps: int = 10, pos_weight: float = 1.0, vel_weight: float = 0.1, + pendulum_scale: float = 3.0, window_duration: float = 0.5, + motor_params: dict[str, float] | None = None, ) -> float: """Compute trajectory-matching cost for a candidate parameter vector. @@ -113,21 +198,56 @@ def cost_function( window_duration=window_duration, sim_dt=sim_dt, substeps=substeps, + motor_params=motor_params, ) else: sim = rollout( robot_path=robot_path, params=params, actions=recording["action"], - timesteps=recording["time"], sim_dt=sim_dt, substeps=substeps, + motor_params=motor_params, ) except Exception as exc: log.warning("rollout_failed", error=str(exc)) return 1e6 - return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight) + # Check for NaN in sim output. + for key in ("motor_angle", "motor_vel", "pendulum_angle", "pendulum_vel"): + if np.any(~np.isfinite(sim[key])): + return 1e6 + + return _compute_trajectory_cost(sim, recording, pos_weight, vel_weight, pendulum_scale) + + +# ── Parallel evaluation helper (module-level for pickling) ─────────── + +# Shared state set by the parent process before spawning workers. +_par_recording: dict[str, np.ndarray] = {} +_par_robot_path: Path = Path(".") +_par_specs: list[ParamSpec] = [] +_par_kwargs: dict = {} + + +def _init_worker(recording, robot_path, specs, kwargs): + """Initialiser run once per worker process.""" + global _par_recording, _par_robot_path, _par_specs, _par_kwargs + _par_recording = recording + _par_robot_path = robot_path + _par_specs = specs + _par_kwargs = kwargs + + +def _eval_candidate(x_natural: np.ndarray) -> float: + """Evaluate a single candidate — called in worker processes.""" + return cost_function( + x_natural, + _par_recording, + _par_robot_path, + _par_specs, + **_par_kwargs, + ) # ── CMA-ES optimisation loop ──────────────────────────────────────── @@ -144,17 +264,25 @@ def optimize( substeps: int = 10, pos_weight: float = 1.0, vel_weight: float = 0.1, + pendulum_scale: float = 3.0, window_duration: float = 0.5, seed: int = 42, + preprocess_vel: bool = True, + sg_window: int = 7, + sg_polyorder: int = 3, ) -> dict: """Run CMA-ES optimisation and return results. + Motor parameters are locked from the motor-only sysid. + Only pendulum/arm parameters are optimised. + Returns a dict with: best_params: dict[str, float] best_cost: float history: list of (generation, best_cost) tuples recording: str (path used) specs: list of param names + motor_params: dict of locked motor params """ from cmaes import CMA @@ -164,8 +292,24 @@ def optimize( if specs is None: specs = ROTARY_CARTPOLE_PARAMS + motor_params = dict(LOCKED_MOTOR_PARAMS) + log.info( + "motor_params_locked", + n_params=len(motor_params), + gear_avg=f"{(motor_params['actuator_gear_pos'] + motor_params['actuator_gear_neg']) / 2:.4f}", + ) + # Load recording. recording = dict(np.load(recording_path)) + + # Preprocessing: SG velocity recomputation. + recording = _preprocess_recording( + recording, + preprocess_vel=preprocess_vel, + sg_window=sg_window, + sg_polyorder=sg_polyorder, + ) + n_samples = len(recording["time"]) duration = recording["time"][-1] - recording["time"][0] n_windows = max(1, int(duration / window_duration)) if window_duration > 0 else 1 @@ -219,28 +363,54 @@ def optimize( t0 = time.monotonic() + # ── Parallel evaluation setup ──────────────────────────────── + # Each candidate is independent — evaluate them in parallel using + # a process pool. Falls back to sequential if n_workers=1. + import multiprocessing as mp + n_workers = max(1, mp.cpu_count() - 1) # leave 1 core free + + eval_kwargs = dict( + sim_dt=sim_dt, + substeps=substeps, + pos_weight=pos_weight, + vel_weight=vel_weight, + pendulum_scale=pendulum_scale, + window_duration=window_duration, + motor_params=motor_params, + ) + + log.info("parallel_workers", n_workers=n_workers) + + # Create a persistent pool (avoids per-generation fork overhead). + pool = None + if n_workers > 1: + pool = mp.Pool( + n_workers, + initializer=_init_worker, + initargs=(recording, robot_path, specs, eval_kwargs), + ) + for gen in range(max_generations): - solutions = [] + # Ask all candidates first. + candidates_normed = [] + candidates_natural = [] for _ in range(optimizer.population_size): x_normed = optimizer.ask() x_natural = from_normed(x_normed) - - # Clip to bounds (CMA-ES can slightly exceed with sampling noise). x_natural = np.clip(x_natural, lo, hi) + candidates_normed.append(x_normed) + candidates_natural.append(x_natural) - c = cost_function( - x_natural, - recording, - robot_path, - specs, - sim_dt=sim_dt, - substeps=substeps, - pos_weight=pos_weight, - vel_weight=vel_weight, - window_duration=window_duration, - ) - solutions.append((x_normed, c)) + # Evaluate in parallel. + if pool is not None: + costs = pool.map(_eval_candidate, candidates_natural) + else: + costs = [cost_function( + x, recording, robot_path, specs, **eval_kwargs + ) for x in candidates_natural] + solutions = list(zip(candidates_normed, costs)) + for x_natural, c in zip(candidates_natural, costs): if c < best_cost: best_cost = c best_params_vec = x_natural.copy() @@ -259,6 +429,12 @@ def optimize( ) total_time = time.monotonic() - t0 + + # Clean up process pool. + if pool is not None: + pool.close() + pool.join() + best_params = params_to_dict(best_params_vec, specs) log.info( @@ -289,6 +465,8 @@ def optimize( "recording": str(recording_path), "param_names": [s.name for s in specs], "defaults": {s.name: s.default for s in specs}, + "motor_params": motor_params, + "preprocess_vel": preprocess_vel, "timestamp": datetime.now().isoformat(), } @@ -319,6 +497,8 @@ def main() -> None: parser.add_argument("--substeps", type=int, default=10) parser.add_argument("--pos-weight", type=float, default=1.0) parser.add_argument("--vel-weight", type=float, default=0.1) + parser.add_argument("--pendulum-scale", type=float, default=3.0, + help="Multiplier for pendulum terms relative to motor (default 3.0)") parser.add_argument( "--window-duration", type=float, @@ -331,11 +511,30 @@ def main() -> None: action="store_true", help="Skip exporting tuned files (results JSON only)", ) + parser.add_argument( + "--no-preprocess-vel", + action="store_true", + help="Skip Savitzky-Golay velocity preprocessing", + ) + parser.add_argument("--sg-window", type=int, default=7, + help="Savitzky-Golay window length (odd, default 7)") + parser.add_argument("--sg-polyorder", type=int, default=3, + help="Savitzky-Golay polynomial order (default 3)") + parser.add_argument( + "--param-set", + type=str, + default="full", + choices=list(PARAM_SETS.keys()), + help="Parameter set to optimize: 'reduced' (6 params, fast) or 'full' (15 params)", + ) args = parser.parse_args() + specs = PARAM_SETS[args.param_set] + result = optimize( robot_path=args.robot_path, recording_path=args.recording, + specs=specs, sigma0=args.sigma0, population_size=args.population_size, max_generations=args.max_generations, @@ -343,8 +542,12 @@ def main() -> None: substeps=args.substeps, pos_weight=args.pos_weight, vel_weight=args.vel_weight, + pendulum_scale=args.pendulum_scale, window_duration=args.window_duration, seed=args.seed, + preprocess_vel=not args.no_preprocess_vel, + sg_window=args.sg_window, + sg_polyorder=args.sg_polyorder, ) # Save results JSON. @@ -369,6 +572,7 @@ def main() -> None: export_tuned_files( robot_path=args.robot_path, params=result["best_params"], + motor_params=result.get("motor_params"), ) diff --git a/src/sysid/rollout.py b/src/sysid/rollout.py index cbdcf53..7ac2e96 100644 --- a/src/sysid/rollout.py +++ b/src/sysid/rollout.py @@ -6,22 +6,51 @@ the simulated trajectory for comparison with the real recording. This module is the inner loop of the CMA-ES optimizer: it is called once per candidate parameter vector per generation. + +Motor parameters are **locked** from the motor-only sysid result. +The optimizer only fits +pendulum/arm inertial parameters, pendulum joint dynamics, and +``ctrl_limit``. The asymmetric motor model (deadzone, gear compensation, +Coulomb friction, viscous damping, quadratic drag, back-EMF) is applied +via ``ActuatorConfig.transform_ctrl()`` and ``compute_motor_force()``. """ from __future__ import annotations -import copy import dataclasses -import math +import os import tempfile import xml.etree.ElementTree as ET from pathlib import Path -from typing import Any import mujoco import numpy as np import yaml +from src.core.robot import ActuatorConfig, JointConfig, RobotConfig +from src.runners.mujoco import ActuatorLimits, load_mujoco_model +from src.sysid._urdf import patch_link_inertials + + +# ── Locked motor parameters (from motor-only sysid) ───────────────── +# These are FIXED and not optimised. They come from the 12-param model +# in robot.yaml (from motor-only sysid, cost 0.862). + +LOCKED_MOTOR_PARAMS: dict[str, float] = { + "actuator_gear_pos": 0.424182, + "actuator_gear_neg": 0.425031, + "actuator_filter_tau": 0.00503506, + "motor_damping_pos": 0.00202682, + "motor_damping_neg": 0.0146651, + "motor_armature": 0.00277342, + "motor_frictionloss_pos": 0.0573282, + "motor_frictionloss_neg": 0.0533549, + "viscous_quadratic": 0.000285329, + "back_emf_gain": 0.00675809, + "motor_deadzone_pos": 0.141291, + "motor_deadzone_neg": 0.0780148, +} + # ── Tunable parameter specification ────────────────────────────────── @@ -37,33 +66,58 @@ class ParamSpec: log_scale: bool = False # optimise in log-space (masses, inertias) -# Default parameter specs for the rotary cartpole. +# Pendulum sysid parameters — motor params are LOCKED (not here). # Order matters: the optimizer maps a flat vector to these specs. +# Defaults are from the URDF exported by Fusion 360. ROTARY_CARTPOLE_PARAMS: list[ParamSpec] = [ # ── Arm link (URDF) ────────────────────────────────────────── - ParamSpec("arm_mass", 0.010, 0.003, 0.05, log_scale=True), - ParamSpec("arm_com_x", 0.00005, -0.02, 0.02), - ParamSpec("arm_com_y", 0.0065, -0.01, 0.02), - ParamSpec("arm_com_z", 0.00563, -0.01, 0.02), + ParamSpec("arm_mass", 0.02110, 0.005, 0.08, log_scale=True), + ParamSpec("arm_com_x", -0.00710, -0.03, 0.03), + ParamSpec("arm_com_y", 0.00085, -0.02, 0.02), + ParamSpec("arm_com_z", 0.00795, -0.02, 0.02), # ── Pendulum link (URDF) ───────────────────────────────────── - ParamSpec("pendulum_mass", 0.015, 0.005, 0.05, log_scale=True), - ParamSpec("pendulum_com_x", 0.1583, 0.05, 0.25), - ParamSpec("pendulum_com_y", -0.0983, -0.20, 0.0), - ParamSpec("pendulum_com_z", 0.0, -0.05, 0.05), - ParamSpec("pendulum_ixx", 6.16e-06, 1e-07, 1e-04, log_scale=True), - ParamSpec("pendulum_iyy", 6.16e-06, 1e-07, 1e-04, log_scale=True), - ParamSpec("pendulum_izz", 1.23e-05, 1e-07, 1e-04, log_scale=True), - ParamSpec("pendulum_ixy", 6.10e-06, -1e-04, 1e-04), - # ── Actuator / joint dynamics (robot.yaml) ─────────────────── - ParamSpec("actuator_gear", 0.064, 0.01, 0.2, log_scale=True), - ParamSpec("actuator_filter_tau", 0.03, 0.005, 0.15), - ParamSpec("motor_damping", 0.003, 1e-4, 0.05, log_scale=True), - ParamSpec("pendulum_damping", 0.0001, 1e-5, 0.01, log_scale=True), - ParamSpec("motor_armature", 0.0001, 1e-5, 0.01, log_scale=True), - ParamSpec("motor_frictionloss", 0.03, 0.001, 0.1, log_scale=True), + ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True), + ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15), + ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0), + ParamSpec("pendulum_com_z", -0.00346, -0.05, 0.05), + ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True), + ParamSpec("pendulum_iyy", 3.70e-05, 1e-07, 1e-03, log_scale=True), + ParamSpec("pendulum_izz", 7.83e-05, 1e-07, 1e-03, log_scale=True), + ParamSpec("pendulum_ixy", -6.93e-06, -1e-03, 1e-03), + # ── Pendulum joint dynamics ────────────────────────────────── + ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True), + ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True), + # ── Hardware realism (control pipeline) ──────────────────── + ParamSpec("ctrl_limit", 0.588, 0.45, 0.70), # MAX_MOTOR_SPEED / 255 ] +# Extended set: full params + motor armature (compensates for the +# motor-only sysid having captured arm/pendulum loading in armature). +EXTENDED_PARAMS: list[ParamSpec] = ROTARY_CARTPOLE_PARAMS + [ + ParamSpec("motor_armature", 0.00277, 0.0005, 0.02, log_scale=True), +] + + +# Reduced set: only the 6 most impactful pendulum parameters. +# Good for a fast first pass — converges in ~50 generations. +REDUCED_PARAMS: list[ParamSpec] = [ + ParamSpec("pendulum_mass", 0.03937, 0.010, 0.10, log_scale=True), + ParamSpec("pendulum_com_x", 0.06025, 0.01, 0.15), + ParamSpec("pendulum_com_y", -0.07602, -0.20, 0.0), + ParamSpec("pendulum_ixx", 6.20e-05, 1e-07, 1e-03, log_scale=True), + ParamSpec("pendulum_damping", 0.0001, 1e-6, 0.05, log_scale=True), + ParamSpec("pendulum_frictionloss", 0.0001, 1e-6, 0.05, log_scale=True), +] + + +PARAM_SETS: dict[str, list[ParamSpec]] = { + "full": ROTARY_CARTPOLE_PARAMS, + "extended": EXTENDED_PARAMS, + "reduced": REDUCED_PARAMS, +} + + def params_to_dict( values: np.ndarray, specs: list[ParamSpec] | None = None ) -> dict[str, float]: @@ -97,182 +151,93 @@ def bounds_arrays( def _build_model( robot_path: Path, params: dict[str, float], -) -> mujoco.MjModel: - """Build a MuJoCo model from URDF + robot.yaml with parameter overrides. + motor_params: dict[str, float] | None = None, +) -> tuple[mujoco.MjModel, ActuatorConfig]: + """Build a MuJoCo model with sysid overrides. - Follows the same two-step approach as ``MuJoCoRunner._load_model()``: - 1. Parse URDF, inject meshdir, load into MuJoCo - 2. Export MJCF, inject actuators + joint overrides + param overrides, reload + Returns (model, actuator) — use ``actuator.transform_ctrl()`` and + ``actuator.compute_motor_force()`` in the rollout loop. """ + if motor_params is None: + motor_params = LOCKED_MOTOR_PARAMS + robot_path = Path(robot_path).resolve() + + # ── Patch URDF inertial parameters to a temp file ──────────── robot_yaml = yaml.safe_load((robot_path / "robot.yaml").read_text()) urdf_path = robot_path / robot_yaml["urdf"] - # ── Step 1: Load URDF ──────────────────────────────────────── tree = ET.parse(urdf_path) - root = tree.getroot() + patch_link_inertials(tree.getroot(), params) - # Inject meshdir compiler directive. - meshdir = None - for mesh_el in root.iter("mesh"): - fn = mesh_el.get("filename", "") - parent = str(Path(fn).parent) - if parent and parent != ".": - meshdir = parent - break - if meshdir: - mj_ext = ET.SubElement(root, "mujoco") - ET.SubElement( - mj_ext, "compiler", attrib={"meshdir": meshdir, "balanceinertia": "true"} - ) + fd, tmp_urdf = tempfile.mkstemp( + suffix=".urdf", prefix="_sysid_", dir=str(robot_path), + ) + os.close(fd) + tmp_urdf_path = Path(tmp_urdf) + tree.write(str(tmp_urdf_path), xml_declaration=True, encoding="unicode") - # Override URDF inertial parameters BEFORE MuJoCo loading. - for link in root.iter("link"): - link_name = link.get("name", "") - inertial = link.find("inertial") - if inertial is None: - continue + # ── Build RobotConfig with full motor sysid values ─────────── + gear_pos = motor_params.get("actuator_gear_pos", 0.424182) + gear_neg = motor_params.get("actuator_gear_neg", 0.425031) + motor_armature = params.get( + "motor_armature", + motor_params.get("motor_armature", 0.00277342), + ) + pend_damping = params.get("pendulum_damping", 0.0001) + pend_frictionloss = params.get("pendulum_frictionloss", 0.0001) - if link_name == "arm": - _set_mass(inertial, params.get("arm_mass")) - _set_com( - inertial, - params.get("arm_com_x"), - params.get("arm_com_y"), - params.get("arm_com_z"), - ) + act_cfg = robot_yaml["actuators"][0] + ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0]) - elif link_name == "pendulum": - _set_mass(inertial, params.get("pendulum_mass")) - _set_com( - inertial, - params.get("pendulum_com_x"), - params.get("pendulum_com_y"), - params.get("pendulum_com_z"), - ) - _set_inertia( - inertial, - ixx=params.get("pendulum_ixx"), - iyy=params.get("pendulum_iyy"), - izz=params.get("pendulum_izz"), - ixy=params.get("pendulum_ixy"), - ) + actuator = ActuatorConfig( + joint=act_cfg["joint"], + type="motor", + gear=(gear_pos, gear_neg), + ctrl_range=(ctrl_lo, ctrl_hi), + deadzone=( + motor_params.get("motor_deadzone_pos", 0.141), + motor_params.get("motor_deadzone_neg", 0.078), + ), + damping=( + motor_params.get("motor_damping_pos", 0.002), + motor_params.get("motor_damping_neg", 0.015), + ), + frictionloss=( + motor_params.get("motor_frictionloss_pos", 0.057), + motor_params.get("motor_frictionloss_neg", 0.053), + ), + filter_tau=motor_params.get("actuator_filter_tau", 0.005), + viscous_quadratic=motor_params.get("viscous_quadratic", 0.000285), + back_emf_gain=motor_params.get("back_emf_gain", 0.00676), + ) + + robot = RobotConfig( + urdf_path=tmp_urdf_path, + actuators=[actuator], + joints={ + "motor_joint": JointConfig( + damping=0.0, + armature=motor_armature, + frictionloss=0.0, + ), + "pendulum_joint": JointConfig( + damping=pend_damping, + frictionloss=pend_frictionloss, + ), + }, + ) - # Write temp URDF and load. - tmp_urdf = robot_path / "_tmp_sysid_load.urdf" - tree.write(str(tmp_urdf), xml_declaration=True, encoding="unicode") try: - model_raw = mujoco.MjModel.from_xml_path(str(tmp_urdf)) + model = load_mujoco_model(robot) finally: - tmp_urdf.unlink(missing_ok=True) + tmp_urdf_path.unlink(missing_ok=True) - # ── Step 2: Export MJCF, inject actuators + overrides ──────── - tmp_mjcf = robot_path / "_tmp_sysid_inject.xml" - try: - mujoco.mj_saveLastXML(str(tmp_mjcf), model_raw) - mjcf_root = ET.fromstring(tmp_mjcf.read_text()) - - # Actuator. - gear = params.get("actuator_gear", robot_yaml["actuators"][0].get("gear", 0.064)) - filter_tau = params.get( - "actuator_filter_tau", - robot_yaml["actuators"][0].get("filter_tau", 0.03), - ) - act_cfg = robot_yaml["actuators"][0] - ctrl_lo, ctrl_hi = act_cfg.get("ctrl_range", [-1.0, 1.0]) - - act_elem = ET.SubElement(mjcf_root, "actuator") - attribs: dict[str, str] = { - "name": f"{act_cfg['joint']}_motor", - "joint": act_cfg["joint"], - "gear": str(gear), - "ctrlrange": f"{ctrl_lo} {ctrl_hi}", - } - if filter_tau > 0: - attribs["dyntype"] = "filter" - attribs["dynprm"] = str(filter_tau) - attribs["gaintype"] = "fixed" - attribs["biastype"] = "none" - ET.SubElement(act_elem, "general", attrib=attribs) - else: - ET.SubElement(act_elem, "motor", attrib=attribs) - - # Joint overrides. - motor_damping = params.get("motor_damping", 0.003) - pend_damping = params.get("pendulum_damping", 0.0001) - motor_armature = params.get("motor_armature", 0.0001) - motor_frictionloss = params.get("motor_frictionloss", 0.03) - - for body in mjcf_root.iter("body"): - for jnt in body.findall("joint"): - name = jnt.get("name") - if name == "motor_joint": - jnt.set("damping", str(motor_damping)) - jnt.set("armature", str(motor_armature)) - jnt.set("frictionloss", str(motor_frictionloss)) - elif name == "pendulum_joint": - jnt.set("damping", str(pend_damping)) - - # Disable self-collision. - for geom in mjcf_root.iter("geom"): - geom.set("contype", "0") - geom.set("conaffinity", "0") - - modified_xml = ET.tostring(mjcf_root, encoding="unicode") - tmp_mjcf.write_text(modified_xml) - model = mujoco.MjModel.from_xml_path(str(tmp_mjcf)) - finally: - tmp_mjcf.unlink(missing_ok=True) - - return model + return model, actuator -def _set_mass(inertial: ET.Element, mass: float | None) -> None: - if mass is None: - return - mass_el = inertial.find("mass") - if mass_el is not None: - mass_el.set("value", str(mass)) -def _set_com( - inertial: ET.Element, - x: float | None, - y: float | None, - z: float | None, -) -> None: - origin = inertial.find("origin") - if origin is None: - return - xyz = origin.get("xyz", "0 0 0").split() - if x is not None: - xyz[0] = str(x) - if y is not None: - xyz[1] = str(y) - if z is not None: - xyz[2] = str(z) - origin.set("xyz", " ".join(xyz)) - - -def _set_inertia( - inertial: ET.Element, - ixx: float | None = None, - iyy: float | None = None, - izz: float | None = None, - ixy: float | None = None, - iyz: float | None = None, - ixz: float | None = None, -) -> None: - ine = inertial.find("inertia") - if ine is None: - return - for attr, val in [ - ("ixx", ixx), ("iyy", iyy), ("izz", izz), - ("ixy", ixy), ("iyz", iyz), ("ixz", ixz), - ]: - if val is not None: - ine.set(attr, str(val)) - # ── Simulation rollout ─────────────────────────────────────────────── @@ -281,72 +246,58 @@ def rollout( robot_path: str | Path, params: dict[str, float], actions: np.ndarray, - timesteps: np.ndarray, sim_dt: float = 0.002, substeps: int = 10, + motor_params: dict[str, float] | None = None, ) -> dict[str, np.ndarray]: """Replay recorded actions in MuJoCo with overridden parameters. Parameters ---------- robot_path : asset directory - params : named parameter overrides + params : named parameter overrides (pendulum/arm only) actions : (N,) normalised actions [-1, 1] from the recording - timesteps : (N,) wall-clock times (seconds) from the recording sim_dt : MuJoCo physics timestep substeps : physics substeps per control step + motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS) Returns ------- dict with keys: motor_angle, motor_vel, pendulum_angle, pendulum_vel Each is an (N,) numpy array of simulated values. """ + if motor_params is None: + motor_params = LOCKED_MOTOR_PARAMS + robot_path = Path(robot_path).resolve() - model = _build_model(robot_path, params) + model, actuator = _build_model(robot_path, params, motor_params) model.opt.timestep = sim_dt data = mujoco.MjData(model) - - # Start from pendulum hanging down (qpos=0 is down per URDF convention). mujoco.mj_resetData(model, data) - # Control dt derived from actual recording sample rate. n = len(actions) - ctrl_dt = sim_dt * substeps + ctrl_limit = params.get("ctrl_limit", 0.588) - # Pre-allocate output. sim_motor_angle = np.zeros(n, dtype=np.float64) sim_motor_vel = np.zeros(n, dtype=np.float64) sim_pend_angle = np.zeros(n, dtype=np.float64) sim_pend_vel = np.zeros(n, dtype=np.float64) - # Extract actuator limit info for software limit switch. - nu = model.nu - if nu > 0: - jnt_id = model.actuator_trnid[0, 0] - jnt_limited = bool(model.jnt_limited[jnt_id]) - jnt_lo = model.jnt_range[jnt_id, 0] - jnt_hi = model.jnt_range[jnt_id, 1] - gear_sign = float(np.sign(model.actuator_gear[0, 0])) - else: - jnt_limited = False - jnt_lo = jnt_hi = gear_sign = 0.0 + limits = ActuatorLimits(model) for i in range(n): - data.ctrl[0] = actions[i] + action = max(-ctrl_limit, min(ctrl_limit, float(actions[i]))) + ctrl = actuator.transform_ctrl(action) + data.ctrl[0] = ctrl for _ in range(substeps): - # Software limit switch (mirrors MuJoCoRunner). - if jnt_limited and nu > 0: - pos = data.qpos[jnt_id] - if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0: - data.ctrl[0] = 0.0 - elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0: - data.ctrl[0] = 0.0 + limits.enforce(model, data) + data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl) mujoco.mj_step(model, data) sim_motor_angle[i] = data.qpos[0] - sim_motor_vel[i] = data.qvel[0] sim_pend_angle[i] = data.qpos[1] + sim_motor_vel[i] = data.qvel[0] sim_pend_vel[i] = data.qvel[1] return { @@ -364,6 +315,7 @@ def windowed_rollout( window_duration: float = 0.5, sim_dt: float = 0.002, substeps: int = 10, + motor_params: dict[str, float] | None = None, ) -> dict[str, np.ndarray | float]: """Multiple-shooting rollout — split recording into short windows. @@ -372,18 +324,19 @@ def windowed_rollout( 2. Replay the recorded actions within the window. 3. Record the simulated output. - This prevents error accumulation across the full trajectory, giving - a much smoother cost landscape for the optimizer. + Motor dynamics (asymmetric friction, damping, back-EMF, etc.) are + applied via qfrc_applied using the locked motor sysid parameters. Parameters ---------- robot_path : asset directory - params : named parameter overrides + params : named parameter overrides (pendulum/arm only) recording : dict with keys time, action, motor_angle, motor_vel, pendulum_angle, pendulum_vel (all 1D arrays of length N) window_duration : length of each shooting window in seconds sim_dt : MuJoCo physics timestep substeps : physics substeps per control step + motor_params : locked motor params (default: LOCKED_MOTOR_PARAMS) Returns ------- @@ -392,8 +345,11 @@ def windowed_rollout( (stitched from per-window simulations) n_windows — number of windows used """ + if motor_params is None: + motor_params = LOCKED_MOTOR_PARAMS + robot_path = Path(robot_path).resolve() - model = _build_model(robot_path, params) + model, actuator = _build_model(robot_path, params, motor_params) model.opt.timestep = sim_dt data = mujoco.MjData(model) @@ -405,67 +361,50 @@ def windowed_rollout( real_pend_vel = recording["pendulum_vel"] n = len(actions) - # Pre-allocate output (stitched from all windows). sim_motor_angle = np.zeros(n, dtype=np.float64) sim_motor_vel = np.zeros(n, dtype=np.float64) sim_pend_angle = np.zeros(n, dtype=np.float64) sim_pend_vel = np.zeros(n, dtype=np.float64) - # Extract actuator limit info. - nu = model.nu - if nu > 0: - jnt_id = model.actuator_trnid[0, 0] - jnt_limited = bool(model.jnt_limited[jnt_id]) - jnt_lo = model.jnt_range[jnt_id, 0] - jnt_hi = model.jnt_range[jnt_id, 1] - gear_sign = float(np.sign(model.actuator_gear[0, 0])) - else: - jnt_limited = False - jnt_lo = jnt_hi = gear_sign = 0.0 + limits = ActuatorLimits(model) - # Compute window boundaries from recording timestamps. t0 = times[0] t_end = times[-1] - window_starts: list[int] = [] # indices into the recording + window_starts: list[int] = [] current_t = t0 while current_t < t_end: - # Find the index closest to current_t. idx = int(np.searchsorted(times, current_t)) idx = min(idx, n - 1) window_starts.append(idx) current_t += window_duration + ctrl_limit = params.get("ctrl_limit", 0.588) n_windows = len(window_starts) for w, w_start in enumerate(window_starts): - # Window end: next window start, or end of recording. w_end = window_starts[w + 1] if w + 1 < n_windows else n - # Initialize MuJoCo state from real data at window start. mujoco.mj_resetData(model, data) data.qpos[0] = real_motor[w_start] data.qpos[1] = real_pend[w_start] data.qvel[0] = real_motor_vel[w_start] data.qvel[1] = real_pend_vel[w_start] data.ctrl[:] = 0.0 - # Forward kinematics to make state consistent. mujoco.mj_forward(model, data) for i in range(w_start, w_end): - data.ctrl[0] = actions[i] + action = max(-ctrl_limit, min(ctrl_limit, float(actions[i]))) + ctrl = actuator.transform_ctrl(action) + data.ctrl[0] = ctrl for _ in range(substeps): - if jnt_limited and nu > 0: - pos = data.qpos[jnt_id] - if pos >= jnt_hi and gear_sign * data.ctrl[0] > 0: - data.ctrl[0] = 0.0 - elif pos <= jnt_lo and gear_sign * data.ctrl[0] < 0: - data.ctrl[0] = 0.0 + limits.enforce(model, data) + data.qfrc_applied[0] = actuator.compute_motor_force(data.qvel[0], ctrl) mujoco.mj_step(model, data) sim_motor_angle[i] = data.qpos[0] - sim_motor_vel[i] = data.qvel[0] sim_pend_angle[i] = data.qpos[1] + sim_motor_vel[i] = data.qvel[0] sim_pend_vel[i] = data.qvel[1] return { diff --git a/src/sysid/visualize.py b/src/sysid/visualize.py index 987791c..0aed788 100644 --- a/src/sysid/visualize.py +++ b/src/sysid/visualize.py @@ -20,7 +20,6 @@ from __future__ import annotations import argparse import json -import math from pathlib import Path import numpy as np @@ -29,6 +28,30 @@ import structlog log = structlog.get_logger() +def _run_sim( + robot_path: Path, + params: dict[str, float], + recording: dict[str, np.ndarray], + window_duration: float, + sim_dt: float, + substeps: int, + motor_params: dict[str, float], +) -> dict[str, np.ndarray]: + """Run windowed or open-loop rollout depending on window_duration.""" + from src.sysid.rollout import rollout, windowed_rollout + + if window_duration > 0: + return windowed_rollout( + robot_path=robot_path, params=params, recording=recording, + window_duration=window_duration, sim_dt=sim_dt, + substeps=substeps, motor_params=motor_params, + ) + return rollout( + robot_path=robot_path, params=params, actions=recording["action"], + substeps=substeps, motor_params=motor_params, + ) + + def visualize( robot_path: str | Path, recording_path: str | Path, @@ -39,31 +62,27 @@ def visualize( save_path: str | Path | None = None, show: bool = True, ) -> None: - """Generate comparison plot. - - Parameters - ---------- - robot_path : robot asset directory - recording_path : .npz file from capture - result_path : sysid_result.json with best_params (optional) - sim_dt / substeps : physics settings for rollout - window_duration : shooting window length (s); 0 = open-loop - save_path : if provided, save figure to this path (PNG, PDF, …) - show : if True, display interactive matplotlib window - """ + """Generate comparison plot.""" import matplotlib.pyplot as plt from src.sysid.rollout import ( + LOCKED_MOTOR_PARAMS, ROTARY_CARTPOLE_PARAMS, defaults_vector, params_to_dict, - rollout, - windowed_rollout, ) robot_path = Path(robot_path).resolve() recording = dict(np.load(recording_path)) + motor_params = LOCKED_MOTOR_PARAMS + + sim_kwargs = dict( + robot_path=robot_path, recording=recording, + window_duration=window_duration, sim_dt=sim_dt, + substeps=substeps, motor_params=motor_params, + ) + t = recording["time"] actions = recording["action"] @@ -72,26 +91,15 @@ def visualize( defaults_vector(ROTARY_CARTPOLE_PARAMS), ROTARY_CARTPOLE_PARAMS ) log.info("simulating_default_params", windowed=window_duration > 0) - if window_duration > 0: - sim_default = windowed_rollout( - robot_path=robot_path, - params=default_params, - recording=recording, - window_duration=window_duration, - sim_dt=sim_dt, - substeps=substeps, - ) - else: - sim_default = rollout( - robot_path=robot_path, - params=default_params, - actions=actions, - timesteps=t, - sim_dt=sim_dt, - substeps=substeps, - ) + sim_default = _run_sim(params=default_params, **sim_kwargs) # ── Simulate with tuned parameters (if available) ──────────── + # Resolve result path (explicit or auto-detect). + if result_path is None: + auto = robot_path / "sysid_result.json" + if auto.exists(): + result_path = auto + sim_tuned = None tuned_cost = None if result_path is not None: @@ -101,64 +109,21 @@ def visualize( tuned_params = result.get("best_params", {}) tuned_cost = result.get("best_cost") log.info("simulating_tuned_params", cost=tuned_cost) - if window_duration > 0: - sim_tuned = windowed_rollout( - robot_path=robot_path, - params=tuned_params, - recording=recording, - window_duration=window_duration, - sim_dt=sim_dt, - substeps=substeps, - ) - else: - sim_tuned = rollout( - robot_path=robot_path, - params=tuned_params, - actions=actions, - timesteps=t, - sim_dt=sim_dt, - substeps=substeps, - ) + sim_tuned = _run_sim(params=tuned_params, **sim_kwargs) else: log.warning("result_file_not_found", path=str(result_path)) - else: - # Auto-detect sysid_result.json in robot_path. - auto_result = robot_path / "sysid_result.json" - if auto_result.exists(): - result = json.loads(auto_result.read_text()) - tuned_params = result.get("best_params", {}) - tuned_cost = result.get("best_cost") - log.info("auto_detected_tuned_params", cost=tuned_cost) - if window_duration > 0: - sim_tuned = windowed_rollout( - robot_path=robot_path, - params=tuned_params, - recording=recording, - window_duration=window_duration, - sim_dt=sim_dt, - substeps=substeps, - ) - else: - sim_tuned = rollout( - robot_path=robot_path, - params=tuned_params, - actions=actions, - timesteps=t, - sim_dt=sim_dt, - substeps=substeps, - ) # ── Plot ───────────────────────────────────────────────────── fig, axes = plt.subplots(5, 1, figsize=(14, 12), sharex=True) channels = [ - ("motor_angle", "Motor Angle (rad)", True), - ("motor_vel", "Motor Velocity (rad/s)", False), - ("pendulum_angle", "Pendulum Angle (rad)", True), - ("pendulum_vel", "Pendulum Velocity (rad/s)", False), + ("motor_angle", "Motor Angle (rad)"), + ("motor_vel", "Motor Velocity (rad/s)"), + ("pendulum_angle", "Pendulum Angle (rad)"), + ("pendulum_vel", "Pendulum Velocity (rad/s)"), ] - for ax, (key, ylabel, is_angle) in zip(axes[:4], channels): + for ax, (key, ylabel) in zip(axes[:4], channels): real = recording[key] ax.plot(t, real, "k-", linewidth=1.2, alpha=0.8, label="Real") @@ -207,6 +172,7 @@ def visualize( sim_dt=sim_dt, substeps=substeps, window_duration=window_duration, + motor_params=motor_params, ) title += f"\nOriginal cost: {orig_cost:.4f} → Tuned cost: {tuned_cost:.4f}" improvement = (1.0 - tuned_cost / orig_cost) * 100 if orig_cost > 0 else 0 diff --git a/src/training/trainer.py b/src/training/trainer.py index 72d0202..87ddf75 100644 --- a/src/training/trainer.py +++ b/src/training/trainer.py @@ -8,7 +8,6 @@ import structlog import torch import tqdm from clearml import Logger -from gymnasium import spaces from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.memories.torch import RandomMemory from skrl.resources.preprocessors.torch import RunningStandardScaler @@ -129,24 +128,17 @@ class VideoRecordingTrainer(SequentialTrainer): frames: list[np.ndarray] = [] obs, _ = self.env.reset() - for _ in range(max_steps): - with torch.no_grad(): + with torch.no_grad(): + for _ in range(max_steps): action = self.agents.act(obs, timestep=timestep, timesteps=self.timesteps)[0] - obs, _, terminated, truncated, _ = self.env.step(action) + obs, _, terminated, truncated, _ = self.env.step(action) - try: frame = self.env.render() - except Exception: - # Headless environment without OpenGL — skip video recording - log.warning("video_recording_disabled", reason="render failed (headless?)") - self.env.reset() - return + if frame is not None: + frames.append(frame) - if frame is not None: - frames.append(frame) - - if (terminated | truncated).any().item(): - break + if (terminated | truncated).any().item(): + break if frames: path = str(self._video_dir / f"step_{timestep}.mp4") @@ -159,7 +151,6 @@ class VideoRecordingTrainer(SequentialTrainer): local_path=path, iteration=timestep, ) - # Restore training state self.env.reset()