# Rotary cart-pole environment configuration: episode settings, reward
# shaping weights, a software safety limit, and hyper-parameter search ranges.

# NOTE(review): presumably the per-episode step cap — confirm against the env.
max_steps: 1000
# NOTE(review): looks like a path to the robot asset directory; what it is
# resolved relative to is not visible here — confirm against the loader.
robot_path: assets/rotary_cartpole
# NOTE(review): presumably the weight of the "pendulum upright" reward term.
reward_upright_scale: 1.0

# ── Regularisation penalties (prevent fast spinning) ─────────────────
motor_vel_penalty: 0.01  # penalise high motor angular velocity
motor_angle_penalty: 0.05  # penalise deviation from centre
action_penalty: 0.05  # penalise large actions (energy cost)

# ── Software safety limit (env-level, always applied) ────────────────
motor_angle_limit_deg: 90.0  # terminate episode if motor exceeds ±90°

# ── HPO search ranges ────────────────────────────────────────────────
# Each entry is named after the top-level parameter of the same name.
# Entries give either a {min, max} range or an explicit list of values.
hpo:
  reward_upright_scale: {min: 0.5, max: 5.0}
  motor_vel_penalty: {min: 0.001, max: 0.1}
  motor_angle_penalty: {min: 0.01, max: 0.2}
  action_penalty: {min: 0.01, max: 0.2}
  max_steps: {values: [500, 1000, 2000]}