# Default settings.
# NOTE(review): the key names suggest a step cap, a robot asset location and
# reward-shaping weights — confirm exact semantics with the consuming code.
max_steps: 1000
robot_path: assets/rotary_cartpole
reward_upright_scale: 1.0
speed_penalty_scale: 0.1

# ── HPO search ranges ────────────────────────────────────────────────
# Each entry reuses the name of a default setting above and supplies either
# a {min, max} pair or an explicit `values` list of candidates.
hpo:
  reward_upright_scale: {min: 0.5, max: 5.0}
  speed_penalty_scale: {min: 0.01, max: 1.0}
  max_steps: {values: [500, 1000, 2000]}