♻️ crazy refactor

2026-03-11 22:52:01 +01:00
parent 35223b3560
commit 4115447022
34 changed files with 4255 additions and 102 deletions
--- a/configs/training/ppo.yaml
+++ b/configs/training/ppo.yaml
@@ -12,5 +12,23 @@ entropy_loss_scale: 0.05
 log_interval: 1000
 checkpoint_interval: 50000

+initial_log_std: 0.5  # NOTE(review): presumably the starting policy log-std — confirm with consumer
+min_log_std: -2.0  # NOTE(review): presumably the lower bound applied to log-std — confirm
+max_log_std: 2.0  # NOTE(review): presumably the upper bound applied to log-std — confirm
+
+record_video_every: 10000  # NOTE(review): unit (steps vs. episodes) is not visible here — confirm
+
 # ClearML remote execution (GPU worker)
 remote: false
+
+# ── HPO search ranges ────────────────────────────────────────────────
+# Read by scripts/hpo.py — ignored by TrainerConfig during training.
+hpo:
+  learning_rate: {min: 0.00005, max: 0.001}  # {min, max} bounds; sampling scheme lives in scripts/hpo.py (not shown)
+  clip_ratio: {min: 0.1, max: 0.3}
+  discount_factor: {min: 0.98, max: 0.999}
+  gae_lambda: {min: 0.9, max: 0.99}
+  entropy_loss_scale: {min: 0.0001, max: 0.1}  # the file's current value (0.05) lies inside these bounds
+  value_loss_scale: {min: 0.1, max: 1.0}
+  learning_epochs: {min: 2, max: 8, type: int}  # `type: int` tags this range as integer-valued
+  mini_batches: {values: [2, 4, 8, 16]}  # explicit candidate list instead of min/max bounds