♻️ crazy refactor

2026-03-11 22:52:01 +01:00
parent 35223b3560
commit 4115447022
34 changed files with 4255 additions and 102 deletions
--- a/configs/training/ppo_mjx.yaml
+++ b/configs/training/ppo_mjx.yaml
@@ -1,22 +1,18 @@
 # PPO tuned for MJX (1024+ parallel envs on GPU).
+# Inherits defaults + HPO ranges from ppo.yaml.
 # With 1024 envs, each timestep collects 1024 samples, so total_timesteps
 # can be much lower than the CPU config.

-hidden_sizes: [128, 128]
-total_timesteps: 300000       # 300K × 1024 envs ≈ 307M env steps
-rollout_steps: 1024           # PPO batch = 1024 envs × 1024 steps = 1M samples
-learning_epochs: 4
-mini_batches: 32              # keep mini-batch size similar to CPU config (~32K)
-discount_factor: 0.99
-gae_lambda: 0.95
-learning_rate: 0.001          # ~3x higher LR for 16x larger batch (sqrt scaling)
-clip_ratio: 0.2
-value_loss_scale: 0.5
-entropy_loss_scale: 0.05
-log_interval: 100             # log more often (shorter run)
+defaults:
+  - ppo                         # base PPO config; keys not set below are inherited from it
+  - _self_                      # listed after ppo so the values in this file take precedence
+
+total_timesteps: 300000         # 300K × 1024 envs ≈ 307M env steps
+mini_batches: 32                # ~32K per mini-batch if rollout_steps is 1024 — NOTE(review): now inherited from ppo, confirm
+learning_rate: 0.001            # ~3x higher LR for 16x larger batch (approx. sqrt scaling; exact sqrt(16) would be 4x)
+log_interval: 100               # log more often (shorter run)
 checkpoint_interval: 10000

 record_video_every: 10000

-# ClearML remote execution (GPU worker)
 remote: false