♻️ crazy refactor

2026-03-11 22:52:01 +01:00
parent 35223b3560
commit 4115447022
34 changed files with 4255 additions and 102 deletions
--- a/configs/training/ppo_single.yaml
+++ b/configs/training/ppo_single.yaml
@@ -0,0 +1,23 @@
+# PPO tuned for single-env simulation — mimics real hardware training.
+# Inherits defaults + HPO ranges from ppo.yaml.
+# Same 50 Hz control (runner=mujoco_single), 1 env, conservative hyperparameters.
+# Sim runs ~100× faster than real time, so we can afford more timesteps.
+
+defaults:  # composition list; looks like a Hydra defaults list — confirm with the config loader
+  - ppo  # base config whose defaults and HPO ranges this file inherits (ppo.yaml)
+  - _self_  # this file's own keys; listed last, so presumably they override ppo — confirm
+
+hidden_sizes: [256, 256]  # presumably one entry per hidden layer — confirm against the model builder
+total_timesteps: 500000  # training budget; header notes the fast sim makes more timesteps affordable
+learning_epochs: 5  # presumably optimisation passes per collected batch — confirm
+learning_rate: 0.001  # optimiser step size
+entropy_loss_scale: 0.0001  # presumably the weight of the entropy term in the loss — confirm
+log_interval: 1024  # units not visible here (likely timesteps) — confirm
+checkpoint_interval: 10000  # units not visible here (likely timesteps) — confirm
+initial_log_std: -0.5  # lies inside the [min_log_std, max_log_std] range set below
+min_log_std: -4.0  # lower bound for the policy log-std
+max_log_std: 0.0  # upper bound for the policy log-std; a log-std of 0.0 means std = 1
+
+record_video_every: 50000  # units not visible here; 1/10 of total_timesteps if counted in timesteps
+
+remote: false  # boolean flag; what "remote" selects is decided by the consumer — not visible here