✨ add new ppo mjx config
This commit is contained in:
22
configs/training/ppo_mjx.yaml
Normal file
22
configs/training/ppo_mjx.yaml
Normal file
@@ -0,0 +1,22 @@
---
# PPO tuned for MJX (1024+ parallel envs on GPU).
# With 1024 envs, each timestep collects 1024 samples, so total_timesteps
# can be much lower than the CPU config.

hidden_sizes: [128, 128]
total_timesteps: 300000  # 300K × 1024 envs ≈ 307M env steps
rollout_steps: 1024  # PPO batch = 1024 envs × 1024 steps = 1M samples
learning_epochs: 4
mini_batches: 32  # keep mini-batch size similar to CPU config (~32K)
discount_factor: 0.99
gae_lambda: 0.95
learning_rate: 0.001  # ~3x higher LR for 16x larger batch (sqrt scaling)
clip_ratio: 0.2
value_loss_scale: 0.5
entropy_loss_scale: 0.05
log_interval: 100  # log more often (shorter run)
checkpoint_interval: 10000

record_video_every: 10000

# ClearML remote execution (GPU worker)
remote: false
Reference in New Issue
Block a user