✨ add new ppo mjx config
This commit is contained in:
22
configs/training/ppo_mjx.yaml
Normal file
22
configs/training/ppo_mjx.yaml
Normal file
@@ -0,0 +1,22 @@
---
# PPO tuned for MJX (1024+ parallel envs on GPU).
# With 1024 envs, each timestep collects 1024 samples, so total_timesteps
# can be much lower than the CPU config.

hidden_sizes: [128, 128]
total_timesteps: 300000  # 300K × 1024 envs ≈ 307M env steps
rollout_steps: 1024  # PPO batch = 1024 envs × 1024 steps = 1M samples
learning_epochs: 4
mini_batches: 32  # keep mini-batch size similar to CPU config (~32K)
discount_factor: 0.99
gae_lambda: 0.95
learning_rate: 0.001  # ~3x higher LR for 16x larger batch (sqrt scaling)
clip_ratio: 0.2
value_loss_scale: 0.5
entropy_loss_scale: 0.05
log_interval: 100  # log more often (shorter run)
checkpoint_interval: 10000

record_video_every: 10000

# ClearML remote execution (GPU worker)
remote: false
Reference in New Issue
Block a user