♻️ crazy refactor
This commit is contained in:
23
configs/training/ppo_single.yaml
Normal file
23
configs/training/ppo_single.yaml
Normal file
@@ -0,0 +1,23 @@
---
# PPO tuned for single-env simulation — mimics real hardware training.
# Inherits defaults + HPO ranges from ppo.yaml.
# Same 50 Hz control (runner=mujoco_single), 1 env, conservative hypers.
# Sim runs ~100× faster than real time, so we can afford more timesteps.

# Hydra composition: load ppo.yaml first, then let this file's keys
# override it (_self_ last = local values win).
defaults:
  - ppo
  - _self_

# Policy/value network: two hidden layers of 256 units each.
hidden_sizes: [256, 256]
# Total environment steps for the run (sim speed makes this affordable).
total_timesteps: 500000
# Gradient epochs per rollout batch.
learning_epochs: 5
learning_rate: 0.001
# Small entropy bonus — conservative exploration for a single env.
entropy_loss_scale: 0.0001
# Steps between metric logs / checkpoints.
log_interval: 1024
checkpoint_interval: 10000
# Gaussian policy log-std: initial value and clamp range.
initial_log_std: -0.5
min_log_std: -4.0
max_log_std: 0.0

# Record an evaluation video every N steps.
record_video_every: 50000

# Run locally, not on a remote worker.
remote: false
Reference in New Issue
Block a user