♻️ crazy refactor
This commit is contained in:
@@ -12,5 +12,23 @@ entropy_loss_scale: 0.05
|
||||
log_interval: 1000
checkpoint_interval: 50000

initial_log_std: 0.5
min_log_std: -2.0
max_log_std: 2.0

record_video_every: 10000

# ClearML remote execution (GPU worker)
remote: false
|
||||
|
||||
# ── HPO search ranges ────────────────────────────────────────────────
# Read by scripts/hpo.py — ignored by TrainerConfig during training.
hpo:
  learning_rate: {min: 0.00005, max: 0.001}
  clip_ratio: {min: 0.1, max: 0.3}
  discount_factor: {min: 0.98, max: 0.999}
  gae_lambda: {min: 0.9, max: 0.99}
  entropy_loss_scale: {min: 0.0001, max: 0.1}
  value_loss_scale: {min: 0.1, max: 1.0}
  learning_epochs: {min: 2, max: 8, type: int}
  mini_batches: {values: [2, 4, 8, 16]}
|
||||
|
||||
Reference in New Issue
Block a user