Added hyperparameter optimization script

2023-11-27 16:06:05 +00:00
parent c1152ff96c
commit f9e8f9e69f
3 changed files with 106 additions and 9 deletions
--- a/src/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -78,8 +78,7 @@ class Trainer:
        self.data_processor = task.connect(self.data_processor, name="data_processor")
        self = task.connect(self, name="trainer")
-        task.delete_parameter("trainer/quantiles")
+        task.delete_parameter("trainer/quantiles", force=True)
        task.connect(self.data_processor.data_config, name="data_features")
    def random_samples(self, train: bool = True, num_samples: int = 10):
        train_loader, test_loader = self.data_processor.get_dataloaders(
--- a/src/training_scripts/autoregressive_quantiles.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -22,14 +22,13 @@ task = clearml_helper.get_task(task_name="None")
 #### Data Processor ####
 data_config = DataConfig()
 data_config.NRV_HISTORY = True
-data_config.LOAD_HISTORY = False
+data_config.LOAD_HISTORY = True
-data_config.LOAD_FORECAST = False
+data_config.LOAD_FORECAST = True
 data_config.WIND_FORECAST = False
 data_config.WIND_HISTORY = False
 data_config.QUARTER = True
-data_config.DAY_OF_WEEK = False
+data_config.DAY_OF_WEEK = True
 data_config = task.connect(data_config, name="data_features")
 data_processor = DataProcessor(data_config, path="")
 data_processor.set_batch_size(1024)
@@ -48,7 +47,6 @@ if quantiles is None:
    quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]
    task.set_parameter("general/quantiles", quantiles)
 # model = LinearRegression(inputDim, len(quantiles))
 time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)
 non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)
--- a/src/training_scripts/hyperparameter_optimizer.py
+++ b/src/training_scripts/hyperparameter_optimizer.py
@@ -0,0 +1,100 @@
 import logging
 from clearml import Task
 from clearml.automation import (
    DiscreteParameterRange,
    HyperParameterOptimizer,
    RandomSearch,
    UniformIntegerParameterRange,
 )
 from src.data.preprocessing import DataConfig
 # Pick the search strategy: Optuna if available, then BOHB, otherwise RandomSearch.
 # The optimizer packages are imported ONLY inside the guarded blocks below: an
 # unconditional top-level import of OptimizerOptuna would raise ImportError before
 # the fallback chain gets a chance to run.
 try:
    from clearml.automation.optuna import OptimizerOptuna  # noqa
    aSearchStrategy = OptimizerOptuna
 except ImportError:
    try:
        from clearml.automation.hpbandster import OptimizerBOHB  # noqa
        aSearchStrategy = OptimizerBOHB
    except ImportError:
        logging.getLogger().warning(
            'Apologies, it seems you do not have \'optuna\' or \'hpbandster\' installed, '
            'we will be using RandomSearch strategy instead')
        aSearchStrategy = RandomSearch
 # Resolve the id of the task to optimize.
 # This script later calls task.execute_remotely(), after which an agent re-runs it
 # from the top; an interactive prompt cannot be relied on there, so only prompt
 # on a local run and pass the id to the remote run via a connected parameter.
 if Task.running_locally():
    input_task_id = input("Please enter the task id to optimize: ").strip()
    # check if task id is valid
    if not input_task_id or not Task.get_task(task_id=input_task_id):
        raise ValueError("Invalid task id")
 else:
    # placeholder; replaced below by the value stored on the optimizer task
    input_task_id = ""
 task = Task.init(project_name='Hyper-Parameter Optimization',
                 task_name='Automatic Hyper-Parameter Optimization',
                 task_type=Task.TaskTypes.optimizer,
                 reuse_last_task_id=False)
 # Record the id on the optimizer task. On a remote run, connect() is expected to
 # return the value stored by the local run instead of the placeholder.
 input_task_id = task.connect({"base_task_id": input_task_id}, name="optimizer")["base_task_id"]
 execution_queue = "default"
 ### HYPER PARAMETERS ###
 #### Quantiles ####
 # Candidate quantile sets, keyed by a short description; insertion order is the
 # order in which the candidates are handed to the optimizer.
 # NOTE(review): one candidate includes the 0 and 1 quantiles - confirm that the
 # training objective behaves sensibly for those extremes.
 quantile_candidates = {
    "deciles": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    "quartiles": [0.25, 0.5, 0.75],
    "10% increments, excluding extremes": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "deciles combined with extremes": [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99],
    "including 0 and 1": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    "mixed small and large increments": [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99],
    "20% increments": [0.2, 0.4, 0.6, 0.8],
    "eighths": [0.125, 0.375, 0.625, 0.875],
    "deciles plus 0.05": [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90],
    "mixed fine and coarser increments": [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.15, 0.2, 0.3, 0.5],
 }
 quantile_lists = list(quantile_candidates.values())
 quantiles_range = DiscreteParameterRange("general/quantiles", values=quantile_lists)
 ### OPTIMIZER OBJECT ###
 # Parameters the optimizer is allowed to vary (currently only the quantile set).
 search_space = [quantiles_range]
 optimizer = HyperParameterOptimizer(
    # what to optimize
    base_task_id=input_task_id,
    hyper_parameters=search_space,
    # objective: minimize the "test" series of the "PinballLoss" metric
    objective_metric_title="PinballLoss",
    objective_metric_series="test",
    objective_metric_sign="min",
    # how to search and where to run the spawned jobs
    optimizer_class=aSearchStrategy,
    execution_queue=execution_queue,
    max_number_of_concurrent_tasks=1,
    total_max_jobs=15,
    pool_period_min=0.2,
    # save_top_k_tasks_only=3,
 )
 # Hand the optimizer task itself over to the "hypertuning" queue; exit_process=True
 # is passed so the local process is asked to exit at this point.
 task.execute_remotely(queue_name="hypertuning", exit_process=True)
 optimizer.set_report_period(0.2)
 def job_complete_callback(
    job_id: str,
    objective_value: float,
    objective_iteration: int,
    job_parameters: dict,
    top_performance_job_id: str,
 ):
    """Print a summary of a finished job and announce it when it is the best one.

    Args:
        job_id: Id of the job that just completed.
        objective_value: Objective value reported for that job.
        objective_iteration: Iteration at which the objective was reported.
        job_parameters: Parameters the job ran with.
        top_performance_job_id: Id of the best-performing job so far.
    """
    print('Job completed!', job_id, objective_value, objective_iteration, job_parameters)
    if job_id != top_performance_job_id:
        return
    # The finished job is the current leader.
    print(f'WOOT WOOT we broke the record! Objective reached {objective_value}')
 # Start the optimization; job_complete_callback is passed so each finished job is reported.
 optimizer.start(job_complete_callback=job_complete_callback)
 try:
    # Limit the whole optimization to 120 minutes, then block until it is done.
    optimizer.set_time_limit(in_minutes=120.0)
    optimizer.wait()
    # Report the ids of the three best experiments.
    top_exp = optimizer.get_top_experiments(top_k=3)
    print([t.id for t in top_exp])
 finally:
    # make sure background optimization stopped, even when waiting or reporting
    # failed or was interrupted
    optimizer.stop()
 print('We are done, good bye')