Added LSTM model
This commit is contained in:
@@ -12,10 +12,12 @@ class NrvDataset(Dataset):
|
|||||||
full_day_skip: bool = False,
|
full_day_skip: bool = False,
|
||||||
sequence_length=96,
|
sequence_length=96,
|
||||||
predict_sequence_length=96,
|
predict_sequence_length=96,
|
||||||
|
lstm: bool = False,
|
||||||
):
|
):
|
||||||
self.data_config = data_config
|
self.data_config = data_config
|
||||||
self.dataframe = dataframe
|
self.dataframe = dataframe
|
||||||
self.full_day_skip = full_day_skip
|
self.full_day_skip = full_day_skip
|
||||||
|
self.lstm = lstm
|
||||||
|
|
||||||
# reset dataframe index
|
# reset dataframe index
|
||||||
self.dataframe.reset_index(drop=True, inplace=True)
|
self.dataframe.reset_index(drop=True, inplace=True)
|
||||||
@@ -107,19 +109,26 @@ class NrvDataset(Dataset):
|
|||||||
history_features = history_df[self.history_features].values
|
history_features = history_df[self.history_features].values
|
||||||
|
|
||||||
# combine the history features to one tensor (first one feature, then the next one, etc.)
|
# combine the history features to one tensor (first one feature, then the next one, etc.)
|
||||||
history_features = torch.tensor(history_features).reshape(-1)
|
history_features = torch.tensor(history_features)
|
||||||
|
|
||||||
# get forecast features
|
# get forecast features
|
||||||
forecast_features = forecast_df[self.forecast_features].values
|
forecast_features = forecast_df[self.forecast_features].values
|
||||||
forecast_features = torch.tensor(forecast_features).view(-1)
|
forecast_features = torch.tensor(forecast_features)
|
||||||
|
|
||||||
# add last time feature of the history
|
# add last time feature of the history
|
||||||
time_feature = history_df["time_feature"].iloc[-1]
|
time_feature = history_df["time_feature"].iloc[-1]
|
||||||
|
|
||||||
## all features
|
## all features
|
||||||
all_features = torch.cat(
|
if not self.lstm:
|
||||||
[nrv_features, history_features, forecast_features, torch.tensor([time_feature])], dim=0
|
all_features = torch.cat(
|
||||||
)
|
[nrv_features, history_features.reshape(-1), forecast_features.reshape(-1), torch.tensor([time_feature])], dim=0
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
time_features = torch.tensor(history_df["time_feature"].values).reshape(-1, 1)
|
||||||
|
# combine (96, ) and (96, 2) to (96, 3)
|
||||||
|
all_features = torch.cat(
|
||||||
|
[nrv_features.unsqueeze(1), time_features], dim=1
|
||||||
|
)
|
||||||
|
|
||||||
# Target sequence, flattened if necessary
|
# Target sequence, flattened if necessary
|
||||||
nrv_target = forecast_df["nrv"].values
|
nrv_target = forecast_df["nrv"].values
|
||||||
@@ -133,7 +142,7 @@ class NrvDataset(Dataset):
|
|||||||
# all features and target to float
|
# all features and target to float
|
||||||
all_features = all_features.float()
|
all_features = all_features.float()
|
||||||
|
|
||||||
# to tensors
|
# to tensors
|
||||||
nrv_target = torch.tensor(nrv_target).float()
|
nrv_target = torch.tensor(nrv_target).float()
|
||||||
return all_features, nrv_target, idx
|
return all_features, nrv_target, idx
|
||||||
|
|
||||||
|
|||||||
@@ -36,9 +36,10 @@ class DataConfig:
|
|||||||
|
|
||||||
|
|
||||||
class DataProcessor:
|
class DataProcessor:
|
||||||
def __init__(self, data_config: DataConfig, path:str="./"):
|
def __init__(self, data_config: DataConfig, lstm: bool = False, path:str="./"):
|
||||||
self.batch_size = 2048
|
self.batch_size = 2048
|
||||||
self.path = path
|
self.path = path
|
||||||
|
self.lstm = lstm
|
||||||
|
|
||||||
self.train_range = (
|
self.train_range = (
|
||||||
-np.inf,
|
-np.inf,
|
||||||
@@ -204,6 +205,7 @@ class DataProcessor:
|
|||||||
data_config=self.data_config,
|
data_config=self.data_config,
|
||||||
full_day_skip=self.full_day_skip,
|
full_day_skip=self.full_day_skip,
|
||||||
predict_sequence_length=predict_sequence_length,
|
predict_sequence_length=predict_sequence_length,
|
||||||
|
lstm=self.lstm,
|
||||||
)
|
)
|
||||||
return self.get_dataloader(train_dataset, shuffle=shuffle)
|
return self.get_dataloader(train_dataset, shuffle=shuffle)
|
||||||
|
|
||||||
@@ -234,6 +236,7 @@ class DataProcessor:
|
|||||||
data_config=self.data_config,
|
data_config=self.data_config,
|
||||||
full_day_skip=self.full_day_skip,
|
full_day_skip=self.full_day_skip,
|
||||||
predict_sequence_length=predict_sequence_length,
|
predict_sequence_length=predict_sequence_length,
|
||||||
|
lstm=self.lstm,
|
||||||
)
|
)
|
||||||
return self.get_dataloader(test_dataset, shuffle=False)
|
return self.get_dataloader(test_dataset, shuffle=False)
|
||||||
|
|
||||||
@@ -274,7 +277,7 @@ class DataProcessor:
|
|||||||
predict_sequence_length=self.output_size
|
predict_sequence_length=self.output_size
|
||||||
)
|
)
|
||||||
input, _, _ = next(iter(data_loader))
|
input, _, _ = next(iter(data_loader))
|
||||||
return input.shape[-1]
|
return input.shape
|
||||||
|
|
||||||
def get_time_feature_size(self):
|
def get_time_feature_size(self):
|
||||||
time_feature_size = 1
|
time_feature_size = 1
|
||||||
|
|||||||
45
src/models/lstm_model.py
Normal file
45
src/models/lstm_model.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
import torch
|
||||||
|
|
||||||
|
class LSTMModel(torch.nn.Module):
    """Stacked LSTM followed by a linear output layer.

    The input is passed through ``torch.nn.LSTM`` (``batch_first=True``) and the
    final hidden state of the top LSTM layer is projected to ``output_size``
    values by a linear layer.

    Args:
        inputSize: Either the shape of the input (any indexable sequence such
            as ``torch.Size`` whose last entry is the per-step feature count)
            or that feature count given directly as an int.
        output_size: Number of values produced by the linear layer.
        num_layers: Number of stacked LSTM layers.
        hidden_size: Size of the LSTM hidden state.
        dropout: Dropout probability applied between stacked LSTM layers.
    """

    def __init__(self, inputSize, output_size, num_layers: int, hidden_size: int, dropout: float = 0.2):
        super().__init__()
        self.inputSize = inputSize
        self.output_size = output_size

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout = dropout

        # Accept both a full shape and a bare feature count: an int is not
        # subscriptable, so fall back to using it as the feature size.
        try:
            feature_size = inputSize[-1]
        except TypeError:
            feature_size = inputSize

        # torch.nn.LSTM only applies dropout between stacked layers; passing a
        # non-zero value with a single layer has no effect and emits a warning.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0

        self.lstm = torch.nn.LSTM(
            input_size=int(feature_size),
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=inter_layer_dropout,
            batch_first=True,
        )
        self.linear = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through the LSTM and project the final hidden state.

        Args:
            x: Input tensor laid out batch-first, with the feature dimension last.

        Returns:
            The linear projection of the top layer's final hidden state.
        """
        # Only the final hidden state is needed; per-step outputs and the cell
        # state are discarded.
        _, (hidden_state, _) = self.lstm(x)

        # hidden_state is indexed by layer first, so [-1] selects the top layer.
        output = self.linear(hidden_state[-1])

        return output
|
||||||
|
|
||||||
|
class GRUModel(torch.nn.Module):
    """Stacked GRU followed by a linear output layer.

    The input is passed through ``torch.nn.GRU`` (``batch_first=True``) and the
    final hidden state of the top GRU layer is projected to ``output_size``
    values by a linear layer.

    Args:
        inputSize: Either the shape of the input (any indexable sequence such
            as ``torch.Size`` whose last entry is the per-step feature count)
            or that feature count given directly as an int.
        output_size: Number of values produced by the linear layer.
        num_layers: Number of stacked GRU layers.
        hidden_size: Size of the GRU hidden state.
        dropout: Dropout probability applied between stacked GRU layers.
    """

    def __init__(self, inputSize, output_size, num_layers: int, hidden_size: int, dropout: float = 0.2):
        super().__init__()
        self.inputSize = inputSize
        self.output_size = output_size

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout = dropout

        # Accept both a full shape and a bare feature count: an int is not
        # subscriptable, so fall back to using it as the feature size.
        try:
            feature_size = inputSize[-1]
        except TypeError:
            feature_size = inputSize

        # torch.nn.GRU only applies dropout between stacked layers; passing a
        # non-zero value with a single layer has no effect and emits a warning.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0

        self.gru = torch.nn.GRU(
            input_size=int(feature_size),
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=inter_layer_dropout,
            batch_first=True,
        )
        self.linear = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through the GRU and project the final hidden state.

        Args:
            x: Input tensor laid out batch-first, with the feature dimension last.

        Returns:
            The linear projection of the top layer's final hidden state.
        """
        # Only the final hidden state is needed; per-step outputs are discarded.
        _, hidden_state = self.gru(x)

        # hidden_state is indexed by layer first, so [-1] selects the top layer.
        output = self.linear(hidden_state[-1])

        return output
|
||||||
@@ -10,19 +10,20 @@ class TimeEmbedding(nn.Module):
|
|||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
# Extract the last 'time_features' from the input
|
# Extract the last 'time_features' from the input
|
||||||
time_feature = x[:, -1]
|
time_feature = x[..., -1] # Use ellipsis to access the last dimension
|
||||||
# convert to int
|
# convert to int
|
||||||
time_feature = time_feature.int()
|
time_feature = time_feature.int()
|
||||||
# Embed these time features
|
# Embed these time features
|
||||||
# print max value of time_feature
|
|
||||||
if time_feature.max() > self.time_features:
|
|
||||||
# print the row from x that includes the max value in the last column
|
|
||||||
print(x[time_feature == time_feature.max()])
|
|
||||||
print("time feature max value is greater than time features")
|
|
||||||
|
|
||||||
embedded_time = self.embedding(time_feature)
|
embedded_time = self.embedding(time_feature)
|
||||||
# Concatenate the embedded features with the original input (minus the last 'time feature')
|
# Concatenate the embedded features with the original input (minus the last 'time feature')
|
||||||
return torch.cat((x[:, :-1], embedded_time), dim=1)
|
return torch.cat((x[..., :-1], embedded_time), dim=-1) # Use -1 to specify the last dimension
|
||||||
|
|
||||||
|
|
||||||
def output_dim(self, input_dim):
    """Return the input dimension after the time feature has been embedded.

    The last feature is swapped for ``self.embedding.embedding_dim`` embedded
    values, so the size of the last dimension grows by ``embedding_dim - 1``.

    Args:
        input_dim: Either the feature count as an int, or a full input shape
            (e.g. ``torch.Size``) whose last entry is the feature count.

    Returns:
        An int when ``input_dim`` is an int; otherwise a ``torch.Size`` equal
        to ``input_dim`` with its last entry adjusted.
    """
    growth = self.embedding.embedding_dim - 1

    # Keep supporting callers that pass a bare feature count.
    if isinstance(input_dim, int):
        return input_dim + growth

    # Shape input: only the last (feature) dimension changes.
    input_dim_list = list(input_dim)
    input_dim_list[-1] = input_dim_list[-1] + growth
    return torch.Size(input_dim_list)
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
from src.models.lstm_model import LSTMModel, GRUModel
|
||||||
from src.data import DataProcessor, DataConfig
|
from src.data import DataProcessor, DataConfig
|
||||||
from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression
|
from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression
|
||||||
from src.trainers.probabilistic_baseline import ProbabilisticBaselineTrainer
|
from src.trainers.probabilistic_baseline import ProbabilisticBaselineTrainer
|
||||||
@@ -28,19 +29,21 @@ data_config.LOAD_FORECAST = True
|
|||||||
data_config.QUARTER = True
|
data_config.QUARTER = True
|
||||||
data_config.DAY_OF_WEEK = True
|
data_config.DAY_OF_WEEK = True
|
||||||
|
|
||||||
data_config = task.connect(data_config, name="data_features")
|
# data_config = task.connect(data_config, name="data_features")
|
||||||
|
|
||||||
data_processor = DataProcessor(data_config, path="")
|
data_processor = DataProcessor(data_config, path="", lstm=True)
|
||||||
data_processor.set_batch_size(1024)
|
data_processor.set_batch_size(512)
|
||||||
data_processor.set_full_day_skip(False)
|
data_processor.set_full_day_skip(False)
|
||||||
|
|
||||||
|
|
||||||
#### Hyperparameters ####
|
#### Hyperparameters ####
|
||||||
data_processor.set_output_size(1)
|
data_processor.set_output_size(1)
|
||||||
inputDim = data_processor.get_input_size()
|
inputDim = data_processor.get_input_size()
|
||||||
learningRate = 0.0001
|
learningRate = 0.001
|
||||||
epochs = 100
|
epochs = 100
|
||||||
|
|
||||||
|
print("Input dim: ", inputDim)
|
||||||
|
|
||||||
# add parameters to clearml
|
# add parameters to clearml
|
||||||
quantiles = task.get_parameter("general/quantiles", cast=True)
|
quantiles = task.get_parameter("general/quantiles", cast=True)
|
||||||
if quantiles is None:
|
if quantiles is None:
|
||||||
@@ -49,8 +52,9 @@ if quantiles is None:
|
|||||||
|
|
||||||
# model = LinearRegression(inputDim, len(quantiles))
|
# model = LinearRegression(inputDim, len(quantiles))
|
||||||
time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)
|
time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)
|
||||||
non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)
|
# non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)
|
||||||
model = nn.Sequential(time_embedding, non_linear_regression_model)
|
lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=512, num_layers=2)
|
||||||
|
model = nn.Sequential(time_embedding, lstm_model)
|
||||||
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
|
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
|
||||||
|
|
||||||
#### Trainer ####
|
#### Trainer ####
|
||||||
@@ -62,9 +66,10 @@ trainer = AutoRegressiveQuantileTrainer(
|
|||||||
"cuda",
|
"cuda",
|
||||||
debug=True,
|
debug=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
trainer.add_metrics_to_track(
|
trainer.add_metrics_to_track(
|
||||||
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss()]
|
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss()]
|
||||||
)
|
)
|
||||||
trainer.early_stopping(patience=10)
|
trainer.early_stopping(patience=10)
|
||||||
trainer.plot_every(5)
|
trainer.plot_every(100)
|
||||||
trainer.train(task=task, epochs=epochs, remotely=False)
|
trainer.train(task=task, epochs=epochs, remotely=True)
|
||||||
@@ -5,7 +5,6 @@ from clearml.automation.optuna import OptimizerOptuna
|
|||||||
from clearml.automation import (
|
from clearml.automation import (
|
||||||
DiscreteParameterRange, HyperParameterOptimizer, RandomSearch,
|
DiscreteParameterRange, HyperParameterOptimizer, RandomSearch,
|
||||||
UniformIntegerParameterRange)
|
UniformIntegerParameterRange)
|
||||||
from src.data.preprocessing import DataConfig
|
|
||||||
|
|
||||||
# trying to load Bayesian optimizer package
|
# trying to load Bayesian optimizer package
|
||||||
try:
|
try:
|
||||||
@@ -21,17 +20,28 @@ except ImportError as ex:
|
|||||||
'we will be using RandomSearch strategy instead')
|
'we will be using RandomSearch strategy instead')
|
||||||
aSearchStrategy = RandomSearch
|
aSearchStrategy = RandomSearch
|
||||||
|
|
||||||
# input task id to optimize
|
# input task id to optimize using argparse
|
||||||
input_task_id = input("Please enter the task id to optimize: ")
|
import argparse
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--task_id", help="Task ID to optimize", type=str)
|
||||||
|
args = parser.parse_args()
|
||||||
|
input_task_id = args.task_id
|
||||||
|
|
||||||
# check if task id is valid
|
# check if task id is valid
|
||||||
if not Task.get_task(task_id=input_task_id):
|
if not Task.get_task(task_id=input_task_id):
|
||||||
raise ValueError("Invalid task id")
|
raise ValueError("Invalid task id")
|
||||||
|
|
||||||
task = Task.init(project_name='Hyper-Parameter Optimization',
|
Task.add_requirements("requirements.txt")
|
||||||
task_name='Automatic Hyper-Parameter Optimization',
|
Task.ignore_requirements("torch")
|
||||||
|
Task.ignore_requirements("torchvision")
|
||||||
|
Task.ignore_requirements("tensorboard")
|
||||||
|
task = Task.init(project_name='Thesis/NrvForecast',
|
||||||
|
task_name='Autoregressive Quantile Regression Hyper-Parameter Optimization',
|
||||||
task_type=Task.TaskTypes.optimizer,
|
task_type=Task.TaskTypes.optimizer,
|
||||||
reuse_last_task_id=False)
|
reuse_last_task_id=False)
|
||||||
|
# Git credentials must not be committed to the repository: read them from the
# environment of the machine that launches the optimizer. A missing variable
# raises KeyError naming the variable that has to be set.
# NOTE(review): the password that was previously hard-coded here should be rotated.
import os
git_user = os.environ["CLEARML_AGENT_GIT_USER"]
git_pass = os.environ["CLEARML_AGENT_GIT_PASS"]
task.set_base_docker(
    "docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime"
    " --env GIT_SSL_NO_VERIFY=true"
    f" --env CLEARML_AGENT_GIT_USER={git_user}"
    f" --env CLEARML_AGENT_GIT_PASS={git_pass}"
)
|
||||||
|
task.set_packages("requirements.txt")
|
||||||
|
|
||||||
|
|
||||||
execution_queue = "default"
|
execution_queue = "default"
|
||||||
|
|
||||||
@@ -40,36 +50,42 @@ execution_queue = "default"
|
|||||||
#### Quantiles ####
|
#### Quantiles ####
|
||||||
quantile_lists = [
|
quantile_lists = [
|
||||||
[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], # Deciles
|
[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], # Deciles
|
||||||
[0.25, 0.5, 0.75], # Quartiles
|
|
||||||
[0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95], # 10% Increments, Excluding Extremes
|
[0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95], # 10% Increments, Excluding Extremes
|
||||||
[0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99], # Combining Deciles with Extremes
|
[0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99], # Combining Deciles with Extremes
|
||||||
[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1], # Including 0 and 1
|
[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1], # Including 0 and 1
|
||||||
[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], # Mixed Small and Large Increments
|
[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], # Mixed Small and Large Increments
|
||||||
[0.2, 0.4, 0.6, 0.8], # 20% Increments
|
|
||||||
[0.125, 0.375, 0.625, 0.875], # Eighths
|
[0.125, 0.375, 0.625, 0.875], # Eighths
|
||||||
[0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90], # 10% Increments
|
[0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 0.95], # 10% Increments
|
||||||
[0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.15, 0.2, 0.3, 0.5] # Mixed Fine and Coarser Increments
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
quantiles_range = DiscreteParameterRange("general/quantiles", values=quantile_lists)
|
quantiles_range = DiscreteParameterRange("general/quantiles", values=quantile_lists)
|
||||||
|
|
||||||
|
#### Data Config ####
|
||||||
|
quarter_range = DiscreteParameterRange("data_features/quarter", values=[True, False])
|
||||||
|
day_of_week_range = DiscreteParameterRange("data_features/day_of_week", values=[True, False])
|
||||||
|
|
||||||
|
load_forecast_range = DiscreteParameterRange("data_features/load_forecast", values=[True, False])
|
||||||
|
load_history_range = DiscreteParameterRange("data_features/load_history", values=[True, False])
|
||||||
|
|
||||||
### OPTIMIZER OBJECT ###
|
### OPTIMIZER OBJECT ###
|
||||||
optimizer = HyperParameterOptimizer(
|
optimizer = HyperParameterOptimizer(
|
||||||
base_task_id=input_task_id,
|
base_task_id=input_task_id,
|
||||||
objective_metric_title="PinballLoss",
|
objective_metric_title="Summary",
|
||||||
objective_metric_series="test",
|
objective_metric_series="test_CRPSLoss",
|
||||||
objective_metric_sign="min",
|
objective_metric_sign="min",
|
||||||
execution_queue=execution_queue,
|
execution_queue=execution_queue,
|
||||||
max_number_of_concurrent_tasks=1,
|
max_number_of_concurrent_tasks=1,
|
||||||
optimizer_class=aSearchStrategy,
|
optimizer_class=aSearchStrategy,
|
||||||
|
max_iteration_per_job=50,
|
||||||
# save_top_k_tasks_only=3,
|
# save_top_k_tasks_only=3,
|
||||||
pool_period_min=0.2,
|
pool_period_min=0.2,
|
||||||
total_max_jobs=15,
|
total_max_jobs=15,
|
||||||
|
|
||||||
hyper_parameters=[
|
hyper_parameters=[
|
||||||
quantiles_range,
|
quantiles_range,
|
||||||
|
quarter_range,
|
||||||
|
day_of_week_range,
|
||||||
|
load_forecast_range,
|
||||||
|
load_history_range
|
||||||
]
|
]
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user