Non-autoregressive GRU model load

2024-05-06 16:11:15 +02:00
parent 19ab597ae6
commit d7f4c1849b
7 changed files with 55 additions and 22 deletions
--- a/src/models/lstm_model.py
+++ b/src/models/lstm_model.py
@@ -1,7 +1,15 @@
 import torch

+
 class LSTMModel(torch.nn.Module):
-    def __init__(self, inputSize, output_size, num_layers: int, hidden_size: int, dropout: float = 0.2):
+    def __init__(
+        self,
+        inputSize,
+        output_size,
+        num_layers: int,
+        hidden_size: int,
+        dropout: float = 0.2,
+    ):
        super(LSTMModel, self).__init__()
        self.inputSize = inputSize
        self.output_size = output_size
@@ -10,20 +18,34 @@ class LSTMModel(torch.nn.Module):
        self.hidden_size = hidden_size
        self.dropout = dropout

-        self.lstm = torch.nn.LSTM(input_size=inputSize[-1], hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, batch_first=True)
+        self.lstm = torch.nn.LSTM(
+            input_size=inputSize[-1],
+            hidden_size=hidden_size,
+            num_layers=num_layers,
+            dropout=dropout,
+            batch_first=True,
+        )
        self.linear = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Forward pass through the LSTM layers
        _, (hidden_state, _) = self.lstm(x)
-        
+
        # Use the hidden state from the last time step for the output
        output = self.linear(hidden_state[-1])
-        
+
        return output

+
 class GRUModel(torch.nn.Module):
-    def __init__(self, inputSize, output_size, num_layers: int, hidden_size: int, dropout: float = 0.2):
+    def __init__(
+        self,
+        inputSize,
+        output_size,
+        num_layers: int,
+        hidden_size: int,
+        dropout: float = 0.2,
+    ):
        super(GRUModel, self).__init__()
        self.inputSize = inputSize
        self.output_size = output_size
@@ -32,14 +54,24 @@ class GRUModel(torch.nn.Module):
        self.hidden_size = hidden_size
        self.dropout = dropout

-        self.gru = torch.nn.GRU(input_size=inputSize[-1], hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, batch_first=True)
+        self.gru = torch.nn.GRU(
+            input_size=inputSize[-1],
+            hidden_size=hidden_size,
+            num_layers=num_layers,
+            dropout=dropout,
+            batch_first=True,
+        )
        self.linear = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
+        # if the input has only 2 dimensions, add a leading batch dimension of size 1
+        if x.dim() == 2:
+            x = x.unsqueeze(0)
+
        # Forward pass through the GRU layers
        x, _ = self.gru(x)
        x = x[:, -1, :]
        # Use the hidden state from the last time step for the output
        output = self.linear(x)
-        
+
        return output
--- a/src/trainers/quantile_trainer.py
+++ b/src/trainers/quantile_trainer.py
@@ -633,6 +633,7 @@ class NonAutoRegressiveQuantileRegression(Trainer):
    def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
        for actual_idx, idx in sample_indices.items():
            features, target, _ = data_loader.dataset[idx]
+            print(features.shape, target.shape)

            features = features.to(self.device)
            target = target.to(self.device)
--- a/src/training_scripts/non_autoregressive_quantiles.py
+++ b/src/training_scripts/non_autoregressive_quantiles.py
@@ -2,7 +2,7 @@ from src.utils.clearml import ClearMLHelper

 #### ClearML ####
 clearml_helper = ClearMLHelper(project_name="Thesis/NAQR: GRU")
-task = clearml_helper.get_task(task_name="NAQR: GRU (2 - 256)")
+task = clearml_helper.get_task(task_name="NAQR: GRU (2 - 256) + Load")
 task.execute_remotely(queue_name="default", exit_process=True)

 from src.policies.PolicyEvaluator import PolicyEvaluator
@@ -27,8 +27,8 @@ from src.models.time_embedding_layer import TimeEmbedding
 data_config = DataConfig()

 data_config.NRV_HISTORY = True
-data_config.LOAD_HISTORY = False
-data_config.LOAD_FORECAST = False
+data_config.LOAD_HISTORY = True
+data_config.LOAD_FORECAST = True

 data_config.WIND_FORECAST = False
 data_config.WIND_HISTORY = False
@@ -64,8 +64,8 @@ else:

 model_parameters = {
    "learning_rate": 0.0001,
-    "hidden_size": 256,
-    "num_layers": 2,
+    "hidden_size": 512,
+    "num_layers": 8,
    "dropout": 0.2,
 }

@@ -83,7 +83,7 @@ model_parameters = task.connect(model_parameters, name="model_parameters")

 lstm_model = GRUModel(
    inputDim,
-    len(quantiles),
+    len(quantiles) * 96,
    hidden_size=model_parameters["hidden_size"],
    num_layers=model_parameters["num_layers"],
    dropout=model_parameters["dropout"],