Rewrote the NRVDataset to be cleaner
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch.utils.data import Dataset, DataLoader
|
from torch.utils.data import Dataset, DataLoader
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
class NrvDataset(Dataset):
|
class NrvDataset(Dataset):
|
||||||
@@ -19,37 +20,44 @@ class NrvDataset(Dataset):
|
|||||||
# reset dataframe index
|
# reset dataframe index
|
||||||
self.dataframe.reset_index(drop=True, inplace=True)
|
self.dataframe.reset_index(drop=True, inplace=True)
|
||||||
|
|
||||||
self.nrv = torch.tensor(dataframe["nrv"].to_numpy(), dtype=torch.float32)
|
|
||||||
self.load_forecast = torch.tensor(
|
|
||||||
dataframe["load_forecast"].to_numpy(), dtype=torch.float32
|
|
||||||
)
|
|
||||||
self.total_load = torch.tensor(
|
|
||||||
dataframe["total_load"].to_numpy(), dtype=torch.float32
|
|
||||||
)
|
|
||||||
self.pv_gen_forecast = torch.tensor(
|
|
||||||
dataframe["pv_forecast"].to_numpy(), dtype=torch.float32
|
|
||||||
)
|
|
||||||
self.wind_gen_forecast = torch.tensor(
|
|
||||||
dataframe["wind_forecast"].to_numpy(), dtype=torch.float32
|
|
||||||
)
|
|
||||||
|
|
||||||
self.quarter = torch.tensor(
|
|
||||||
dataframe["quarter"].to_numpy(), dtype=torch.float32
|
|
||||||
)
|
|
||||||
|
|
||||||
self.day_of_week = torch.tensor(
|
|
||||||
dataframe["day_of_week"].to_numpy(), dtype=torch.float32
|
|
||||||
)
|
|
||||||
|
|
||||||
self.sequence_length = sequence_length
|
self.sequence_length = sequence_length
|
||||||
self.predict_sequence_length = predict_sequence_length
|
self.predict_sequence_length = predict_sequence_length
|
||||||
|
|
||||||
self.samples_to_skip = self.skip_samples()
|
self.samples_to_skip = self.skip_samples()
|
||||||
total_indices = set(
|
total_indices = set(
|
||||||
range(len(self.nrv) - self.sequence_length - self.predict_sequence_length)
|
range(len(self.dataframe) - self.sequence_length - self.predict_sequence_length)
|
||||||
)
|
)
|
||||||
self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
|
self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
|
||||||
|
|
||||||
|
self.history_features = []
|
||||||
|
if self.data_config.LOAD_HISTORY:
|
||||||
|
self.history_features.append("total_load")
|
||||||
|
if self.data_config.PV_HISTORY:
|
||||||
|
self.history_features.append("pv_gen_forecast")
|
||||||
|
if self.data_config.WIND_HISTORY:
|
||||||
|
self.history_features.append("wind_gen_forecast")
|
||||||
|
|
||||||
|
self.forecast_features = []
|
||||||
|
if self.data_config.LOAD_FORECAST:
|
||||||
|
self.forecast_features.append("load_forecast")
|
||||||
|
if self.data_config.PV_FORECAST:
|
||||||
|
self.forecast_features.append("pv_gen_forecast")
|
||||||
|
if self.data_config.WIND_FORECAST:
|
||||||
|
self.forecast_features.append("wind_gen_forecast")
|
||||||
|
|
||||||
|
# add time feature to dataframe
|
||||||
|
time_feature = np.array([0] * len(self.dataframe))
|
||||||
|
if self.data_config.QUARTER:
|
||||||
|
time_feature += self.dataframe["quarter"]
|
||||||
|
|
||||||
|
if self.data_config.DAY_OF_WEEK:
|
||||||
|
d_w = self.dataframe["day_of_week"]
|
||||||
|
if self.data_config.QUARTER:
|
||||||
|
d_w *= 96
|
||||||
|
time_feature += d_w
|
||||||
|
|
||||||
|
self.dataframe["time_feature"] = time_feature
|
||||||
|
|
||||||
def skip_samples(self):
|
def skip_samples(self):
|
||||||
nan_rows = self.dataframe[self.dataframe.isnull().any(axis=1)]
|
nan_rows = self.dataframe[self.dataframe.isnull().any(axis=1)]
|
||||||
nan_indices = nan_rows.index
|
nan_indices = nan_rows.index
|
||||||
@@ -80,88 +88,41 @@ class NrvDataset(Dataset):
|
|||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self.valid_indices)
|
return len(self.valid_indices)
|
||||||
|
|
||||||
|
def _get__all_data(self, idx: int):
|
||||||
|
history_df = self.dataframe.iloc[idx : idx + self.sequence_length]
|
||||||
|
forecast_df = self.dataframe.iloc[
|
||||||
|
idx + self.sequence_length : idx + self.sequence_length + self.predict_sequence_length
|
||||||
|
]
|
||||||
|
return history_df, forecast_df
|
||||||
|
|
||||||
def __getitem__(self, idx):
|
def __getitem__(self, idx):
|
||||||
actual_idx = self.valid_indices[idx]
|
actual_idx = self.valid_indices[idx]
|
||||||
features = []
|
|
||||||
|
history_df, forecast_df = self._get__all_data(actual_idx)
|
||||||
|
|
||||||
|
# get nrv history features
|
||||||
|
nrv_features = torch.tensor(history_df[["nrv"]].values).reshape(-1)
|
||||||
|
|
||||||
if self.data_config.NRV_HISTORY:
|
# get history features
|
||||||
nrv = self.nrv[actual_idx : actual_idx + self.sequence_length]
|
history_features = history_df[self.history_features].values
|
||||||
features.append(nrv.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.LOAD_HISTORY:
|
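# flatten the history features into one 1-D tensor (NOTE: reshape(-1) on the (time step, feature) array interleaves the features per time step; transpose first if a feature-by-feature layout is intended)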
|
||||||
load_history = self.total_load[
|
history_features = torch.tensor(history_features).reshape(-1)
|
||||||
actual_idx : actual_idx + self.sequence_length
|
|
||||||
]
|
|
||||||
features.append(load_history.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.PV_HISTORY:
|
# get forecast features
|
||||||
pv_history = self.pv_gen_forecast[
|
forecast_features = forecast_df[self.forecast_features].values
|
||||||
actual_idx : actual_idx + self.sequence_length
|
forecast_features = torch.tensor(forecast_features).view(-1)
|
||||||
]
|
|
||||||
features.append(pv_history.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.WIND_HISTORY:
|
# add last time feature of the history
|
||||||
wind_history = self.wind_gen_forecast[
|
time_feature = history_df["time_feature"].iloc[-1]
|
||||||
actual_idx : actual_idx + self.sequence_length
|
|
||||||
]
|
|
||||||
features.append(wind_history.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.LOAD_FORECAST:
|
## all features
|
||||||
load_forecast = self.load_forecast[
|
all_features = torch.cat(
|
||||||
actual_idx
|
[nrv_features, history_features, forecast_features, torch.tensor([time_feature])], dim=0
|
||||||
+ self.sequence_length : actual_idx
|
)
|
||||||
+ self.sequence_length
|
|
||||||
+ self.predict_sequence_length
|
|
||||||
]
|
|
||||||
features.append(load_forecast.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.PV_FORECAST:
|
|
||||||
pv_forecast = self.pv_gen_forecast[
|
|
||||||
actual_idx
|
|
||||||
+ self.sequence_length : actual_idx
|
|
||||||
+ self.sequence_length
|
|
||||||
+ self.predict_sequence_length
|
|
||||||
]
|
|
||||||
features.append(pv_forecast.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.WIND_FORECAST:
|
|
||||||
wind_forecast = self.wind_gen_forecast[
|
|
||||||
actual_idx
|
|
||||||
+ self.sequence_length : actual_idx
|
|
||||||
+ self.sequence_length
|
|
||||||
+ self.predict_sequence_length
|
|
||||||
]
|
|
||||||
features.append(wind_forecast.view(-1))
|
|
||||||
|
|
||||||
### Time Features ###
|
|
||||||
time_feature = 0
|
|
||||||
if self.data_config.QUARTER:
|
|
||||||
time_feature += self.quarter[actual_idx].item()
|
|
||||||
|
|
||||||
if self.data_config.DAY_OF_WEEK:
|
|
||||||
d_w = self.day_of_week[actual_idx].item()
|
|
||||||
if self.data_config.QUARTER:
|
|
||||||
d_w *= 96
|
|
||||||
time_feature += d_w
|
|
||||||
|
|
||||||
if time_feature is not None:
|
|
||||||
features.append(torch.tensor([time_feature]))
|
|
||||||
|
|
||||||
if not features:
|
|
||||||
raise ValueError(
|
|
||||||
"No features are configured to be included in the dataset."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Concatenate along dimension 0 to create a one-dimensional feature vector
|
|
||||||
all_features = torch.cat(features, dim=0)
|
|
||||||
|
|
||||||
# Target sequence, flattened if necessary
|
# Target sequence, flattened if necessary
|
||||||
nrv_target = self.nrv[
|
nrv_target = forecast_df["nrv"].values
|
||||||
actual_idx
|
|
||||||
+ self.sequence_length : actual_idx
|
|
||||||
+ self.sequence_length
|
|
||||||
+ self.predict_sequence_length
|
|
||||||
].view(-1)
|
|
||||||
|
|
||||||
# check if nan values are present
|
# check if nan values are present
|
||||||
if torch.isnan(all_features).any():
|
if torch.isnan(all_features).any():
|
||||||
@@ -169,78 +130,21 @@ class NrvDataset(Dataset):
|
|||||||
print(f"Actual index: {actual_idx}")
|
print(f"Actual index: {actual_idx}")
|
||||||
raise ValueError("There are nan values in the features.")
|
raise ValueError("There are nan values in the features.")
|
||||||
|
|
||||||
|
# all features and target to float
|
||||||
|
all_features = all_features.float()
|
||||||
|
|
||||||
|
# to tensors
|
||||||
|
nrv_target = torch.tensor(nrv_target).float()
|
||||||
return all_features, nrv_target, idx
|
return all_features, nrv_target, idx
|
||||||
|
|
||||||
def random_day_autoregressive(self, idx: int):
|
def random_day_autoregressive(self, idx: int):
|
||||||
idx = self.valid_indices[idx]
|
all_features, nrv_target, _ = self.__getitem__(idx)
|
||||||
features = []
|
|
||||||
|
|
||||||
# we already have the NRV history with the newly predicted values, so we don't need to include the last 96 values
|
# remove the first 96 values of the features (the nrv history)
|
||||||
if self.data_config.LOAD_HISTORY:
|
all_features = all_features[self.sequence_length :]
|
||||||
load_history = self.total_load[idx : idx + self.sequence_length]
|
|
||||||
features.append(load_history.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.PV_HISTORY:
|
return all_features, nrv_target
|
||||||
pv_history = self.pv_gen_forecast[idx : idx + self.sequence_length]
|
|
||||||
features.append(pv_history.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.WIND_HISTORY:
|
|
||||||
wind_history = self.wind_gen_forecast[idx : idx + self.sequence_length]
|
|
||||||
features.append(wind_history.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.LOAD_FORECAST:
|
|
||||||
load_forecast = self.load_forecast[
|
|
||||||
idx
|
|
||||||
+ self.sequence_length : idx
|
|
||||||
+ self.sequence_length
|
|
||||||
+ self.predict_sequence_length
|
|
||||||
]
|
|
||||||
features.append(load_forecast.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.PV_FORECAST:
|
|
||||||
pv_forecast = self.pv_gen_forecast[
|
|
||||||
idx
|
|
||||||
+ self.sequence_length : idx
|
|
||||||
+ self.sequence_length
|
|
||||||
+ self.predict_sequence_length
|
|
||||||
]
|
|
||||||
features.append(pv_forecast.view(-1))
|
|
||||||
|
|
||||||
if self.data_config.WIND_FORECAST:
|
|
||||||
wind_forecast = self.wind_gen_forecast[
|
|
||||||
idx
|
|
||||||
+ self.sequence_length : idx
|
|
||||||
+ self.sequence_length
|
|
||||||
+ self.predict_sequence_length
|
|
||||||
]
|
|
||||||
features.append(wind_forecast.view(-1))
|
|
||||||
|
|
||||||
### Time Features ###
|
|
||||||
time_feature = 0
|
|
||||||
if self.data_config.QUARTER:
|
|
||||||
time_feature += self.quarter[idx]
|
|
||||||
|
|
||||||
if self.data_config.DAY_OF_WEEK:
|
|
||||||
d_w = self.day_of_week[idx].item()
|
|
||||||
if self.data_config.QUARTER:
|
|
||||||
d_w *= 96
|
|
||||||
time_feature += d_w
|
|
||||||
|
|
||||||
if time_feature is not None:
|
|
||||||
features.append(torch.tensor([time_feature]))
|
|
||||||
|
|
||||||
target = self.nrv[
|
|
||||||
idx
|
|
||||||
+ self.sequence_length : idx
|
|
||||||
+ self.sequence_length
|
|
||||||
+ self.predict_sequence_length
|
|
||||||
]
|
|
||||||
|
|
||||||
if len(features) == 0:
|
|
||||||
return None, target
|
|
||||||
|
|
||||||
all_features = torch.cat(features, dim=0)
|
|
||||||
return all_features, target
|
|
||||||
|
|
||||||
def get_batch(self, idx: list):
|
def get_batch(self, idx: list):
|
||||||
features = []
|
features = []
|
||||||
|
|||||||
@@ -1,30 +1,28 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
import torch
|
import torch
|
||||||
|
from properscoring import crps_ensemble
|
||||||
|
|
||||||
|
|
||||||
class CRPSLoss(nn.Module):
|
class CRPSLoss(nn.Module):
|
||||||
def __init__(self, quantiles):
|
def __init__(self):
|
||||||
super(CRPSLoss, self).__init__()
|
super(CRPSLoss, self).__init__()
|
||||||
|
|
||||||
if not torch.is_tensor(quantiles):
|
|
||||||
quantiles = torch.tensor(quantiles, dtype=torch.float32)
|
|
||||||
self.quantiles_tensor = quantiles
|
|
||||||
|
|
||||||
def forward(self, preds, target):
|
def forward(self, preds, target):
|
||||||
|
# if tensor, to cpu
|
||||||
|
if isinstance(preds, torch.Tensor):
|
||||||
|
preds = preds.detach().cpu()
|
||||||
|
|
||||||
|
if isinstance(target, torch.Tensor):
|
||||||
|
target = target.detach().cpu()
|
||||||
|
|
||||||
|
# drop the trailing singleton dimension of target (squeeze(-1))
|
||||||
|
target = target.squeeze(-1)
|
||||||
|
|
||||||
# preds shape: [batch_size, num_quantiles]
|
# preds shape: [batch_size, num_quantiles]
|
||||||
|
scores = crps_ensemble(target, preds)
|
||||||
# unsqueeze target
|
|
||||||
# target = target.unsqueeze(-1)
|
|
||||||
|
|
||||||
mask = (preds > target).float()
|
|
||||||
self.quantiles_tensor = self.quantiles_tensor.to(preds.device)
|
|
||||||
test = self.quantiles_tensor - mask
|
|
||||||
# square them
|
|
||||||
test = test * test
|
|
||||||
crps = torch.trapz(test, x=preds)
|
|
||||||
|
|
||||||
# mean over batch
|
# mean over batch
|
||||||
crps = torch.mean(crps)
|
crps = scores.mean()
|
||||||
|
|
||||||
return crps
|
return crps
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ from src.models.time_embedding_layer import TimeEmbedding
|
|||||||
|
|
||||||
#### ClearML ####
|
#### ClearML ####
|
||||||
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
|
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
|
||||||
task = clearml_helper.get_task(task_name="None")
|
task = clearml_helper.get_task(task_name="Autoregressive Quantile Regression")
|
||||||
|
|
||||||
|
|
||||||
#### Data Processor ####
|
#### Data Processor ####
|
||||||
@@ -63,8 +63,8 @@ trainer = AutoRegressiveQuantileTrainer(
|
|||||||
debug=True,
|
debug=True,
|
||||||
)
|
)
|
||||||
trainer.add_metrics_to_track(
|
trainer.add_metrics_to_track(
|
||||||
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
|
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss()]
|
||||||
)
|
)
|
||||||
trainer.early_stopping(patience=10)
|
trainer.early_stopping(patience=10)
|
||||||
trainer.plot_every(5)
|
trainer.plot_every(5)
|
||||||
trainer.train(task=task, epochs=epochs, remotely=True)
|
trainer.train(task=task, epochs=epochs, remotely=False)
|
||||||
Reference in New Issue
Block a user