Added time as input feature

This commit is contained in:
Victor Mylle
2023-11-26 18:43:03 +00:00
parent a2c9427d16
commit 2f40f41786
9 changed files with 168 additions and 138 deletions

View File

@@ -18,14 +18,14 @@
- [x] Fix debug plots for quantile regression -> predict quantiles and look if true value is below a quantile, if so 1 else 0 and average these over all samples - [x] Fix debug plots for quantile regression -> predict quantiles and look if true value is below a quantile, if so 1 else 0 and average these over all samples
- [ ] Full day debug plots for quantile regression - [ ] Full day debug plots for quantile regression
- [x] CRPS Metrics - [x] CRPS Metrics
- [ ] Time as input parameter: - [x] Time as input parameter:
- [x] Probabilistic Baseline -> Quantiles on Training Data -> Breedte bekijken -> Gebruiken voor CRPS en plotjes - [x] Probabilistic Baseline -> Quantiles on Training Data -> Breedte bekijken -> Gebruiken voor CRPS en plotjes
- [ ] Day-ahead implicit net position - [ ] Day-ahead implicit net position
- [x] Faster sampling for quantile regression - [x] Faster sampling for quantile regression
- [ ] Quantile plots for other model (Linear, GRU) (Check if better) - [ ] Quantile plots for other model (Linear, GRU) (Check if better)
- [ ] Check example plots to see if metrics correspond with what seen on plots - [ ] Check example plots to see if metrics correspond with what seen on plots
- [ ] Time step (96 values) to embedding layer - [x] Time step (96 values) to embedding layer
- [x] Mean of nrv per time step plotten (done for probabilistic baseline) - [x] Mean of nrv per time step plotten (done for probabilistic baseline)
- [x] Convert back to MW on plots - [x] Convert back to MW on plots

View File

@@ -8,4 +8,5 @@ statsmodels
lightgbm lightgbm
prettytable prettytable
clearml clearml
properscoring properscoring
nbconvert

View File

@@ -33,6 +33,14 @@ class NrvDataset(Dataset):
dataframe["wind_forecast"].to_numpy(), dtype=torch.float32 dataframe["wind_forecast"].to_numpy(), dtype=torch.float32
) )
self.quarter = torch.tensor(
dataframe["quarter"].to_numpy(), dtype=torch.float32
)
self.day_of_week = torch.tensor(
dataframe["day_of_week"].to_numpy(), dtype=torch.float32
)
self.sequence_length = sequence_length self.sequence_length = sequence_length
self.predict_sequence_length = predict_sequence_length self.predict_sequence_length = predict_sequence_length
@@ -125,6 +133,20 @@ class NrvDataset(Dataset):
] ]
features.append(wind_forecast.view(-1)) features.append(wind_forecast.view(-1))
### Time Features ###
# Build a single categorical time index for the embedding layer:
#   quarter-of-day in [0, 96) and/or day-of-week in [0, 7),
#   combined as quarter + 96 * day_of_week when both are enabled
#   (unique index in [0, 672), matching get_time_feature_size()).
# Only append the feature when at least one time flag is configured;
# the previous `if time_feature is not None:` was always true and
# appended a spurious constant-0 feature when both flags were off.
if self.data_config.QUARTER or self.data_config.DAY_OF_WEEK:
    time_feature = 0
    if self.data_config.QUARTER:
        time_feature += self.quarter[actual_idx].item()
    if self.data_config.DAY_OF_WEEK:
        day_component = self.day_of_week[actual_idx].item()
        if self.data_config.QUARTER:
            # Offset by whole days so (quarter, day) pairs stay unique.
            day_component *= 96
        time_feature += day_component
    features.append(torch.tensor([time_feature]))
if not features: if not features:
raise ValueError( raise ValueError(
"No features are configured to be included in the dataset." "No features are configured to be included in the dataset."
@@ -193,6 +215,20 @@ class NrvDataset(Dataset):
] ]
features.append(wind_forecast.view(-1)) features.append(wind_forecast.view(-1))
### Time Features ###
# Same encoding as the other __getitem__ branch: quarter-of-day and/or
# day-of-week folded into one categorical index (quarter + 96 * day).
# Fixes two defects:
#   * `if time_feature is not None:` was always true, appending a
#     constant-0 feature even with no time flags enabled;
#   * `self.quarter[idx]` lacked `.item()`, so a 0-d tensor (not a
#     Python number) was accumulated, unlike the sibling code path.
if self.data_config.QUARTER or self.data_config.DAY_OF_WEEK:
    time_feature = 0
    if self.data_config.QUARTER:
        time_feature += self.quarter[idx].item()
    if self.data_config.DAY_OF_WEEK:
        day_component = self.day_of_week[idx].item()
        if self.data_config.QUARTER:
            # Offset by whole days so (quarter, day) pairs stay unique.
            day_component *= 96
        time_feature += day_component
    features.append(torch.tensor([time_feature]))
target = self.nrv[ target = self.nrv[
idx idx
+ self.sequence_length : idx + self.sequence_length : idx

View File

@@ -31,13 +31,14 @@ class DataConfig:
### TIME ### ### TIME ###
self.YEAR: bool = False self.YEAR: bool = False
self.DAY: bool = False self.DAY_OF_WEEK: bool = False
self.QUARTER: bool = False self.QUARTER: bool = False
class DataProcessor: class DataProcessor:
def __init__(self, data_config: DataConfig): def __init__(self, data_config: DataConfig, path:str="./"):
self.batch_size = 2048 self.batch_size = 2048
self.path = path
self.train_range = ( self.train_range = (
-np.inf, -np.inf,
@@ -62,6 +63,13 @@ class DataProcessor:
self.wind_forecast, on="datetime", how="left" self.wind_forecast, on="datetime", how="left"
) )
self.all_features["quarter"] = (
self.all_features["datetime"].dt.hour * 4
+ self.all_features["datetime"].dt.minute / 15
)
self.all_features["day_of_week"] = self.all_features["datetime"].dt.dayofweek
self.output_size = 96 self.output_size = 96
self.data_config = data_config self.data_config = data_config
@@ -103,7 +111,7 @@ class DataProcessor:
) )
def get_nrv_history(self): def get_nrv_history(self):
df = pd.read_csv(history_data_path, delimiter=";") df = pd.read_csv(self.path + history_data_path, delimiter=";")
df = df[["datetime", "netregulationvolume"]] df = df[["datetime", "netregulationvolume"]]
df = df.rename(columns={"netregulationvolume": "nrv"}) df = df.rename(columns={"netregulationvolume": "nrv"})
df["datetime"] = pd.to_datetime(df["datetime"]) df["datetime"] = pd.to_datetime(df["datetime"])
@@ -114,7 +122,7 @@ class DataProcessor:
return df return df
def get_load_forecast(self): def get_load_forecast(self):
df = pd.read_csv(forecast_data_path, delimiter=";") df = pd.read_csv(self.path + forecast_data_path, delimiter=";")
df = df.rename( df = df.rename(
columns={ columns={
"Day-ahead 6PM forecast": "load_forecast", "Day-ahead 6PM forecast": "load_forecast",
@@ -129,7 +137,7 @@ class DataProcessor:
return df return df
def get_pv_forecast(self): def get_pv_forecast(self):
df = pd.read_csv(pv_forecast_data_path, delimiter=";") df = pd.read_csv(self.path + pv_forecast_data_path, delimiter=";")
df = df.rename( df = df.rename(
columns={"dayahead11hforecast": "pv_forecast", "Datetime": "datetime"} columns={"dayahead11hforecast": "pv_forecast", "Datetime": "datetime"}
@@ -142,7 +150,7 @@ class DataProcessor:
return df return df
def get_wind_forecast(self): def get_wind_forecast(self):
df = pd.read_csv(wind_forecast_data_path, delimiter=";") df = pd.read_csv(self.path + wind_forecast_data_path, delimiter=";")
df = df.rename( df = df.rename(
columns={"dayaheadforecast": "wind_forecast", "datetime": "datetime"} columns={"dayaheadforecast": "wind_forecast", "datetime": "datetime"}
@@ -267,3 +275,14 @@ class DataProcessor:
) )
input, _, _ = next(iter(data_loader)) input, _, _ = next(iter(data_loader))
return input.shape[-1] return input.shape[-1]
def get_time_feature_size(self):
    """Return the number of distinct time-index values the dataset emits.

    96 quarters per day and/or 7 days per week multiply together
    (both enabled -> 672). Returns 0 when no time feature is configured,
    so callers can skip the time-embedding layer entirely.
    """
    size = 1
    if self.data_config.QUARTER:
        size *= 96  # quarter-hours in a day
    if self.data_config.DAY_OF_WEEK:
        size *= 7  # days in a week
    return 0 if size == 1 else size

View File

@@ -0,0 +1,28 @@
from torch import nn
import torch


class TimeEmbedding(nn.Module):
    """Embed a categorical time index carried in the last input column.

    Expects a (batch, features) tensor whose last column holds an
    integer-valued time index in [0, time_features). That column is
    replaced by its learned embedding, so the output width is
    input_dim - 1 + embedding_dim (see :meth:`output_dim`).
    """

    def __init__(self, time_features: int, embedding_dim: int):
        super().__init__()
        self.time_features = time_features
        self.embedding = nn.Embedding(time_features, embedding_dim)

    def forward(self, x):
        # The last column carries the categorical time index; nn.Embedding
        # requires an integer (long) index tensor.
        time_feature = x[:, -1].long()
        # Valid indices are 0 .. time_features - 1, so `>=` (not `>`) is the
        # correct out-of-range check. Fail fast with a clear message instead
        # of letting nn.Embedding raise an opaque device-side index error.
        if time_feature.numel() and time_feature.max() >= self.time_features:
            raise ValueError(
                f"time feature index {int(time_feature.max())} out of range "
                f"for embedding of size {self.time_features}"
            )
        embedded_time = self.embedding(time_feature)
        # Swap the raw index column for its embedding.
        return torch.cat((x[:, :-1], embedded_time), dim=1)

    def output_dim(self, input_dim):
        """Feature count after the index column is replaced by its embedding."""
        return input_dim + self.embedding.embedding_dim - 1

View File

@@ -7,21 +7,22 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"import sys\n", "import sys\n",
"sys.path.append('..')\n", "sys.path.append('../..')\n",
"from data import DataProcessor, DataConfig\n", "from src.data import DataProcessor, DataConfig\n",
"from trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression\n", "from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression\n",
"from trainers.probabilistic_baseline import ProbabilisticBaselineTrainer\n", "from src.trainers.probabilistic_baseline import ProbabilisticBaselineTrainer\n",
"from trainers.autoregressive_trainer import AutoRegressiveTrainer\n", "from src.trainers.autoregressive_trainer import AutoRegressiveTrainer\n",
"from trainers.trainer import Trainer\n", "from src.trainers.trainer import Trainer\n",
"from utils.clearml import ClearMLHelper\n", "from src.utils.clearml import ClearMLHelper\n",
"from models import *\n", "from src.models import *\n",
"from losses import *\n", "from src.losses import *\n",
"import torch\n", "import torch\n",
"import numpy as np\n", "import numpy as np\n",
"from torch.nn import MSELoss, L1Loss\n", "from torch.nn import MSELoss, L1Loss\n",
"from datetime import datetime\n", "from datetime import datetime\n",
"import pytz\n",
"import torch.nn as nn\n", "import torch.nn as nn\n",
"from src.models.time_embedding_layer import TimeEmbedding\n",
"\n",
"\n", "\n",
"# auto reload\n", "# auto reload\n",
"%load_ext autoreload\n", "%load_ext autoreload\n",
@@ -33,7 +34,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -46,7 +47,10 @@
"data_config.WIND_FORECAST = False\n", "data_config.WIND_FORECAST = False\n",
"data_config.WIND_HISTORY = False\n", "data_config.WIND_HISTORY = False\n",
"\n", "\n",
"data_processor = DataProcessor(data_config)\n", "data_config.QUARTER = True\n",
"data_config.DAY_OF_WEEK = False\n",
"\n",
"data_processor = DataProcessor(data_config, path=\"../../\")\n",
"data_processor.set_batch_size(1024)\n", "data_processor.set_batch_size(1024)\n",
"data_processor.set_full_day_skip(False)" "data_processor.set_full_day_skip(False)"
] ]
@@ -60,27 +64,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"InsecureRequestWarning: Certificate verification is disabled! Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"ClearML Task: created new task id=8423d146953041eba8d7b4c27d7ed6a5\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/8423d146953041eba8d7b4c27d7ed6a5/output/log\n",
"2023-11-23 23:07:35,461 - clearml.Task - INFO - Storing jupyter notebook directly as code\n",
"2023-11-23 23:07:39,250 - clearml - WARNING - JSON serialization of artifact 'dictionary' failed, reverting to pickle\n"
]
}
],
"source": [ "source": [
"data_processor.set_full_day_skip(True)\n", "data_processor.set_full_day_skip(True)\n",
"quantiles = [0.01, 0.05, 0.1, 0.15, 0.4, 0.5, 0.6, 0.85, 0.9, 0.95, 0.99]\n", "quantiles = [0.01, 0.05, 0.1, 0.15, 0.4, 0.5, 0.6, 0.85, 0.9, 0.95, 0.99]\n",
@@ -100,37 +86,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"InsecureRequestWarning: Certificate verification is disabled! Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"ClearML Task: created new task id=11553d672a2744479de07c9ac0a9dbde\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/11553d672a2744479de07c9ac0a9dbde/output/log\n",
"2023-11-19 18:06:57,539 - clearml.Task - INFO - Storing jupyter notebook directly as code\n",
"2023-11-19 18:06:57,543 - clearml.Repository Detection - WARNING - Can't get url information for git repo in /workspaces/Thesis/src/notebooks\n",
"2023-11-19 18:07:14,402 - clearml.model - WARNING - 500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n",
"2023-11-19 18:07:14,403 - clearml.model - WARNING - Selected model `Non Autoregressive Quantile Regression` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n",
"2023-11-19 18:07:14,412 - clearml.frameworks - INFO - Found existing registered model id=bc0cb0d7fc614e2e8b0edf5b85348646 [/workspaces/Thesis/src/notebooks/checkpoint.pt] reusing it.\n",
"2023-11-19 18:07:14,974 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:16,827 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:18,465 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:20,045 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:21,843 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:28,812 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"Early stopping triggered\n"
]
}
],
"source": [ "source": [
"#### Hyperparameters ####\n", "#### Hyperparameters ####\n",
"inputDim = data_processor.get_input_size()\n", "inputDim = data_processor.get_input_size()\n",
@@ -158,32 +116,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"ename": "ParserError",
"evalue": "Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mParserError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/workspaces/Thesis/src/notebooks/training.ipynb Cell 8\u001b[0m line \u001b[0;36m1\n\u001b[1;32m <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=10'>11</a>\u001b[0m data_config \u001b[39m=\u001b[39m DataConfig()\n\u001b[1;32m <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=11'>12</a>\u001b[0m data_config\u001b[39m.\u001b[39mLOAD_FORECAST \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m---> <a 
href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=12'>13</a>\u001b[0m data_processor \u001b[39m=\u001b[39m DataProcessor(data_config)\n\u001b[1;32m <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=13'>14</a>\u001b[0m data_processor\u001b[39m.\u001b[39mset_batch_size(\u001b[39m1024\u001b[39m)\n\u001b[1;32m <a 
href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=16'>17</a>\u001b[0m data_processor\u001b[39m.\u001b[39mset_train_range((datetime(year\u001b[39m=\u001b[39m\u001b[39m2015\u001b[39m, month\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m, day\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m, tzinfo\u001b[39m=\u001b[39mpytz\u001b[39m.\u001b[39mUTC), datetime(year\u001b[39m=\u001b[39m\u001b[39m2022\u001b[39m, month\u001b[39m=\u001b[39m\u001b[39m11\u001b[39m, day\u001b[39m=\u001b[39m\u001b[39m30\u001b[39m, tzinfo\u001b[39m=\u001b[39mpytz\u001b[39m.\u001b[39mUTC)))\n",
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../data/preprocessing.py:52\u001b[0m, in \u001b[0;36mDataProcessor.__init__\u001b[0;34m(self, data_config)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhistory_features \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_nrv_history()\n\u001b[1;32m 51\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfuture_features \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_load_forecast()\n\u001b[0;32m---> 52\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpv_forecast \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_pv_forecast()\n\u001b[1;32m 53\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwind_forecast \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_wind_forecast()\n\u001b[1;32m 55\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mall_features \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhistory_features\u001b[39m.\u001b[39mmerge(\n\u001b[1;32m 56\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfuture_features, on\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mdatetime\u001b[39m\u001b[39m\"\u001b[39m, how\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mleft\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 57\u001b[0m )\n",
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../data/preprocessing.py:132\u001b[0m, in \u001b[0;36mDataProcessor.get_pv_forecast\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 131\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_pv_forecast\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 132\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(pv_forecast_data_path, delimiter\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m;\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m 134\u001b[0m df \u001b[39m=\u001b[39m df\u001b[39m.\u001b[39mrename(\n\u001b[1;32m 135\u001b[0m columns\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mdayahead11hforecast\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mpv_forecast\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mDatetime\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mdatetime\u001b[39m\u001b[39m\"\u001b[39m}\n\u001b[1;32m 136\u001b[0m )\n\u001b[1;32m 137\u001b[0m df \u001b[39m=\u001b[39m df[[\u001b[39m\"\u001b[39m\u001b[39mdatetime\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mpv_forecast\u001b[39m\u001b[39m\"\u001b[39m]]\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 899\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 900\u001b[0m dialect,\n\u001b[1;32m 901\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 908\u001b[0m dtype_backend\u001b[39m=\u001b[39mdtype_backend,\n\u001b[1;32m 909\u001b[0m )\n\u001b[1;32m 910\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:583\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n\u001b[1;32m 582\u001b[0m \u001b[39mwith\u001b[39;00m parser:\n\u001b[0;32m--> 583\u001b[0m \u001b[39mreturn\u001b[39;00m parser\u001b[39m.\u001b[39;49mread(nrows)\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1704\u001b[0m, in \u001b[0;36mTextFileReader.read\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 1697\u001b[0m nrows \u001b[39m=\u001b[39m validate_integer(\u001b[39m\"\u001b[39m\u001b[39mnrows\u001b[39m\u001b[39m\"\u001b[39m, nrows)\n\u001b[1;32m 1698\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1699\u001b[0m \u001b[39m# error: \"ParserBase\" has no attribute \"read\"\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m (\n\u001b[1;32m 1701\u001b[0m index,\n\u001b[1;32m 1702\u001b[0m columns,\n\u001b[1;32m 1703\u001b[0m col_dict,\n\u001b[0;32m-> 1704\u001b[0m ) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_engine\u001b[39m.\u001b[39;49mread( \u001b[39m# type: ignore[attr-defined]\u001b[39;49;00m\n\u001b[1;32m 1705\u001b[0m nrows\n\u001b[1;32m 1706\u001b[0m )\n\u001b[1;32m 1707\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[1;32m 1708\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py:234\u001b[0m, in \u001b[0;36mCParserWrapper.read\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 233\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlow_memory:\n\u001b[0;32m--> 234\u001b[0m chunks \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_reader\u001b[39m.\u001b[39;49mread_low_memory(nrows)\n\u001b[1;32m 235\u001b[0m \u001b[39m# destructive to chunks\u001b[39;00m\n\u001b[1;32m 236\u001b[0m data \u001b[39m=\u001b[39m _concatenate_chunks(chunks)\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:814\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader.read_low_memory\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:875\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._read_rows\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:850\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:861\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._check_tokenize_status\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:2029\u001b[0m, in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mParserError\u001b[0m: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'."
]
}
],
"source": [ "source": [
"#### Hyperparameters ####\n", "#### Hyperparameters ####\n",
"inputDim = 96\n", "inputDim = 96\n",
@@ -219,23 +154,30 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Quantile Regression" "# Autoregressive Quantile Regression"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"96\n"
]
},
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"/workspaces/Thesis/src/notebooks/../trainers/quantile_trainer.py:70: UserWarning:\n", "/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:68: UserWarning:\n",
"\n", "\n",
"To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", "To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"\n", "\n",
"/workspaces/Thesis/src/notebooks/../losses/pinball_loss.py:8: UserWarning:\n", "/workspaces/Thesis/src/notebooks/../../src/losses/pinball_loss.py:8: UserWarning:\n",
"\n", "\n",
"To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", "To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"\n" "\n"
@@ -245,8 +187,8 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"ClearML Task: created new task id=215dd7634cf2475693ea6081e2ab7559\n", "ClearML Task: created new task id=cbf4a5162c604d6ea8f14e71e2d27410\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/215dd7634cf2475693ea6081e2ab7559/output/log\n", "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/cbf4a5162c604d6ea8f14e71e2d27410/output/log\n",
"Early stopping triggered\n" "Early stopping triggered\n"
] ]
} }
@@ -264,7 +206,9 @@
").to(\"cuda\")\n", ").to(\"cuda\")\n",
"\n", "\n",
"# model = LinearRegression(inputDim, len(quantiles))\n", "# model = LinearRegression(inputDim, len(quantiles))\n",
"model = NonLinearRegression(inputDim, len(quantiles), hiddenSize=1024, numLayers=5)\n", "time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)\n",
"non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)\n",
"model = nn.Sequential(time_embedding, non_linear_regression_model)\n",
"optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)\n", "optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)\n",
"\n", "\n",
"#### Trainer ####\n", "#### Trainer ####\n",
@@ -294,29 +238,35 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"/workspaces/Thesis/src/notebooks/../trainers/quantile_trainer.py:335: UserWarning:\n", "/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:335: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"\n", " quantiles_tensor = torch.tensor(quantiles)\n",
"To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", "/workspaces/Thesis/src/notebooks/../../src/losses/pinball_loss.py:22: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"\n", " self.quantiles_tensor = torch.tensor(quantiles, dtype=torch.float32)\n",
"/workspaces/Thesis/src/notebooks/../losses/pinball_loss.py:23: UserWarning:\n", "InsecureRequestWarning: Certificate verification is disabled! Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n"
"\n",
"To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"\n"
] ]
}, },
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"ClearML Task: created new task id=160b4938ae3145db9ef8b55e71452987\n", "ClearML Task: created new task id=0c748cf6ec0f4c748cc35be78ae4c6c1\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/160b4938ae3145db9ef8b55e71452987/output/log\n", "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/0c748cf6ec0f4c748cc35be78ae4c6c1/output/log\n",
"2023-11-26 16:15:07,490 - clearml.Task - INFO - Storing jupyter notebook directly as code\n",
"2023-11-26 16:15:09,255 - clearml.model - WARNING - 500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n",
"2023-11-26 16:15:09,256 - clearml.model - WARNING - Selected model `Autoregressive Quantile Regression (quarter + day of week)` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n",
"2023-11-26 16:15:09,265 - clearml.frameworks - INFO - Found existing registered model id=bc0cb0d7fc614e2e8b0edf5b85348646 [/workspaces/Thesis/src/notebooks/checkpoint.pt] reusing it.\n",
"2023-11-26 16:15:09,958 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"2023-11-26 16:15:10,998 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"2023-11-26 16:15:12,118 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"2023-11-26 16:15:13,152 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"2023-11-26 16:15:14,540 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"Early stopping triggered\n" "Early stopping triggered\n"
] ]
}, },
@@ -324,7 +274,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"/workspaces/Thesis/src/notebooks/../trainers/quantile_trainer.py:366: UserWarning:\n", "/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:366: UserWarning:\n",
"\n", "\n",
"Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at /opt/conda/conda-bld/pytorch_1682343967769/work/torch/csrc/utils/tensor_new.cpp:245.)\n", "Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at /opt/conda/conda-bld/pytorch_1682343967769/work/torch/csrc/utils/tensor_new.cpp:245.)\n",
"\n" "\n"
@@ -368,18 +318,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([3, 192])\n",
"torch.Size([3, 96])\n"
]
}
],
"source": [ "source": [
"\n", "\n",
"\n", "\n",

View File

@@ -218,7 +218,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
if other_features is not None: if other_features is not None:
prev_features = torch.cat( prev_features = torch.cat(
new_features, other_features, dim=1 (new_features.to(self.device), other_features.to(self.device)), dim=1
) # (batch_size, 96 + new_features) ) # (batch_size, 96 + new_features)
else: else:
prev_features = new_features prev_features = new_features
@@ -252,36 +252,39 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
def plot_quantile_percentages( def plot_quantile_percentages(
self, task, data_loader, train: bool = True, iteration: int = None self, task, data_loader, train: bool = True, iteration: int = None
): ):
quantiles = self.quantiles.cpu().numpy()
total = 0 total = 0
quantile_counter = {q: 0 for q in self.quantiles.cpu().numpy()} quantile_counter = {q: 0 for q in quantiles}
self.model.eval()
with torch.no_grad(): with torch.no_grad():
for inputs, targets, _ in data_loader: for inputs, targets, _ in data_loader:
inputs = inputs.to("cuda") inputs = inputs.to(self.device)
output = self.model(inputs) output = self.model(inputs).cpu().numpy()
targets = targets.squeeze(-1).cpu().numpy()
# output shape: (batch_size, num_quantiles) # output shape: (batch_size, num_quantiles)
# target shape: (batch_size, 1) # target shape: (batch_size, 1)
for i, q in enumerate(self.quantiles.cpu().numpy()): for i, q in enumerate(quantiles):
quantile_counter[q] += np.sum( quantile_counter[q] += np.sum(
targets.squeeze(-1).cpu().numpy() < output[:, i].cpu().numpy() targets < output[:, i]
) )
total += len(targets) total += len(targets)
# to numpy array of length len(quantiles) # to numpy array of length len(quantiles)
percentages = np.array( percentages = np.array(
[quantile_counter[q] / total for q in self.quantiles.cpu().numpy()] [quantile_counter[q] / total for q in quantiles]
) )
bar_width = 0.35 bar_width = 0.35
index = np.arange(len(self.quantiles.cpu().numpy())) index = np.arange(len(quantiles))
# Plotting the bars # Plotting the bars
fig, ax = plt.subplots(figsize=(15, 10)) fig, ax = plt.subplots(figsize=(15, 10))
bar1 = ax.bar( bar1 = ax.bar(
index, self.quantiles.cpu().numpy(), bar_width, label="Ideal", color="brown" index, quantiles, bar_width, label="Ideal", color="brown"
) )
bar2 = ax.bar( bar2 = ax.bar(
index + bar_width, percentages, bar_width, label="NN model", color="blue" index + bar_width, percentages, bar_width, label="NN model", color="blue"
@@ -305,7 +308,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
ax.set_ylabel("Fraction of data under quantile forecast") ax.set_ylabel("Fraction of data under quantile forecast")
ax.set_title(f"Quantile Performance Comparison ({series_name})") ax.set_title(f"Quantile Performance Comparison ({series_name})")
ax.set_xticks(index + bar_width / 2) ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(self.quantiles.cpu().numpy()) ax.set_xticklabels(quantiles)
ax.legend() ax.legend()
task.get_logger().report_matplotlib_figure( task.get_logger().report_matplotlib_figure(

View File

@@ -41,7 +41,7 @@ class Trainer:
self.patience = patience self.patience = patience
self.delta = delta self.delta = delta
def add_metrics_to_track(self, loss: torch.nn.Module | list[torch.nn.Module]): def add_metrics_to_track(self, loss):
if isinstance(loss, list): if isinstance(loss, list):
self.metrics_to_track.extend(loss) self.metrics_to_track.extend(loss)
else: else:
@@ -59,7 +59,8 @@ class Trainer:
if self.debug: if self.debug:
task.add_tags("Debug") task.add_tags("Debug")
change_description = input("Enter a change description: ") # change_description = input("Enter a change description: ")
change_description = ""
if change_description: if change_description:
task.set_comment(change_description) task.set_comment(change_description)

View File

@@ -5,6 +5,7 @@ class ClearMLHelper:
self.project_name = project_name self.project_name = project_name
def get_task(self, task_name: str = "Model Training"): def get_task(self, task_name: str = "Model Training"):
Task.add_requirements("requirements.txt")
Task.ignore_requirements("torch") Task.ignore_requirements("torch")
Task.ignore_requirements("torchvision") Task.ignore_requirements("torchvision")
Task.ignore_requirements("tensorboard") Task.ignore_requirements("tensorboard")