Added time as input feature

This commit is contained in:
Victor Mylle
2023-11-26 18:43:03 +00:00
parent a2c9427d16
commit 2f40f41786
9 changed files with 168 additions and 138 deletions

View File

@@ -18,14 +18,14 @@
- [x] Fix debug plots for quantile regression -> predict quantiles and look if true value is below a quantile, if so 1 else 0 and average these over all samples
- [ ] Full day debug plots for quantile regression
- [x] CPRS Metrics
- [ ] Time as input parameter:
- [x] Time as input parameter:
- [x] Probabilistic Baseline -> Quantiles on Training Data -> Breedte bekijken -> Gebruiken voor CPRS en plotjes
- [ ] Day-ahead implicit net position
- [x] Faster sampling for quantile regression
- [ ] Quantile plots for other model (Linear, GRU) (Check if better)
- [ ] Check example plots to see if metrics correspond with what seen on plots
- [ ] Time step (96 values) to embedding layer
- [x] Time step (96 values) to embedding layer
- [x] Mean of nrv per time step plotten (done for probabilistic baseline)
- [x] Convert back to MW on plots

View File

@@ -8,4 +8,5 @@ statsmodels
lightgbm
prettytable
clearml
properscoring
properscoring
nbconvert

View File

@@ -33,6 +33,14 @@ class NrvDataset(Dataset):
dataframe["wind_forecast"].to_numpy(), dtype=torch.float32
)
self.quarter = torch.tensor(
dataframe["quarter"].to_numpy(), dtype=torch.float32
)
self.day_of_week = torch.tensor(
dataframe["day_of_week"].to_numpy(), dtype=torch.float32
)
self.sequence_length = sequence_length
self.predict_sequence_length = predict_sequence_length
@@ -125,6 +133,20 @@ class NrvDataset(Dataset):
]
features.append(wind_forecast.view(-1))
### Time Features ###
time_feature = 0
if self.data_config.QUARTER:
time_feature += self.quarter[actual_idx].item()
if self.data_config.DAY_OF_WEEK:
d_w = self.day_of_week[actual_idx].item()
if self.data_config.QUARTER:
d_w *= 96
time_feature += d_w
if time_feature is not None:
features.append(torch.tensor([time_feature]))
if not features:
raise ValueError(
"No features are configured to be included in the dataset."
@@ -193,6 +215,20 @@ class NrvDataset(Dataset):
]
features.append(wind_forecast.view(-1))
### Time Features ###
time_feature = 0
if self.data_config.QUARTER:
time_feature += self.quarter[idx]
if self.data_config.DAY_OF_WEEK:
d_w = self.day_of_week[idx].item()
if self.data_config.QUARTER:
d_w *= 96
time_feature += d_w
if time_feature is not None:
features.append(torch.tensor([time_feature]))
target = self.nrv[
idx
+ self.sequence_length : idx

View File

@@ -31,13 +31,14 @@ class DataConfig:
### TIME ###
self.YEAR: bool = False
self.DAY: bool = False
self.DAY_OF_WEEK: bool = False
self.QUARTER: bool = False
class DataProcessor:
def __init__(self, data_config: DataConfig):
def __init__(self, data_config: DataConfig, path:str="./"):
self.batch_size = 2048
self.path = path
self.train_range = (
-np.inf,
@@ -62,6 +63,13 @@ class DataProcessor:
self.wind_forecast, on="datetime", how="left"
)
self.all_features["quarter"] = (
self.all_features["datetime"].dt.hour * 4
+ self.all_features["datetime"].dt.minute / 15
)
self.all_features["day_of_week"] = self.all_features["datetime"].dt.dayofweek
self.output_size = 96
self.data_config = data_config
@@ -103,7 +111,7 @@ class DataProcessor:
)
def get_nrv_history(self):
df = pd.read_csv(history_data_path, delimiter=";")
df = pd.read_csv(self.path + history_data_path, delimiter=";")
df = df[["datetime", "netregulationvolume"]]
df = df.rename(columns={"netregulationvolume": "nrv"})
df["datetime"] = pd.to_datetime(df["datetime"])
@@ -114,7 +122,7 @@ class DataProcessor:
return df
def get_load_forecast(self):
df = pd.read_csv(forecast_data_path, delimiter=";")
df = pd.read_csv(self.path + forecast_data_path, delimiter=";")
df = df.rename(
columns={
"Day-ahead 6PM forecast": "load_forecast",
@@ -129,7 +137,7 @@ class DataProcessor:
return df
def get_pv_forecast(self):
df = pd.read_csv(pv_forecast_data_path, delimiter=";")
df = pd.read_csv(self.path + pv_forecast_data_path, delimiter=";")
df = df.rename(
columns={"dayahead11hforecast": "pv_forecast", "Datetime": "datetime"}
@@ -142,7 +150,7 @@ class DataProcessor:
return df
def get_wind_forecast(self):
df = pd.read_csv(wind_forecast_data_path, delimiter=";")
df = pd.read_csv(self.path + wind_forecast_data_path, delimiter=";")
df = df.rename(
columns={"dayaheadforecast": "wind_forecast", "datetime": "datetime"}
@@ -267,3 +275,14 @@ class DataProcessor:
)
input, _, _ = next(iter(data_loader))
return input.shape[-1]
def get_time_feature_size(self):
time_feature_size = 1
if self.data_config.QUARTER:
time_feature_size *= 96
if self.data_config.DAY_OF_WEEK:
time_feature_size *= 7
if time_feature_size == 1:
return 0
return time_feature_size

View File

@@ -0,0 +1,28 @@
from torch import nn
import torch
class TimeEmbedding(nn.Module):
def __init__(self, time_features: int, embedding_dim: int):
super().__init__()
self.time_features = time_features
print(time_features)
self.embedding = nn.Embedding(time_features, embedding_dim)
def forward(self, x):
# Extract the last 'time_features' from the input
time_feature = x[:, -1]
# convert to int
time_feature = time_feature.int()
# Embed these time features
# print max value of time_feature
if time_feature.max() > self.time_features:
# print the row from x that includes the max value in the last column
print(x[time_feature == time_feature.max()])
print("time feature max value is greater than time features")
embedded_time = self.embedding(time_feature)
# Concatenate the embedded features with the original input (minus the last 'time feature')
return torch.cat((x[:, :-1], embedded_time), dim=1)
def output_dim(self, input_dim):
return input_dim + self.embedding.embedding_dim - 1

View File

@@ -7,21 +7,22 @@
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('..')\n",
"from data import DataProcessor, DataConfig\n",
"from trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression\n",
"from trainers.probabilistic_baseline import ProbabilisticBaselineTrainer\n",
"from trainers.autoregressive_trainer import AutoRegressiveTrainer\n",
"from trainers.trainer import Trainer\n",
"from utils.clearml import ClearMLHelper\n",
"from models import *\n",
"from losses import *\n",
"sys.path.append('../..')\n",
"from src.data import DataProcessor, DataConfig\n",
"from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression\n",
"from src.trainers.probabilistic_baseline import ProbabilisticBaselineTrainer\n",
"from src.trainers.autoregressive_trainer import AutoRegressiveTrainer\n",
"from src.trainers.trainer import Trainer\n",
"from src.utils.clearml import ClearMLHelper\n",
"from src.models import *\n",
"from src.losses import *\n",
"import torch\n",
"import numpy as np\n",
"from torch.nn import MSELoss, L1Loss\n",
"from datetime import datetime\n",
"import pytz\n",
"import torch.nn as nn\n",
"from src.models.time_embedding_layer import TimeEmbedding\n",
"\n",
"\n",
"# auto reload\n",
"%load_ext autoreload\n",
@@ -33,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -46,7 +47,10 @@
"data_config.WIND_FORECAST = False\n",
"data_config.WIND_HISTORY = False\n",
"\n",
"data_processor = DataProcessor(data_config)\n",
"data_config.QUARTER = True\n",
"data_config.DAY_OF_WEEK = False\n",
"\n",
"data_processor = DataProcessor(data_config, path=\"../../\")\n",
"data_processor.set_batch_size(1024)\n",
"data_processor.set_full_day_skip(False)"
]
@@ -60,27 +64,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"InsecureRequestWarning: Certificate verification is disabled! Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"ClearML Task: created new task id=8423d146953041eba8d7b4c27d7ed6a5\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/8423d146953041eba8d7b4c27d7ed6a5/output/log\n",
"2023-11-23 23:07:35,461 - clearml.Task - INFO - Storing jupyter notebook directly as code\n",
"2023-11-23 23:07:39,250 - clearml - WARNING - JSON serialization of artifact 'dictionary' failed, reverting to pickle\n"
]
}
],
"outputs": [],
"source": [
"data_processor.set_full_day_skip(True)\n",
"quantiles = [0.01, 0.05, 0.1, 0.15, 0.4, 0.5, 0.6, 0.85, 0.9, 0.95, 0.99]\n",
@@ -100,37 +86,9 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"InsecureRequestWarning: Certificate verification is disabled! Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"ClearML Task: created new task id=11553d672a2744479de07c9ac0a9dbde\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/11553d672a2744479de07c9ac0a9dbde/output/log\n",
"2023-11-19 18:06:57,539 - clearml.Task - INFO - Storing jupyter notebook directly as code\n",
"2023-11-19 18:06:57,543 - clearml.Repository Detection - WARNING - Can't get url information for git repo in /workspaces/Thesis/src/notebooks\n",
"2023-11-19 18:07:14,402 - clearml.model - WARNING - 500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n",
"2023-11-19 18:07:14,403 - clearml.model - WARNING - Selected model `Non Autoregressive Quantile Regression` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n",
"2023-11-19 18:07:14,412 - clearml.frameworks - INFO - Found existing registered model id=bc0cb0d7fc614e2e8b0edf5b85348646 [/workspaces/Thesis/src/notebooks/checkpoint.pt] reusing it.\n",
"2023-11-19 18:07:14,974 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:16,827 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:18,465 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:20,045 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:21,843 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"2023-11-19 18:07:28,812 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Non%20Autoregressive%20Model%20%28Non%20Linear%29%20using%20full%20day%20skip%20for%20training%20samples.11553d672a2744479de07c9ac0a9dbde/models/checkpoint.pt\n",
"Early stopping triggered\n"
]
}
],
"outputs": [],
"source": [
"#### Hyperparameters ####\n",
"inputDim = data_processor.get_input_size()\n",
@@ -158,32 +116,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"ename": "ParserError",
"evalue": "Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mParserError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/workspaces/Thesis/src/notebooks/training.ipynb Cell 8\u001b[0m line \u001b[0;36m1\n\u001b[1;32m <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=10'>11</a>\u001b[0m data_config \u001b[39m=\u001b[39m DataConfig()\n\u001b[1;32m <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=11'>12</a>\u001b[0m data_config\u001b[39m.\u001b[39mLOAD_FORECAST \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m---> <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=12'>13</a>\u001b[0m data_processor \u001b[39m=\u001b[39m DataProcessor(data_config)\n\u001b[1;32m <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=13'>14</a>\u001b[0m data_processor\u001b[39m.\u001b[39mset_batch_size(\u001b[39m1024\u001b[39m)\n\u001b[1;32m <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D?line=16'>17</a>\u001b[0m data_processor\u001b[39m.\u001b[39mset_train_range((datetime(year\u001b[39m=\u001b[39m\u001b[39m2015\u001b[39m, month\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m, day\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m, tzinfo\u001b[39m=\u001b[39mpytz\u001b[39m.\u001b[39mUTC), datetime(year\u001b[39m=\u001b[39m\u001b[39m2022\u001b[39m, month\u001b[39m=\u001b[39m\u001b[39m11\u001b[39m, day\u001b[39m=\u001b[39m\u001b[39m30\u001b[39m, tzinfo\u001b[39m=\u001b[39mpytz\u001b[39m.\u001b[39mUTC)))\n",
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../data/preprocessing.py:52\u001b[0m, in \u001b[0;36mDataProcessor.__init__\u001b[0;34m(self, data_config)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhistory_features \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_nrv_history()\n\u001b[1;32m 51\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfuture_features \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_load_forecast()\n\u001b[0;32m---> 52\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpv_forecast \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_pv_forecast()\n\u001b[1;32m 53\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwind_forecast \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_wind_forecast()\n\u001b[1;32m 55\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mall_features \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhistory_features\u001b[39m.\u001b[39mmerge(\n\u001b[1;32m 56\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfuture_features, on\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mdatetime\u001b[39m\u001b[39m\"\u001b[39m, how\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mleft\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 57\u001b[0m )\n",
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../data/preprocessing.py:132\u001b[0m, in \u001b[0;36mDataProcessor.get_pv_forecast\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 131\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_pv_forecast\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 132\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(pv_forecast_data_path, delimiter\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m;\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m 134\u001b[0m df \u001b[39m=\u001b[39m df\u001b[39m.\u001b[39mrename(\n\u001b[1;32m 135\u001b[0m columns\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mdayahead11hforecast\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mpv_forecast\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mDatetime\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mdatetime\u001b[39m\u001b[39m\"\u001b[39m}\n\u001b[1;32m 136\u001b[0m )\n\u001b[1;32m 137\u001b[0m df \u001b[39m=\u001b[39m df[[\u001b[39m\"\u001b[39m\u001b[39mdatetime\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mpv_forecast\u001b[39m\u001b[39m\"\u001b[39m]]\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 899\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 900\u001b[0m dialect,\n\u001b[1;32m 901\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 908\u001b[0m dtype_backend\u001b[39m=\u001b[39mdtype_backend,\n\u001b[1;32m 909\u001b[0m )\n\u001b[1;32m 910\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:583\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n\u001b[1;32m 582\u001b[0m \u001b[39mwith\u001b[39;00m parser:\n\u001b[0;32m--> 583\u001b[0m \u001b[39mreturn\u001b[39;00m parser\u001b[39m.\u001b[39;49mread(nrows)\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1704\u001b[0m, in \u001b[0;36mTextFileReader.read\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 1697\u001b[0m nrows \u001b[39m=\u001b[39m validate_integer(\u001b[39m\"\u001b[39m\u001b[39mnrows\u001b[39m\u001b[39m\"\u001b[39m, nrows)\n\u001b[1;32m 1698\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1699\u001b[0m \u001b[39m# error: \"ParserBase\" has no attribute \"read\"\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m (\n\u001b[1;32m 1701\u001b[0m index,\n\u001b[1;32m 1702\u001b[0m columns,\n\u001b[1;32m 1703\u001b[0m col_dict,\n\u001b[0;32m-> 1704\u001b[0m ) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_engine\u001b[39m.\u001b[39;49mread( \u001b[39m# type: ignore[attr-defined]\u001b[39;49;00m\n\u001b[1;32m 1705\u001b[0m nrows\n\u001b[1;32m 1706\u001b[0m )\n\u001b[1;32m 1707\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[1;32m 1708\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py:234\u001b[0m, in \u001b[0;36mCParserWrapper.read\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 233\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlow_memory:\n\u001b[0;32m--> 234\u001b[0m chunks \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_reader\u001b[39m.\u001b[39;49mread_low_memory(nrows)\n\u001b[1;32m 235\u001b[0m \u001b[39m# destructive to chunks\u001b[39;00m\n\u001b[1;32m 236\u001b[0m data \u001b[39m=\u001b[39m _concatenate_chunks(chunks)\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:814\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader.read_low_memory\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:875\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._read_rows\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:850\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:861\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._check_tokenize_status\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:2029\u001b[0m, in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mParserError\u001b[0m: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'."
]
}
],
"outputs": [],
"source": [
"#### Hyperparameters ####\n",
"inputDim = 96\n",
@@ -219,23 +154,30 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Quantile Regression"
"# Autoregressive Quantile Regression"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"96\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspaces/Thesis/src/notebooks/../trainers/quantile_trainer.py:70: UserWarning:\n",
"/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:68: UserWarning:\n",
"\n",
"To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"\n",
"/workspaces/Thesis/src/notebooks/../losses/pinball_loss.py:8: UserWarning:\n",
"/workspaces/Thesis/src/notebooks/../../src/losses/pinball_loss.py:8: UserWarning:\n",
"\n",
"To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"\n"
@@ -245,8 +187,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"ClearML Task: created new task id=215dd7634cf2475693ea6081e2ab7559\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/215dd7634cf2475693ea6081e2ab7559/output/log\n",
"ClearML Task: created new task id=cbf4a5162c604d6ea8f14e71e2d27410\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/cbf4a5162c604d6ea8f14e71e2d27410/output/log\n",
"Early stopping triggered\n"
]
}
@@ -264,7 +206,9 @@
").to(\"cuda\")\n",
"\n",
"# model = LinearRegression(inputDim, len(quantiles))\n",
"model = NonLinearRegression(inputDim, len(quantiles), hiddenSize=1024, numLayers=5)\n",
"time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)\n",
"non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)\n",
"model = nn.Sequential(time_embedding, non_linear_regression_model)\n",
"optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)\n",
"\n",
"#### Trainer ####\n",
@@ -294,29 +238,35 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspaces/Thesis/src/notebooks/../trainers/quantile_trainer.py:335: UserWarning:\n",
"\n",
"To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"\n",
"/workspaces/Thesis/src/notebooks/../losses/pinball_loss.py:23: UserWarning:\n",
"\n",
"To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
"\n"
"/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:335: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" quantiles_tensor = torch.tensor(quantiles)\n",
"/workspaces/Thesis/src/notebooks/../../src/losses/pinball_loss.py:22: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" self.quantiles_tensor = torch.tensor(quantiles, dtype=torch.float32)\n",
"InsecureRequestWarning: Certificate verification is disabled! Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"ClearML Task: created new task id=160b4938ae3145db9ef8b55e71452987\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/160b4938ae3145db9ef8b55e71452987/output/log\n",
"ClearML Task: created new task id=0c748cf6ec0f4c748cc35be78ae4c6c1\n",
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/0c748cf6ec0f4c748cc35be78ae4c6c1/output/log\n",
"2023-11-26 16:15:07,490 - clearml.Task - INFO - Storing jupyter notebook directly as code\n",
"2023-11-26 16:15:09,255 - clearml.model - WARNING - 500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n",
"2023-11-26 16:15:09,256 - clearml.model - WARNING - Selected model `Autoregressive Quantile Regression (quarter + day of week)` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n",
"2023-11-26 16:15:09,265 - clearml.frameworks - INFO - Found existing registered model id=bc0cb0d7fc614e2e8b0edf5b85348646 [/workspaces/Thesis/src/notebooks/checkpoint.pt] reusing it.\n",
"2023-11-26 16:15:09,958 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"2023-11-26 16:15:10,998 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"2023-11-26 16:15:12,118 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"2023-11-26 16:15:13,152 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"2023-11-26 16:15:14,540 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
"Early stopping triggered\n"
]
},
@@ -324,7 +274,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/workspaces/Thesis/src/notebooks/../trainers/quantile_trainer.py:366: UserWarning:\n",
"/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:366: UserWarning:\n",
"\n",
"Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at /opt/conda/conda-bld/pytorch_1682343967769/work/torch/csrc/utils/tensor_new.cpp:245.)\n",
"\n"
@@ -368,18 +318,9 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([3, 192])\n",
"torch.Size([3, 96])\n"
]
}
],
"outputs": [],
"source": [
"\n",
"\n",

View File

@@ -218,7 +218,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
if other_features is not None:
prev_features = torch.cat(
new_features, other_features, dim=1
(new_features.to(self.device), other_features.to(self.device)), dim=1
) # (batch_size, 96 + new_features)
else:
prev_features = new_features
@@ -252,36 +252,39 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
def plot_quantile_percentages(
self, task, data_loader, train: bool = True, iteration: int = None
):
quantiles = self.quantiles.cpu().numpy()
total = 0
quantile_counter = {q: 0 for q in self.quantiles.cpu().numpy()}
quantile_counter = {q: 0 for q in quantiles}
self.model.eval()
with torch.no_grad():
for inputs, targets, _ in data_loader:
inputs = inputs.to("cuda")
output = self.model(inputs)
inputs = inputs.to(self.device)
output = self.model(inputs).cpu().numpy()
targets = targets.squeeze(-1).cpu().numpy()
# output shape: (batch_size, num_quantiles)
# target shape: (batch_size, 1)
for i, q in enumerate(self.quantiles.cpu().numpy()):
for i, q in enumerate(quantiles):
quantile_counter[q] += np.sum(
targets.squeeze(-1).cpu().numpy() < output[:, i].cpu().numpy()
targets < output[:, i]
)
total += len(targets)
# to numpy array of length len(quantiles)
percentages = np.array(
[quantile_counter[q] / total for q in self.quantiles.cpu().numpy()]
[quantile_counter[q] / total for q in quantiles]
)
bar_width = 0.35
index = np.arange(len(self.quantiles.cpu().numpy()))
index = np.arange(len(quantiles))
# Plotting the bars
fig, ax = plt.subplots(figsize=(15, 10))
bar1 = ax.bar(
index, self.quantiles.cpu().numpy(), bar_width, label="Ideal", color="brown"
index, quantiles, bar_width, label="Ideal", color="brown"
)
bar2 = ax.bar(
index + bar_width, percentages, bar_width, label="NN model", color="blue"
@@ -305,7 +308,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
ax.set_ylabel("Fraction of data under quantile forecast")
ax.set_title(f"Quantile Performance Comparison ({series_name})")
ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(self.quantiles.cpu().numpy())
ax.set_xticklabels(quantiles)
ax.legend()
task.get_logger().report_matplotlib_figure(

View File

@@ -41,7 +41,7 @@ class Trainer:
self.patience = patience
self.delta = delta
def add_metrics_to_track(self, loss: torch.nn.Module | list[torch.nn.Module]):
def add_metrics_to_track(self, loss):
if isinstance(loss, list):
self.metrics_to_track.extend(loss)
else:
@@ -59,7 +59,8 @@ class Trainer:
if self.debug:
task.add_tags("Debug")
change_description = input("Enter a change description: ")
# change_description = input("Enter a change description: ")
change_description = ""
if change_description:
task.set_comment(change_description)

View File

@@ -5,6 +5,7 @@ class ClearMLHelper:
self.project_name = project_name
def get_task(self, task_name: str = "Model Training"):
Task.add_requirements("requirements.txt")
Task.ignore_requirements("torch")
Task.ignore_requirements("torchvision")
Task.ignore_requirements("tensorboard")