Changed steps in diffusion model
This commit is contained in:
@@ -144,5 +144,14 @@ Test data: 01-01-2023 until 08-10–2023
|
|||||||
- [x] Profit penalty parameter als over charge cycles voor een dag -> parameter bepalen op training data (convex probleem) (< 400 charge cycles per jaar) (over een dag kijken hoeveel charge cycles -> profit - penalty * charge cycles erover, (misschien belonen als eronder charge cycles))
|
- [x] Profit penalty parameter als over charge cycles voor een dag -> parameter bepalen op training data (convex probleem) (< 400 charge cycles per jaar) (over een dag kijken hoeveel charge cycles -> profit - penalty * charge cycles erover, (misschien belonen als eronder charge cycles))
|
||||||
|
|
||||||
- [ ] Meer verschil bekijken tussen GRU en diffusion
|
- [ ] Meer verschil bekijken tussen GRU en diffusion
|
||||||
- [ ] Andere lagen voor diffusion model (GRU, kijken naar TSDiff)
|
- [ ] (In Progress) Andere lagen voor diffusion model (GRU, kijken naar TSDiff)
|
||||||
- [x] Policies met andere modellen (Linear, Non Linear)
|
- [x] Policies met andere modellen (Linear, Non Linear)
|
||||||
|
|
||||||
|
- [ ] Visualize the policies over the whole test set -> thresholds plotten voor elke dag (elke policy) -> mss distribution om overzichtelijk te houden (mean and std)
|
||||||
|
- [ ] Probleem met diffusion model (activation function? waarom direct grote waardes?)
|
||||||
|
|
||||||
|
- [ ] Autoregressive confidence problem -> Quantiles zelf uit elkaar halen (helpt dit?)
|
||||||
|
|
||||||
|
- [ ] Reduce the number of time steps for the diffusion model (UNet activation functions?)
|
||||||
|
|
||||||
|
- [ ] (State space model? S4)
|
||||||
@@ -56,7 +56,7 @@ class GRUDiffusionModel(DiffusionModel):
|
|||||||
# GRU layer
|
# GRU layer
|
||||||
self.gru = nn.GRU(input_size=input_size + time_dim + other_inputs_dim,
|
self.gru = nn.GRU(input_size=input_size + time_dim + other_inputs_dim,
|
||||||
hidden_size=gru_hidden_size,
|
hidden_size=gru_hidden_size,
|
||||||
num_layers=2,
|
num_layers=3,
|
||||||
batch_first=True)
|
batch_first=True)
|
||||||
|
|
||||||
# Fully connected layers after GRU
|
# Fully connected layers after GRU
|
||||||
@@ -87,7 +87,8 @@ class GRUDiffusionModel(DiffusionModel):
|
|||||||
# Pass through GRU
|
# Pass through GRU
|
||||||
output, hidden = self.gru(x) # Hidden Shape: [batch_size, seq_len, 1]
|
output, hidden = self.gru(x) # Hidden Shape: [batch_size, seq_len, 1]
|
||||||
|
|
||||||
x = hidden
|
# Get last hidden state
|
||||||
|
x = hidden[-1]
|
||||||
|
|
||||||
# Process each time step's output with fully connected layers
|
# Process each time step's output with fully connected layers
|
||||||
for layer in self.fc_layers:
|
for layer in self.fc_layers:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -31,7 +31,7 @@
|
|||||||
"from datetime import datetime\n",
|
"from datetime import datetime\n",
|
||||||
"import torch.nn as nn\n",
|
"import torch.nn as nn\n",
|
||||||
"from src.models.time_embedding_layer import TimeEmbedding\n",
|
"from src.models.time_embedding_layer import TimeEmbedding\n",
|
||||||
"from src.models.diffusion_model import SimpleDiffusionModel\n",
|
"from src.models.diffusion_model import SimpleDiffusionModel, GRUDiffusionModel\n",
|
||||||
"from src.trainers.diffusion_trainer import DiffusionTrainer\n",
|
"from src.trainers.diffusion_trainer import DiffusionTrainer\n",
|
||||||
"from torchinfo import summary\n",
|
"from torchinfo import summary\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -62,30 +62,99 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"data_config.NOMINAL_NET_POSITION = True\n",
|
"data_config.NOMINAL_NET_POSITION = True\n",
|
||||||
"\n",
|
"\n",
|
||||||
"data_processor = DataProcessor(data_config, path=\"../../\")\n",
|
"data_processor = DataProcessor(data_config, path=\"../../\", lstm=True)\n",
|
||||||
"data_processor.set_batch_size(1024)\n",
|
"data_processor.set_batch_size(1024)\n",
|
||||||
"data_processor.set_full_day_skip(True)"
|
"data_processor.set_full_day_skip(True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"ClearML Task: created new task id=b71216825809432682ea3c7841c07612\n",
|
"torch.Size([1024, 96, 96])\n"
|
||||||
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/b71216825809432682ea3c7841c07612/output/log\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "stderr",
|
"name": "stderr",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n",
|
"/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([1024, 96])) that is different to the input size (torch.Size([2, 1024, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
|
||||||
"Selected model `Autoregressive Non Linear Quantile Regression + Quarter + DoW + Net` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n"
|
" return F.mse_loss(input, target, reduction=self.reduction)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"torch.Size([556, 96, 96])\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([556, 96])) that is different to the input size (torch.Size([2, 556, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
|
||||||
|
" return F.mse_loss(input, target, reduction=self.reduction)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n",
|
||||||
|
"torch.Size([1024, 96, 96])\n",
|
||||||
|
"torch.Size([556, 96, 96])\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
"KeyboardInterrupt\n",
|
||||||
|
"\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -95,14 +164,15 @@
|
|||||||
"epochs=150\n",
|
"epochs=150\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#### Model ####\n",
|
"#### Model ####\n",
|
||||||
"model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n",
|
"# model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n",
|
||||||
|
"model = GRUDiffusionModel(96, [256, 256], other_inputs_dim=inputDim[2], time_dim=64, gru_hidden_size=128)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#### ClearML ####\n",
|
"#### ClearML ####\n",
|
||||||
"task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n",
|
"# task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#### Trainer ####\n",
|
"#### Trainer ####\n",
|
||||||
"trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n",
|
"trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n",
|
||||||
"trainer.train(epochs, learningRate, task)"
|
"trainer.train(epochs, learningRate, None)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -246,7 +316,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.11"
|
"version": "3.10.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -1197,7 +1197,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.11"
|
"version": "3.10.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ class DiffusionTrainer:
|
|||||||
self.model = model
|
self.model = model
|
||||||
self.device = device
|
self.device = device
|
||||||
|
|
||||||
self.noise_steps = 1000
|
self.noise_steps = 20
|
||||||
self.beta_start = 1e-4
|
self.beta_start = 1e-4
|
||||||
self.beta_end = 0.02
|
self.beta_end = 0.02
|
||||||
self.ts_length = 96
|
self.ts_length = 96
|
||||||
@@ -130,8 +130,8 @@ class DiffusionTrainer:
|
|||||||
predict_sequence_length=self.ts_length
|
predict_sequence_length=self.ts_length
|
||||||
)
|
)
|
||||||
|
|
||||||
train_sample_indices = self.random_samples(train=True, num_samples=10)
|
train_sample_indices = self.random_samples(train=True, num_samples=5)
|
||||||
test_sample_indices = self.random_samples(train=False, num_samples=10)
|
test_sample_indices = self.random_samples(train=False, num_samples=5)
|
||||||
|
|
||||||
for epoch in range(epochs):
|
for epoch in range(epochs):
|
||||||
running_loss = 0.0
|
running_loss = 0.0
|
||||||
@@ -153,7 +153,7 @@ class DiffusionTrainer:
|
|||||||
|
|
||||||
running_loss /= len(train_loader.dataset)
|
running_loss /= len(train_loader.dataset)
|
||||||
|
|
||||||
if epoch % 20 == 0 and epoch != 0:
|
if epoch % 40 == 0 and epoch != 0:
|
||||||
self.test(test_loader, epoch, task)
|
self.test(test_loader, epoch, task)
|
||||||
|
|
||||||
if task:
|
if task:
|
||||||
@@ -164,7 +164,7 @@ class DiffusionTrainer:
|
|||||||
value=loss.item(),
|
value=loss.item(),
|
||||||
)
|
)
|
||||||
|
|
||||||
if epoch % 100 == 0 and epoch != 0:
|
if epoch % 150 == 0 and epoch != 0:
|
||||||
self.debug_plots(task, True, train_loader, train_sample_indices, epoch)
|
self.debug_plots(task, True, train_loader, train_sample_indices, epoch)
|
||||||
self.debug_plots(task, False, test_loader, test_sample_indices, epoch)
|
self.debug_plots(task, False, test_loader, test_sample_indices, epoch)
|
||||||
|
|
||||||
@@ -177,6 +177,7 @@ class DiffusionTrainer:
|
|||||||
features, target, _ = data_loader.dataset[idx]
|
features, target, _ = data_loader.dataset[idx]
|
||||||
|
|
||||||
features = features.to(self.device)
|
features = features.to(self.device)
|
||||||
|
features = features.unsqueeze(0)
|
||||||
|
|
||||||
self.model.eval()
|
self.model.eval()
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ data_config.NOMINAL_NET_POSITION = True
|
|||||||
|
|
||||||
data_config = task.connect(data_config, name="data_features")
|
data_config = task.connect(data_config, name="data_features")
|
||||||
|
|
||||||
data_processor = DataProcessor(data_config, path="", lstm=True)
|
data_processor = DataProcessor(data_config, path="", lstm=False)
|
||||||
data_processor.set_batch_size(128)
|
data_processor.set_batch_size(128)
|
||||||
data_processor.set_full_day_skip(True)
|
data_processor.set_full_day_skip(True)
|
||||||
|
|
||||||
@@ -54,8 +54,8 @@ model_parameters = {
|
|||||||
model_parameters = task.connect(model_parameters, name="model_parameters")
|
model_parameters = task.connect(model_parameters, name="model_parameters")
|
||||||
|
|
||||||
#### Model ####
|
#### Model ####
|
||||||
# model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"])
|
model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"])
|
||||||
model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=256)
|
# model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=256)
|
||||||
|
|
||||||
print("Starting training ...")
|
print("Starting training ...")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user