Autoregressive Quantile Training with Policy evaluation
This commit is contained in:
@@ -2,6 +2,7 @@ FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
|
|||||||
|
|
||||||
RUN apt-get update
|
RUN apt-get update
|
||||||
RUN apt-get install -y git
|
RUN apt-get install -y git
|
||||||
|
RUN apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-bibtex-extra
|
||||||
|
|
||||||
COPY requirements.txt /tmp/requirements.txt
|
COPY requirements.txt /tmp/requirements.txt
|
||||||
|
|
||||||
|
|||||||
@@ -159,7 +159,7 @@ Test data: 01-01-2023 until 08-10-2023
|
|||||||
|
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
- [ ] diffusion model oefening generative models vragen
|
- [x] diffusion model oefening generative models vragen -> geen lab hierop
|
||||||
- [ ] Non autoregressive models policy testen (Non Linear eerst) -> als dit al slecht, niet verder kijken, wel vermelden
|
- [ ] Non autoregressive models policy testen (Non Linear eerst) -> als dit al slecht, niet verder kijken, wel vermelden
|
||||||
- [ ] Policy in test set -> over charge cycles (stop trading electricity)
|
- [ ] Policy in test set -> over charge cycles (stop trading electricity)
|
||||||
|
|
||||||
|
|||||||
152
src/policies/PolicyEvaluator.py
Normal file
152
src/policies/PolicyEvaluator.py
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
from clearml import Task
|
||||||
|
from tqdm import tqdm
|
||||||
|
from src.policies.simple_baseline import BaselinePolicy
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import plotly.express as px
|
||||||
|
|
||||||
|
from src.utils.imbalance_price_calculator import ImbalancePriceCalculator
|
||||||
|
|
||||||
|
|
||||||
|
class PolicyEvaluator:
    """Evaluate a baseline battery-trading policy on sampled imbalance-price
    scenarios and report profits / charge cycles, optionally to a ClearML task.

    Rows are accumulated in ``self.profits`` (one per date x penalty) and then
    aggregated into per-year extrapolations for reporting.
    """

    def __init__(self, baseline_policy: BaselinePolicy, task: Task = None):
        self.baseline_policy = baseline_policy

        # NOTE(review): empty data_path — presumably the calculator falls back
        # to a default location; confirm against ImbalancePriceCalculator.
        self.ipc = ImbalancePriceCalculator(data_path="")
        # Unique calendar dates present in the policy's test set.
        self.dates = baseline_policy.test_data["DateTime"].dt.date.unique()
        self.dates = pd.to_datetime(self.dates)

        ### Load Imbalance Prices ###
        imbalance_prices = pd.read_csv('data/imbalance_prices.csv', sep=';')
        imbalance_prices["DateTime"] = pd.to_datetime(imbalance_prices['DateTime'], utc=True)
        self.imbalance_prices = imbalance_prices.sort_values(by=['DateTime'])

        # Charge-cycle penalties swept during evaluation.
        self.penalties = [0, 100, 300, 500, 800, 1000, 1500]
        self.profits = []

        self.task = task

    def get_imbalance_prices_for_date(self, date):
        """Return the 'Positive imbalance price' values for a single calendar date."""
        imbalance_prices_day = self.imbalance_prices[self.imbalance_prices["DateTime"].dt.date == date]
        return imbalance_prices_day['Positive imbalance price'].values

    # Backward-compatible alias for the original (misspelled) method name.
    get_imbanlance_prices_for_date = get_imbalance_prices_for_date

    def evaluate_for_date(self, date, idx_samples, test_loader):
        """Evaluate the policy for one test day across all penalties.

        Appends one row per penalty to ``self.profits``:
        [date, penalty, profit, charge cycles, charge threshold, discharge threshold].
        """
        # Candidate thresholds swept by the baseline policy.
        charge_thresholds = np.arange(-100, 250, 25)
        discharge_thresholds = np.arange(-100, 250, 25)

        idx = test_loader.dataset.get_idx_for_date(date.date())
        (initial, samples) = idx_samples[idx]

        # Last value of the first initial sequence; it is repeated so every
        # sampled path is prefixed with the same observed starting value.
        initial = initial.cpu().numpy()[0][-1]
        samples = samples.cpu().numpy()

        initial = np.repeat(initial, samples.shape[0])
        combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1)

        reconstructed_imbalance_prices = self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined)
        # NOTE(review): device is hard-coded to "cuda"; fails on CPU-only hosts.
        reconstructed_imbalance_prices = torch.tensor(reconstructed_imbalance_prices, device="cuda")

        real_imbalance_prices = self.get_imbalance_prices_for_date(date.date())

        for penalty in self.penalties:
            found_charge_thresholds, found_discharge_thresholds = self.baseline_policy.get_optimal_thresholds(
                reconstructed_imbalance_prices, charge_thresholds, discharge_thresholds, penalty
            )

            # Average the per-sample optimal thresholds into a single prediction.
            predicted_charge_threshold = found_charge_thresholds.mean(axis=0)
            predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0)

            ### Determine Profits and Charge Cycles ###
            simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
                torch.tensor([[real_imbalance_prices]]),
                torch.tensor([predicted_charge_threshold]),
                torch.tensor([predicted_discharge_threshold]),
            )
            self.profits.append([
                date,
                penalty,
                simulated_profit[0][0].item(),
                simulated_charge_cycles[0][0].item(),
                predicted_charge_threshold.item(),
                predicted_discharge_threshold.item(),
            ])

    def evaluate_test_set(self, idx_samples, test_loader):
        """Evaluate every test date; results end up in the ``self.profits`` DataFrame.

        Ctrl-C propagates; any other failure is logged and evaluation stops,
        keeping whatever results were collected so far (best-effort).
        """
        self.profits = []
        try:
            for date in tqdm(self.dates):
                self.evaluate_for_date(date, idx_samples, test_loader)
        except KeyboardInterrupt:
            print("Interrupted")
            # Bare re-raise preserves the original traceback.
            raise
        except Exception as e:
            # Best-effort: log and continue with partial results.
            print(e)

        self.profits = pd.DataFrame(
            self.profits,
            columns=["Date", "Penalty", "Profit", "Charge Cycles", "Charge Threshold", "Discharge Threshold"],
        )

    def _profit_summary(self):
        """Aggregate ``self.profits`` per penalty into per-year extrapolations.

        NOTE(review): the returned columns are labelled "Total Profit" /
        "Total Charge Cycles" but actually hold per-year values
        (sum / num_days * 365); the labels are kept unchanged because
        downstream consumers index by these exact names.
        """
        aggregated = self.profits.groupby("Penalty").agg(
            Total_Profit=("Profit", "sum"),
            Total_Charge_Cycles=("Charge Cycles", "sum"),
            Num_Days=("Date", "nunique"),
        )
        aggregated["Profit_Per_Year"] = aggregated["Total_Profit"] / aggregated["Num_Days"] * 365
        aggregated["Charge_Cycles_Per_Year"] = aggregated["Total_Charge_Cycles"] / aggregated["Num_Days"] * 365

        # Reset index to make 'Penalty' a column again and drop the raw totals.
        final_df = aggregated.reset_index().drop(columns=["Total_Profit", "Total_Charge_Cycles", "Num_Days"])

        # Rename columns to match expected output (see note above about labels).
        final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"]
        return final_df

    def plot_profits_table(self):
        """Log the per-penalty profit summary as a ClearML table (no-op without a task)."""
        # Check if task or required state is not set
        if self.task is None or not hasattr(self, 'penalties') or not hasattr(self, 'profits'):
            print("Task, penalties, or profits not defined.")
            return

        if self.profits.empty:
            print("Profits DataFrame is empty.")
            return

        final_df = self._profit_summary()

        # Log the final results table
        self.task.get_logger().report_table(
            "Policy Results",
            "Policy Results",
            iteration=0,
            table_plot=final_df
        )

    def plot_thresholds_per_day(self):
        """Plot the penalty-0 charge/discharge thresholds per day via ClearML."""
        if self.task is None:
            return

        fig = px.line(
            self.profits[self.profits["Penalty"] == 0],
            x="Date",
            y=["Charge Threshold", "Discharge Threshold"],
            title="Charge and Discharge Thresholds per Day"
        )

        fig.update_layout(
            width=1000,
            height=600,
            title_x=0.5,
        )

        self.task.get_logger().report_plotly(
            "Thresholds per Day",
            "Thresholds per Day",
            iteration=0,
            figure=fig
        )

    def get_profits_as_scalars(self):
        """Return the per-penalty summary DataFrame (see ``_profit_summary``)."""
        return self._profit_summary()
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
import torch
|
import torch
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
from src.policies.PolicyEvaluator import PolicyEvaluator
|
||||||
from src.losses.crps_metric import crps_from_samples
|
from src.losses.crps_metric import crps_from_samples
|
||||||
from src.trainers.trainer import Trainer
|
from src.trainers.trainer import Trainer
|
||||||
from src.trainers.autoregressive_trainer import AutoRegressiveTrainer
|
from src.trainers.autoregressive_trainer import AutoRegressiveTrainer
|
||||||
@@ -131,10 +132,13 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
|||||||
data_processor: DataProcessor,
|
data_processor: DataProcessor,
|
||||||
quantiles: list,
|
quantiles: list,
|
||||||
device: torch.device,
|
device: torch.device,
|
||||||
|
policy_evaluator: PolicyEvaluator = None,
|
||||||
debug: bool = True,
|
debug: bool = True,
|
||||||
):
|
):
|
||||||
|
|
||||||
self.quantiles = quantiles
|
self.quantiles = quantiles
|
||||||
|
self.test_set_samples = {}
|
||||||
|
self.policy_evaluator = policy_evaluator
|
||||||
|
|
||||||
criterion = PinballLoss(quantiles=quantiles)
|
criterion = PinballLoss(quantiles=quantiles)
|
||||||
super().__init__(
|
super().__init__(
|
||||||
@@ -149,6 +153,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
|||||||
|
|
||||||
def calculate_crps_from_samples(self, task, dataloader, epoch: int):
|
def calculate_crps_from_samples(self, task, dataloader, epoch: int):
|
||||||
crps_from_samples_metric = []
|
crps_from_samples_metric = []
|
||||||
|
generated_samples = {}
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
total_samples = len(dataloader.dataset) - 96
|
total_samples = len(dataloader.dataset) - 96
|
||||||
@@ -160,9 +165,12 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
|||||||
|
|
||||||
for idx in tqdm(idx_batch):
|
for idx in tqdm(idx_batch):
|
||||||
computed_idx_batch = [idx] * 100
|
computed_idx_batch = [idx] * 100
|
||||||
_, _, samples, targets = self.auto_regressive(
|
initial, _, samples, targets = self.auto_regressive(
|
||||||
dataloader.dataset, idx_batch=computed_idx_batch
|
dataloader.dataset, idx_batch=computed_idx_batch
|
||||||
)
|
)
|
||||||
|
|
||||||
|
generated_samples[idx.item()] = (initial, self.data_processor.inverse_transform(samples))
|
||||||
|
|
||||||
samples = samples.unsqueeze(0)
|
samples = samples.unsqueeze(0)
|
||||||
targets = targets.squeeze(-1)
|
targets = targets.squeeze(-1)
|
||||||
targets = targets[0].unsqueeze(0)
|
targets = targets[0].unsqueeze(0)
|
||||||
@@ -175,6 +183,20 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
|||||||
title="CRPS_from_samples", series="test", value=np.mean(crps_from_samples_metric), iteration=epoch
|
title="CRPS_from_samples", series="test", value=np.mean(crps_from_samples_metric), iteration=epoch
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# using the policy evaluator, evaluate the policy with the generated samples
|
||||||
|
if self.policy_evaluator is not None:
|
||||||
|
_, test_loader = self.data_processor.get_dataloaders(
|
||||||
|
predict_sequence_length=self.model.output_size)
|
||||||
|
self.policy_evaluator.evaluate_test_set(generated_samples, test_loader)
|
||||||
|
df = self.policy_evaluator.get_profits_as_scalars()
|
||||||
|
|
||||||
|
# for each row, report the profits
|
||||||
|
for idx, row in df.iterrows():
|
||||||
|
task.get_logger().report_scalar(
|
||||||
|
title="Profit", series=f"penalty_{row['Penalty']}", value=row["Total Profit"], iteration=epoch
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def log_final_metrics(self, task, dataloader, train: bool = True):
|
def log_final_metrics(self, task, dataloader, train: bool = True):
|
||||||
metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track}
|
metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track}
|
||||||
transformed_metrics = {
|
transformed_metrics = {
|
||||||
@@ -194,10 +216,14 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
|||||||
|
|
||||||
if train == False:
|
if train == False:
|
||||||
for idx in tqdm(idx_batch):
|
for idx in tqdm(idx_batch):
|
||||||
computed_idx_batch = [idx] * 100
|
computed_idx_batch = [idx] * 250
|
||||||
_, outputs, samples, targets = self.auto_regressive(
|
initial, outputs, samples, targets = self.auto_regressive(
|
||||||
dataloader.dataset, idx_batch=computed_idx_batch
|
dataloader.dataset, idx_batch=computed_idx_batch
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# save the samples for the idx, these will be used for evaluating the policy
|
||||||
|
self.test_set_samples[idx.item()] = (initial, self.data_processor.inverse_transform(samples))
|
||||||
|
|
||||||
samples = samples.unsqueeze(0)
|
samples = samples.unsqueeze(0)
|
||||||
targets = targets.squeeze(-1)
|
targets = targets.squeeze(-1)
|
||||||
targets = targets[0].unsqueeze(0)
|
targets = targets[0].unsqueeze(0)
|
||||||
|
|||||||
@@ -196,7 +196,7 @@ class Trainer:
|
|||||||
|
|
||||||
if task:
|
if task:
|
||||||
self.finish_training(task=task)
|
self.finish_training(task=task)
|
||||||
task.close()
|
# task.close()
|
||||||
except Exception:
|
except Exception:
|
||||||
if task:
|
if task:
|
||||||
task.close()
|
task.close()
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
from src.policies.PolicyEvaluator import PolicyEvaluator
|
||||||
|
from src.policies.simple_baseline import BaselinePolicy, Battery
|
||||||
from src.models.lstm_model import GRUModel
|
from src.models.lstm_model import GRUModel
|
||||||
from src.data import DataProcessor, DataConfig
|
from src.data import DataProcessor, DataConfig
|
||||||
from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer
|
from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer
|
||||||
@@ -68,12 +70,17 @@ model_parameters = task.connect(model_parameters, name="model_parameters")
|
|||||||
|
|
||||||
time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"])
|
time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"])
|
||||||
# lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=model_parameters["hidden_size"], num_layers=model_parameters["num_layers"], dropout=model_parameters["dropout"])
|
# lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=model_parameters["hidden_size"], num_layers=model_parameters["num_layers"], dropout=model_parameters["dropout"])
|
||||||
# non_linear_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=model_parameters["hidden_size"], numLayers=model_parameters["num_layers"], dropout=model_parameters["dropout"])
|
non_linear_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=model_parameters["hidden_size"], numLayers=model_parameters["num_layers"], dropout=model_parameters["dropout"])
|
||||||
linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
|
# linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
|
||||||
|
|
||||||
model = nn.Sequential(time_embedding, linear_model)
|
model = nn.Sequential(time_embedding, non_linear_model)
|
||||||
optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"])
|
optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"])
|
||||||
|
|
||||||
|
### Policy Evaluator ###
|
||||||
|
battery = Battery(2, 1)
|
||||||
|
baseline_policy = BaselinePolicy(battery, data_path="")
|
||||||
|
policy_evaluator = PolicyEvaluator(baseline_policy, task)
|
||||||
|
|
||||||
#### Trainer ####
|
#### Trainer ####
|
||||||
trainer = AutoRegressiveQuantileTrainer(
|
trainer = AutoRegressiveQuantileTrainer(
|
||||||
model,
|
model,
|
||||||
@@ -82,12 +89,24 @@ trainer = AutoRegressiveQuantileTrainer(
|
|||||||
data_processor,
|
data_processor,
|
||||||
quantiles,
|
quantiles,
|
||||||
"cuda",
|
"cuda",
|
||||||
|
policy_evaluator=policy_evaluator,
|
||||||
debug=False,
|
debug=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
trainer.add_metrics_to_track(
|
trainer.add_metrics_to_track(
|
||||||
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
|
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
|
||||||
)
|
)
|
||||||
trainer.early_stopping(patience=30)
|
trainer.early_stopping(patience=10)
|
||||||
trainer.plot_every(5)
|
trainer.plot_every(5)
|
||||||
trainer.train(task=task, epochs=epochs, remotely=True)
|
trainer.train(task=task, epochs=epochs, remotely=False)
|
||||||
|
|
||||||
|
### Policy Evaluation ###
|
||||||
|
idx_samples = trainer.test_set_samples
|
||||||
|
_, test_loader = trainer.data_processor.get_dataloaders(
|
||||||
|
predict_sequence_length=trainer.model.output_size)
|
||||||
|
|
||||||
|
policy_evaluator.evaluate_test_set(idx_samples, test_loader)
|
||||||
|
policy_evaluator.plot_profits_table()
|
||||||
|
policy_evaluator.plot_thresholds_per_day()
|
||||||
|
|
||||||
|
task.close()
|
||||||
Reference in New Issue
Block a user