Policy evaluation during training

2024-02-25 22:13:00 +01:00
parent 90751866a4
commit f1b54df2c9
5 changed files with 450 additions and 158 deletions

View File

@@ -18,9 +18,11 @@ class PolicyEvaluator:
        self.dates = pd.to_datetime(self.dates)

        ### Load Imbalance Prices ###
        imbalance_prices = pd.read_csv("data/imbalance_prices.csv", sep=";")
        imbalance_prices["DateTime"] = pd.to_datetime(
            imbalance_prices["DateTime"], utc=True
        )
        self.imbalance_prices = imbalance_prices.sort_values(by=["DateTime"])

        self.penalties = [0, 100, 300, 500, 800, 1000, 1500]
        self.profits = []
@@ -28,30 +30,46 @@ class PolicyEvaluator:
        self.task = task

    def get_imbanlance_prices_for_date(self, date):
        imbalance_prices_day = self.imbalance_prices[
            self.imbalance_prices["DateTime"].dt.date == date
        ]
        return imbalance_prices_day["Positive imbalance price"].values

    def evaluate_for_date(self, date, idx_samples, test_loader):
        charge_thresholds = np.arange(-100, 250, 25)
        discharge_thresholds = np.arange(-100, 250, 25)
        idx = test_loader.dataset.get_idx_for_date(date.date())
        print("Evaluated for idx: ", idx)
        (initial, samples) = idx_samples[idx]
        if len(initial.shape) == 2:
            initial = initial.cpu().numpy()[0][-1]
        else:
            initial = initial.cpu().numpy()[-1]
        samples = samples.cpu().numpy()
        initial = np.repeat(initial, samples.shape[0])
        combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1)
        reconstructed_imbalance_prices = (
            self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined)
        )
        reconstructed_imbalance_prices = torch.tensor(
            reconstructed_imbalance_prices, device="cuda"
        )
        real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())

        for penalty in self.penalties:
            found_charge_thresholds, found_discharge_thresholds = (
                self.baseline_policy.get_optimal_thresholds(
                    reconstructed_imbalance_prices,
                    charge_thresholds,
                    discharge_thresholds,
                    penalty,
                )
            )
            predicted_charge_threshold = found_charge_thresholds.mean(axis=0)
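For orientation: the baseline policy searched over here is a threshold rule on the imbalance price, and the per-sample optimal thresholds are averaged into one prediction per penalty. Below is a minimal, assumption-laden toy sketch of such a rule, not the project's actual BaselinePolicy.simulate, which also models battery capacity (Battery(2, 1)) and the cycle penalty.

import numpy as np

def threshold_dispatch(prices, charge_threshold, discharge_threshold):
    # Toy rule: buy (charge) below the charge threshold, sell (discharge)
    # above the discharge threshold; capacity limits and penalty omitted.
    profit, cycles = 0.0, 0.0
    for price in prices:
        if price <= charge_threshold:
            profit -= price  # pay the price to charge
            cycles += 0.5    # half a cycle per charge step (assumption)
        elif price >= discharge_threshold:
            profit += price  # earn the price by discharging
            cycles += 0.5
    return profit, cycles

profit, cycles = threshold_dispatch(np.array([20.0, -10.0, 150.0]), 0.0, 100.0)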
@@ -59,13 +77,25 @@ class PolicyEvaluator:
            ### Determine Profits and Charge Cycles ###
            simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
                torch.tensor([[real_imbalance_prices]]),
                torch.tensor([predicted_charge_threshold]),
                torch.tensor([predicted_discharge_threshold]),
            )
            self.profits.append(
                [
                    date,
                    penalty,
                    simulated_profit[0][0].item(),
                    simulated_charge_cycles[0][0].item(),
                    predicted_charge_threshold.item(),
                    predicted_discharge_threshold.item(),
                ]
            )

    def evaluate_test_set(self, idx_samples, test_loader):
        self.profits = []
        try:
            print(self.dates)
            for date in tqdm(self.dates):
                self.evaluate_for_date(date, idx_samples, test_loader)
        except KeyboardInterrupt:
@@ -76,11 +106,28 @@ class PolicyEvaluator:
            print(e)
            pass
        self.profits = pd.DataFrame(
            self.profits,
            columns=[
                "Date",
                "Penalty",
                "Profit",
                "Charge Cycles",
                "Charge Threshold",
                "Discharge Threshold",
            ],
        )
        print("Profits calculated")
        print(self.profits.head())

    def plot_profits_table(self):
        # Check if task or penalties are not set
        if (
            self.task is None
            or not hasattr(self, "penalties")
            or not hasattr(self, "profits")
        ):
            print("Task, penalties, or profits not defined.")
            return
@@ -92,23 +139,32 @@ class PolicyEvaluator:
        aggregated = self.profits.groupby("Penalty").agg(
            Total_Profit=("Profit", "sum"),
            Total_Charge_Cycles=("Charge Cycles", "sum"),
            Num_Days=("Date", "nunique"),
        )
        aggregated["Profit_Per_Year"] = (
            aggregated["Total_Profit"] / aggregated["Num_Days"] * 365
        )
        aggregated["Charge_Cycles_Per_Year"] = (
            aggregated["Total_Charge_Cycles"] / aggregated["Num_Days"] * 365
        )
        # Reset index to make 'Penalty' a column again and drop unnecessary columns
        final_df = aggregated.reset_index().drop(
            columns=["Total_Profit", "Total_Charge_Cycles", "Num_Days"]
        )
        # Rename columns to match expected output
        final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"]
        # Merge in the profits accumulated until 400 charge cycles per year are reached
        profits_till_400 = self.get_profits_till_400()
        final_df = final_df.merge(profits_till_400, on="Penalty")
        # Log the final results table
        self.task.get_logger().report_table(
            "Policy Results", "Policy Results", iteration=0, table_plot=final_df
        )

    def plot_thresholds_per_day(self):
@@ -119,7 +175,7 @@ class PolicyEvaluator:
            self.profits[self.profits["Penalty"] == 0],
            x="Date",
            y=["Charge Threshold", "Discharge Threshold"],
            title="Charge and Discharge Thresholds per Day",
        )

        fig.update_layout(
@@ -129,24 +185,62 @@ class PolicyEvaluator:
        )
        self.task.get_logger().report_plotly(
            "Thresholds per Day", "Thresholds per Day", iteration=0, figure=fig
        )

    def get_profits_as_scalars(self):
        aggregated = self.profits.groupby("Penalty").agg(
            Total_Profit=("Profit", "sum"),
            Total_Charge_Cycles=("Charge Cycles", "sum"),
            Num_Days=("Date", "nunique"),
        )
        aggregated["Profit_Per_Year"] = (
            aggregated["Total_Profit"] / aggregated["Num_Days"] * 365
        )
        aggregated["Charge_Cycles_Per_Year"] = (
            aggregated["Total_Charge_Cycles"] / aggregated["Num_Days"] * 365
        )
        # Reset index to make 'Penalty' a column again and drop unnecessary columns
        final_df = aggregated.reset_index().drop(
            columns=["Total_Profit", "Total_Charge_Cycles", "Num_Days"]
        )
        # Rename columns to match expected output
        final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"]
        return final_df

    def get_profits_till_400(self):
        # Calculates the profit accumulated before 400 charge cycles per year are used up
        number_of_days = len(self.profits["Date"].unique())
        usable_charge_cycles = (400 / 365) * number_of_days
        # Sum the profit per penalty until the usable charge cycles are exhausted
        penalty_profits = {}
        penalty_charge_cycles = {}
        for index, row in self.profits.iterrows():
            penalty = row["Penalty"]
            profit = row["Profit"]
            charge_cycles = row["Charge Cycles"]
            if penalty not in penalty_profits:
                penalty_profits[penalty] = 0
                penalty_charge_cycles[penalty] = 0
            if penalty_charge_cycles[penalty] < usable_charge_cycles:
                penalty_profits[penalty] += profit
                penalty_charge_cycles[penalty] += charge_cycles
        df = pd.DataFrame(
            list(
                zip(
                    penalty_profits.keys(),
                    penalty_profits.values(),
                    penalty_charge_cycles.values(),
                )
            ),
            columns=["Penalty", "Profit_till_400", "Cycles_till_400"],
        )
        return df
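The row-wise loop above is easy to follow; the same cutoff can also be expressed per penalty group with pandas. A minimal sketch, assuming the `profits` frame is the one built in evaluate_test_set, with rows already in date order:

import pandas as pd

def profits_till_400_vectorized(profits: pd.DataFrame) -> pd.DataFrame:
    # Cycle budget scaled from 400 per year down to the evaluated date range
    usable = (400 / 365) * profits["Date"].nunique()

    def per_penalty(group: pd.DataFrame) -> pd.Series:
        # A row counts if the cycles accumulated *before* it are still under
        # budget, mirroring the check-then-add order of the loop above
        used_before = group["Charge Cycles"].cumsum().shift(fill_value=0.0)
        mask = used_before < usable
        return pd.Series(
            {
                "Profit_till_400": group.loc[mask, "Profit"].sum(),
                "Cycles_till_400": group.loc[mask, "Charge Cycles"].sum(),
            }
        )

    return profits.groupby("Penalty").apply(per_penalty).reset_index()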

View File

@@ -1,6 +1,7 @@
from clearml import Task
import torch
import torch.nn as nn
from src.policies.PolicyEvaluator import PolicyEvaluator
from torchinfo import summary
from src.losses.crps_metric import crps_from_samples
from src.data.preprocessing import DataProcessor
@@ -13,10 +14,18 @@ import seaborn as sns
import matplotlib.patches as mpatches


def sample_diffusion(
    model: DiffusionModel,
    n: int,
    inputs: torch.tensor,
    noise_steps=1000,
    beta_start=1e-4,
    beta_end=0.02,
    ts_length=96,
):
    device = next(model.parameters()).device
    beta = torch.linspace(beta_start, beta_end, noise_steps).to(device)
    alpha = 1.0 - beta
    alpha_hat = torch.cumprod(alpha, dim=0)

    if len(inputs.shape) == 2:
@@ -39,13 +48,24 @@ def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_
            else:
                noise = torch.zeros_like(x)

            x = (
                1
                / torch.sqrt(_alpha)
                * (x - ((1 - _alpha) / (torch.sqrt(1 - _alpha_hat))) * predicted_noise)
                + torch.sqrt(_beta) * noise
            )

    x = torch.clamp(x, -1.0, 1.0)
    return x
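For reference, the update in the inner loop above is the standard DDPM ancestral-sampling step (Ho et al., 2020), where `_beta`, `_alpha`, and `_alpha_hat` are $\beta_t$, $\alpha_t = 1 - \beta_t$, and $\bar{\alpha}_t = \prod_{s \le t} \alpha_s$, and `predicted_noise` is $\epsilon_\theta(x_t, t)$:

$$x_{t-1} = \frac{1}{\sqrt{\alpha_t}} \left( x_t - \frac{1 - \alpha_t}{\sqrt{1 - \bar{\alpha}_t}} \, \epsilon_\theta(x_t, t) \right) + \sqrt{\beta_t} \, z, \qquad z \sim \mathcal{N}(0, I),$$

with $z = 0$ on the final step, matching the `torch.zeros_like(x)` branch.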
class DiffusionTrainer:
    def __init__(
        self,
        model: nn.Module,
        data_processor: DataProcessor,
        device: torch.device,
        policy_evaluator: PolicyEvaluator = None,
    ):
        self.model = model
        self.device = device
@@ -56,11 +76,14 @@ class DiffusionTrainer:
        self.data_processor = data_processor
        self.beta = torch.linspace(self.beta_start, self.beta_end, self.noise_steps).to(
            self.device
        )
        self.alpha = 1.0 - self.beta
        self.alpha_hat = torch.cumprod(self.alpha, dim=0)
        self.best_score = None
        self.policy_evaluator = policy_evaluator

    def noise_time_series(self, x: torch.tensor, t: int):
        """Add noise to time series
@@ -69,7 +92,7 @@ class DiffusionTrainer:
            t (int): index of time step
        """
        sqrt_alpha_hat = torch.sqrt(self.alpha_hat[t])[:, None]
        sqrt_one_minus_alpha_hat = torch.sqrt(1.0 - self.alpha_hat[t])[:, None]
        noise = torch.randn_like(x)
        return sqrt_alpha_hat * x + sqrt_one_minus_alpha_hat * noise, noise
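noise_time_series is the closed-form forward (noising) process for the same schedule, drawing a noised series $x_t$ directly from the clean series $x_0$:

$$x_t = \sqrt{\bar{\alpha}_t} \, x_0 + \sqrt{1 - \bar{\alpha}_t} \, \epsilon, \qquad \epsilon \sim \mathcal{N}(0, I),$$

and returns the pair $(x_t, \epsilon)$ so the model can be trained to predict the injected noise.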
@@ -80,9 +103,16 @@ class DiffusionTrainer:
        """
        return torch.randint(low=1, high=self.noise_steps, size=(n,))

    def sample(self, model: DiffusionModel, n: int, inputs: torch.tensor):
        x = sample_diffusion(
            model,
            n,
            inputs,
            self.noise_steps,
            self.beta_start,
            self.beta_end,
            self.ts_length,
        )
        model.train()
        return x
@@ -99,7 +129,9 @@ class DiffusionTrainer:
        # set seed
        np.random.seed(42)
        actual_indices = np.random.choice(
            loader.dataset.full_day_valid_indices, num_samples, replace=False
        )
        indices = {}
        for i in actual_indices:
            indices[i] = loader.dataset.valid_indices.index(i)
@@ -117,10 +149,21 @@ class DiffusionTrainer:
        if self.data_processor.lstm:
            inputDim = self.data_processor.get_input_size()
            other_input_data = torch.randn(
                1024, inputDim[1], self.model.other_inputs_dim
            ).to(self.device)
        else:
            other_input_data = torch.randn(1024, self.model.other_inputs_dim).to(
                self.device
            )
        task.set_configuration_object(
            "model",
            str(
                summary(
                    self.model, input_data=[input_data, time_steps, other_input_data]
                )
            ),
        )

        self.data_processor = task.connect(self.data_processor, name="data_processor")
@@ -166,19 +209,22 @@ class DiffusionTrainer:
            if task:
                task.get_logger().report_scalar(
                    title=criterion.__class__.__name__,
                    series="train",
                    iteration=epoch,
                    value=loss.item(),
                )
            if epoch % 150 == 0 and epoch != 0:
                self.debug_plots(
                    task, True, train_loader, train_sample_indices, epoch
                )
                self.debug_plots(
                    task, False, test_loader, test_sample_indices, epoch
                )
        if task:
            task.close()

    def debug_plots(self, task, training: bool, data_loader, sample_indices, epoch):
        for actual_idx, idx in sample_indices.items():
            features, target, _ = data_loader.dataset[idx]
@@ -204,42 +250,86 @@ class DiffusionTrainer:
            ci_50_lower = np.quantile(samples, 0.25, axis=0)
            ci_50_upper = np.quantile(samples, 0.75, axis=0)

            sns.set_theme()
            time_steps = np.arange(0, 96)
            fig, ax = plt.subplots(figsize=(20, 10))
            ax.plot(
                time_steps,
                samples.mean(axis=0),
                label="Mean of NRV samples",
                linewidth=3,
            )
            # ax.fill_between(time_steps, ci_lower, ci_upper, color='b', alpha=0.2, label='Full Interval')
            ax.fill_between(
                time_steps,
                ci_99_lower,
                ci_99_upper,
                color="b",
                alpha=0.2,
                label="99% Interval",
            )
            ax.fill_between(
                time_steps,
                ci_95_lower,
                ci_95_upper,
                color="b",
                alpha=0.2,
                label="95% Interval",
            )
            ax.fill_between(
                time_steps,
                ci_90_lower,
                ci_90_upper,
                color="b",
                alpha=0.2,
                label="90% Interval",
            )
            ax.fill_between(
                time_steps,
                ci_50_lower,
                ci_50_upper,
                color="b",
                alpha=0.2,
                label="50% Interval",
            )
            ax.plot(target, label="Real NRV", linewidth=3)
            # full_interval_patch = mpatches.Patch(color='b', alpha=0.2, label='Full Interval')
            ci_99_patch = mpatches.Patch(color="b", alpha=0.3, label="99% Interval")
            ci_95_patch = mpatches.Patch(color="b", alpha=0.4, label="95% Interval")
            ci_90_patch = mpatches.Patch(color="b", alpha=0.5, label="90% Interval")
            ci_50_patch = mpatches.Patch(color="b", alpha=0.6, label="50% Interval")
            ax.legend(
                handles=[
                    ci_99_patch,
                    ci_95_patch,
                    ci_90_patch,
                    ci_50_patch,
                    ax.lines[0],
                    ax.lines[1],
                ]
            )
            task.get_logger().report_matplotlib_figure(
                title="Training" if training else "Testing",
                series=f"Sample {actual_idx}",
                iteration=epoch,
                figure=fig,
            )
            plt.close()

    def test(
        self, data_loader: torch.utils.data.DataLoader, epoch: int, task: Task = None
    ):
        all_crps = []
        generated_samples = {}
        for inputs, targets, idx_batch in data_loader:
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            print(inputs.shape, targets.shape)

            number_of_samples = 100
            sample = self.sample(self.model, number_of_samples, inputs)
@@ -247,6 +337,13 @@ class DiffusionTrainer:
            # reduce samples from (batch_size*number_of_samples, time_steps) to (batch_size, number_of_samples, time_steps)
            samples_batched = sample.reshape(inputs.shape[0], number_of_samples, 96)
            # add samples to generated_samples
            for i, (idx, samples) in enumerate(zip(idx_batch, samples_batched)):
                generated_samples[idx.item()] = (
                    self.data_processor.inverse_transform(inputs[i][:96]),
                    self.data_processor.inverse_transform(samples),
                )

            # calculate crps
            crps = crps_from_samples(samples_batched, targets)
            crps_mean = crps.mean(axis=1)
@@ -262,10 +359,33 @@ class DiffusionTrainer:
        if task:
            task.get_logger().report_scalar(
                title="CRPS", series="test", value=mean_crps, iteration=epoch
            )

        if self.policy_evaluator:
            _, test_loader = self.data_processor.get_dataloaders(
                predict_sequence_length=self.ts_length, full_day_skip=True
            )
            self.policy_evaluator.evaluate_test_set(generated_samples, test_loader)
            df = self.policy_evaluator.get_profits_as_scalars()
            for idx, row in df.iterrows():
                task.get_logger().report_scalar(
                    title="Profit",
                    series=f"penalty_{row['Penalty']}",
                    value=row["Total Profit"],
                    iteration=epoch,
                )
            df = self.policy_evaluator.get_profits_till_400()
            for idx, row in df.iterrows():
                task.get_logger().report_scalar(
                    title="Profit_till_400",
                    series=f"penalty_{row['Penalty']}",
                    value=row["Profit_till_400"],
                    iteration=epoch,
                )
    def save_checkpoint(self, val_loss, task, iteration: int):
@@ -274,4 +394,3 @@ class DiffusionTrainer:
            model_path="checkpoint.pt", iteration=iteration, auto_delete_file=False
        )
        self.best_score = val_loss

View File

@@ -15,6 +15,7 @@ import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches


def sample_from_dist(quantiles, preds):
    if isinstance(preds, torch.Tensor):
        preds = preds.detach().cpu()
@@ -31,6 +32,7 @@ def sample_from_dist(quantiles, preds):
    # random probabilities of (1000, 1)
    import random

    probs = np.array([random.random() for _ in range(1000)])
    spline = CubicSpline(quantiles, preds, axis=1)
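sample_from_dist performs inverse-transform sampling: the predicted quantiles are treated as points on the quantile function (the inverse CDF), and uniform draws are pushed through a cubic-spline interpolant of it. A self-contained sketch of the idea with illustrative numbers:

import numpy as np
from scipy.interpolate import CubicSpline

quantile_levels = np.array([0.05, 0.25, 0.50, 0.75, 0.95])
predicted = np.array([[-1.2, -0.4, 0.0, 0.5, 1.3]])  # one forecast's quantiles

spline = CubicSpline(quantile_levels, predicted, axis=1)  # approx. inverse CDF
probs = np.random.random(1000)  # uniform probabilities, as in the code above
samples = spline(probs)         # (1, 1000) draws from the implied distribution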
@@ -42,6 +44,7 @@ def sample_from_dist(quantiles, preds):
    return samples


def auto_regressive(dataset, model, quantiles, idx_batch, sequence_length: int = 96):
    device = next(model.parameters()).device
    prev_features, targets = dataset.get_batch(idx_batch)
@@ -102,9 +105,7 @@ def auto_regressive(dataset, model, quantiles, idx_batch, sequence_length: int =
        )  # (batch_size, sequence_length)

        with torch.no_grad():
            new_predictions_full = model(prev_features)  # (batch_size, quantiles)
        predictions_full = torch.cat(
            (predictions_full, new_predictions_full.unsqueeze(1)), dim=1
        )  # (batch_size, sequence_length, quantiles)
@@ -123,6 +124,7 @@ def auto_regressive(dataset, model, quantiles, idx_batch, sequence_length: int =
        target_full.unsqueeze(-1),
    )


class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
    def __init__(
        self,
@@ -169,7 +171,10 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
                dataloader.dataset, idx_batch=computed_idx_batch
            )
            generated_samples[idx.item()] = (
                self.data_processor.inverse_transform(initial),
                self.data_processor.inverse_transform(samples),
            )
            samples = samples.unsqueeze(0)
            targets = targets.squeeze(-1)
@@ -180,22 +185,37 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
            crps_from_samples_metric.append(crps[0].mean().item())

        task.get_logger().report_scalar(
            title="CRPS_from_samples",
            series="test",
            value=np.mean(crps_from_samples_metric),
            iteration=epoch,
        )
        # using the policy evaluator, evaluate the policy with the generated samples
        if self.policy_evaluator is not None:
            _, test_loader = self.data_processor.get_dataloaders(
                predict_sequence_length=self.model.output_size, full_day_skip=True
            )
            self.policy_evaluator.evaluate_test_set(generated_samples, test_loader)
            df = self.policy_evaluator.get_profits_as_scalars()
            # for each row, report the profits
            for idx, row in df.iterrows():
                task.get_logger().report_scalar(
                    title="Profit",
                    series=f"penalty_{row['Penalty']}",
                    value=row["Total Profit"],
                    iteration=epoch,
                )
            df = self.policy_evaluator.get_profits_till_400()
            for idx, row in df.iterrows():
                task.get_logger().report_scalar(
                    title="Profit_till_400",
                    series=f"penalty_{row['Penalty']}",
                    value=row["Profit_till_400"],
                    iteration=epoch,
                )

    def log_final_metrics(self, task, dataloader, train: bool = True):
        metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track}
@@ -222,7 +242,10 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
            )
            # save the samples for the idx, these will be used for evaluating the policy
            self.test_set_samples[idx.item()] = (
                self.data_processor.inverse_transform(initial),
                self.data_processor.inverse_transform(samples),
            )
            samples = samples.unsqueeze(0)
            targets = targets.squeeze(-1)
@@ -232,7 +255,6 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
            crps_from_samples_metric.append(crps[0].mean().item())
        _, outputs, samples, targets = self.auto_regressive(
            dataloader.dataset, idx_batch=idx_batch
        )
@@ -286,7 +308,8 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
        if train == False:
            task.get_logger().report_single_value(
                name="test_CRPS_from_samples_transformed",
                value=np.mean(crps_from_samples_metric),
            )

    # def get_plot_error(
@@ -319,7 +342,6 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
    #     return fig

    def get_plot(
        self,
        current_day,
@@ -376,30 +398,78 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
        time_steps = np.arange(0, 96)
        fig, ax = plt.subplots(figsize=(20, 10))
        ax.plot(
            time_steps,
            predictions_np.mean(axis=0),
            label="Mean of NRV samples",
            linewidth=3,
        )
        # ax.fill_between(time_steps, ci_lower, ci_upper, color='b', alpha=0.2, label='Full Interval')
        ax.fill_between(
            time_steps,
            ci_99_lower,
            ci_99_upper,
            color="b",
            alpha=0.2,
            label="99% Interval",
        )
        ax.fill_between(
            time_steps,
            ci_95_lower,
            ci_95_upper,
            color="b",
            alpha=0.2,
            label="95% Interval",
        )
        ax.fill_between(
            time_steps,
            ci_90_lower,
            ci_90_upper,
            color="b",
            alpha=0.2,
            label="90% Interval",
        )
        ax.fill_between(
            time_steps,
            ci_50_lower,
            ci_50_upper,
            color="b",
            alpha=0.2,
            label="50% Interval",
        )
        ax.plot(next_day_np, label="Real NRV", linewidth=3)
        # full_interval_patch = mpatches.Patch(color='b', alpha=0.2, label='Full Interval')
        ci_99_patch = mpatches.Patch(color="b", alpha=0.3, label="99% Interval")
        ci_95_patch = mpatches.Patch(color="b", alpha=0.4, label="95% Interval")
        ci_90_patch = mpatches.Patch(color="b", alpha=0.5, label="90% Interval")
        ci_50_patch = mpatches.Patch(color="b", alpha=0.6, label="50% Interval")
        ax.legend(
            handles=[
                ci_99_patch,
                ci_95_patch,
                ci_90_patch,
                ci_50_patch,
                ax.lines[0],
                ax.lines[1],
            ]
        )
        return fig

    def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96):
        return auto_regressive(
            dataset, self.model, self.quantiles, idx_batch, sequence_length
        )

    def plot_quantile_percentages(
        self,
        task,
        data_loader,
        train: bool = True,
        iteration: int = None,
        full_day: bool = False,
    ):
        quantiles = self.quantiles
        total = 0
@@ -429,20 +499,18 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
            else:
                inputs = inputs.to(self.device)
                outputs = (
                    self.model(inputs).cpu().numpy()
                )  # (batch_size, num_quantiles)
                targets = targets.squeeze(-1).cpu().numpy()  # (batch_size, 1)

            for i, q in enumerate(quantiles):
                quantile_counter[q] += np.sum(targets < outputs[:, i])
            total += len(targets)

        # to numpy array of length len(quantiles)
        percentages = np.array([quantile_counter[q] / total for q in quantiles])

        bar_width = 0.35
        index = np.arange(len(quantiles))
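The counter above measures empirical coverage: for a well-calibrated model, the fraction of targets falling below the q-quantile prediction should be close to q, which is exactly what the Ideal vs. NN model bars compare. A toy illustration with made-up numbers:

import numpy as np

quantiles = np.array([0.1, 0.5, 0.9])
outputs = np.array([[-0.5, 0.0, 0.6],   # predicted quantiles for 2 targets
                    [-0.2, 0.1, 0.9]])
targets = np.array([0.05, -0.1])

coverage = (targets[:, None] < outputs).mean(axis=0)  # empirical coverage per q
print(dict(zip(quantiles, coverage)))  # ideally close to the quantile levels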
@@ -450,9 +518,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
        # Plotting the bars
        fig, ax = plt.subplots(figsize=(15, 10))
        bar1 = ax.bar(index, quantiles, bar_width, label="Ideal", color="brown")
        bar2 = ax.bar(
            index + bar_width, percentages, bar_width, label="NN model", color="blue"
        )
@@ -502,7 +568,6 @@ class NonAutoRegressiveQuantileRegression(Trainer):
    ):
        self.quantiles = quantiles
        criterion = NonAutoRegressivePinballLoss(quantiles=quantiles)
        super().__init__(
            model=model,

View File

@@ -1,3 +1,12 @@
from src.utils.clearml import ClearMLHelper

#### ClearML ####
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
task = clearml_helper.get_task(
    task_name="Autoregressive Quantile Regression: Non Linear"
)
task.execute_remotely(queue_name="default", exit_process=True)

from src.policies.PolicyEvaluator import PolicyEvaluator
from src.policies.simple_baseline import BaselinePolicy, Battery
from src.models.lstm_model import GRUModel
@@ -13,11 +22,6 @@ import torch.nn as nn
from src.models.time_embedding_layer import TimeEmbedding

#### Data Processor ####
data_config = DataConfig()
@@ -34,7 +38,6 @@ data_config.DAY_OF_WEEK = True
data_config.NOMINAL_NET_POSITION = True
data_config = task.connect(data_config, name="data_features")
data_processor = DataProcessor(data_config, path="", lstm=False)
@@ -68,9 +71,17 @@ model_parameters = {
model_parameters = task.connect(model_parameters, name="model_parameters")

time_embedding = TimeEmbedding(
    data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"]
)
# lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=model_parameters["hidden_size"], num_layers=model_parameters["num_layers"], dropout=model_parameters["dropout"])
non_linear_model = NonLinearRegression(
    time_embedding.output_dim(inputDim),
    len(quantiles),
    hiddenSize=model_parameters["hidden_size"],
    numLayers=model_parameters["num_layers"],
    dropout=model_parameters["dropout"],
)
# linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
model = nn.Sequential(time_embedding, non_linear_model)
@@ -103,7 +114,8 @@ trainer.train(task=task, epochs=epochs, remotely=True)
### Policy Evaluation ###
idx_samples = trainer.test_set_samples
_, test_loader = trainer.data_processor.get_dataloaders(
    predict_sequence_length=trainer.model.output_size, full_day_skip=True
)
policy_evaluator.evaluate_test_set(idx_samples, test_loader)
policy_evaluator.plot_profits_table()

View File

@@ -1,25 +1,17 @@
from src.utils.clearml import ClearMLHelper

clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
task = clearml_helper.get_task(task_name="Diffusion Training")
# execute remotely
task.execute_remotely(queue_name="default", exit_process=True)
print("Running remotely")

from src.models import *
from src.losses import *
from src.models.time_embedding_layer import TimeEmbedding
from src.models.diffusion_model import GRUDiffusionModel, SimpleDiffusionModel
from src.trainers.diffusion_trainer import DiffusionTrainer
from src.data import DataProcessor, DataConfig
from src.policies.simple_baseline import BaselinePolicy, Battery
from src.policies.PolicyEvaluator import PolicyEvaluator

#### Data Processor ####
data_config = DataConfig()
@@ -54,11 +46,21 @@ model_parameters = {
model_parameters = task.connect(model_parameters, name="model_parameters")

#### Model ####
model = SimpleDiffusionModel(
    96,
    model_parameters["hidden_sizes"],
    other_inputs_dim=inputDim[1],
    time_dim=model_parameters["time_dim"],
)
# model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=128)

### Policy Evaluator ###
battery = Battery(2, 1)
baseline_policy = BaselinePolicy(battery, data_path="")
policy_evaluator = PolicyEvaluator(baseline_policy, task)

#### Trainer ####
trainer = DiffusionTrainer(
    model, data_processor, "cuda", policy_evaluator=policy_evaluator
)
trainer.train(model_parameters["epochs"], model_parameters["learning_rate"], task)