Updated training scripts

This commit is contained in:
2024-03-18 12:15:06 +01:00
parent 34335cd9fe
commit 1a8e735cbc
10 changed files with 487 additions and 308 deletions

View File

@@ -5,6 +5,7 @@ import pandas as pd
import numpy as np import numpy as np
import torch import torch
import plotly.express as px import plotly.express as px
from functools import lru_cache
from src.utils.imbalance_price_calculator import ImbalancePriceCalculator from src.utils.imbalance_price_calculator import ImbalancePriceCalculator
@@ -24,11 +25,14 @@ class PolicyEvaluator:
) )
self.imbalance_prices = imbalance_prices.sort_values(by=["DateTime"]) self.imbalance_prices = imbalance_prices.sort_values(by=["DateTime"])
self.penalties = [0, 100, 300, 500, 800, 1000, 1500] self.penalties = [0, 1000, 1500]
self.profits = [] self.profits = []
self.task = task self.task = task
self.cache = {}
@lru_cache(maxsize=None)
def get_imbanlance_prices_for_date(self, date): def get_imbanlance_prices_for_date(self, date):
imbalance_prices_day = self.imbalance_prices[ imbalance_prices_day = self.imbalance_prices[
self.imbalance_prices["DateTime"].dt.date == date self.imbalance_prices["DateTime"].dt.date == date
@@ -40,69 +44,152 @@ class PolicyEvaluator:
date, date,
idx_samples, idx_samples,
test_loader, test_loader,
charge_thresholds=np.arange(-100, 250, 25), charge_thresholds=np.arange(-1500, 1500, 50),
discharge_thresholds=np.arange(-100, 250, 25), discharge_thresholds=np.arange(-1500, 1500, 50),
penalty: int = 0,
): ):
idx = test_loader.dataset.get_idx_for_date(date.date()) if date in self.cache:
(reconstructed_imbalance_prices, real_imbalance_prices) = self.cache[date]
if idx not in idx_samples:
print("No samples for idx: ", idx, date)
(initial, samples) = idx_samples[idx]
if len(initial.shape) == 2:
initial = initial.cpu().numpy()[0][-1]
else: else:
initial = initial.cpu().numpy()[-1] idx = test_loader.dataset.get_idx_for_date(date.date())
samples = samples.cpu().numpy()
initial = np.repeat(initial, samples.shape[0]) if idx not in idx_samples:
combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1) print("No samples for idx: ", idx, date)
(initial, samples) = idx_samples[idx]
reconstructed_imbalance_prices = ( if len(initial.shape) == 2:
self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined) initial = initial.cpu().numpy()[0][-1]
) else:
reconstructed_imbalance_prices = torch.tensor( initial = initial.cpu().numpy()[-1]
reconstructed_imbalance_prices, device="cuda" samples = samples.cpu().numpy()
initial = np.repeat(initial, samples.shape[0])
combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1)
reconstructed_imbalance_prices = (
self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined)
)
reconstructed_imbalance_prices = torch.tensor(
reconstructed_imbalance_prices, device="cuda"
)
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
self.cache[date] = (reconstructed_imbalance_prices, real_imbalance_prices)
return self.profit_for_penalty(
reconstructed_imbalance_prices,
real_imbalance_prices,
penalty,
charge_thresholds,
discharge_thresholds,
) )
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) def optimize_penalty_for_target_charge_cycles(
self,
for penalty in self.penalties: idx_samples,
found_charge_thresholds, found_discharge_thresholds = ( test_loader,
self.baseline_policy.get_optimal_thresholds( initial_penalty,
reconstructed_imbalance_prices, target_charge_cycles,
charge_thresholds, learning_rate=2,
discharge_thresholds, max_iterations=10,
penalty, tolerance=10,
) ):
self.cache = {}
penalty = initial_penalty
for iteration in range(max_iterations):
# Calculate profit and charge cycles for the current penalty
simulated_profit, simulated_charge_cycles = (
self.evaluate_test_set_for_penalty(idx_samples, test_loader, penalty)
) )
predicted_charge_threshold = found_charge_thresholds.mean(axis=0) print(
predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0) f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}"
)
### Determine Profits and Charge Cycles ### # Calculate the gradient (difference) between the simulated and target charge cycles
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate( gradient = simulated_charge_cycles - target_charge_cycles
torch.tensor([[real_imbalance_prices]]),
torch.tensor([predicted_charge_threshold]), # Update the penalty parameter in the direction of the gradient
torch.tensor([predicted_discharge_threshold]), penalty += learning_rate * gradient
# Check if the charge cycles are close enough to the target
if abs(gradient) < tolerance:
print(f"Optimal penalty found after {iteration+1} iterations")
break
else:
print(
f"Reached max iterations ({max_iterations}) without converging to the target charge cycles"
) )
self.profits.append(
[ # Re-calculate profit and charge cycles for the final penalty to return accurate results
date, profit, charge_cycles = self.evaluate_test_set_for_penalty(
penalty, idx_samples, test_loader, penalty
simulated_profit[0][0].item(), )
simulated_charge_cycles[0][0].item(),
predicted_charge_threshold.item(), return penalty, profit, charge_cycles
predicted_discharge_threshold.item(),
] def profit_for_penalty(
self,
reconstructed_imbalance_prices,
real_imbalance_prices,
penalty: int,
charge_thresholds,
discharge_thresholds,
):
"""_summary_
Args:
date (_type_): date to evaluate
reconstructed_imbalance_prices (_type_): predicted imbalance price
real_imbalance_prices (_type_): real imbalance price
penalty (int): penalty parameter to take into account
charge_thresholds (_type_): list of charge thresholds
discharge_thresholds (_type_): list of discharge thresholds
Returns:
_type_: returns the simulated profit, charge cycles, the found charge threshold and discharge threshold
"""
found_charge_thresholds, found_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds(
reconstructed_imbalance_prices,
charge_thresholds,
discharge_thresholds,
penalty,
) )
)
predicted_charge_threshold = found_charge_thresholds.mean(axis=0)
predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0)
### Determine Profits and Charge Cycles ###
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([predicted_charge_threshold]),
torch.tensor([predicted_discharge_threshold]),
)
return (
simulated_profit[0][0].item(),
simulated_charge_cycles[0][0].item(),
predicted_charge_threshold.item(),
predicted_discharge_threshold.item(),
)
def evaluate_test_set(self, idx_samples, test_loader): def evaluate_test_set(self, idx_samples, test_loader):
self.profits = [] self.profits = []
self.cache = {}
for date in tqdm(self.dates): for date in tqdm(self.dates):
try: try:
self.evaluate_for_date(date, idx_samples, test_loader) for penalty in self.penalties:
self.profits.append(
[
date,
penalty,
*self.evaluate_for_date(
date, idx_samples, test_loader, penalty=penalty
),
]
)
except KeyboardInterrupt: except KeyboardInterrupt:
print("Interrupted") print("Interrupted")
raise KeyboardInterrupt raise KeyboardInterrupt
@@ -123,6 +210,27 @@ class PolicyEvaluator:
], ],
) )
def evaluate_test_set_for_penalty(self, idx_samples, test_loader, penalty):
total_profit = 0
total_charge_cycles = 0
for date in tqdm(self.dates):
try:
profit, charge_cycles, _, _ = self.evaluate_for_date(
date, idx_samples, test_loader, penalty=penalty
)
total_profit += profit
total_charge_cycles += charge_cycles
except KeyboardInterrupt:
print("Interrupted")
raise KeyboardInterrupt
except Exception as e:
print(e)
pass
return total_profit, total_charge_cycles
def plot_profits_table(self): def plot_profits_table(self):
# Check if task or penalties are not set # Check if task or penalties are not set
if ( if (

View File

@@ -13,49 +13,46 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
self.train_profits = [] self.train_profits = []
def determine_thresholds_for_date(self, date): def determine_thresholds_for_date(self, date, penalty):
charge_thresholds = np.arange(-100, 250, 25) charge_thresholds = np.arange(-500, 500, 25)
discharge_thresholds = np.arange(-100, 250, 25) discharge_thresholds = np.arange(-500, 500, 25)
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
for penalty in self.penalties: found_charge_thresholds, found_discharge_thresholds = (
found_charge_thresholds, found_discharge_thresholds = ( self.baseline_policy.get_optimal_thresholds(
self.baseline_policy.get_optimal_thresholds( torch.tensor([real_imbalance_prices]),
torch.tensor([real_imbalance_prices]), charge_thresholds,
charge_thresholds, discharge_thresholds,
discharge_thresholds, penalty,
penalty,
)
) )
)
best_charge_threshold = found_charge_thresholds best_charge_threshold = found_charge_thresholds
best_discharge_threshold = found_discharge_thresholds best_discharge_threshold = found_discharge_thresholds
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate( simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]), torch.tensor([[real_imbalance_prices]]),
torch.tensor([best_charge_threshold]), torch.tensor([best_charge_threshold]),
torch.tensor([best_discharge_threshold]), torch.tensor([best_discharge_threshold]),
) )
self.train_profits.append( self.train_profits.append(
[ [
date, simulated_profit[0][0].item(),
penalty, simulated_charge_cycles[0][0].item(),
simulated_profit[0][0].item(), best_charge_threshold.item(),
simulated_charge_cycles[0][0].item(), best_discharge_threshold.item(),
best_charge_threshold.item(), ]
best_discharge_threshold.item(), )
]
)
def determine_best_thresholds(self): def determine_best_thresholds(self, penalty):
self.train_profits = [] self.train_profits = []
dates = self.baseline_policy.train_data["DateTime"].dt.date.unique() dates = self.baseline_policy.train_data["DateTime"].dt.date.unique()
dates = pd.to_datetime(dates) dates = pd.to_datetime(dates)
try: try:
for date in tqdm(dates): for date in tqdm(dates):
self.determine_thresholds_for_date(date) self.determine_thresholds_for_date(date, penalty)
except Exception as e: except Exception as e:
print(e) print(e)
pass pass
@@ -63,8 +60,6 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
self.train_profits = pd.DataFrame( self.train_profits = pd.DataFrame(
self.train_profits, self.train_profits,
columns=[ columns=[
"Date",
"Penalty",
"Profit", "Profit",
"Charge Cycles", "Charge Cycles",
"Charge Threshold", "Charge Threshold",
@@ -72,91 +67,18 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
], ],
) )
number_of_days = len(self.train_profits["Date"].unique()) # get the best thresholds combination based on the sum of profits
usable_charge_cycles = (400 / 365) * number_of_days best_thresholds = self.train_profits.groupby(
["Charge Threshold", "Discharge Threshold"]
).sum()["Profit"]
intermediate_values = {penalty: {} for penalty in self.penalties} best_thresholds = best_thresholds.idxmax()
return (best_thresholds[0], best_thresholds[1])
# find the best threshold combination for each penalty based on the total profit on the data def evaluate_test_set(
for penalty in self.penalties: self, charge_threshold, discharge_threshold, data_processor=None
profits_for_penalty = self.train_profits[ ):
self.train_profits["Penalty"] == penalty """Evaluate the test set using the given thresholds"""
]
for index, row in profits_for_penalty.iterrows():
charge_threshold = row["Charge Threshold"]
discharge_threshold = row["Discharge Threshold"]
if (charge_threshold, discharge_threshold) not in intermediate_values[
penalty
]:
intermediate_values[penalty][
(charge_threshold, discharge_threshold)
] = (0, 0)
new_charge_cycles = (
intermediate_values[penalty][
(charge_threshold, discharge_threshold)
][1]
+ row["Charge Cycles"]
)
new_profit = (
intermediate_values[penalty][
(charge_threshold, discharge_threshold)
][0]
+ row["Profit"]
)
if new_charge_cycles <= usable_charge_cycles:
intermediate_values[penalty][
(charge_threshold, discharge_threshold)
] = (new_profit, new_charge_cycles)
best_thresholds = {penalty: [0, 0, 0, 0] for penalty in self.penalties}
for penalty in self.penalties:
best_profit = 0
for threshold, values in intermediate_values[penalty].items():
if values[0] > best_profit:
best_profit = values[0]
best_thresholds[penalty][0] = threshold[0]
best_thresholds[penalty][1] = threshold[1]
best_thresholds[penalty][2] = best_profit
best_thresholds[penalty][3] = values[1]
# create dataframe from best_thresholds with columns, Penalty, Charge Threshold, Discharge Threshold, Profit
data = [
(penalty, values[0], values[1], values[2], values[3])
for penalty, values in best_thresholds.items()
]
best_thresholds_df = pd.DataFrame(
data,
columns=[
"Penalty",
"Charge Threshold",
"Discharge Threshold",
"Profit (training data)",
f"Charge Cycles (training data: max {usable_charge_cycles})",
],
)
if self.task:
self.task.get_logger().report_table(
"Baseline Train Data",
"Best Thresholds for each Penalty on Training Data (up to 400 cycles / year)",
iteration=0,
table_plot=best_thresholds_df,
)
return best_thresholds
def evaluate_test_set(self, thresholds: dict, data_processor=None):
"""Evaluate the test set using the given thresholds (multiple penalties)
Args:
thresholds (dict): Dictionary with penalties as keys and the corresponding thresholds tuple as values
"""
self.profits = [] self.profits = []
if data_processor: if data_processor:
@@ -173,40 +95,63 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
try: try:
for date in tqdm(self.dates): for date in tqdm(self.dates):
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
for penalty in thresholds.keys():
charge_threshold = thresholds[penalty][0]
discharge_threshold = thresholds[penalty][1]
simulated_profit, simulated_charge_cycles = ( simulated_profit, simulated_charge_cycles = (
self.baseline_policy.simulate( self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]), torch.tensor([[real_imbalance_prices]]),
torch.tensor([charge_threshold]), torch.tensor([charge_threshold]),
torch.tensor([discharge_threshold]), torch.tensor([discharge_threshold]),
)
) )
)
self.profits.append( self.profits.append(
[ [
date, date,
penalty, simulated_profit[0][0].item(),
simulated_profit[0][0].item(), simulated_charge_cycles[0][0].item(),
simulated_charge_cycles[0][0].item(), ]
charge_threshold, )
discharge_threshold,
]
)
self.profits = pd.DataFrame( self.profits = pd.DataFrame(
self.profits, self.profits,
columns=[ columns=["Date", "Profit", "Charge Cycles"],
"Date",
"Penalty",
"Profit",
"Charge Cycles",
"Charge Threshold",
"Discharge Threshold",
],
) )
except Exception as e: except Exception as e:
print(e) print(e)
pass pass
# return the total profit and total charge cycles
return self.profits["Profit"].sum(), self.profits["Charge Cycles"].sum()
def optimize_penalty_for_target_charge_cycles(
self,
initial_penalty,
target_charge_cycles,
learning_rate=2,
max_iterations=10,
tolerance=10,
):
penalty = initial_penalty
for i in range(max_iterations):
charge_threshold, discharge_threshold = self.determine_best_thresholds(
penalty
)
total_profit, total_charge_cycles = self.evaluate_test_set(
charge_threshold, discharge_threshold
)
gradient = total_charge_cycles - target_charge_cycles
penalty += learning_rate * gradient
print(
f"Iteration {i+1}: Penalty: {penalty}, Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}, Gradient: {gradient}, Charge Threshold: {charge_threshold}, Discharge Threshold: {discharge_threshold}"
)
if abs(gradient) < tolerance:
print(f"Optimal penalty found after {i+1} iterations")
break
else:
print(f"Optimal penalty not found after {max_iterations} iterations")
return penalty, total_profit, total_charge_cycles

View File

@@ -17,6 +17,7 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
date, date,
charge_thresholds=np.arange(-100, 250, 25), charge_thresholds=np.arange(-100, 250, 25),
discharge_thresholds=np.arange(-100, 250, 25), discharge_thresholds=np.arange(-100, 250, 25),
penalty: int = 0
): ):
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
@@ -27,7 +28,6 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
np.array([yesterday_imbalance_prices]), device="cpu" np.array([yesterday_imbalance_prices]), device="cpu"
) )
for penalty in self.penalties:
yesterday_charge_thresholds, yesterday_discharge_thresholds = ( yesterday_charge_thresholds, yesterday_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds( self.baseline_policy.get_optimal_thresholds(
yesterday_imbalance_prices, yesterday_imbalance_prices,

View File

@@ -32,9 +32,14 @@ battery = Battery(2, 1)
baseline_policy = BaselinePolicy(battery, data_path="") baseline_policy = BaselinePolicy(battery, data_path="")
policy_evaluator = BaselinePolicyEvaluator(baseline_policy, task) policy_evaluator = BaselinePolicyEvaluator(baseline_policy, task)
thresholds = policy_evaluator.determine_best_thresholds() total_profit, total_charge_cycles = (
policy_evaluator.evaluate_test_set(thresholds, data_processor=data_processor) policy_evaluator.optimize_penalty_for_target_charge_cycles(
initial_penalty=100,
policy_evaluator.plot_profits_table() target_charge_cycles=283,
learning_rate=0.2,
max_iterations=150,
tolerance=1,
)
)
print(f"Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}")
task.close() task.close()

View File

@@ -9,6 +9,8 @@ import plotly.subplots as sp
from plotly.subplots import make_subplots from plotly.subplots import make_subplots
from src.trainers.trainer import Trainer from src.trainers.trainer import Trainer
from tqdm import tqdm from tqdm import tqdm
import matplotlib.pyplot as plt
class AutoRegressiveTrainer(Trainer): class AutoRegressiveTrainer(Trainer):
def __init__( def __init__(
@@ -34,28 +36,41 @@ class AutoRegressiveTrainer(Trainer):
def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch): def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
for actual_idx, idx in sample_indices.items(): for actual_idx, idx in sample_indices.items():
auto_regressive_output = self.auto_regressive(data_loader.dataset, [idx]*1000) print(f"Plotting sample {actual_idx}")
auto_regressive_output = self.auto_regressive(
data_loader.dataset, [idx] * 1000
)
if len(auto_regressive_output) == 3: if len(auto_regressive_output) == 3:
initial, predictions, target = auto_regressive_output initial, predictions, target = auto_regressive_output
else: else:
initial, _, predictions, target = auto_regressive_output initial, _, predictions, target = auto_regressive_output
# keep one initial # keep one initial
initial = initial[0] initial = initial[0]
target = target[0] target = target[0]
predictions = predictions predictions = predictions
fig = self.get_plot(initial, target, predictions, show_legend=(0 == 0)) fig, fig2 = self.get_plot(
initial, target, predictions, show_legend=(0 == 0)
)
task.get_logger().report_matplotlib_figure( task.get_logger().report_matplotlib_figure(
title="Training" if train else "Testing", title="Training" if train else "Testing",
series=f'Sample {actual_idx}', series=f"Sample {actual_idx}",
iteration=epoch, iteration=epoch,
figure=fig, figure=fig,
) )
task.get_logger().report_matplotlib_figure(
title="Training Samples" if train else "Testing Samples",
series=f"Sample {actual_idx} samples",
iteration=epoch,
figure=fig2,
report_interactive=False,
)
plt.close()
def auto_regressive(self, data_loader, idx, sequence_length: int = 96): def auto_regressive(self, data_loader, idx, sequence_length: int = 96):
self.model.eval() self.model.eval()

View File

@@ -85,6 +85,8 @@ class DiffusionTrainer:
self.best_score = None self.best_score = None
self.policy_evaluator = policy_evaluator self.policy_evaluator = policy_evaluator
self.prev_optimal_penalty = 0
def noise_time_series(self, x: torch.tensor, t: int): def noise_time_series(self, x: torch.tensor, t: int):
"""Add noise to time series """Add noise to time series
Args: Args:
@@ -206,8 +208,8 @@ class DiffusionTrainer:
running_loss /= len(train_loader.dataset) running_loss /= len(train_loader.dataset)
if epoch % 40 == 0 and epoch != 0: if epoch % 150 == 0 and epoch != 0:
crps = self.test(test_loader, epoch, task) crps, _ = self.test(test_loader, epoch, task)
if best_crps is None or crps < best_crps: if best_crps is None or crps < best_crps:
best_crps = crps best_crps = crps
@@ -215,7 +217,7 @@ class DiffusionTrainer:
else: else:
early_stopping += 1 early_stopping += 1
if early_stopping > 5: if early_stopping > 15:
break break
if task: if task:
@@ -238,8 +240,32 @@ class DiffusionTrainer:
self.model = torch.load("checkpoint.pt") self.model = torch.load("checkpoint.pt")
self.model.to(self.device) self.model.to(self.device)
self.test(test_loader, None, task) _, generated_sampels = self.test(test_loader, None, task)
self.policy_evaluator.plot_profits_table() # self.policy_evaluator.plot_profits_table()
optimal_penalty, profit, charge_cycles = (
self.policy_evaluator.optimize_penalty_for_target_charge_cycles(
idx_samples=generated_sampels,
test_loader=test_loader,
initial_penalty=900,
target_charge_cycles=283,
learning_rate=1,
max_iterations=50,
tolerance=1,
)
)
print(
f"Optimal Penalty: {optimal_penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}"
)
task.get_logger().report_single_value(
name="Optimal Penalty", value=optimal_penalty
)
task.get_logger().report_single_value(name="Optimal Profit", value=profit)
task.get_logger().report_single_value(
name="Optimal Charge Cycles", value=charge_cycles
)
if task: if task:
task.close() task.close()
@@ -332,6 +358,8 @@ class DiffusionTrainer:
] ]
) )
ax.set_ylim([-1500, 1500])
task.get_logger().report_matplotlib_figure( task.get_logger().report_matplotlib_figure(
title="Training" if training else "Testing", title="Training" if training else "Testing",
series=f"Sample {actual_idx}", series=f"Sample {actual_idx}",
@@ -341,6 +369,25 @@ class DiffusionTrainer:
plt.close() plt.close()
# plot some samples for the nrv genearations (10 samples) (scale -1500 to 1500)
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(10):
ax.plot(samples[i], label=f"Sample {i}")
ax.plot(target, label="Real NRV", linewidth=3)
ax.legend()
ax.set_ylim([-1500, 1500])
task.get_logger().report_matplotlib_figure(
title="Training Samples" if training else "Testing Samples",
series=f"Sample {actual_idx} samples",
iteration=epoch,
figure=fig,
report_interactive=False,
)
plt.close()
def test( def test(
self, data_loader: torch.utils.data.DataLoader, epoch: int, task: Task = None self, data_loader: torch.utils.data.DataLoader, epoch: int, task: Task = None
): ):
@@ -385,28 +432,39 @@ class DiffusionTrainer:
predict_sequence_length=self.ts_length, full_day_skip=True predict_sequence_length=self.ts_length, full_day_skip=True
) )
self.policy_evaluator.evaluate_test_set(generated_samples, test_loader) optimal_penalty, profit, charge_cycles = (
self.policy_evaluator.optimize_penalty_for_target_charge_cycles(
df = self.policy_evaluator.get_profits_as_scalars() idx_samples=generated_samples,
test_loader=test_loader,
for idx, row in df.iterrows(): initial_penalty=self.prev_optimal_penalty,
task.get_logger().report_scalar( target_charge_cycles=283,
title="Profit", learning_rate=1,
series=f"penalty_{row['Penalty']}", max_iterations=50,
value=row["Total Profit"], tolerance=1,
iteration=epoch,
) )
)
df = self.policy_evaluator.get_profits_till_400() self.prev_optimal_penalty = optimal_penalty
for idx, row in df.iterrows():
task.get_logger().report_scalar(
title="Profit_till_400",
series=f"penalty_{row['Penalty']}",
value=row["Profit_till_400"],
iteration=epoch,
)
return mean_crps task.get_logger().report_scalar(
title="Optimal Penalty",
series="test",
value=optimal_penalty,
iteration=epoch,
)
task.get_logger().report_scalar(
title="Optimal Profit", series="test", value=profit, iteration=epoch
)
task.get_logger().report_scalar(
title="Optimal Charge Cycles",
series="test",
value=charge_cycles,
iteration=epoch,
)
return mean_crps, generated_samples
def save_checkpoint(self, val_loss, task, iteration: int): def save_checkpoint(self, val_loss, task, iteration: int):
torch.save(self.model, "checkpoint.pt") torch.save(self.model, "checkpoint.pt")

View File

@@ -155,18 +155,6 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
generated_samples = {} generated_samples = {}
with torch.no_grad(): with torch.no_grad():
total_samples = len(dataloader.dataset)
print(
"Full day valid indices: ",
len(dataloader.dataset.full_day_valid_indices),
)
print(
"Valid indices: ",
len(dataloader.dataset.valid_indices),
)
print(dataloader.dataset.valid_indices)
for i in tqdm(dataloader.dataset.full_day_valid_indices): for i in tqdm(dataloader.dataset.full_day_valid_indices):
idx = dataloader.dataset.valid_indices.index(i) idx = dataloader.dataset.valid_indices.index(i)
@@ -188,74 +176,64 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
crps_from_samples_metric.append(crps[0].mean().item()) crps_from_samples_metric.append(crps[0].mean().item())
task.get_logger().report_scalar( if epoch is not None:
title="CRPS_from_samples", task.get_logger().report_scalar(
series="test", title="CRPS_from_samples",
value=np.mean(crps_from_samples_metric), series="test",
iteration=epoch, value=np.mean(crps_from_samples_metric),
) iteration=epoch,
)
# using the policy evaluator, evaluate the policy with the generated samples # using the policy evaluator, evaluate the policy with the generated samples
if self.policy_evaluator is not None: if self.policy_evaluator is not None:
self.policy_evaluator.evaluate_test_set(generated_samples, dataloader) optimal_penalty, profit, charge_cycles = (
df = self.policy_evaluator.get_profits_as_scalars() self.policy_evaluator.optimize_penalty_for_target_charge_cycles(
idx_samples=generated_samples,
test_loader=dataloader,
initial_penalty=900,
target_charge_cycles=283,
learning_rate=2,
max_iterations=100,
tolerance=1,
)
)
print(
f"Optimal Penalty: {optimal_penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}"
)
# for each row, report the profits
for idx, row in df.iterrows():
task.get_logger().report_scalar( task.get_logger().report_scalar(
title="Profit", title="Optimal Penalty",
series=f"penalty_{row['Penalty']}", series="test",
value=row["Total Profit"], value=optimal_penalty,
iteration=epoch, iteration=epoch,
) )
df = self.policy_evaluator.get_profits_till_400()
for idx, row in df.iterrows():
task.get_logger().report_scalar( task.get_logger().report_scalar(
title="Profit_till_400", title="Optimal Profit", series="test", value=profit, iteration=epoch
series=f"penalty_{row['Penalty']}", )
value=row["Profit_till_400"],
task.get_logger().report_scalar(
title="Optimal Charge Cycles",
series="test",
value=charge_cycles,
iteration=epoch, iteration=epoch,
) )
return np.mean(crps_from_samples_metric), generated_samples
def log_final_metrics(self, task, dataloader, train: bool = True): def log_final_metrics(self, task, dataloader, train: bool = True):
metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track} metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track}
transformed_metrics = { transformed_metrics = {
metric.__class__.__name__: 0.0 for metric in self.metrics_to_track metric.__class__.__name__: 0.0 for metric in self.metrics_to_track
} }
crps_from_samples_metric = []
with torch.no_grad(): with torch.no_grad():
total_samples = len(dataloader.dataset) - 96 total_samples = len(dataloader.dataset) - 96
batches = 0 batches = 0
for _, _, idx_batch in tqdm(dataloader): for _, _, idx_batch in tqdm(dataloader):
idx_batch = [idx for idx in idx_batch if idx < total_samples] idx_batch = [idx for idx in idx_batch if idx < total_samples]
if len(idx_batch) == 0:
continue
if train == False:
for idx in tqdm(idx_batch):
computed_idx_batch = [idx] * 250
initial, outputs, samples, targets = self.auto_regressive(
dataloader.dataset, idx_batch=computed_idx_batch
)
# save the samples for the idx, these will be used for evaluating the policy
self.test_set_samples[idx.item()] = (
self.data_processor.inverse_transform(initial),
self.data_processor.inverse_transform(samples),
)
samples = samples.unsqueeze(0)
targets = targets.squeeze(-1)
targets = targets[0].unsqueeze(0)
crps = crps_from_samples(samples, targets)
crps_from_samples_metric.append(crps[0].mean().item())
_, outputs, samples, targets = self.auto_regressive( _, outputs, samples, targets = self.auto_regressive(
dataloader.dataset, idx_batch=idx_batch dataloader.dataset, idx_batch=idx_batch
) )
@@ -308,6 +286,9 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
task.get_logger().report_single_value(name=name, value=metric_value) task.get_logger().report_single_value(name=name, value=metric_value)
if train == False: if train == False:
crps_from_samples_metric, self.test_set_samples = (
self.calculate_crps_from_samples(None, dataloader, None)
)
task.get_logger().report_single_value( task.get_logger().report_single_value(
name="test_CRPS_from_samples_transformed", name="test_CRPS_from_samples_transformed",
value=np.mean(crps_from_samples_metric), value=np.mean(crps_from_samples_metric),
@@ -320,6 +301,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
predictions, predictions,
show_legend: bool = True, show_legend: bool = True,
retransform: bool = True, retransform: bool = True,
task=None,
): ):
fig = go.Figure() fig = go.Figure()
@@ -427,7 +409,19 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
ax.lines[1], ax.lines[1],
] ]
) )
return fig
ax.set_ylim(-1500, 1500)
fig2, ax2 = plt.subplots(figsize=(20, 10))
for i in range(10):
ax2.plot(predictions_np[i], label=f"Sample {i}")
ax2.plot(next_day_np, label="Real NRV", linewidth=3)
ax2.legend()
ax2.set_ylim(-1500, 1500)
return fig, fig2
def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96): def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96):
return auto_regressive( return auto_regressive(
@@ -646,6 +640,21 @@ class NonAutoRegressiveQuantileRegression(Trainer):
figure=fig, figure=fig,
) )
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(10):
ax.plot(samples[i], label=f"Sample {i}")
ax.plot(target, label="Real NRV", linewidth=3)
ax.legend()
task.get_logger().report_matplotlib_figure(
title="Training" if train else "Testing",
series=f"Sample {actual_idx} Samples",
iteration=epoch,
figure=fig,
)
plt.close()
def get_plot( def get_plot(
self, self,
current_day, current_day,
@@ -740,7 +749,15 @@ class NonAutoRegressiveQuantileRegression(Trainer):
ax.lines[1], ax.lines[1],
] ]
) )
return fig
fig2, ax2 = plt.subplots(figsize=(20, 10))
for i in range(10):
ax2.plot(predictions_np[i], label=f"Sample {i}")
ax2.plot(next_day_np, label="Real NRV", linewidth=3)
ax2.legend()
return fig, fig2
def calculate_crps_from_samples(self, task, dataloader, epoch: int): def calculate_crps_from_samples(self, task, dataloader, epoch: int):
crps_from_samples_metric = [] crps_from_samples_metric = []

View File

@@ -261,8 +261,7 @@ class Trainer:
self.model.eval() self.model.eval()
# set full day skip # set full day skip
self.data_processor.set_full_day_skip(True) _, test_loader = self.data_processor.get_dataloaders(
train_loader, test_loader = self.data_processor.get_dataloaders(
predict_sequence_length=self.model.output_size predict_sequence_length=self.model.output_size
) )

View File

@@ -2,9 +2,7 @@ from src.utils.clearml import ClearMLHelper
#### ClearML #### #### ClearML ####
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast") clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
task = clearml_helper.get_task( task = clearml_helper.get_task(task_name="AQR: Non Linear")
task_name="Non Autoregressive Quantile Regression: Non Linear"
)
task.execute_remotely(queue_name="default", exit_process=True) task.execute_remotely(queue_name="default", exit_process=True)
from src.policies.PolicyEvaluator import PolicyEvaluator from src.policies.PolicyEvaluator import PolicyEvaluator
@@ -60,16 +58,16 @@ if quantiles is None:
quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99] quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]
task.set_parameter("general/quantiles", quantiles) task.set_parameter("general/quantiles", quantiles)
else: else:
# if string, convert to list "[0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]"" # if string, convert to list "[0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]"
if isinstance(quantiles, str): if isinstance(quantiles, str):
quantiles = eval(quantiles) quantiles = eval(quantiles)
model_parameters = { model_parameters = {
"learning_rate": 0.0001, "learning_rate": 0.0001,
"hidden_size": 256, "hidden_size": 512,
"num_layers": 4, "num_layers": 5,
"dropout": 0.2, "dropout": 0.2,
"time_feature_embedding": 16, "time_feature_embedding": 8,
} }
model_parameters = task.connect(model_parameters, name="model_parameters") model_parameters = task.connect(model_parameters, name="model_parameters")
@@ -77,7 +75,14 @@ model_parameters = task.connect(model_parameters, name="model_parameters")
time_embedding = TimeEmbedding( time_embedding = TimeEmbedding(
data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"] data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"]
) )
# lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=model_parameters["hidden_size"], num_layers=model_parameters["num_layers"], dropout=model_parameters["dropout"]) # lstm_model = GRUModel(
# time_embedding.output_dim(inputDim),
# len(quantiles),
# hidden_size=model_parameters["hidden_size"],
# num_layers=model_parameters["num_layers"],
# dropout=model_parameters["dropout"],
# )
non_linear_model = NonLinearRegression( non_linear_model = NonLinearRegression(
time_embedding.output_dim(inputDim), time_embedding.output_dim(inputDim),
len(quantiles), len(quantiles),
@@ -85,10 +90,11 @@ non_linear_model = NonLinearRegression(
numLayers=model_parameters["num_layers"], numLayers=model_parameters["num_layers"],
dropout=model_parameters["dropout"], dropout=model_parameters["dropout"],
) )
# linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles)) # linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
model = nn.Sequential(time_embedding, non_linear_model) model = nn.Sequential(time_embedding, non_linear_model)
model.output_size = 96 model.output_size = 1
optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"]) optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"])
### Policy Evaluator ### ### Policy Evaluator ###
@@ -122,18 +128,37 @@ trainer = AutoRegressiveQuantileTrainer(
trainer.add_metrics_to_track( trainer.add_metrics_to_track(
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)] [PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
) )
trainer.early_stopping(patience=30) trainer.early_stopping(patience=10)
trainer.plot_every(5) trainer.plot_every(5)
trainer.train(task=task, epochs=epochs, remotely=True) trainer.train(task=task, epochs=epochs, remotely=True)
### Policy Evaluation ### ### Policy Evaluation ###
idx_samples = trainer.test_set_samples idx_samples = trainer.test_set_samples
_, test_loader = trainer.data_processor.get_dataloaders( _, test_loader = trainer.data_processor.get_dataloaders(
predict_sequence_length=trainer.model.output_size, full_day_skip=True predict_sequence_length=trainer.model.output_size, full_day_skip=False
) )
policy_evaluator.evaluate_test_set(idx_samples, test_loader) # policy_evaluator.evaluate_test_set(idx_samples, test_loader)
policy_evaluator.plot_profits_table() # policy_evaluator.plot_profits_table()
policy_evaluator.plot_thresholds_per_day() # policy_evaluator.plot_thresholds_per_day()
optimal_penalty, profit, charge_cycles = (
policy_evaluator.optimize_penalty_for_target_charge_cycles(
idx_samples=idx_samples,
test_loader=test_loader,
initial_penalty=1000,
target_charge_cycles=283,
learning_rate=15,
max_iterations=150,
tolerance=1,
)
)
print(
f"Optimal Penalty: {optimal_penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}"
)
task.get_logger().report_single_value(name="Optimal Penalty", value=optimal_penalty)
task.get_logger().report_single_value(name="Optimal Profit", value=profit)
task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles)
task.close() task.close()

View File

@@ -2,7 +2,7 @@ from src.utils.clearml import ClearMLHelper
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast") clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
task = clearml_helper.get_task( task = clearml_helper.get_task(
task_name="Diffusion Training: hidden_sizes=[64, 64], lr=0.0001, time_dim=8" task_name="Diffusion Training: hidden_sizes=[256, 256], lr=0.0001, time_dim=8"
) )
task.execute_remotely(queue_name="default", exit_process=True) task.execute_remotely(queue_name="default", exit_process=True)
@@ -18,16 +18,16 @@ from src.policies.PolicyEvaluator import PolicyEvaluator
#### Data Processor #### #### Data Processor ####
data_config = DataConfig() data_config = DataConfig()
data_config.NRV_HISTORY = True data_config.NRV_HISTORY = True
data_config.LOAD_HISTORY = True data_config.LOAD_HISTORY = False
data_config.LOAD_FORECAST = True data_config.LOAD_FORECAST = False
data_config.WIND_FORECAST = True data_config.WIND_FORECAST = False
data_config.WIND_HISTORY = True data_config.WIND_HISTORY = False
data_config.QUARTER = False data_config.QUARTER = False
data_config.DAY_OF_WEEK = False data_config.DAY_OF_WEEK = False
data_config.NOMINAL_NET_POSITION = True data_config.NOMINAL_NET_POSITION = False
data_config = task.connect(data_config, name="data_features") data_config = task.connect(data_config, name="data_features")
@@ -39,9 +39,9 @@ inputDim = data_processor.get_input_size()
print("Input dim: ", inputDim) print("Input dim: ", inputDim)
model_parameters = { model_parameters = {
"epochs": 8000, "epochs": 15000,
"learning_rate": 0.0001, "learning_rate": 0.0001,
"hidden_sizes": [64, 64], "hidden_sizes": [256, 256],
"time_dim": 8, "time_dim": 8,
} }
@@ -54,7 +54,14 @@ model = SimpleDiffusionModel(
other_inputs_dim=inputDim[1], other_inputs_dim=inputDim[1],
time_dim=model_parameters["time_dim"], time_dim=model_parameters["time_dim"],
) )
# model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=128)
# model = GRUDiffusionModel(
# 96,
# model_parameters["hidden_sizes"],
# other_inputs_dim=inputDim[2],
# time_dim=model_parameters["time_dim"],
# gru_hidden_size=128,
# )
### Policy Evaluator ### ### Policy Evaluator ###
battery = Battery(2, 1) battery = Battery(2, 1)