Updated training scripts

This commit is contained in:
2024-03-18 12:15:06 +01:00
parent 34335cd9fe
commit 1a8e735cbc
10 changed files with 487 additions and 308 deletions

View File

@@ -5,6 +5,7 @@ import pandas as pd
import numpy as np
import torch
import plotly.express as px
from functools import lru_cache
from src.utils.imbalance_price_calculator import ImbalancePriceCalculator
@@ -24,11 +25,14 @@ class PolicyEvaluator:
)
self.imbalance_prices = imbalance_prices.sort_values(by=["DateTime"])
self.penalties = [0, 100, 300, 500, 800, 1000, 1500]
self.penalties = [0, 1000, 1500]
self.profits = []
self.task = task
self.cache = {}
@lru_cache(maxsize=None)
def get_imbanlance_prices_for_date(self, date):
imbalance_prices_day = self.imbalance_prices[
self.imbalance_prices["DateTime"].dt.date == date
@@ -40,69 +44,152 @@ class PolicyEvaluator:
date,
idx_samples,
test_loader,
charge_thresholds=np.arange(-100, 250, 25),
discharge_thresholds=np.arange(-100, 250, 25),
charge_thresholds=np.arange(-1500, 1500, 50),
discharge_thresholds=np.arange(-1500, 1500, 50),
penalty: int = 0,
):
idx = test_loader.dataset.get_idx_for_date(date.date())
if idx not in idx_samples:
print("No samples for idx: ", idx, date)
(initial, samples) = idx_samples[idx]
if len(initial.shape) == 2:
initial = initial.cpu().numpy()[0][-1]
if date in self.cache:
(reconstructed_imbalance_prices, real_imbalance_prices) = self.cache[date]
else:
initial = initial.cpu().numpy()[-1]
samples = samples.cpu().numpy()
idx = test_loader.dataset.get_idx_for_date(date.date())
initial = np.repeat(initial, samples.shape[0])
combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1)
if idx not in idx_samples:
print("No samples for idx: ", idx, date)
(initial, samples) = idx_samples[idx]
reconstructed_imbalance_prices = (
self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined)
)
reconstructed_imbalance_prices = torch.tensor(
reconstructed_imbalance_prices, device="cuda"
if len(initial.shape) == 2:
initial = initial.cpu().numpy()[0][-1]
else:
initial = initial.cpu().numpy()[-1]
samples = samples.cpu().numpy()
initial = np.repeat(initial, samples.shape[0])
combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1)
reconstructed_imbalance_prices = (
self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined)
)
reconstructed_imbalance_prices = torch.tensor(
reconstructed_imbalance_prices, device="cuda"
)
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
self.cache[date] = (reconstructed_imbalance_prices, real_imbalance_prices)
return self.profit_for_penalty(
reconstructed_imbalance_prices,
real_imbalance_prices,
penalty,
charge_thresholds,
discharge_thresholds,
)
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
for penalty in self.penalties:
found_charge_thresholds, found_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds(
reconstructed_imbalance_prices,
charge_thresholds,
discharge_thresholds,
penalty,
)
def optimize_penalty_for_target_charge_cycles(
    self,
    idx_samples,
    test_loader,
    initial_penalty,
    target_charge_cycles,
    learning_rate=2,
    max_iterations=10,
    tolerance=10,
):
    """Tune the penalty parameter so simulated charge cycles approach a target.

    Runs a simple gradient-style search: each iteration evaluates the whole
    test set at the current penalty, then nudges the penalty proportionally
    to how far the simulated charge-cycle count is from the target.

    Args:
        idx_samples: mapping from dataset index to (initial, samples) pairs.
        test_loader: loader whose dataset maps dates to indices.
        initial_penalty: starting value of the penalty parameter.
        target_charge_cycles: desired total number of charge cycles.
        learning_rate: step size applied to the cycle-count gradient.
        max_iterations: maximum number of search iterations.
        tolerance: stop once abs(simulated - target) falls below this.

    Returns:
        Tuple (penalty, profit, charge_cycles), where profit and
        charge_cycles are re-evaluated at the final penalty value.
    """
    # Reset the per-date cache so evaluations reflect the current samples.
    self.cache = {}
    penalty = initial_penalty
    for iteration in range(max_iterations):
        # Profit and charge cycles over the full test set at this penalty.
        simulated_profit, simulated_charge_cycles = (
            self.evaluate_test_set_for_penalty(idx_samples, test_loader, penalty)
        )
        print(
            f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}"
        )
        # Difference between simulated and target cycles drives the update.
        gradient = simulated_charge_cycles - target_charge_cycles
        # Move the penalty in the direction that closes the gap.
        penalty += learning_rate * gradient
        if abs(gradient) < tolerance:
            print(f"Optimal penalty found after {iteration+1} iterations")
            break
    else:
        # for/else: only reached when the loop was never broken out of.
        print(
            f"Reached max iterations ({max_iterations}) without converging to the target charge cycles"
        )
    # The penalty was updated after the last evaluation, so re-evaluate at
    # the final value to return numbers that actually correspond to it.
    profit, charge_cycles = self.evaluate_test_set_for_penalty(
        idx_samples, test_loader, penalty
    )
    return penalty, profit, charge_cycles
def profit_for_penalty(
    self,
    reconstructed_imbalance_prices,
    real_imbalance_prices,
    penalty: int,
    charge_thresholds,
    discharge_thresholds,
):
    """Simulate profit on real prices using thresholds fitted on predicted prices.

    Optimal charge/discharge thresholds are searched on the reconstructed
    (predicted) imbalance prices, averaged over axis 0 — presumably one row
    per sample (TODO confirm against the caller) — and the resulting single
    threshold pair is then simulated against the real imbalance prices.

    Args:
        reconstructed_imbalance_prices: predicted imbalance prices used to
            search for the optimal thresholds.
        real_imbalance_prices: observed imbalance prices that the fitted
            thresholds are evaluated on.
        penalty (int): penalty parameter forwarded to the threshold search.
        charge_thresholds: candidate charge thresholds to search over.
        discharge_thresholds: candidate discharge thresholds to search over.

    Returns:
        Tuple of (simulated profit, simulated charge cycles, mean charge
        threshold, mean discharge threshold), each as a Python scalar.
    """
    found_charge_thresholds, found_discharge_thresholds = (
        self.baseline_policy.get_optimal_thresholds(
            reconstructed_imbalance_prices,
            charge_thresholds,
            discharge_thresholds,
            penalty,
        )
    )
    # Collapse the per-row optima into a single threshold pair.
    predicted_charge_threshold = found_charge_thresholds.mean(axis=0)
    predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0)
    ### Determine Profits and Charge Cycles ###
    # simulate() appears to expect batched inputs, hence the extra nesting
    # around the single price series and threshold scalars.
    simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
        torch.tensor([[real_imbalance_prices]]),
        torch.tensor([predicted_charge_threshold]),
        torch.tensor([predicted_discharge_threshold]),
    )
    return (
        simulated_profit[0][0].item(),
        simulated_charge_cycles[0][0].item(),
        predicted_charge_threshold.item(),
        predicted_discharge_threshold.item(),
    )
def evaluate_test_set(self, idx_samples, test_loader):
self.profits = []
self.cache = {}
for date in tqdm(self.dates):
try:
self.evaluate_for_date(date, idx_samples, test_loader)
for penalty in self.penalties:
self.profits.append(
[
date,
penalty,
*self.evaluate_for_date(
date, idx_samples, test_loader, penalty=penalty
),
]
)
except KeyboardInterrupt:
print("Interrupted")
raise KeyboardInterrupt
@@ -123,6 +210,27 @@ class PolicyEvaluator:
],
)
def evaluate_test_set_for_penalty(self, idx_samples, test_loader, penalty):
    """Sum profit and charge cycles over all evaluation dates for one penalty.

    Dates whose evaluation fails are logged and skipped (best-effort
    aggregation), but a KeyboardInterrupt always propagates so a long run
    can be aborted.

    Args:
        idx_samples: mapping from dataset index to (initial, samples) pairs.
        test_loader: loader whose dataset maps dates to indices.
        penalty: penalty parameter forwarded to the per-date evaluation.

    Returns:
        Tuple (total_profit, total_charge_cycles) accumulated across dates.
    """
    total_profit = 0
    total_charge_cycles = 0
    for date in tqdm(self.dates):
        try:
            profit, charge_cycles, _, _ = self.evaluate_for_date(
                date, idx_samples, test_loader, penalty=penalty
            )
        except KeyboardInterrupt:
            print("Interrupted")
            # Bare raise preserves the original traceback; re-raising a
            # fresh KeyboardInterrupt instance would discard it.
            raise
        except Exception as e:
            # Best-effort: report the failure and continue with the rest.
            print(e)
            continue
        total_profit += profit
        total_charge_cycles += charge_cycles
    return total_profit, total_charge_cycles
def plot_profits_table(self):
# Check if task or penalties are not set
if (