Updated training scripts
This commit is contained in:
@@ -5,6 +5,7 @@ import pandas as pd
|
||||
import numpy as np
|
||||
import torch
|
||||
import plotly.express as px
|
||||
from functools import lru_cache
|
||||
|
||||
from src.utils.imbalance_price_calculator import ImbalancePriceCalculator
|
||||
|
||||
@@ -24,11 +25,14 @@ class PolicyEvaluator:
|
||||
)
|
||||
self.imbalance_prices = imbalance_prices.sort_values(by=["DateTime"])
|
||||
|
||||
self.penalties = [0, 100, 300, 500, 800, 1000, 1500]
|
||||
self.penalties = [0, 1000, 1500]
|
||||
self.profits = []
|
||||
|
||||
self.task = task
|
||||
|
||||
self.cache = {}
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_imbanlance_prices_for_date(self, date):
|
||||
imbalance_prices_day = self.imbalance_prices[
|
||||
self.imbalance_prices["DateTime"].dt.date == date
|
||||
@@ -40,69 +44,152 @@ class PolicyEvaluator:
|
||||
date,
|
||||
idx_samples,
|
||||
test_loader,
|
||||
charge_thresholds=np.arange(-100, 250, 25),
|
||||
discharge_thresholds=np.arange(-100, 250, 25),
|
||||
charge_thresholds=np.arange(-1500, 1500, 50),
|
||||
discharge_thresholds=np.arange(-1500, 1500, 50),
|
||||
penalty: int = 0,
|
||||
):
|
||||
idx = test_loader.dataset.get_idx_for_date(date.date())
|
||||
|
||||
if idx not in idx_samples:
|
||||
print("No samples for idx: ", idx, date)
|
||||
(initial, samples) = idx_samples[idx]
|
||||
|
||||
if len(initial.shape) == 2:
|
||||
initial = initial.cpu().numpy()[0][-1]
|
||||
if date in self.cache:
|
||||
(reconstructed_imbalance_prices, real_imbalance_prices) = self.cache[date]
|
||||
else:
|
||||
initial = initial.cpu().numpy()[-1]
|
||||
samples = samples.cpu().numpy()
|
||||
idx = test_loader.dataset.get_idx_for_date(date.date())
|
||||
|
||||
initial = np.repeat(initial, samples.shape[0])
|
||||
combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1)
|
||||
if idx not in idx_samples:
|
||||
print("No samples for idx: ", idx, date)
|
||||
(initial, samples) = idx_samples[idx]
|
||||
|
||||
reconstructed_imbalance_prices = (
|
||||
self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined)
|
||||
)
|
||||
reconstructed_imbalance_prices = torch.tensor(
|
||||
reconstructed_imbalance_prices, device="cuda"
|
||||
if len(initial.shape) == 2:
|
||||
initial = initial.cpu().numpy()[0][-1]
|
||||
else:
|
||||
initial = initial.cpu().numpy()[-1]
|
||||
samples = samples.cpu().numpy()
|
||||
|
||||
initial = np.repeat(initial, samples.shape[0])
|
||||
combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1)
|
||||
|
||||
reconstructed_imbalance_prices = (
|
||||
self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined)
|
||||
)
|
||||
reconstructed_imbalance_prices = torch.tensor(
|
||||
reconstructed_imbalance_prices, device="cuda"
|
||||
)
|
||||
|
||||
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
|
||||
self.cache[date] = (reconstructed_imbalance_prices, real_imbalance_prices)
|
||||
|
||||
return self.profit_for_penalty(
|
||||
reconstructed_imbalance_prices,
|
||||
real_imbalance_prices,
|
||||
penalty,
|
||||
charge_thresholds,
|
||||
discharge_thresholds,
|
||||
)
|
||||
|
||||
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
|
||||
|
||||
for penalty in self.penalties:
|
||||
found_charge_thresholds, found_discharge_thresholds = (
|
||||
self.baseline_policy.get_optimal_thresholds(
|
||||
reconstructed_imbalance_prices,
|
||||
charge_thresholds,
|
||||
discharge_thresholds,
|
||||
penalty,
|
||||
)
|
||||
def optimize_penalty_for_target_charge_cycles(
|
||||
self,
|
||||
idx_samples,
|
||||
test_loader,
|
||||
initial_penalty,
|
||||
target_charge_cycles,
|
||||
learning_rate=2,
|
||||
max_iterations=10,
|
||||
tolerance=10,
|
||||
):
|
||||
self.cache = {}
|
||||
penalty = initial_penalty
|
||||
for iteration in range(max_iterations):
|
||||
# Calculate profit and charge cycles for the current penalty
|
||||
simulated_profit, simulated_charge_cycles = (
|
||||
self.evaluate_test_set_for_penalty(idx_samples, test_loader, penalty)
|
||||
)
|
||||
|
||||
predicted_charge_threshold = found_charge_thresholds.mean(axis=0)
|
||||
predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0)
|
||||
print(
|
||||
f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}"
|
||||
)
|
||||
|
||||
### Determine Profits and Charge Cycles ###
|
||||
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
|
||||
torch.tensor([[real_imbalance_prices]]),
|
||||
torch.tensor([predicted_charge_threshold]),
|
||||
torch.tensor([predicted_discharge_threshold]),
|
||||
# Calculate the gradient (difference) between the simulated and target charge cycles
|
||||
gradient = simulated_charge_cycles - target_charge_cycles
|
||||
|
||||
# Update the penalty parameter in the direction of the gradient
|
||||
penalty += learning_rate * gradient
|
||||
|
||||
# Check if the charge cycles are close enough to the target
|
||||
if abs(gradient) < tolerance:
|
||||
print(f"Optimal penalty found after {iteration+1} iterations")
|
||||
break
|
||||
else:
|
||||
print(
|
||||
f"Reached max iterations ({max_iterations}) without converging to the target charge cycles"
|
||||
)
|
||||
self.profits.append(
|
||||
[
|
||||
date,
|
||||
penalty,
|
||||
simulated_profit[0][0].item(),
|
||||
simulated_charge_cycles[0][0].item(),
|
||||
predicted_charge_threshold.item(),
|
||||
predicted_discharge_threshold.item(),
|
||||
]
|
||||
|
||||
# Re-calculate profit and charge cycles for the final penalty to return accurate results
|
||||
profit, charge_cycles = self.evaluate_test_set_for_penalty(
|
||||
idx_samples, test_loader, penalty
|
||||
)
|
||||
|
||||
return penalty, profit, charge_cycles
|
||||
|
||||
def profit_for_penalty(
    self,
    reconstructed_imbalance_prices,
    real_imbalance_prices,
    penalty: int,
    charge_thresholds,
    discharge_thresholds,
):
    """Choose thresholds on reconstructed prices and simulate them on real prices.

    Args:
        reconstructed_imbalance_prices: predicted imbalance prices handed to
            ``baseline_policy.get_optimal_thresholds``.
        real_imbalance_prices: real imbalance prices the chosen thresholds
            are simulated against.
        penalty (int): penalty parameter forwarded to the threshold search.
        charge_thresholds: candidate charge thresholds.
        discharge_thresholds: candidate discharge thresholds.

    Returns:
        tuple: ``(profit, charge_cycles, charge_threshold,
        discharge_threshold)``, each extracted with ``.item()``.
    """
    policy = self.baseline_policy

    per_sample_charge, per_sample_discharge = policy.get_optimal_thresholds(
        reconstructed_imbalance_prices,
        charge_thresholds,
        discharge_thresholds,
        penalty,
    )

    # Collapse the found thresholds along axis 0 into one threshold each
    # (presumably one entry per reconstructed sample -- confirm with caller).
    charge_threshold = per_sample_charge.mean(axis=0)
    discharge_threshold = per_sample_discharge.mean(axis=0)

    # Profit and charge cycles are measured on the real prices.
    profit, cycles = policy.simulate(
        torch.tensor([[real_imbalance_prices]]),
        torch.tensor([charge_threshold]),
        torch.tensor([discharge_threshold]),
    )

    return (
        profit[0][0].item(),
        cycles[0][0].item(),
        charge_threshold.item(),
        discharge_threshold.item(),
    )
|
||||
|
||||
def evaluate_test_set(self, idx_samples, test_loader):
|
||||
self.profits = []
|
||||
self.cache = {}
|
||||
|
||||
for date in tqdm(self.dates):
|
||||
try:
|
||||
self.evaluate_for_date(date, idx_samples, test_loader)
|
||||
for penalty in self.penalties:
|
||||
self.profits.append(
|
||||
[
|
||||
date,
|
||||
penalty,
|
||||
*self.evaluate_for_date(
|
||||
date, idx_samples, test_loader, penalty=penalty
|
||||
),
|
||||
]
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
print("Interrupted")
|
||||
raise KeyboardInterrupt
|
||||
@@ -123,6 +210,27 @@ class PolicyEvaluator:
|
||||
],
|
||||
)
|
||||
|
||||
def evaluate_test_set_for_penalty(self, idx_samples, test_loader, penalty):
    """Sum profit and charge cycles over ``self.dates`` for one penalty.

    Every date is evaluated with ``self.evaluate_for_date``. A date whose
    evaluation raises is reported with ``print`` and left out of the totals
    (best effort); a keyboard interrupt is propagated.

    Args:
        idx_samples: forwarded unchanged to ``evaluate_for_date``.
        test_loader: forwarded unchanged to ``evaluate_for_date``.
        penalty: penalty passed to ``evaluate_for_date``.

    Returns:
        tuple: ``(total_profit, total_charge_cycles)``.
    """
    total_profit = 0
    total_charge_cycles = 0

    for date in tqdm(self.dates):
        try:
            profit, charge_cycles, _, _ = self.evaluate_for_date(
                date, idx_samples, test_loader, penalty=penalty
            )
            total_profit += profit
            total_charge_cycles += charge_cycles
        except KeyboardInterrupt:
            print("Interrupted")
            # Bare raise re-raises the active exception with its original
            # traceback instead of constructing a new KeyboardInterrupt.
            raise
        except Exception as e:
            # Deliberate best effort: report the failure and skip this date.
            print(e)

    return total_profit, total_charge_cycles
|
||||
|
||||
def plot_profits_table(self):
|
||||
# Check if task or penalties are not set
|
||||
if (
|
||||
|
||||
@@ -13,49 +13,46 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
|
||||
|
||||
self.train_profits = []
|
||||
|
||||
def determine_thresholds_for_date(self, date):
|
||||
charge_thresholds = np.arange(-100, 250, 25)
|
||||
discharge_thresholds = np.arange(-100, 250, 25)
|
||||
def determine_thresholds_for_date(self, date, penalty):
|
||||
charge_thresholds = np.arange(-500, 500, 25)
|
||||
discharge_thresholds = np.arange(-500, 500, 25)
|
||||
|
||||
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
|
||||
|
||||
for penalty in self.penalties:
|
||||
found_charge_thresholds, found_discharge_thresholds = (
|
||||
self.baseline_policy.get_optimal_thresholds(
|
||||
torch.tensor([real_imbalance_prices]),
|
||||
charge_thresholds,
|
||||
discharge_thresholds,
|
||||
penalty,
|
||||
)
|
||||
found_charge_thresholds, found_discharge_thresholds = (
|
||||
self.baseline_policy.get_optimal_thresholds(
|
||||
torch.tensor([real_imbalance_prices]),
|
||||
charge_thresholds,
|
||||
discharge_thresholds,
|
||||
penalty,
|
||||
)
|
||||
)
|
||||
|
||||
best_charge_threshold = found_charge_thresholds
|
||||
best_discharge_threshold = found_discharge_thresholds
|
||||
best_charge_threshold = found_charge_thresholds
|
||||
best_discharge_threshold = found_discharge_thresholds
|
||||
|
||||
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
|
||||
torch.tensor([[real_imbalance_prices]]),
|
||||
torch.tensor([best_charge_threshold]),
|
||||
torch.tensor([best_discharge_threshold]),
|
||||
)
|
||||
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
|
||||
torch.tensor([[real_imbalance_prices]]),
|
||||
torch.tensor([best_charge_threshold]),
|
||||
torch.tensor([best_discharge_threshold]),
|
||||
)
|
||||
|
||||
self.train_profits.append(
|
||||
[
|
||||
date,
|
||||
penalty,
|
||||
simulated_profit[0][0].item(),
|
||||
simulated_charge_cycles[0][0].item(),
|
||||
best_charge_threshold.item(),
|
||||
best_discharge_threshold.item(),
|
||||
]
|
||||
)
|
||||
self.train_profits.append(
|
||||
[
|
||||
simulated_profit[0][0].item(),
|
||||
simulated_charge_cycles[0][0].item(),
|
||||
best_charge_threshold.item(),
|
||||
best_discharge_threshold.item(),
|
||||
]
|
||||
)
|
||||
|
||||
def determine_best_thresholds(self):
|
||||
def determine_best_thresholds(self, penalty):
|
||||
self.train_profits = []
|
||||
dates = self.baseline_policy.train_data["DateTime"].dt.date.unique()
|
||||
dates = pd.to_datetime(dates)
|
||||
try:
|
||||
for date in tqdm(dates):
|
||||
self.determine_thresholds_for_date(date)
|
||||
self.determine_thresholds_for_date(date, penalty)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
pass
|
||||
@@ -63,8 +60,6 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
|
||||
self.train_profits = pd.DataFrame(
|
||||
self.train_profits,
|
||||
columns=[
|
||||
"Date",
|
||||
"Penalty",
|
||||
"Profit",
|
||||
"Charge Cycles",
|
||||
"Charge Threshold",
|
||||
@@ -72,91 +67,18 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
|
||||
],
|
||||
)
|
||||
|
||||
number_of_days = len(self.train_profits["Date"].unique())
|
||||
usable_charge_cycles = (400 / 365) * number_of_days
|
||||
# get the best thresholds combination based on the sum of profits
|
||||
best_thresholds = self.train_profits.groupby(
|
||||
["Charge Threshold", "Discharge Threshold"]
|
||||
).sum()["Profit"]
|
||||
|
||||
intermediate_values = {penalty: {} for penalty in self.penalties}
|
||||
best_thresholds = best_thresholds.idxmax()
|
||||
return (best_thresholds[0], best_thresholds[1])
|
||||
|
||||
# find the best threshold combination for each penalty based on the total profit on the data
|
||||
for penalty in self.penalties:
|
||||
profits_for_penalty = self.train_profits[
|
||||
self.train_profits["Penalty"] == penalty
|
||||
]
|
||||
|
||||
for index, row in profits_for_penalty.iterrows():
|
||||
charge_threshold = row["Charge Threshold"]
|
||||
discharge_threshold = row["Discharge Threshold"]
|
||||
|
||||
if (charge_threshold, discharge_threshold) not in intermediate_values[
|
||||
penalty
|
||||
]:
|
||||
intermediate_values[penalty][
|
||||
(charge_threshold, discharge_threshold)
|
||||
] = (0, 0)
|
||||
|
||||
new_charge_cycles = (
|
||||
intermediate_values[penalty][
|
||||
(charge_threshold, discharge_threshold)
|
||||
][1]
|
||||
+ row["Charge Cycles"]
|
||||
)
|
||||
new_profit = (
|
||||
intermediate_values[penalty][
|
||||
(charge_threshold, discharge_threshold)
|
||||
][0]
|
||||
+ row["Profit"]
|
||||
)
|
||||
|
||||
if new_charge_cycles <= usable_charge_cycles:
|
||||
intermediate_values[penalty][
|
||||
(charge_threshold, discharge_threshold)
|
||||
] = (new_profit, new_charge_cycles)
|
||||
|
||||
best_thresholds = {penalty: [0, 0, 0, 0] for penalty in self.penalties}
|
||||
|
||||
for penalty in self.penalties:
|
||||
best_profit = 0
|
||||
for threshold, values in intermediate_values[penalty].items():
|
||||
if values[0] > best_profit:
|
||||
best_profit = values[0]
|
||||
best_thresholds[penalty][0] = threshold[0]
|
||||
best_thresholds[penalty][1] = threshold[1]
|
||||
best_thresholds[penalty][2] = best_profit
|
||||
best_thresholds[penalty][3] = values[1]
|
||||
|
||||
# create dataframe from best_thresholds with columns, Penalty, Charge Threshold, Discharge Threshold, Profit
|
||||
data = [
|
||||
(penalty, values[0], values[1], values[2], values[3])
|
||||
for penalty, values in best_thresholds.items()
|
||||
]
|
||||
|
||||
best_thresholds_df = pd.DataFrame(
|
||||
data,
|
||||
columns=[
|
||||
"Penalty",
|
||||
"Charge Threshold",
|
||||
"Discharge Threshold",
|
||||
"Profit (training data)",
|
||||
f"Charge Cycles (training data: max {usable_charge_cycles})",
|
||||
],
|
||||
)
|
||||
|
||||
if self.task:
|
||||
self.task.get_logger().report_table(
|
||||
"Baseline Train Data",
|
||||
"Best Thresholds for each Penalty on Training Data (up to 400 cycles / year)",
|
||||
iteration=0,
|
||||
table_plot=best_thresholds_df,
|
||||
)
|
||||
|
||||
return best_thresholds
|
||||
|
||||
def evaluate_test_set(self, thresholds: dict, data_processor=None):
|
||||
"""Evaluate the test set using the given thresholds (multiple penalties)
|
||||
|
||||
Args:
|
||||
thresholds (dict): Dictionary with penalties as keys and the corresponding thresholds tuple as values
|
||||
"""
|
||||
def evaluate_test_set(
|
||||
self, charge_threshold, discharge_threshold, data_processor=None
|
||||
):
|
||||
"""Evaluate the test set using the given thresholds"""
|
||||
self.profits = []
|
||||
|
||||
if data_processor:
|
||||
@@ -173,40 +95,63 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
|
||||
try:
|
||||
for date in tqdm(self.dates):
|
||||
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
|
||||
for penalty in thresholds.keys():
|
||||
charge_threshold = thresholds[penalty][0]
|
||||
discharge_threshold = thresholds[penalty][1]
|
||||
|
||||
simulated_profit, simulated_charge_cycles = (
|
||||
self.baseline_policy.simulate(
|
||||
torch.tensor([[real_imbalance_prices]]),
|
||||
torch.tensor([charge_threshold]),
|
||||
torch.tensor([discharge_threshold]),
|
||||
)
|
||||
simulated_profit, simulated_charge_cycles = (
|
||||
self.baseline_policy.simulate(
|
||||
torch.tensor([[real_imbalance_prices]]),
|
||||
torch.tensor([charge_threshold]),
|
||||
torch.tensor([discharge_threshold]),
|
||||
)
|
||||
)
|
||||
|
||||
self.profits.append(
|
||||
[
|
||||
date,
|
||||
penalty,
|
||||
simulated_profit[0][0].item(),
|
||||
simulated_charge_cycles[0][0].item(),
|
||||
charge_threshold,
|
||||
discharge_threshold,
|
||||
]
|
||||
)
|
||||
self.profits.append(
|
||||
[
|
||||
date,
|
||||
simulated_profit[0][0].item(),
|
||||
simulated_charge_cycles[0][0].item(),
|
||||
]
|
||||
)
|
||||
|
||||
self.profits = pd.DataFrame(
|
||||
self.profits,
|
||||
columns=[
|
||||
"Date",
|
||||
"Penalty",
|
||||
"Profit",
|
||||
"Charge Cycles",
|
||||
"Charge Threshold",
|
||||
"Discharge Threshold",
|
||||
],
|
||||
columns=["Date", "Profit", "Charge Cycles"],
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
pass
|
||||
|
||||
# return the total profit and total charge cycles
|
||||
return self.profits["Profit"].sum(), self.profits["Charge Cycles"].sum()
|
||||
|
||||
def optimize_penalty_for_target_charge_cycles(
    self,
    initial_penalty,
    target_charge_cycles,
    learning_rate=2,
    max_iterations=10,
    tolerance=10,
):
    """Iteratively adjust the penalty until the charge cycles hit a target.

    Each iteration calls ``self.determine_best_thresholds(penalty)``, feeds
    the resulting thresholds to ``self.evaluate_test_set`` and moves the
    penalty by ``learning_rate * (total_charge_cycles - target_charge_cycles)``.

    Args:
        initial_penalty: penalty used in the first iteration.
        target_charge_cycles: number of charge cycles to aim for.
        learning_rate: step size applied to the charge-cycle difference.
        max_iterations: maximum number of iterations; must be at least 1.
        tolerance: the search stops once the absolute charge-cycle
            difference is below this value.

    Returns:
        tuple: ``(penalty, total_profit, total_charge_cycles)``. On
        convergence ``penalty`` is the value that produced the returned
        profit and charge cycles. If the iteration limit is reached,
        ``penalty`` is the next (not yet evaluated) candidate while profit
        and charge cycles belong to the last evaluated penalty.

    Raises:
        ValueError: if ``max_iterations`` is smaller than 1, because then
            there would be no evaluation result to return.
    """
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    penalty = initial_penalty

    for i in range(max_iterations):
        charge_threshold, discharge_threshold = self.determine_best_thresholds(
            penalty
        )
        total_profit, total_charge_cycles = self.evaluate_test_set(
            charge_threshold, discharge_threshold
        )

        gradient = total_charge_cycles - target_charge_cycles

        # Log before touching the penalty so the printed penalty is the one
        # that actually produced the printed profit and charge cycles.
        print(
            f"Iteration {i+1}: Penalty: {penalty}, Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}, Gradient: {gradient}, Charge Threshold: {charge_threshold}, Discharge Threshold: {discharge_threshold}"
        )

        # Check convergence before updating; otherwise the returned penalty
        # would be shifted away from the one that met the target.
        if abs(gradient) < tolerance:
            print(f"Optimal penalty found after {i+1} iterations")
            break

        penalty += learning_rate * gradient
    else:
        print(f"Optimal penalty not found after {max_iterations} iterations")

    return penalty, total_profit, total_charge_cycles
|
||||
|
||||
@@ -17,6 +17,7 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
|
||||
date,
|
||||
charge_thresholds=np.arange(-100, 250, 25),
|
||||
discharge_thresholds=np.arange(-100, 250, 25),
|
||||
penalty: int = 0
|
||||
):
|
||||
|
||||
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
|
||||
@@ -27,7 +28,6 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
|
||||
np.array([yesterday_imbalance_prices]), device="cpu"
|
||||
)
|
||||
|
||||
for penalty in self.penalties:
|
||||
yesterday_charge_thresholds, yesterday_discharge_thresholds = (
|
||||
self.baseline_policy.get_optimal_thresholds(
|
||||
yesterday_imbalance_prices,
|
||||
|
||||
@@ -32,9 +32,14 @@ battery = Battery(2, 1)
|
||||
baseline_policy = BaselinePolicy(battery, data_path="")
|
||||
policy_evaluator = BaselinePolicyEvaluator(baseline_policy, task)
|
||||
|
||||
thresholds = policy_evaluator.determine_best_thresholds()
|
||||
policy_evaluator.evaluate_test_set(thresholds, data_processor=data_processor)
|
||||
|
||||
policy_evaluator.plot_profits_table()
|
||||
|
||||
# optimize_penalty_for_target_charge_cycles returns three values
# (penalty, total_profit, total_charge_cycles); unpacking into two names
# would raise "ValueError: too many values to unpack".
penalty, total_profit, total_charge_cycles = (
    policy_evaluator.optimize_penalty_for_target_charge_cycles(
        initial_penalty=100,
        target_charge_cycles=283,
        learning_rate=0.2,
        max_iterations=150,
        tolerance=1,
    )
)
|
||||
print(f"Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}")
|
||||
task.close()
|
||||
|
||||
Reference in New Issue
Block a user