Finished baseline policy evaluator
This commit is contained in:
@@ -121,9 +121,6 @@ class PolicyEvaluator:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
print("Profits calculated")
|
|
||||||
print(self.profits.head())
|
|
||||||
|
|
||||||
def plot_profits_table(self):
|
def plot_profits_table(self):
|
||||||
# Check if task or penalties are not set
|
# Check if task or penalties are not set
|
||||||
if (
|
if (
|
||||||
@@ -157,7 +154,11 @@ class PolicyEvaluator:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Rename columns to match expected output
|
# Rename columns to match expected output
|
||||||
final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"]
|
final_df.columns = [
|
||||||
|
"Penalty",
|
||||||
|
"Total Profit (per year)",
|
||||||
|
"Total Charge Cycles (per year)",
|
||||||
|
]
|
||||||
|
|
||||||
# Profits till 400
|
# Profits till 400
|
||||||
profits_till_400 = self.get_profits_till_400()
|
profits_till_400 = self.get_profits_till_400()
|
||||||
@@ -167,7 +168,7 @@ class PolicyEvaluator:
|
|||||||
|
|
||||||
# Log the final results table
|
# Log the final results table
|
||||||
self.task.get_logger().report_table(
|
self.task.get_logger().report_table(
|
||||||
"Policy Results", "Policy Results", iteration=0, table_plot=final_df
|
"Test Set Results", "Profits per Penalty", iteration=0, table_plot=final_df
|
||||||
)
|
)
|
||||||
|
|
||||||
def plot_thresholds_per_day(self):
|
def plot_thresholds_per_day(self):
|
||||||
@@ -213,16 +214,19 @@ class PolicyEvaluator:
|
|||||||
final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"]
|
final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"]
|
||||||
return final_df
|
return final_df
|
||||||
|
|
||||||
def get_profits_till_400(self):
|
def get_profits_till_400(self, profits: pd.DataFrame = None):
|
||||||
|
if profits is None:
|
||||||
|
profits = self.profits
|
||||||
|
|
||||||
# calculates profits until 400 charge cycles per year are reached
|
# calculates profits until 400 charge cycles per year are reached
|
||||||
number_of_days = len(self.profits["Date"].unique())
|
number_of_days = len(profits["Date"].unique())
|
||||||
usable_charge_cycles = (400 / 365) * number_of_days
|
usable_charge_cycles = (400 / 365) * number_of_days
|
||||||
|
|
||||||
# now sum the profit until the usable charge cycles are reached
|
# now sum the profit until the usable charge cycles are reached
|
||||||
penalty_profits = {}
|
penalty_profits = {}
|
||||||
penalty_charge_cycles = {}
|
penalty_charge_cycles = {}
|
||||||
|
|
||||||
for index, row in self.profits.iterrows():
|
for index, row in profits.iterrows():
|
||||||
penalty = row["Penalty"]
|
penalty = row["Penalty"]
|
||||||
profit = row["Profit"]
|
profit = row["Profit"]
|
||||||
charge_cycles = row["Charge Cycles"]
|
charge_cycles = row["Charge Cycles"]
|
||||||
|
|||||||
@@ -9,11 +9,9 @@ import torch
|
|||||||
|
|
||||||
class BaselinePolicyEvaluator(PolicyEvaluator):
|
class BaselinePolicyEvaluator(PolicyEvaluator):
|
||||||
def __init__(self, baseline_policy: BaselinePolicy, task: Task = None):
|
def __init__(self, baseline_policy: BaselinePolicy, task: Task = None):
|
||||||
super(baseline_policy, task)
|
super(BaselinePolicyEvaluator, self).__init__(baseline_policy, task)
|
||||||
self.dates = baseline_policy.train_data["DateTime"].dt.date.unique()
|
|
||||||
self.dates = pd.to_datetime(self.dates)
|
self.train_profits = []
|
||||||
self.penalties = [0, 100, 300, 500, 800, 1000, 1500]
|
|
||||||
self.profits = []
|
|
||||||
|
|
||||||
def determine_thresholds_for_date(self, date):
|
def determine_thresholds_for_date(self, date):
|
||||||
charge_thresholds = np.arange(-100, 250, 25)
|
charge_thresholds = np.arange(-100, 250, 25)
|
||||||
@@ -31,8 +29,8 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
best_charge_threshold = found_charge_thresholds.item()
|
best_charge_threshold = found_charge_thresholds
|
||||||
best_discharge_threshold = found_discharge_thresholds.item()
|
best_discharge_threshold = found_discharge_thresholds
|
||||||
|
|
||||||
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
|
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
|
||||||
torch.tensor([[real_imbalance_prices]]),
|
torch.tensor([[real_imbalance_prices]]),
|
||||||
@@ -40,7 +38,7 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
|
|||||||
torch.tensor([best_discharge_threshold]),
|
torch.tensor([best_discharge_threshold]),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.profits.append(
|
self.train_profits.append(
|
||||||
[
|
[
|
||||||
date,
|
date,
|
||||||
penalty,
|
penalty,
|
||||||
@@ -52,16 +50,18 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def determine_best_thresholds(self):
|
def determine_best_thresholds(self):
|
||||||
self.profits = []
|
self.train_profits = []
|
||||||
|
dates = self.baseline_policy.train_data["DateTime"].dt.date.unique()
|
||||||
|
dates = pd.to_datetime(dates)
|
||||||
try:
|
try:
|
||||||
for date in tqdm(self.dates):
|
for date in tqdm(dates):
|
||||||
self.determine_thresholds_for_date(date)
|
self.determine_thresholds_for_date(date)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
self.profits = pd.DataFrame(
|
self.train_profits = pd.DataFrame(
|
||||||
self.profits,
|
self.train_profits,
|
||||||
columns=[
|
columns=[
|
||||||
"Date",
|
"Date",
|
||||||
"Penalty",
|
"Penalty",
|
||||||
@@ -71,3 +71,130 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
|
|||||||
"Discharge Threshold",
|
"Discharge Threshold",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
number_of_days = len(self.train_profits["Date"].unique())
|
||||||
|
usable_charge_cycles = (400 / 365) * number_of_days
|
||||||
|
|
||||||
|
intermediate_values = {penalty: {} for penalty in self.penalties}
|
||||||
|
|
||||||
|
# find the best threshold combination for each penalty based on the total profit on the data
|
||||||
|
for penalty in self.penalties:
|
||||||
|
profits_for_penalty = self.train_profits[
|
||||||
|
self.train_profits["Penalty"] == penalty
|
||||||
|
]
|
||||||
|
|
||||||
|
for index, row in profits_for_penalty.iterrows():
|
||||||
|
charge_threshold = row["Charge Threshold"]
|
||||||
|
discharge_threshold = row["Discharge Threshold"]
|
||||||
|
|
||||||
|
if (charge_threshold, discharge_threshold) not in intermediate_values[
|
||||||
|
penalty
|
||||||
|
]:
|
||||||
|
intermediate_values[penalty][
|
||||||
|
(charge_threshold, discharge_threshold)
|
||||||
|
] = (0, 0)
|
||||||
|
|
||||||
|
new_charge_cycles = (
|
||||||
|
intermediate_values[penalty][
|
||||||
|
(charge_threshold, discharge_threshold)
|
||||||
|
][1]
|
||||||
|
+ row["Charge Cycles"]
|
||||||
|
)
|
||||||
|
new_profit = (
|
||||||
|
intermediate_values[penalty][
|
||||||
|
(charge_threshold, discharge_threshold)
|
||||||
|
][0]
|
||||||
|
+ row["Profit"]
|
||||||
|
)
|
||||||
|
|
||||||
|
if new_charge_cycles <= usable_charge_cycles:
|
||||||
|
intermediate_values[penalty][
|
||||||
|
(charge_threshold, discharge_threshold)
|
||||||
|
] = (new_profit, new_charge_cycles)
|
||||||
|
|
||||||
|
best_thresholds = {penalty: [0, 0, 0, 0] for penalty in self.penalties}
|
||||||
|
|
||||||
|
for penalty in self.penalties:
|
||||||
|
best_profit = 0
|
||||||
|
for threshold, values in intermediate_values[penalty].items():
|
||||||
|
if values[0] > best_profit:
|
||||||
|
best_profit = values[0]
|
||||||
|
best_thresholds[penalty][0] = threshold[0]
|
||||||
|
best_thresholds[penalty][1] = threshold[1]
|
||||||
|
best_thresholds[penalty][2] = best_profit
|
||||||
|
best_thresholds[penalty][3] = values[1]
|
||||||
|
|
||||||
|
# create dataframe from best_thresholds with columns, Penalty, Charge Threshold, Discharge Threshold, Profit
|
||||||
|
data = [
|
||||||
|
(penalty, values[0], values[1], values[2], values[3])
|
||||||
|
for penalty, values in best_thresholds.items()
|
||||||
|
]
|
||||||
|
|
||||||
|
best_thresholds_df = pd.DataFrame(
|
||||||
|
data,
|
||||||
|
columns=[
|
||||||
|
"Penalty",
|
||||||
|
"Charge Threshold",
|
||||||
|
"Discharge Threshold",
|
||||||
|
"Profit (training data)",
|
||||||
|
f"Charge Cycles (training data: max {usable_charge_cycles})",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
if self.task:
|
||||||
|
self.task.get_logger().report_table(
|
||||||
|
"Baseline Train Data",
|
||||||
|
"Best Thresholds for each Penalty on Training Data (up to 400 cycles / year)",
|
||||||
|
iteration=0,
|
||||||
|
table_plot=best_thresholds_df,
|
||||||
|
)
|
||||||
|
|
||||||
|
return best_thresholds
|
||||||
|
|
||||||
|
def evaluate_test_set(self, thresholds: dict):
|
||||||
|
"""Evaluate the test set using the given thresholds (multiple penalties)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
thresholds (dict): Dictionary with penalties as keys and the corresponding thresholds tuple as values
|
||||||
|
"""
|
||||||
|
self.profits = []
|
||||||
|
try:
|
||||||
|
for date in tqdm(self.dates):
|
||||||
|
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
|
||||||
|
for penalty in thresholds.keys():
|
||||||
|
charge_threshold = thresholds[penalty][0]
|
||||||
|
discharge_threshold = thresholds[penalty][1]
|
||||||
|
|
||||||
|
simulated_profit, simulated_charge_cycles = (
|
||||||
|
self.baseline_policy.simulate(
|
||||||
|
torch.tensor([[real_imbalance_prices]]),
|
||||||
|
torch.tensor([charge_threshold]),
|
||||||
|
torch.tensor([discharge_threshold]),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
self.profits.append(
|
||||||
|
[
|
||||||
|
date,
|
||||||
|
penalty,
|
||||||
|
simulated_profit[0][0].item(),
|
||||||
|
simulated_charge_cycles[0][0].item(),
|
||||||
|
charge_threshold,
|
||||||
|
discharge_threshold,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
self.profits = pd.DataFrame(
|
||||||
|
self.profits,
|
||||||
|
columns=[
|
||||||
|
"Date",
|
||||||
|
"Penalty",
|
||||||
|
"Profit",
|
||||||
|
"Charge Cycles",
|
||||||
|
"Charge Threshold",
|
||||||
|
"Discharge Threshold",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
pass
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from src.utils.clearml import ClearMLHelper
|
|||||||
|
|
||||||
#### ClearML ####
|
#### ClearML ####
|
||||||
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
|
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
|
||||||
task = clearml_helper.get_task(task_name="Global Thresholds Baselien")
|
task = clearml_helper.get_task(task_name="Global Thresholds Baseline")
|
||||||
task.execute_remotely(queue_name="default", exit_process=True)
|
task.execute_remotely(queue_name="default", exit_process=True)
|
||||||
|
|
||||||
from src.policies.baselines.BaselinePolicyEvaluator import BaselinePolicyEvaluator
|
from src.policies.baselines.BaselinePolicyEvaluator import BaselinePolicyEvaluator
|
||||||
@@ -13,4 +13,9 @@ battery = Battery(2, 1)
|
|||||||
baseline_policy = BaselinePolicy(battery, data_path="")
|
baseline_policy = BaselinePolicy(battery, data_path="")
|
||||||
policy_evaluator = BaselinePolicyEvaluator(baseline_policy, task)
|
policy_evaluator = BaselinePolicyEvaluator(baseline_policy, task)
|
||||||
|
|
||||||
policy_evaluator.determine_best_thresholds()
|
thresholds = policy_evaluator.determine_best_thresholds()
|
||||||
|
policy_evaluator.evaluate_test_set(thresholds)
|
||||||
|
|
||||||
|
policy_evaluator.plot_profits_table()
|
||||||
|
|
||||||
|
task.close()
|
||||||
|
|||||||
Reference in New Issue
Block a user