Not resetting state of charge

This commit is contained in:
2024-03-23 19:18:55 +01:00
parent e780b46af7
commit 65ec8fcd54
8 changed files with 470 additions and 184 deletions

View File

@@ -203,19 +203,17 @@ RESULTATEN FIXEN
Nog eens 3e meeting opbrengen voor 2e deel maart.
Baseline -> thresholds bepalen training data, penalty aanpassen na evaluatie op test
Baseline -> thresholds bepalen training data, penalty aanpassen na evaluatie op test (In Progress, see last comment)
Schaal van plotjes aan zelfde
NRV generaties plotten
Profit during training
Beste diffusion and best GRU model -> plotjes van profits during training
- [x] Schaal van plotjes aan zelfde
- [x] NRV generaties plotten
- [x] Profit during training
- [x] Beste diffusion and best GRU model -> plotjes van profits during training
# !!!! Baseline + Non autoregressive !!!!!!!!!!!!!!!
# profit evaluation done day by day. Start with fresh battery. Maybe electricity bought but not sold -> negative profits? What to do with this?
# profit evaluation done day by day. Start with fresh battery. Maybe electricity bought but not sold -> negative profits? What to do with this?
2 solutions: - work further with the state of charge of the battery
- don't count the electricity bought but not sold in the profit calculation

View File

@@ -47,6 +47,7 @@ class PolicyEvaluator:
charge_thresholds=np.arange(-1500, 1500, 50),
discharge_thresholds=np.arange(-1500, 1500, 50),
penalty: int = 0,
state_of_charge: float = 0.0,
):
if date in self.cache:
(reconstructed_imbalance_prices, real_imbalance_prices) = self.cache[date]
@@ -82,6 +83,7 @@ class PolicyEvaluator:
penalty,
charge_thresholds,
discharge_thresholds,
state_of_charge=state_of_charge,
)
def optimize_penalty_for_target_charge_cycles(
@@ -135,6 +137,7 @@ class PolicyEvaluator:
penalty: int,
charge_thresholds,
discharge_thresholds,
state_of_charge=0.0,
):
"""_summary_
@@ -155,6 +158,7 @@ class PolicyEvaluator:
charge_thresholds,
discharge_thresholds,
penalty,
battery_state_of_charge=state_of_charge,
)
)
@@ -162,16 +166,20 @@ class PolicyEvaluator:
predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0)
### Determine Profits and Charge Cycles ###
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([predicted_charge_threshold]),
torch.tensor([predicted_discharge_threshold]),
simulated_profit, simulated_charge_cycles, new_state_of_charge = (
self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([predicted_charge_threshold]),
torch.tensor([predicted_discharge_threshold]),
battery_state_of_charge=torch.tensor([state_of_charge]),
)
)
return (
simulated_profit[0][0].item(),
simulated_charge_cycles[0][0].item(),
predicted_charge_threshold.item(),
predicted_discharge_threshold.item(),
new_state_of_charge.squeeze(0).item(),
)
def evaluate_test_set(self, idx_samples, test_loader):
@@ -213,12 +221,20 @@ class PolicyEvaluator:
def evaluate_test_set_for_penalty(self, idx_samples, test_loader, penalty):
total_profit = 0
total_charge_cycles = 0
state_of_charge = 0.0
for date in tqdm(self.dates):
try:
profit, charge_cycles, _, _ = self.evaluate_for_date(
date, idx_samples, test_loader, penalty=penalty
profit, charge_cycles, _, _, new_state_of_charge = (
self.evaluate_for_date(
date,
idx_samples,
test_loader,
penalty=penalty,
state_of_charge=state_of_charge,
)
)
state_of_charge = new_state_of_charge
total_profit += profit
total_charge_cycles += charge_cycles
except KeyboardInterrupt:

View File

@@ -11,70 +11,80 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
def __init__(self, baseline_policy: BaselinePolicy, task: Task = None):
super(BaselinePolicyEvaluator, self).__init__(baseline_policy, task)
self.current_state_of_charge = 0.0
self.train_profits = []
def determine_thresholds_for_date(self, date, penalty):
charge_thresholds = np.arange(-500, 500, 25)
discharge_thresholds = np.arange(-500, 500, 25)
self.charge_discharge_thresholds = [
(charge_threshold, discharge_threshold)
for charge_threshold in charge_thresholds
for discharge_threshold in discharge_thresholds
if charge_threshold < discharge_threshold
]
# state of charge to zero for all thresholds
self.current_state_of_charge = torch.zeros(
len(self.charge_discharge_thresholds)
)
self.profits = torch.zeros(len(self.charge_discharge_thresholds))
self.charge_cycles = torch.zeros(len(self.charge_discharge_thresholds))
def determine_thresholds_for_date(self, date):
# all combinations where charge_threshold is less than discharge_threshold
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
found_charge_thresholds, found_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds(
torch.tensor([real_imbalance_prices]),
charge_thresholds,
discharge_thresholds,
penalty,
simulated_profit, simulated_charge_cycles, simulated_state_of_charge = (
self.baseline_policy.simulate(
torch.tensor(
[[real_imbalance_prices] * len(self.charge_discharge_thresholds)]
),
torch.tensor([c for c, _ in self.charge_discharge_thresholds]),
torch.tensor([d for _, d in self.charge_discharge_thresholds]),
battery_state_of_charge=self.current_state_of_charge,
)
)
best_charge_threshold = found_charge_thresholds
best_discharge_threshold = found_discharge_thresholds
self.current_state_of_charge = simulated_state_of_charge.squeeze(0)
self.profits += simulated_profit.squeeze(0)
self.charge_cycles += simulated_charge_cycles.squeeze(0)
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([best_charge_threshold]),
torch.tensor([best_discharge_threshold]),
def determine_best_thresholds(self):
self.current_state_of_charge = torch.zeros(
len(self.charge_discharge_thresholds)
)
self.train_profits.append(
[
simulated_profit[0][0].item(),
simulated_charge_cycles[0][0].item(),
best_charge_threshold.item(),
best_discharge_threshold.item(),
]
)
self.profits = torch.zeros(len(self.charge_discharge_thresholds))
self.charge_cycles = torch.zeros(len(self.charge_discharge_thresholds))
def determine_best_thresholds(self, penalty):
self.train_profits = []
dates = self.baseline_policy.train_data["DateTime"].dt.date.unique()
dates = pd.to_datetime(dates)
total_dates = 0
try:
for date in tqdm(dates):
self.determine_thresholds_for_date(date, penalty)
self.determine_thresholds_for_date(date)
total_dates += 1
except Exception as e:
print(e)
pass
self.train_profits = pd.DataFrame(
self.train_profits,
columns=[
"Profit",
"Charge Cycles",
"Charge Threshold",
"Discharge Threshold",
],
wanted_charge_cycles = 400 / 365 * total_dates
best_idx = torch.argmin(
torch.abs(self.charge_cycles - wanted_charge_cycles)
).item()
return (
self.charge_discharge_thresholds[best_idx],
self.profits[best_idx].item(),
self.charge_cycles[best_idx].item(),
)
# get the best thresholds combination based on the sum of profits
best_thresholds = self.train_profits.groupby(
["Charge Threshold", "Discharge Threshold"]
).sum()["Profit"]
best_thresholds = best_thresholds.idxmax()
return (best_thresholds[0], best_thresholds[1])
def evaluate_test_set(
self, charge_threshold, discharge_threshold, data_processor=None
):
@@ -93,17 +103,23 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
self.dates = filtered_dates
try:
battery_state_of_charge = torch.zeros(1)
for date in tqdm(self.dates):
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
simulated_profit, simulated_charge_cycles = (
self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([charge_threshold]),
torch.tensor([discharge_threshold]),
)
(
simulated_profit,
simulated_charge_cycles,
simulated_battery_state_of_charge,
) = self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([charge_threshold]),
torch.tensor([discharge_threshold]),
battery_state_of_charge=battery_state_of_charge,
)
battery_state_of_charge = simulated_battery_state_of_charge.squeeze(0)
self.profits.append(
[
date,
@@ -123,35 +139,45 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
# return the total profit and total charge cycles
return self.profits["Profit"].sum(), self.profits["Charge Cycles"].sum()
def optimize_penalty_for_target_charge_cycles(
self,
initial_penalty,
target_charge_cycles,
learning_rate=2,
max_iterations=10,
tolerance=10,
):
penalty = initial_penalty
def determine_best_thresholds_test_set(self, data_processor=None):
self.current_state_of_charge = torch.zeros(
len(self.charge_discharge_thresholds)
)
for i in range(max_iterations):
charge_threshold, discharge_threshold = self.determine_best_thresholds(
penalty
)
total_profit, total_charge_cycles = self.evaluate_test_set(
charge_threshold, discharge_threshold
)
self.profits = torch.zeros(len(self.charge_discharge_thresholds))
self.charge_cycles = torch.zeros(len(self.charge_discharge_thresholds))
gradient = total_charge_cycles - target_charge_cycles
penalty += learning_rate * gradient
dates = self.baseline_policy.train_data["DateTime"].dt.date.unique()
dates = pd.to_datetime(dates)
print(
f"Iteration {i+1}: Penalty: {penalty}, Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}, Gradient: {gradient}, Charge Threshold: {charge_threshold}, Discharge Threshold: {discharge_threshold}"
)
if data_processor:
filtered_dates = []
_, test_loader = data_processor.get_dataloaders()
for date in self.dates:
try:
test_loader.dataset.get_idx_for_date(date.date())
filtered_dates.append(date)
except:
pass
dates = filtered_dates
if abs(gradient) < tolerance:
print(f"Optimal penalty found after {i+1} iterations")
break
else:
print(f"Optimal penalty not found after {max_iterations} iterations")
total_dates = 0
try:
for date in tqdm(dates):
self.determine_thresholds_for_date(date)
total_dates += 1
except Exception as e:
print(e)
pass
return penalty, total_profit, total_charge_cycles
wanted_charge_cycles = 400 / 365 * total_dates
best_idx = torch.argmin(
torch.abs(self.charge_cycles - wanted_charge_cycles)
).item()
return (
self.charge_discharge_thresholds[best_idx],
self.profits[best_idx].item(),
self.charge_cycles[best_idx].item(),
)

View File

@@ -17,7 +17,8 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
date,
charge_thresholds=np.arange(-100, 250, 25),
discharge_thresholds=np.arange(-100, 250, 25),
penalty: int = 0
penalty: int = 0,
current_state_of_charge=0.0,
):
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
@@ -28,33 +29,32 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
np.array([yesterday_imbalance_prices]), device="cpu"
)
yesterday_charge_thresholds, yesterday_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds(
yesterday_imbalance_prices,
charge_thresholds,
discharge_thresholds,
penalty,
)
yesterday_charge_thresholds, yesterday_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds(
yesterday_imbalance_prices,
charge_thresholds,
discharge_thresholds,
penalty,
battery_state_of_charge=current_state_of_charge,
)
)
yesterday_profit, yesterday_charge_cycles = self.baseline_policy.simulate(
yesterday_profit, yesterday_charge_cycles, new_state_of_charge = (
self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([yesterday_charge_thresholds.mean(axis=0)]),
torch.tensor([yesterday_discharge_thresholds.mean(axis=0)]),
battery_state_of_charge=torch.tensor([current_state_of_charge]),
)
)
self.profits.append(
[
date,
penalty,
yesterday_profit[0][0].item(),
yesterday_charge_cycles[0][0].item(),
yesterday_charge_thresholds.mean(axis=0).item(),
yesterday_discharge_thresholds.mean(axis=0).item(),
]
)
return (
yesterday_profit[0][0].item(),
yesterday_charge_cycles[0][0].item(),
new_state_of_charge.squeeze(0).item(),
)
def evaluate_test_set(self, data_processor):
def evaluate_test_set_for_penalty(self, data_processor, penalty: int = 0):
if data_processor:
filtered_dates = []
@@ -67,22 +67,65 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
pass
self.dates = filtered_dates
self.profits = []
profit = 0
charge_cycles = 0
state_of_charge = 0.0
for date in tqdm(self.dates):
try:
self.evaluate_for_date(date)
new_profit, new_charge_cycles, new_state_of_charge = (
self.evaluate_for_date(
date, penalty=penalty, current_state_of_charge=state_of_charge
)
)
profit += new_profit
charge_cycles += new_charge_cycles
state_of_charge = new_state_of_charge
except Exception as e:
print(e)
pass
self.profits = pd.DataFrame(
self.profits,
columns=[
"Date",
"Penalty",
"Profit",
"Charge Cycles",
"Charge Threshold",
"Discharge Threshold",
],
return profit, charge_cycles
def optimize_penalty_for_target_charge_cycles(
self,
data_processor,
initial_penalty,
target_charge_cycles,
learning_rate=2,
max_iterations=10,
tolerance=10,
):
self.cache = {}
penalty = initial_penalty
for iteration in range(max_iterations):
# Calculate profit and charge cycles for the current penalty
simulated_profit, simulated_charge_cycles = (
self.evaluate_test_set_for_penalty(data_processor, penalty)
)
print(
f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}"
)
# Calculate the gradient (difference) between the simulated and target charge cycles
gradient = simulated_charge_cycles - target_charge_cycles
# Update the penalty parameter in the direction of the gradient
penalty += learning_rate * gradient
# Check if the charge cycles are close enough to the target
if abs(gradient) < tolerance:
print(f"Optimal penalty found after {iteration+1} iterations")
break
else:
print(
f"Reached max iterations ({max_iterations}) without converging to the target charge cycles"
)
# Re-calculate profit and charge cycles for the final penalty to return accurate results
profit, charge_cycles = self.evaluate_test_set_for_penalty(
data_processor, penalty
)
return penalty, profit, charge_cycles

View File

@@ -32,14 +32,36 @@ battery = Battery(2, 1)
baseline_policy = BaselinePolicy(battery, data_path="")
policy_evaluator = BaselinePolicyEvaluator(baseline_policy, task)
total_profit, total_charge_cycles = (
policy_evaluator.optimize_penalty_for_target_charge_cycles(
initial_penalty=100,
target_charge_cycles=283,
learning_rate=0.2,
max_iterations=150,
tolerance=1,
)
charge_discharge_threshold, total_profit, total_charge_cycles = (
policy_evaluator.determine_best_thresholds()
)
print(f"Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}")
print(f"Training set results:")
print(f"Best Charge Discharge Threshold: {charge_discharge_threshold}")
print(f"Total Profit: {total_profit}")
print(f"Total Charge Cycles: {total_charge_cycles}")
profit, charge_cycles = policy_evaluator.evaluate_test_set(
charge_discharge_threshold[0],
charge_discharge_threshold[1],
data_processor=data_processor,
)
print()
print("Test Set Results")
print(f"Profit: {profit}, Charge Cycles: {charge_cycles}")
# Thresholds determined on test set
charge_discharge_threshold, total_profit, total_charge_cycles = (
policy_evaluator.determine_best_thresholds_test_set(data_processor)
)
task.get_logger().report_single_value(name="Optimal Profit", value=profit)
task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles)
task.get_logger().report_single_value(
name="Optimal Charge Threshold", value=charge_discharge_threshold[0]
)
task.get_logger().report_single_value(
name="Optimal Discharge Threshold", value=charge_discharge_threshold[1]
)
task.close()

View File

@@ -35,7 +35,23 @@ battery = Battery(2, 1)
baseline_policy = BaselinePolicy(battery, data_path="")
policy_evaluator = YesterdayBaselinePolicyEvaluator(baseline_policy, task)
policy_evaluator.evaluate_test_set(data_processor=data_processor)
policy_evaluator.plot_profits_table()
penalty, profit, charge_cycles = (
policy_evaluator.optimize_penalty_for_target_charge_cycles(
data_processor=data_processor,
initial_penalty=0,
target_charge_cycles=283,
learning_rate=2,
max_iterations=100,
tolerance=1,
)
)
# policy_evaluator.plot_profits_table()
print()
print("Test Set Results")
print(f"Penalty: {penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}")
task.get_logger().report_single_value(name="Optimal Penalty", value=penalty)
task.get_logger().report_single_value(name="Optimal Profit", value=profit)
task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles)
task.close()

View File

@@ -6,6 +6,7 @@ import torch
imbalance_prices = "data/imbalance_prices.csv"
class Battery:
def __init__(self, capacity: float, power: float):
"""
@@ -26,11 +27,11 @@ class Battery:
return 0
self.current_charge -= self.power / 4
self.charge_cycles += 1/16
self.charge_cycles += 1 / 16
if self.current_charge <= 0:
self.current_charge = 0
return self.power / 4
def charge(self):
@@ -41,13 +42,13 @@ class Battery:
return 0
self.current_charge += self.power / 4
self.charge_cycles += 1/16
self.charge_cycles += 1 / 16
if self.current_charge >= self.capacity:
self.current_charge = self.capacity
return self.power / 4
def reset(self):
"""
Reset the battery to its initial state
@@ -57,32 +58,47 @@ class Battery:
self.discharging = False
self.charge_cycles = 0
class BaselinePolicy():
class BaselinePolicy:
def __init__(self, battery: Battery, data_path: str = ""):
self.data_path = data_path
self.battery = battery
self.train_data = self.load_imbalance_prices(train=True)
self.train_data = self.load_imbalance_prices(train=True)
self.test_data = self.load_imbalance_prices(train=False)
# print first datetime of train and test data
print(f"Training range: {self.train_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.train_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}")
print(f"Test range: {self.test_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.test_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}")
print(
f"Training range: {self.train_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.train_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}"
)
print(
f"Test range: {self.test_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.test_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}"
)
def load_imbalance_prices(self, train: bool = True):
imbalance_prices = pd.read_csv(self.data_path + 'data/imbalance_prices.csv', parse_dates=True, sep=";")
imbalance_prices = imbalance_prices[['DateTime', 'Positive imbalance price']]
imbalance_prices['DateTime'] = pd.to_datetime(imbalance_prices['DateTime'], utc=True)
imbalance_prices = pd.read_csv(
self.data_path + "data/imbalance_prices.csv", parse_dates=True, sep=";"
)
imbalance_prices = imbalance_prices[["DateTime", "Positive imbalance price"]]
imbalance_prices["DateTime"] = pd.to_datetime(
imbalance_prices["DateTime"], utc=True
)
if train:
imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year < 2023]
imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year >= 2020]
imbalance_prices = imbalance_prices.loc[
imbalance_prices["DateTime"].dt.year < 2023
]
imbalance_prices = imbalance_prices.loc[
imbalance_prices["DateTime"].dt.year >= 2020
]
else:
imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year == 2023]
imbalance_prices = imbalance_prices.sort_values(by=['DateTime'], ascending=True)
imbalance_prices = imbalance_prices.loc[
imbalance_prices["DateTime"].dt.year == 2023
]
imbalance_prices = imbalance_prices.sort_values(by=["DateTime"], ascending=True)
return imbalance_prices
def get_train_score(self, charge_threshold, discharge_threshold):
return self.get_score(self.train_data, charge_threshold, discharge_threshold)
def get_test_score(self, charge_threshold, discharge_threshold):
return self.get_score(self.test_data, charge_threshold, discharge_threshold)
@@ -96,38 +112,145 @@ class BaselinePolicy():
mean_discharging_price = 0
number_of_charges = 0
number_of_discharges = 0
for index, row in df.iterrows():
if row['Positive imbalance price'] < charge_threshold:
total_charging_cost += self.battery.charge() * row['Positive imbalance price']
mean_charging_price += row['Positive imbalance price']
if row["Positive imbalance price"] < charge_threshold:
total_charging_cost += (
self.battery.charge() * row["Positive imbalance price"]
)
mean_charging_price += row["Positive imbalance price"]
number_of_charges += 1
elif row['Positive imbalance price'] > discharge_threshold:
total_discharging_profit += self.battery.discharge() * row['Positive imbalance price']
mean_discharging_price += row['Positive imbalance price']
elif row["Positive imbalance price"] > discharge_threshold:
total_discharging_profit += (
self.battery.discharge() * row["Positive imbalance price"]
)
mean_discharging_price += row["Positive imbalance price"]
number_of_discharges += 1
return total_charging_cost, total_discharging_profit, self.battery.charge_cycles, mean_charging_price / number_of_charges, mean_discharging_price / number_of_discharges
return (
total_charging_cost,
total_discharging_profit,
self.battery.charge_cycles,
mean_charging_price / number_of_charges,
mean_discharging_price / number_of_discharges,
)
def threshold_scores(self, charge_thresholds, discharge_thresholds):
df = pd.DataFrame(columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])
df_test = pd.DataFrame(columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])
df = pd.DataFrame(
columns=[
"Charge threshold",
"Discharge threshold",
"Charging Cost",
"Discharging Profit",
"Total Profit",
"Charge cycles",
"Mean charging price",
"Mean discharging price",
]
)
df_test = pd.DataFrame(
columns=[
"Charge threshold",
"Discharge threshold",
"Charging Cost",
"Discharging Profit",
"Total Profit",
"Charge cycles",
"Mean charging price",
"Mean discharging price",
]
)
threshold_pairs = itertools.product(charge_thresholds, discharge_thresholds)
threshold_pairs = filter(lambda x: x[0] < x[1], threshold_pairs)
for charge_threshold, discharge_threshold in tqdm(threshold_pairs):
total_charging_cost, total_discharge_profit, charge_cycles, mean_charging_price, mean_discharging_price = self.get_train_score(charge_threshold, discharge_threshold)
df = pd.concat([df, pd.DataFrame([[charge_threshold, discharge_threshold, total_charging_cost, total_discharge_profit, total_discharge_profit - total_charging_cost, charge_cycles, mean_charging_price, mean_discharging_price]], columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])])
(
total_charging_cost,
total_discharge_profit,
charge_cycles,
mean_charging_price,
mean_discharging_price,
) = self.get_train_score(charge_threshold, discharge_threshold)
df = pd.concat(
[
df,
pd.DataFrame(
[
[
charge_threshold,
discharge_threshold,
total_charging_cost,
total_discharge_profit,
total_discharge_profit - total_charging_cost,
charge_cycles,
mean_charging_price,
mean_discharging_price,
]
],
columns=[
"Charge threshold",
"Discharge threshold",
"Charging Cost",
"Discharging Profit",
"Total Profit",
"Charge cycles",
"Mean charging price",
"Mean discharging price",
],
),
]
)
total_charging_cost, total_discharge_profit, charge_cycles, mean_charging_price, mean_discharging_price = self.get_test_score(charge_threshold, discharge_threshold)
df_test = pd.concat([df_test, pd.DataFrame([[charge_threshold, discharge_threshold, total_charging_cost, total_discharge_profit, total_discharge_profit - total_charging_cost, charge_cycles, mean_charging_price, mean_discharging_price]], columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])])
(
total_charging_cost,
total_discharge_profit,
charge_cycles,
mean_charging_price,
mean_discharging_price,
) = self.get_test_score(charge_threshold, discharge_threshold)
df_test = pd.concat(
[
df_test,
pd.DataFrame(
[
[
charge_threshold,
discharge_threshold,
total_charging_cost,
total_discharge_profit,
total_discharge_profit - total_charging_cost,
charge_cycles,
mean_charging_price,
mean_discharging_price,
]
],
columns=[
"Charge threshold",
"Discharge threshold",
"Charging Cost",
"Discharging Profit",
"Total Profit",
"Charge cycles",
"Mean charging price",
"Mean discharging price",
],
),
]
)
df = df.sort_values(by=['Total Profit'], ascending=False)
df = df.sort_values(by=["Total Profit"], ascending=False)
return df, df_test
def get_optimal_thresholds(self, imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty: float = 0):
def get_optimal_thresholds(
self,
imbalance_prices,
charge_thresholds,
discharge_thresholds,
charge_cycles_penalty: float = 0,
battery_state_of_charge: float = 0,
):
threshold_pairs = itertools.product(charge_thresholds, discharge_thresholds)
threshold_pairs = filter(lambda x: x[0] < x[1], threshold_pairs)
@@ -143,9 +266,19 @@ class BaselinePolicy():
next_day_charge_thresholds, next_day_discharge_thresholds = [], []
# imbalance_prices: (1000, 96) -> (1000, threshold_pairs, 96)
imbalance_prices = imbalance_prices.unsqueeze(1).expand(-1, len(threshold_pairs), -1)
imbalance_prices = imbalance_prices.unsqueeze(1).expand(
-1, len(threshold_pairs), -1
)
profits, charge_cycles = self.simulate(imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty=charge_cycles_penalty)
profits, charge_cycles, state_of_charge = self.simulate(
imbalance_prices,
charge_thresholds,
discharge_thresholds,
charge_cycles_penalty=charge_cycles_penalty,
battery_state_of_charge=torch.tensor(
[battery_state_of_charge] * len(charge_thresholds)
),
)
# get the index of the best threshold pair for each day (1000, 96) -> (1000)
best_threshold_indices = torch.argmax(profits, dim=1)
@@ -159,16 +292,27 @@ class BaselinePolicy():
return next_day_charge_thresholds, next_day_discharge_thresholds
def simulate(self, price_matrix, charge_thresholds: torch.tensor, discharge_thresholds: torch.tensor, charge_cycles_penalty: float = 0):
def simulate(
self,
price_matrix,
charge_thresholds: torch.tensor,
discharge_thresholds: torch.tensor,
charge_cycles_penalty: float = 0,
battery_state_of_charge: float = 0,
):
# make sure all on the same device
charge_thresholds = charge_thresholds.to(price_matrix.device)
discharge_thresholds = discharge_thresholds.to(price_matrix.device)
batch_size, num_thresholds, num_time_steps = price_matrix.shape
# Reshape thresholds for broadcasting
charge_thresholds = charge_thresholds.view(1, num_thresholds, 1).expand(batch_size, -1, num_time_steps)
discharge_thresholds = discharge_thresholds.view(1, num_thresholds, 1).expand(batch_size, -1, num_time_steps)
charge_thresholds = charge_thresholds.view(1, num_thresholds, 1).expand(
batch_size, -1, num_time_steps
)
discharge_thresholds = discharge_thresholds.view(1, num_thresholds, 1).expand(
batch_size, -1, num_time_steps
)
charge_matrix = torch.zeros_like(price_matrix)
@@ -176,6 +320,11 @@ class BaselinePolicy():
charge_matrix[price_matrix > discharge_thresholds] = -1
battery_states = torch.zeros(batch_size, num_thresholds)
battery_states = battery_state_of_charge.view(1, num_thresholds).expand(
batch_size, -1
)
profits = torch.zeros_like(battery_states)
charge_cycles = torch.zeros_like(battery_states)
@@ -185,23 +334,36 @@ class BaselinePolicy():
for i in range(num_time_steps):
discharge_mask = ~((charge_matrix[:, :, i] == -1) & (battery_states == 0))
charge_mask = ~((charge_matrix[:, :, i] == 1) & (battery_states == self.battery.capacity))
charge_mask = ~(
(charge_matrix[:, :, i] == 1)
& (battery_states == self.battery.capacity)
)
mask = discharge_mask & charge_mask
battery_states[mask] += charge_matrix[:, :, i][mask] * self.battery.power / 4
profits[mask] += -charge_matrix[:, :, i][mask] * price_matrix[:, :, i][mask] * self.battery.power / 4
charge_cycles[mask] += torch.abs(charge_matrix[:, :, i][mask]) * (self.battery.power / 4) / self.battery.capacity / 2
battery_states[mask] += (
charge_matrix[:, :, i][mask] * self.battery.power / 4
)
profits[mask] += (
-charge_matrix[:, :, i][mask]
* price_matrix[:, :, i][mask]
* self.battery.power
/ 4
)
charge_cycles[mask] += (
torch.abs(charge_matrix[:, :, i][mask])
* (self.battery.power / 4)
/ self.battery.capacity
/ 2
)
# penalize for excess charge cycles
excess_charge_cycles = (charge_cycles - 400/365).clamp(min=0)
profits -= excess_charge_cycles * charge_cycles_penalty
excess_charge_cycles = (charge_cycles - 400 / 365).clamp(min=0)
profits -= excess_charge_cycles * charge_cycles_penalty
return profits, charge_cycles, battery_states
return profits, charge_cycles
# battery = Battery(2, 1)
# policy = BaselinePolicy(battery)
@@ -214,4 +376,4 @@ class BaselinePolicy():
# print(df_test.to_markdown())
# # print(policy.get_test_score(150, 100))
# # print(policy.get_test_score(150, 100))

View File

@@ -557,6 +557,9 @@ class NonAutoRegressiveQuantileRegression(Trainer):
inputs, targets = inputs.to(self.device), targets.to(self.device)
outputs = self.model(inputs)
outputs = outputs.reshape(-1, len(self.quantiles))
outputted_samples = [
sample_from_dist(self.quantiles, output.cpu()) for output in outputs
]