diff --git a/Result-Reports/Policies.md b/Result-Reports/Policies.md index 1eddedc..9d3f499 100644 --- a/Result-Reports/Policies.md +++ b/Result-Reports/Policies.md @@ -203,19 +203,17 @@ RESULTATEN FIXEN Nog eens 3e meeting opbrengen voor 2e deel maart. -Baseline -> thresholds bepalen training data, penalty aanpassen na evaluatie op test +Baseline -> thresholds bepalen training data, penalty aanpassen na evaluatie op test (In Progress, see last comment) -Schaal van plotjes aan zelfde - -NRV generaties plotten - -Profit during training - - -Beste diffusion and best GRU model -> plotjes van profits during training +- [x] Schaal van plotjes aan zelfde +- [x] NRV generaties plotten +- [x] Profit during training +- [x] Beste diffusion and best GRU model -> plotjes van profits during training # !!!! Baseline + Non autoregressive !!!!!!!!!!!!!!! -# profit evaluation done day by day. Start with fresh battery. Maybe electritiy bought but not sold -> negative profits? What to do with this? \ No newline at end of file +# profit evaluation done day by day. Start with fresh battery. Maybe electricity bought but not sold -> negative profits? What to do with this? 
+2 solutions: - work further with the state of charge of the battery + - don't count the electricity bought but not sold in the profit calculation \ No newline at end of file diff --git a/src/policies/PolicyEvaluator.py b/src/policies/PolicyEvaluator.py index ba72be9..80691c2 100644 --- a/src/policies/PolicyEvaluator.py +++ b/src/policies/PolicyEvaluator.py @@ -47,6 +47,7 @@ class PolicyEvaluator: charge_thresholds=np.arange(-1500, 1500, 50), discharge_thresholds=np.arange(-1500, 1500, 50), penalty: int = 0, + state_of_charge: float = 0.0, ): if date in self.cache: (reconstructed_imbalance_prices, real_imbalance_prices) = self.cache[date] @@ -82,6 +83,7 @@ class PolicyEvaluator: penalty, charge_thresholds, discharge_thresholds, + state_of_charge=state_of_charge, ) def optimize_penalty_for_target_charge_cycles( @@ -135,6 +137,7 @@ class PolicyEvaluator: penalty: int, charge_thresholds, discharge_thresholds, + state_of_charge=0.0, ): """_summary_ @@ -155,6 +158,7 @@ class PolicyEvaluator: charge_thresholds, discharge_thresholds, penalty, + battery_state_of_charge=state_of_charge, ) ) @@ -162,16 +166,20 @@ class PolicyEvaluator: predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0) ### Determine Profits and Charge Cycles ### - simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate( - torch.tensor([[real_imbalance_prices]]), - torch.tensor([predicted_charge_threshold]), - torch.tensor([predicted_discharge_threshold]), + simulated_profit, simulated_charge_cycles, new_state_of_charge = ( + self.baseline_policy.simulate( + torch.tensor([[real_imbalance_prices]]), + torch.tensor([predicted_charge_threshold]), + torch.tensor([predicted_discharge_threshold]), + battery_state_of_charge=torch.tensor([state_of_charge]), + ) ) return ( simulated_profit[0][0].item(), simulated_charge_cycles[0][0].item(), predicted_charge_threshold.item(), predicted_discharge_threshold.item(), + new_state_of_charge.squeeze(0).item(), ) def 
evaluate_test_set(self, idx_samples, test_loader): @@ -213,12 +221,20 @@ class PolicyEvaluator: def evaluate_test_set_for_penalty(self, idx_samples, test_loader, penalty): total_profit = 0 total_charge_cycles = 0 + state_of_charge = 0.0 for date in tqdm(self.dates): try: - profit, charge_cycles, _, _ = self.evaluate_for_date( - date, idx_samples, test_loader, penalty=penalty + profit, charge_cycles, _, _, new_state_of_charge = ( + self.evaluate_for_date( + date, + idx_samples, + test_loader, + penalty=penalty, + state_of_charge=state_of_charge, + ) ) + state_of_charge = new_state_of_charge total_profit += profit total_charge_cycles += charge_cycles except KeyboardInterrupt: diff --git a/src/policies/baselines/BaselinePolicyEvaluator.py b/src/policies/baselines/BaselinePolicyEvaluator.py index c091b06..f2227fa 100644 --- a/src/policies/baselines/BaselinePolicyEvaluator.py +++ b/src/policies/baselines/BaselinePolicyEvaluator.py @@ -11,70 +11,80 @@ class BaselinePolicyEvaluator(PolicyEvaluator): def __init__(self, baseline_policy: BaselinePolicy, task: Task = None): super(BaselinePolicyEvaluator, self).__init__(baseline_policy, task) + self.current_state_of_charge = 0.0 self.train_profits = [] - def determine_thresholds_for_date(self, date, penalty): charge_thresholds = np.arange(-500, 500, 25) discharge_thresholds = np.arange(-500, 500, 25) + self.charge_discharge_thresholds = [ + (charge_threshold, discharge_threshold) + for charge_threshold in charge_thresholds + for discharge_threshold in discharge_thresholds + if charge_threshold < discharge_threshold + ] + + # state of charge to zero for all thresholds + self.current_state_of_charge = torch.zeros( + len(self.charge_discharge_thresholds) + ) + + self.profits = torch.zeros(len(self.charge_discharge_thresholds)) + self.charge_cycles = torch.zeros(len(self.charge_discharge_thresholds)) + + def determine_thresholds_for_date(self, date): + + # all combinations where charge_threshold is less than discharge_threshold + 
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) - found_charge_thresholds, found_discharge_thresholds = ( - self.baseline_policy.get_optimal_thresholds( - torch.tensor([real_imbalance_prices]), - charge_thresholds, - discharge_thresholds, - penalty, + simulated_profit, simulated_charge_cycles, simulated_state_of_charge = ( + self.baseline_policy.simulate( + torch.tensor( + [[real_imbalance_prices] * len(self.charge_discharge_thresholds)] + ), + torch.tensor([c for c, _ in self.charge_discharge_thresholds]), + torch.tensor([d for _, d in self.charge_discharge_thresholds]), + battery_state_of_charge=self.current_state_of_charge, ) ) - best_charge_threshold = found_charge_thresholds - best_discharge_threshold = found_discharge_thresholds + self.current_state_of_charge = simulated_state_of_charge.squeeze(0) + self.profits += simulated_profit.squeeze(0) + self.charge_cycles += simulated_charge_cycles.squeeze(0) - simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate( - torch.tensor([[real_imbalance_prices]]), - torch.tensor([best_charge_threshold]), - torch.tensor([best_discharge_threshold]), + def determine_best_thresholds(self): + self.current_state_of_charge = torch.zeros( + len(self.charge_discharge_thresholds) ) - self.train_profits.append( - [ - simulated_profit[0][0].item(), - simulated_charge_cycles[0][0].item(), - best_charge_threshold.item(), - best_discharge_threshold.item(), - ] - ) + self.profits = torch.zeros(len(self.charge_discharge_thresholds)) + self.charge_cycles = torch.zeros(len(self.charge_discharge_thresholds)) - def determine_best_thresholds(self, penalty): - self.train_profits = [] dates = self.baseline_policy.train_data["DateTime"].dt.date.unique() dates = pd.to_datetime(dates) + + total_dates = 0 try: for date in tqdm(dates): - self.determine_thresholds_for_date(date, penalty) + self.determine_thresholds_for_date(date) + total_dates += 1 except Exception as e: print(e) pass - self.train_profits = 
pd.DataFrame( - self.train_profits, - columns=[ - "Profit", - "Charge Cycles", - "Charge Threshold", - "Discharge Threshold", - ], + wanted_charge_cycles = 400 / 365 * total_dates + + best_idx = torch.argmin( + torch.abs(self.charge_cycles - wanted_charge_cycles) + ).item() + + return ( + self.charge_discharge_thresholds[best_idx], + self.profits[best_idx].item(), + self.charge_cycles[best_idx].item(), ) - # get the best thresholds combination based on the sum of profits - best_thresholds = self.train_profits.groupby( - ["Charge Threshold", "Discharge Threshold"] - ).sum()["Profit"] - - best_thresholds = best_thresholds.idxmax() - return (best_thresholds[0], best_thresholds[1]) - def evaluate_test_set( self, charge_threshold, discharge_threshold, data_processor=None ): @@ -93,17 +103,23 @@ class BaselinePolicyEvaluator(PolicyEvaluator): self.dates = filtered_dates try: + battery_state_of_charge = torch.zeros(1) for date in tqdm(self.dates): real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) - simulated_profit, simulated_charge_cycles = ( - self.baseline_policy.simulate( - torch.tensor([[real_imbalance_prices]]), - torch.tensor([charge_threshold]), - torch.tensor([discharge_threshold]), - ) + ( + simulated_profit, + simulated_charge_cycles, + simulated_battery_state_of_charge, + ) = self.baseline_policy.simulate( + torch.tensor([[real_imbalance_prices]]), + torch.tensor([charge_threshold]), + torch.tensor([discharge_threshold]), + battery_state_of_charge=battery_state_of_charge, ) + battery_state_of_charge = simulated_battery_state_of_charge.squeeze(0) + self.profits.append( [ date, @@ -123,35 +139,45 @@ class BaselinePolicyEvaluator(PolicyEvaluator): # return the total profit and total charge cycles return self.profits["Profit"].sum(), self.profits["Charge Cycles"].sum() - def optimize_penalty_for_target_charge_cycles( - self, - initial_penalty, - target_charge_cycles, - learning_rate=2, - max_iterations=10, - tolerance=10, - ): - penalty = 
initial_penalty + def determine_best_thresholds_test_set(self, data_processor=None): + self.current_state_of_charge = torch.zeros( + len(self.charge_discharge_thresholds) + ) - for i in range(max_iterations): - charge_threshold, discharge_threshold = self.determine_best_thresholds( - penalty - ) - total_profit, total_charge_cycles = self.evaluate_test_set( - charge_threshold, discharge_threshold - ) + self.profits = torch.zeros(len(self.charge_discharge_thresholds)) + self.charge_cycles = torch.zeros(len(self.charge_discharge_thresholds)) - gradient = total_charge_cycles - target_charge_cycles - penalty += learning_rate * gradient + dates = self.baseline_policy.train_data["DateTime"].dt.date.unique() + dates = pd.to_datetime(dates) - print( - f"Iteration {i+1}: Penalty: {penalty}, Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}, Gradient: {gradient}, Charge Threshold: {charge_threshold}, Discharge Threshold: {discharge_threshold}" - ) + if data_processor: + filtered_dates = [] + _, test_loader = data_processor.get_dataloaders() + for date in self.dates: + try: + test_loader.dataset.get_idx_for_date(date.date()) + filtered_dates.append(date) + except: + pass + dates = filtered_dates - if abs(gradient) < tolerance: - print(f"Optimal penalty found after {i+1} iterations") - break - else: - print(f"Optimal penalty not found after {max_iterations} iterations") + total_dates = 0 + try: + for date in tqdm(dates): + self.determine_thresholds_for_date(date) + total_dates += 1 + except Exception as e: + print(e) + pass - return penalty, total_profit, total_charge_cycles + wanted_charge_cycles = 400 / 365 * total_dates + + best_idx = torch.argmin( + torch.abs(self.charge_cycles - wanted_charge_cycles) + ).item() + + return ( + self.charge_discharge_thresholds[best_idx], + self.profits[best_idx].item(), + self.charge_cycles[best_idx].item(), + ) diff --git a/src/policies/baselines/YesterdayBaselinePolicyExecutor.py 
b/src/policies/baselines/YesterdayBaselinePolicyExecutor.py index 486f242..f849a68 100644 --- a/src/policies/baselines/YesterdayBaselinePolicyExecutor.py +++ b/src/policies/baselines/YesterdayBaselinePolicyExecutor.py @@ -17,7 +17,8 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator): date, charge_thresholds=np.arange(-100, 250, 25), discharge_thresholds=np.arange(-100, 250, 25), - penalty: int = 0 + penalty: int = 0, + current_state_of_charge=0.0, ): real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) @@ -28,33 +29,32 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator): np.array([yesterday_imbalance_prices]), device="cpu" ) - yesterday_charge_thresholds, yesterday_discharge_thresholds = ( - self.baseline_policy.get_optimal_thresholds( - yesterday_imbalance_prices, - charge_thresholds, - discharge_thresholds, - penalty, - ) + yesterday_charge_thresholds, yesterday_discharge_thresholds = ( + self.baseline_policy.get_optimal_thresholds( + yesterday_imbalance_prices, + charge_thresholds, + discharge_thresholds, + penalty, + battery_state_of_charge=current_state_of_charge, ) + ) - yesterday_profit, yesterday_charge_cycles = self.baseline_policy.simulate( + yesterday_profit, yesterday_charge_cycles, new_state_of_charge = ( + self.baseline_policy.simulate( torch.tensor([[real_imbalance_prices]]), torch.tensor([yesterday_charge_thresholds.mean(axis=0)]), torch.tensor([yesterday_discharge_thresholds.mean(axis=0)]), + battery_state_of_charge=torch.tensor([current_state_of_charge]), ) + ) - self.profits.append( - [ - date, - penalty, - yesterday_profit[0][0].item(), - yesterday_charge_cycles[0][0].item(), - yesterday_charge_thresholds.mean(axis=0).item(), - yesterday_discharge_thresholds.mean(axis=0).item(), - ] - ) + return ( + yesterday_profit[0][0].item(), + yesterday_charge_cycles[0][0].item(), + new_state_of_charge.squeeze(0).item(), + ) - def evaluate_test_set(self, data_processor): + def evaluate_test_set_for_penalty(self, 
data_processor, penalty: int = 0): if data_processor: filtered_dates = [] @@ -67,22 +67,65 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator): pass self.dates = filtered_dates - self.profits = [] + profit = 0 + charge_cycles = 0 + state_of_charge = 0.0 + for date in tqdm(self.dates): try: - self.evaluate_for_date(date) + new_profit, new_charge_cycles, new_state_of_charge = ( + self.evaluate_for_date( + date, penalty=penalty, current_state_of_charge=state_of_charge + ) + ) + profit += new_profit + charge_cycles += new_charge_cycles + state_of_charge = new_state_of_charge except Exception as e: print(e) pass - self.profits = pd.DataFrame( - self.profits, - columns=[ - "Date", - "Penalty", - "Profit", - "Charge Cycles", - "Charge Threshold", - "Discharge Threshold", - ], + return profit, charge_cycles + + def optimize_penalty_for_target_charge_cycles( + self, + data_processor, + initial_penalty, + target_charge_cycles, + learning_rate=2, + max_iterations=10, + tolerance=10, + ): + self.cache = {} + penalty = initial_penalty + for iteration in range(max_iterations): + # Calculate profit and charge cycles for the current penalty + simulated_profit, simulated_charge_cycles = ( + self.evaluate_test_set_for_penalty(data_processor, penalty) + ) + + print( + f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}" + ) + + # Calculate the gradient (difference) between the simulated and target charge cycles + gradient = simulated_charge_cycles - target_charge_cycles + + # Update the penalty parameter in the direction of the gradient + penalty += learning_rate * gradient + + # Check if the charge cycles are close enough to the target + if abs(gradient) < tolerance: + print(f"Optimal penalty found after {iteration+1} iterations") + break + else: + print( + f"Reached max iterations ({max_iterations}) without converging to the target charge cycles" + ) + + # Re-calculate profit and charge cycles for the final penalty to return accurate 
results + profit, charge_cycles = self.evaluate_test_set_for_penalty( + data_processor, penalty ) + + return penalty, profit, charge_cycles diff --git a/src/policies/baselines/global_threshold_baseline.py b/src/policies/baselines/global_threshold_baseline.py index bda7cff..fb7ea79 100644 --- a/src/policies/baselines/global_threshold_baseline.py +++ b/src/policies/baselines/global_threshold_baseline.py @@ -32,14 +32,36 @@ battery = Battery(2, 1) baseline_policy = BaselinePolicy(battery, data_path="") policy_evaluator = BaselinePolicyEvaluator(baseline_policy, task) -total_profit, total_charge_cycles = ( - policy_evaluator.optimize_penalty_for_target_charge_cycles( - initial_penalty=100, - target_charge_cycles=283, - learning_rate=0.2, - max_iterations=150, - tolerance=1, - ) +charge_discharge_threshold, total_profit, total_charge_cycles = ( + policy_evaluator.determine_best_thresholds() ) -print(f"Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}") + +print(f"Training set results:") +print(f"Best Charge Discharge Threshold: {charge_discharge_threshold}") +print(f"Total Profit: {total_profit}") +print(f"Total Charge Cycles: {total_charge_cycles}") + +profit, charge_cycles = policy_evaluator.evaluate_test_set( + charge_discharge_threshold[0], + charge_discharge_threshold[1], + data_processor=data_processor, +) +print() +print("Test Set Results") +print(f"Profit: {profit}, Charge Cycles: {charge_cycles}") + +# Thresholds determined on test set +charge_discharge_threshold, total_profit, total_charge_cycles = ( + policy_evaluator.determine_best_thresholds_test_set(data_processor) +) + +task.get_logger().report_single_value(name="Optimal Profit", value=profit) +task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles) +task.get_logger().report_single_value( + name="Optimal Charge Threshold", value=charge_discharge_threshold[0] +) +task.get_logger().report_single_value( + name="Optimal Discharge Threshold", 
value=charge_discharge_threshold[1] +) + task.close() diff --git a/src/policies/baselines/yesterday_baseline.py b/src/policies/baselines/yesterday_baseline.py index 8456c5a..02bf406 100644 --- a/src/policies/baselines/yesterday_baseline.py +++ b/src/policies/baselines/yesterday_baseline.py @@ -35,7 +35,23 @@ battery = Battery(2, 1) baseline_policy = BaselinePolicy(battery, data_path="") policy_evaluator = YesterdayBaselinePolicyEvaluator(baseline_policy, task) -policy_evaluator.evaluate_test_set(data_processor=data_processor) -policy_evaluator.plot_profits_table() +penalty, profit, charge_cycles = ( + policy_evaluator.optimize_penalty_for_target_charge_cycles( + data_processor=data_processor, + initial_penalty=0, + target_charge_cycles=283, + learning_rate=2, + max_iterations=100, + tolerance=1, + ) +) +# policy_evaluator.plot_profits_table() +print() +print("Test Set Results") +print(f"Penalty: {penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}") + +task.get_logger().report_single_value(name="Optimal Penalty", value=penalty) +task.get_logger().report_single_value(name="Optimal Profit", value=profit) +task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles) task.close() diff --git a/src/policies/simple_baseline.py b/src/policies/simple_baseline.py index fc03108..c86038e 100644 --- a/src/policies/simple_baseline.py +++ b/src/policies/simple_baseline.py @@ -6,6 +6,7 @@ import torch imbalance_prices = "data/imbalance_prices.csv" + class Battery: def __init__(self, capacity: float, power: float): """ @@ -26,11 +27,11 @@ class Battery: return 0 self.current_charge -= self.power / 4 - self.charge_cycles += 1/16 + self.charge_cycles += 1 / 16 if self.current_charge <= 0: self.current_charge = 0 - + return self.power / 4 def charge(self): @@ -41,13 +42,13 @@ class Battery: return 0 self.current_charge += self.power / 4 - self.charge_cycles += 1/16 - + self.charge_cycles += 1 / 16 + if self.current_charge >= self.capacity: 
self.current_charge = self.capacity return self.power / 4 - + def reset(self): """ Reset the battery to its initial state @@ -57,32 +58,47 @@ class Battery: self.discharging = False self.charge_cycles = 0 -class BaselinePolicy(): + +class BaselinePolicy: def __init__(self, battery: Battery, data_path: str = ""): self.data_path = data_path self.battery = battery - self.train_data = self.load_imbalance_prices(train=True) + self.train_data = self.load_imbalance_prices(train=True) self.test_data = self.load_imbalance_prices(train=False) # print first datetime of train and test data - print(f"Training range: {self.train_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.train_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}") - print(f"Test range: {self.test_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.test_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}") + print( + f"Training range: {self.train_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.train_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}" + ) + print( + f"Test range: {self.test_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.test_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}" + ) def load_imbalance_prices(self, train: bool = True): - imbalance_prices = pd.read_csv(self.data_path + 'data/imbalance_prices.csv', parse_dates=True, sep=";") - imbalance_prices = imbalance_prices[['DateTime', 'Positive imbalance price']] - imbalance_prices['DateTime'] = pd.to_datetime(imbalance_prices['DateTime'], utc=True) + imbalance_prices = pd.read_csv( + self.data_path + "data/imbalance_prices.csv", parse_dates=True, sep=";" + ) + imbalance_prices = imbalance_prices[["DateTime", "Positive imbalance price"]] + imbalance_prices["DateTime"] = pd.to_datetime( + imbalance_prices["DateTime"], utc=True + ) if train: - imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year < 2023] - imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year >= 2020] + imbalance_prices = 
imbalance_prices.loc[ + imbalance_prices["DateTime"].dt.year < 2023 + ] + imbalance_prices = imbalance_prices.loc[ + imbalance_prices["DateTime"].dt.year >= 2020 + ] else: - imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year == 2023] - imbalance_prices = imbalance_prices.sort_values(by=['DateTime'], ascending=True) + imbalance_prices = imbalance_prices.loc[ + imbalance_prices["DateTime"].dt.year == 2023 + ] + imbalance_prices = imbalance_prices.sort_values(by=["DateTime"], ascending=True) return imbalance_prices - + def get_train_score(self, charge_threshold, discharge_threshold): return self.get_score(self.train_data, charge_threshold, discharge_threshold) - + def get_test_score(self, charge_threshold, discharge_threshold): return self.get_score(self.test_data, charge_threshold, discharge_threshold) @@ -96,38 +112,145 @@ class BaselinePolicy(): mean_discharging_price = 0 number_of_charges = 0 number_of_discharges = 0 - + for index, row in df.iterrows(): - if row['Positive imbalance price'] < charge_threshold: - total_charging_cost += self.battery.charge() * row['Positive imbalance price'] - mean_charging_price += row['Positive imbalance price'] + if row["Positive imbalance price"] < charge_threshold: + total_charging_cost += ( + self.battery.charge() * row["Positive imbalance price"] + ) + mean_charging_price += row["Positive imbalance price"] number_of_charges += 1 - elif row['Positive imbalance price'] > discharge_threshold: - total_discharging_profit += self.battery.discharge() * row['Positive imbalance price'] - mean_discharging_price += row['Positive imbalance price'] + elif row["Positive imbalance price"] > discharge_threshold: + total_discharging_profit += ( + self.battery.discharge() * row["Positive imbalance price"] + ) + mean_discharging_price += row["Positive imbalance price"] number_of_discharges += 1 - return total_charging_cost, total_discharging_profit, self.battery.charge_cycles, mean_charging_price / number_of_charges, 
mean_discharging_price / number_of_discharges + return ( + total_charging_cost, + total_discharging_profit, + self.battery.charge_cycles, + mean_charging_price / number_of_charges, + mean_discharging_price / number_of_discharges, + ) def threshold_scores(self, charge_thresholds, discharge_thresholds): - df = pd.DataFrame(columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"]) - df_test = pd.DataFrame(columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"]) + df = pd.DataFrame( + columns=[ + "Charge threshold", + "Discharge threshold", + "Charging Cost", + "Discharging Profit", + "Total Profit", + "Charge cycles", + "Mean charging price", + "Mean discharging price", + ] + ) + df_test = pd.DataFrame( + columns=[ + "Charge threshold", + "Discharge threshold", + "Charging Cost", + "Discharging Profit", + "Total Profit", + "Charge cycles", + "Mean charging price", + "Mean discharging price", + ] + ) threshold_pairs = itertools.product(charge_thresholds, discharge_thresholds) threshold_pairs = filter(lambda x: x[0] < x[1], threshold_pairs) for charge_threshold, discharge_threshold in tqdm(threshold_pairs): - total_charging_cost, total_discharge_profit, charge_cycles, mean_charging_price, mean_discharging_price = self.get_train_score(charge_threshold, discharge_threshold) - df = pd.concat([df, pd.DataFrame([[charge_threshold, discharge_threshold, total_charging_cost, total_discharge_profit, total_discharge_profit - total_charging_cost, charge_cycles, mean_charging_price, mean_discharging_price]], columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])]) + ( + total_charging_cost, + total_discharge_profit, + charge_cycles, + 
mean_charging_price, + mean_discharging_price, + ) = self.get_train_score(charge_threshold, discharge_threshold) + df = pd.concat( + [ + df, + pd.DataFrame( + [ + [ + charge_threshold, + discharge_threshold, + total_charging_cost, + total_discharge_profit, + total_discharge_profit - total_charging_cost, + charge_cycles, + mean_charging_price, + mean_discharging_price, + ] + ], + columns=[ + "Charge threshold", + "Discharge threshold", + "Charging Cost", + "Discharging Profit", + "Total Profit", + "Charge cycles", + "Mean charging price", + "Mean discharging price", + ], + ), + ] + ) - total_charging_cost, total_discharge_profit, charge_cycles, mean_charging_price, mean_discharging_price = self.get_test_score(charge_threshold, discharge_threshold) - df_test = pd.concat([df_test, pd.DataFrame([[charge_threshold, discharge_threshold, total_charging_cost, total_discharge_profit, total_discharge_profit - total_charging_cost, charge_cycles, mean_charging_price, mean_discharging_price]], columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])]) + ( + total_charging_cost, + total_discharge_profit, + charge_cycles, + mean_charging_price, + mean_discharging_price, + ) = self.get_test_score(charge_threshold, discharge_threshold) + df_test = pd.concat( + [ + df_test, + pd.DataFrame( + [ + [ + charge_threshold, + discharge_threshold, + total_charging_cost, + total_discharge_profit, + total_discharge_profit - total_charging_cost, + charge_cycles, + mean_charging_price, + mean_discharging_price, + ] + ], + columns=[ + "Charge threshold", + "Discharge threshold", + "Charging Cost", + "Discharging Profit", + "Total Profit", + "Charge cycles", + "Mean charging price", + "Mean discharging price", + ], + ), + ] + ) - df = df.sort_values(by=['Total Profit'], ascending=False) + df = df.sort_values(by=["Total Profit"], ascending=False) return df, df_test - - def 
get_optimal_thresholds(self, imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty: float = 0): + + def get_optimal_thresholds( + self, + imbalance_prices, + charge_thresholds, + discharge_thresholds, + charge_cycles_penalty: float = 0, + battery_state_of_charge: float = 0, + ): threshold_pairs = itertools.product(charge_thresholds, discharge_thresholds) threshold_pairs = filter(lambda x: x[0] < x[1], threshold_pairs) @@ -143,9 +266,19 @@ class BaselinePolicy(): next_day_charge_thresholds, next_day_discharge_thresholds = [], [] # imbalance_prices: (1000, 96) -> (1000, threshold_pairs, 96) - imbalance_prices = imbalance_prices.unsqueeze(1).expand(-1, len(threshold_pairs), -1) + imbalance_prices = imbalance_prices.unsqueeze(1).expand( + -1, len(threshold_pairs), -1 + ) - profits, charge_cycles = self.simulate(imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty=charge_cycles_penalty) + profits, charge_cycles, state_of_charge = self.simulate( + imbalance_prices, + charge_thresholds, + discharge_thresholds, + charge_cycles_penalty=charge_cycles_penalty, + battery_state_of_charge=torch.tensor( + [battery_state_of_charge] * len(charge_thresholds) + ), + ) # get the index of the best threshold pair for each day (1000, 96) -> (1000) best_threshold_indices = torch.argmax(profits, dim=1) @@ -159,16 +292,27 @@ class BaselinePolicy(): return next_day_charge_thresholds, next_day_discharge_thresholds - def simulate(self, price_matrix, charge_thresholds: torch.tensor, discharge_thresholds: torch.tensor, charge_cycles_penalty: float = 0): + def simulate( + self, + price_matrix, + charge_thresholds: torch.tensor, + discharge_thresholds: torch.tensor, + charge_cycles_penalty: float = 0, + battery_state_of_charge: float = 0, + ): # make sure all on the same device charge_thresholds = charge_thresholds.to(price_matrix.device) discharge_thresholds = discharge_thresholds.to(price_matrix.device) - + batch_size, num_thresholds, 
num_time_steps = price_matrix.shape # Reshape thresholds for broadcasting - charge_thresholds = charge_thresholds.view(1, num_thresholds, 1).expand(batch_size, -1, num_time_steps) - discharge_thresholds = discharge_thresholds.view(1, num_thresholds, 1).expand(batch_size, -1, num_time_steps) + charge_thresholds = charge_thresholds.view(1, num_thresholds, 1).expand( + batch_size, -1, num_time_steps + ) + discharge_thresholds = discharge_thresholds.view(1, num_thresholds, 1).expand( + batch_size, -1, num_time_steps + ) charge_matrix = torch.zeros_like(price_matrix) @@ -176,6 +320,11 @@ class BaselinePolicy(): charge_matrix[price_matrix > discharge_thresholds] = -1 battery_states = torch.zeros(batch_size, num_thresholds) + + battery_states = battery_state_of_charge.view(1, num_thresholds).expand( + batch_size, -1 + ) + profits = torch.zeros_like(battery_states) charge_cycles = torch.zeros_like(battery_states) @@ -185,23 +334,36 @@ class BaselinePolicy(): for i in range(num_time_steps): discharge_mask = ~((charge_matrix[:, :, i] == -1) & (battery_states == 0)) - charge_mask = ~((charge_matrix[:, :, i] == 1) & (battery_states == self.battery.capacity)) + charge_mask = ~( + (charge_matrix[:, :, i] == 1) + & (battery_states == self.battery.capacity) + ) mask = discharge_mask & charge_mask - battery_states[mask] += charge_matrix[:, :, i][mask] * self.battery.power / 4 - profits[mask] += -charge_matrix[:, :, i][mask] * price_matrix[:, :, i][mask] * self.battery.power / 4 - charge_cycles[mask] += torch.abs(charge_matrix[:, :, i][mask]) * (self.battery.power / 4) / self.battery.capacity / 2 + battery_states[mask] += ( + charge_matrix[:, :, i][mask] * self.battery.power / 4 + ) + profits[mask] += ( + -charge_matrix[:, :, i][mask] + * price_matrix[:, :, i][mask] + * self.battery.power + / 4 + ) + charge_cycles[mask] += ( + torch.abs(charge_matrix[:, :, i][mask]) + * (self.battery.power / 4) + / self.battery.capacity + / 2 + ) # penalize for excess charge cycles - 
excess_charge_cycles = (charge_cycles - 400/365).clamp(min=0) - profits -= excess_charge_cycles * charge_cycles_penalty + excess_charge_cycles = (charge_cycles - 400 / 365).clamp(min=0) + profits -= excess_charge_cycles * charge_cycles_penalty + + return profits, charge_cycles, battery_states - return profits, charge_cycles - - - # battery = Battery(2, 1) # policy = BaselinePolicy(battery) @@ -214,4 +376,4 @@ class BaselinePolicy(): # print(df_test.to_markdown()) -# # print(policy.get_test_score(150, 100)) \ No newline at end of file +# # print(policy.get_test_score(150, 100)) diff --git a/src/trainers/quantile_trainer.py b/src/trainers/quantile_trainer.py index a6dff73..378dbbd 100644 --- a/src/trainers/quantile_trainer.py +++ b/src/trainers/quantile_trainer.py @@ -557,6 +557,9 @@ class NonAutoRegressiveQuantileRegression(Trainer): inputs, targets = inputs.to(self.device), targets.to(self.device) outputs = self.model(inputs) + + outputs = outputs.reshape(-1, len(self.quantiles)) + outputted_samples = [ sample_from_dist(self.quantiles, output.cpu()) for output in outputs ]