diff --git a/data/bid_ladder.pkl b/data/bid_ladder.pkl new file mode 100644 index 0000000..faac51b Binary files /dev/null and b/data/bid_ladder.pkl differ diff --git a/src/policies/policy_executer.py b/src/policies/policy_executer.py index d36d258..001de94 100644 --- a/src/policies/policy_executer.py +++ b/src/policies/policy_executer.py @@ -1,17 +1,42 @@ import argparse from clearml import Task, Model +from src.policies.simple_baseline import BaselinePolicy, Battery from src.data import DataProcessor, DataConfig import torch +import numpy as np +import pandas as pd +import datetime +from tqdm import tqdm +from src.utils.imbalance_price_calculator import ImbalancePriceCalculator +import time + +### import functions ### +from src.trainers.quantile_trainer import auto_regressive as quantile_auto_regressive +from src.trainers.diffusion_trainer import sample_diffusion +from src.utils.clearml import ClearMLHelper # argparse to parse task id and model type parser = argparse.ArgumentParser() -parser.add_argument('--task_id', type=int, default=None) +parser.add_argument('--task_id', type=str, default=None) parser.add_argument('--model_type', type=str, default=None) args = parser.parse_args() assert args.task_id is not None, "Please specify task id" assert args.model_type is not None, "Please specify model type" +battery = Battery(2, 1) +baseline_policy = BaselinePolicy(battery, data_path="") + + +### Load Imbalance Prices ### +imbalance_prices = pd.read_csv('data/imbalance_prices.csv', sep=';') +imbalance_prices["DateTime"] = pd.to_datetime(imbalance_prices['DateTime'], utc=True) +imbalance_prices = imbalance_prices.sort_values(by=['DateTime']) + +def get_imbalance_prices(date): + imbalance_prices_day = imbalance_prices[imbalance_prices["DateTime"].dt.date == date] + return imbalance_prices_day['Positive imbalance price'].values + def load_model(task_id: str): """ Load model from task id @@ -31,7 +56,7 @@ def load_model(task_id: str): data_config.DAY_OF_WEEK = False ### Data Processor ### - data_processor = DataProcessor(data_config, path="../../", lstm=False) + data_processor = DataProcessor(data_config, path="", lstm=False) data_processor.set_batch_size(8192) data_processor.set_full_day_skip(True) @@ -50,4 +75,155 @@ def load_model(task_id: str): predict_sequence_length=96 ) - return configuration, model, test_loader \ No newline at end of file + return configuration, model, data_processor, test_loader + + +def quantile_auto_regressive_predicted_NRV(model, date, data_processor, test_loader): + idx = test_loader.dataset.get_idx_for_date(date.date()) + initial, _, samples, target = quantile_auto_regressive(test_loader.dataset, model, [idx]*500, 96) + samples = samples.cpu().numpy() + target = target.cpu().numpy() + + # inverse using data_processor + samples = data_processor.inverse_transform(samples) + target = data_processor.inverse_transform(target) + + return initial.cpu().numpy()[0][-1], samples, target + +def diffusion_predicted_NRV(model, date, _, test_loader): + device = next(model.parameters()).device + + idx = test_loader.dataset.get_idx_for_date(date.date()) + prev_features, targets = test_loader.dataset.get_batch([idx]) + + if len(list(prev_features.shape)) == 2: + initial_sequence = prev_features[:, :96] + else: + initial_sequence = prev_features[:, :, 0] + + prev_features = prev_features.to(device) + targets = targets.to(device) + + samples = sample_diffusion(model, 1000, prev_features) + + return initial_sequence.cpu().numpy()[0][-1], samples.cpu().numpy(), targets.cpu().numpy() + +def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV: callable, penalties: list): + charge_thresholds = np.arange(-100, 250, 25) + discharge_thresholds = np.arange(-100, 250, 25) + + predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties} + baseline_profits_cycles = {i: [0, 0] for i in penalties} + + initial, nrvs, target = predict_NRV(model, date, data_processor, test_loader) + + initial = np.repeat(initial, nrvs.shape[0]) + combined = np.concatenate((initial.reshape(-1, 1), nrvs), axis=1) + + reconstructed_imbalance_prices = ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined) + reconstructed_imbalance_prices = torch.tensor(reconstructed_imbalance_prices, device="cuda") + + yesterday_imbalance_prices = get_imbalance_prices(date.date() - datetime.timedelta(days=1)) + yesterday_imbalance_prices = torch.tensor(np.array([yesterday_imbalance_prices]), device="cpu") + + real_imbalance_prices = get_imbalance_prices(date.date()) + + for penalty in penalties: + found_charge_thresholds, found_discharge_thresholds = baseline_policy.get_optimal_thresholds(reconstructed_imbalance_prices, charge_thresholds, discharge_thresholds, penalty) + next_day_charge_threshold = found_charge_thresholds.mean(axis=0) + next_day_discharge_threshold = found_discharge_thresholds.mean(axis=0) + yesterday_charge_thresholds, yesterday_discharge_thresholds = baseline_policy.get_optimal_thresholds(yesterday_imbalance_prices, charge_thresholds, discharge_thresholds, penalty) + + + next_day_profit, next_day_charge_cycles = baseline_policy.simulate(torch.tensor([[real_imbalance_prices]]), torch.tensor([next_day_charge_threshold]), torch.tensor([next_day_discharge_threshold])) + yesterday_profit, yesterday_charge_cycles = baseline_policy.simulate(torch.tensor([[real_imbalance_prices]]), torch.tensor([yesterday_charge_thresholds.mean(axis=0)]), torch.tensor([yesterday_discharge_thresholds.mean(axis=0)])) + + predicted_nrv_profits_cycles[penalty][0] += next_day_profit.item() + predicted_nrv_profits_cycles[penalty][1] += next_day_charge_cycles.item() + + baseline_profits_cycles[penalty][0] += yesterday_profit.item() + baseline_profits_cycles[penalty][1] += yesterday_charge_cycles.item() + + return predicted_nrv_profits_cycles, baseline_profits_cycles + +def next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV: callable): + penalties = [0, 10, 50, 150, 250, 350, 500] + predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties} + baseline_profits_cycles = {i: [0, 0] for i in penalties} + + # get all dates in test set + dates = baseline_policy.test_data["DateTime"].dt.date.unique() + + # dates back to datetime + dates = pd.to_datetime(dates) + + for date in tqdm(dates): + try: + new_predicted_nrv_profits_cycles, new_baseline_profits_cycles = get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV, penalties) + + for penalty in penalties: + predicted_nrv_profits_cycles[penalty][0] += new_predicted_nrv_profits_cycles[penalty][0] + predicted_nrv_profits_cycles[penalty][1] += new_predicted_nrv_profits_cycles[penalty][1] + + baseline_profits_cycles[penalty][0] += new_baseline_profits_cycles[penalty][0] + baseline_profits_cycles[penalty][1] += new_baseline_profits_cycles[penalty][1] + + except Exception as e: + # raise e + # print(f"Error for date {date}") + continue + + return predicted_nrv_profits_cycles, baseline_profits_cycles + +def main(): + configuration, model, data_processor, test_loader = load_model(args.task_id) + + clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast") + task = clearml_helper.get_task(task_name="Policy Test") + + task.connect(args, name="Arguments") + task.execute_remotely(queue_name="default", exit_process=True) + + + if args.model_type == "quantile": + predict_NRV = quantile_auto_regressive_predicted_NRV + task.add_tags(["quantile"]) + elif args.model_type == "diffusion": + predict_NRV = diffusion_predicted_NRV + task.add_tags(["diffusion"]) + else: + raise ValueError("Please specify model type") + + ipc = ImbalancePriceCalculator(data_path="") + + predicted_nrv_profits_cycles, baseline_profits_cycles = next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV) + + # create dataframe with columns "name", "penalty", "profit", "cycles" + df = pd.DataFrame(columns=["name", "penalty", "profit", "cycles"]) + + # use concat + for penalty in predicted_nrv_profits_cycles.keys(): + new_rows = pd.DataFrame({ + "name": [args.model_type, "baseline"], + "penalty": [penalty, penalty], + "profit": [predicted_nrv_profits_cycles[penalty][0], baseline_profits_cycles[penalty][0]], + "cycles": [predicted_nrv_profits_cycles[penalty][1], baseline_profits_cycles[penalty][1]] + }) + df = pd.concat([df, new_rows], ignore_index=True) + + # sort by name, penalty ascending + df = df.sort_values(by=["name", "penalty"]) + + task.get_logger().report_table( + "Policy Results", + "Policy Results", + iteration=0, + table_plot=df + ) + + # close task + task.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/policies/simple_baseline.py b/src/policies/simple_baseline.py index d750138..fc03108 100644 --- a/src/policies/simple_baseline.py +++ b/src/policies/simple_baseline.py @@ -127,7 +127,7 @@ class BaselinePolicy(): return df, df_test - def get_optimal_thresholds(self, imbalance_prices, charge_thresholds, discharge_thresholds): + def get_optimal_thresholds(self, imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty: float = 0): threshold_pairs = itertools.product(charge_thresholds, discharge_thresholds) threshold_pairs = filter(lambda x: x[0] < x[1], threshold_pairs) @@ -136,12 +136,16 @@ class BaselinePolicy(): charge_thresholds = torch.tensor([x[0] for x in threshold_pairs]) discharge_thresholds = torch.tensor([x[1] for x in threshold_pairs]) + # set device to imbalance_prices device + charge_thresholds = charge_thresholds.to(imbalance_prices.device) + discharge_thresholds = discharge_thresholds.to(imbalance_prices.device) + next_day_charge_thresholds, next_day_discharge_thresholds = [], [] # imbalance_prices: (1000, 96) -> (1000, threshold_pairs, 96) - imbalance_prices = imbalance_prices.unsqueeze(1).repeat(1, charge_thresholds.shape[0], 1) + imbalance_prices = imbalance_prices.unsqueeze(1).expand(-1, len(threshold_pairs), -1) - profits, charge_cycles = self.simulate(imbalance_prices, charge_thresholds, discharge_thresholds) + profits, charge_cycles = self.simulate(imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty=charge_cycles_penalty) # get the index of the best threshold pair for each day (1000, 96) -> (1000) best_threshold_indices = torch.argmax(profits, dim=1) @@ -155,7 +159,11 @@ class BaselinePolicy(): return next_day_charge_thresholds, next_day_discharge_thresholds - def simulate(self, price_matrix, charge_thresholds: torch.tensor, discharge_thresholds: torch.tensor, charge_cycles_penalty: float = 250): + def simulate(self, price_matrix, charge_thresholds: torch.tensor, discharge_thresholds: torch.tensor, charge_cycles_penalty: float = 0): + # make sure all on the same device + charge_thresholds = charge_thresholds.to(price_matrix.device) + discharge_thresholds = discharge_thresholds.to(price_matrix.device) + batch_size, num_thresholds, num_time_steps = price_matrix.shape # Reshape thresholds for broadcasting @@ -171,6 +179,10 @@ class BaselinePolicy(): profits = torch.zeros_like(battery_states) charge_cycles = torch.zeros_like(battery_states) + battery_states = battery_states.to(price_matrix.device) + profits = profits.to(price_matrix.device) + charge_cycles = charge_cycles.to(price_matrix.device) + for i in range(num_time_steps): discharge_mask = ~((charge_matrix[:, :, i] == -1) & (battery_states == 0)) charge_mask = ~((charge_matrix[:, :, i] == 1) & (battery_states == self.battery.capacity)) diff --git a/src/trainers/autoregressive_trainer.py b/src/trainers/autoregressive_trainer.py index 4ceb459..d459bd7 100644 --- a/src/trainers/autoregressive_trainer.py +++ b/src/trainers/autoregressive_trainer.py @@ -10,7 +10,6 @@ from plotly.subplots import make_subplots from src.trainers.trainer import Trainer from tqdm import tqdm - class AutoRegressiveTrainer(Trainer): def __init__( self, diff --git a/src/trainers/diffusion_trainer.py b/src/trainers/diffusion_trainer.py index 37b6e1c..6f35ae3 100644 --- a/src/trainers/diffusion_trainer.py +++ b/src/trainers/diffusion_trainer.py @@ -12,6 +12,34 @@ import matplotlib.pyplot as plt import seaborn as sns import matplotlib.patches as mpatches + +def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_steps=1000, beta_start=1e-4, beta_end=0.02, ts_length=96): + device = next(model.parameters()).device + beta = torch.linspace(beta_start, beta_end, noise_steps).to(device) + alpha = 1. - beta + alpha_hat = torch.cumprod(alpha, dim=0) + + inputs = inputs.repeat(n, 1).to(device) + model.eval() + with torch.no_grad(): + x = torch.randn(inputs.shape[0], ts_length).to(device) + for i in reversed(range(1, noise_steps)): + t = (torch.ones(inputs.shape[0]) * i).long().to(device) + predicted_noise = model(x, t, inputs) + _alpha = alpha[t][:, None] + _alpha_hat = alpha_hat[t][:, None] + _beta = beta[t][:, None] + + if i > 1: + noise = torch.randn_like(x) + else: + noise = torch.zeros_like(x) + + x = 1/torch.sqrt(_alpha) * (x-((1-_alpha) / (torch.sqrt(1 - _alpha_hat))) * predicted_noise) + torch.sqrt(_beta) * noise + return x + + + class DiffusionTrainer: def __init__(self, model: nn.Module, data_processor: DataProcessor, device: torch.device): self.model = model @@ -50,23 +78,7 @@ class DiffusionTrainer: def sample(self, model: DiffusionModel, n: int, inputs: torch.tensor): - inputs = inputs.repeat(n, 1).to(self.device) - model.eval() - with torch.no_grad(): - x = torch.randn(inputs.shape[0], self.ts_length).to(self.device) - for i in reversed(range(1, self.noise_steps)): - t = (torch.ones(inputs.shape[0]) * i).long().to(self.device) - predicted_noise = model(x, t, inputs) - alpha = self.alpha[t][:, None] - alpha_hat = self.alpha_hat[t][:, None] - beta = self.beta[t][:, None] - - if i > 1: - noise = torch.randn_like(x) - else: - noise = torch.zeros_like(x) - - x = 1/torch.sqrt(alpha) * (x-((1-alpha) / (torch.sqrt(1 - alpha_hat))) * predicted_noise) + torch.sqrt(beta) * noise + x = sample_diffusion(model, n, inputs, self.noise_steps, self.beta_start, self.beta_end, self.ts_length) model.train() return x diff --git a/src/trainers/quantile_trainer.py b/src/trainers/quantile_trainer.py index a5810c7..4660c4f 100644 --- a/src/trainers/quantile_trainer.py +++ b/src/trainers/quantile_trainer.py @@ -57,6 +57,86 @@ def sample_from_dist(quantiles, output_values): # Return the mean of the samples return np.mean(new_samples, axis=1) +def auto_regressive(dataset, model, idx_batch, sequence_length: int = 96): + device = model.device + prev_features, targets = dataset.get_batch(idx_batch) + prev_features = prev_features.to(device) + targets = targets.to(device) + + if len(list(prev_features.shape)) == 2: + initial_sequence = prev_features[:, :96] + else: + initial_sequence = prev_features[:, :, 0] + + target_full = targets[:, 0].unsqueeze(1) # (batch_size, 1) + with torch.no_grad(): + new_predictions_full = model(prev_features) # (batch_size, quantiles) + samples = ( + torch.tensor(sample_from_dist( new_predictions_full)) + .unsqueeze(1) + .to(device) + ) # (batch_size, 1) + predictions_samples = samples + predictions_full = new_predictions_full.unsqueeze(1) + + for i in range(sequence_length - 1): + if len(list(prev_features.shape)) == 2: + new_features = torch.cat( + (prev_features[:, 1:96], samples), dim=1 + ) # (batch_size, 96) + + new_features = new_features.float() + + other_features, new_targets = dataset.get_batch_autoregressive( + np.array(idx_batch) + i + 1 + ) # (batch_size, new_features) + + if other_features is not None: + prev_features = torch.cat( + (new_features.to(device), other_features.to(device)), dim=1 + ) # (batch_size, 96 + new_features) + else: + prev_features = new_features + + else: + other_features, new_targets = dataset.get_batch_autoregressive( + np.array(idx_batch) + i + 1 + ) # (batch_size, 1, new_features) + + # change the other_features nrv based on the samples + other_features[:, 0, 0] = samples.squeeze(-1) + # make sure on same device + other_features = other_features.to(device) + prev_features = prev_features.to(device) + prev_features = torch.cat( + (prev_features[:, 1:, :], other_features), dim=1 + ) # (batch_size, 96, new_features) + + target_full = torch.cat( + (target_full, new_targets.to(device)), dim=1 + ) # (batch_size, sequence_length) + + with torch.no_grad(): + new_predictions_full = model( + prev_features + ) # (batch_size, quantiles) + predictions_full = torch.cat( + (predictions_full, new_predictions_full.unsqueeze(1)), dim=1 + ) # (batch_size, sequence_length, quantiles) + + samples = ( + torch.tensor(sample_from_dist(new_predictions_full)) + .unsqueeze(-1) + .to(device) + ) # (batch_size, 1) + predictions_samples = torch.cat((predictions_samples, samples), dim=1) + + return ( + initial_sequence, + predictions_full, + predictions_samples, + target_full.unsqueeze(-1), + ) class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer): def __init__( @@ -273,84 +353,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer): return fig def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96): - prev_features, targets = dataset.get_batch(idx_batch) - prev_features = prev_features.to(self.device) - targets = targets.to(self.device) - - if len(list(prev_features.shape)) == 2: - initial_sequence = prev_features[:, :96] - else: - initial_sequence = prev_features[:, :, 0] - - target_full = targets[:, 0].unsqueeze(1) # (batch_size, 1) - with torch.no_grad(): - new_predictions_full = self.model(prev_features) # (batch_size, quantiles) - samples = ( - torch.tensor(sample_from_dist(self.quantiles, new_predictions_full)) - .unsqueeze(1) - .to(self.device) - ) # (batch_size, 1) - predictions_samples = samples - predictions_full = new_predictions_full.unsqueeze(1) - - for i in range(sequence_length - 1): - if len(list(prev_features.shape)) == 2: - new_features = torch.cat( - (prev_features[:, 1:96], samples), dim=1 - ) # (batch_size, 96) - - new_features = new_features.float() - - other_features, new_targets = dataset.get_batch_autoregressive( - np.array(idx_batch) + i + 1 - ) # (batch_size, new_features) - - if other_features is not None: - prev_features = torch.cat( - (new_features.to(self.device), other_features.to(self.device)), dim=1 - ) # (batch_size, 96 + new_features) - else: - prev_features = new_features - - else: - other_features, new_targets = dataset.get_batch_autoregressive( - np.array(idx_batch) + i + 1 - ) # (batch_size, 1, new_features) - - # change the other_features nrv based on the samples - other_features[:, 0, 0] = samples.squeeze(-1) - # make sure on same device - other_features = other_features.to(self.device) - prev_features = prev_features.to(self.device) - prev_features = torch.cat( - (prev_features[:, 1:, :], other_features), dim=1 - ) # (batch_size, 96, new_features) - - target_full = torch.cat( - (target_full, new_targets.to(self.device)), dim=1 - ) # (batch_size, sequence_length) - - with torch.no_grad(): - new_predictions_full = self.model( - prev_features - ) # (batch_size, quantiles) - predictions_full = torch.cat( - (predictions_full, new_predictions_full.unsqueeze(1)), dim=1 - ) # (batch_size, sequence_length, quantiles) - - samples = ( - torch.tensor(sample_from_dist(self.quantiles, new_predictions_full)) - .unsqueeze(-1) - .to(self.device) - ) # (batch_size, 1) - predictions_samples = torch.cat((predictions_samples, samples), dim=1) - - return ( - initial_sequence, - predictions_full, - predictions_samples, - target_full.unsqueeze(-1), - ) + return auto_regressive(dataset, self.model, idx_batch, sequence_length) def plot_quantile_percentages( self, task, data_loader, train: bool = True, iteration: int = None, full_day: bool = False diff --git a/src/utils/imbalance_price_calculator.py b/src/utils/imbalance_price_calculator.py index 8b77b54..874ff72 100644 --- a/src/utils/imbalance_price_calculator.py +++ b/src/utils/imbalance_price_calculator.py @@ -2,17 +2,27 @@ import plotly.graph_objects as go import numpy as np import pytz import pandas as pd +import os -incremental_bids = "../../data/incremental_bids.csv" -decremental_bids = "../../data/decremental_bids.csv" +incremental_bids = "data/incremental_bids.csv" +decremental_bids = "data/decremental_bids.csv" class ImbalancePriceCalculator: - def __init__(self, method: int = 1) -> None: + def __init__(self, method: int = 1, data_path: str = "../../") -> None: self.method = method - self.load_bids() + self.data_path = data_path + + # check if pickle of bid_ladder exists: data/bid_ladder.pkl + if not os.path.isfile(self.data_path + "data/bid_ladder.pkl"): + print("Bid ladder pickle not found, loading bids and generating bid ladder...") + self.load_bids() + self.bid_ladder.to_pickle(self.data_path + "data/bid_ladder.pkl") + else: + print("Bid ladder pickle found, loading bid ladder...") + self.bid_ladder = pd.read_pickle(self.data_path + "data/bid_ladder.pkl") def load_bids(self): - df = pd.read_csv(incremental_bids, sep=";") + df = pd.read_csv(self.data_path + incremental_bids, sep=";") df["Datetime"] = pd.to_datetime(df["Datetime"], utc=True) # sort by Datetime, Activation Order, Bid Price @@ -21,7 +31,7 @@ class ImbalancePriceCalculator: # next we need to calculate the cummulative bids for every datetime incremental_cum_df = df.groupby(["Datetime"]).apply(self.generate_bid_ladder) - df = pd.read_csv(decremental_bids, sep=";") + df = pd.read_csv(self.data_path + decremental_bids, sep=";") df["Datetime"] = pd.to_datetime(df["Datetime"], utc=True) decremental_cum_df = df.groupby(["Datetime"]).apply(self.generate_bid_ladder, incremental=False) @@ -129,7 +139,8 @@ class ImbalancePriceCalculator: MIPS = self.get_imbalance_price_vectorized(datetimes, np.abs(NRVS)) MDPS = self.get_imbalance_price_vectorized(datetimes, -np.abs(NRVS)) - return calculate_imbalance_price(-NRV_PREVS, -NRVS, MIPS, MDPS) + return calculate_imbalance_price(-NRV_PREVS, -NRVS, MIPS, MDPS) + def calculate_imbalance_price(SI_PREV, SI, MIP, MDP): # Convert parameters to numpy arrays for vectorized operations