Added policy executer file for remotely executing

This commit is contained in:
Victor Mylle
2024-01-15 21:19:33 +00:00
parent 67cc6d4bb9
commit 428e3d9e4b
7 changed files with 323 additions and 110 deletions

BIN
data/bid_ladder.pkl Normal file

Binary file not shown.

View File

@@ -1,17 +1,42 @@
import argparse
from clearml import Task, Model
from src.policies.simple_baseline import BaselinePolicy, Battery
from src.data import DataProcessor, DataConfig
import torch
import numpy as np
import pandas as pd
import datetime
from tqdm import tqdm
from src.utils.imbalance_price_calculator import ImbalancePriceCalculator
import time
### import functions ###
from src.trainers.quantile_trainer import auto_regressive as quantile_auto_regressive
from src.trainers.diffusion_trainer import sample_diffusion
from src.utils.clearml import ClearMLHelper
# argparse to parse task id and model type
parser = argparse.ArgumentParser()
parser.add_argument('--task_id', type=int, default=None)
parser.add_argument('--task_id', type=str, default=None)
parser.add_argument('--model_type', type=str, default=None)
args = parser.parse_args()
assert args.task_id is not None, "Please specify task id"
assert args.model_type is not None, "Please specify model type"
battery = Battery(2, 1)
baseline_policy = BaselinePolicy(battery, data_path="")
### Load Imbalance Prices ###
imbalance_prices = pd.read_csv('data/imbalance_prices.csv', sep=';')
imbalance_prices["DateTime"] = pd.to_datetime(imbalance_prices['DateTime'], utc=True)
imbalance_prices = imbalance_prices.sort_values(by=['DateTime'])
def get_imbalance_prices(date):
imbalance_prices_day = imbalance_prices[imbalance_prices["DateTime"].dt.date == date]
return imbalance_prices_day['Positive imbalance price'].values
def load_model(task_id: str):
"""
Load model from task id
@@ -31,7 +56,7 @@ def load_model(task_id: str):
data_config.DAY_OF_WEEK = False
### Data Processor ###
data_processor = DataProcessor(data_config, path="../../", lstm=False)
data_processor = DataProcessor(data_config, path="", lstm=False)
data_processor.set_batch_size(8192)
data_processor.set_full_day_skip(True)
@@ -50,4 +75,155 @@ def load_model(task_id: str):
predict_sequence_length=96
)
return configuration, model, test_loader
return configuration, model, data_processor, test_loader
def quantile_auto_regressive_predicted_NRV(model, date, data_processor, test_loader):
idx = test_loader.dataset.get_idx_for_date(date.date())
initial, _, samples, target = quantile_auto_regressive(test_loader.dataset, model, [idx]*500, 96)
samples = samples.cpu().numpy()
target = target.cpu().numpy()
# inverse using data_processor
samples = data_processor.inverse_transform(samples)
target = data_processor.inverse_transform(target)
return initial.cpu().numpy()[0][-1], samples, target
def diffusion_predicted_NRV(model, date, _, test_loader):
device = next(model.parameters()).device
idx = test_loader.dataset.get_idx_for_date(date.date())
prev_features, targets = test_loader.dataset.get_batch([idx])
if len(list(prev_features.shape)) == 2:
initial_sequence = prev_features[:, :96]
else:
initial_sequence = prev_features[:, :, 0]
prev_features = prev_features.to(device)
targets = targets.to(device)
samples = sample_diffusion(model, 1000, prev_features)
return initial_sequence.cpu().numpy()[0][-1], samples.cpu().numpy(), targets.cpu().numpy()
def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV: callable, penalties: list):
charge_thresholds = np.arange(-100, 250, 25)
discharge_thresholds = np.arange(-100, 250, 25)
predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
baseline_profits_cycles = {i: [0, 0] for i in penalties}
initial, nrvs, target = predict_NRV(model, date, data_processor, test_loader)
initial = np.repeat(initial, nrvs.shape[0])
combined = np.concatenate((initial.reshape(-1, 1), nrvs), axis=1)
reconstructed_imbalance_prices = ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined)
reconstructed_imbalance_prices = torch.tensor(reconstructed_imbalance_prices, device="cuda")
yesterday_imbalance_prices = get_imbalance_prices(date.date() - datetime.timedelta(days=1))
yesterday_imbalance_prices = torch.tensor(np.array([yesterday_imbalance_prices]), device="cpu")
real_imbalance_prices = get_imbalance_prices(date.date())
for penalty in penalties:
found_charge_thresholds, found_discharge_thresholds = baseline_policy.get_optimal_thresholds(reconstructed_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
next_day_charge_threshold = found_charge_thresholds.mean(axis=0)
next_day_discharge_threshold = found_discharge_thresholds.mean(axis=0)
yesterday_charge_thresholds, yesterday_discharge_thresholds = baseline_policy.get_optimal_thresholds(yesterday_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
next_day_profit, next_day_charge_cycles = baseline_policy.simulate(torch.tensor([[real_imbalance_prices]]), torch.tensor([next_day_charge_threshold]), torch.tensor([next_day_discharge_threshold]))
yesterday_profit, yesterday_charge_cycles = baseline_policy.simulate(torch.tensor([[real_imbalance_prices]]), torch.tensor([yesterday_charge_thresholds.mean(axis=0)]), torch.tensor([yesterday_discharge_thresholds.mean(axis=0)]))
predicted_nrv_profits_cycles[penalty][0] += next_day_profit.item()
predicted_nrv_profits_cycles[penalty][1] += next_day_charge_cycles.item()
baseline_profits_cycles[penalty][0] += yesterday_profit.item()
baseline_profits_cycles[penalty][1] += yesterday_charge_cycles.item()
return predicted_nrv_profits_cycles, baseline_profits_cycles
def next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV: callable):
penalties = [0, 10, 50, 150, 250, 350, 500]
predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
baseline_profits_cycles = {i: [0, 0] for i in penalties}
# get all dates in test set
dates = baseline_policy.test_data["DateTime"].dt.date.unique()
# dates back to datetime
dates = pd.to_datetime(dates)
for date in tqdm(dates):
try:
new_predicted_nrv_profits_cycles, new_baseline_profits_cycles = get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV, penalties)
for penalty in penalties:
predicted_nrv_profits_cycles[penalty][0] += new_predicted_nrv_profits_cycles[penalty][0]
predicted_nrv_profits_cycles[penalty][1] += new_predicted_nrv_profits_cycles[penalty][1]
baseline_profits_cycles[penalty][0] += new_baseline_profits_cycles[penalty][0]
baseline_profits_cycles[penalty][1] += new_baseline_profits_cycles[penalty][1]
except Exception as e:
# raise e
# print(f"Error for date {date}")
continue
return predicted_nrv_profits_cycles, baseline_profits_cycles
def main():
configuration, model, data_processor, test_loader = load_model(args.task_id)
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
task = clearml_helper.get_task(task_name="Policy Test")
task.connect(args, name="Arguments")
task.execute_remotely(queue_name="default", exit_process=True)
if args.model_type == "quantile":
predict_NRV = quantile_auto_regressive_predicted_NRV
task.add_tags(["quantile"])
elif args.model_type == "diffusion":
predict_NRV = diffusion_predicted_NRV
task.add_tags(["diffusion"])
else:
raise ValueError("Please specify model type")
ipc = ImbalancePriceCalculator(data_path="")
predicted_nrv_profits_cycles, baseline_profits_cycles = next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV)
# create dataframe with columns "name", "penalty", "profit", "cycles"
df = pd.DataFrame(columns=["name", "penalty", "profit", "cycles"])
# use concat
for penalty in predicted_nrv_profits_cycles.keys():
new_rows = pd.DataFrame({
"name": [args.model_type, "baseline"],
"penalty": [penalty, penalty],
"profit": [predicted_nrv_profits_cycles[penalty][0], baseline_profits_cycles[penalty][0]],
"cycles": [predicted_nrv_profits_cycles[penalty][1], baseline_profits_cycles[penalty][1]]
})
df = pd.concat([df, new_rows], ignore_index=True)
# sort by name, penalty ascending
df = df.sort_values(by=["name", "penalty"])
task.get_logger().report_table(
"Policy Results",
"Policy Results",
iteration=0,
table_plot=df
)
# close task
task.close()
if __name__ == "__main__":
main()

View File

@@ -127,7 +127,7 @@ class BaselinePolicy():
return df, df_test
def get_optimal_thresholds(self, imbalance_prices, charge_thresholds, discharge_thresholds):
def get_optimal_thresholds(self, imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty: float = 0):
threshold_pairs = itertools.product(charge_thresholds, discharge_thresholds)
threshold_pairs = filter(lambda x: x[0] < x[1], threshold_pairs)
@@ -136,12 +136,16 @@ class BaselinePolicy():
charge_thresholds = torch.tensor([x[0] for x in threshold_pairs])
discharge_thresholds = torch.tensor([x[1] for x in threshold_pairs])
# set device to imbalance_prices device
charge_thresholds = charge_thresholds.to(imbalance_prices.device)
discharge_thresholds = discharge_thresholds.to(imbalance_prices.device)
next_day_charge_thresholds, next_day_discharge_thresholds = [], []
# imbalance_prices: (1000, 96) -> (1000, threshold_pairs, 96)
imbalance_prices = imbalance_prices.unsqueeze(1).repeat(1, charge_thresholds.shape[0], 1)
imbalance_prices = imbalance_prices.unsqueeze(1).expand(-1, len(threshold_pairs), -1)
profits, charge_cycles = self.simulate(imbalance_prices, charge_thresholds, discharge_thresholds)
profits, charge_cycles = self.simulate(imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty=charge_cycles_penalty)
# get the index of the best threshold pair for each day (1000, 96) -> (1000)
best_threshold_indices = torch.argmax(profits, dim=1)
@@ -155,7 +159,11 @@ class BaselinePolicy():
return next_day_charge_thresholds, next_day_discharge_thresholds
def simulate(self, price_matrix, charge_thresholds: torch.tensor, discharge_thresholds: torch.tensor, charge_cycles_penalty: float = 250):
def simulate(self, price_matrix, charge_thresholds: torch.tensor, discharge_thresholds: torch.tensor, charge_cycles_penalty: float = 0):
# make sure all on the same device
charge_thresholds = charge_thresholds.to(price_matrix.device)
discharge_thresholds = discharge_thresholds.to(price_matrix.device)
batch_size, num_thresholds, num_time_steps = price_matrix.shape
# Reshape thresholds for broadcasting
@@ -171,6 +179,10 @@ class BaselinePolicy():
profits = torch.zeros_like(battery_states)
charge_cycles = torch.zeros_like(battery_states)
battery_states = battery_states.to(price_matrix.device)
profits = profits.to(price_matrix.device)
charge_cycles = charge_cycles.to(price_matrix.device)
for i in range(num_time_steps):
discharge_mask = ~((charge_matrix[:, :, i] == -1) & (battery_states == 0))
charge_mask = ~((charge_matrix[:, :, i] == 1) & (battery_states == self.battery.capacity))

View File

@@ -10,7 +10,6 @@ from plotly.subplots import make_subplots
from src.trainers.trainer import Trainer
from tqdm import tqdm
class AutoRegressiveTrainer(Trainer):
def __init__(
self,

View File

@@ -12,6 +12,34 @@ import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_steps=1000, beta_start=1e-4, beta_end=0.02, ts_length=96):
device = next(model.parameters()).device
beta = torch.linspace(beta_start, beta_end, noise_steps).to(device)
alpha = 1. - beta
alpha_hat = torch.cumprod(alpha, dim=0)
inputs = inputs.repeat(n, 1).to(device)
model.eval()
with torch.no_grad():
x = torch.randn(inputs.shape[0], ts_length).to(device)
for i in reversed(range(1, noise_steps)):
t = (torch.ones(inputs.shape[0]) * i).long().to(device)
predicted_noise = model(x, t, inputs)
_alpha = alpha[t][:, None]
_alpha_hat = alpha_hat[t][:, None]
_beta = beta[t][:, None]
if i > 1:
noise = torch.randn_like(x)
else:
noise = torch.zeros_like(x)
x = 1/torch.sqrt(_alpha) * (x-((1-_alpha) / (torch.sqrt(1 - _alpha_hat))) * predicted_noise) + torch.sqrt(_beta) * noise
return x
class DiffusionTrainer:
def __init__(self, model: nn.Module, data_processor: DataProcessor, device: torch.device):
self.model = model
@@ -50,23 +78,7 @@ class DiffusionTrainer:
def sample(self, model: DiffusionModel, n: int, inputs: torch.tensor):
inputs = inputs.repeat(n, 1).to(self.device)
model.eval()
with torch.no_grad():
x = torch.randn(inputs.shape[0], self.ts_length).to(self.device)
for i in reversed(range(1, self.noise_steps)):
t = (torch.ones(inputs.shape[0]) * i).long().to(self.device)
predicted_noise = model(x, t, inputs)
alpha = self.alpha[t][:, None]
alpha_hat = self.alpha_hat[t][:, None]
beta = self.beta[t][:, None]
if i > 1:
noise = torch.randn_like(x)
else:
noise = torch.zeros_like(x)
x = 1/torch.sqrt(alpha) * (x-((1-alpha) / (torch.sqrt(1 - alpha_hat))) * predicted_noise) + torch.sqrt(beta) * noise
x = sample_diffusion(model, n, inputs, self.noise_steps, self.beta_start, self.beta_end, self.ts_length)
model.train()
return x

View File

@@ -57,6 +57,86 @@ def sample_from_dist(quantiles, output_values):
# Return the mean of the samples
return np.mean(new_samples, axis=1)
def auto_regressive(dataset, model, idx_batch, sequence_length: int = 96):
device = model.device
prev_features, targets = dataset.get_batch(idx_batch)
prev_features = prev_features.to(device)
targets = targets.to(device)
if len(list(prev_features.shape)) == 2:
initial_sequence = prev_features[:, :96]
else:
initial_sequence = prev_features[:, :, 0]
target_full = targets[:, 0].unsqueeze(1) # (batch_size, 1)
with torch.no_grad():
new_predictions_full = model(prev_features) # (batch_size, quantiles)
samples = (
torch.tensor(sample_from_dist( new_predictions_full))
.unsqueeze(1)
.to(device)
) # (batch_size, 1)
predictions_samples = samples
predictions_full = new_predictions_full.unsqueeze(1)
for i in range(sequence_length - 1):
if len(list(prev_features.shape)) == 2:
new_features = torch.cat(
(prev_features[:, 1:96], samples), dim=1
) # (batch_size, 96)
new_features = new_features.float()
other_features, new_targets = dataset.get_batch_autoregressive(
np.array(idx_batch) + i + 1
) # (batch_size, new_features)
if other_features is not None:
prev_features = torch.cat(
(new_features.to(device), other_features.to(device)), dim=1
) # (batch_size, 96 + new_features)
else:
prev_features = new_features
else:
other_features, new_targets = dataset.get_batch_autoregressive(
np.array(idx_batch) + i + 1
) # (batch_size, 1, new_features)
# change the other_features nrv based on the samples
other_features[:, 0, 0] = samples.squeeze(-1)
# make sure on same device
other_features = other_features.to(device)
prev_features = prev_features.to(device)
prev_features = torch.cat(
(prev_features[:, 1:, :], other_features), dim=1
) # (batch_size, 96, new_features)
target_full = torch.cat(
(target_full, new_targets.to(device)), dim=1
) # (batch_size, sequence_length)
with torch.no_grad():
new_predictions_full = model(
prev_features
) # (batch_size, quantiles)
predictions_full = torch.cat(
(predictions_full, new_predictions_full.unsqueeze(1)), dim=1
) # (batch_size, sequence_length, quantiles)
samples = (
torch.tensor(sample_from_dist(new_predictions_full))
.unsqueeze(-1)
.to(device)
) # (batch_size, 1)
predictions_samples = torch.cat((predictions_samples, samples), dim=1)
return (
initial_sequence,
predictions_full,
predictions_samples,
target_full.unsqueeze(-1),
)
class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
def __init__(
@@ -273,84 +353,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
return fig
def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96):
prev_features, targets = dataset.get_batch(idx_batch)
prev_features = prev_features.to(self.device)
targets = targets.to(self.device)
if len(list(prev_features.shape)) == 2:
initial_sequence = prev_features[:, :96]
else:
initial_sequence = prev_features[:, :, 0]
target_full = targets[:, 0].unsqueeze(1) # (batch_size, 1)
with torch.no_grad():
new_predictions_full = self.model(prev_features) # (batch_size, quantiles)
samples = (
torch.tensor(sample_from_dist(self.quantiles, new_predictions_full))
.unsqueeze(1)
.to(self.device)
) # (batch_size, 1)
predictions_samples = samples
predictions_full = new_predictions_full.unsqueeze(1)
for i in range(sequence_length - 1):
if len(list(prev_features.shape)) == 2:
new_features = torch.cat(
(prev_features[:, 1:96], samples), dim=1
) # (batch_size, 96)
new_features = new_features.float()
other_features, new_targets = dataset.get_batch_autoregressive(
np.array(idx_batch) + i + 1
) # (batch_size, new_features)
if other_features is not None:
prev_features = torch.cat(
(new_features.to(self.device), other_features.to(self.device)), dim=1
) # (batch_size, 96 + new_features)
else:
prev_features = new_features
else:
other_features, new_targets = dataset.get_batch_autoregressive(
np.array(idx_batch) + i + 1
) # (batch_size, 1, new_features)
# change the other_features nrv based on the samples
other_features[:, 0, 0] = samples.squeeze(-1)
# make sure on same device
other_features = other_features.to(self.device)
prev_features = prev_features.to(self.device)
prev_features = torch.cat(
(prev_features[:, 1:, :], other_features), dim=1
) # (batch_size, 96, new_features)
target_full = torch.cat(
(target_full, new_targets.to(self.device)), dim=1
) # (batch_size, sequence_length)
with torch.no_grad():
new_predictions_full = self.model(
prev_features
) # (batch_size, quantiles)
predictions_full = torch.cat(
(predictions_full, new_predictions_full.unsqueeze(1)), dim=1
) # (batch_size, sequence_length, quantiles)
samples = (
torch.tensor(sample_from_dist(self.quantiles, new_predictions_full))
.unsqueeze(-1)
.to(self.device)
) # (batch_size, 1)
predictions_samples = torch.cat((predictions_samples, samples), dim=1)
return (
initial_sequence,
predictions_full,
predictions_samples,
target_full.unsqueeze(-1),
)
return auto_regressive(dataset, self.model, idx_batch, sequence_length)
def plot_quantile_percentages(
self, task, data_loader, train: bool = True, iteration: int = None, full_day: bool = False

View File

@@ -2,17 +2,27 @@ import plotly.graph_objects as go
import numpy as np
import pytz
import pandas as pd
import os
incremental_bids = "../../data/incremental_bids.csv"
decremental_bids = "../../data/decremental_bids.csv"
incremental_bids = "data/incremental_bids.csv"
decremental_bids = "data/decremental_bids.csv"
class ImbalancePriceCalculator:
def __init__(self, method: int = 1) -> None:
def __init__(self, method: int = 1, data_path: str = "../../") -> None:
self.method = method
self.load_bids()
self.data_path = data_path
# check if pickle of bid_ladder exists: data/bid_ladder.pkl
if not os.path.isfile(self.data_path + "data/bid_ladder.pkl"):
print("Bid ladder pickle not found, loading bids and generating bid ladder...")
self.load_bids()
self.bid_ladder.to_pickle(self.data_path + "data/bid_ladder.pkl")
else:
print("Bid ladder pickle found, loading bid ladder...")
self.bid_ladder = pd.read_pickle(self.data_path + "data/bid_ladder.pkl")
def load_bids(self):
df = pd.read_csv(incremental_bids, sep=";")
df = pd.read_csv(self.data_path + incremental_bids, sep=";")
df["Datetime"] = pd.to_datetime(df["Datetime"], utc=True)
# sort by Datetime, Activation Order, Bid Price
@@ -21,7 +31,7 @@ class ImbalancePriceCalculator:
# next we need to calculate the cummulative bids for every datetime
incremental_cum_df = df.groupby(["Datetime"]).apply(self.generate_bid_ladder)
df = pd.read_csv(decremental_bids, sep=";")
df = pd.read_csv(self.data_path + decremental_bids, sep=";")
df["Datetime"] = pd.to_datetime(df["Datetime"], utc=True)
decremental_cum_df = df.groupby(["Datetime"]).apply(self.generate_bid_ladder, incremental=False)
@@ -129,7 +139,8 @@ class ImbalancePriceCalculator:
MIPS = self.get_imbalance_price_vectorized(datetimes, np.abs(NRVS))
MDPS = self.get_imbalance_price_vectorized(datetimes, -np.abs(NRVS))
return calculate_imbalance_price(-NRV_PREVS, -NRVS, MIPS, MDPS)
return calculate_imbalance_price(-NRV_PREVS, -NRVS, MIPS, MDPS)
def calculate_imbalance_price(SI_PREV, SI, MIP, MDP):
# Convert parameters to numpy arrays for vectorized operations