diff --git a/Dockerfile b/Dockerfile
index 482d478..9c55510 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,6 +2,7 @@ FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
 
 RUN apt-get update
 RUN apt-get install -y git
+RUN apt-get install -y texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-bibtex-extra
 
 COPY requirements.txt /tmp/requirements.txt
diff --git a/Result-Reports/Policies.md b/Result-Reports/Policies.md
index aeb7177..b2f03eb 100644
--- a/Result-Reports/Policies.md
+++ b/Result-Reports/Policies.md
@@ -159,7 +159,7 @@ Test data: 01-01-2023 until 08-10-2023
 
 TODO:
 
-- [ ] ask about the diffusion model exercise from generative models
+- [x] ask about the diffusion model exercise from generative models -> there is no lab on this
 - [ ] test the policy with non-autoregressive models (Non Linear first) -> if this is already bad, look no further, but do mention it
 - [ ] policy on the test set -> watch the charge cycles (stop trading electricity)
diff --git a/src/policies/PolicyEvaluator.py b/src/policies/PolicyEvaluator.py
new file mode 100644
index 0000000..1369dc4
--- /dev/null
+++ b/src/policies/PolicyEvaluator.py
@@ -0,0 +1,152 @@
+from clearml import Task
+from tqdm import tqdm
+from src.policies.simple_baseline import BaselinePolicy
+import pandas as pd
+import numpy as np
+import torch
+import plotly.express as px
+
+from src.utils.imbalance_price_calculator import ImbalancePriceCalculator
+
+
+class PolicyEvaluator:
+    def __init__(self, baseline_policy: BaselinePolicy, task: Task = None):
+        self.baseline_policy = baseline_policy
+
+        self.ipc = ImbalancePriceCalculator(data_path="")
+        self.dates = baseline_policy.test_data["DateTime"].dt.date.unique()
+        self.dates = pd.to_datetime(self.dates)
+
+        ### Load Imbalance Prices ###
+        imbalance_prices = pd.read_csv('data/imbalance_prices.csv', sep=';')
+        imbalance_prices["DateTime"] = pd.to_datetime(imbalance_prices['DateTime'], utc=True)
+        self.imbalance_prices = imbalance_prices.sort_values(by=['DateTime'])
+
+        self.penalties = [0, 100, 300, 500, 800, 1000, 1500]
+        self.profits = []
+
+        self.task = task
+
+    def get_imbalance_prices_for_date(self, date):
+        imbalance_prices_day = self.imbalance_prices[self.imbalance_prices["DateTime"].dt.date == date]
+        return imbalance_prices_day['Positive imbalance price'].values
+
+    def evaluate_for_date(self, date, idx_samples, test_loader):
+        charge_thresholds = np.arange(-100, 250, 25)
+        discharge_thresholds = np.arange(-100, 250, 25)
+
+        idx = test_loader.dataset.get_idx_for_date(date.date())
+        (initial, samples) = idx_samples[idx]
+
+        initial = initial.cpu().numpy()[0][-1]
+        samples = samples.cpu().numpy()
+
+        # prepend the shared initial value to every sampled path
+        initial = np.repeat(initial, samples.shape[0])
+        combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1)
+
+        reconstructed_imbalance_prices = self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined)
+        reconstructed_imbalance_prices = torch.tensor(reconstructed_imbalance_prices, device="cuda")
+
+        real_imbalance_prices = self.get_imbalance_prices_for_date(date.date())
+
+        for penalty in self.penalties:
+            found_charge_thresholds, found_discharge_thresholds = self.baseline_policy.get_optimal_thresholds(
+                reconstructed_imbalance_prices, charge_thresholds, discharge_thresholds, penalty
+            )
+
+            predicted_charge_threshold = found_charge_thresholds.mean(axis=0)
+            predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0)
+
+            ### Determine Profits and Charge Cycles ###
+            simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
+                torch.tensor([[real_imbalance_prices]]),
+                torch.tensor([predicted_charge_threshold]), torch.tensor([predicted_discharge_threshold])
+            )
+            self.profits.append([
+                date, penalty,
+                simulated_profit[0][0].item(), simulated_charge_cycles[0][0].item(),
+                predicted_charge_threshold.item(), predicted_discharge_threshold.item()
+            ])
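+
+    # NOTE: `BaselinePolicy.simulate` is assumed to implement a plain
+    # threshold policy: charge the battery when the imbalance price falls
+    # below the charge threshold, discharge when it rises above the
+    # discharge threshold, and book the price spread as profit. A minimal
+    # sketch of that assumed contract (illustrative only, not the project's
+    # actual implementation):
+    #
+    #     def simulate(prices, charge_t, discharge_t):
+    #         profit, cycles, soc = 0.0, 0.0, 0.0
+    #         for price in prices:
+    #             if price < charge_t and soc < CAPACITY:
+    #                 soc += POWER
+    #                 profit -= price * POWER   # buy energy
+    #             elif price > discharge_t and soc >= POWER:
+    #                 soc -= POWER
+    #                 profit += price * POWER   # sell energy
+    #                 cycles += POWER / CAPACITY
+    #         return profit, cycles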
+
+    def evaluate_test_set(self, idx_samples, test_loader):
+        self.profits = []
+        try:
+            for date in tqdm(self.dates):
+                self.evaluate_for_date(date, idx_samples, test_loader)
+        except KeyboardInterrupt:
+            print("Interrupted")
+            raise
+        except Exception as e:
+            # keep the dates evaluated so far and continue
+            print(e)
+
+        self.profits = pd.DataFrame(self.profits, columns=["Date", "Penalty", "Profit", "Charge Cycles", "Charge Threshold", "Discharge Threshold"])
+
+    def plot_profits_table(self):
+        # Check that the task, penalties, and profits are all available
+        if self.task is None or not hasattr(self, 'penalties') or not hasattr(self, 'profits'):
+            print("Task, penalties, or profits not defined.")
+            return
+
+        if self.profits.empty:
+            print("Profits DataFrame is empty.")
+            return
+
+        # Aggregate profits and charge cycles by penalty, calculating totals and per-year values
+        aggregated = self.profits.groupby("Penalty").agg(
+            Total_Profit=("Profit", "sum"),
+            Total_Charge_Cycles=("Charge Cycles", "sum"),
+            Num_Days=("Date", "nunique")
+        )
+        aggregated["Profit_Per_Year"] = aggregated["Total_Profit"] / aggregated["Num_Days"] * 365
+        aggregated["Charge_Cycles_Per_Year"] = aggregated["Total_Charge_Cycles"] / aggregated["Num_Days"] * 365
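+        # The two lines above extrapolate the test window to a full year:
+        # e.g. a hypothetical total of 10 000 EUR over 250 evaluated days
+        # becomes 10 000 / 250 * 365 = 14 600 EUR per year.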
+
+        # Reset index to make 'Penalty' a column again and drop the intermediate totals
+        final_df = aggregated.reset_index().drop(columns=["Total_Profit", "Total_Charge_Cycles", "Num_Days"])
+
+        # Rename the columns; note that the values are the annualized figures computed above
+        final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"]
+
+        # Log the final results table
+        self.task.get_logger().report_table(
+            "Policy Results",
+            "Policy Results",
+            iteration=0,
+            table_plot=final_df
+        )
+
+    def plot_thresholds_per_day(self):
+        if self.task is None:
+            return
+
+        fig = px.line(
+            self.profits[self.profits["Penalty"] == 0],
+            x="Date",
+            y=["Charge Threshold", "Discharge Threshold"],
+            title="Charge and Discharge Thresholds per Day"
+        )
+
+        fig.update_layout(
+            width=1000,
+            height=600,
+            title_x=0.5,
+        )
+
+        self.task.get_logger().report_plotly(
+            "Thresholds per Day",
+            "Thresholds per Day",
+            iteration=0,
+            figure=fig
+        )
+
+    def get_profits_as_scalars(self):
+        aggregated = self.profits.groupby("Penalty").agg(
+            Total_Profit=("Profit", "sum"),
+            Total_Charge_Cycles=("Charge Cycles", "sum"),
+            Num_Days=("Date", "nunique")
+        )
+        aggregated["Profit_Per_Year"] = aggregated["Total_Profit"] / aggregated["Num_Days"] * 365
+        aggregated["Charge_Cycles_Per_Year"] = aggregated["Total_Charge_Cycles"] / aggregated["Num_Days"] * 365
+
+        # Reset index to make 'Penalty' a column again and drop the intermediate totals
+        final_df = aggregated.reset_index().drop(columns=["Total_Profit", "Total_Charge_Cycles", "Num_Days"])
+
+        # Rename the columns; as above, the values are the annualized figures
+        final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"]
+        return final_df
\ No newline at end of file
diff --git a/src/trainers/quantile_trainer.py b/src/trainers/quantile_trainer.py
index d1c8c13..e0acef6 100644
--- a/src/trainers/quantile_trainer.py
+++ b/src/trainers/quantile_trainer.py
@@ -1,5 +1,6 @@
 import torch
 from tqdm import tqdm
+from src.policies.PolicyEvaluator import PolicyEvaluator
 from src.losses.crps_metric import crps_from_samples
 from src.trainers.trainer import Trainer
 from src.trainers.autoregressive_trainer import AutoRegressiveTrainer
@@ -131,10 +132,13 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
         data_processor: DataProcessor,
         quantiles: list,
         device: torch.device,
+        policy_evaluator: PolicyEvaluator = None,
         debug: bool = True,
     ):
         self.quantiles = quantiles
+        self.test_set_samples = {}
+        self.policy_evaluator = policy_evaluator
 
         criterion = PinballLoss(quantiles=quantiles)
         super().__init__(
@@ -149,6 +153,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
 
     def calculate_crps_from_samples(self, task, dataloader, epoch: int):
        crps_from_samples_metric = []
+        generated_samples = {}
 
        with torch.no_grad():
            total_samples = len(dataloader.dataset) - 96
@@ -160,9 +165,12 @@
 
             for idx in tqdm(idx_batch):
                 computed_idx_batch = [idx] * 100
-                _, _, samples, targets = self.auto_regressive(
+                initial, _, samples, targets = self.auto_regressive(
                     dataloader.dataset, idx_batch=computed_idx_batch
                 )
+
+                generated_samples[idx.item()] = (initial, self.data_processor.inverse_transform(samples))
+
                 samples = samples.unsqueeze(0)
                 targets = targets.squeeze(-1)
                 targets = targets[0].unsqueeze(0)
@@ -175,6 +183,20 @@
             title="CRPS_from_samples", series="test", value=np.mean(crps_from_samples_metric), iteration=epoch
         )
 
+        # using the policy evaluator, evaluate the policy on the generated samples
+        if self.policy_evaluator is not None:
+            _, test_loader = self.data_processor.get_dataloaders(
+                predict_sequence_length=self.model.output_size)
+            self.policy_evaluator.evaluate_test_set(generated_samples, test_loader)
+            df = self.policy_evaluator.get_profits_as_scalars()
+
+            # report the profit for each penalty as a scalar
+            for _, row in df.iterrows():
+                task.get_logger().report_scalar(
+                    title="Profit", series=f"penalty_{row['Penalty']}", value=row["Total Profit"], iteration=epoch
+                )
+
     def log_final_metrics(self, task, dataloader, train: bool = True):
         metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track}
         transformed_metrics = {
@@ -194,10 +216,14 @@
 
         if train == False:
             for idx in tqdm(idx_batch):
-                computed_idx_batch = [idx] * 100
-                _, outputs, samples, targets = self.auto_regressive(
+                computed_idx_batch = [idx] * 250
+                initial, outputs, samples, targets = self.auto_regressive(
                     dataloader.dataset, idx_batch=computed_idx_batch
                 )
+
+                # save the samples for this idx; they will be used for evaluating the policy
+                self.test_set_samples[idx.item()] = (initial, self.data_processor.inverse_transform(samples))
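+                # NOTE: 250 autoregressive sample paths per index (versus 100
+                # in the per-epoch CRPS loop above), presumably trading extra
+                # inference time for a smoother estimate of the optimal
+                # thresholds.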
+
                 samples = samples.unsqueeze(0)
                 targets = targets.squeeze(-1)
                 targets = targets[0].unsqueeze(0)
diff --git a/src/trainers/trainer.py b/src/trainers/trainer.py
index 41e5944..27ff21f 100644
--- a/src/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -196,7 +196,7 @@ class Trainer:
 
             if task:
                 self.finish_training(task=task)
-                task.close()
+                # task.close()  # the task is now closed by the training script after the policy evaluation
         except Exception:
             if task:
                 task.close()
diff --git a/src/training_scripts/autoregressive_quantiles.py b/src/training_scripts/autoregressive_quantiles.py
index 4bf5e06..c323c12 100644
--- a/src/training_scripts/autoregressive_quantiles.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -1,3 +1,5 @@
+from src.policies.PolicyEvaluator import PolicyEvaluator
+from src.policies.simple_baseline import BaselinePolicy, Battery
 from src.models.lstm_model import GRUModel
 from src.data import DataProcessor, DataConfig
 from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer
@@ -68,12 +70,17 @@ model_parameters = task.connect(model_parameters, name="model_parameters")
 
 time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"])
 # lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=model_parameters["hidden_size"], num_layers=model_parameters["num_layers"], dropout=model_parameters["dropout"])
-# non_linear_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=model_parameters["hidden_size"], numLayers=model_parameters["num_layers"], dropout=model_parameters["dropout"])
-linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
+non_linear_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=model_parameters["hidden_size"], numLayers=model_parameters["num_layers"], dropout=model_parameters["dropout"])
+# linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
 
-model = nn.Sequential(time_embedding, linear_model)
+model = nn.Sequential(time_embedding, non_linear_model)
 optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"])
 
+### Policy Evaluator ###
+battery = Battery(2, 1)  # assumed semantics of the positional args: capacity 2, (dis)charge power 1
+baseline_policy = BaselinePolicy(battery, data_path="")
+policy_evaluator = PolicyEvaluator(baseline_policy, task)
+
 #### Trainer ####
 trainer = AutoRegressiveQuantileTrainer(
     model,
@@ -82,12 +89,24 @@ trainer = AutoRegressiveQuantileTrainer(
     data_processor,
     quantiles,
     "cuda",
+    policy_evaluator=policy_evaluator,
     debug=False,
 )
 trainer.add_metrics_to_track(
     [PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
 )
-trainer.early_stopping(patience=30)
+trainer.early_stopping(patience=10)
 trainer.plot_every(5)
-trainer.train(task=task, epochs=epochs, remotely=True)
+trainer.train(task=task, epochs=epochs, remotely=False)
+
+### Policy Evaluation ###
+idx_samples = trainer.test_set_samples
+_, test_loader = trainer.data_processor.get_dataloaders(
+    predict_sequence_length=trainer.model.output_size)
+
+policy_evaluator.evaluate_test_set(idx_samples, test_loader)
+policy_evaluator.plot_profits_table()
+policy_evaluator.plot_thresholds_per_day()
+
+task.close()
\ No newline at end of file