diff --git a/Result-Reports/Policies.md b/Result-Reports/Policies.md
index b2f03eb..2bf8139 100644
--- a/Result-Reports/Policies.md
+++ b/Result-Reports/Policies.md
@@ -160,19 +160,15 @@ Test data: 01-01-2023 until 08-10-2023
 
 TODO:
 
 - [x] ask about the diffusion-model exercise from the generative models course -> no lab session on this
-- [ ] test the policy with non-autoregressive models (Non Linear first) -> if this is already poor, look no further, but do report it
-- [ ] run the policy on the test set -> cap the charge cycles (stop trading electricity)
+- [x] test the policy with non-autoregressive models (Non Linear first) -> if this is already poor, look no further, but do report it (IN PROGRESS)
+- [x] run the policy on the test set -> cap the charge cycles (stop trading electricity)
 
-- [ ] determine the penalty on the training data
+- [x] determine the penalty on the training data
 
-- [ ] rescale cycles and profit to per-year figures
+- [x] rescale cycles and profit to per-year figures
 
-baseline -> use yesterday's NRV to determine today's thresholds
-other policies -> predict today's NRV with the model and determine today's thresholds
-
-
-First baseline -> determine the thresholds on the training data, but also stop once 400 (rescaled) cycles per year are reached -> the thresholds should then differ (also with the penalty parameter)
+- [x] First baseline -> determine the thresholds on the training data, but also stop once 400 (rescaled) cycles per year are reached -> the thresholds should then differ (also with the penalty parameter) -> apply these to the test set (also stopping once 400 cycles/year are reached)
diff --git a/src/policies/PolicyEvaluator.py b/src/policies/PolicyEvaluator.py
index 71ec211..57b77ee 100644
--- a/src/policies/PolicyEvaluator.py
+++ b/src/policies/PolicyEvaluator.py
@@ -239,15 +239,28 @@ class PolicyEvaluator:
                 penalty_profits[penalty] += profit
                 penalty_charge_cycles[penalty] += charge_cycles
 
+        # transform profits to per year: penalty_profits / penalty_charge_cycles * 400 cycles per year
+        transformed_profits_per_year = {}
+        for penalty in penalty_profits:
+            transformed_profits_per_year[penalty] = (
+                penalty_profits[penalty] / penalty_charge_cycles[penalty] * 400
+            )
+
         df = pd.DataFrame(
             list(
                 zip(
                     penalty_profits.keys(),
                     penalty_profits.values(),
                     penalty_charge_cycles.values(),
+                    transformed_profits_per_year.values(),
                 )
             ),
-            columns=["Penalty", "Profit_till_400", "Cycles_till_400"],
+            columns=[
+                "Penalty",
+                "Profit_till_400",
+                f"Cycles_till_400 (max {usable_charge_cycles})",
+                "Profit_per_year_till_400",
+            ],
         )
 
         return df
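For reference, the new `Profit_per_year_till_400` column is a plain normalisation: profit is divided by the cycles actually consumed and scaled to the 400-cycles-per-year budget, so runs of different lengths become comparable. A minimal sketch of the same arithmetic with invented numbers:

```python
# Per-year rescaling as in PolicyEvaluator above; all figures are made up.
penalty_profits = {0.0: 12_000.0, 0.5: 9_500.0}   # profit earned until the cap
penalty_charge_cycles = {0.0: 150.0, 0.5: 110.0}  # cycles consumed in that period

profit_per_year = {
    penalty: penalty_profits[penalty] / penalty_charge_cycles[penalty] * 400
    for penalty in penalty_profits
}
print(profit_per_year)  # {0.0: 32000.0, 0.5: 34545.45...}
```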
diff --git a/src/trainers/diffusion_trainer.py b/src/trainers/diffusion_trainer.py
index 3532e1e..9b1dd5c 100644
--- a/src/trainers/diffusion_trainer.py
+++ b/src/trainers/diffusion_trainer.py
@@ -173,6 +173,9 @@ class DiffusionTrainer:
         criterion = nn.MSELoss()
         self.model.to(self.device)
 
+        early_stopping = 0
+        best_crps = None
+
         if task:
             self.init_clearml_task(task)
 
@@ -204,7 +207,16 @@
             running_loss /= len(train_loader.dataset)
 
             if epoch % 40 == 0 and epoch != 0:
-                self.test(test_loader, epoch, task)
+                crps = self.test(test_loader, epoch, task)
+
+                if best_crps is None or crps < best_crps:
+                    best_crps = crps
+                    early_stopping = 0
+                else:
+                    early_stopping += 1
+
+                if early_stopping > 5:
+                    break
 
             if task:
                 task.get_logger().report_scalar(
@@ -222,6 +234,13 @@
                 task, False, test_loader, test_sample_indices, epoch
             )
 
+        # load the best model
+        self.model = torch.load("checkpoint.pt")
+        self.model.to(self.device)
+
+        self.test(test_loader, None, task)
+        self.policy_evaluator.plot_profits_table()
+
         if task:
             task.close()
@@ -329,7 +348,6 @@ class DiffusionTrainer:
         generated_samples = {}
         for inputs, targets, idx_batch in data_loader:
             inputs, targets = inputs.to(self.device), targets.to(self.device)
-            print(inputs.shape, targets.shape)
 
             number_of_samples = 100
             sample = self.sample(self.model, number_of_samples, inputs)
@@ -388,6 +406,8 @@
                 iteration=epoch,
             )
 
+        return mean_crps
+
     def save_checkpoint(self, val_loss, task, iteration: int):
         torch.save(self.model, "checkpoint.pt")
         task.update_output_model(
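The training-loop change above is a patience-based early-stopping scheme on the test CRPS: evaluate every 40 epochs, reset a counter on every new best, stop after more than five evaluations without improvement, then reload the best checkpoint. A condensed, runnable sketch of the same pattern (training and evaluation are stubbed out with hypothetical stand-ins):

```python
import random

def train_one_epoch() -> None:
    pass  # stand-in for one epoch of diffusion training

def evaluate_crps() -> float:
    return random.random()  # stand-in for self.test(); lower is better

best_crps, stale = None, 0
for epoch in range(8000):
    train_one_epoch()
    if epoch % 40 == 0 and epoch != 0:
        crps = evaluate_crps()
        if best_crps is None or crps < best_crps:
            best_crps, stale = crps, 0  # new best; checkpoint would be saved here
        else:
            stale += 1
        if stale > 5:  # six evaluations (240 epochs) without improvement
            break
print(f"stopped at epoch {epoch}, best CRPS {best_crps:.3f}")
```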
diff --git a/src/trainers/quantile_trainer.py b/src/trainers/quantile_trainer.py
index 8dd233f..dd59ef6 100644
--- a/src/trainers/quantile_trainer.py
+++ b/src/trainers/quantile_trainer.py
@@ -23,9 +23,6 @@ def sample_from_dist(quantiles, preds):
     # if preds more than 2 dimensions, flatten to 2
     if len(preds.shape) > 2:
         preds = preds.reshape(-1, preds.shape[-1])
-    # target will be reshaped from (1024, 96, 15) to (1024*96, 15)
-    # our target (1024, 96) also needs to be reshaped to (1024*96, 1)
-    target = target.reshape(-1, 1)
 
     # preds and target as numpy
     preds = preds.numpy()
@@ -312,36 +309,6 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
                 value=np.mean(crps_from_samples_metric),
             )
 
-    # def get_plot_error(
-    #     self,
-    #     next_day,
-    #     predictions,
-    # ):
-    #     metric = PinballLoss(quantiles=self.quantiles)
-    #     fig = go.Figure()
-
-    #     next_day_np = next_day.view(-1).cpu().numpy()
-    #     predictions_np = predictions.cpu().numpy()
-
-    #     if True:
-    #         next_day_np = self.data_processor.inverse_transform(next_day_np)
-    #         predictions_np = self.data_processor.inverse_transform(predictions_np)
-
-    #     # for each time step, calculate the error using the metric
-    #     errors = []
-    #     for i in range(96):
-
-    #         target_tensor = torch.tensor(next_day_np[i]).unsqueeze(0)
-    #         prediction_tensor = torch.tensor(predictions_np[i]).unsqueeze(0)
-
-    #         errors.append(metric(prediction_tensor, target_tensor))
-
-    #     # plot the error
-    #     fig.add_trace(go.Scatter(x=np.arange(96), y=errors, name=metric.__class__.__name__))
-    #     fig.update_layout(title=f"Error of {metric.__class__.__name__} for each time step")
-
-    #     return fig
-
     def get_plot(
         self,
         current_day,
@@ -565,8 +532,10 @@ class NonAutoRegressiveQuantileRegression(Trainer):
         quantiles: list,
         device: torch.device,
         debug: bool = True,
+        policy_evaluator: PolicyEvaluator = None,
    ):
         self.quantiles = quantiles
+        self.policy_evaluator = policy_evaluator
 
         criterion = NonAutoRegressivePinballLoss(quantiles=quantiles)
         super().__init__(
@@ -649,31 +618,199 @@ class NonAutoRegressiveQuantileRegression(Trainer):
                 name=metric_name, value=metric_value
             )
 
-    def get_plot(self, current_day, next_day, predictions, show_legend: bool = True):
+    def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
+        for actual_idx, idx in sample_indices.items():
+            initial, target, _ = data_loader.dataset[idx]
+
+            # get predictions
+            initial = initial.to(self.device)
+
+            predicted_quantiles = self.model(initial)
+            predictions = predicted_quantiles.reshape(-1, len(self.quantiles))
+
+            samples = [
+                sample_from_dist(self.quantiles, predictions) for _ in range(100)
+            ]
+            samples = torch.tensor(samples)
+
+            fig = self.get_plot(initial, target, samples, show_legend=True)
+
+            task.get_logger().report_matplotlib_figure(
+                title="Training" if train else "Testing",
+                series=f"Sample {actual_idx}",
+                iteration=epoch,
+                figure=fig,
+            )
+
+    def get_plot(
+        self,
+        current_day,
+        next_day,
+        predictions,
+        show_legend: bool = True,
+        retransform: bool = True,
+    ):
         fig = go.Figure()
 
         # Convert to numpy for plotting
         current_day_np = current_day.view(-1).cpu().numpy()
         next_day_np = next_day.view(-1).cpu().numpy()
+        predictions_np = predictions.cpu().numpy()
 
-        # reshape predictions to (n, len(quantiles))
-        predictions_np = predictions.cpu().numpy().reshape(-1, len(self.quantiles))
+        if retransform:
+            current_day_np = self.data_processor.inverse_transform(current_day_np)
+            next_day_np = self.data_processor.inverse_transform(next_day_np)
+            predictions_np = self.data_processor.inverse_transform(predictions_np)
 
-        # Add traces for current and next day
-        fig.add_trace(go.Scatter(x=np.arange(96), y=current_day_np, name="Current Day"))
-        fig.add_trace(go.Scatter(x=96 + np.arange(96), y=next_day_np, name="Next Day"))
+        ci_99_upper = np.quantile(predictions_np, 0.995, axis=0)
+        ci_99_lower = np.quantile(predictions_np, 0.005, axis=0)
 
-        for i, q in enumerate(self.quantiles):
-            fig.add_trace(
-                go.Scatter(
-                    x=96 + np.arange(96),
-                    y=predictions_np[:, i],
-                    name=f"Prediction (Q={q})",
-                    line=dict(dash="dash"),
-                )
-            )
+        ci_95_upper = np.quantile(predictions_np, 0.975, axis=0)
+        ci_95_lower = np.quantile(predictions_np, 0.025, axis=0)
 
-        # Update the layout
-        fig.update_layout(title="Predictions and Quantiles", showlegend=show_legend)
+        ci_90_upper = np.quantile(predictions_np, 0.95, axis=0)
+        ci_90_lower = np.quantile(predictions_np, 0.05, axis=0)
+
+        ci_50_lower = np.quantile(predictions_np, 0.25, axis=0)
+        ci_50_upper = np.quantile(predictions_np, 0.75, axis=0)
+
+        sns.set_theme()
+        time_steps = np.arange(0, 96)
+
+        fig, ax = plt.subplots(figsize=(20, 10))
+        ax.plot(
+            time_steps,
+            predictions_np.mean(axis=0),
+            label="Mean of NRV samples",
+            linewidth=3,
+        )
+        # ax.fill_between(time_steps, ci_lower, ci_upper, color='b', alpha=0.2, label='Full Interval')
+
+        ax.fill_between(
+            time_steps,
+            ci_99_lower,
+            ci_99_upper,
+            color="b",
+            alpha=0.2,
+            label="99% Interval",
+        )
+        ax.fill_between(
+            time_steps,
+            ci_95_lower,
+            ci_95_upper,
+            color="b",
+            alpha=0.2,
+            label="95% Interval",
+        )
+        ax.fill_between(
+            time_steps,
+            ci_90_lower,
+            ci_90_upper,
+            color="b",
+            alpha=0.2,
+            label="90% Interval",
+        )
+        ax.fill_between(
+            time_steps,
+            ci_50_lower,
+            ci_50_upper,
+            color="b",
+            alpha=0.2,
+            label="50% Interval",
+        )
+
+        ax.plot(next_day_np, label="Real NRV", linewidth=3)
+        # full_interval_patch = mpatches.Patch(color='b', alpha=0.2, label='Full Interval')
+        ci_99_patch = mpatches.Patch(color="b", alpha=0.3, label="99% Interval")
+        ci_95_patch = mpatches.Patch(color="b", alpha=0.4, label="95% Interval")
+        ci_90_patch = mpatches.Patch(color="b", alpha=0.5, label="90% Interval")
+        ci_50_patch = mpatches.Patch(color="b", alpha=0.6, label="50% Interval")
+
+        ax.legend(
+            handles=[
+                ci_99_patch,
+                ci_95_patch,
+                ci_90_patch,
+                ci_50_patch,
+                ax.lines[0],
+                ax.lines[1],
+            ]
+        )
 
         return fig
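Both `debug_plots` above and `calculate_crps_from_samples` below turn a quantile forecast into Monte-Carlo trajectories via `sample_from_dist`. Conceptually this is inverse-CDF sampling on the predicted quantile grid; a minimal sketch of that idea (the linear interpolation and the truncation to the outermost quantile levels are assumptions, not necessarily the repo's exact implementation):

```python
import numpy as np

def sample_from_quantile_grid(quantiles, preds, rng):
    """Draw one value per row of `preds` by inverting the forecast CDF.

    quantiles: sorted probability levels, e.g. [0.05, 0.25, 0.5, 0.75, 0.95]
    preds:     (n_steps, len(quantiles)) predicted quantile values
    """
    u = rng.uniform(quantiles[0], quantiles[-1], size=preds.shape[0])
    # piecewise-linear inverse CDF, interpolated between neighbouring levels
    return np.array([np.interp(ui, quantiles, row) for ui, row in zip(u, preds)])

rng = np.random.default_rng(0)
levels = [0.05, 0.25, 0.5, 0.75, 0.95]
forecast = np.linspace(40, 60, 5) + np.zeros((96, 5))  # toy (96, 5) quantile forecast
trajectory = sample_from_quantile_grid(levels, forecast, rng)
print(trajectory.shape)  # (96,) -> one sampled NRV trajectory for the day
```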
+    def calculate_crps_from_samples(self, task, dataloader, epoch: int):
+        crps_from_samples_metric = []
+        generated_samples = {}
+
+        with torch.no_grad():
+            total_samples = len(dataloader.dataset)
+            for _, _, idx_batch in tqdm(dataloader):
+                idx_batch = [idx for idx in idx_batch if idx < total_samples]
+
+                if len(idx_batch) == 0:
+                    continue
+
+                for idx in tqdm(idx_batch):
+                    computed_idx_batch = [idx] * 100
+                    initial, targets, _ = dataloader.dataset[idx]
+
+                    initial = initial.to(self.device)
+                    targets = targets.to(self.device)
+
+                    predicted_quantiles = self.model(initial)
+                    predictions = predicted_quantiles.reshape(-1, len(self.quantiles))
+
+                    samples = [
+                        sample_from_dist(self.quantiles, predictions)
+                        for _ in range(100)
+                    ]
+
+                    samples = torch.tensor(samples)
+
+                    generated_samples[idx.item()] = (
+                        self.data_processor.inverse_transform(initial),
+                        self.data_processor.inverse_transform(samples),
+                    )
+
+                    samples = samples.unsqueeze(0)
+                    targets = targets.squeeze(-1)
+                    targets = targets[0].unsqueeze(0)
+
+                    samples = samples.to(self.device)
+
+                    crps = crps_from_samples(samples, targets)
+
+                    crps_from_samples_metric.append(crps[0].mean().item())
+
+        task.get_logger().report_scalar(
+            title="CRPS_from_samples",
+            series="test",
+            value=np.mean(crps_from_samples_metric),
+            iteration=epoch,
+        )
+
+        # using the policy evaluator, evaluate the policy with the generated samples
+        if self.policy_evaluator is not None:
+            _, test_loader = self.data_processor.get_dataloaders(
+                predict_sequence_length=self.model.output_size, full_day_skip=True
+            )
+            self.policy_evaluator.evaluate_test_set(generated_samples, test_loader)
+            df = self.policy_evaluator.get_profits_as_scalars()
+
+            # for each row, report the profits
+            for idx, row in df.iterrows():
+                task.get_logger().report_scalar(
+                    title="Profit",
+                    series=f"penalty_{row['Penalty']}",
+                    value=row["Total Profit"],
+                    iteration=epoch,
+                )
+
+            df = self.policy_evaluator.get_profits_till_400()
+            for idx, row in df.iterrows():
+                task.get_logger().report_scalar(
+                    title="Profit_till_400",
+                    series=f"penalty_{row['Penalty']}",
+                    value=row["Profit_till_400"],
+                    iteration=epoch,
+                )
diff --git a/src/training_scripts/autoregressive_quantiles.py b/src/training_scripts/autoregressive_quantiles.py
index 83d3cfb..bc4b96b 100644
--- a/src/training_scripts/autoregressive_quantiles.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -3,7 +3,7 @@ from src.utils.clearml import ClearMLHelper
 #### ClearML ####
 clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
 task = clearml_helper.get_task(
-    task_name="Autoregressive Quantile Regression: Non Linear"
+    task_name="Non Autoregressive Quantile Regression: Non Linear"
 )
 task.execute_remotely(queue_name="default", exit_process=True)
 
@@ -11,7 +11,10 @@ from src.policies.PolicyEvaluator import PolicyEvaluator
 from src.policies.simple_baseline import BaselinePolicy, Battery
 from src.models.lstm_model import GRUModel
 from src.data import DataProcessor, DataConfig
-from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer
+from src.trainers.quantile_trainer import (
+    AutoRegressiveQuantileTrainer,
+    NonAutoRegressiveQuantileRegression,
+)
 from src.trainers.trainer import Trainer
 from src.utils.clearml import ClearMLHelper
 from src.models import *
@@ -46,7 +49,7 @@ data_processor.set_full_day_skip(False)
 
 #### Hyperparameters ####
 
-data_processor.set_output_size(1)
+data_processor.set_output_size(96)
 
 inputDim = data_processor.get_input_size()
 epochs = 300
@@ -77,7 +80,7 @@ time_embedding = TimeEmbedding(
 # lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=model_parameters["hidden_size"], num_layers=model_parameters["num_layers"], dropout=model_parameters["dropout"])
 non_linear_model = NonLinearRegression(
     time_embedding.output_dim(inputDim),
-    len(quantiles),
+    len(quantiles) * 96,
     hiddenSize=model_parameters["hidden_size"],
     numLayers=model_parameters["num_layers"],
     dropout=model_parameters["dropout"],
 )
@@ -85,6 +88,7 @@ non_linear_model = NonLinearRegression(
 # linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
 model = nn.Sequential(time_embedding, non_linear_model)
+model.output_size = 96
 optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"])
 
 ### Policy Evaluator ###
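Context for the `len(quantiles) * 96` and `model.output_size = 96` changes above: the non-autoregressive head emits the whole day (96 quarter-hour steps) in one forward pass, one output per (step, quantile) pair, which the trainer then reshapes to `(96, len(quantiles))`. A toy sketch of that output contract (the linear head and widths are placeholders, not the real NonLinearRegression):

```python
import torch
import torch.nn as nn

quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]
horizon = 96                     # quarter-hour steps in one day
embedding_dim = 32               # placeholder for time_embedding.output_dim(inputDim)

# toy stand-in for NonLinearRegression: one flat vector for the whole day
head = nn.Linear(embedding_dim, len(quantiles) * horizon)

predicted_quantiles = head(torch.randn(embedding_dim))         # shape (480,)
predictions = predicted_quantiles.reshape(-1, len(quantiles))  # step-major layout
print(predictions.shape)  # torch.Size([96, 5]): per step, one value per quantile
```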
@@ -93,7 +97,18 @@ baseline_policy = BaselinePolicy(battery, data_path="")
 policy_evaluator = PolicyEvaluator(baseline_policy, task)
 
 #### Trainer ####
-trainer = AutoRegressiveQuantileTrainer(
+# trainer = AutoRegressiveQuantileTrainer(
+#     model,
+#     inputDim,
+#     optimizer,
+#     data_processor,
+#     quantiles,
+#     "cuda",
+#     policy_evaluator=policy_evaluator,
+#     debug=False,
+# )
+
+trainer = NonAutoRegressiveQuantileRegression(
     model,
     inputDim,
     optimizer,
diff --git a/src/training_scripts/diffusion_training.py b/src/training_scripts/diffusion_training.py
index 6c13d63..fe5029f 100644
--- a/src/training_scripts/diffusion_training.py
+++ b/src/training_scripts/diffusion_training.py
@@ -1,7 +1,9 @@
 from src.utils.clearml import ClearMLHelper
 
 clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
-task = clearml_helper.get_task(task_name="Diffusion Training")
+task = clearml_helper.get_task(
+    task_name="Diffusion Training: hidden_sizes=[64, 64], lr=0.0001, time_dim=8"
+)
 task.execute_remotely(queue_name="default", exit_process=True)
 
 from src.models import *
@@ -37,9 +39,9 @@ inputDim = data_processor.get_input_size()
 print("Input dim: ", inputDim)
 
 model_parameters = {
-    "epochs": 5000,
+    "epochs": 8000,
     "learning_rate": 0.0001,
-    "hidden_sizes": [128, 128],
+    "hidden_sizes": [64, 64],
     "time_dim": 8,
 }
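Finally, a note on the CRPS numbers these runs report: `crps_from_samples` scores an ensemble of sampled trajectories against the realised NRV. The standard sample-based estimator is CRPS ~ E|X - y| - 0.5 * E|X - X'|; a minimal runnable sketch of that formula (the textbook estimator, not necessarily the exact implementation in src):

```python
import numpy as np

def empirical_crps(samples: np.ndarray, target: float) -> float:
    """CRPS estimated from an ensemble: E|X - y| - 0.5 * E|X - X'|."""
    samples = np.asarray(samples, dtype=float)
    term1 = np.mean(np.abs(samples - target))
    term2 = 0.5 * np.mean(np.abs(samples[:, None] - samples[None, :]))
    return term1 - term2

rng = np.random.default_rng(0)
ensemble = rng.normal(50.0, 10.0, size=100)  # 100 samples for one 15-minute step
print(round(empirical_crps(ensemble, target=55.0), 2))
```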