Updated training scripts

2024-03-18 12:15:06 +01:00
parent 34335cd9fe
commit 1a8e735cbc
10 changed files with 487 additions and 308 deletions


@@ -155,18 +155,6 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
         generated_samples = {}
         with torch.no_grad():
             total_samples = len(dataloader.dataset)
-            print(
-                "Full day valid indices: ",
-                len(dataloader.dataset.full_day_valid_indices),
-            )
-            print(
-                "Valid indices: ",
-                len(dataloader.dataset.valid_indices),
-            )
-            print(dataloader.dataset.valid_indices)
             for i in tqdm(dataloader.dataset.full_day_valid_indices):
                 idx = dataloader.dataset.valid_indices.index(i)
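Note on the loop retained above: `valid_indices.index(i)` is a linear scan per lookup. A minimal sketch of the same resolution with a precomputed position map (the list contents below are hypothetical stand-ins for the dataset attributes, not values from the repo):

```python
# Hypothetical stand-ins for dataloader.dataset.valid_indices and
# dataloader.dataset.full_day_valid_indices.
valid_indices = [3, 7, 12, 20, 33, 41]
full_day_valid_indices = [7, 33]

# Build the value -> position map once; each lookup is then O(1)
# instead of the O(n) scan that list.index performs per call.
position_of = {value: pos for pos, value in enumerate(valid_indices)}

for i in full_day_valid_indices:
    idx = position_of[i]  # same result as valid_indices.index(i)
    print(i, "->", idx)
```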
@@ -188,74 +176,64 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
                 crps_from_samples_metric.append(crps[0].mean().item())
-        task.get_logger().report_scalar(
-            title="CRPS_from_samples",
-            series="test",
-            value=np.mean(crps_from_samples_metric),
-            iteration=epoch,
-        )
+        if epoch is not None:
+            task.get_logger().report_scalar(
+                title="CRPS_from_samples",
+                series="test",
+                value=np.mean(crps_from_samples_metric),
+                iteration=epoch,
+            )
         # using the policy evaluator, evaluate the policy with the generated samples
         if self.policy_evaluator is not None:
-            self.policy_evaluator.evaluate_test_set(generated_samples, dataloader)
-            df = self.policy_evaluator.get_profits_as_scalars()
-            # for each row, report the profits
-            for idx, row in df.iterrows():
-                task.get_logger().report_scalar(
-                    title="Profit",
-                    series=f"penalty_{row['Penalty']}",
-                    value=row["Total Profit"],
-                    iteration=epoch,
-                )
-            df = self.policy_evaluator.get_profits_till_400()
-            for idx, row in df.iterrows():
-                task.get_logger().report_scalar(
-                    title="Profit_till_400",
-                    series=f"penalty_{row['Penalty']}",
-                    value=row["Profit_till_400"],
-                    iteration=epoch,
-                )
+            optimal_penalty, profit, charge_cycles = (
+                self.policy_evaluator.optimize_penalty_for_target_charge_cycles(
+                    idx_samples=generated_samples,
+                    test_loader=dataloader,
+                    initial_penalty=900,
+                    target_charge_cycles=283,
+                    learning_rate=2,
+                    max_iterations=100,
+                    tolerance=1,
+                )
+            )
+            print(
+                f"Optimal Penalty: {optimal_penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}"
+            )
+            task.get_logger().report_scalar(
+                title="Optimal Penalty",
+                series="test",
+                value=optimal_penalty,
+                iteration=epoch,
+            )
+            task.get_logger().report_scalar(
+                title="Optimal Profit", series="test", value=profit, iteration=epoch
+            )
+            task.get_logger().report_scalar(
+                title="Optimal Charge Cycles",
+                series="test",
+                value=charge_cycles,
+                iteration=epoch,
+            )
         return np.mean(crps_from_samples_metric), generated_samples
 
     def log_final_metrics(self, task, dataloader, train: bool = True):
         metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track}
         transformed_metrics = {
             metric.__class__.__name__: 0.0 for metric in self.metrics_to_track
         }
         crps_from_samples_metric = []
         with torch.no_grad():
             total_samples = len(dataloader.dataset) - 96
             batches = 0
             for _, _, idx_batch in tqdm(dataloader):
                 idx_batch = [idx for idx in idx_batch if idx < total_samples]
                 if len(idx_batch) == 0:
                     continue
-                if train == False:
-                    for idx in tqdm(idx_batch):
-                        computed_idx_batch = [idx] * 250
-                        initial, outputs, samples, targets = self.auto_regressive(
-                            dataloader.dataset, idx_batch=computed_idx_batch
-                        )
-                        # save the samples for the idx, these will be used for evaluating the policy
-                        self.test_set_samples[idx.item()] = (
-                            self.data_processor.inverse_transform(initial),
-                            self.data_processor.inverse_transform(samples),
-                        )
-                        samples = samples.unsqueeze(0)
-                        targets = targets.squeeze(-1)
-                        targets = targets[0].unsqueeze(0)
-                        crps = crps_from_samples(samples, targets)
-                        crps_from_samples_metric.append(crps[0].mean().item())
                 _, outputs, samples, targets = self.auto_regressive(
                     dataloader.dataset, idx_batch=idx_batch
                 )
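`optimize_penalty_for_target_charge_cycles` itself is not part of this diff; given the parameter names (`initial_penalty=900`, `target_charge_cycles=283`, `learning_rate=2`, `tolerance=1`), one plausible reading is an iterative search that nudges the penalty until the policy's charge cycles land near the target. A hedged sketch of that idea only, with `evaluate_policy` as a hypothetical callback:

```python
def optimize_penalty_sketch(
    evaluate_policy,            # hypothetical: penalty -> (profit, charge_cycles)
    initial_penalty: float = 900.0,
    target_charge_cycles: float = 283.0,
    learning_rate: float = 2.0,
    max_iterations: int = 100,
    tolerance: float = 1.0,
):
    """Sketch: adjust the penalty until charge cycles are within tolerance."""
    penalty = initial_penalty
    profit, cycles = evaluate_policy(penalty)
    for _ in range(max_iterations):
        error = cycles - target_charge_cycles
        if abs(error) <= tolerance:
            break
        # Too many cycles -> make cycling more expensive, and vice versa.
        penalty += learning_rate * error
        profit, cycles = evaluate_policy(penalty)
    return penalty, profit, cycles
```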
@@ -308,6 +286,9 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
             task.get_logger().report_single_value(name=name, value=metric_value)
         if train == False:
+            crps_from_samples_metric, self.test_set_samples = (
+                self.calculate_crps_from_samples(None, dataloader, None)
+            )
             task.get_logger().report_single_value(
                 name="test_CRPS_from_samples_transformed",
                 value=np.mean(crps_from_samples_metric),
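The body of `crps_from_samples` is likewise outside this diff. A common ensemble estimator for the continuous ranked probability score is CRPS ≈ E|X − y| − ½·E|X − X′|; a self-contained PyTorch sketch of that estimator (the shapes are assumptions, not necessarily the repo's convention):

```python
import torch

def crps_from_samples_sketch(samples: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """Empirical CRPS per horizon step.

    samples: (n_samples, horizon) ensemble draws
    target:  (horizon,) realized values
    """
    # E|X - y|: mean absolute error of the ensemble against the target.
    term1 = (samples - target).abs().mean(dim=0)
    # E|X - X'|: mean absolute spread over all sample pairs.
    term2 = (samples.unsqueeze(0) - samples.unsqueeze(1)).abs().mean(dim=(0, 1))
    return term1 - 0.5 * term2

samples = torch.randn(250, 96)   # 250 draws over a 96-step day
target = torch.randn(96)
print(crps_from_samples_sketch(samples, target).mean().item())
```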
@@ -320,6 +301,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
         predictions,
         show_legend: bool = True,
         retransform: bool = True,
+        task=None,
     ):
         fig = go.Figure()
@@ -427,7 +409,19 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
                 ax.lines[1],
             ]
         )
-        return fig
+        ax.set_ylim(-1500, 1500)
+
+        fig2, ax2 = plt.subplots(figsize=(20, 10))
+        for i in range(10):
+            ax2.plot(predictions_np[i], label=f"Sample {i}")
+        ax2.plot(next_day_np, label="Real NRV", linewidth=3)
+        ax2.legend()
+        ax2.set_ylim(-1500, 1500)
+        return fig, fig2
 
     def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96):
         return auto_regressive(
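The module-level `auto_regressive` helper wrapped here is not shown in the diff; the `sequence_length: int = 96` default and the `[idx] * 250` batches above suggest 96-step rollouts with 250 parallel sample paths. A minimal sketch of that rollout pattern, with `model` assumed to return one stochastic next-step value per row:

```python
import torch

def auto_regressive_rollout(model, context: torch.Tensor,
                            sequence_length: int = 96,
                            n_samples: int = 250) -> torch.Tensor:
    """Roll a one-step sampler forward, feeding each draw back as input.

    context: (context_len,) conditioning window; returns (n_samples, sequence_length).
    """
    window = context.unsqueeze(0).repeat(n_samples, 1)  # one copy per sample path
    steps = []
    with torch.no_grad():
        for _ in range(sequence_length):
            next_step = model(window)                   # assumed shape (n_samples, 1)
            steps.append(next_step)
            # Slide the window: drop the oldest step, append the new draw.
            window = torch.cat([window[:, 1:], next_step], dim=1)
    return torch.cat(steps, dim=1)
```

With a toy sampler such as `auto_regressive_rollout(lambda w: w[:, -1:] + 0.1 * torch.randn(w.size(0), 1), torch.zeros(32))`, this returns a (250, 96) tensor of trajectories.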
@@ -646,6 +640,21 @@ class NonAutoRegressiveQuantileRegression(Trainer):
                     figure=fig,
                 )
+                fig, ax = plt.subplots(figsize=(20, 10))
+                for i in range(10):
+                    ax.plot(samples[i], label=f"Sample {i}")
+                ax.plot(target, label="Real NRV", linewidth=3)
+                ax.legend()
+                task.get_logger().report_matplotlib_figure(
+                    title="Training" if train else "Testing",
+                    series=f"Sample {actual_idx} Samples",
+                    iteration=epoch,
+                    figure=fig,
+                )
+                plt.close()
 
     def get_plot(
         self,
         current_day,
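For context on the logging call added above: ClearML's `Logger.report_matplotlib_figure` uploads a matplotlib figure to the task under a title/series pair, keyed by iteration. A minimal standalone sketch of the pattern (project and task names here are hypothetical, and a configured ClearML server is assumed):

```python
import matplotlib.pyplot as plt
from clearml import Task

task = Task.init(project_name="nrv-forecasting", task_name="figure-logging-demo")

fig, ax = plt.subplots(figsize=(20, 10))
for i in range(3):
    ax.plot([0, 1, 2, 3], [i, i + 1, i, i + 1], label=f"Sample {i}")
ax.legend()

task.get_logger().report_matplotlib_figure(
    title="Testing", series="Sample 0 Samples", iteration=0, figure=fig
)
plt.close(fig)  # close the figure explicitly so repeated logging doesn't leak memory
```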
@@ -740,7 +749,15 @@ class NonAutoRegressiveQuantileRegression(Trainer):
                 ax.lines[1],
             ]
         )
-        return fig
+        fig2, ax2 = plt.subplots(figsize=(20, 10))
+        for i in range(10):
+            ax2.plot(predictions_np[i], label=f"Sample {i}")
+        ax2.plot(next_day_np, label="Real NRV", linewidth=3)
+        ax2.legend()
+        return fig, fig2
 
     def calculate_crps_from_samples(self, task, dataloader, epoch: int):
         crps_from_samples_metric = []