Sped up sampling 20x
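The 20x comes from two changes visible in the diff below: the per-sample scipy `interp1d` inverse-CDF sampling is replaced by a module-level, fully vectorized NumPy `sample_from_dist`, and `auto_regressive` now rolls out whole batches of dataset indices per forward pass instead of one index per call. A minimal sketch of the vectorization idea (the names `sample_from_quantiles`, `q_levels`, `q_values` are illustrative, not from this commit):

    import numpy as np

    def sample_from_quantiles(q_levels, q_values, n_draws=1000, rng=None):
        """Monte-Carlo mean of distributions given by their predicted quantiles.

        q_levels: (Q,) sorted quantile levels in (0, 1)
        q_values: (N, Q) predicted values at those levels, one row per series
        Returns the per-row mean of n_draws inverse-CDF samples, shape (N,).
        """
        rng = np.random.default_rng() if rng is None else rng
        u = rng.uniform(0.0, 1.0, size=(q_values.shape[0], n_draws))

        # Bracketing quantile indices for every uniform draw at once.
        lo = np.searchsorted(q_levels, u, side="right") - 1
        hi = np.clip(lo + 1, 0, len(q_levels) - 1)
        lo = np.clip(lo, 0, len(q_levels) - 1)  # draws below the lowest level

        rows = np.arange(q_values.shape[0])[:, None]
        x_lo, x_hi = q_levels[lo], q_levels[hi]
        y_lo, y_hi = q_values[rows, lo], q_values[rows, hi]

        # Piecewise-linear inverse CDF; the clip keeps lo == hi from dividing by zero.
        slope = (y_hi - y_lo) / np.clip(x_hi - x_lo, 1e-6, None)
        return (y_lo + slope * (u - x_lo)).mean(axis=1)

Every draw is handled by array indexing rather than a Python-level loop that builds a fresh `interp1d` object per sample, which is plausibly where most of the speedup comes from.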
@@ -45,12 +45,16 @@ class AutoRegressiveTrainer(Trainer):
         )

         for i, idx in enumerate(sample_indices):
-            auto_regressive_output = self.auto_regressive(data_loader, idx)
+            auto_regressive_output = self.auto_regressive(data_loader.dataset, [idx])
+            if len(auto_regressive_output) == 3:
+                initial, predictions, target = auto_regressive_output
+            else:
+                initial, predictions, _, target = auto_regressive_output

             initial = initial.squeeze(0)
             predictions = predictions.squeeze(0)
             target = target.squeeze(0)

             sub_fig = self.get_plot(initial, target, predictions, show_legend=(i == 0))

             row = i + 1
@@ -64,13 +68,13 @@ class AutoRegressiveTrainer(Trainer):
             ).item()

             fig["layout"]["annotations"][i].update(
-                text=f"{loss.__class__.__name__}: {loss:.6f}"
+                text=f"{self.criterion.__class__.__name__}: {loss:.6f}"
             )

         # y axis same for all plots
-        fig.update_yaxes(range=[-1, 1], col=1)
+        # fig.update_yaxes(range=[-1, 1], col=1)

-        fig.update_layout(height=300 * rows)
+        fig.update_layout(height=1000 * rows)
         task.get_logger().report_plotly(
             title=f"{'Training' if train else 'Test'} Samples",
             series="full_day",
@@ -140,7 +144,7 @@ class AutoRegressiveTrainer(Trainer):
             total_amount_samples = len(dataloader.dataset) - 95

             for idx in tqdm(range(total_amount_samples)):
-                _, outputs, targets = self.auto_regressive(dataloader, idx)
+                _, outputs, targets = self.auto_regressive(dataloader.dataset, idx)

                 inversed_outputs = torch.tensor(
                     self.data_processor.inverse_transform(outputs)
@@ -52,6 +52,11 @@ class ProbabilisticBaselineTrainer(Trainer):
             for i in range(96):
                 time_steps[i].extend(inputs[:, i].numpy())

+        mean_fig = self.plot_mean_nrv(time_steps)
+        task.get_logger().report_plotly(
+            title=f"Train NRV", series="Mean NRV", figure=mean_fig
+        )
+
         all_quantiles = []
         for i, time_values in enumerate(time_steps):
             quantiles = np.quantile(time_values, self.quantiles)
@@ -84,7 +89,7 @@ class ProbabilisticBaselineTrainer(Trainer):
         quantile_values_tensor = torch.tensor(quantile_values)
         quantile_values_expanded = quantile_values_tensor.unsqueeze(0)

-        for _, targets in dataloader:
+        for _, targets, _ in dataloader:
             # Expand quantile_values for each batch
             quantile_values_batch = quantile_values_expanded.repeat(
                 targets.size(0), 1, 1
@@ -157,3 +162,19 @@ class ProbabilisticBaselineTrainer(Trainer):
         fig.update_yaxes(range=[-1, 1])

         return fig
+
+    def plot_mean_nrv(self, timesteps):
+        # create ndarray of time steps
+        timesteps = np.array(timesteps)
+
+        timesteps = self.data_processor.inverse_transform(timesteps)
+
+        # for every row calculate mean
+        mean = np.mean(timesteps, axis=1)
+
+        # plot mean
+        fig = go.Figure()
+        fig.add_trace(go.Scatter(x=np.arange(96), y=mean, name="Mean"))
+        fig.update_layout(title="Mean NRV")
+
+        return fig
@@ -13,6 +13,49 @@ from tqdm import tqdm
 import matplotlib.pyplot as plt


+def sample_from_dist(quantiles, output_values):
+    # both to numpy
+    quantiles = quantiles.cpu().numpy()
+
+    if isinstance(output_values, torch.Tensor):
+        output_values = output_values.cpu().numpy()
+
+    reshaped_values = output_values.reshape(-1, len(quantiles))
+
+    uniform_random_numbers = np.random.uniform(0, 1, (reshaped_values.shape[0], 1000))
+
+    idx_below = np.searchsorted(quantiles, uniform_random_numbers, side="right") - 1
+    idx_above = np.clip(idx_below + 1, 0, len(quantiles) - 1)
+
+    # handle edge case where idx_below is -1
+    idx_below = np.clip(idx_below, 0, len(quantiles) - 1)
+
+    y_below = reshaped_values[np.arange(reshaped_values.shape[0])[:, None], idx_below]
+    y_above = reshaped_values[np.arange(reshaped_values.shape[0])[:, None], idx_above]
+
+    # Calculate the slopes for interpolation
+    x_below = quantiles[idx_below]
+    x_above = quantiles[idx_above]
+
+    # Interpolate
+    # Ensure all variables are NumPy arrays
+    x_below_np = x_below.cpu().numpy() if isinstance(x_below, torch.Tensor) else x_below
+    x_above_np = x_above.cpu().numpy() if isinstance(x_above, torch.Tensor) else x_above
+    y_below_np = y_below.cpu().numpy() if isinstance(y_below, torch.Tensor) else y_below
+    y_above_np = y_above.cpu().numpy() if isinstance(y_above, torch.Tensor) else y_above
+
+    # Compute slopes for interpolation
+    slopes_np = (y_above_np - y_below_np) / (
+        np.clip(x_above_np - x_below_np, 1e-6, np.inf)
+    )
+
+    # Perform the interpolation
+    new_samples = y_below_np + slopes_np * (uniform_random_numbers - x_below_np)
+
+    # Return the mean of the samples
+    return np.mean(new_samples, axis=1)
+
+
 class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
     def __init__(
         self,
@@ -46,19 +89,26 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
         }

         with torch.no_grad():
-            total_amount_samples = len(dataloader.dataset) - 95
+            total_samples = len(dataloader.dataset) - 96
+            batches = 0
+            for _, _, idx_batch in dataloader:
+                idx_batch = [idx for idx in idx_batch if idx < total_samples]

-            for idx in tqdm(range(total_amount_samples)):
-                _, outputs, samples, targets = self.auto_regressive(dataloader, idx)
+                if len(idx_batch) == 0:
+                    continue
+
+                _, outputs, samples, targets = self.auto_regressive(
+                    dataloader.dataset, idx_batch=idx_batch
+                )
+
+                samples = samples.to(self.device)
+                outputs = outputs.to(self.device)
+                targets = targets.to(self.device)

                 inversed_samples = self.data_processor.inverse_transform(samples)
                 inversed_targets = self.data_processor.inverse_transform(targets)
                 inversed_outputs = self.data_processor.inverse_transform(outputs)

-                outputs = outputs.to(self.device)
-                targets = targets.to(self.device)
-                samples = samples.to(self.device)
-
                 inversed_samples = inversed_samples.to(self.device)
                 inversed_targets = inversed_targets.to(self.device)
                 inversed_outputs = inversed_outputs.to(self.device)
@@ -66,10 +116,10 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
                 for metric in self.metrics_to_track:
                     if metric.__class__ != PinballLoss and metric.__class__ != CRPSLoss:
                         transformed_metrics[metric.__class__.__name__] += metric(
-                            samples, targets
+                            samples, targets.squeeze(-1)
                         )
                         metrics[metric.__class__.__name__] += metric(
-                            inversed_samples, inversed_targets
+                            inversed_samples, inversed_targets.squeeze(-1)
                         )
                     else:
                         transformed_metrics[metric.__class__.__name__] += metric(
@@ -78,10 +128,11 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
                         metrics[metric.__class__.__name__] += metric(
                             inversed_outputs, inversed_targets
                         )
+                batches += 1

             for metric in self.metrics_to_track:
-                metrics[metric.__class__.__name__] /= total_amount_samples
-                transformed_metrics[metric.__class__.__name__] /= total_amount_samples
+                metrics[metric.__class__.__name__] /= batches
+                transformed_metrics[metric.__class__.__name__] /= batches

         for metric_name, metric_value in metrics.items():
             if PinballLoss.__name__ in metric_name:
@@ -97,7 +148,14 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
                 )
             task.get_logger().report_single_value(name=name, value=metric_value)

-    def get_plot(self, current_day, next_day, predictions, show_legend: bool = True):
+    def get_plot(
+        self,
+        current_day,
+        next_day,
+        predictions,
+        show_legend: bool = True,
+        retransform: bool = True,
+    ):
         fig = go.Figure()

         # Convert to numpy for plotting
@@ -105,6 +163,11 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
         next_day_np = next_day.view(-1).cpu().numpy()
         predictions_np = predictions.cpu().numpy()

+        if retransform:
+            current_day_np = self.data_processor.inverse_transform(current_day_np)
+            next_day_np = self.data_processor.inverse_transform(next_day_np)
+            predictions_np = self.data_processor.inverse_transform(predictions_np)
+
         # Add traces for current and next day
         fig.add_trace(go.Scatter(x=np.arange(96), y=current_day_np, name="Current Day"))
         fig.add_trace(go.Scatter(x=96 + np.arange(96), y=next_day_np, name="Next Day"))
@@ -127,86 +190,68 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):

         return fig

-    def auto_regressive(self, data_loader, idx, sequence_length: int = 96):
-        self.model.eval()
-        target_full = []
-        predictions_sampled = []
-        predictions_full = []
-
-        prev_features, target = data_loader.dataset[idx]
+    def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96):
+        prev_features, targets = dataset.get_batch(idx_batch)
+        prev_features = prev_features.to(self.device)
+        targets = targets.to(self.device)

-        initial_sequence = prev_features[:96]
+        initial_sequence = prev_features[:, :96]

-        target_full.append(target)
+        target_full = targets[:, 0].unsqueeze(1)  # (batch_size, 1)
         with torch.no_grad():
-            prediction = self.model(prev_features.unsqueeze(0))
-            predictions_full.append(prediction.squeeze(0))
-
-            # sample from the distribution
-            sample = self.sample_from_dist(
-                self.quantiles.cpu(), prediction.squeeze(-1).cpu().numpy()
-            )
-            predictions_sampled.append(sample)
+            new_predictions_full = self.model(prev_features)  # (batch_size, quantiles)
+            samples = (
+                torch.tensor(sample_from_dist(self.quantiles, new_predictions_full))
+                .unsqueeze(1)
+                .to(self.device)
+            )  # (batch_size, 1)
+            predictions_samples = samples
+            predictions_full = new_predictions_full.unsqueeze(1)

             for i in range(sequence_length - 1):
                 new_features = torch.cat(
-                    (prev_features[1:96].cpu(), torch.tensor([predictions_sampled[-1]])),
-                    dim=0,
-                )
+                    (prev_features[:, 1:96], samples), dim=1
+                )  # (batch_size, 96)

                 new_features = new_features.float()

                 # get the other needed features
-                other_features, new_target = data_loader.dataset.random_day_autoregressive(
-                    idx + i + 1
-                )
+                other_features, new_targets = dataset.get_batch_autoregressive(
+                    np.array(idx_batch) + i + 1
+                )  # (batch_size, new_features)

                 if other_features is not None:
-                    prev_features = torch.cat((new_features, other_features), dim=0)
+                    prev_features = torch.cat(
+                        (new_features, other_features), dim=1
+                    )  # (batch_size, 96 + new_features)
                 else:
                     prev_features = new_features

                 # add target to target_full
-                target_full.append(new_target)
+                target_full = torch.cat(
+                    (target_full, new_targets.to(self.device)), dim=1
+                )  # (batch_size, sequence_length)

                 # predict
                 with torch.no_grad():
-                    prediction = self.model(prev_features.unsqueeze(0).to(self.device))
-                    predictions_full.append(prediction.squeeze(0))
+                    new_predictions_full = self.model(
+                        prev_features
+                    )  # (batch_size, quantiles)
+                    predictions_full = torch.cat(
+                        (predictions_full, new_predictions_full.unsqueeze(1)), dim=1
+                    )  # (batch_size, sequence_length, quantiles)

                 # sample from the distribution
-                sample = self.sample_from_dist(
-                    self.quantiles.cpu(), prediction.squeeze(-1).cpu().numpy()
-                )
-                predictions_sampled.append(sample)
+                samples = (
+                    torch.tensor(sample_from_dist(self.quantiles, new_predictions_full))
+                    .unsqueeze(-1)
+                    .to(self.device)
+                )  # (batch_size, 1)
+                predictions_samples = torch.cat((predictions_samples, samples), dim=1)

         return (
-            initial_sequence.cpu(),
-            torch.stack(predictions_full).cpu(),
-            torch.tensor(predictions_sampled).reshape(-1, 1),
-            torch.stack(target_full).cpu(),
+            initial_sequence,
+            predictions_full,
+            predictions_samples,
+            target_full.unsqueeze(-1),
         )

-    @staticmethod
-    def sample_from_dist(quantiles, output_values):
-        # Interpolate the inverse CDF
-        inverse_cdf = interp1d(
-            quantiles,
-            output_values,
-            kind="linear",
-            bounds_error=False,
-            fill_value="extrapolate",
-        )
-
-        # generate one random uniform number
-        uniform_random_numbers = np.random.uniform(0, 1, 1000)
-
-        # Apply the inverse CDF to the uniform random numbers
-        samples = inverse_cdf(uniform_random_numbers)
-
-        # Return the mean of the samples
-        return np.mean(samples)
-
     def plot_quantile_percentages(
         self, task, data_loader, train: bool = True, iteration: int = None
     ):
@@ -214,7 +259,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
         quantile_counter = {q: 0 for q in self.quantiles.cpu().numpy()}

         with torch.no_grad():
-            for inputs, targets in data_loader:
+            for inputs, targets, _ in data_loader:
                 inputs = inputs.to("cuda")
                 output = self.model(inputs)

@@ -302,23 +347,6 @@ class NonAutoRegressiveQuantileRegression(Trainer):
             debug=debug,
         )

-    @staticmethod
-    def sample_from_dist(quantiles, output_values):
-        reshaped_values = output_values.reshape(-1, len(quantiles))
-        samples = []
-        for row in reshaped_values:
-            inverse_cdf = interp1d(
-                quantiles,
-                row,
-                kind="linear",
-                bounds_error=False,
-                fill_value="extrapolate",
-            )
-            uniform_random_numbers = np.random.uniform(0, 1, 1000)
-            new_samples = inverse_cdf(uniform_random_numbers)
-            samples.append(np.mean(new_samples))
-        return np.array(samples)
-
     def log_final_metrics(self, task, dataloader, train: bool = True):
         metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track}
         transformed_metrics = {
@@ -326,12 +354,12 @@ class NonAutoRegressiveQuantileRegression(Trainer):
         }

         with torch.no_grad():
-            for inputs, targets in dataloader:
+            for inputs, targets, _ in dataloader:
                 inputs, targets = inputs.to(self.device), targets.to(self.device)

                 outputs = self.model(inputs)
                 outputted_samples = [
-                    self.sample_from_dist(self.quantiles.cpu(), output.cpu().numpy())
+                    sample_from_dist(self.quantiles.cpu(), output.cpu().numpy())
                     for output in outputs
                 ]

@@ -359,10 +387,10 @@ class NonAutoRegressiveQuantileRegression(Trainer):
                     )
                 else:
                     transformed_metrics[metric.__class__.__name__] += metric(
-                        outputs, targets
+                        outputs, targets.unsqueeze(-1)
                     )
                     metrics[metric.__class__.__name__] += metric(
-                        inversed_outputs, inversed_targets
+                        inversed_outputs, inversed_targets.unsqueeze(-1)
                     )

         for metric in self.metrics_to_track:
@@ -7,8 +7,18 @@ import numpy as np
 import plotly.subplots as sp
 from plotly.subplots import make_subplots


 class Trainer:
-    def __init__(self, model: torch.nn.Module, optimizer: torch.optim.Optimizer, criterion: torch.nn.Module, data_processor: DataProcessor, device: torch.device, clearml_helper: ClearMLHelper = None, debug: bool = True):
+    def __init__(
+        self,
+        model: torch.nn.Module,
+        optimizer: torch.optim.Optimizer,
+        criterion: torch.nn.Module,
+        data_processor: DataProcessor,
+        device: torch.device,
+        clearml_helper: ClearMLHelper = None,
+        debug: bool = True,
+    ):
         self.model = model
         self.optimizer = optimizer
         self.criterion = criterion
@@ -49,7 +59,7 @@ class Trainer:
         task = self.clearml_helper.get_task(task_name=task_name)

         if self.debug:
-            task.add_tags('Debug')
+            task.add_tags("Debug")

         change_description = input("Enter a change description: ")
         if change_description:
@@ -70,9 +80,11 @@ class Trainer:
         task.connect(self.data_processor.data_config, name="data_features")

         return task

     def random_samples(self, train: bool = True, num_samples: int = 10):
-        train_loader, test_loader = self.data_processor.get_dataloaders(predict_sequence_length=self.model.output_size)
+        train_loader, test_loader = self.data_processor.get_dataloaders(
+            predict_sequence_length=self.model.output_size
+        )

         if train:
             loader = train_loader
@@ -82,10 +94,11 @@ class Trainer:
         indices = np.random.randint(0, len(loader.dataset) - 1, size=num_samples)
         return indices

     def train(self, epochs: int):
         try:
-            train_loader, test_loader = self.data_processor.get_dataloaders(predict_sequence_length=self.model.output_size)
+            train_loader, test_loader = self.data_processor.get_dataloaders(
+                predict_sequence_length=self.model.output_size
+            )

             train_samples = self.random_samples(train=True)
             test_samples = self.random_samples(train=False)
@@ -99,7 +112,7 @@ class Trainer:
                 self.model.train()
                 running_loss = 0.0

-                for inputs, targets in train_loader:
+                for inputs, targets, _ in train_loader:
                     inputs, targets = inputs.to(self.device), targets.to(self.device)

                     self.optimizer.zero_grad()
@@ -110,33 +123,48 @@ class Trainer:
                     self.optimizer.step()

                     running_loss += loss.item()

                running_loss /= len(train_loader.dataset)
                test_loss = self.test(test_loader)

                if self.patience is not None:
-                    if self.best_score is None or test_loss < self.best_score + self.delta:
+                    if (
+                        self.best_score is None
+                        or test_loss < self.best_score + self.delta
+                    ):
                        self.save_checkpoint(test_loss, task, epoch)
                        counter = 0
                    else:
                        counter += 1
                        if counter >= self.patience:
-                            print('Early stopping triggered')
+                            print("Early stopping triggered")
                            break

                if task:
-                    task.get_logger().report_scalar(title=self.criterion.__class__.__name__, series="train", value=running_loss, iteration=epoch)
-                    task.get_logger().report_scalar(title=self.criterion.__class__.__name__, series="test", value=test_loss, iteration=epoch)
+                    task.get_logger().report_scalar(
+                        title=self.criterion.__class__.__name__,
+                        series="train",
+                        value=running_loss,
+                        iteration=epoch,
+                    )
+                    task.get_logger().report_scalar(
+                        title=self.criterion.__class__.__name__,
+                        series="test",
+                        value=test_loss,
+                        iteration=epoch,
+                    )

                if epoch % self.plot_every_n_epochs == 0:
                    self.debug_plots(task, True, train_loader, train_samples, epoch)
                    self.debug_plots(task, False, test_loader, test_samples, epoch)

-                    if hasattr(self, 'plot_quantile_percentages'):
-                        self.plot_quantile_percentages(task, train_loader, True, epoch)
-                        self.plot_quantile_percentages(task, test_loader, False, epoch)
+                    if hasattr(self, "plot_quantile_percentages"):
+                        self.plot_quantile_percentages(
+                            task, train_loader, True, epoch
+                        )
+                        self.plot_quantile_percentages(
+                            task, test_loader, False, epoch
+                        )

            if task:
                self.finish_training(task=task)
@@ -147,23 +175,32 @@ class Trainer:
            task.set_archived(True)
            raise

    def log_final_metrics(self, task, dataloader, train: bool = True):
-        metrics = { metric.__class__.__name__: 0.0 for metric in self.metrics_to_track }
-        transformed_metrics = { metric.__class__.__name__: 0.0 for metric in self.metrics_to_track }
+        metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track}
+        transformed_metrics = {
+            metric.__class__.__name__: 0.0 for metric in self.metrics_to_track
+        }

        with torch.no_grad():
-            for inputs, targets in dataloader:
+            for inputs, targets, _ in dataloader:
                inputs, targets = inputs.to(self.device), targets

                outputs = self.model(inputs)

-                inversed_outputs = torch.tensor(self.data_processor.inverse_transform(outputs))
-                inversed_inputs = torch.tensor(self.data_processor.inverse_transform(targets))
+                inversed_outputs = torch.tensor(
+                    self.data_processor.inverse_transform(outputs)
+                )
+                inversed_inputs = torch.tensor(
+                    self.data_processor.inverse_transform(targets)
+                )

                for metric in self.metrics_to_track:
-                    transformed_metrics[metric.__class__.__name__] += metric(outputs, targets.to(self.device))
-                    metrics[metric.__class__.__name__] += metric(inversed_outputs, inversed_inputs)
+                    transformed_metrics[metric.__class__.__name__] += metric(
+                        outputs, targets.to(self.device)
+                    )
+                    metrics[metric.__class__.__name__] += metric(
+                        inversed_outputs, inversed_inputs
+                    )

        for metric in self.metrics_to_track:
            metrics[metric.__class__.__name__] /= len(dataloader)
@@ -171,74 +208,109 @@ class Trainer:

        for metric_name, metric_value in metrics.items():
            if train:
-                metric_name = f'train_{metric_name}'
+                metric_name = f"train_{metric_name}"
            else:
-                metric_name = f'test_{metric_name}'
-
-            task.get_logger().report_single_value(name=metric_name, value=metric_value)
+                metric_name = f"test_{metric_name}"
+
+            task.get_logger().report_single_value(
+                name=metric_name, value=metric_value
+            )

        for metric_name, metric_value in transformed_metrics.items():
            if train:
-                metric_name = f'train_transformed_{metric_name}'
+                metric_name = f"train_transformed_{metric_name}"
            else:
-                metric_name = f'test_transformed_{metric_name}'
+                metric_name = f"test_transformed_{metric_name}"

-            task.get_logger().report_single_value(name=metric_name, value=metric_value)
+            task.get_logger().report_single_value(
+                name=metric_name, value=metric_value
+            )

    def finish_training(self, task):
        if self.best_score is not None:
-            self.model.load_state_dict(torch.load('checkpoint.pt'))
+            self.model.load_state_dict(torch.load("checkpoint.pt"))
            self.model.eval()

-        train_loader, test_loader = self.data_processor.get_dataloaders(predict_sequence_length=self.model.output_size)
+        train_loader, test_loader = self.data_processor.get_dataloaders(
+            predict_sequence_length=self.model.output_size
+        )

-        if not hasattr(self, 'plot_quantile_percentages'):
+        if not hasattr(self, "plot_quantile_percentages"):
            self.log_final_metrics(task, train_loader, train=True)

            self.log_final_metrics(task, test_loader, train=False)

    def test(self, test_loader: torch.utils.data.DataLoader):
        self.model.eval()
        test_loss = 0

        with torch.no_grad():
-            for data, target in test_loader:
+            for data, target, _ in test_loader:
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)

                test_loss += self.criterion(output, target).item()

        test_loss /= len(test_loader.dataset)
        return test_loss

    def save_checkpoint(self, val_loss, task, iteration: int):
-        torch.save(self.model.state_dict(), 'checkpoint.pt')
-        task.update_output_model(model_path='checkpoint.pt', iteration=iteration, auto_delete_file=False)
+        torch.save(self.model.state_dict(), "checkpoint.pt")
+        task.update_output_model(
+            model_path="checkpoint.pt", iteration=iteration, auto_delete_file=False
+        )
        self.best_score = val_loss

-    def get_plot(self, current_day, next_day, predictions, show_legend: bool = True):
+    def get_plot(
+        self,
+        current_day,
+        next_day,
+        predictions,
+        show_legend: bool = True,
+        retransform: bool = True,
+    ):
+        if retransform:
+            current_day = self.data_processor.inverse_transform(current_day)
+            next_day = self.data_processor.inverse_transform(next_day)
+            predictions = self.data_processor.inverse_transform(predictions)
+
        fig = go.Figure()

-        fig.add_trace(go.Scatter(x=np.arange(96), y=current_day.view(-1).cpu().numpy(), name="Current Day"))
-        fig.add_trace(go.Scatter(x=96 + np.arange(96), y=next_day.view(-1).cpu().numpy(), name="Next Day"))
+        fig.add_trace(
+            go.Scatter(
+                x=np.arange(96),
+                y=current_day.view(-1).cpu().numpy(),
+                name="Current Day",
+            )
+        )
+        fig.add_trace(
+            go.Scatter(
+                x=96 + np.arange(96), y=next_day.view(-1).cpu().numpy(), name="Next Day"
+            )
+        )

-        fig.add_trace(go.Scatter(x=96 + np.arange(96), y=predictions.reshape(-1), name="Predictions"))
+        fig.add_trace(
+            go.Scatter(
+                x=96 + np.arange(96), y=predictions.reshape(-1), name="Predictions"
+            )
+        )

        fig.update_layout(title="Predictions of the Linear Model")
        return fig

    def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
        num_samples = len(sample_indices)
        rows = num_samples  # One row per sample since we only want one column
        cols = 1

-        fig = make_subplots(rows=rows, cols=cols, subplot_titles=[f'Sample {i+1}' for i in range(num_samples)])
+        fig = make_subplots(
+            rows=rows,
+            cols=cols,
+            subplot_titles=[f"Sample {i+1}" for i in range(num_samples)],
+        )

        for i, idx in enumerate(sample_indices):
-            features, target = data_loader.dataset[idx]
+            features, target, _ = data_loader.dataset[idx]

            features = features.to(self.device)
            target = target.to(self.device)
@@ -247,29 +319,29 @@ class Trainer:
            with torch.no_grad():
                predictions = self.model(features).cpu()

-            sub_fig = self.get_plot(features[:96], target, predictions, show_legend=(i == 0))
+            sub_fig = self.get_plot(
+                features[:96], target, predictions, show_legend=(i == 0)
+            )

            row = i + 1
            col = 1

            for trace in sub_fig.data:
                fig.add_trace(trace, row=row, col=col)

            # loss = self.criterion(predictions.to(self.device), target.squeeze(-1).to(self.device)).item()
            # fig['layout']['annotations'][i].update(text=f"{loss.__class__.__name__}: {loss:.6f}")

        # y axis same for all plots
-        fig.update_yaxes(range=[-1, 1], col=1)
+        # fig.update_yaxes(range=[-1, 1], col=1)

-        fig.update_layout(height=300 * rows)
+        fig.update_layout(height=1000 * rows)
        task.get_logger().report_plotly(
            title=f"{'Training' if train else 'Test'} Samples",
            series="full_day",
            iteration=epoch,
-            figure=fig
+            figure=fig,
        )

    def debug_scatter_plot(self, task, train: bool, samples, epoch):
@@ -285,7 +357,11 @@ class Trainer:
        rows = -(-num_samples // 2)  # Ceiling division to handle odd number of samples
        cols = 2

-        fig = make_subplots(rows=rows, cols=cols, subplot_titles=[f'Sample {i+1}' for i in range(num_samples)])
+        fig = make_subplots(
+            rows=rows,
+            cols=cols,
+            subplot_titles=[f"Sample {i+1}" for i in range(num_samples)],
+        )

        for i, (current_day, next_value, pred) in enumerate(zip(X, y, predictions)):
            sub_fig = self.scatter_plot(current_day, pred, next_value)
@@ -299,14 +375,16 @@ class Trainer:
            title=f"{'Training' if train else 'Test'} Samples",
            series="scatter",
            iteration=epoch,
-            figure=fig
+            figure=fig,
        )

    def scatter_plot(self, x, y, real_y):
        fig = go.Figure()

        # 96 values of x
-        fig.add_trace(go.Scatter(x=np.arange(96), y=x.view(-1).cpu().numpy(), name="Current Day"))
+        fig.add_trace(
+            go.Scatter(x=np.arange(96), y=x.view(-1).cpu().numpy(), name="Current Day")
+        )

        # add one value of y
        fig.add_trace(go.Scatter(x=[96], y=[y.item()], name="Next Day"))
@@ -315,4 +393,4 @@ class Trainer:
        fig.add_trace(go.Scatter(x=[96], y=[real_y.item()], name="Real Next Day"))

        fig.update_layout(title="Predictions of the Linear Model")
-        return fig
\ No newline at end of file
+        return fig