Added new training scripts
This commit is contained in:
@@ -18,6 +18,7 @@ class CRPSLoss(nn.Module):
|
||||
# target = target.unsqueeze(-1)
|
||||
|
||||
mask = (preds > target).float()
|
||||
self.quantiles_tensor = self.quantiles_tensor.to(preds.device)
|
||||
test = self.quantiles_tensor - mask
|
||||
# square them
|
||||
test = test * test
|
||||
|
||||
@@ -9,8 +9,9 @@ class PinballLoss(nn.Module):
|
||||
|
||||
def forward(self, pred, target):
|
||||
error = target - pred
|
||||
upper = self.quantiles_tensor * error
|
||||
lower = (self.quantiles_tensor - 1) * error
|
||||
quantiles = self.quantiles_tensor.to(error.device)
|
||||
upper = quantiles * error
|
||||
lower = (quantiles - 1) * error
|
||||
losses = torch.max(lower, upper)
|
||||
loss = torch.mean(torch.mean(losses, dim=0))
|
||||
return loss
|
||||
@@ -26,8 +27,10 @@ class NonAutoRegressivePinballLoss(nn.Module):
|
||||
pred = pred.reshape(-1, 96, len(self.quantiles_tensor))
|
||||
target_expanded = target.unsqueeze(2)
|
||||
error = target_expanded - pred
|
||||
upper = self.quantiles_tensor * error
|
||||
lower = (self.quantiles_tensor - 1) * error
|
||||
quantiles = self.quantiles_tensor.to(error.device)
|
||||
|
||||
upper = quantiles * error
|
||||
lower = (quantiles - 1) * error
|
||||
losses = torch.max(lower, upper)
|
||||
loss = torch.mean(losses)
|
||||
return loss
|
||||
|
||||
@@ -19,7 +19,6 @@ class AutoRegressiveTrainer(Trainer):
|
||||
criterion: torch.nn.Module,
|
||||
data_processor: DataProcessor,
|
||||
device: torch.device,
|
||||
clearml_helper: ClearMLHelper = None,
|
||||
debug: bool = True,
|
||||
):
|
||||
super().__init__(
|
||||
@@ -28,7 +27,6 @@ class AutoRegressiveTrainer(Trainer):
|
||||
criterion=criterion,
|
||||
data_processor=data_processor,
|
||||
device=device,
|
||||
clearml_helper=clearml_helper,
|
||||
debug=debug,
|
||||
)
|
||||
self.model.output_size = 1
|
||||
|
||||
@@ -10,12 +10,16 @@ import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def sample_from_dist(quantiles, output_values):
|
||||
# both to numpy
|
||||
quantiles = quantiles.cpu().numpy()
|
||||
# check if tensor:
|
||||
if isinstance(quantiles, torch.Tensor):
|
||||
quantiles = quantiles.cpu().numpy()
|
||||
|
||||
if isinstance(output_values, torch.Tensor):
|
||||
output_values = output_values.cpu().numpy()
|
||||
|
||||
if isinstance(quantiles, list):
|
||||
quantiles = np.array(quantiles)
|
||||
|
||||
reshaped_values = output_values.reshape(-1, len(quantiles))
|
||||
|
||||
uniform_random_numbers = np.random.uniform(0, 1, (reshaped_values.shape[0], 1000))
|
||||
@@ -60,22 +64,18 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
data_processor: DataProcessor,
|
||||
quantiles: list,
|
||||
device: torch.device,
|
||||
clearml_helper: ClearMLHelper = None,
|
||||
debug: bool = True,
|
||||
):
|
||||
|
||||
self.quantiles = quantiles
|
||||
quantiles_tensor = torch.tensor(quantiles)
|
||||
quantiles_tensor = quantiles_tensor.to(device)
|
||||
|
||||
criterion = PinballLoss(quantiles=quantiles_tensor)
|
||||
criterion = PinballLoss(quantiles=quantiles)
|
||||
super().__init__(
|
||||
model=model,
|
||||
optimizer=optimizer,
|
||||
criterion=criterion,
|
||||
data_processor=data_processor,
|
||||
device=device,
|
||||
clearml_helper=clearml_helper,
|
||||
debug=debug,
|
||||
)
|
||||
|
||||
@@ -252,7 +252,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
def plot_quantile_percentages(
|
||||
self, task, data_loader, train: bool = True, iteration: int = None
|
||||
):
|
||||
quantiles = self.quantiles.cpu().numpy()
|
||||
quantiles = self.quantiles
|
||||
total = 0
|
||||
quantile_counter = {q: 0 for q in quantiles}
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from clearml import Task
|
||||
import torch
|
||||
from src.data.preprocessing import DataProcessor
|
||||
from src.utils.clearml import ClearMLHelper
|
||||
@@ -15,14 +16,12 @@ class Trainer:
|
||||
criterion: torch.nn.Module,
|
||||
data_processor: DataProcessor,
|
||||
device: torch.device,
|
||||
clearml_helper: ClearMLHelper = None,
|
||||
debug: bool = True,
|
||||
):
|
||||
self.model = model
|
||||
self.optimizer = optimizer
|
||||
self.criterion = criterion
|
||||
self.device = device
|
||||
self.clearml_helper = clearml_helper
|
||||
self.debug = debug
|
||||
|
||||
self.metrics_to_track = []
|
||||
@@ -48,12 +47,9 @@ class Trainer:
|
||||
else:
|
||||
self.metrics_to_track.append(loss)
|
||||
|
||||
def init_clearml_task(self):
|
||||
if not self.clearml_helper:
|
||||
return None
|
||||
|
||||
|
||||
task = self.clearml_helper.get_task(task_name="None")
|
||||
def init_clearml_task(self, task):
|
||||
if task is None:
|
||||
return
|
||||
|
||||
# check if running remotely
|
||||
|
||||
@@ -77,15 +73,14 @@ class Trainer:
|
||||
self.optimizer.name = self.optimizer.__class__.__name__
|
||||
self.criterion.name = self.criterion.__class__.__name__
|
||||
|
||||
task.connect(self.optimizer, name="optimizer")
|
||||
task.connect(self.criterion, name="criterion")
|
||||
task.connect(self.data_processor, name="data_processor")
|
||||
task.connect(self, name="trainer")
|
||||
self.optimizer = task.connect(self.optimizer, name="optimizer")
|
||||
self.criterion = task.connect(self.criterion, name="criterion")
|
||||
self.data_processor = task.connect(self.data_processor, name="data_processor")
|
||||
self = task.connect(self, name="trainer")
|
||||
|
||||
task.delete_parameter("trainer/quantiles")
|
||||
task.connect(self.data_processor.data_config, name="data_features")
|
||||
|
||||
return task
|
||||
|
||||
def random_samples(self, train: bool = True, num_samples: int = 10):
|
||||
train_loader, test_loader = self.data_processor.get_dataloaders(
|
||||
predict_sequence_length=self.model.output_size
|
||||
@@ -99,7 +94,7 @@ class Trainer:
|
||||
indices = np.random.randint(0, len(loader.dataset) - 1, size=num_samples)
|
||||
return indices
|
||||
|
||||
def train(self, epochs: int, remotely: bool = False):
|
||||
def train(self, epochs: int, remotely: bool = False, task: Task = None):
|
||||
try:
|
||||
train_loader, test_loader = self.data_processor.get_dataloaders(
|
||||
predict_sequence_length=self.model.output_size
|
||||
@@ -108,7 +103,7 @@ class Trainer:
|
||||
train_samples = self.random_samples(train=True)
|
||||
test_samples = self.random_samples(train=False)
|
||||
|
||||
task = self.init_clearml_task()
|
||||
self.init_clearml_task(task)
|
||||
|
||||
if remotely:
|
||||
task.execute_remotely(queue_name="default", exit_process=True)
|
||||
|
||||
@@ -16,6 +16,8 @@ from src.models.time_embedding_layer import TimeEmbedding
|
||||
|
||||
#### ClearML ####
|
||||
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
|
||||
task = clearml_helper.get_task(task_name="None")
|
||||
|
||||
|
||||
#### Data Processor ####
|
||||
data_config = DataConfig()
|
||||
@@ -40,10 +42,12 @@ inputDim = data_processor.get_input_size()
|
||||
learningRate = 0.0001
|
||||
epochs = 100
|
||||
|
||||
# quantiles = torch.tensor([0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]).to("cuda")
|
||||
quantiles = torch.tensor(
|
||||
[0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]
|
||||
).to("cuda")
|
||||
# add parameters to clearml
|
||||
quantiles = task.get_parameter("general/quantiles", cast=True)
|
||||
if quantiles is None:
|
||||
quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]
|
||||
task.set_parameter("general/quantiles", quantiles)
|
||||
|
||||
|
||||
# model = LinearRegression(inputDim, len(quantiles))
|
||||
time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)
|
||||
@@ -59,11 +63,10 @@ trainer = AutoRegressiveQuantileTrainer(
|
||||
quantiles,
|
||||
"cuda",
|
||||
debug=True,
|
||||
clearml_helper=clearml_helper,
|
||||
)
|
||||
trainer.add_metrics_to_track(
|
||||
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
|
||||
)
|
||||
trainer.early_stopping(patience=10)
|
||||
trainer.plot_every(5)
|
||||
trainer.train(epochs=epochs, remotely=True)
|
||||
trainer.train(task=task, epochs=epochs, remotely=True)
|
||||
@@ -11,4 +11,5 @@ class ClearMLHelper:
|
||||
Task.ignore_requirements("tensorboard")
|
||||
task = Task.init(project_name=self.project_name, task_name=task_name, continue_last_task=False)
|
||||
task.set_base_docker(f"docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime --env GIT_SSL_NO_VERIFY=true --env CLEARML_AGENT_GIT_USER=VictorMylle --env CLEARML_AGENT_GIT_PASS=Voetballer1" )
|
||||
task.set_packages("requirements.txt")
|
||||
return task
|
||||
Reference in New Issue
Block a user