diff --git a/src/losses/crps_metric.py b/src/losses/crps_metric.py
index c010a78..af55618 100644
--- a/src/losses/crps_metric.py
+++ b/src/losses/crps_metric.py
@@ -18,6 +18,7 @@ class CRPSLoss(nn.Module):
         # target = target.unsqueeze(-1)
 
         mask = (preds > target).float()
+        self.quantiles_tensor = self.quantiles_tensor.to(preds.device)
         test = self.quantiles_tensor - mask
         # square them
         test = test * test
diff --git a/src/losses/pinball_loss.py b/src/losses/pinball_loss.py
index 6927083..1c49841 100644
--- a/src/losses/pinball_loss.py
+++ b/src/losses/pinball_loss.py
@@ -9,8 +9,9 @@ class PinballLoss(nn.Module):
 
     def forward(self, pred, target):
         error = target - pred
-        upper = self.quantiles_tensor * error
-        lower = (self.quantiles_tensor - 1) * error
+        quantiles = self.quantiles_tensor.to(error.device)
+        upper = quantiles * error
+        lower = (quantiles - 1) * error
         losses = torch.max(lower, upper)
         loss = torch.mean(torch.mean(losses, dim=0))
         return loss
@@ -26,8 +27,10 @@ class NonAutoRegressivePinballLoss(nn.Module):
         pred = pred.reshape(-1, 96, len(self.quantiles_tensor))
         target_expanded = target.unsqueeze(2)
         error = target_expanded - pred
-        upper = self.quantiles_tensor * error
-        lower = (self.quantiles_tensor - 1) * error
+        quantiles = self.quantiles_tensor.to(error.device)
+
+        upper = quantiles * error
+        lower = (quantiles - 1) * error
         losses = torch.max(lower, upper)
         loss = torch.mean(losses)
         return loss
diff --git a/src/trainers/autoregressive_trainer.py b/src/trainers/autoregressive_trainer.py
index 3ddb3ff..d170693 100644
--- a/src/trainers/autoregressive_trainer.py
+++ b/src/trainers/autoregressive_trainer.py
@@ -19,7 +19,6 @@ class AutoRegressiveTrainer(Trainer):
         criterion: torch.nn.Module,
         data_processor: DataProcessor,
         device: torch.device,
-        clearml_helper: ClearMLHelper = None,
         debug: bool = True,
     ):
         super().__init__(
@@ -28,9 +27,8 @@ class AutoRegressiveTrainer(Trainer):
             model=model,
             optimizer=optimizer,
             criterion=criterion,
             data_processor=data_processor,
             device=device,
-            clearml_helper=clearml_helper,
             debug=debug,
         )
         self.model.output_size = 1
diff --git a/src/trainers/quantile_trainer.py b/src/trainers/quantile_trainer.py
index ca6c542..5146e53 100644
--- a/src/trainers/quantile_trainer.py
+++ b/src/trainers/quantile_trainer.py
@@ -10,12 +10,16 @@
 import matplotlib.pyplot as plt
 
 
 def sample_from_dist(quantiles, output_values):
-    # both to numpy
-    quantiles = quantiles.cpu().numpy()
+    # check if tensor:
+    if isinstance(quantiles, torch.Tensor):
+        quantiles = quantiles.cpu().numpy()
 
     if isinstance(output_values, torch.Tensor):
         output_values = output_values.cpu().numpy()
 
+    if isinstance(quantiles, list):
+        quantiles = np.array(quantiles)
+
     reshaped_values = output_values.reshape(-1, len(quantiles))
     uniform_random_numbers = np.random.uniform(0, 1, (reshaped_values.shape[0], 1000))
@@ -60,22 +64,18 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
         data_processor: DataProcessor,
         quantiles: list,
         device: torch.device,
-        clearml_helper: ClearMLHelper = None,
         debug: bool = True,
     ):
         self.quantiles = quantiles
 
-        quantiles_tensor = torch.tensor(quantiles)
-        quantiles_tensor = quantiles_tensor.to(device)
-        criterion = PinballLoss(quantiles=quantiles_tensor)
+        criterion = PinballLoss(quantiles=quantiles)
+
         super().__init__(
             model=model,
             optimizer=optimizer,
             criterion=criterion,
             data_processor=data_processor,
             device=device,
-            clearml_helper=clearml_helper,
             debug=debug,
         )
 
@@ -252,7 +252,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
     def plot_quantile_percentages(
         self, task, data_loader, train: bool = True, iteration: int = None
     ):
-        quantiles = self.quantiles.cpu().numpy()
+        quantiles = self.quantiles
 
         total = 0
         quantile_counter = {q: 0 for q in quantiles}
diff --git a/src/trainers/trainer.py b/src/trainers/trainer.py
index cfda178..bc4a895 100644
--- a/src/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -1,3 +1,4 @@
+from clearml import Task
 import torch
 from src.data.preprocessing import DataProcessor
 from src.utils.clearml import ClearMLHelper
@@ -15,14 +16,12 @@ class Trainer:
         criterion: torch.nn.Module,
         data_processor: DataProcessor,
         device: torch.device,
-        clearml_helper: ClearMLHelper = None,
         debug: bool = True,
     ):
         self.model = model
         self.optimizer = optimizer
         self.criterion = criterion
         self.device = device
-        self.clearml_helper = clearml_helper
         self.debug = debug
 
         self.metrics_to_track = []
@@ -48,12 +47,9 @@ class Trainer:
         else:
             self.metrics_to_track.append(loss)
 
-    def init_clearml_task(self):
-        if not self.clearml_helper:
-            return None
-
-
-        task = self.clearml_helper.get_task(task_name="None")
+    def init_clearml_task(self, task):
+        if task is None:
+            return
 
         # check if running remotely
 
@@ -77,15 +73,14 @@ class Trainer:
         self.optimizer.name = self.optimizer.__class__.__name__
         self.criterion.name = self.criterion.__class__.__name__
 
-        task.connect(self.optimizer, name="optimizer")
-        task.connect(self.criterion, name="criterion")
-        task.connect(self.data_processor, name="data_processor")
-        task.connect(self, name="trainer")
+        self.optimizer = task.connect(self.optimizer, name="optimizer")
+        self.criterion = task.connect(self.criterion, name="criterion")
+        self.data_processor = task.connect(self.data_processor, name="data_processor")
+        self = task.connect(self, name="trainer")
+        task.delete_parameter("trainer/quantiles")
 
         task.connect(self.data_processor.data_config, name="data_features")
 
-        return task
-
     def random_samples(self, train: bool = True, num_samples: int = 10):
         train_loader, test_loader = self.data_processor.get_dataloaders(
             predict_sequence_length=self.model.output_size
@@ -99,7 +94,7 @@ class Trainer:
         indices = np.random.randint(0, len(loader.dataset) - 1, size=num_samples)
         return indices
 
-    def train(self, epochs: int, remotely: bool = False):
+    def train(self, epochs: int, remotely: bool = False, task: Task = None):
         try:
             train_loader, test_loader = self.data_processor.get_dataloaders(
                 predict_sequence_length=self.model.output_size
@@ -108,7 +103,7 @@ class Trainer:
             train_samples = self.random_samples(train=True)
             test_samples = self.random_samples(train=False)
-            task = self.init_clearml_task()
+            self.init_clearml_task(task)
 
             if remotely:
                 task.execute_remotely(queue_name="default", exit_process=True)
 
diff --git a/src/notebooks/training.py b/src/training_scripts/autoregressive_quantiles.py
similarity index 84%
rename from src/notebooks/training.py
rename to src/training_scripts/autoregressive_quantiles.py
index c0d4d0f..e9e05e2 100644
--- a/src/notebooks/training.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -16,6 +16,8 @@ from src.models.time_embedding_layer import TimeEmbedding
 
 #### ClearML ####
 clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
+task = clearml_helper.get_task(task_name="None")
+
 
 #### Data Processor ####
 data_config = DataConfig()
@@ -40,10 +42,12 @@ inputDim = data_processor.get_input_size()
 learningRate = 0.0001
 epochs = 100
 
-# quantiles = torch.tensor([0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]).to("cuda")
-quantiles = torch.tensor(
-    [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]
-).to("cuda")
+# add parameters to clearml
+quantiles = task.get_parameter("general/quantiles", cast=True)
+if quantiles is None:
+    quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]
+    task.set_parameter("general/quantiles", quantiles)
+
 
 # model = LinearRegression(inputDim, len(quantiles))
 time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)
@@ -59,11 +63,10 @@ trainer = AutoRegressiveQuantileTrainer(
     quantiles,
     "cuda",
     debug=True,
-    clearml_helper=clearml_helper,
 )
 trainer.add_metrics_to_track(
     [PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
 )
 trainer.early_stopping(patience=10)
 trainer.plot_every(5)
-trainer.train(epochs=epochs, remotely=True)
\ No newline at end of file
+trainer.train(task=task, epochs=epochs, remotely=True)
\ No newline at end of file
diff --git a/src/utils/clearml.py b/src/utils/clearml.py
index 825c283..0fbf3f8 100644
--- a/src/utils/clearml.py
+++ b/src/utils/clearml.py
@@ -11,4 +11,5 @@
         Task.ignore_requirements("tensorboard")
         task = Task.init(project_name=self.project_name, task_name=task_name, continue_last_task=False)
         task.set_base_docker(f"docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime --env GIT_SSL_NO_VERIFY=true --env CLEARML_AGENT_GIT_USER=VictorMylle --env CLEARML_AGENT_GIT_PASS=Voetballer1" )
+        task.set_packages("requirements.txt")
         return task
\ No newline at end of file