Rewrote dataset to be able to include new features

This commit is contained in:
Victor Mylle
2023-11-08 23:17:47 +00:00
parent 56c763a6f4
commit 2f48363292
10 changed files with 311218 additions and 118 deletions

View File

@@ -70,23 +70,22 @@ class Trainer:
return task
def random_samples(self, train: bool = True, num_samples: int = 10):
    """Draw random dataset indices for later debug plotting.

    Args:
        train: sample from the training loader when True, otherwise from
            the test loader.
        num_samples: number of indices to draw.

    Returns:
        np.ndarray of ``num_samples`` integer indices into the chosen
        loader's dataset.
    """
    train_loader, test_loader = self.data_processor.get_dataloaders(predict_sequence_length=self.model.output_size)
    loader = train_loader if train else test_loader
    # np.random.randint's upper bound is exclusive, so the previous
    # `len(loader.dataset) - 1` could never select the last sample.
    indices = np.random.randint(0, len(loader.dataset), size=num_samples)
    return indices
def train(self, epochs: int):
train_loader, test_loader = self.data_processor.get_dataloaders(predict_sequence_length=self.model.output_size)
train_random_X, train_random_y = self.random_samples(train=True)
test_random_X, test_random_y = self.random_samples(train=False)
train_samples = self.random_samples(train=True)
test_samples = self.random_samples(train=False)
task = self.init_clearml_task()
@@ -129,8 +128,8 @@ class Trainer:
if epoch % self.plot_every_n_epochs == 0:
self.debug_plots(task, True, (train_random_X, train_random_y), epoch)
self.debug_plots(task, False, (test_random_X, test_random_y), epoch)
self.debug_plots(task, True, train_loader, train_samples, epoch)
self.debug_plots(task, False, test_loader, test_samples, epoch)
if task:
self.finish_training(task=task)
@@ -144,6 +143,7 @@ class Trainer:
with torch.no_grad():
for inputs, targets in dataloader:
inputs, targets = inputs.to(self.device), targets
outputs = self.model(inputs)
inversed_outputs = torch.tensor(self.data_processor.inverse_transform(outputs))
@@ -215,22 +215,25 @@ class Trainer:
return fig
def debug_plots(self, task, train: bool, samples, epoch):
X, y = samples
X = X.to(self.device)
num_samples = len(X)
def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
num_samples = len(sample_indices)
rows = num_samples # One row per sample since we only want one column
cols = 1
fig = make_subplots(rows=rows, cols=cols, subplot_titles=[f'Sample {i+1}' for i in range(num_samples)])
for i, (current_day, next_day) in enumerate(zip(X, y)):
for i, idx in enumerate(sample_indices):
features, target = data_loader.dataset[idx]
features = features.to(self.device)
target = target.to(self.device)
self.model.eval()
with torch.no_grad():
predictions = self.model(current_day).cpu()
predictions = self.model(features).cpu()
sub_fig = self.get_plot(current_day, next_day, predictions, show_legend=(i == 0))
sub_fig = self.get_plot(features[:96], target, predictions, show_legend=(i == 0))
row = i + 1
col = 1
@@ -239,7 +242,7 @@ class Trainer:
fig.add_trace(trace, row=row, col=col)
loss = self.criterion(predictions.to(self.device), next_day.squeeze(-1).to(self.device)).item()
loss = self.criterion(predictions.to(self.device), target.squeeze(-1).to(self.device)).item()
fig['layout']['annotations'][i].update(text=f"{loss.__class__.__name__}: {loss:.6f}")