From 75f1f64c382ac62c2a89bb99d9853635ff893c07 Mon Sep 17 00:00:00 2001
From: Victor Mylle
Date: Mon, 13 Nov 2023 23:14:07 +0000
Subject: [PATCH] Autoregressive test score calculated on 96 values

---
NOTE(review): this patch was restored from a whitespace-collapsed copy.
Line breaks follow the unified-diff markers; indentation depth and the
position of blank context lines were inferred from Python syntax and the
hunk line counts. Confirm against the original commit before applying.

 src/data/dataset.py                    |  3 +-
 src/data/preprocessing.py              |  9 ------
 src/trainers/autoregressive_trainer.py | 44 +++++++++++++++++++++++++-
 src/trainers/trainer.py                |  2 +-
 4 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/src/data/dataset.py b/src/data/dataset.py
index 3beb1fb..d923742 100644
--- a/src/data/dataset.py
+++ b/src/data/dataset.py
@@ -31,7 +31,7 @@ class NrvDataset(Dataset):
         return skip_indices
 
     def __len__(self):
-        return len(self.nrv) - self.sequence_length - self.predict_sequence_length - len(self.samples_to_skip)
+        return len(self.valid_indices)
 
     def __getitem__(self, idx):
         actual_idx = self.valid_indices[idx]
@@ -63,7 +63,6 @@ class NrvDataset(Dataset):
             print(f"Found nan values in the features of sample {idx}.")
             print(f"Actual index: {actual_idx}")
             raise ValueError("There are nan values in the features.")
-
         return all_features, nrv_target
 
 
diff --git a/src/data/preprocessing.py b/src/data/preprocessing.py
index 8ec7bcd..3a8b1c2 100644
--- a/src/data/preprocessing.py
+++ b/src/data/preprocessing.py
@@ -66,15 +66,6 @@ class DataProcessor:
         df = df[['datetime', 'load_forecast', 'total_load']]
 
         df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
-
-        # check if there are nan values
-        if df.isnull().values.any():
-            # print the rows with nan values
-            # print(df[df.isnull().any(axis=1)])
-            # export to temp csv
-            df[df.isnull().any(axis=1)].to_csv("temp.csv")
-            # raise ValueError("There are nan values in the load forecast data.")
-
         df.sort_values(by="datetime", inplace=True)
         return df
 
diff --git a/src/trainers/autoregressive_trainer.py b/src/trainers/autoregressive_trainer.py
index ecbd7d0..9fb0b36 100644
--- a/src/trainers/autoregressive_trainer.py
+++ b/src/trainers/autoregressive_trainer.py
@@ -8,6 +8,7 @@ import numpy as np
 import plotly.subplots as sp
 from plotly.subplots import make_subplots
 from trainers.trainer import Trainer
+from tqdm import tqdm
 
 class AutoRegressiveTrainer(Trainer):
     def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
@@ -77,4 +78,45 @@ class AutoRegressiveTrainer(Trainer):
             prediction = self.model(new_features.unsqueeze(0).to(self.device))
             predictions_full.append(prediction.squeeze(-1))
 
-        return initial_sequence.cpu(), torch.stack(predictions_full).cpu(), torch.stack(target_full).cpu()
\ No newline at end of file
+        return initial_sequence.cpu(), torch.stack(predictions_full).cpu(), torch.stack(target_full).cpu()
+
+    def log_final_metrics(self, task, dataloader, train: bool = True):
+        metrics = { metric.__class__.__name__: 0.0 for metric in self.metrics_to_track }
+        transformed_metrics = { metric.__class__.__name__: 0.0 for metric in self.metrics_to_track }
+
+        with torch.no_grad():
+            # iterate idx over dataset
+            total_amount_samples = len(dataloader.dataset) - 95
+
+            for idx in tqdm(range(total_amount_samples)):
+                _, outputs, targets = self.auto_regressive(dataloader, idx)
+
+                inversed_outputs = torch.tensor(self.data_processor.inverse_transform(outputs))
+                inversed_inputs = torch.tensor(self.data_processor.inverse_transform(targets))
+
+                outputs = outputs.to(self.device)
+                targets = targets.to(self.device)
+
+                for metric in self.metrics_to_track:
+                    transformed_metrics[metric.__class__.__name__] += metric(outputs, targets)
+                    metrics[metric.__class__.__name__] += metric(inversed_outputs, inversed_inputs)
+
+        for metric in self.metrics_to_track:
+            metrics[metric.__class__.__name__] /= total_amount_samples
+            transformed_metrics[metric.__class__.__name__] /= total_amount_samples
+
+        for metric_name, metric_value in metrics.items():
+            if train:
+                metric_name = f'train_{metric_name}'
+            else:
+                metric_name = f'test_{metric_name}'
+
+            task.get_logger().report_single_value(name=metric_name, value=metric_value)
+
+        for metric_name, metric_value in transformed_metrics.items():
+            if train:
+                metric_name = f'train_transformed_{metric_name}'
+            else:
+                metric_name = f'test_transformed_{metric_name}'
+
+            task.get_logger().report_single_value(name=metric_name, value=metric_value)
\ No newline at end of file
diff --git a/src/trainers/trainer.py b/src/trainers/trainer.py
index 4e8a604..2c9ac63 100644
--- a/src/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -180,7 +180,7 @@ class Trainer:
 
         transformed_train_loader, transformed_test_loader = self.data_processor.get_dataloaders(predict_sequence_length=self.model.output_size)
 
-        self.log_final_metrics(task, transformed_train_loader, train=True)
+        # self.log_final_metrics(task, transformed_train_loader, train=True)
         self.log_final_metrics(task, transformed_test_loader, train=False)
 
 