diff --git a/Result-Reports/November_1.md b/Result-Reports/November_1.md index e73c792..03c5faf 100644 --- a/Result-Reports/November_1.md +++ b/Result-Reports/November_1.md @@ -2,10 +2,17 @@ ## 1. TODOs - [x] Compare autoregressive vs non-autoregressive -- [ ] Add more input parameters (load forecast) +- [x] Rewrite dataloader for more input parameters (load forecast) +- [ ] Explore more input parameters (load forecast) - [x] Quantile Regression sampling fix - [x] Quantile Regression exploration -- [ ] Plots with good scaling (y-axis) +- [x] Plots with good scaling (y-axis) + +- [x] Some days in load forecast are missing, remove samples from dataset (Implemented a skip in the NRVDataset) +- [ ] Quantile Regression nakijken +- [ ] Test scores voor 96 values + +- [ ] (Optional) Andere modellen (LSTM?) ## 2. Autoregressive vs Non-Autoregressive diff --git a/src/data/dataset.py b/src/data/dataset.py index caeac47..3beb1fb 100644 --- a/src/data/dataset.py +++ b/src/data/dataset.py @@ -5,29 +5,48 @@ import pandas as pd class NrvDataset(Dataset): def __init__(self, dataframe, data_config, sequence_length=96, predict_sequence_length=96): self.data_config = data_config - + self.dataframe = dataframe + # reset dataframe index + self.dataframe.reset_index(drop=True, inplace=True) + self.nrv = torch.tensor(dataframe['nrv'].to_numpy(), dtype=torch.float32) self.load_forecast = torch.tensor(dataframe['load_forecast'].to_numpy(), dtype=torch.float32) + self.total_load = torch.tensor(dataframe['total_load'].to_numpy(), dtype=torch.float32) self.sequence_length = sequence_length self.predict_sequence_length = predict_sequence_length + self.samples_to_skip = self.skip_samples() + total_indices = set(range(len(self.nrv) - self.sequence_length - self.predict_sequence_length)) + self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip))) + + def skip_samples(self): + nan_rows = self.dataframe[self.dataframe.isnull().any(axis=1)] + nan_indices = nan_rows.index + skip_indices = [list(range(idx-self.sequence_length-self.predict_sequence_length, idx+1)) for idx in nan_indices] + + skip_indices = [item for sublist in skip_indices for item in sublist] + skip_indices = list(set(skip_indices)) + skip_indices.sort() + return skip_indices + def __len__(self): - return len(self.nrv) - self.sequence_length - self.predict_sequence_length + return len(self.nrv) - self.sequence_length - self.predict_sequence_length - len(self.samples_to_skip) def __getitem__(self, idx): + actual_idx = self.valid_indices[idx] features = [] if self.data_config.NRV_HISTORY: - nrv = self.nrv[idx:idx+self.sequence_length] + nrv = self.nrv[actual_idx:actual_idx+self.sequence_length] features.append(nrv.view(-1)) if self.data_config.LOAD_HISTORY: - load_history = self.load_forecast[idx:idx+self.sequence_length] + load_history = self.total_load[actual_idx:actual_idx+self.sequence_length] features.append(load_history.view(-1)) if self.data_config.LOAD_FORECAST: - load_forecast = self.load_forecast[idx+self.sequence_length:idx+self.sequence_length+self.predict_sequence_length] + load_forecast = self.load_forecast[actual_idx+self.sequence_length:actual_idx+self.sequence_length+self.predict_sequence_length] features.append(load_forecast.view(-1)) if not features: @@ -37,17 +56,24 @@ class NrvDataset(Dataset): all_features = torch.cat(features, dim=0) # Target sequence, flattened if necessary - nrv_target = self.nrv[idx+self.sequence_length:idx+self.sequence_length+self.predict_sequence_length].view(-1) + nrv_target = self.nrv[actual_idx+self.sequence_length:actual_idx+self.sequence_length+self.predict_sequence_length].view(-1) + + # check if nan values are present + if torch.isnan(all_features).any(): + print(f"Found nan values in the features of sample {idx}.") + print(f"Actual index: {actual_idx}") + raise ValueError("There are nan values in the features.") + return all_features, nrv_target def random_day_autoregressive(self, idx: int): + idx = self.valid_indices[idx] features = [] # we already have the NRV history with the newly predicted values, so we don't need to include the last 96 values - if self.data_config.LOAD_HISTORY: - load_history = self.load_forecast[idx:idx+self.sequence_length] + load_history = self.total_load[idx:idx+self.sequence_length] features.append(load_history.view(-1)) if self.data_config.LOAD_FORECAST: diff --git a/src/data/preprocessing.py b/src/data/preprocessing.py index c24ea4a..8ec7bcd 100644 --- a/src/data/preprocessing.py +++ b/src/data/preprocessing.py @@ -62,16 +62,18 @@ class DataProcessor: def get_load_forecast(self): df = pd.read_csv(forecast_data_path, delimiter=';') - df = df.rename(columns={'Day-ahead 6PM forecast': 'load_forecast', 'Datetime': 'datetime'}) - df = df[['datetime', 'load_forecast']] + df = df.rename(columns={'Day-ahead 6PM forecast': 'load_forecast', 'Datetime': 'datetime', 'Total Load': 'total_load'}) + df = df[['datetime', 'load_forecast', 'total_load']] df['datetime'] = pd.to_datetime(df['datetime'], utc=True) # check if there are nan values - # if df.isnull().values.any(): - # # print the rows with nan values - # print(df[df.isnull().any(axis=1)]) - # raise ValueError("There are nan values in the load forecast data.") + if df.isnull().values.any(): + # print the rows with nan values + # print(df[df.isnull().any(axis=1)]) + # export to temp csv + df[df.isnull().any(axis=1)].to_csv("temp.csv") + # raise ValueError("There are nan values in the load forecast data.") df.sort_values(by="datetime", inplace=True) return df @@ -94,6 +96,7 @@ class DataProcessor: if transform: train_df['nrv'] = self.nrv_scaler.fit_transform(train_df['nrv'].values.reshape(-1, 1)).reshape(-1) train_df['load_forecast'] = self.load_forecast_scaler.fit_transform(train_df['load_forecast'].values.reshape(-1, 1)).reshape(-1) + train_df['total_load'] = self.load_forecast_scaler.transform(train_df['total_load'].values.reshape(-1, 1)).reshape(-1) train_dataset = NrvDataset(train_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length) return self.get_dataloader(train_dataset) @@ -111,6 +114,7 @@ class DataProcessor: if transform: test_df['nrv'] = self.nrv_scaler.transform(test_df['nrv'].values.reshape(-1, 1)).reshape(-1) test_df['load_forecast'] = self.load_forecast_scaler.transform(test_df['load_forecast'].values.reshape(-1, 1)).reshape(-1) + test_df['total_load'] = self.load_forecast_scaler.transform(test_df['total_load'].values.reshape(-1, 1)).reshape(-1) test_dataset = NrvDataset(test_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length) return self.get_dataloader(test_dataset, shuffle=False) @@ -120,4 +124,9 @@ class DataProcessor: return self.get_train_dataloader(transform=transform, predict_sequence_length=predict_sequence_length), self.get_test_dataloader(transform=transform, predict_sequence_length=predict_sequence_length) def inverse_transform(self, tensor: torch.Tensor): - return self.nrv_scaler.inverse_transform(tensor.cpu().numpy()).reshape(-1) \ No newline at end of file + return self.nrv_scaler.inverse_transform(tensor.cpu().numpy()).reshape(-1) + + def get_input_size(self): + data_loader = self.get_train_dataloader() + input, _ = next(iter(data_loader)) + return input.shape[-1]