Added skip in NRVDataset for incomplete samples

This commit is contained in:
Victor Mylle
2023-11-13 15:28:51 +00:00
parent 2f48363292
commit caeba6b841
3 changed files with 59 additions and 17 deletions

View File

@@ -62,16 +62,18 @@ class DataProcessor:
def get_load_forecast(self):
    """Load the load-forecast CSV as a frame sorted by timestamp.

    Reads the semicolon-delimited file at ``forecast_data_path``, renames the
    source columns, and keeps only ``datetime``, ``load_forecast`` and
    ``total_load``. ``datetime`` is parsed as timezone-aware UTC.

    Rows containing missing values are NOT dropped and no error is raised;
    they are only exported to ``temp.csv`` for inspection.
    NOTE(review): presumably the incomplete samples are skipped downstream
    by the dataset -- confirm against the dataset implementation.

    Returns:
        pandas.DataFrame: columns ``datetime``, ``load_forecast`` and
        ``total_load``, sorted ascending by ``datetime``.
    """
    raw = pd.read_csv(forecast_data_path, delimiter=';')

    # Rename and select exactly once. Selecting only the two forecast columns
    # first would drop 'Total Load' and make the later selection fail.
    raw = raw.rename(columns={
        'Day-ahead 6PM forecast': 'load_forecast',
        'Datetime': 'datetime',
        'Total Load': 'total_load',
    })
    # Explicit copy so the column assignment below never targets a view.
    df = raw[['datetime', 'load_forecast', 'total_load']].copy()
    df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

    # Export incomplete rows for inspection instead of raising.
    incomplete_rows = df[df.isnull().any(axis=1)]
    if not incomplete_rows.empty:
        incomplete_rows.to_csv("temp.csv")

    return df.sort_values(by="datetime")
@@ -94,6 +96,7 @@ class DataProcessor:
if transform:
train_df['nrv'] = self.nrv_scaler.fit_transform(train_df['nrv'].values.reshape(-1, 1)).reshape(-1)
train_df['load_forecast'] = self.load_forecast_scaler.fit_transform(train_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
train_df['total_load'] = self.load_forecast_scaler.transform(train_df['total_load'].values.reshape(-1, 1)).reshape(-1)
train_dataset = NrvDataset(train_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
return self.get_dataloader(train_dataset)
@@ -111,6 +114,7 @@ class DataProcessor:
if transform:
test_df['nrv'] = self.nrv_scaler.transform(test_df['nrv'].values.reshape(-1, 1)).reshape(-1)
test_df['load_forecast'] = self.load_forecast_scaler.transform(test_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
test_df['total_load'] = self.load_forecast_scaler.transform(test_df['total_load'].values.reshape(-1, 1)).reshape(-1)
test_dataset = NrvDataset(test_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
return self.get_dataloader(test_dataset, shuffle=False)
@@ -120,4 +124,9 @@ class DataProcessor:
return self.get_train_dataloader(transform=transform, predict_sequence_length=predict_sequence_length), self.get_test_dataloader(transform=transform, predict_sequence_length=predict_sequence_length)
def inverse_transform(self, tensor: torch.Tensor):
    """Map scaled NRV values back to their original scale.

    Args:
        tensor: Tensor of scaled values. It is detached from the autograd
            graph and moved to the CPU before conversion to NumPy, so
            tensors that require grad or live on another device are accepted.

    Returns:
        The result of ``self.nrv_scaler.inverse_transform`` flattened to one
        dimension with ``reshape(-1)``.
    """
    # .detach() is a no-op for tensors without grad, but .numpy() raises on
    # tensors that still require grad.
    scaled = tensor.detach().cpu().numpy()
    return self.nrv_scaler.inverse_transform(scaled).reshape(-1)
def get_input_size(self):
    """Return the size of the last dimension of one training input batch.

    Builds the training dataloader with its default arguments, takes the
    first ``(inputs, targets)`` batch and reports ``inputs.shape[-1]``.
    NOTE(review): building the training dataloader may refit the scalers as
    a side effect -- confirm before calling this after training has started.
    """
    data_loader = self.get_train_dataloader()
    # Named `inputs` rather than `input` to avoid shadowing the builtin.
    inputs, _ = next(iter(data_loader))
    return inputs.shape[-1]