Added skip in NRVDataset for incomplete samples
This commit is contained in:
@@ -2,10 +2,17 @@
|
|||||||
## 1. TODOs
|
## 1. TODOs
|
||||||
|
|
||||||
- [x] Compare autoregressive vs non-autoregressive
|
- [x] Compare autoregressive vs non-autoregressive
|
||||||
- [ ] Add more input parameters (load forecast)
|
- [x] Rewrite dataloader for more input parameters (load forecast)
|
||||||
|
- [ ] Explore more input parameters (load forecast)
|
||||||
- [x] Quantile Regression sampling fix
|
- [x] Quantile Regression sampling fix
|
||||||
- [x] Quantile Regression exploration
|
- [x] Quantile Regression exploration
|
||||||
- [ ] Plots with good scaling (y-axis)
|
- [x] Plots with good scaling (y-axis)
|
||||||
|
|
||||||
|
- [x] Some days in load forecast are missing, remove samples from dataset (Implemented a skip in the NRVDataset)
|
||||||
|
- [ ] Review Quantile Regression
|
||||||
|
- [ ] Test scores for 96 values
|
||||||
|
|
||||||
|
- [ ] (Optional) Other models (LSTM?)
|
||||||
|
|
||||||
## 2. Autoregressive vs Non-Autoregressive
|
## 2. Autoregressive vs Non-Autoregressive
|
||||||
|
|
||||||
|
|||||||
@@ -5,29 +5,48 @@ import pandas as pd
|
|||||||
class NrvDataset(Dataset):
|
class NrvDataset(Dataset):
|
||||||
def __init__(self, dataframe, data_config, sequence_length=96, predict_sequence_length=96):
    """Store the series as tensors and precompute the usable sample starts.

    Args:
        dataframe: Table providing the 'nrv', 'load_forecast' and
            'total_load' columns.
        data_config: Configuration object; its feature flags are read when
            samples are assembled.
        sequence_length: Number of history steps in one sample.
        predict_sequence_length: Number of target steps in one sample.
    """
    self.data_config = data_config

    # Use a re-indexed copy instead of resetting the index in place: row
    # labels then coincide with tensor positions (skip_samples relies on
    # this) and the caller's dataframe is left untouched.
    self.dataframe = dataframe.reset_index(drop=True)

    self.nrv = torch.tensor(self.dataframe['nrv'].to_numpy(), dtype=torch.float32)
    self.load_forecast = torch.tensor(self.dataframe['load_forecast'].to_numpy(), dtype=torch.float32)
    self.total_load = torch.tensor(self.dataframe['total_load'].to_numpy(), dtype=torch.float32)

    self.sequence_length = sequence_length
    self.predict_sequence_length = predict_sequence_length

    # Start positions whose window would touch a row with missing values.
    self.samples_to_skip = self.skip_samples()

    # Candidate start positions, minus the ones to skip, in ascending order.
    total_indices = set(range(len(self.nrv) - self.sequence_length - self.predict_sequence_length))
    self.valid_indices = sorted(total_indices - set(self.samples_to_skip))
|
||||||
|
|
||||||
|
def skip_samples(self):
    """Return the sorted sample start positions that must be skipped.

    A start position is skipped when a row containing a missing value lies
    within ``sequence_length + predict_sequence_length`` rows after it.
    Only positions inside the candidate range
    ``[0, len(dataframe) - sequence_length - predict_sequence_length)`` are
    returned, so the length of the result equals the number of samples
    actually removed.

    Assumes ``self.dataframe`` has a 0..n-1 integer index.

    Returns:
        Sorted list of unique start positions (ints).
    """
    window = self.sequence_length + self.predict_sequence_length
    n_candidates = len(self.dataframe) - window

    nan_mask = self.dataframe.isnull().any(axis=1)
    nan_indices = self.dataframe.index[nan_mask]

    skip_indices = set()
    for idx in nan_indices:
        # NOTE(review): idx - window is one position earlier than the first
        # window that contains idx; kept as in the original (conservative).
        first = max(0, idx - window)
        # Clamp to the last candidate start so that no out-of-range
        # positions inflate the skip count.
        last = min(idx, n_candidates - 1)
        skip_indices.update(range(first, last + 1))

    return sorted(skip_indices)
|
||||||
|
|
||||||
def __len__(self):
    """Return the number of usable samples.

    This is the length of ``self.valid_indices``, the list that sample
    lookup indexes into. Counting it directly keeps the reported length
    consistent with the positions that can actually be fetched, even when
    ``samples_to_skip`` contains positions outside the candidate range.
    """
    return len(self.valid_indices)
|
||||||
|
|
||||||
def __getitem__(self, idx):
|
def __getitem__(self, idx):
|
||||||
|
actual_idx = self.valid_indices[idx]
|
||||||
features = []
|
features = []
|
||||||
|
|
||||||
if self.data_config.NRV_HISTORY:
|
if self.data_config.NRV_HISTORY:
|
||||||
nrv = self.nrv[idx:idx+self.sequence_length]
|
nrv = self.nrv[actual_idx:actual_idx+self.sequence_length]
|
||||||
features.append(nrv.view(-1))
|
features.append(nrv.view(-1))
|
||||||
|
|
||||||
if self.data_config.LOAD_HISTORY:
|
if self.data_config.LOAD_HISTORY:
|
||||||
load_history = self.load_forecast[idx:idx+self.sequence_length]
|
load_history = self.total_load[actual_idx:actual_idx+self.sequence_length]
|
||||||
features.append(load_history.view(-1))
|
features.append(load_history.view(-1))
|
||||||
|
|
||||||
if self.data_config.LOAD_FORECAST:
|
if self.data_config.LOAD_FORECAST:
|
||||||
load_forecast = self.load_forecast[idx+self.sequence_length:idx+self.sequence_length+self.predict_sequence_length]
|
load_forecast = self.load_forecast[actual_idx+self.sequence_length:actual_idx+self.sequence_length+self.predict_sequence_length]
|
||||||
features.append(load_forecast.view(-1))
|
features.append(load_forecast.view(-1))
|
||||||
|
|
||||||
if not features:
|
if not features:
|
||||||
@@ -37,17 +56,24 @@ class NrvDataset(Dataset):
|
|||||||
all_features = torch.cat(features, dim=0)
|
all_features = torch.cat(features, dim=0)
|
||||||
|
|
||||||
# Target sequence, flattened if necessary
|
# Target sequence, flattened if necessary
|
||||||
nrv_target = self.nrv[idx+self.sequence_length:idx+self.sequence_length+self.predict_sequence_length].view(-1)
|
nrv_target = self.nrv[actual_idx+self.sequence_length:actual_idx+self.sequence_length+self.predict_sequence_length].view(-1)
|
||||||
|
|
||||||
|
# check if nan values are present
|
||||||
|
if torch.isnan(all_features).any():
|
||||||
|
print(f"Found nan values in the features of sample {idx}.")
|
||||||
|
print(f"Actual index: {actual_idx}")
|
||||||
|
raise ValueError("There are nan values in the features.")
|
||||||
|
|
||||||
|
|
||||||
return all_features, nrv_target
|
return all_features, nrv_target
|
||||||
|
|
||||||
def random_day_autoregressive(self, idx: int):
|
def random_day_autoregressive(self, idx: int):
|
||||||
|
idx = self.valid_indices[idx]
|
||||||
features = []
|
features = []
|
||||||
|
|
||||||
# we already have the NRV history with the newly predicted values, so we don't need to include the last 96 values
|
# we already have the NRV history with the newly predicted values, so we don't need to include the last 96 values
|
||||||
|
|
||||||
if self.data_config.LOAD_HISTORY:
|
if self.data_config.LOAD_HISTORY:
|
||||||
load_history = self.load_forecast[idx:idx+self.sequence_length]
|
load_history = self.total_load[idx:idx+self.sequence_length]
|
||||||
features.append(load_history.view(-1))
|
features.append(load_history.view(-1))
|
||||||
|
|
||||||
if self.data_config.LOAD_FORECAST:
|
if self.data_config.LOAD_FORECAST:
|
||||||
|
|||||||
@@ -62,16 +62,18 @@ class DataProcessor:
|
|||||||
|
|
||||||
def get_load_forecast(self):
    """Read the load CSV and return it sorted by timestamp.

    The file at ``forecast_data_path`` is read with ';' as delimiter. The
    columns 'Datetime', 'Day-ahead 6PM forecast' and 'Total Load' are
    renamed to 'datetime', 'load_forecast' and 'total_load', and
    'datetime' is parsed as timezone-aware UTC.

    Rows with missing values are kept in the result; when any exist they
    are additionally written to ``temp.csv`` in the working directory for
    inspection.

    Returns:
        DataFrame with columns 'datetime', 'load_forecast' and
        'total_load', sorted by 'datetime'.
    """
    df = pd.read_csv(forecast_data_path, delimiter=';')
    df = df.rename(columns={
        'Day-ahead 6PM forecast': 'load_forecast',
        'Datetime': 'datetime',
        'Total Load': 'total_load',
    })
    # Explicit copy: the subset is modified below, and writing into a slice
    # of the original frame triggers pandas' chained-assignment warning.
    df = df[['datetime', 'load_forecast', 'total_load']].copy()

    df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

    # Missing values are not an error here; export the affected rows.
    incomplete_rows = df.isnull().any(axis=1)
    if incomplete_rows.any():
        df[incomplete_rows].to_csv("temp.csv")

    df.sort_values(by="datetime", inplace=True)
    return df
|
||||||
@@ -94,6 +96,7 @@ class DataProcessor:
|
|||||||
if transform:
|
if transform:
|
||||||
train_df['nrv'] = self.nrv_scaler.fit_transform(train_df['nrv'].values.reshape(-1, 1)).reshape(-1)
|
train_df['nrv'] = self.nrv_scaler.fit_transform(train_df['nrv'].values.reshape(-1, 1)).reshape(-1)
|
||||||
train_df['load_forecast'] = self.load_forecast_scaler.fit_transform(train_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
|
train_df['load_forecast'] = self.load_forecast_scaler.fit_transform(train_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
|
||||||
|
train_df['total_load'] = self.load_forecast_scaler.transform(train_df['total_load'].values.reshape(-1, 1)).reshape(-1)
|
||||||
|
|
||||||
train_dataset = NrvDataset(train_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
|
train_dataset = NrvDataset(train_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
|
||||||
return self.get_dataloader(train_dataset)
|
return self.get_dataloader(train_dataset)
|
||||||
@@ -111,6 +114,7 @@ class DataProcessor:
|
|||||||
if transform:
|
if transform:
|
||||||
test_df['nrv'] = self.nrv_scaler.transform(test_df['nrv'].values.reshape(-1, 1)).reshape(-1)
|
test_df['nrv'] = self.nrv_scaler.transform(test_df['nrv'].values.reshape(-1, 1)).reshape(-1)
|
||||||
test_df['load_forecast'] = self.load_forecast_scaler.transform(test_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
|
test_df['load_forecast'] = self.load_forecast_scaler.transform(test_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
|
||||||
|
test_df['total_load'] = self.load_forecast_scaler.transform(test_df['total_load'].values.reshape(-1, 1)).reshape(-1)
|
||||||
|
|
||||||
test_dataset = NrvDataset(test_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
|
test_dataset = NrvDataset(test_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
|
||||||
return self.get_dataloader(test_dataset, shuffle=False)
|
return self.get_dataloader(test_dataset, shuffle=False)
|
||||||
@@ -120,4 +124,9 @@ class DataProcessor:
|
|||||||
return self.get_train_dataloader(transform=transform, predict_sequence_length=predict_sequence_length), self.get_test_dataloader(transform=transform, predict_sequence_length=predict_sequence_length)
|
return self.get_train_dataloader(transform=transform, predict_sequence_length=predict_sequence_length), self.get_test_dataloader(transform=transform, predict_sequence_length=predict_sequence_length)
|
||||||
|
|
||||||
def inverse_transform(self, tensor: torch.Tensor):
    """Map scaled NRV values back to the original scale.

    Args:
        tensor: Tensor of scaled values; it is passed to
            ``self.nrv_scaler.inverse_transform`` as a NumPy array.

    Returns:
        Flattened (1-D) array of the inverse-transformed values.
    """
    # detach() first: calling numpy() on a tensor that requires grad raises
    # a RuntimeError. For tensors without grad this is a no-op.
    values = tensor.detach().cpu().numpy()
    return self.nrv_scaler.inverse_transform(values).reshape(-1)
|
||||||
|
|
||||||
|
def get_input_size(self):
    """Return the size of the last dimension of one training input batch.

    Builds the training dataloader via ``self.get_train_dataloader()`` and
    inspects the first batch it yields.

    Returns:
        ``shape[-1]`` of the first batch's input tensor.

    Raises:
        ValueError: If the training dataloader yields no batches.
    """
    data_loader = self.get_train_dataloader()
    first_batch = next(iter(data_loader), None)
    if first_batch is None:
        # Report a clear error instead of leaking a bare StopIteration.
        raise ValueError("The training dataloader is empty; cannot determine the input size.")
    # Named `features` rather than `input` to avoid shadowing the builtin.
    features, _ = first_batch
    return features.shape[-1]
|
||||||
|
|||||||
Reference in New Issue
Block a user