Implemented Probabilistic Baseline
This commit is contained in:
@@ -3,9 +3,11 @@ from torch.utils.data import Dataset, DataLoader
|
||||
import pandas as pd
|
||||
|
||||
class NrvDataset(Dataset):
|
||||
def __init__(self, dataframe, data_config, sequence_length=96, predict_sequence_length=96):
|
||||
def __init__(self, dataframe, data_config, full_day_skip: bool = False, sequence_length=96, predict_sequence_length=96):
|
||||
self.data_config = data_config
|
||||
self.dataframe = dataframe
|
||||
self.full_day_skip = full_day_skip
|
||||
|
||||
# reset dataframe index
|
||||
self.dataframe.reset_index(drop=True, inplace=True)
|
||||
|
||||
@@ -22,6 +24,9 @@ class NrvDataset(Dataset):
|
||||
total_indices = set(range(len(self.nrv) - self.sequence_length - self.predict_sequence_length))
|
||||
self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
|
||||
|
||||
### TODO: Option to only use full day samples ###
|
||||
### Skipping every sample that does not start on a day boundary is probably the easiest approach (though not the most efficient) ###
|
||||
|
||||
def skip_samples(self):
|
||||
nan_rows = self.dataframe[self.dataframe.isnull().any(axis=1)]
|
||||
nan_indices = nan_rows.index
|
||||
@@ -30,6 +35,14 @@ class NrvDataset(Dataset):
|
||||
skip_indices = [item for sublist in skip_indices for item in sublist]
|
||||
skip_indices = list(set(skip_indices))
|
||||
skip_indices.sort()
|
||||
|
||||
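# intent: add every index that is NOT the start of a day (00:15) to the skip indices, using the 'datetime' column. NOTE(review): the selection below matches time == 00:15 and adds those indices to the skip list, which looks inverted relative to this intent - confirm whether the comparison should be != instead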
|
||||
# get indices of all 00:15 timestamps
|
||||
if self.full_day_skip:
|
||||
start_of_day_indices = self.dataframe[self.dataframe['datetime'].dt.time == pd.Timestamp('00:15:00').time()].index
|
||||
skip_indices.extend(start_of_day_indices)
|
||||
skip_indices = list(set(skip_indices))
|
||||
|
||||
return skip_indices
|
||||
|
||||
def __len__(self):
|
||||
|
||||
@@ -17,7 +17,7 @@ class DataConfig:
|
||||
self.NRV_HISTORY: bool = True
|
||||
|
||||
### LOAD ###
|
||||
self.LOAD_FORECAST: bool = True
|
||||
self.LOAD_FORECAST: bool = False
|
||||
self.LOAD_HISTORY: bool = False
|
||||
|
||||
### PV ###
|
||||
@@ -51,6 +51,13 @@ class DataProcessor:
|
||||
self.nrv_scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
self.load_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
|
||||
self.full_day_skip = False
|
||||
|
||||
def set_data_config(self, data_config: DataConfig):
|
||||
self.data_config = data_config
|
||||
|
||||
def set_full_day_skip(self, full_day_skip: bool):
|
||||
self.full_day_skip = full_day_skip
|
||||
|
||||
def set_train_range(self, train_range: tuple):
|
||||
self.train_range = train_range
|
||||
@@ -115,7 +122,8 @@ class DataProcessor:
|
||||
self.batch_size = batch_size
|
||||
|
||||
def get_dataloader(self, dataset, shuffle: bool = True):
|
||||
return torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle, num_workers=4)
|
||||
batch_size = len(dataset) if self.batch_size is None else self.batch_size
|
||||
return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=4)
|
||||
|
||||
def get_train_dataloader(self, transform: bool = True, predict_sequence_length: int = 96):
|
||||
train_df = self.all_features.copy()
|
||||
@@ -131,7 +139,7 @@ class DataProcessor:
|
||||
train_df['load_forecast'] = self.load_forecast_scaler.fit_transform(train_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
|
||||
train_df['total_load'] = self.load_forecast_scaler.transform(train_df['total_load'].values.reshape(-1, 1)).reshape(-1)
|
||||
|
||||
train_dataset = NrvDataset(train_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
|
||||
train_dataset = NrvDataset(train_df, data_config=self.data_config, full_day_skip=self.full_day_skip, predict_sequence_length=predict_sequence_length)
|
||||
return self.get_dataloader(train_dataset)
|
||||
|
||||
def get_test_dataloader(self, transform: bool = True, predict_sequence_length: int = 96):
|
||||
@@ -149,7 +157,7 @@ class DataProcessor:
|
||||
test_df['load_forecast'] = self.load_forecast_scaler.transform(test_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
|
||||
test_df['total_load'] = self.load_forecast_scaler.transform(test_df['total_load'].values.reshape(-1, 1)).reshape(-1)
|
||||
|
||||
test_dataset = NrvDataset(test_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
|
||||
test_dataset = NrvDataset(test_df, data_config=self.data_config, full_day_skip=self.full_day_skip, predict_sequence_length=predict_sequence_length)
|
||||
return self.get_dataloader(test_dataset, shuffle=False)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user