Rewrote dataset to be able to include new features
This commit is contained in:
@@ -8,9 +8,15 @@ import pytz
|
||||
|
||||
|
||||
# Relative locations of the CSV inputs read by this module.
# Quarter-hourly history data.
history_data_path = "../../data/history-quarter-hour-data.csv"
# Load forecast data (read by get_load_forecast).
forecast_data_path = "../../data/load_forecast.csv"
|
||||
|
||||
class DataConfig:
    """Boolean switches passed to the data pipeline.

    The flags are plain class-level attributes; this class defines no
    behavior of its own.
    """

    # Presumably toggles the NRV history feature -- confirm against the dataset class.
    NRV_HISTORY: bool = True
    # Presumably toggles the load forecast feature -- confirm against the dataset class.
    LOAD_FORECAST: bool = True
    # Presumably toggles the load history feature (off by default) -- confirm.
    LOAD_HISTORY: bool = False
|
||||
|
||||
class DataProcessor:
|
||||
def __init__(self):
|
||||
def __init__(self, data_config: DataConfig):
|
||||
self.batch_size = 2048
|
||||
|
||||
self.train_range = (-np.inf, datetime(year=2022, month=11, day=30, tzinfo=pytz.UTC))
|
||||
@@ -18,10 +24,15 @@ class DataProcessor:
|
||||
|
||||
self.update_range_str()
|
||||
|
||||
self.features = ['nrv']
|
||||
self.history_features = self.get_nrv_history()
|
||||
self.future_features = self.get_load_forecast()
|
||||
|
||||
self.all_features = self.history_features.merge(self.future_features, on='datetime', how='left')
|
||||
|
||||
self.data_config = data_config
|
||||
|
||||
self.nrv_df = self.get_nrv_history()
|
||||
self.nrv_scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
self.load_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
|
||||
|
||||
def set_train_range(self, train_range: tuple):
|
||||
@@ -49,6 +60,22 @@ class DataProcessor:
|
||||
df.sort_values(by="datetime", inplace=True)
|
||||
return df
|
||||
|
||||
def get_load_forecast(self):
    """Load the load forecast CSV as a two-column, time-sorted DataFrame.

    Reads the semicolon-delimited file at the module-level
    ``forecast_data_path``, renames ``'Datetime'`` to ``'datetime'`` and
    ``'Day-ahead 6PM forecast'`` to ``'load_forecast'``, and drops every
    other column.

    Returns:
        A DataFrame with a UTC-aware ``datetime`` column and a
        ``load_forecast`` column, sorted ascending by ``datetime``.
        The original row index labels are kept (the index is not reset).

    Note:
        Rows with missing values are NOT rejected here; NaNs in the file
        are passed through to the caller unchanged.
    """
    raw = pd.read_csv(forecast_data_path, delimiter=';')
    renamed = raw.rename(
        columns={'Day-ahead 6PM forecast': 'load_forecast', 'Datetime': 'datetime'}
    )

    # Take an explicit copy of the column subset so the assignment below
    # writes to an independent frame instead of a slice of `renamed`
    # (avoids pandas' chained-assignment / SettingWithCopy warning).
    df = renamed[['datetime', 'load_forecast']].copy()
    df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

    return df.sort_values(by="datetime")
|
||||
|
||||
def set_batch_size(self, batch_size: int) -> None:
    """Store ``batch_size`` on the instance, replacing the previous value.

    Args:
        batch_size: New value for ``self.batch_size``.
    """
    self.batch_size = batch_size
|
||||
|
||||
@@ -56,7 +83,7 @@ class DataProcessor:
|
||||
return torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle, num_workers=4)
|
||||
|
||||
def get_train_dataloader(self, transform: bool = True, predict_sequence_length: int = 96):
|
||||
train_df = self.nrv_df.copy()
|
||||
train_df = self.all_features.copy()
|
||||
|
||||
if self.train_range[0] != -np.inf:
|
||||
train_df = train_df[(train_df['datetime'] >= self.train_range[0])]
|
||||
@@ -66,13 +93,14 @@ class DataProcessor:
|
||||
|
||||
if transform:
|
||||
train_df['nrv'] = self.nrv_scaler.fit_transform(train_df['nrv'].values.reshape(-1, 1)).reshape(-1)
|
||||
train_df['load_forecast'] = self.load_forecast_scaler.fit_transform(train_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
|
||||
|
||||
train_dataset = NrvDataset(train_df, predict_sequence_length=predict_sequence_length)
|
||||
train_dataset = NrvDataset(train_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
|
||||
return self.get_dataloader(train_dataset)
|
||||
|
||||
def get_test_dataloader(self, transform: bool = True, predict_sequence_length: int = 96):
|
||||
|
||||
test_df = self.nrv_df.copy()
|
||||
test_df = self.all_features.copy()
|
||||
|
||||
if self.test_range[0] != -np.inf:
|
||||
test_df = test_df[(test_df['datetime'] >= self.test_range[0])]
|
||||
@@ -82,35 +110,14 @@ class DataProcessor:
|
||||
|
||||
if transform:
|
||||
test_df['nrv'] = self.nrv_scaler.transform(test_df['nrv'].values.reshape(-1, 1)).reshape(-1)
|
||||
test_dataset = NrvDataset(test_df, predict_sequence_length=predict_sequence_length)
|
||||
test_df['load_forecast'] = self.load_forecast_scaler.transform(test_df['load_forecast'].values.reshape(-1, 1)).reshape(-1)
|
||||
|
||||
test_dataset = NrvDataset(test_df, data_config=self.data_config, predict_sequence_length=predict_sequence_length)
|
||||
return self.get_dataloader(test_dataset, shuffle=False)
|
||||
|
||||
|
||||
def get_dataloaders(self, transform: bool = True, predict_sequence_length: int = 96):
    """Build the train and test dataloaders with identical settings.

    Args:
        transform: Forwarded unchanged to both loader builders.
        predict_sequence_length: Forwarded unchanged to both loader builders.

    Returns:
        A ``(train_dataloader, test_dataloader)`` tuple. The train loader is
        built first, then the test loader.
    """
    train_loader = self.get_train_dataloader(
        transform=transform,
        predict_sequence_length=predict_sequence_length,
    )
    test_loader = self.get_test_dataloader(
        transform=transform,
        predict_sequence_length=predict_sequence_length,
    )
    return train_loader, test_loader
|
||||
|
||||
def get_random_day(self, train: bool = True, transform: bool = True):
    """Sample two consecutive 96-row windows from the NRV history.

    The history in ``self.nrv_df`` is restricted to ``self.train_range`` or
    ``self.test_range`` (both bounds inclusive; an infinite bound disables
    that side of the filter). A random start row is then drawn and the 192
    rows from there are split into two halves. The start row is arbitrary,
    so the windows are not aligned to any calendar boundary.

    Args:
        train: Use ``self.train_range`` when True, else ``self.test_range``.
        transform: When True, scale the ``nrv`` column with the already
            fitted ``self.nrv_scaler`` before building the tensors.

    Returns:
        ``(current_day_features, next_day_features)``: two float32 tensors
        of shape ``(96, len(self.features))``; the second immediately
        follows the first in the filtered data.

    Raises:
        ValueError: If the selected range holds fewer than 192 rows.
    """
    # 96 rows per window; presumably one day of quarter-hour samples.
    steps_per_day = 96
    window_length = 2 * steps_per_day

    # Deliberately not called `range`, which would shadow the builtin.
    selected_range = self.train_range if train else self.test_range
    lower_bound, upper_bound = selected_range[0], selected_range[1]

    df = self.nrv_df
    if lower_bound != -np.inf:
        df = df[df['datetime'] >= lower_bound]
    if upper_bound != np.inf:
        df = df[df['datetime'] <= upper_bound]
    # Copy only the filtered rows: keeps `self.nrv_df` untouched and makes
    # the column assignment below safe (no write to a slice).
    df = df.copy()

    if len(df) < window_length:
        raise ValueError(
            f"Need at least {window_length} rows in the selected range to "
            f"sample two consecutive windows, got {len(df)}."
        )

    if transform:
        df['nrv'] = self.nrv_scaler.transform(df['nrv'].values.reshape(-1, 1)).reshape(-1)

    data_tensor = torch.tensor(df[self.features].values, dtype=torch.float32)

    # `high` is exclusive, so the last valid start is len(df) - window_length.
    random_start_idx = np.random.randint(0, len(df) - window_length + 1)
    split_idx = random_start_idx + steps_per_day
    end_idx = random_start_idx + window_length

    current_day_features = data_tensor[random_start_idx:split_idx]
    next_day_features = data_tensor[split_idx:end_idx]

    return (current_day_features, next_day_features)
|
||||
|
||||
def inverse_transform(self, tensor: torch.Tensor):
    """Map scaled values back through ``self.nrv_scaler`` and flatten them.

    Args:
        tensor: Tensor of scaled values. It is moved to the CPU and handed
            to ``self.nrv_scaler.inverse_transform`` as a NumPy array, so
            its shape must be one the scaler accepts.

    Returns:
        The scaler's output reshaped to one dimension.
    """
    # `.detach()` lets tensors that still require grad (e.g. raw model
    # outputs) be converted; `.numpy()` raises RuntimeError on them otherwise.
    scaled_values = tensor.detach().cpu().numpy()
    return self.nrv_scaler.inverse_transform(scaled_values).reshape(-1)
|
||||
Reference in New Issue
Block a user