Fixed small summary with model architectures until now

This commit is contained in:
Victor Mylle
2023-11-30 21:53:35 +00:00
parent eba10c8f83
commit 120b6aa5bd
23 changed files with 402 additions and 185 deletions

View File

@@ -15,19 +15,18 @@ class NrvDataset(Dataset):
lstm: bool = False,
):
self.data_config = data_config
self.dataframe = dataframe
self.full_day_skip = full_day_skip
self.lstm = lstm
# reset dataframe index
self.dataframe.reset_index(drop=True, inplace=True)
dataframe.reset_index(drop=True, inplace=True)
self.sequence_length = sequence_length
self.predict_sequence_length = predict_sequence_length
self.samples_to_skip = self.skip_samples()
self.samples_to_skip = self.skip_samples(dataframe=dataframe)
total_indices = set(
range(len(self.dataframe) - self.sequence_length - self.predict_sequence_length)
range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
)
self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
@@ -48,20 +47,28 @@ class NrvDataset(Dataset):
self.forecast_features.append("wind_gen_forecast")
# add time feature to dataframe
time_feature = np.array([0] * len(self.dataframe))
time_feature = np.array([0] * len(dataframe))
if self.data_config.QUARTER:
time_feature += self.dataframe["quarter"]
time_feature += dataframe["quarter"]
if self.data_config.DAY_OF_WEEK:
d_w = self.dataframe["day_of_week"]
d_w = dataframe["day_of_week"]
if self.data_config.QUARTER:
d_w *= 96
time_feature += d_w
self.dataframe["time_feature"] = time_feature
# if not all zero:
if time_feature.any():
self.time_feature = torch.tensor(time_feature).float().reshape(-1)
else:
self.time_feature = None
self.nrv = torch.tensor(dataframe["nrv"].values).float().reshape(-1)
def skip_samples(self):
nan_rows = self.dataframe[self.dataframe.isnull().any(axis=1)]
self.history_features, self.forecast_features = self.preprocess_data(dataframe)
def skip_samples(self, dataframe):
nan_rows = dataframe[dataframe.isnull().any(axis=1)]
nan_indices = nan_rows.index
skip_indices = [
list(
@@ -79,18 +86,22 @@ class NrvDataset(Dataset):
# add indices that are not the start of a day (00:15) to the skip indices (use datetime column)
# get indices of all 00:15 timestamps
if self.full_day_skip:
start_of_day_indices = self.dataframe[
self.dataframe["datetime"].dt.time != pd.Timestamp("00:15:00").time()
start_of_day_indices = dataframe[
dataframe["datetime"].dt.time != pd.Timestamp("00:15:00").time()
].index
skip_indices.extend(start_of_day_indices)
skip_indices = list(set(skip_indices))
return skip_indices
def preprocess_data(self, dataframe):
return torch.tensor(dataframe[self.history_features].values).float(), torch.tensor(dataframe[self.forecast_features].values).float()
def __len__(self):
return len(self.valid_indices)
def _get__all_data(self, idx: int):
def _get_all_data(self, idx: int):
history_df = self.dataframe.iloc[idx : idx + self.sequence_length]
forecast_df = self.dataframe.iloc[
idx + self.sequence_length : idx + self.sequence_length + self.predict_sequence_length
@@ -99,39 +110,47 @@ class NrvDataset(Dataset):
def __getitem__(self, idx):
actual_idx = self.valid_indices[idx]
history_df, forecast_df = self._get__all_data(actual_idx)
# get nrv history features
nrv_features = torch.tensor(history_df[["nrv"]].values).reshape(-1)
nrv_features = self.nrv[actual_idx : actual_idx + self.sequence_length]
# get history features
history_features = history_df[self.history_features].values
history_features = self.history_features[actual_idx : actual_idx + self.sequence_length, :]
forecast_features = self.forecast_features[actual_idx + self.sequence_length : actual_idx + self.sequence_length + self.predict_sequence_length, :]
# combine the history features to one tensor (first one feature, then the next one, etc.)
history_features = torch.tensor(history_features)
# get forecast features
forecast_features = forecast_df[self.forecast_features].values
forecast_features = torch.tensor(forecast_features)
# add last time feature of the history
time_feature = history_df["time_feature"].iloc[-1]
if self.time_feature is not None:
time_features = self.time_feature[actual_idx : actual_idx + self.sequence_length]
else:
time_features = None
## all features
if not self.lstm:
all_features = torch.cat(
[nrv_features, history_features.reshape(-1), forecast_features.reshape(-1), torch.tensor([time_feature])], dim=0
)
all_features_list = [nrv_features]
if history_features.numel() > 0:
all_features_list.append(history_features.reshape(-1))
if forecast_features.numel() > 0:
all_features_list.append(forecast_features.reshape(-1))
if time_features is not None:
all_features_list.append(torch.tensor([time_features[-1]]))
all_features = torch.cat(all_features_list, dim=0)
else:
time_features = torch.tensor(history_df["time_feature"].values).reshape(-1, 1)
# combine (96, ) and (96, 2) to (96, 3)
all_features = torch.cat(
[nrv_features.unsqueeze(1), time_features], dim=1
)
all_features_list = [nrv_features.unsqueeze(1)]
if self.forecast_features.numel() > 0:
history_forecast_features = self.forecast_features[actual_idx + 1 : actual_idx + self.sequence_length + 1, :]
all_features_list.append(history_forecast_features)
if time_features is not None:
all_features_list.append(time_features.unsqueeze(-1))
all_features = torch.cat(all_features_list, dim=1)
# Target sequence, flattened if necessary
nrv_target = forecast_df["nrv"].values
nrv_target = self.nrv[actual_idx + self.sequence_length : actual_idx + self.sequence_length + self.predict_sequence_length]
# check if nan values are present
if torch.isnan(all_features).any():
@@ -141,16 +160,18 @@ class NrvDataset(Dataset):
# all features and target to float
all_features = all_features.float()
# to tensors
nrv_target = torch.tensor(nrv_target).float()
return all_features, nrv_target, idx
def random_day_autoregressive(self, idx: int):
all_features, nrv_target, _ = self.__getitem__(idx)
# remove the first 96 values of the features (the nrv history)
all_features = all_features[self.sequence_length :]
if not self.lstm:
all_features = all_features[self.sequence_length :]
else:
# last time step
all_features = all_features[-1, :]
all_features = all_features.unsqueeze(0)
return all_features, nrv_target