Fixed small summary with model architectures until now

2023-11-30 21:53:35 +00:00
parent eba10c8f83
commit 120b6aa5bd
23 changed files with 402 additions and 185 deletions
--- a/src/data/dataset.py
+++ b/src/data/dataset.py
@@ -15,19 +15,18 @@ class NrvDataset(Dataset):
        lstm: bool = False,
    ):
        self.data_config = data_config
-        self.dataframe = dataframe
        self.full_day_skip = full_day_skip
        self.lstm = lstm

        # reset dataframe index
-        self.dataframe.reset_index(drop=True, inplace=True)
+        dataframe.reset_index(drop=True, inplace=True)

        self.sequence_length = sequence_length
        self.predict_sequence_length = predict_sequence_length

-        self.samples_to_skip = self.skip_samples()
+        self.samples_to_skip = self.skip_samples(dataframe=dataframe)
        total_indices = set(
-            range(len(self.dataframe) - self.sequence_length - self.predict_sequence_length)
+            range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
        )
        self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))

@@ -48,20 +47,28 @@ class NrvDataset(Dataset):
            self.forecast_features.append("wind_gen_forecast")

        # add time feature to dataframe
-        time_feature = np.array([0] * len(self.dataframe))
+        time_feature = np.array([0] * len(dataframe))
        if self.data_config.QUARTER:
-            time_feature += self.dataframe["quarter"]
+            time_feature += dataframe["quarter"]

        if self.data_config.DAY_OF_WEEK:
-            d_w = self.dataframe["day_of_week"]
+            d_w = dataframe["day_of_week"]
            if self.data_config.QUARTER:
                d_w *= 96
            time_feature += d_w

-        self.dataframe["time_feature"] = time_feature
+        # if not all zero:
+        if time_feature.any():
+            self.time_feature = torch.tensor(time_feature).float().reshape(-1)
+        else:
+            self.time_feature = None
+    
+        self.nrv = torch.tensor(dataframe["nrv"].values).float().reshape(-1)

-    def skip_samples(self):
-        nan_rows = self.dataframe[self.dataframe.isnull().any(axis=1)]
+        self.history_features, self.forecast_features = self.preprocess_data(dataframe)
+
+    def skip_samples(self, dataframe):
+        nan_rows = dataframe[dataframe.isnull().any(axis=1)]
        nan_indices = nan_rows.index
        skip_indices = [
            list(
@@ -79,18 +86,22 @@ class NrvDataset(Dataset):
        # add indices that are not the start of a day (00:15) to the skip indices (use datetime column)
        # get indices of all 00:15 timestamps
        if self.full_day_skip:
-            start_of_day_indices = self.dataframe[
-                self.dataframe["datetime"].dt.time != pd.Timestamp("00:15:00").time()
+            start_of_day_indices = dataframe[
+                dataframe["datetime"].dt.time != pd.Timestamp("00:15:00").time()
            ].index
            skip_indices.extend(start_of_day_indices)
            skip_indices = list(set(skip_indices))

        return skip_indices
+    
+    def preprocess_data(self, dataframe):
+        return torch.tensor(dataframe[self.history_features].values).float(), torch.tensor(dataframe[self.forecast_features].values).float()
+

    def __len__(self):
        return len(self.valid_indices)

-    def _get__all_data(self, idx: int):
+    def _get_all_data(self, idx: int):
        history_df = self.dataframe.iloc[idx : idx + self.sequence_length]
        forecast_df = self.dataframe.iloc[
            idx + self.sequence_length : idx + self.sequence_length + self.predict_sequence_length
@@ -99,39 +110,47 @@ class NrvDataset(Dataset):

    def __getitem__(self, idx):
        actual_idx = self.valid_indices[idx]
-        
-        history_df, forecast_df = self._get__all_data(actual_idx)
-        
+                
        # get nrv history features
-        nrv_features = torch.tensor(history_df[["nrv"]].values).reshape(-1)
+        nrv_features = self.nrv[actual_idx : actual_idx + self.sequence_length]

-        # get history featues
-        history_features = history_df[self.history_features].values
+        history_features = self.history_features[actual_idx : actual_idx + self.sequence_length, :]
+        forecast_features = self.forecast_features[actual_idx + self.sequence_length : actual_idx + self.sequence_length + self.predict_sequence_length, :]

-        # combine the history features to one tensor (first one feature, then the next one, etc.)
-        history_features = torch.tensor(history_features)
-
-        # get forecast features
-        forecast_features = forecast_df[self.forecast_features].values
-        forecast_features = torch.tensor(forecast_features)
-
-        # add last time feature of the history
-        time_feature = history_df["time_feature"].iloc[-1]
+        if self.time_feature is not None:
+            time_features = self.time_feature[actual_idx : actual_idx + self.sequence_length]
+        else:
+            time_features = None

        ## all features
        if not self.lstm:
-            all_features = torch.cat(
-                [nrv_features, history_features.reshape(-1), forecast_features.reshape(-1), torch.tensor([time_feature])], dim=0
-            )
+            all_features_list = [nrv_features]
+
+            if history_features.numel() > 0:
+                all_features_list.append(history_features.reshape(-1))
+
+            if forecast_features.numel() > 0:
+                all_features_list.append(forecast_features.reshape(-1))
+
+            if time_features is not None:
+                all_features_list.append(torch.tensor([time_features[-1]]))
+
+            all_features = torch.cat(all_features_list, dim=0)
+
        else:
-            time_features = torch.tensor(history_df["time_feature"].values).reshape(-1, 1)
-            # combine (96, ) and (96, 2) to  (96, 3)
-            all_features = torch.cat(
-                [nrv_features.unsqueeze(1), time_features], dim=1
-            )
+            all_features_list = [nrv_features.unsqueeze(1)]
+
+            if self.forecast_features.numel() > 0:
+                history_forecast_features = self.forecast_features[actual_idx + 1 : actual_idx + self.sequence_length + 1, :]
+                all_features_list.append(history_forecast_features)
+
+            if time_features is not None:
+                all_features_list.append(time_features.unsqueeze(-1))
+
+            all_features = torch.cat(all_features_list, dim=1)

        # Target sequence, flattened if necessary
-        nrv_target = forecast_df["nrv"].values
+        nrv_target = self.nrv[actual_idx + self.sequence_length : actual_idx + self.sequence_length + self.predict_sequence_length]

        # check if nan values are present
        if torch.isnan(all_features).any():
@@ -141,16 +160,18 @@ class NrvDataset(Dataset):

        # all features and target to float
        all_features = all_features.float()
-
-        # to tens&éazzaéaz"ezéors
-        nrv_target = torch.tensor(nrv_target).float()
        return all_features, nrv_target, idx

    def random_day_autoregressive(self, idx: int):
        all_features, nrv_target, _ = self.__getitem__(idx)

        # remove the first 96 values of the features (the nrv history)
-        all_features = all_features[self.sequence_length :]
+        if not self.lstm:
+            all_features = all_features[self.sequence_length :]
+        else:
+            # last time step
+            all_features = all_features[-1, :]
+            all_features = all_features.unsqueeze(0)

        return all_features, nrv_target