Fixed small summary with model architectures until now

2023-11-30 21:53:35 +00:00
parent eba10c8f83
commit 120b6aa5bd
23 changed files with 402 additions and 185 deletions
--- a/src/data/dataset.py
+++ b/src/data/dataset.py
@@ -15,19 +15,18 @@ class NrvDataset(Dataset):
        lstm: bool = False,
    ):
        self.data_config = data_config
-        self.dataframe = dataframe
        self.full_day_skip = full_day_skip
        self.lstm = lstm

        # reset dataframe index
-        self.dataframe.reset_index(drop=True, inplace=True)
+        dataframe.reset_index(drop=True, inplace=True)

        self.sequence_length = sequence_length
        self.predict_sequence_length = predict_sequence_length

-        self.samples_to_skip = self.skip_samples()
+        self.samples_to_skip = self.skip_samples(dataframe=dataframe)
        total_indices = set(
-            range(len(self.dataframe) - self.sequence_length - self.predict_sequence_length)
+            range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
        )
        self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))

@@ -48,20 +47,28 @@ class NrvDataset(Dataset):
            self.forecast_features.append("wind_gen_forecast")

        # add time feature to dataframe
-        time_feature = np.array([0] * len(self.dataframe))
+        time_feature = np.array([0] * len(dataframe))
        if self.data_config.QUARTER:
-            time_feature += self.dataframe["quarter"]
+            time_feature += dataframe["quarter"]

        if self.data_config.DAY_OF_WEEK:
-            d_w = self.dataframe["day_of_week"]
+            d_w = dataframe["day_of_week"]
            if self.data_config.QUARTER:
                d_w *= 96
            time_feature += d_w

-        self.dataframe["time_feature"] = time_feature
+        # keep the time feature only when at least one value is non-zero; otherwise store None
+        if time_feature.any():
+            self.time_feature = torch.tensor(time_feature).float().reshape(-1)
+        else:
+            self.time_feature = None
+    
+        self.nrv = torch.tensor(dataframe["nrv"].values).float().reshape(-1)

-    def skip_samples(self):
-        nan_rows = self.dataframe[self.dataframe.isnull().any(axis=1)]
+        self.history_features, self.forecast_features = self.preprocess_data(dataframe)
+
+    def skip_samples(self, dataframe):
+        nan_rows = dataframe[dataframe.isnull().any(axis=1)]
        nan_indices = nan_rows.index
        skip_indices = [
            list(
@@ -79,18 +86,22 @@ class NrvDataset(Dataset):
        # add indices that are not the start of a day (00:15) to the skip indices (use datetime column)
        # get indices of all 00:15 timestamps
        if self.full_day_skip:
-            start_of_day_indices = self.dataframe[
-                self.dataframe["datetime"].dt.time != pd.Timestamp("00:15:00").time()
+            start_of_day_indices = dataframe[
+                dataframe["datetime"].dt.time != pd.Timestamp("00:15:00").time()
            ].index
            skip_indices.extend(start_of_day_indices)
            skip_indices = list(set(skip_indices))

        return skip_indices
+    
+    def preprocess_data(self, dataframe):
+        return torch.tensor(dataframe[self.history_features].values).float(), torch.tensor(dataframe[self.forecast_features].values).float()
+

    def __len__(self):
        return len(self.valid_indices)

-    def _get__all_data(self, idx: int):
+    def _get_all_data(self, idx: int):
        history_df = self.dataframe.iloc[idx : idx + self.sequence_length]
        forecast_df = self.dataframe.iloc[
            idx + self.sequence_length : idx + self.sequence_length + self.predict_sequence_length
@@ -99,39 +110,47 @@ class NrvDataset(Dataset):

    def __getitem__(self, idx):
        actual_idx = self.valid_indices[idx]
-        
-        history_df, forecast_df = self._get__all_data(actual_idx)
-        
+                
        # get nrv history features
-        nrv_features = torch.tensor(history_df[["nrv"]].values).reshape(-1)
+        nrv_features = self.nrv[actual_idx : actual_idx + self.sequence_length]

-        # get history featues
-        history_features = history_df[self.history_features].values
+        history_features = self.history_features[actual_idx : actual_idx + self.sequence_length, :]
+        forecast_features = self.forecast_features[actual_idx + self.sequence_length : actual_idx + self.sequence_length + self.predict_sequence_length, :]

-        # combine the history features to one tensor (first one feature, then the next one, etc.)
-        history_features = torch.tensor(history_features)
-
-        # get forecast features
-        forecast_features = forecast_df[self.forecast_features].values
-        forecast_features = torch.tensor(forecast_features)
-
-        # add last time feature of the history
-        time_feature = history_df["time_feature"].iloc[-1]
+        if self.time_feature is not None:
+            time_features = self.time_feature[actual_idx : actual_idx + self.sequence_length]
+        else:
+            time_features = None

        ## all features
        if not self.lstm:
-            all_features = torch.cat(
-                [nrv_features, history_features.reshape(-1), forecast_features.reshape(-1), torch.tensor([time_feature])], dim=0
-            )
+            all_features_list = [nrv_features]
+
+            if history_features.numel() > 0:
+                all_features_list.append(history_features.reshape(-1))
+
+            if forecast_features.numel() > 0:
+                all_features_list.append(forecast_features.reshape(-1))
+
+            if time_features is not None:
+                all_features_list.append(torch.tensor([time_features[-1]]))
+
+            all_features = torch.cat(all_features_list, dim=0)
+
        else:
-            time_features = torch.tensor(history_df["time_feature"].values).reshape(-1, 1)
-            # combine (96, ) and (96, 2) to  (96, 3)
-            all_features = torch.cat(
-                [nrv_features.unsqueeze(1), time_features], dim=1
-            )
+            all_features_list = [nrv_features.unsqueeze(1)]
+
+            if self.forecast_features.numel() > 0:
+                history_forecast_features = self.forecast_features[actual_idx + 1 : actual_idx + self.sequence_length + 1, :]
+                all_features_list.append(history_forecast_features)
+
+            if time_features is not None:
+                all_features_list.append(time_features.unsqueeze(-1))
+
+            all_features = torch.cat(all_features_list, dim=1)

        # Target sequence, flattened if necessary
-        nrv_target = forecast_df["nrv"].values
+        nrv_target = self.nrv[actual_idx + self.sequence_length : actual_idx + self.sequence_length + self.predict_sequence_length]

        # check if nan values are present
        if torch.isnan(all_features).any():
@@ -141,16 +160,18 @@ class NrvDataset(Dataset):

        # all features and target to float
        all_features = all_features.float()
-
-        # to tens&éazzaéaz"ezéors
-        nrv_target = torch.tensor(nrv_target).float()
        return all_features, nrv_target, idx

    def random_day_autoregressive(self, idx: int):
        all_features, nrv_target, _ = self.__getitem__(idx)

        # remove the first 96 values of the features (the nrv history)
-        all_features = all_features[self.sequence_length :]
+        if not self.lstm:
+            all_features = all_features[self.sequence_length :]
+        else:
+            # keep only the last time step, then restore a leading axis of size 1
+            all_features = all_features[-1, :]
+            all_features = all_features.unsqueeze(0)

        return all_features, nrv_target

--- a/src/models/linear_regression.py
+++ b/src/models/linear_regression.py
@@ -1,11 +1,17 @@
 import torch
+import numpy as np

 class LinearRegression(torch.nn.Module):
    def __init__(self, inputSize, output_size):
        super(LinearRegression, self).__init__()
        self.inputSize = inputSize
        self.output_size = output_size
-        self.linear = torch.nn.Linear(inputSize, output_size)
+
+        # input width of the linear layer: product of all entries of inputSize except the first
+        dim = inputSize[1:]
+        dim = [int(x) for x in dim]
+        dim = np.prod(dim)
+        self.linear = torch.nn.Linear(dim, output_size)

    def forward(self, x):
        x = torch.squeeze(x, -1)
--- a/src/models/lstm_model.py
+++ b/src/models/lstm_model.py
@@ -37,9 +37,9 @@ class GRUModel(torch.nn.Module):

    def forward(self, x):
        # Forward pass through the GRU layers
-        _, hidden_state = self.gru(x)
-        
+        x, _ = self.gru(x)
+        x = x[:, -1, :]
        # Use the hidden state from the last time step for the output
-        output = self.linear(hidden_state[-1])
+        output = self.linear(x)
        
        return output
--- a/src/models/non_linear_regression.py
+++ b/src/models/non_linear_regression.py
@@ -13,7 +13,7 @@ class NonLinearRegression(torch.nn.Module):

        # add linear layers with relu
        self.layers = torch.nn.ModuleList()
-        self.layers.append(torch.nn.Linear(inputSize, hiddenSize))
+        self.layers.append(torch.nn.Linear(inputSize[-1], hiddenSize))
        self.layers.append(torch.nn.Dropout(dropout))
        for _ in range(numLayers - 2):
            self.layers.append(torch.nn.Linear(hiddenSize, hiddenSize))
--- a/src/models/time_embedding_layer.py
+++ b/src/models/time_embedding_layer.py
@@ -10,6 +10,8 @@ class TimeEmbedding(nn.Module):

    def forward(self, x):
        # Extract the last 'time_features' from the input
+        if self.time_features == 0:
+            return x
        time_feature = x[..., -1]  # Use ellipsis to access the last dimension
        # convert to int
        time_feature = time_feature.int()
@@ -20,6 +22,8 @@ class TimeEmbedding(nn.Module):

    
    def output_dim(self, input_dim):
+        if self.time_features == 0:
+            return input_dim
        # Create a list from the input dimension
        input_dim_list = list(input_dim)
        # Modify the last dimension
--- a/src/notebooks/training.ipynb
+++ b/src/notebooks/training.ipynb
@@ -42,15 +42,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### Data Processor ####\n",
    "data_config = DataConfig()\n",
    "data_config.NRV_HISTORY = True\n",
-    "data_config.LOAD_HISTORY = False\n",
-    "data_config.LOAD_FORECAST = False\n",
+    "data_config.LOAD_HISTORY = True\n",
+    "data_config.LOAD_FORECAST = True\n",
    "\n",
    "data_config.WIND_FORECAST = False\n",
    "data_config.WIND_HISTORY = False\n",
@@ -72,16 +72,32 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "Interrupted by user",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[1;32m/workspaces/Thesis/src/notebooks/training.ipynb Cell 5\u001b[0m line \u001b[0;36m7\n\u001b[1;32m      <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#W4sdnNjb2RlLXJlbW90ZQ%3D%3D?line=2'>3</a>\u001b[0m trainer \u001b[39m=\u001b[39m ProbabilisticBaselineTrainer(\n\u001b[1;32m      <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#W4sdnNjb2RlLXJlbW90ZQ%3D%3D?line=3'>4</a>\u001b[0m     quantiles\u001b[39m=\u001b[39mquantiles, data_processor\u001b[39m=\u001b[39mdata_processor, clearml_helper\u001b[39m=\u001b[39mclearml_helper\n\u001b[1;32m      <a 
href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#W4sdnNjb2RlLXJlbW90ZQ%3D%3D?line=4'>5</a>\u001b[0m )\n\u001b[1;32m      <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#W4sdnNjb2RlLXJlbW90ZQ%3D%3D?line=5'>6</a>\u001b[0m trainer\u001b[39m.\u001b[39madd_metrics_to_track([CRPSLoss()])\n\u001b[0;32m----> <a 
href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#W4sdnNjb2RlLXJlbW90ZQ%3D%3D?line=6'>7</a>\u001b[0m trainer\u001b[39m.\u001b[39;49mtrain()\n",
+      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/trainers/probabilistic_baseline.py:43\u001b[0m, in \u001b[0;36mProbabilisticBaselineTrainer.train\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     42\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mtrain\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m---> 43\u001b[0m     task \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_clearml_task()\n\u001b[1;32m     44\u001b[0m     \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m     45\u001b[0m         time_steps \u001b[39m=\u001b[39m [[] \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39m96\u001b[39m)]\n",
+      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/trainers/probabilistic_baseline.py:28\u001b[0m, in \u001b[0;36mProbabilisticBaselineTrainer.init_clearml_task\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     25\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclearml_helper:\n\u001b[1;32m     26\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m---> 28\u001b[0m task_name \u001b[39m=\u001b[39m \u001b[39minput\u001b[39;49m(\u001b[39m\"\u001b[39;49m\u001b[39mEnter a task name: \u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m     29\u001b[0m \u001b[39mif\u001b[39;00m task_name \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m     30\u001b[0m     task_name \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mUntitled Task\u001b[39m\u001b[39m\"\u001b[39m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ipykernel/kernelbase.py:1251\u001b[0m, in \u001b[0;36mKernel.raw_input\u001b[0;34m(self, prompt)\u001b[0m\n\u001b[1;32m   1249\u001b[0m     msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mraw_input was called, but this frontend does not support input requests.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m   1250\u001b[0m     \u001b[39mraise\u001b[39;00m StdinNotImplementedError(msg)\n\u001b[0;32m-> 1251\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_input_request(\n\u001b[1;32m   1252\u001b[0m     \u001b[39mstr\u001b[39;49m(prompt),\n\u001b[1;32m   1253\u001b[0m     \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parent_ident[\u001b[39m\"\u001b[39;49m\u001b[39mshell\u001b[39;49m\u001b[39m\"\u001b[39;49m],\n\u001b[1;32m   1254\u001b[0m     \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_parent(\u001b[39m\"\u001b[39;49m\u001b[39mshell\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[1;32m   1255\u001b[0m     password\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m   1256\u001b[0m )\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ipykernel/kernelbase.py:1295\u001b[0m, in \u001b[0;36mKernel._input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m   1292\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyboardInterrupt\u001b[39;00m:\n\u001b[1;32m   1293\u001b[0m     \u001b[39m# re-raise KeyboardInterrupt, to truncate traceback\u001b[39;00m\n\u001b[1;32m   1294\u001b[0m     msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mInterrupted by user\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m-> 1295\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mKeyboardInterrupt\u001b[39;00m(msg) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m   1296\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[1;32m   1297\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlog\u001b[39m.\u001b[39mwarning(\u001b[39m\"\u001b[39m\u001b[39mInvalid Message:\u001b[39m\u001b[39m\"\u001b[39m, exc_info\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: Interrupted by user"
+     ]
+    }
+   ],
   "source": [
    "data_processor.set_full_day_skip(True)\n",
-    "quantiles = [0.01, 0.05, 0.1, 0.15, 0.4, 0.5, 0.6, 0.85, 0.9, 0.95, 0.99]\n",
+    "quantiles = [0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 0.99]\n",
    "trainer = ProbabilisticBaselineTrainer(\n",
    "    quantiles=quantiles, data_processor=data_processor, clearml_helper=clearml_helper\n",
    ")\n",
-    "trainer.add_metrics_to_track([CRPSLoss(quantiles=quantiles)])\n",
+    "trainer.add_metrics_to_track([CRPSLoss()])\n",
    "trainer.train()"
   ]
  },
@@ -100,7 +116,7 @@
   "source": [
    "#### Hyperparameters ####\n",
    "inputDim = data_processor.get_input_size()\n",
-    "learningRate = 0.00005\n",
+    "learningRate = 0.0001\n",
    "epochs = 150\n",
    "\n",
    "# model = LinearRegression(inputDim, 96)\n",
@@ -133,8 +149,8 @@
    "learningRate = 0.0003\n",
    "epochs = 50\n",
    "\n",
-    "# model = LinearRegression(inputDim, 1)\n",
-    "model = NonLinearRegression(inputDim, 1, hiddenSize=1024, numLayers=5)\n",
+    "model = LinearRegression(inputDim, 1)\n",
+    "# model = NonLinearRegression(inputDim, 1, hiddenSize=1024, numLayers=5)\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)\n",
    "\n",
    "#### Data Processor ####\n",
@@ -167,13 +183,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
+      "ClearML Task: created new task id=1182d39a984b478c9301aafb4a81ff1b\n",
+      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/1182d39a984b478c9301aafb4a81ff1b/output/log\n",
      "96\n"
     ]
    },
@@ -181,72 +199,66 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:68: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
-      "  quantiles_tensor = torch.tensor(quantiles)\n",
-      "/workspaces/Thesis/src/notebooks/../../src/losses/pinball_loss.py:8: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
-      "  self.quantiles_tensor = torch.tensor(quantiles, dtype=torch.float32)\n",
-      "InsecureRequestWarning: Certificate verification is disabled! Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n"
+      "Connecting multiple input models with the same name: `checkpoint`. This might result in the wrong model being used when executing remotely\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "ClearML Task: created new task id=4652507a84f5435fb6bd98c645d15f24\n",
-      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/4652507a84f5435fb6bd98c645d15f24/output/log\n",
-      "2023-11-26 22:15:47,860 - clearml.Task - INFO - Storing jupyter notebook directly as code\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Switching to remote execution, output log page http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/4652507a84f5435fb6bd98c645d15f24/output/log\n"
-     ]
-    },
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
+      "Early stopping triggered\n"
     ]
    }
   ],
   "source": [
+    "task = clearml_helper.get_task(task_name=\"Autoregressive Quantile Regression + Quarter + Load Forecast + Load History\")\n",
+    "data_config = task.connect(data_config, name=\"data_features\")\n",
+    "\n",
    "#### Hyperparameters ####\n",
    "data_processor.set_output_size(1)\n",
    "inputDim = data_processor.get_input_size()\n",
-    "learningRate = 0.0001\n",
-    "epochs = 100\n",
+    "epochs = 300\n",
    "\n",
-    "# quantiles = torch.tensor([0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]).to(\"cuda\")\n",
-    "quantiles = torch.tensor(\n",
-    "    [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]\n",
-    ").to(\"cuda\")\n",
+    "quantiles = task.get_parameter(\"general/quantiles\", cast=True)\n",
+    "if quantiles is None:\n",
+    "    quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]\n",
+    "    task.set_parameter(\"general/quantiles\", quantiles)\n",
+    "else:\n",
+    "    if isinstance(quantiles, str):\n",
+    "        quantiles = eval(quantiles)\n",
    "\n",
-    "# model = LinearRegression(inputDim, len(quantiles))\n",
-    "time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)\n",
-    "non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)\n",
+    "model_parameters = {\n",
+    "    \"learning_rate\": 0.0001,\n",
+    "    \"hidden_size\": 512,\n",
+    "    \"num_layers\": 3,\n",
+    "    \"dropout\": 0.2,\n",
+    "    \"time_feature_embedding\": 2,\n",
+    "}\n",
+    "\n",
+    "model_parameters = task.connect(model_parameters, name=\"model_parameters\")\n",
+    "\n",
+    "time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), model_parameters[\"time_feature_embedding\"])\n",
+    "# linear_regression = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))\n",
+    "non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=model_parameters[\"hidden_size\"], numLayers=model_parameters[\"num_layers\"], dropout=model_parameters[\"dropout\"])\n",
    "model = nn.Sequential(time_embedding, non_linear_regression_model)\n",
-    "optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)\n",
+    "optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters[\"learning_rate\"])\n",
    "\n",
    "#### Trainer ####\n",
    "trainer = AutoRegressiveQuantileTrainer(\n",
    "    model,\n",
+    "    inputDim,\n",
    "    optimizer,\n",
    "    data_processor,\n",
    "    quantiles,\n",
    "    \"cuda\",\n",
-    "    debug=True,\n",
-    "    clearml_helper=clearml_helper,\n",
+    "    debug=False,\n",
    ")\n",
    "trainer.add_metrics_to_track(\n",
-    "    [PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]\n",
+    "    [PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss()]\n",
    ")\n",
    "trainer.early_stopping(patience=10)\n",
    "trainer.plot_every(5)\n",
-    "trainer.train(epochs=epochs, remotely=True)"
+    "trainer.train(task=task, epochs=epochs, remotely=False)"
   ]
  },
  {
@@ -258,49 +270,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:335: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
-      "  quantiles_tensor = torch.tensor(quantiles)\n",
-      "/workspaces/Thesis/src/notebooks/../../src/losses/pinball_loss.py:22: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
-      "  self.quantiles_tensor = torch.tensor(quantiles, dtype=torch.float32)\n",
-      "InsecureRequestWarning: Certificate verification is disabled! Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ClearML Task: created new task id=0c748cf6ec0f4c748cc35be78ae4c6c1\n",
-      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/0c748cf6ec0f4c748cc35be78ae4c6c1/output/log\n",
-      "2023-11-26 16:15:07,490 - clearml.Task - INFO - Storing jupyter notebook directly as code\n",
-      "2023-11-26 16:15:09,255 - clearml.model - WARNING - 500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n",
-      "2023-11-26 16:15:09,256 - clearml.model - WARNING - Selected model `Autoregressive Quantile Regression (quarter + day of week)` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n",
-      "2023-11-26 16:15:09,265 - clearml.frameworks - INFO - Found existing registered model id=bc0cb0d7fc614e2e8b0edf5b85348646 [/workspaces/Thesis/src/notebooks/checkpoint.pt] reusing it.\n",
-      "2023-11-26 16:15:09,958 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
-      "2023-11-26 16:15:10,998 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
-      "2023-11-26 16:15:12,118 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
-      "2023-11-26 16:15:13,152 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
-      "2023-11-26 16:15:14,540 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Untitled%20Task.0c748cf6ec0f4c748cc35be78ae4c6c1/models/checkpoint.pt\n",
-      "Early stopping triggered\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:366: UserWarning:\n",
-      "\n",
-      "Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at /opt/conda/conda-bld/pytorch_1682343967769/work/torch/csrc/utils/tensor_new.cpp:245.)\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "### Data Processor ###\n",
    "data_processor.set_full_day_skip(True)\n",
--- a/src/trainers/autoregressive_trainer.py
+++ b/src/trainers/autoregressive_trainer.py
@@ -15,6 +15,7 @@ class AutoRegressiveTrainer(Trainer):
    def __init__(
        self,
        model: torch.nn.Module,
+        input_dim: tuple,
        optimizer: torch.optim.Optimizer,
        criterion: torch.nn.Module,
        data_processor: DataProcessor,
@@ -23,6 +24,7 @@ class AutoRegressiveTrainer(Trainer):
    ):
        super().__init__(
            model=model,
+            input_dim=input_dim,
            optimizer=optimizer,
            criterion=criterion,
            data_processor=data_processor,
--- a/src/trainers/probabilistic_baseline.py
+++ b/src/trainers/probabilistic_baseline.py
@@ -48,7 +48,7 @@ class ProbabilisticBaselineTrainer(Trainer):
                predict_sequence_length=96
            )

-            for inputs, _ in train_loader:
+            for inputs, _, _ in train_loader:
                for i in range(96):
                    time_steps[i].extend(inputs[:, i].numpy())

@@ -80,7 +80,7 @@ class ProbabilisticBaselineTrainer(Trainer):
            raise

    def log_final_metrics(self, task, dataloader, quantile_values, train: bool = True):
-        metric = CRPSLoss(self.quantiles)
+        metric = CRPSLoss()

        crps_values = []
        crps_inversed_values = []
@@ -147,6 +147,9 @@ class ProbabilisticBaselineTrainer(Trainer):
    def plot_quantiles(self, quantile_values):
        fig = go.Figure()

+        # inverse transform quantile_values
+        quantile_values = self.data_processor.inverse_transform(quantile_values)
+
        for i, q in enumerate(self.quantiles):
            values_for_quantile = quantile_values[:, i]
            fig.add_trace(
@@ -159,7 +162,8 @@ class ProbabilisticBaselineTrainer(Trainer):
            )

        fig.update_layout(title="Quantile Values")
-        fig.update_yaxes(range=[-1, 1])
+
+        fig.update_layout(height=600)

        return fig

--- a/src/trainers/quantile_trainer.py
+++ b/src/trainers/quantile_trainer.py
@@ -60,6 +60,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
    def __init__(
        self,
        model: torch.nn.Module,
+        input_dim: tuple,
        optimizer: torch.optim.Optimizer,
        data_processor: DataProcessor,
        quantiles: list,
@@ -72,6 +73,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
        criterion = PinballLoss(quantiles=quantiles)
        super().__init__(
            model=model,
+            input_dim=input_dim,
            optimizer=optimizer,
            criterion=criterion,
            data_processor=data_processor,
@@ -192,7 +194,10 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
        prev_features = prev_features.to(self.device)
        targets = targets.to(self.device)

-        initial_sequence = prev_features[:, :96]
+        if len(list(prev_features.shape)) == 2:
+            initial_sequence = prev_features[:, :96]
+        else:
+            initial_sequence = prev_features[:, :, 0]

        target_full = targets[:, 0].unsqueeze(1)  # (batch_size, 1)
        with torch.no_grad():
@@ -206,22 +211,37 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
        predictions_full = new_predictions_full.unsqueeze(1)

        for i in range(sequence_length - 1):
-            new_features = torch.cat(
-                (prev_features[:, 1:96], samples), dim=1
-            )  # (batch_size, 96)
+            if len(list(prev_features.shape)) == 2: 
+                new_features = torch.cat(
+                    (prev_features[:, 1:96], samples), dim=1
+                )  # (batch_size, 96)

-            new_features = new_features.float()
+                new_features = new_features.float()

-            other_features, new_targets = dataset.get_batch_autoregressive(
-                np.array(idx_batch) + i + 1
-            )  # (batch_size, new_features)
+                other_features, new_targets = dataset.get_batch_autoregressive(
+                    np.array(idx_batch) + i + 1
+                )  # (batch_size, new_features)
+
+                if other_features is not None:
+                    prev_features = torch.cat(
+                        (new_features.to(self.device), other_features.to(self.device)), dim=1
+                    )  # (batch_size, 96 + new_features)
+                else:
+                    prev_features = new_features

-            if other_features is not None:
-                prev_features = torch.cat(
-                    (new_features.to(self.device), other_features.to(self.device)), dim=1
-                )  # (batch_size, 96 + new_features)
            else:
-                prev_features = new_features
+                other_features, new_targets = dataset.get_batch_autoregressive(
+                    np.array(idx_batch) + i + 1
+                )  # (batch_size, 1, new_features)
+
+                # overwrite the NRV entry (feature index 0) of other_features with the sampled values
+                other_features[:, 0, 0] = samples.squeeze(-1)
+                # make sure both tensors are on the same device before concatenating
+                other_features = other_features.to(self.device)
+                prev_features = prev_features.to(self.device)
+                prev_features = torch.cat(
+                    (prev_features[:, 1:, :], other_features), dim=1
+                )  # (batch_size, 96, new_features)

            target_full = torch.cat(
                (target_full, new_targets.to(self.device)), dim=1
--- a/src/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -6,18 +6,20 @@ import plotly.graph_objects as go
 import numpy as np
 from plotly.subplots import make_subplots
 from clearml.config import running_remotely
-
+from torchinfo import summary

 class Trainer:
    def __init__(
        self,
        model: torch.nn.Module,
+        input_dim: tuple,
        optimizer: torch.optim.Optimizer,
        criterion: torch.nn.Module,
        data_processor: DataProcessor,
        device: torch.device,
        debug: bool = True,
    ):
+        self.input_dim = input_dim
        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion
@@ -70,6 +72,8 @@ class Trainer:
        task.add_tags(self.optimizer.__class__.__name__)
        task.add_tags(self.__class__.__name__)

+        task.set_configuration_object("model", str(summary(self.model, self.input_dim)))
+
        self.optimizer.name = self.optimizer.__class__.__name__
        self.criterion.name = self.criterion.__class__.__name__

--- a/src/training_scripts/autoregressive_quantiles.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -17,19 +17,18 @@ from src.models.time_embedding_layer import TimeEmbedding

 #### ClearML ####
 clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
-task = clearml_helper.get_task(task_name="Autoregressive Quantile Regression")
+task = clearml_helper.get_task(task_name="Autoregressive Quantile Regression: GRU + Quarter + Load Forecast")


 #### Data Processor ####
 data_config = DataConfig()
 data_config.NRV_HISTORY = True
-data_config.LOAD_HISTORY = True
 data_config.LOAD_FORECAST = True

 data_config.QUARTER = True
-data_config.DAY_OF_WEEK = True
+data_config.DAY_OF_WEEK = False

-# data_config = task.connect(data_config, name="data_features")
+data_config = task.connect(data_config, name="data_features")

 data_processor = DataProcessor(data_config, path="", lstm=True)
 data_processor.set_batch_size(512)
@@ -39,37 +38,49 @@ data_processor.set_full_day_skip(False)
 #### Hyperparameters ####
 data_processor.set_output_size(1)
 inputDim = data_processor.get_input_size()
-learningRate = 0.001
-epochs = 100
-
-print("Input dim: ", inputDim)
+epochs = 400

 # add parameters to clearml
 quantiles = task.get_parameter("general/quantiles", cast=True)
+# make sure quantiles ends up as a list: use the defaults when unset, parse it when it arrives as a string
 if quantiles is None:
    quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]
    task.set_parameter("general/quantiles", quantiles)
+else:
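+    # if it is a string, e.g. "[0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]", convert it to a list (NOTE(review): eval executes arbitrary code; ast.literal_eval would be the safer parser here)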
+    if isinstance(quantiles, str):
+        quantiles = eval(quantiles)
+
+model_parameters = {
+    "learning_rate": 0.0001,
+    "hidden_size": 512,
+    "num_layers": 2,
+    "dropout": 0.2,
+    "time_feature_embedding": 4,
+}
+
+model_parameters = task.connect(model_parameters, name="model_parameters")
+
+time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"])
+lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=model_parameters["hidden_size"], num_layers=model_parameters["num_layers"], dropout=model_parameters["dropout"])

-# model = LinearRegression(inputDim, len(quantiles))
-time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)
-# non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)
-lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=512, num_layers=2)
 model = nn.Sequential(time_embedding, lstm_model)
-optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
+optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"])

 #### Trainer ####
 trainer = AutoRegressiveQuantileTrainer(
    model,
+    inputDim,
    optimizer,
    data_processor,
    quantiles,
    "cuda",
-    debug=True,
+    debug=False,
 )

 trainer.add_metrics_to_track(
    [PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss()]
 )
-trainer.early_stopping(patience=10)
-trainer.plot_every(100)
-trainer.train(task=task, epochs=epochs, remotely=True)
+trainer.early_stopping(patience=30)
+trainer.plot_every(5)
+trainer.train(task=task, epochs=epochs, remotely=True)
--- a/src/training_scripts/hyperparameter_optimizer.py
+++ b/src/training_scripts/hyperparameter_optimizer.py
@@ -60,11 +60,16 @@ quantile_lists = [
 quantiles_range = DiscreteParameterRange("general/quantiles", values=quantile_lists)

 #### Data Config ####
-quarter_range = DiscreteParameterRange("data_features/quarter", values=[True, False])
-day_of_week_range = DiscreteParameterRange("data_features/day_of_week", values=[True, False])
+quarter_range = DiscreteParameterRange("data_features/quarter", values=[True])
+day_of_week_range = DiscreteParameterRange("data_features/day_of_week", values=[True])

-load_forecast_range = DiscreteParameterRange("data_features/load_forecast", values=[True, False])
-load_history_range = DiscreteParameterRange("data_features/load_history", values=[True, False])
+load_forecast_range = DiscreteParameterRange("data_features/load_forecast", values=[True])
+
+learning_rate = DiscreteParameterRange("model_parameters/learning_rate", values=[0.00001, 0.00005, 0.0001, 0.0005, 0.001])
+hidden_size = DiscreteParameterRange("model_parameters/hidden_size", values=[64, 128, 256, 512, 1024, 2048])
+num_layers = DiscreteParameterRange("model_parameters/num_layers", values=[1, 2, 3, 4, 5, 6])
+dropout = DiscreteParameterRange("model_parameters/dropout", values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5])
+time_feature_embedding = DiscreteParameterRange("model_parameters/time_feature_embedding", values=[1,2,3,4,5,6])

 ### OPTIMIZER OBJECT ###
 optimizer = HyperParameterOptimizer(
@@ -75,24 +80,27 @@ optimizer = HyperParameterOptimizer(
    execution_queue=execution_queue,
    max_number_of_concurrent_tasks=1,
    optimizer_class=aSearchStrategy,
-    max_iteration_per_job=50,
+    max_iteration_per_job=300,
    # save_top_k_tasks_only=3,
-    pool_period_min=0.2,
-    total_max_jobs=15,
+    pool_period_min=1,
+    total_max_jobs=40,

    hyper_parameters=[
-        quantiles_range,
        quarter_range,
        day_of_week_range,
        load_forecast_range,
-        load_history_range
+        learning_rate,
+        hidden_size,
+        num_layers,
+        dropout,
+        time_feature_embedding
    ]

 )
        
 task.execute_remotely(queue_name="hypertuning", exit_process=True)

-optimizer.set_report_period(0.2)
+optimizer.set_report_period(1)

 def job_complete_callback(
    job_id,                 # type: str
@@ -106,9 +114,9 @@ def job_complete_callback(
        print('WOOT WOOT we broke the record! Objective reached {}'.format(objective_value))

 optimizer.start(job_complete_callback=job_complete_callback)
-optimizer.set_time_limit(in_minutes=120.0)
+optimizer.set_time_limit(in_minutes=120.0*8)
 optimizer.wait()
-top_exp = optimizer.get_top_experiments(top_k=3)
+top_exp = optimizer.get_top_experiments(top_k=5)
 print([t.id for t in top_exp])
 # make sure background optimization stopped
 optimizer.stop()