Fixed losses and multiple other things

2023-12-12 11:02:42 +00:00
parent d3bf04d68c
commit c06cc10aa6
12 changed files with 5093936 additions and 122 deletions
--- a/src/notebooks/loss_test.ipynb
+++ b/src/notebooks/loss_test.ipynb
--- a/src/notebooks/training.ipynb
+++ b/src/notebooks/training.ipynb
@@ -187,79 +187,30 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "ClearML Task: created new task id=520015e1e6ec4a83821e0bcb5411445f\n",
-      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/520015e1e6ec4a83821e0bcb5411445f/output/log\n",
-      "151780\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "ERROR: Unexpected bus error encountered in worker. This might be caused by insufficient shared memory (shm).\n",
-      "\u0000ERROR: Unexpected bus error encountered in worker. This might be caused by insufficient shared memory (shm).\n",
-      "\u0000Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fd4cc70bac0>\n",
-      "Traceback (most recent call last):\n",
-      "  File \"/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py\", line 1478, in __del__\n",
-      "    self._shutdown_workers()\n",
-      "  File \"/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py\", line 1442, in _shutdown_workers\n",
-      "    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)\n",
-      "  File \"/opt/conda/lib/python3.10/multiprocessing/process.py\", line 149, in join\n",
-      "    res = self._popen.wait(timeout)\n",
-      "  File \"/opt/conda/lib/python3.10/multiprocessing/popen_fork.py\", line 40, in wait\n",
-      "    if not wait([self.sentinel], timeout):\n",
-      "  File \"/opt/conda/lib/python3.10/multiprocessing/connection.py\", line 931, in wait\n",
-      "    ready = selector.select(timeout)\n",
-      "  File \"/opt/conda/lib/python3.10/selectors.py\", line 416, in select\n",
-      "    fd_event_list = self._selector.poll(timeout)\n",
-      "  File \"/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/signal_handling.py\", line 66, in handler\n",
-      "    _error_if_any_worker_fails()\n",
-      "RuntimeError: DataLoader worker (pid 1930288) is killed by signal: Bus error. It is possible that dataloader's workers are out of shared memory. Please try to raise your shared memory limit.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "ClearML Task: created new task id=6b50442e1cec4bf9b3bd5a34077b4217\n",
+      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/6b50442e1cec4bf9b3bd5a34077b4217/output/log\n",
+      "151780\n",
      "151780\n",
      "24979\n",
      "151780\n",
      "24979\n",
      "151780\n",
+      "24979\n",
+      "Early stopping triggered\n",
+      "151780\n",
      "24979\n"
     ]
-    },
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "\u001b[1;32m/workspaces/Thesis/src/notebooks/training.ipynb Cell 11\u001b[0m line \u001b[0;36m4\n\u001b[1;32m     <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X13sdnNjb2RlLXJlbW90ZQ%3D%3D?line=45'>46</a>\u001b[0m trainer\u001b[39m.\u001b[39mearly_stopping(patience\u001b[39m=\u001b[39m\u001b[39m10\u001b[39m)\n\u001b[1;32m     <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X13sdnNjb2RlLXJlbW90ZQ%3D%3D?line=46'>47</a>\u001b[0m trainer\u001b[39m.\u001b[39mplot_every(\u001b[39m15\u001b[39m)\n\u001b[0;32m---> <a 
href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f546865736973222c226c6f63616c446f636b6572223a66616c73652c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f766963746f726d796c6c652f53656144726976652f4d79204c69627261726965732f4750552d7365727665722f5468657369732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d@ssh-remote%2Bvictormylle.be/workspaces/Thesis/src/notebooks/training.ipynb#X13sdnNjb2RlLXJlbW90ZQ%3D%3D?line=47'>48</a>\u001b[0m trainer\u001b[39m.\u001b[39;49mtrain(task\u001b[39m=\u001b[39;49mtask, epochs\u001b[39m=\u001b[39;49mepochs, remotely\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\n",
-      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/trainers/trainer.py:168\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, epochs, remotely, task)\u001b[0m\n\u001b[1;32m    165\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdebug_plots(task, \u001b[39mFalse\u001b[39;00m, test_loader, test_samples, epoch)\n\u001b[1;32m    167\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mplot_quantile_percentages\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m--> 168\u001b[0m     \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mplot_quantile_percentages(\n\u001b[1;32m    169\u001b[0m         task, train_loader, \u001b[39mTrue\u001b[39;49;00m, epoch, \u001b[39mFalse\u001b[39;49;00m\n\u001b[1;32m    170\u001b[0m     )\n\u001b[1;32m    171\u001b[0m     \u001b[39m# self.plot_quantile_percentages(\u001b[39;00m\n\u001b[1;32m    172\u001b[0m     \u001b[39m#     task, train_loader, True, epoch, True\u001b[39;00m\n\u001b[1;32m    173\u001b[0m     \u001b[39m# )\u001b[39;00m\n\u001b[1;32m    174\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mplot_quantile_percentages(\n\u001b[1;32m    175\u001b[0m         task, test_loader, \u001b[39mFalse\u001b[39;00m, epoch, \u001b[39mFalse\u001b[39;00m\n\u001b[1;32m    176\u001b[0m     )\n",
-      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:283\u001b[0m, in \u001b[0;36mAutoRegressiveQuantileTrainer.plot_quantile_percentages\u001b[0;34m(self, task, data_loader, train, iteration, full_day)\u001b[0m\n\u001b[1;32m    280\u001b[0m \u001b[39mwith\u001b[39;00m torch\u001b[39m.\u001b[39mno_grad():\n\u001b[1;32m    281\u001b[0m     total_samples \u001b[39m=\u001b[39m \u001b[39mlen\u001b[39m(data_loader\u001b[39m.\u001b[39mdataset) \u001b[39m-\u001b[39m \u001b[39m96\u001b[39m\n\u001b[0;32m--> 283\u001b[0m     \u001b[39mfor\u001b[39;00m inputs, targets, idx_batch \u001b[39min\u001b[39;00m data_loader:\n\u001b[1;32m    284\u001b[0m         idx_batch \u001b[39m=\u001b[39m [idx \u001b[39mfor\u001b[39;00m idx \u001b[39min\u001b[39;00m idx_batch \u001b[39mif\u001b[39;00m idx \u001b[39m<\u001b[39m total_samples]\n\u001b[1;32m    286\u001b[0m         \u001b[39mif\u001b[39;00m full_day:\n",
-      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:633\u001b[0m, in \u001b[0;36m_BaseDataLoaderIter.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    630\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sampler_iter \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    631\u001b[0m     \u001b[39m# TODO(https://github.com/pytorch/pytorch/issues/76750)\u001b[39;00m\n\u001b[1;32m    632\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_reset()  \u001b[39m# type: ignore[call-arg]\u001b[39;00m\n\u001b[0;32m--> 633\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_next_data()\n\u001b[1;32m    634\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_num_yielded \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[1;32m    635\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_dataset_kind \u001b[39m==\u001b[39m _DatasetKind\u001b[39m.\u001b[39mIterable \u001b[39mand\u001b[39;00m \\\n\u001b[1;32m    636\u001b[0m         \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_IterableDataset_len_called \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m \\\n\u001b[1;32m    637\u001b[0m         \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_num_yielded \u001b[39m>\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_IterableDataset_len_called:\n",
-      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:1317\u001b[0m, in \u001b[0;36m_MultiProcessingDataLoaderIter._next_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1314\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m   1315\u001b[0m     \u001b[39m# no valid `self._rcvd_idx` is found (i.e., didn't break)\u001b[39;00m\n\u001b[1;32m   1316\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_persistent_workers:\n\u001b[0;32m-> 1317\u001b[0m         \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_shutdown_workers()\n\u001b[1;32m   1318\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m\n\u001b[1;32m   1320\u001b[0m \u001b[39m# Now `self._rcvd_idx` is the batch index we want to fetch\u001b[39;00m\n\u001b[1;32m   1321\u001b[0m \n\u001b[1;32m   1322\u001b[0m \u001b[39m# Check if the next sample has already been generated\u001b[39;00m\n",
-      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:1442\u001b[0m, in \u001b[0;36m_MultiProcessingDataLoaderIter._shutdown_workers\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1437\u001b[0m         \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_mark_worker_as_unavailable(worker_id, shutdown\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[1;32m   1438\u001b[0m \u001b[39mfor\u001b[39;00m w \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_workers:\n\u001b[1;32m   1439\u001b[0m     \u001b[39m# We should be able to join here, but in case anything went\u001b[39;00m\n\u001b[1;32m   1440\u001b[0m     \u001b[39m# wrong, we set a timeout and if the workers fail to join,\u001b[39;00m\n\u001b[1;32m   1441\u001b[0m     \u001b[39m# they are killed in the `finally` block.\u001b[39;00m\n\u001b[0;32m-> 1442\u001b[0m     w\u001b[39m.\u001b[39;49mjoin(timeout\u001b[39m=\u001b[39;49m_utils\u001b[39m.\u001b[39;49mMP_STATUS_CHECK_INTERVAL)\n\u001b[1;32m   1443\u001b[0m \u001b[39mfor\u001b[39;00m q \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_index_queues:\n\u001b[1;32m   1444\u001b[0m     q\u001b[39m.\u001b[39mcancel_join_thread()\n",
-      "File \u001b[0;32m/opt/conda/lib/python3.10/multiprocessing/process.py:149\u001b[0m, in \u001b[0;36mBaseProcess.join\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    147\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_parent_pid \u001b[39m==\u001b[39m os\u001b[39m.\u001b[39mgetpid(), \u001b[39m'\u001b[39m\u001b[39mcan only join a child process\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m    148\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_popen \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m, \u001b[39m'\u001b[39m\u001b[39mcan only join a started process\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m--> 149\u001b[0m res \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_popen\u001b[39m.\u001b[39;49mwait(timeout)\n\u001b[1;32m    150\u001b[0m \u001b[39mif\u001b[39;00m res \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    151\u001b[0m     _children\u001b[39m.\u001b[39mdiscard(\u001b[39mself\u001b[39m)\n",
-      "File \u001b[0;32m/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:40\u001b[0m, in \u001b[0;36mPopen.wait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m     38\u001b[0m \u001b[39mif\u001b[39;00m timeout \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m     39\u001b[0m     \u001b[39mfrom\u001b[39;00m \u001b[39mmultiprocessing\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mconnection\u001b[39;00m \u001b[39mimport\u001b[39;00m wait\n\u001b[0;32m---> 40\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m wait([\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msentinel], timeout):\n\u001b[1;32m     41\u001b[0m         \u001b[39mreturn\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m     42\u001b[0m \u001b[39m# This shouldn't block if wait() returned successfully.\u001b[39;00m\n",
-      "File \u001b[0;32m/opt/conda/lib/python3.10/multiprocessing/connection.py:931\u001b[0m, in \u001b[0;36mwait\u001b[0;34m(object_list, timeout)\u001b[0m\n\u001b[1;32m    928\u001b[0m     deadline \u001b[39m=\u001b[39m time\u001b[39m.\u001b[39mmonotonic() \u001b[39m+\u001b[39m timeout\n\u001b[1;32m    930\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m--> 931\u001b[0m     ready \u001b[39m=\u001b[39m selector\u001b[39m.\u001b[39;49mselect(timeout)\n\u001b[1;32m    932\u001b[0m     \u001b[39mif\u001b[39;00m ready:\n\u001b[1;32m    933\u001b[0m         \u001b[39mreturn\u001b[39;00m [key\u001b[39m.\u001b[39mfileobj \u001b[39mfor\u001b[39;00m (key, events) \u001b[39min\u001b[39;00m ready]\n",
-      "File \u001b[0;32m/opt/conda/lib/python3.10/selectors.py:416\u001b[0m, in \u001b[0;36m_PollLikeSelector.select\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    414\u001b[0m ready \u001b[39m=\u001b[39m []\n\u001b[1;32m    415\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 416\u001b[0m     fd_event_list \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_selector\u001b[39m.\u001b[39;49mpoll(timeout)\n\u001b[1;32m    417\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mInterruptedError\u001b[39;00m:\n\u001b[1;32m    418\u001b[0m     \u001b[39mreturn\u001b[39;00m ready\n",
-      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/clearml/task.py:4313\u001b[0m, in \u001b[0;36mTask.__register_at_exit.<locals>.ExitHooks.signal_handler\u001b[0;34m(self, sig, frame)\u001b[0m\n\u001b[1;32m   4310\u001b[0m \u001b[39m# if this is a sig term, we wait until __at_exit is called (basically do nothing)\u001b[39;00m\n\u001b[1;32m   4311\u001b[0m \u001b[39mif\u001b[39;00m sig \u001b[39m==\u001b[39m signal\u001b[39m.\u001b[39mSIGINT:\n\u001b[1;32m   4312\u001b[0m     \u001b[39m# return original handler result\u001b[39;00m\n\u001b[0;32m-> 4313\u001b[0m     \u001b[39mreturn\u001b[39;00m org_handler \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mcallable\u001b[39m(org_handler) \u001b[39melse\u001b[39;00m org_handler(sig, frame)\n\u001b[1;32m   4315\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_signal_recursion_protection_flag:\n\u001b[1;32m   4316\u001b[0m     \u001b[39m# call original\u001b[39;00m\n\u001b[1;32m   4317\u001b[0m     os\u001b[39m.\u001b[39mkill(os\u001b[39m.\u001b[39mgetpid(), sig)\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
    }
   ],
   "source": [
-    "task = clearml_helper.get_task(task_name=\"Autoregressive Quantile Regression + Quarter + Nominal Net Position + Wind + Load\")\n",
+    "task = clearml_helper.get_task(task_name=\"Autoregressive Non Linear Quantile Regression + Quarter + DoW\")\n",
    "data_config = task.connect(data_config, name=\"data_features\")\n",
    "\n",
    "#### Hyperparameters ####\n",
@@ -318,43 +269,101 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ClearML Task: created new task id=a2ae664b9a1c44f2af99b109cce78d51\n",
+      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/a2ae664b9a1c44f2af99b109cce78d51/output/log\n",
+      "1579\n",
+      "1579\n",
+      "259\n",
+      "1579\n",
+      "259\n",
+      "1579\n",
+      "259\n",
+      "Early stopping triggered\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Connecting multiple input models with the same name: `checkpoint`. This might result in the wrong model being used when executing remotely\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1579\n",
+      "259\n"
+     ]
+    }
+   ],
   "source": [
+    "task = clearml_helper.get_task(task_name=\"Non Autoregressive Non Linear Quantile Regression\")\n",
+    "data_config = task.connect(data_config, name=\"data_features\")\n",
+    "\n",
    "### Data Processor ###\n",
    "data_processor.set_full_day_skip(True)\n",
    "data_processor.set_output_size(96)\n",
-    "\n",
-    "#### Hyperparameters ####\n",
    "inputDim = data_processor.get_input_size()\n",
-    "learningRate = 0.0001\n",
-    "epochs = 100\n",
+    "epochs = 300\n",
    "\n",
-    "quantiles = torch.tensor(\n",
-    "    [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]\n",
-    ").to(\"cuda\")\n",
+    "quantiles = task.get_parameter(\"general/quantiles\", cast=True)\n",
+    "if quantiles is None:\n",
+    "    quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]\n",
+    "    task.set_parameter(\"general/quantiles\", quantiles)\n",
+    "else:\n",
+    "    if isinstance(quantiles, str):\n",
+    "        quantiles = eval(quantiles)\n",
+    "\n",
+    "model_parameters = {\n",
+    "    \"learning_rate\": 0.0001,\n",
+    "    \"hidden_size\": 1024,\n",
+    "    \"num_layers\": 3,\n",
+    "    \"dropout\": 0.2,\n",
+    "    \"time_feature_embedding\": 2,\n",
+    "}\n",
+    "\n",
+    "model_parameters = task.connect(model_parameters, name=\"model_parameters\")\n",
+    "\n",
+    "time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), model_parameters[\"time_feature_embedding\"])\n",
+    "\n",
+    "# linear_regression = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles)*96)\n",
+    "non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles)*96, hiddenSize=model_parameters[\"hidden_size\"], numLayers=model_parameters[\"num_layers\"], dropout=model_parameters[\"dropout\"])\n",
+    "\n",
+    "model = nn.Sequential(time_embedding, non_linear_regression_model)\n",
    "\n",
-    "# model = LinearRegression(inputDim, len(quantiles))\n",
-    "model = NonLinearRegression(inputDim, len(quantiles) * 96, hiddenSize=1024, numLayers=5)\n",
    "model.output_size = 96\n",
-    "optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)\n",
+    "optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters[\"learning_rate\"])\n",
    "\n",
    "#### Trainer ####\n",
    "trainer = NonAutoRegressiveQuantileRegression(\n",
    "    model,\n",
+    "    inputDim,\n",
    "    optimizer,\n",
    "    data_processor,\n",
    "    quantiles,\n",
    "    \"cuda\",\n",
    "    debug=False,\n",
-    "    clearml_helper=clearml_helper,\n",
    ")\n",
    "trainer.add_metrics_to_track([MSELoss(), L1Loss(), CRPSLoss(quantiles)])\n",
    "trainer.early_stopping(patience=10)\n",
    "trainer.plot_every(5)\n",
-    "trainer.train(epochs=epochs)"
+    "trainer.train(task=task, epochs=epochs, remotely=False)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
--- a/src/trainers/autoregressive_trainer.py
+++ b/src/trainers/autoregressive_trainer.py
@@ -36,7 +36,14 @@ class AutoRegressiveTrainer(Trainer):
    def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
        num_samples = len(sample_indices)
        rows = num_samples  # One row per sample since we only want one column
-        cols = 1
+
+        # Use a second column for the error plot when this trainer defines get_plot_error
+        if hasattr(self, "get_plot_error"):
+            cols = 2
+            print("Using get_plot_error")
+        else:
+            cols = 1
+            print("Using get_plot")

        fig = make_subplots(
            rows=rows,
@@ -63,6 +70,13 @@ class AutoRegressiveTrainer(Trainer):
            for trace in sub_fig.data:
                fig.add_trace(trace, row=row, col=col)

+            if cols == 2:
+                error_sub_fig = self.get_plot_error(
+                    target, predictions
+                )
+                for trace in error_sub_fig.data:
+                    fig.add_trace(trace, row=row, col=col + 1)
+
            loss = self.criterion(
                predictions.to(self.device), target.to(self.device)
            ).item()
--- a/src/trainers/quantile_trainer.py
+++ b/src/trainers/quantile_trainer.py
@@ -147,6 +147,37 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
                )
                task.get_logger().report_single_value(name=name, value=metric_value)

+    def get_plot_error(
+        self,
+        next_day,
+        predictions,
+    ):
+        metric = PinballLoss(quantiles=self.quantiles)
+        fig = go.Figure()
+
+        next_day_np = next_day.view(-1).cpu().numpy()
+        predictions_np = predictions.cpu().numpy()
+
+        if True:
+            next_day_np = self.data_processor.inverse_transform(next_day_np)
+            predictions_np = self.data_processor.inverse_transform(predictions_np)
+
+        # for each time step, calculate the error using the metric
+        errors = []
+        for i in range(96):
+
+            target_tensor = torch.tensor(next_day_np[i]).unsqueeze(0)
+            prediction_tensor = torch.tensor(predictions_np[i]).unsqueeze(0)
+
+            errors.append(metric(prediction_tensor, target_tensor))
+
+        # plot the error 
+        fig.add_trace(go.Scatter(x=np.arange(96), y=errors, name=metric.__class__.__name__))
+        fig.update_layout(title=f"Error of {metric.__class__.__name__} for each time step")
+
+        return fig
+
+
    def get_plot(
        self,
        current_day,
@@ -364,25 +395,24 @@ class NonAutoRegressiveQuantileRegression(Trainer):
    def __init__(
        self,
        model: torch.nn.Module,
+        input_dim: tuple,
        optimizer: torch.optim.Optimizer,
        data_processor: DataProcessor,
        quantiles: list,
        device: torch.device,
-        clearml_helper: ClearMLHelper = None,
        debug: bool = True,
    ):
-        quantiles_tensor = torch.tensor(quantiles)
-        quantiles_tensor = quantiles_tensor.to(device)
        self.quantiles = quantiles

-        criterion = NonAutoRegressivePinballLoss(quantiles=quantiles_tensor)
+
+        criterion = NonAutoRegressivePinballLoss(quantiles=quantiles)
        super().__init__(
            model=model,
+            input_dim=input_dim,
            optimizer=optimizer,
            criterion=criterion,
            data_processor=data_processor,
            device=device,
-            clearml_helper=clearml_helper,
            debug=debug,
        )

@@ -398,7 +428,7 @@ class NonAutoRegressiveQuantileRegression(Trainer):

                outputs = self.model(inputs)
                outputted_samples = [
-                    sample_from_dist(self.quantiles.cpu(), output.cpu().numpy())
+                    sample_from_dist(self.quantiles, output.cpu().numpy())
                    for output in outputs
                ]

--- a/src/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -313,6 +313,7 @@ class Trainer:
    def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
        num_samples = len(sample_indices)
        rows = num_samples  # One row per sample since we only want one column
+
        cols = 1

        fig = make_subplots(
@@ -341,6 +342,7 @@ class Trainer:
            for trace in sub_fig.data:
                fig.add_trace(trace, row=row, col=col)

+
            # loss = self.criterion(predictions.to(self.device), target.squeeze(-1).to(self.device)).item()

            # fig['layout']['annotations'][i].update(text=f"{loss.__class__.__name__}: {loss:.6f}")
--- a/src/training_scripts/autoregressive_quantiles.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -17,7 +17,7 @@ from src.models.time_embedding_layer import TimeEmbedding

 #### ClearML ####
 clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
-task = clearml_helper.get_task(task_name="Autoregressive Quantile Regression: GRU + Quarter + Load + Wind")
+task = clearml_helper.get_task(task_name="Autoregressive Quantile Regression: GRU + Quarter + DoW + Load + Wind + Net")


 #### Data Processor ####
@@ -33,7 +33,7 @@ data_config.WIND_HISTORY = True
 data_config.QUARTER = True
 data_config.DAY_OF_WEEK = True

-data_config.NOMINAL_NET_POSITION = False
+data_config.NOMINAL_NET_POSITION = True



@@ -61,8 +61,8 @@ else:
        quantiles = eval(quantiles)

 model_parameters = {
-    "learning_rate": 0.0002,
-    "hidden_size": 512,
+    "learning_rate": 0.0001,
+    "hidden_size": 1024,
    "num_layers": 2,
    "dropout": 0.2,
    "time_feature_embedding": 4,
--- a/src/utils/bid_ladder.ipynb
+++ b/src/utils/bid_ladder.ipynb
--- a/src/utils/imbalance_price_calculator.py
+++ b/src/utils/imbalance_price_calculator.py
@@ -0,0 +1,114 @@
+from datetime import datetime
+import plotly.graph_objects as go
+import numpy as np
+
+
+class ImbalancePriceCalculator:
+    def __init__(self, bids) -> None:
+        self.bids = bids
+        self.bid_ladder = self.generate_bid_ladder()
+
+    def generate_bid_ladder(self):
+        # Sort bids by activation order (fourth element of each tuple) and then by price (second element),
+        # i.e. first on activation order, then on bid price
+        bids = sorted(self.bids, key=lambda x: (x[3], x[1]))
+
+        # Calculate cumulative bids
+        cummulative_volume = 0
+        cummulative_bids = []
+        max_price = 0
+        for volume, price, _, order in bids:
+            cummulative_volume += volume
+            if price > max_price:
+                max_price = price
+            cummulative_bids.append((cummulative_volume, max_price, price, order))
+            print((cummulative_volume, max_price, price, order))
+
+        cummulative_bids = [(vol, price) for vol, price, _, _ in cummulative_bids]
+
+        return cummulative_bids
+    
+    def get_imbalance_price(self, volume):
+        for bid in self.bid_ladder:
+            if bid[0] > volume:
+                return bid[1]
+        return self.bid_ladder[-1][1]
+    
+    def plot(self):
+        # Prepare data for plot
+        x_interpolated = [vol for i in range(len(self.bid_ladder) - 1) for vol in [self.bid_ladder[i][0], self.bid_ladder[i+1][0]]]
+        y_interpolated = [price for cum_vol, price in self.bid_ladder for _ in (0, 1)]
+
+        # Create and show the plot; make sure hovering works in between the steps
+        fig = go.Figure()
+        fig.add_trace(go.Scatter(x=x_interpolated, y=y_interpolated, mode='lines+markers'))
+        fig.update_layout(
+            title='Bid ladder',
+            xaxis_title='Volume',
+            yaxis_title='Price',
+            hovermode='x unified'
+        )
+        fig.show()
+
+        # also print the prices needed for 100MW, 200MW, 300MW, 400MW, 500MW, 600MW, 700MW, 800MW, 900MW, 1000MW
+        for i in range(1, 11):
+            # find the first ladder step whose cumulative volume exceeds i*100 MW and print its price
+            
+            mw = 100
+            
+            for bid in self.bid_ladder:
+                if bid[0] > mw*i:
+                    print(f"{mw*i}MW: {bid[1]}")
+                    break
+
+        print(f"Max: {self.bid_ladder[-1][1]}")
+
+    def get_imbalance_price_2023(self, NRV_PREV, NRV):
+        pass
+
+
+# SI = -NRV
+def calculate_imbalance_price_2023(SI_PREV: float, SI: float, MIP: float, MDP: float):
+    # parameters a, b, c, d, x
+    a = 0 # EUR/MWh
+    b = 200 # EUR/MWh
+    c = 450 # MW
+    d = 65 # MW
+    # x = average of abs(SI) and abs(SI_PREV)
+    x = (abs(SI_PREV) + abs(SI)) / 2
+
+    cp = 0
+    if SI <= 0:
+        if MIP > 200 and MIP <= 400:
+            cp = (400-MIP)/200
+        else:
+            cp = 1
+    else:
+        if MDP >= 0:
+            cp = 1
+        elif MDP >= -200 and MDP < 0:
+            cp = (MDP+200)/200
+
+
+    alpha = 0
+    if abs(SI) > 150:
+        alpha = (a + b/(1+np.exp((c-x)/d))) * cp
+
+    pos_imbalance_price = MIP + alpha
+    neg_imbalance_price = MDP - alpha
+
+    # imbalance_price based on SI
+    imbalance_price = 0
+    if SI < 0:
+        imbalance_price = pos_imbalance_price
+    elif SI > 0:
+        imbalance_price = neg_imbalance_price
+    else:
+        imbalance_price = (pos_imbalance_price + neg_imbalance_price) / 2
+
+    # return alpha, imbalance price
+    return alpha, imbalance_price
+
+
+
+