From c26ae769519fd46cbe1809ff2659179ef9f5ae99 Mon Sep 17 00:00:00 2001 From: Victor Mylle Date: Sat, 30 Dec 2023 15:22:32 +0000 Subject: [PATCH] Fixing some stuff --- src/data/dataset.py | 1 - src/data/preprocessing.py | 8 +-- src/notebooks/training.ipynb | 60 ++++++++++++------- src/trainers/quantile_trainer.py | 51 +++++++++++++++- src/trainers/trainer.py | 16 ++++- .../autoregressive_quantiles.py | 4 +- 6 files changed, 107 insertions(+), 33 deletions(-) diff --git a/src/data/dataset.py b/src/data/dataset.py index 6d92baa..21408f3 100644 --- a/src/data/dataset.py +++ b/src/data/dataset.py @@ -71,7 +71,6 @@ class NrvDataset(Dataset): self.nrv = torch.tensor(dataframe["nrv"].values).float().reshape(-1) self.datetime = dataframe["datetime"] - print(dataframe.columns) self.history_features, self.forecast_features = self.preprocess_data(dataframe) def skip_samples(self, dataframe): diff --git a/src/data/preprocessing.py b/src/data/preprocessing.py index c68d527..d4b0eb6 100644 --- a/src/data/preprocessing.py +++ b/src/data/preprocessing.py @@ -253,7 +253,7 @@ class DataProcessor: return self.get_dataloader(train_dataset, shuffle=shuffle) def get_test_dataloader( - self, transform: bool = True, predict_sequence_length: int = 96 + self, transform: bool = True, predict_sequence_length: int = 96, full_day_skip: bool = False ): test_df = self.all_features.copy() @@ -287,19 +287,19 @@ class DataProcessor: test_dataset = NrvDataset( test_df, data_config=self.data_config, - full_day_skip=self.full_day_skip, + full_day_skip=self.full_day_skip or full_day_skip, predict_sequence_length=predict_sequence_length, lstm=self.lstm, ) return self.get_dataloader(test_dataset, shuffle=False) def get_dataloaders( - self, transform: bool = True, predict_sequence_length: int = 96 + self, transform: bool = True, predict_sequence_length: int = 96, full_day_skip: bool = False ): return self.get_train_dataloader( transform=transform, predict_sequence_length=predict_sequence_length ), self.get_test_dataloader( - transform=transform, predict_sequence_length=predict_sequence_length + transform=transform, predict_sequence_length=predict_sequence_length, full_day_skip=full_day_skip ) def inverse_transform(self, input_data): diff --git a/src/notebooks/training.ipynb b/src/notebooks/training.ipynb index 7f96261..1edaf11 100644 --- a/src/notebooks/training.ipynb +++ b/src/notebooks/training.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -187,16 +187,17 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "ClearML Task: created new task id=d19c767120a24f97b3231f0e8ac9f2b5\n", - "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/d19c767120a24f97b3231f0e8ac9f2b5/output/log\n", - "151780\n" + "2023-12-30 10:58:11,474 - clearml.task - WARNING - Requirement ignored, Task.add_requirements() must be called before Task.init()\n", + "2023-12-30 10:58:11,476 - clearml.task - WARNING - Requirement ignored, Task.ignore_requirements() must be called before Task.init()\n", + "2023-12-30 10:58:11,476 - clearml.task - WARNING - Requirement ignored, Task.ignore_requirements() must be called before Task.init()\n", + "2023-12-30 10:58:11,477 - clearml.task - WARNING - Requirement ignored, Task.ignore_requirements() must be called before Task.init()\n" ] }, { @@ -204,7 +205,8 @@ "output_type": "stream", "text": [ "ERROR: Unexpected bus error encountered in worker. This might be caused by insufficient shared memory (shm).\n", - "\u0000Exception ignored in: \n", + "\u0000ERROR: Unexpected bus error encountered in worker. This might be caused by insufficient shared memory (shm).\n", + "\u0000Exception ignored in: \n", "Traceback (most recent call last):\n", " File \"/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py\", line 1478, in __del__\n", " self._shutdown_workers()\n", @@ -220,26 +222,33 @@ " fd_event_list = self._selector.poll(timeout)\n", " File \"/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/signal_handling.py\", line 66, in handler\n", " _error_if_any_worker_fails()\n", - "RuntimeError: DataLoader worker (pid 326715) is killed by signal: Bus error. It is possible that dataloader's workers are out of shared memory. Please try to raise your shared memory limit.\n" + "RuntimeError: DataLoader worker (pid 610020) is killed by signal: Bus error. It is possible that dataloader's workers are out of shared memory. Please try to raise your shared memory limit.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "151780\n", - "24979\n", - "151780\n", - "24979\n", - "151780\n", - "24979\n", - "Using get_plot_error\n", - "Using get_plot_error\n", - "Using get_plot_error\n", - "Using get_plot_error\n", - "Early stopping triggered\n", - "151780\n", - "24979\n" + "2023-12-30 10:58:28,073 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Autoregressive%20Non%20Linear%20Quantile%20Regression%20%2B%20Quarter%20%2B%20DoW%20%2B%20Net.79133ec0ca4f497e815cabc31683b626/models/checkpoint.pt\n", + "2023-12-30 10:58:31,506 - clearml.Task - INFO - Completed model upload to http://192.168.1.182:8081/Thesis/NrvForecast/Autoregressive%20Non%20Linear%20Quantile%20Regression%20%2B%20Quarter%20%2B%20DoW%20%2B%20Net.79133ec0ca4f497e815cabc31683b626/models/checkpoint.pt\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 48\u001b[0m\n\u001b[1;32m 46\u001b[0m trainer\u001b[38;5;241m.\u001b[39mearly_stopping(patience\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m)\n\u001b[1;32m 47\u001b[0m trainer\u001b[38;5;241m.\u001b[39mplot_every(\u001b[38;5;241m25\u001b[39m)\n\u001b[0;32m---> 48\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mepochs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremotely\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/trainers/trainer.py:182\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, epochs, remotely, task)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;66;03m# self.plot_quantile_percentages(\u001b[39;00m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;66;03m# task, test_loader, False, epoch, True\u001b[39;00m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m task:\n\u001b[0;32m--> 182\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfinish_training\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtask\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 183\u001b[0m task\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m 184\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", + "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/trainers/trainer.py:253\u001b[0m, in \u001b[0;36mTrainer.finish_training\u001b[0;34m(self, task)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mplot_quantile_percentages\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlog_final_metrics(task, train_loader, train\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m--> 253\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog_final_metrics\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_loader\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:105\u001b[0m, in \u001b[0;36mAutoRegressiveQuantileTrainer.log_final_metrics\u001b[0;34m(self, task, dataloader, train)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m idx \u001b[38;5;129;01min\u001b[39;00m idx_batch:\n\u001b[1;32m 104\u001b[0m computed_idx_batch \u001b[38;5;241m=\u001b[39m [idx] \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m100\u001b[39m\n\u001b[0;32m--> 105\u001b[0m _, outputs, samples, targets \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mauto_regressive\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 106\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataloader\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midx_batch\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcomputed_idx_batch\u001b[49m\n\u001b[1;32m 107\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 108\u001b[0m samples \u001b[38;5;241m=\u001b[39m samples\u001b[38;5;241m.\u001b[39munsqueeze(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 109\u001b[0m targets \u001b[38;5;241m=\u001b[39m targets\u001b[38;5;241m.\u001b[39msqueeze(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n", + "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/trainers/quantile_trainer.py:275\u001b[0m, in \u001b[0;36mAutoRegressiveQuantileTrainer.auto_regressive\u001b[0;34m(self, dataset, idx_batch, sequence_length)\u001b[0m\n\u001b[1;32m 269\u001b[0m new_features \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mcat(\n\u001b[1;32m 270\u001b[0m (prev_features[:, \u001b[38;5;241m1\u001b[39m:\u001b[38;5;241m96\u001b[39m], samples), dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 271\u001b[0m ) \u001b[38;5;66;03m# (batch_size, 96)\u001b[39;00m\n\u001b[1;32m 273\u001b[0m new_features \u001b[38;5;241m=\u001b[39m new_features\u001b[38;5;241m.\u001b[39mfloat()\n\u001b[0;32m--> 275\u001b[0m other_features, new_targets \u001b[38;5;241m=\u001b[39m \u001b[43mdataset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_batch_autoregressive\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 276\u001b[0m \u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m(\u001b[49m\u001b[43midx_batch\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\n\u001b[1;32m 277\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# (batch_size, new_features)\u001b[39;00m\n\u001b[1;32m 279\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m other_features \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 280\u001b[0m prev_features \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mcat(\n\u001b[1;32m 281\u001b[0m (new_features\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice), other_features\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)), dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 282\u001b[0m ) \u001b[38;5;66;03m# (batch_size, 96 + new_features)\u001b[39;00m\n", + "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/data/dataset.py:199\u001b[0m, in \u001b[0;36mNrvDataset.get_batch_autoregressive\u001b[0;34m(self, idx)\u001b[0m\n\u001b[1;32m 197\u001b[0m targets \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m idx:\n\u001b[0;32m--> 199\u001b[0m f, t \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_day_autoregressive\u001b[49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 200\u001b[0m features\u001b[38;5;241m.\u001b[39mappend(f)\n\u001b[1;32m 201\u001b[0m targets\u001b[38;5;241m.\u001b[39mappend(t)\n", + "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/data/dataset.py:172\u001b[0m, in \u001b[0;36mNrvDataset.random_day_autoregressive\u001b[0;34m(self, idx)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrandom_day_autoregressive\u001b[39m(\u001b[38;5;28mself\u001b[39m, idx: \u001b[38;5;28mint\u001b[39m):\n\u001b[0;32m--> 172\u001b[0m all_features, nrv_target, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getitem__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43midx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# remove the first 96 values of the features (the nrv history)\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlstm:\n", + "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/data/dataset.py:162\u001b[0m, in \u001b[0;36mNrvDataset.__getitem__\u001b[0;34m(self, idx)\u001b[0m\n\u001b[1;32m 159\u001b[0m nrv_target \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnrv[actual_idx \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msequence_length : actual_idx \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msequence_length \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpredict_sequence_length]\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# check if nan values are present\u001b[39;00m\n\u001b[0;32m--> 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43misnan\u001b[49m\u001b[43m(\u001b[49m\u001b[43mall_features\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43many\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 163\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFound nan values in the features of sample \u001b[39m\u001b[38;5;132;01m{\u001b[39;00midx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mActual index: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mactual_idx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -262,7 +271,7 @@ "\n", "model_parameters = {\n", " \"learning_rate\": 0.0002,\n", - " \"hidden_size\": 1024,\n", + " \"hidden_size\": 512,\n", " \"num_layers\": 3,\n", " \"dropout\": 0.2,\n", " \"time_feature_embedding\": 2,\n", @@ -301,6 +310,11 @@ "# Non Autoregressive Quantile Regression" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, { "cell_type": "code", "execution_count": 5, diff --git a/src/trainers/quantile_trainer.py b/src/trainers/quantile_trainer.py index 9e4e656..47a2a89 100644 --- a/src/trainers/quantile_trainer.py +++ b/src/trainers/quantile_trainer.py @@ -1,4 +1,6 @@ import torch +from tqdm import tqdm +from src.losses.crps_metric import crps_from_samples from src.trainers.trainer import Trainer from src.trainers.autoregressive_trainer import AutoRegressiveTrainer from src.data.preprocessing import DataProcessor @@ -81,21 +83,63 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer): debug=debug, ) + def calculate_crps_from_samples(self, task, dataloader, epoch: int): + crps_from_samples_metric = [] + + with torch.no_grad(): + for _, _, idx_batch in tqdm(dataloader): + if len(idx_batch) == 0: + continue + + for idx in tqdm(idx_batch): + computed_idx_batch = [idx] * 100 + _, _, samples, targets = self.auto_regressive( + dataloader.dataset, idx_batch=computed_idx_batch + ) + samples = samples.unsqueeze(0) + targets = targets.squeeze(-1) + targets = targets[0].unsqueeze(0) + + crps = crps_from_samples(samples, targets) + + crps_from_samples_metric.append(crps[0].mean().item()) + + task.get_logger().report_scalar( + title="CRPS_from_samples", series="test", value=np.mean(crps_from_samples_metric), iteration=epoch + ) + def log_final_metrics(self, task, dataloader, train: bool = True): metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track} transformed_metrics = { metric.__class__.__name__: 0.0 for metric in self.metrics_to_track } + crps_from_samples_metric = [] + with torch.no_grad(): total_samples = len(dataloader.dataset) - 96 batches = 0 - for _, _, idx_batch in dataloader: + for _, _, idx_batch in tqdm(dataloader): idx_batch = [idx for idx in idx_batch if idx < total_samples] if len(idx_batch) == 0: continue + if train == False: + for idx in tqdm(idx_batch): + computed_idx_batch = [idx] * 100 + _, outputs, samples, targets = self.auto_regressive( + dataloader.dataset, idx_batch=computed_idx_batch + ) + samples = samples.unsqueeze(0) + targets = targets.squeeze(-1) + targets = targets[0].unsqueeze(0) + + crps = crps_from_samples(samples, targets) + + crps_from_samples_metric.append(crps[0].mean().item()) + + _, outputs, samples, targets = self.auto_regressive( dataloader.dataset, idx_batch=idx_batch ) @@ -147,6 +191,11 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer): ) task.get_logger().report_single_value(name=name, value=metric_value) + if train == False: + task.get_logger().report_single_value( + name="test_CRPS_from_samples_transformed", value=np.mean(crps_from_samples_metric) + ) + def get_plot_error( self, next_day, diff --git a/src/trainers/trainer.py b/src/trainers/trainer.py index efae10c..23ef867 100644 --- a/src/trainers/trainer.py +++ b/src/trainers/trainer.py @@ -99,6 +99,10 @@ class Trainer: def train(self, epochs: int, remotely: bool = False, task: Task = None): try: + _, full_day_skip_test_loader = self.data_processor.get_dataloaders( + predict_sequence_length=self.model.output_size, full_day_skip=True + ) + train_loader, test_loader = self.data_processor.get_dataloaders( predict_sequence_length=self.model.output_size ) @@ -178,6 +182,11 @@ class Trainer: # task, test_loader, False, epoch, True # ) + if hasattr(self, "calculate_crps_from_samples"): + self.calculate_crps_from_samples( + task, full_day_skip_test_loader, epoch + ) + if task: self.finish_training(task=task) task.close() @@ -243,12 +252,15 @@ class Trainer: self.model.load_state_dict(torch.load("checkpoint.pt")) self.model.eval() + + # set full day skip + self.data_processor.set_full_day_skip(True) train_loader, test_loader = self.data_processor.get_dataloaders( predict_sequence_length=self.model.output_size ) - if not hasattr(self, "plot_quantile_percentages"): - self.log_final_metrics(task, train_loader, train=True) + # if not hasattr(self, "plot_quantile_percentages"): + # self.log_final_metrics(task, train_loader, train=True) self.log_final_metrics(task, test_loader, train=False) diff --git a/src/training_scripts/autoregressive_quantiles.py b/src/training_scripts/autoregressive_quantiles.py index fa5edb9..b430dbe 100644 --- a/src/training_scripts/autoregressive_quantiles.py +++ b/src/training_scripts/autoregressive_quantiles.py @@ -43,7 +43,7 @@ data_processor.set_full_day_skip(False) #### Hyperparameters #### data_processor.set_output_size(1) inputDim = data_processor.get_input_size() -epochs = 400 +epochs = 300 # add parameters to clearml quantiles = task.get_parameter("general/quantiles", cast=True) @@ -58,7 +58,7 @@ else: model_parameters = { "learning_rate": 0.0001, - "hidden_size": 1024, + "hidden_size": 512, "num_layers": 2, "dropout": 0.2, "time_feature_embedding": 4,