diff --git a/Result-Reports/Policies.md b/Result-Reports/Policies.md index 48c4fb4..dbd2386 100644 --- a/Result-Reports/Policies.md +++ b/Result-Reports/Policies.md @@ -144,5 +144,14 @@ Test data: 01-01-2023 until 08-10–2023 - [x] Profit penalty parameter als over charge cycles voor een dag -> parameter bepalen op training data (convex probleem) (< 400 charge cycles per jaar) (over een dag kijken hoeveel charge cycles -> profit - penalty * charge cycles erover, (misschien belonen als eronder charge cycles)) - [ ] Meer verschil bekijken tussen GRU en diffusion -- [ ] Andere lagen voor diffusion model (GRU, kijken naar TSDiff) -- [x] Policies met andere modellen (Linear, Non Linear) \ No newline at end of file +- [ ] (In Progress) Andere lagen voor diffusion model (GRU, kijken naar TSDiff) +- [x] Policies met andere modellen (Linear, Non Linear) + +- [ ] Visualize the policies over the whole test set -> thresholds plotten voor elke dag (elke policy) -> mss distribution om overzichtelijk te houden (mean and std) +- [ ] Probleem met diffusion model (activation function? waarom direct grote waardes?) + +- [ ] Autoregressive confidence problem -> Quantiles zelf uit elkaar halen (helpt dit?) + +- [ ] time steps reducing for diffusion model (UNet activation functions?) + +- [ ] (State space model? 
S4) \ No newline at end of file diff --git a/src/models/diffusion_model.py b/src/models/diffusion_model.py index 18a2d4d..bfd076b 100644 --- a/src/models/diffusion_model.py +++ b/src/models/diffusion_model.py @@ -56,7 +56,7 @@ class GRUDiffusionModel(DiffusionModel): # GRU layer self.gru = nn.GRU(input_size=input_size + time_dim + other_inputs_dim, hidden_size=gru_hidden_size, - num_layers=2, + num_layers=3, batch_first=True) # Fully connected layers after GRU @@ -87,7 +87,8 @@ class GRUDiffusionModel(DiffusionModel): # Pass through GRU output, hidden = self.gru(x) # Hidden Shape: [batch_size, seq_len, 1] - x = hidden + # Get last hidden state + x = hidden[-1] # Process each time step's output with fully connected layers for layer in self.fc_layers: diff --git a/src/notebooks/diffusion-training.ipynb b/src/notebooks/diffusion-training.ipynb index c6dbfb2..b18e547 100644 --- a/src/notebooks/diffusion-training.ipynb +++ b/src/notebooks/diffusion-training.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -31,7 +31,7 @@ "from datetime import datetime\n", "import torch.nn as nn\n", "from src.models.time_embedding_layer import TimeEmbedding\n", - "from src.models.diffusion_model import SimpleDiffusionModel\n", + "from src.models.diffusion_model import SimpleDiffusionModel, GRUDiffusionModel\n", "from src.trainers.diffusion_trainer import DiffusionTrainer\n", "from torchinfo import summary\n", "\n", @@ -62,30 +62,99 @@ "\n", "data_config.NOMINAL_NET_POSITION = True\n", "\n", - "data_processor = DataProcessor(data_config, path=\"../../\")\n", + "data_processor = DataProcessor(data_config, path=\"../../\", lstm=True)\n", "data_processor.set_batch_size(1024)\n", "data_processor.set_full_day_skip(True)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "ClearML Task: 
created new task id=b71216825809432682ea3c7841c07612\n", - "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/b71216825809432682ea3c7841c07612/output/log\n" + "torch.Size([1024, 96, 96])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n", - "Selected model `Autoregressive Non Linear Quantile Regression + Quarter + DoW + Net` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n" + "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([1024, 96])) that is different to the input size (torch.Size([2, 1024, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n", + " return F.mse_loss(input, target, reduction=self.reduction)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([556, 96, 96])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([556, 96])) that is different to the input size (torch.Size([2, 556, 96])). This will likely lead to incorrect results due to broadcasting. 
Please ensure they have the same size.\n", + " return F.mse_loss(input, target, reduction=self.reduction)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "KeyboardInterrupt\n", + "\n" ] } ], @@ -95,14 +164,15 @@ "epochs=150\n", "\n", "#### Model ####\n", - "model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n", + "# model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n", + "model = GRUDiffusionModel(96, [256, 256], other_inputs_dim=inputDim[2], time_dim=64, 
gru_hidden_size=128)\n", "\n", "#### ClearML ####\n", - "task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n", + "# task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n", "\n", "#### Trainer ####\n", "trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n", - "trainer.train(epochs, learningRate, task)" + "trainer.train(epochs, learningRate, None)" ] }, { @@ -246,7 +316,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.8" } }, "nbformat": 4, diff --git a/src/policies/plot_combiner.ipynb b/src/policies/plot_combiner.ipynb index 09cb0a8..fdc5023 100644 --- a/src/policies/plot_combiner.ipynb +++ b/src/policies/plot_combiner.ipynb @@ -1197,7 +1197,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.8" } }, "nbformat": 4, diff --git a/src/trainers/diffusion_trainer.py b/src/trainers/diffusion_trainer.py index 5d882c7..f96ebd6 100644 --- a/src/trainers/diffusion_trainer.py +++ b/src/trainers/diffusion_trainer.py @@ -51,7 +51,7 @@ class DiffusionTrainer: self.model = model self.device = device - self.noise_steps = 1000 + self.noise_steps = 20 self.beta_start = 1e-4 self.beta_end = 0.02 self.ts_length = 96 @@ -130,8 +130,8 @@ class DiffusionTrainer: predict_sequence_length=self.ts_length ) - train_sample_indices = self.random_samples(train=True, num_samples=10) - test_sample_indices = self.random_samples(train=False, num_samples=10) + train_sample_indices = self.random_samples(train=True, num_samples=5) + test_sample_indices = self.random_samples(train=False, num_samples=5) for epoch in range(epochs): running_loss = 0.0 @@ -153,7 +153,7 @@ class DiffusionTrainer: running_loss /= len(train_loader.dataset) - if epoch % 20 == 0 and epoch != 0: + if epoch % 40 == 0 and epoch != 0: self.test(test_loader, epoch, task) if task: @@ -164,7 +164,7 @@ class DiffusionTrainer: value=loss.item(), ) - if epoch 
% 100 == 0 and epoch != 0: + if epoch % 150 == 0 and epoch != 0: self.debug_plots(task, True, train_loader, train_sample_indices, epoch) self.debug_plots(task, False, test_loader, test_sample_indices, epoch) @@ -177,6 +177,7 @@ class DiffusionTrainer: features, target, _ = data_loader.dataset[idx] features = features.to(self.device) + features = features.unsqueeze(0) self.model.eval() with torch.no_grad(): diff --git a/src/training_scripts/diffusion_training.py b/src/training_scripts/diffusion_training.py index c206c2a..f68e786 100644 --- a/src/training_scripts/diffusion_training.py +++ b/src/training_scripts/diffusion_training.py @@ -37,7 +37,7 @@ data_config.NOMINAL_NET_POSITION = True data_config = task.connect(data_config, name="data_features") -data_processor = DataProcessor(data_config, path="", lstm=True) +data_processor = DataProcessor(data_config, path="", lstm=False) data_processor.set_batch_size(128) data_processor.set_full_day_skip(True) @@ -54,8 +54,8 @@ model_parameters = { model_parameters = task.connect(model_parameters, name="model_parameters") #### Model #### -# model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"]) -model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=256) +model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"]) +# model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=256) print("Starting training ...")