diff --git a/Result-Reports/Policies.md b/Result-Reports/Policies.md index 48c4fb4..dbd2386 100644 --- a/Result-Reports/Policies.md +++ b/Result-Reports/Policies.md @@ -144,5 +144,14 @@ Test data: 01-01-2023 until 08-10–2023 - [x] Profit penalty parameter als over charge cycles voor een dag -> parameter bepalen op training data (convex probleem) (< 400 charge cycles per jaar) (over een dag kijken hoeveel charge cycles -> profit - penalty * charge cycles erover, (misschien belonen als eronder charge cycles)) - [ ] Meer verschil bekijken tussen GRU en diffusion -- [ ] Andere lagen voor diffusion model (GRU, kijken naar TSDiff) -- [x] Policies met andere modellen (Linear, Non Linear) \ No newline at end of file +- [ ] (In Progress) Andere lagen voor diffusion model (GRU, kijken naar TSDiff) +- [x] Policies met andere modellen (Linear, Non Linear) + +- [ ] Visualize the policies over the whole test set -> thresholds plotten voor elke dag (elke policy) -> mss distribution om overzichtelijk te houden (mean and std) +- [ ] Probleem met diffusion model (activation function? waarom direct grote waardes?) + +- [ ] Autoregressive confidence problem -> Quantiles zelf uit elkaar halen (helpt dit?) + +- [ ] time steps reducing for diffusion model (UNet activation functions?) + +- [ ] (State space model? 
S4) \ No newline at end of file diff --git a/src/models/diffusion_model.py b/src/models/diffusion_model.py index 18a2d4d..bfd076b 100644 --- a/src/models/diffusion_model.py +++ b/src/models/diffusion_model.py @@ -56,7 +56,7 @@ class GRUDiffusionModel(DiffusionModel): # GRU layer self.gru = nn.GRU(input_size=input_size + time_dim + other_inputs_dim, hidden_size=gru_hidden_size, - num_layers=2, + num_layers=3, batch_first=True) # Fully connected layers after GRU @@ -87,7 +87,8 @@ class GRUDiffusionModel(DiffusionModel): # Pass through GRU output, hidden = self.gru(x) # Hidden Shape: [batch_size, seq_len, 1] - x = hidden + # Get last hidden state + x = hidden[-1] # Process each time step's output with fully connected layers for layer in self.fc_layers: diff --git a/src/notebooks/diffusion-training.ipynb b/src/notebooks/diffusion-training.ipynb index c6dbfb2..b18e547 100644 --- a/src/notebooks/diffusion-training.ipynb +++ b/src/notebooks/diffusion-training.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -31,7 +31,7 @@ "from datetime import datetime\n", "import torch.nn as nn\n", "from src.models.time_embedding_layer import TimeEmbedding\n", - "from src.models.diffusion_model import SimpleDiffusionModel\n", + "from src.models.diffusion_model import SimpleDiffusionModel, GRUDiffusionModel\n", "from src.trainers.diffusion_trainer import DiffusionTrainer\n", "from torchinfo import summary\n", "\n", @@ -62,30 +62,99 @@ "\n", "data_config.NOMINAL_NET_POSITION = True\n", "\n", - "data_processor = DataProcessor(data_config, path=\"../../\")\n", + "data_processor = DataProcessor(data_config, path=\"../../\", lstm=True)\n", "data_processor.set_batch_size(1024)\n", "data_processor.set_full_day_skip(True)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "ClearML Task: 
created new task id=b71216825809432682ea3c7841c07612\n", - "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/b71216825809432682ea3c7841c07612/output/log\n" + "torch.Size([1024, 96, 96])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n", - "Selected model `Autoregressive Non Linear Quantile Regression + Quarter + DoW + Net` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n" + "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([1024, 96])) that is different to the input size (torch.Size([2, 1024, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n", + " return F.mse_loss(input, target, reduction=self.reduction)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([556, 96, 96])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([556, 96])) that is different to the input size (torch.Size([2, 556, 96])). This will likely lead to incorrect results due to broadcasting. 
Please ensure they have the same size.\n", + " return F.mse_loss(input, target, reduction=self.reduction)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n", + "torch.Size([1024, 96, 96])\n", + "torch.Size([556, 96, 96])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "KeyboardInterrupt\n", + "\n" ] } ], @@ -95,14 +164,15 @@ "epochs=150\n", "\n", "#### Model ####\n", - "model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n", + "# model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n", + "model = GRUDiffusionModel(96, [256, 256], other_inputs_dim=inputDim[2], time_dim=64, 
gru_hidden_size=128)\n", "\n", "#### ClearML ####\n", - "task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n", + "# task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n", "\n", "#### Trainer ####\n", "trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n", - "trainer.train(epochs, learningRate, task)" + "trainer.train(epochs, learningRate, None)" ] }, { @@ -246,7 +316,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.8" } }, "nbformat": 4, diff --git a/src/policies/plot_combiner.ipynb b/src/policies/plot_combiner.ipynb index 09cb0a8..fdc5023 100644 --- a/src/policies/plot_combiner.ipynb +++ b/src/policies/plot_combiner.ipynb @@ -1197,7 +1197,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.8" } }, "nbformat": 4, diff --git a/src/trainers/diffusion_trainer.py b/src/trainers/diffusion_trainer.py index 5d882c7..f96ebd6 100644 --- a/src/trainers/diffusion_trainer.py +++ b/src/trainers/diffusion_trainer.py @@ -51,7 +51,7 @@ class DiffusionTrainer: self.model = model self.device = device - self.noise_steps = 1000 + self.noise_steps = 20 self.beta_start = 1e-4 self.beta_end = 0.02 self.ts_length = 96 @@ -130,8 +130,8 @@ class DiffusionTrainer: predict_sequence_length=self.ts_length ) - train_sample_indices = self.random_samples(train=True, num_samples=10) - test_sample_indices = self.random_samples(train=False, num_samples=10) + train_sample_indices = self.random_samples(train=True, num_samples=5) + test_sample_indices = self.random_samples(train=False, num_samples=5) for epoch in range(epochs): running_loss = 0.0 @@ -153,7 +153,7 @@ class DiffusionTrainer: running_loss /= len(train_loader.dataset) - if epoch % 20 == 0 and epoch != 0: + if epoch % 40 == 0 and epoch != 0: self.test(test_loader, epoch, task) if task: @@ -164,7 +164,7 @@ class DiffusionTrainer: value=loss.item(), ) - if epoch 
% 100 == 0 and epoch != 0: + if epoch % 150 == 0 and epoch != 0: self.debug_plots(task, True, train_loader, train_sample_indices, epoch) self.debug_plots(task, False, test_loader, test_sample_indices, epoch) @@ -177,6 +177,7 @@ class DiffusionTrainer: features, target, _ = data_loader.dataset[idx] features = features.to(self.device) + features = features.unsqueeze(0) self.model.eval() with torch.no_grad(): diff --git a/src/training_scripts/diffusion_training.py b/src/training_scripts/diffusion_training.py index c206c2a..f68e786 100644 --- a/src/training_scripts/diffusion_training.py +++ b/src/training_scripts/diffusion_training.py @@ -37,7 +37,7 @@ data_config.NOMINAL_NET_POSITION = True data_config = task.connect(data_config, name="data_features") -data_processor = DataProcessor(data_config, path="", lstm=True) +data_processor = DataProcessor(data_config, path="", lstm=False) data_processor.set_batch_size(128) data_processor.set_full_day_skip(True) @@ -54,8 +54,8 @@ model_parameters = { model_parameters = task.connect(model_parameters, name="model_parameters") #### Model #### -# model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"]) -model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=256) +model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"]) +# model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=256) print("Starting training ...")