diff --git a/Reports/Thesis/verslag.log b/Reports/Thesis/verslag.log
index cc0dabe..deebed7 100644
--- a/Reports/Thesis/verslag.log
+++ b/Reports/Thesis/verslag.log
@@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.9.17)  20 MAR 2024 16:47
+This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.9.17)  20 MAR 2024 22:13
 entering extended mode
  restricted \write18 enabled.
  file:line:error style messages enabled.
@@ -1055,7 +1055,27 @@ Underfull \hbox (badness 10000) in paragraph at lines 226--233
 
  []
 
-[3{/usr/local/texlive/2023/texmf-dist/fonts/enc/dvips/libertinust1math/libusMI.enc}] [4] (./verslag.aux)
+[3{/usr/local/texlive/2023/texmf-dist/fonts/enc/dvips/libertinust1math/libusMI.enc}]
+Underfull \hbox (badness 10000) in paragraph at lines 237--246
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 237--246
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 237--246
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 237--246
+
+ []
+
+[4] (./verslag.aux)
 Package rerunfilecheck Info: File `verslag.out' has not changed.
 (rerunfilecheck)             Checksum: 93BB3520344D4F0680BD23B0E2A0C01A;781.
 Package logreq Info: Writing requests to 'verslag.run.xml'.
@@ -1071,7 +1091,7 @@ Here is how much of TeX's memory you used:
  1141 hyphenation exceptions out of 8191
  72i,11n,108p,1343b,5180s stack positions out of 10000i,1000n,20000p,200000b,200000s
 </usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertinust1math/LibertinusT1Math.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertine/LinBiolinumT.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertine/LinBiolinumTB.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertine/LinLibertineT.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertine/LinLibertineTB.pfb>
-Output written on verslag.pdf (5 pages, 435646 bytes).
+Output written on verslag.pdf (5 pages, 436330 bytes).
 PDF statistics:
  97 PDF objects out of 1000 (max. 8388607)
  75 compressed objects within 1 object stream
diff --git a/Reports/Thesis/verslag.pdf b/Reports/Thesis/verslag.pdf
index 640a716..e204709 100644
Binary files a/Reports/Thesis/verslag.pdf and b/Reports/Thesis/verslag.pdf differ
diff --git a/Reports/Thesis/verslag.synctex.gz b/Reports/Thesis/verslag.synctex.gz
index becbae7..82ddfe3 100644
Binary files a/Reports/Thesis/verslag.synctex.gz and b/Reports/Thesis/verslag.synctex.gz differ
diff --git a/Reports/Thesis/verslag.tex b/Reports/Thesis/verslag.tex
index ad58921..c3a4d67 100644
--- a/Reports/Thesis/verslag.tex
+++ b/Reports/Thesis/verslag.tex
@@ -231,9 +231,21 @@ The imbalance price calculation includes the following variables: \\
 % TODO: Add more information about the imbalance price calculation, alpha?
 TODO: Add more information about the imbalance price calculation, alpha?
 
-The imbalance price can be reconstructed given the bids of a certain quarter / day and the System Imbalance. During this thesis, the system imbalance is assumed to be almost the same as the Net Regulation Volume. This is a simplification but it is a good approximation. The goal of this thesis is to model the Net Regulation Volume which can then be used to reconstruct the imbalance price and to make decisions on when to buy or sell electricity.
+The imbalance price can be reconstructed given the bids of a certain quarter/day and the System Imbalance. In this thesis, the system imbalance is assumed to be almost the same as the Net Regulation Volume. This is a simplification, but it is a good approximation. The goal of this thesis is to model the Net Regulation Volume, which can then be used to reconstruct the imbalance price and to make decisions on when to buy or sell electricity.
 
 \subsection{Generative modeling}
+Simple forecasting of the NRV is often inaccurate, and defining a policy based on such a forecast will lead to wrong decisions. A better method is to model the NRV and generate multiple samples of it. This gives a better prediction, and confidence intervals can be calculated from the samples.
+\\\\
+Generative modeling is a type of machine learning that is used to generate new data samples. The goal of generative modeling is to learn the true distribution of the training data. From this learned distribution, new samples can be generated. Generative modeling is used in many different fields, including image generation, text generation, etc.
+\\\\
+TODO: Formulas of generative modeling
+\\\\
+In this thesis, generative modeling can be used to model the NRV of the Belgian electricity market using different input features such as the weather, the electricity price, etc. The model can then be used to generate new samples of the NRV.
+\\\\
+Multiple methods can be used to generatively model the NRV.
+
+
+
 
 
 \section{Literature Study}
diff --git a/src/trainers/quantile_trainer.py b/src/trainers/quantile_trainer.py
index 38bfe66..a6dff73 100644
--- a/src/trainers/quantile_trainer.py
+++ b/src/trainers/quantile_trainer.py
@@ -558,8 +558,7 @@ class NonAutoRegressiveQuantileRegression(Trainer):
 
                 outputs = self.model(inputs)
                 outputted_samples = [
-                    sample_from_dist(self.quantiles, output.cpu().numpy())
-                    for output in outputs
+                    sample_from_dist(self.quantiles, output.cpu()) for output in outputs
                 ]
 
                 outputted_samples = torch.tensor(outputted_samples)
@@ -618,20 +617,24 @@ class NonAutoRegressiveQuantileRegression(Trainer):
 
     def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
         for actual_idx, idx in sample_indices.items():
-            initial, target, _ = data_loader.dataset[idx]
+            features, target, _ = data_loader.dataset[idx]
 
-            # get predictions
-            initial = initial.to(self.device)
+            features = features.to(self.device)
+            target = target.to(self.device)
 
-            predicted_quantiles = self.model(initial)
-            predictions = predicted_quantiles.reshape(-1, len(self.quantiles))
+            self.model.eval()
+            with torch.no_grad():
+                predicted_quantiles = self.model(features)
+                predictions = predicted_quantiles.reshape(-1, len(self.quantiles))
 
             samples = [
                 sample_from_dist(self.quantiles, predictions) for _ in range(100)
             ]
             samples = torch.tensor(samples)
 
-            fig = self.get_plot(initial, target, samples, show_legend=(0 == 0))
+            fig, fig2 = self.get_plot(
+                features[:96], target, samples, show_legend=(0 == 0)
+            )
 
             task.get_logger().report_matplotlib_figure(
                 title="Training" if train else "Testing",
@@ -640,17 +643,12 @@ class NonAutoRegressiveQuantileRegression(Trainer):
                 figure=fig,
             )
 
-            fig, ax = plt.subplots(figsize=(20, 10))
-            for i in range(10):
-                ax.plot(samples[i], label=f"Sample {i}")
-
-            ax.plot(target, label="Real NRV", linewidth=3)
-            ax.legend()
             task.get_logger().report_matplotlib_figure(
-                title="Training" if train else "Testing",
-                series=f"Sample {actual_idx} Samples",
+                title="Training Samples" if train else "Testing Samples",
+                series=f"Sample {actual_idx} samples",
                 iteration=epoch,
-                figure=fig,
+                figure=fig2,
+                report_interactive=False,
             )
 
             plt.close()
@@ -750,6 +748,8 @@ class NonAutoRegressiveQuantileRegression(Trainer):
             ]
         )
 
+        ax.set_ylim(-1500, 1500)
+
         fig2, ax2 = plt.subplots(figsize=(20, 10))
         for i in range(10):
             ax2.plot(predictions_np[i], label=f"Sample {i}")
@@ -757,6 +757,8 @@ class NonAutoRegressiveQuantileRegression(Trainer):
         ax2.plot(next_day_np, label="Real NRV", linewidth=3)
         ax2.legend()
 
+        ax2.set_ylim(-1500, 1500)
+
         return fig, fig2
 
     def calculate_crps_from_samples(self, task, dataloader, epoch: int):
@@ -812,26 +814,36 @@ class NonAutoRegressiveQuantileRegression(Trainer):
 
         #  using the policy evaluator, evaluate the policy with the generated samples
         if self.policy_evaluator is not None:
-            _, test_loader = self.data_processor.get_dataloaders(
-                predict_sequence_length=self.model.output_size, full_day_skip=True
+            optimal_penalty, profit, charge_cycles = (
+                self.policy_evaluator.optimize_penalty_for_target_charge_cycles(
+                    idx_samples=generated_samples,
+                    test_loader=dataloader,
+                    initial_penalty=500,
+                    target_charge_cycles=283,
+                    learning_rate=2,
+                    max_iterations=100,
+                    tolerance=1,
+                )
             )
-            self.policy_evaluator.evaluate_test_set(generated_samples, test_loader)
-            df = self.policy_evaluator.get_profits_as_scalars()
 
-            # for each row, report the profits
-            for idx, row in df.iterrows():
-                task.get_logger().report_scalar(
-                    title="Profit",
-                    series=f"penalty_{row['Penalty']}",
-                    value=row["Total Profit"],
-                    iteration=epoch,
-                )
+            print(
+                f"Optimal Penalty: {optimal_penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}"
+            )
 
-            df = self.policy_evaluator.get_profits_till_400()
-            for idx, row in df.iterrows():
-                task.get_logger().report_scalar(
-                    title="Profit_till_400",
-                    series=f"penalty_{row['Penalty']}",
-                    value=row["Profit_till_400"],
-                    iteration=epoch,
-                )
+            task.get_logger().report_scalar(
+                title="Optimal Penalty",
+                series="test",
+                value=optimal_penalty,
+                iteration=epoch,
+            )
+
+            task.get_logger().report_scalar(
+                title="Optimal Profit", series="test", value=profit, iteration=epoch
+            )
+
+            task.get_logger().report_scalar(
+                title="Optimal Charge Cycles",
+                series="test",
+                value=charge_cycles,
+                iteration=epoch,
+            )
diff --git a/src/trainers/trainer.py b/src/trainers/trainer.py
index c01b2ba..6e3769e 100644
--- a/src/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -7,6 +7,7 @@ import numpy as np
 from plotly.subplots import make_subplots
 from clearml.config import running_remotely
 from torchinfo import summary
+import matplotlib.pyplot as plt
 
 
 class Trainer:
@@ -329,18 +330,7 @@ class Trainer:
         return fig
 
     def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
-        num_samples = len(sample_indices)
-        rows = num_samples  # One row per sample since we only want one column
-
-        cols = 1
-
-        fig = make_subplots(
-            rows=rows,
-            cols=cols,
-            subplot_titles=[f"Sample {i+1}" for i in range(num_samples)],
-        )
-
-        for i, idx in enumerate(sample_indices):
+        for actual_idx, idx in sample_indices.items():
             features, target, _ = data_loader.dataset[idx]
 
             features = features.to(self.device)
@@ -350,30 +340,26 @@ class Trainer:
             with torch.no_grad():
                 predictions = self.model(features).cpu()
 
-            sub_fig = self.get_plot(
-                features[:96], target, predictions, show_legend=(i == 0)
+            fig, fig2 = self.get_plot(
+                features[:96], target, predictions, show_legend=(0 == 0)
             )
 
-            row = i + 1
-            col = 1
+            task.get_logger().report_matplotlib_figure(
+                title="Training" if train else "Testing",
+                series=f"Sample {actual_idx}",
+                iteration=epoch,
+                figure=fig,
+            )
 
-            for trace in sub_fig.data:
-                fig.add_trace(trace, row=row, col=col)
+            task.get_logger().report_matplotlib_figure(
+                title="Training Samples" if train else "Testing Samples",
+                series=f"Sample {actual_idx} samples",
+                iteration=epoch,
+                figure=fig2,
+                report_interactive=False,
+            )
 
-            # loss = self.criterion(predictions.to(self.device), target.squeeze(-1).to(self.device)).item()
-
-            # fig['layout']['annotations'][i].update(text=f"{loss.__class__.__name__}: {loss:.6f}")
-
-        # y axis same for all plots
-        # fig.update_yaxes(range=[-1, 1], col=1)
-
-        fig.update_layout(height=1000 * rows)
-        task.get_logger().report_plotly(
-            title=f"{'Training' if train else 'Test'} Samples",
-            series="full_day",
-            iteration=epoch,
-            figure=fig,
-        )
+            plt.close()
 
     def debug_scatter_plot(self, task, train: bool, samples, epoch):
         X, y = samples
diff --git a/src/training_scripts/non_autoregressive_quantiles.py b/src/training_scripts/non_autoregressive_quantiles.py
index 1749ab8..754e0d0 100644
--- a/src/training_scripts/non_autoregressive_quantiles.py
+++ b/src/training_scripts/non_autoregressive_quantiles.py
@@ -85,7 +85,7 @@ time_embedding = TimeEmbedding(
 
 non_linear_model = NonLinearRegression(
     time_embedding.output_dim(inputDim),
-    len(quantiles),
+    len(quantiles) * 96,
     hiddenSize=model_parameters["hidden_size"],
     numLayers=model_parameters["num_layers"],
     dropout=model_parameters["dropout"],
@@ -94,7 +94,7 @@ non_linear_model = NonLinearRegression(
 # linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
 
 model = nn.Sequential(time_embedding, non_linear_model)
-model.output_size = 1
+model.output_size = 96
 optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"])
 
 ### Policy Evaluator ###