Plots to compare between quantile regression and diffusion

Fixed diffusion confidence interval plot
Added threshold density plots
2024-02-18 19:21:59 +01:00 · 2024-02-18 16:01:18 +01:00 · 2024-02-14 18:12:11 +00:00 · 2024-02-12 09:54:56 +00:00 · 2024-02-05 16:22:22 +00:00 · 2024-01-20 09:44:14 +00:00
13 changed files with 412 additions and 152 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.csv filter=lfs diff=lfs merge=lfs -text
--- a/Result-Reports/Policies.md
+++ b/Result-Reports/Policies.md
@@ -144,5 +144,14 @@ Test data: 01-01-2023 until 08-10–2023
 - [x] Profit penalty parameter als over charge cycles voor een dag -> parameter bepalen op training data (convex probleem) (< 400 charge cycles per jaar) (over een dag kijken hoeveel charge cycles -> profit - penalty * charge cycles erover, (misschien belonen als eronder charge cycles))

 - [ ] Meer verschil bekijken tussen GRU en diffusion
- [ ] Andere lagen voor diffusion model (GRU, kijken naar TSDiff)
+- [ ] (In Progress) Andere lagen voor diffusion model (GRU, kijken naar TSDiff)
 - [x] Policies met andere modellen (Linear, Non Linear)
+
+- [ ] Visualize the policies over the whole test set -> thresholds plotten voor elke dag (elke policy) -> mss distribution om overzichtelijk te houden (mean and std)
+- [ ] Probleem met diffusion model (activation function? waarom direct grote waardes?)
+
+- [ ] Autoregressive confidence problem -> Quantiles zelf uit elkaar halen (helpt dit?)
+
+- [ ] time steps reducing for diffusion model (UNet activation functions?)
+
+- [ ] (State space model? S4)
--- a/src/data/dataset.py
+++ b/src/data/dataset.py
@@ -25,12 +25,19 @@ class NrvDataset(Dataset):
        self.sequence_length = sequence_length
        self.predict_sequence_length = predict_sequence_length

-        self.samples_to_skip = self.skip_samples(dataframe=dataframe)
+        self.samples_to_skip = self.skip_samples(dataframe=dataframe, full_day_skip=self.full_day_skip)
        total_indices = set(
            range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
        )
        self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))

+        # full day indices
+        full_day_skipped_samples = self.skip_samples(dataframe=dataframe, full_day_skip=True)
+        full_day_total_indices = set(
+            range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
+        )
+        self.full_day_valid_indices = sorted(list(full_day_total_indices - set(full_day_skipped_samples)))
+
        self.history_features = []
        if self.data_config.LOAD_HISTORY:
            self.history_features.append("total_load")
@@ -73,7 +80,7 @@ class NrvDataset(Dataset):

        self.history_features, self.forecast_features = self.preprocess_data(dataframe)

-    def skip_samples(self, dataframe):
+    def skip_samples(self, dataframe, full_day_skip):
        nan_rows = dataframe[dataframe.isnull().any(axis=1)]
        nan_indices = nan_rows.index
        skip_indices = [
@@ -91,7 +98,7 @@ class NrvDataset(Dataset):

        # add indices that are not the start of a day (00:15) to the skip indices (use datetime column)
        # get indices of all 00:15 timestamps
-        if self.full_day_skip:
+        if full_day_skip:
            start_of_day_indices = dataframe[
                dataframe["datetime"].dt.time != pd.Timestamp("00:00:00").time()
            ].index
--- a/src/models/diffusion_model.py
+++ b/src/models/diffusion_model.py
@@ -45,3 +45,53 @@ class SimpleDiffusionModel(DiffusionModel):
            self.layers.append(nn.ReLU())

        self.layers.append(nn.Linear(hidden_sizes[-1] + time_dim + other_inputs_dim, input_size))
+
+class GRUDiffusionModel(DiffusionModel):
+    def __init__(self, input_size: int, hidden_sizes: list, other_inputs_dim: int, gru_hidden_size: int, time_dim: int = 64):
+        super(GRUDiffusionModel, self).__init__(time_dim)
+        # Architecture: 3-layer GRU over the conditioned sequence, then an MLP head projecting back to input_size.
+        self.other_inputs_dim = other_inputs_dim
+        self.gru_hidden_size = gru_hidden_size
+
+        # GRU layer; per-step features are sized as input_size + time_dim + other_inputs_dim
+        self.gru = nn.GRU(input_size=input_size + time_dim + other_inputs_dim,
+                          hidden_size=gru_hidden_size,
+                          num_layers=3,
+                          batch_first=True)
+
+        # Fully connected layers after GRU: one Linear + ReLU pair per entry of hidden_sizes
+        self.fc_layers = nn.ModuleList()
+        prev_size = gru_hidden_size
+        for hidden_size in hidden_sizes:
+            self.fc_layers.append(nn.Linear(prev_size, hidden_size))
+            self.fc_layers.append(nn.ReLU())
+            prev_size = hidden_size
+
+        # Final output layer: projects to input_size, no activation afterwards
+        self.fc_layers.append(nn.Linear(prev_size, input_size))
+
+    def forward(self, x, t, inputs):
+        batch_size, seq_len = x.shape  # x must be 2-D: [batch_size, seq_len]
+        x = x.unsqueeze(-1).repeat(1, 1, seq_len)  # [batch_size, seq_len, seq_len]: each step's value tiled seq_len times
+        # NOTE(review): the GRU reserves input_size features for x, so this appears to line up only when seq_len == input_size.
+        # Positional encoding of t: one embedding per sample, not per sequence step
+        t = t.unsqueeze(-1).type(torch.float)
+        t = self.pos_encoding(t, self.time_dim) # Shape: [batch_size, time_dim] (2-D, as the repeat below requires)
+
+        # Repeat the time encoding along the sequence axis: [batch_size, time_dim] -> [batch_size, seq_len, time_dim]
+        t = t.unsqueeze(1).repeat(1, seq_len, 1)
+
+        # Concatenate x, t, and inputs along the feature dimension
+        x = torch.cat((x, t, inputs), dim=-1) # Shape: [batch_size, seq_len, seq_len + time_dim + F]; assumes inputs is [batch_size, seq_len, F] with F == other_inputs_dim -- TODO confirm
+
+        # Pass through GRU
+        output, hidden = self.gru(x) # hidden shape: [num_layers, batch_size, gru_hidden_size]
+
+        # Final hidden state of the top GRU layer: [batch_size, gru_hidden_size]
+        x = hidden[-1]
+
+        # Run the MLP head on that final hidden state (the per-step GRU `output` is not used)
+        for layer in self.fc_layers:
+            x = layer(x)
+
+        return x
--- a/src/notebooks/diffusion-training.ipynb
+++ b/src/notebooks/diffusion-training.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -31,7 +31,7 @@
    "from datetime import datetime\n",
    "import torch.nn as nn\n",
    "from src.models.time_embedding_layer import TimeEmbedding\n",
-    "from src.models.diffusion_model import SimpleDiffusionModel\n",
+    "from src.models.diffusion_model import SimpleDiffusionModel, GRUDiffusionModel\n",
    "from src.trainers.diffusion_trainer import DiffusionTrainer\n",
    "from torchinfo import summary\n",
    "\n",
@@ -62,30 +62,99 @@
    "\n",
    "data_config.NOMINAL_NET_POSITION = True\n",
    "\n",
-    "data_processor = DataProcessor(data_config, path=\"../../\")\n",
+    "data_processor = DataProcessor(data_config, path=\"../../\", lstm=True)\n",
    "data_processor.set_batch_size(1024)\n",
    "data_processor.set_full_day_skip(True)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "ClearML Task: created new task id=b71216825809432682ea3c7841c07612\n",
-      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/b71216825809432682ea3c7841c07612/output/log\n"
+      "torch.Size([1024, 96, 96])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n",
-      "Selected model `Autoregressive Non Linear Quantile Regression + Quarter + DoW + Net` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n"
+      "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([1024, 96])) that is different to the input size (torch.Size([2, 1024, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
+      "  return F.mse_loss(input, target, reduction=self.reduction)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([556, 96, 96])\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([556, 96])) that is different to the input size (torch.Size([2, 556, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
+      "  return F.mse_loss(input, target, reduction=self.reduction)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n",
+      "torch.Size([1024, 96, 96])\n",
+      "torch.Size([556, 96, 96])\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "KeyboardInterrupt\n",
+      "\n"
     ]
    }
   ],
@@ -95,14 +164,15 @@
    "epochs=150\n",
    "\n",
    "#### Model ####\n",
-    "model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n",
+    "# model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n",
+    "model = GRUDiffusionModel(96, [256, 256], other_inputs_dim=inputDim[2], time_dim=64, gru_hidden_size=128)\n",
    "\n",
    "#### ClearML ####\n",
-    "task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n",
+    "# task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n",
    "\n",
    "#### Trainer ####\n",
    "trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n",
-    "trainer.train(epochs, learningRate, task)"
+    "trainer.train(epochs, learningRate, None)"
   ]
  },
  {
@@ -246,7 +316,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.10.8"
  }
 },
 "nbformat": 4,
--- a/src/policies/plot_combiner.ipynb
+++ b/src/policies/plot_combiner.ipynb
@@ -1197,7 +1197,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.10.8"
  }
 },
 "nbformat": 4,
--- a/src/policies/policy_executer.py
+++ b/src/policies/policy_executer.py
@@ -8,7 +8,8 @@ import pandas as pd
 import datetime
 from tqdm import tqdm
 from src.utils.imbalance_price_calculator import ImbalancePriceCalculator
-import time
+import seaborn as sns
+import matplotlib.pyplot as plt
 import plotly.express as px

 ### import functions ###
@@ -16,7 +17,7 @@ from src.trainers.quantile_trainer import auto_regressive as quantile_auto_regre
 from src.trainers.diffusion_trainer import sample_diffusion
 from src.utils.clearml import ClearMLHelper

-# argparse to parse task id and model type
+### Arguments ###
 parser = argparse.ArgumentParser()
 parser.add_argument('--task_id', type=str, default=None)
 parser.add_argument('--model_type', type=str, default=None)
@@ -27,6 +28,7 @@ assert args.task_id is not None, "Please specify task id"
 assert args.model_type is not None, "Please specify model type"
 assert args.model_name is not None, "Please specify model name"

+### Baseline Policy ###
 battery = Battery(2, 1)
 baseline_policy = BaselinePolicy(battery, data_path="")

@@ -124,6 +126,9 @@ def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc,
    predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
    baseline_profits_cycles = {i: [0, 0] for i in penalties}

+    _charge_thresholds = {}
+    _discharge_thresholds = {}
+
    initial, nrvs, target = predict_NRV(model, date, data_processor, test_loader)

    initial = np.repeat(initial, nrvs.shape[0])
@@ -139,6 +144,10 @@ def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc,

    for penalty in penalties:
        found_charge_thresholds, found_discharge_thresholds = baseline_policy.get_optimal_thresholds(reconstructed_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
+        
+        _charge_thresholds[penalty] = found_charge_thresholds
+        _discharge_thresholds[penalty] = found_discharge_thresholds
+
        next_day_charge_threshold = found_charge_thresholds.mean(axis=0)
        next_day_discharge_threshold = found_discharge_thresholds.mean(axis=0)
        yesterday_charge_thresholds, yesterday_discharge_thresholds = baseline_policy.get_optimal_thresholds(yesterday_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
@@ -153,22 +162,25 @@ def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc,
        baseline_profits_cycles[penalty][0] += yesterday_profit.item()
        baseline_profits_cycles[penalty][1] += yesterday_charge_cycles.item()

-    return predicted_nrv_profits_cycles, baseline_profits_cycles
+    return predicted_nrv_profits_cycles, baseline_profits_cycles, _charge_thresholds, _discharge_thresholds

 def next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV: callable):
-    penalties = [0, 10, 50, 150, 300, 500, 600, 800, 1000, 1500, 2000, 2500]
+    penalties = [0, 50, 250, 500, 1000, 1500]
    predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
    baseline_profits_cycles = {i: [0, 0] for i in penalties}

-    # get all dates in test set
-    dates = baseline_policy.test_data["DateTime"].dt.date.unique()
+    charge_thresholds = {}
+    discharge_thresholds = {}

-    # dates back to datetime
+    dates = baseline_policy.test_data["DateTime"].dt.date.unique()
    dates = pd.to_datetime(dates)

    for date in tqdm(dates):
        try:
-            new_predicted_nrv_profits_cycles, new_baseline_profits_cycles = get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV, penalties)
+            new_predicted_nrv_profits_cycles, new_baseline_profits_cycles, new_charge_thresholds, new_discharge_thresholds = get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV, penalties)
+            
+            charge_thresholds[date] = new_charge_thresholds
+            discharge_thresholds[date] = new_discharge_thresholds

            for penalty in penalties:
                predicted_nrv_profits_cycles[penalty][0] += new_predicted_nrv_profits_cycles[penalty][0]
@@ -178,16 +190,15 @@ def next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV: call
                baseline_profits_cycles[penalty][1] += new_baseline_profits_cycles[penalty][1]

        except Exception as e:
-            # print(f"Error for date {date}")
-            continue
+            print(f"Error for date {date}")

-    return predicted_nrv_profits_cycles, baseline_profits_cycles
+    return predicted_nrv_profits_cycles, baseline_profits_cycles, charge_thresholds, discharge_thresholds

 def main():
    clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
    task = clearml_helper.get_task(task_name="Policy Test")

-    task.execute_remotely(queue_name="default", exit_process=True)
+    # task.execute_remotely(queue_name="default", exit_process=True)

    configuration, model, data_processor, test_loader = load_model(args.task_id)

@@ -205,7 +216,92 @@ def main():

    ipc = ImbalancePriceCalculator(data_path="")

-    predicted_nrv_profits_cycles, baseline_profits_cycles = next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV)
+    predicted_nrv_profits_cycles, baseline_profits_cycles, charge_thresholds, discharge_thresholds = next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV)
+    # charge_thresholds is a dictionary keyed by date; each value is another dictionary mapping penalty -> charge thresholds
+    # create a density plot of the charge thresholds per penalty (seaborn displot; one plot, a different color for each penalty)
+
+    charge_thresholds_for_penalty = {}
+    for d in charge_thresholds.values():
+        for penalty, thresholds in d.items():
+            if penalty not in charge_thresholds_for_penalty:
+                charge_thresholds_for_penalty[penalty] = []
+            charge_thresholds_for_penalty[penalty].extend(thresholds)
+
+    discharge_thresholds_for_penalty = {}
+    for d in discharge_thresholds.values():
+        for penalty, thresholds in d.items():
+            if penalty not in discharge_thresholds_for_penalty:
+                discharge_thresholds_for_penalty[penalty] = []
+            discharge_thresholds_for_penalty[penalty].extend(thresholds)
+
+    def plot_threshold_distribution(thresholds: dict, title: str):
+        """Log a per-penalty KDE density plot of threshold values to ClearML.
+
+        thresholds maps penalty -> iterable of values exposing .item(); title (e.g.
+        "Charge Thresholds") is the ClearML series name and drives the plot labels.
+        """
+        rows = [
+            {'Penalty': penalty, 'Value': value.item()}
+            for penalty, values in thresholds.items() for value in values
+        ]
+        palette = sns.color_palette("bright", len(thresholds))
+        fig = sns.displot(data=pd.DataFrame(rows), x="Value", hue="Penalty", kind="kde", palette=palette)
+        # Labels come from title so the discharge plot is not labelled as "Charge".
+        plt.title(f'Density of {title} by Penalty')
+        plt.xlabel(title.removesuffix('s'))
+        plt.ylabel('Density')
+        plt.legend(title='Penalty')
+        task.get_logger().report_matplotlib_figure("Policy Results", title, iteration=0, figure=fig)
+        plt.close()
+
+    ### Plot charge thresholds distribution ###
+    plot_threshold_distribution(charge_thresholds_for_penalty, "Charge Thresholds")
+
+    ### Plot discharge thresholds distribution ###
+    plot_threshold_distribution(discharge_thresholds_for_penalty, "Discharge Thresholds")
+
+    def plot_thresholds_per_day(thresholds: dict, title: str):
+        """Log a Plotly line chart of the mean threshold per day, one line per penalty, to ClearML."""
+        data_to_plot = []
+        for date, values in thresholds.items():
+            for penalty, value in values.items():
+                mean_val = value.mean().item()
+                std_val = value.std().item()  # Standard deviation; kept in the DataFrame but not drawn by px.line below
+                data_to_plot.append({'Date': date, 'Penalty': penalty, 'Mean': mean_val, 'StdDev': std_val})
+                print(f"Date: {date}, Penalty: {penalty}, Mean: {mean_val}, StdDev: {std_val}")
+        df = pd.DataFrame(data_to_plot)
+        df["Date"] = pd.to_datetime(df["Date"])
+
+        fig = px.line(
+            df,
+            x="Date",
+            y="Mean",
+            color="Penalty",
+            title=title,
+            labels={"Mean": "Threshold", "Date": "Date"},
+            markers=True,  # Adds markers to the lines
+            hover_data=["Penalty"],  # Adds additional hover information
+        )
+
+        fig.update_layout(
+            width=1000,  # Set the width of the figure
+            height=600,  # Set the height of the figure
+            title_x=0.5,  # Center the title horizontally
+        )
+
+        task.get_logger().report_plotly(
+            "Thresholds per Day", 
+            title, 
+            iteration=0, 
+            figure=fig
+        )
+
+    ### Plot mean charge thresholds per day ###
+    plot_thresholds_per_day(charge_thresholds, "Mean Charge Thresholds per Day")
+
+    ### Plot mean discharge thresholds per day ###
+    plot_thresholds_per_day(discharge_thresholds, "Mean Discharge Thresholds per Day")
+

    # create dataframe with columns "name", "penalty", "profit", "cycles"
    df = pd.DataFrame(columns=["name", "penalty", "profit", "cycles"])
--- a/src/trainers/autoregressive_trainer.py
+++ b/src/trainers/autoregressive_trainer.py
@@ -33,68 +33,30 @@ class AutoRegressiveTrainer(Trainer):
        self.model.output_size = 1

    def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
-        num_samples = len(sample_indices)
-        rows = num_samples  # One row per sample since we only want one column
-
-        #  check if self has get_plot_error
-        if hasattr(self, "get_plot_error"):
-            cols = 2
-            print("Using get_plot_error")
-        else:
-            cols = 1
-            print("Using get_plot")
-
-        fig = make_subplots(
-            rows=rows,
-            cols=cols,
-            subplot_titles=[f"Sample {i+1}" for i in range(num_samples)],
-        )
-
-        for i, idx in enumerate(sample_indices):
-            auto_regressive_output = self.auto_regressive(data_loader.dataset, [idx])
+        for actual_idx, idx in sample_indices.items():
+            auto_regressive_output = self.auto_regressive(data_loader.dataset, [idx]*1000)
            if len(auto_regressive_output) == 3:
                initial, predictions, target = auto_regressive_output
            else:
-                initial, predictions, _, target = auto_regressive_output
+                initial, _, predictions, target = auto_regressive_output

-            initial = initial.squeeze(0)
-            predictions = predictions.squeeze(0)
-            target = target.squeeze(0)
            
-            sub_fig = self.get_plot(initial, target, predictions, show_legend=(i == 0))
+            # keep one initial
+            initial = initial[0]
+            target = target[0]

-            row = i + 1
-            col = 1
+            predictions = predictions

-            for trace in sub_fig.data:
-                fig.add_trace(trace, row=row, col=col)
+            fig = self.get_plot(initial, target, predictions, show_legend=(0 == 0))

-            if cols == 2:
-                error_sub_fig = self.get_plot_error(
-                    target, predictions
-                )
-                for trace in error_sub_fig.data:
-                    fig.add_trace(trace, row=row, col=col + 1)
-
-            loss = self.criterion(
-                predictions.to(self.device), target.to(self.device)
-            ).item()
-
-            fig["layout"]["annotations"][i].update(
-                text=f"{self.criterion.__class__.__name__}: {loss:.6f}"
-            )
-
-        # y axis same for all plots
-        # fig.update_yaxes(range=[-1, 1], col=1)
-
-        fig.update_layout(height=1000 * rows)
-        task.get_logger().report_plotly(
-            title=f"{'Training' if train else 'Test'} Samples",
-            series="full_day",
+            task.get_logger().report_matplotlib_figure(
+                title="Training" if train else "Testing",
+                series=f'Sample {actual_idx}',
                iteration=epoch,
                figure=fig,
            )

+
    def auto_regressive(self, data_loader, idx, sequence_length: int = 96):
        self.model.eval()
        target_full = []
--- a/src/trainers/diffusion_trainer.py
+++ b/src/trainers/diffusion_trainer.py
@@ -19,7 +19,11 @@ def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_
    alpha = 1. - beta
    alpha_hat = torch.cumprod(alpha, dim=0)

-    inputs = inputs.repeat(n, 1).to(device)
+    if len(inputs.shape) == 2:
+        inputs = inputs.repeat(n, 1)
+    elif len(inputs.shape) == 3:
+        inputs = inputs.repeat(n, 1, 1)
+
    model.eval()
    with torch.no_grad():
        x = torch.randn(inputs.shape[0], ts_length).to(device)
@@ -36,17 +40,17 @@ def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_
                noise = torch.zeros_like(x)

            x = 1/torch.sqrt(_alpha) * (x-((1-_alpha) / (torch.sqrt(1 - _alpha_hat))) * predicted_noise) + torch.sqrt(_beta) * noise
+    x = torch.clamp(x, -1.0, 1.0)
    return x


-
 class DiffusionTrainer:
    def __init__(self, model: nn.Module, data_processor: DataProcessor, device: torch.device):
        self.model = model
        self.device = device

-        self.noise_steps = 1000
-        self.beta_start = 1e-4
+        self.noise_steps = 30
+        self.beta_start = 0.0001
        self.beta_end = 0.02
        self.ts_length = 96
        
@@ -92,7 +96,16 @@ class DiffusionTrainer:
        else:
            loader = test_loader

-        indices = np.random.randint(0, len(loader.dataset) - 1, size=num_samples)
+        # set seed
+        np.random.seed(42)
+
+        actual_indices = np.random.choice(loader.dataset.full_day_valid_indices, num_samples, replace=False)
+        indices = {}
+        for i in actual_indices:
+            indices[i] = loader.dataset.valid_indices.index(i)
+
+        print(actual_indices)
+        
        return indices
    
    def init_clearml_task(self, task):
@@ -101,8 +114,12 @@ class DiffusionTrainer:

        input_data = torch.randn(1024, 96).to(self.device)
        time_steps = torch.randn(1024).long().to(self.device)
-        other_input_data = torch.randn(1024, self.model.other_inputs_dim).to(self.device)

+        if self.data_processor.lstm:
+            inputDim = self.data_processor.get_input_size()
+            other_input_data = torch.randn(1024, inputDim[1], self.model.other_inputs_dim).to(self.device)
+        else:
+            other_input_data = torch.randn(1024, self.model.other_inputs_dim).to(self.device)
        task.set_configuration_object("model", str(summary(self.model, input_data=[input_data, time_steps, other_input_data])))

        self.data_processor = task.connect(self.data_processor, name="data_processor")
@@ -120,8 +137,8 @@ class DiffusionTrainer:
            predict_sequence_length=self.ts_length
        )

-        train_sample_indices = self.random_samples(train=True, num_samples=10)
-        test_sample_indices = self.random_samples(train=False, num_samples=10)
+        train_sample_indices = self.random_samples(train=True, num_samples=5)
+        test_sample_indices = self.random_samples(train=False, num_samples=5)

        for epoch in range(epochs):
            running_loss = 0.0
@@ -143,7 +160,7 @@ class DiffusionTrainer:
            
            running_loss /= len(train_loader.dataset)

-            if epoch % 20 == 0 and epoch != 0:
+            if epoch % 40 == 0 and epoch != 0:
                self.test(test_loader, epoch, task)

            if task:
@@ -154,7 +171,7 @@ class DiffusionTrainer:
                    value=loss.item(),
                )

-                if epoch % 100 == 0 and epoch != 0:
+                if epoch % 150 == 0 and epoch != 0:
                    self.debug_plots(task, True, train_loader, train_sample_indices, epoch)
                    self.debug_plots(task, False, test_loader, test_sample_indices, epoch)

@@ -163,26 +180,30 @@ class DiffusionTrainer:

        
    def debug_plots(self, task, training: bool, data_loader, sample_indices, epoch):
-        for i, idx in enumerate(sample_indices):
+        for actual_idx, idx in sample_indices.items():
            features, target, _ = data_loader.dataset[idx]

            features = features.to(self.device)
+            features = features.unsqueeze(0)

            self.model.eval()
            with torch.no_grad():
                samples = self.sample(self.model, 100, features).cpu().numpy()
+                samples = self.data_processor.inverse_transform(samples)
+                target = self.data_processor.inverse_transform(target)
               
-            ci_99_upper = np.quantile(samples, 0.99, axis=0)
-            ci_99_lower = np.quantile(samples, 0.01, axis=0)
+            ci_99_upper = np.quantile(samples, 0.995, axis=0)
+            ci_99_lower = np.quantile(samples, 0.005, axis=0)

-            ci_95_upper = np.quantile(samples, 0.95, axis=0)
-            ci_95_lower = np.quantile(samples, 0.05, axis=0)
+            ci_95_upper = np.quantile(samples, 0.975, axis=0)
+            ci_95_lower = np.quantile(samples, 0.025, axis=0)

-            ci_90_upper = np.quantile(samples, 0.9, axis=0)
-            ci_90_lower = np.quantile(samples, 0.1, axis=0)
+            ci_90_upper = np.quantile(samples, 0.95, axis=0)
+            ci_90_lower = np.quantile(samples, 0.05, axis=0)
+
+            ci_50_lower = np.quantile(samples, 0.25, axis=0)
+            ci_50_upper = np.quantile(samples, 0.75, axis=0)

-            ci_50_upper = np.quantile(samples, 0.5, axis=0)
-            ci_50_lower = np.quantile(samples, 0.5, axis=0)

            sns.set_theme()
            time_steps = np.arange(0, 96)
@@ -208,7 +229,7 @@ class DiffusionTrainer:

            task.get_logger().report_matplotlib_figure(
                title="Training" if training else "Testing",
-                series=f'Sample {i}',
+                series=f'Sample {actual_idx}',
                iteration=epoch,
                figure=fig,
            )
--- a/src/trainers/quantile_trainer.py
+++ b/src/trainers/quantile_trainer.py
@@ -10,7 +10,9 @@ import plotly.graph_objects as go
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy.interpolate import CubicSpline
-
+import matplotlib.pyplot as plt
+import seaborn as sns
+import matplotlib.patches as mpatches

 def sample_from_dist(quantiles, preds):
    if isinstance(preds, torch.Tensor):
@@ -261,35 +263,35 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
                    name="test_CRPS_from_samples_transformed", value=np.mean(crps_from_samples_metric)
                )

-    def get_plot_error(
-        self,
-        next_day,
-        predictions,
-    ):
-        metric = PinballLoss(quantiles=self.quantiles)
-        fig = go.Figure()
+    # def get_plot_error(
+    #     self,
+    #     next_day,
+    #     predictions,
+    # ):
+    #     metric = PinballLoss(quantiles=self.quantiles)
+    #     fig = go.Figure()

-        next_day_np = next_day.view(-1).cpu().numpy()
-        predictions_np = predictions.cpu().numpy()
+    #     next_day_np = next_day.view(-1).cpu().numpy()
+    #     predictions_np = predictions.cpu().numpy()

-        if True:
-            next_day_np = self.data_processor.inverse_transform(next_day_np)
-            predictions_np = self.data_processor.inverse_transform(predictions_np)
+    #     if True:
+    #         next_day_np = self.data_processor.inverse_transform(next_day_np)
+    #         predictions_np = self.data_processor.inverse_transform(predictions_np)

-        # for each time step, calculate the error using the metric
-        errors = []
-        for i in range(96):
+    #     # for each time step, calculate the error using the metric
+    #     errors = []
+    #     for i in range(96):

-            target_tensor = torch.tensor(next_day_np[i]).unsqueeze(0)
-            prediction_tensor = torch.tensor(predictions_np[i]).unsqueeze(0)
+    #         target_tensor = torch.tensor(next_day_np[i]).unsqueeze(0)
+    #         prediction_tensor = torch.tensor(predictions_np[i]).unsqueeze(0)

-            errors.append(metric(prediction_tensor, target_tensor))
+    #         errors.append(metric(prediction_tensor, target_tensor))

-        # plot the error 
-        fig.add_trace(go.Scatter(x=np.arange(96), y=errors, name=metric.__class__.__name__))
-        fig.update_layout(title=f"Error of {metric.__class__.__name__} for each time step")
+    #     # plot the error 
+    #     fig.add_trace(go.Scatter(x=np.arange(96), y=errors, name=metric.__class__.__name__))
+    #     fig.update_layout(title=f"Error of {metric.__class__.__name__} for each time step")

-        return fig
+    #     return fig


    def get_plot(
@@ -312,26 +314,59 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
            next_day_np = self.data_processor.inverse_transform(next_day_np)
            predictions_np = self.data_processor.inverse_transform(predictions_np)

+        ci_99_upper = np.quantile(predictions_np, 0.995, axis=0)
+        ci_99_lower = np.quantile(predictions_np, 0.005, axis=0)
+
+        ci_95_upper = np.quantile(predictions_np, 0.975, axis=0)
+        ci_95_lower = np.quantile(predictions_np, 0.025, axis=0)
+
+        ci_90_upper = np.quantile(predictions_np, 0.95, axis=0)
+        ci_90_lower = np.quantile(predictions_np, 0.05, axis=0)
+
+        ci_50_lower = np.quantile(predictions_np, 0.25, axis=0)
+        ci_50_upper = np.quantile(predictions_np, 0.75, axis=0)
+
        # Add traces for current and next day
-        fig.add_trace(go.Scatter(x=np.arange(96), y=current_day_np, name="Current Day"))
-        fig.add_trace(go.Scatter(x=96 + np.arange(96), y=next_day_np, name="Next Day"))
+        # fig.add_trace(go.Scatter(x=np.arange(96), y=current_day_np, name="Current Day"))
+        # fig.add_trace(go.Scatter(x=96 + np.arange(96), y=next_day_np, name="Next Day"))

-        for i, q in enumerate(self.quantiles):
-            fig.add_trace(
-                go.Scatter(
-                    x=96 + np.arange(96),
-                    y=predictions_np[:, i],
-                    name=f"Prediction (Q={q})",
-                    line=dict(dash="dash"),
-                )
-            )
+        # for i, q in enumerate(self.quantiles):
+        #     fig.add_trace(
+        #         go.Scatter(
+        #             x=96 + np.arange(96),
+        #             y=predictions_np[:, i],
+        #             name=f"Prediction (Q={q})",
+        #             line=dict(dash="dash"),
+        #         )
+        #     )

-        # Update the layout
-        fig.update_layout(
-            title="Predictions and Quantiles of the Linear Model",
-            showlegend=show_legend,
-        )
+        # # Update the layout
+        # fig.update_layout(
+        #     title="Predictions and Quantiles of the Linear Model",
+        #     showlegend=show_legend,
+        # )

+        sns.set_theme()
+        time_steps = np.arange(0, 96)
+
+        fig, ax = plt.subplots(figsize=(20, 10))
+        ax.plot(time_steps, predictions_np.mean(axis=0), label="Mean of NRV samples", linewidth=3)
+        # ax.fill_between(time_steps, ci_lower, ci_upper, color='b', alpha=0.2, label='Full Interval')
+
+        ax.fill_between(time_steps, ci_99_lower, ci_99_upper, color='b', alpha=0.2, label='99% Interval')
+        ax.fill_between(time_steps, ci_95_lower, ci_95_upper, color='b', alpha=0.2, label='95% Interval')
+        ax.fill_between(time_steps, ci_90_lower, ci_90_upper, color='b', alpha=0.2, label='90% Interval')
+        ax.fill_between(time_steps, ci_50_lower, ci_50_upper, color='b', alpha=0.2, label='50% Interval')
+
+        ax.plot(next_day_np, label="Real NRV", linewidth=3)
+        # full_interval_patch = mpatches.Patch(color='b', alpha=0.2, label='Full Interval')
+        ci_99_patch = mpatches.Patch(color='b', alpha=0.3, label='99% Interval')
+        ci_95_patch = mpatches.Patch(color='b', alpha=0.4, label='95% Interval')
+        ci_90_patch = mpatches.Patch(color='b', alpha=0.5, label='90% Interval')
+        ci_50_patch = mpatches.Patch(color='b', alpha=0.6, label='50% Interval')
+
+
+        ax.legend(handles=[ci_99_patch, ci_95_patch, ci_90_patch, ci_50_patch, ax.lines[0], ax.lines[1]])
        return fig

    def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96):
--- a/src/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -86,7 +86,7 @@ class Trainer:

    def random_samples(self, train: bool = True, num_samples: int = 10):
        train_loader, test_loader = self.data_processor.get_dataloaders(
-            predict_sequence_length=self.model.output_size
+            predict_sequence_length=96
        )

        if train:
@@ -94,7 +94,14 @@ class Trainer:
        else:
            loader = test_loader

-        indices = np.random.randint(0, len(loader.dataset) - 1, size=num_samples)
+        np.random.seed(42)
+        actual_indices = np.random.choice(loader.dataset.full_day_valid_indices, num_samples, replace=False)
+        indices = {}
+        for i in actual_indices:
+            indices[i] = loader.dataset.valid_indices.index(i)
+
+        print(actual_indices)
+        
        return indices

    def train(self, epochs: int, remotely: bool = False, task: Task = None):
@@ -107,8 +114,8 @@ class Trainer:
                predict_sequence_length=self.model.output_size
            )

-            train_samples = self.random_samples(train=True)
-            test_samples = self.random_samples(train=False)
+            train_samples = self.random_samples(train=True, num_samples=5)
+            test_samples = self.random_samples(train=False, num_samples=5)

            self.init_clearml_task(task)

--- a/src/training_scripts/diffusion_training.py
+++ b/src/training_scripts/diffusion_training.py
@@ -10,7 +10,7 @@ from torch.nn import MSELoss, L1Loss
 from datetime import datetime
 import torch.nn as nn
 from src.models.time_embedding_layer import TimeEmbedding
-from src.models.diffusion_model import SimpleDiffusionModel
+from src.models.diffusion_model import GRUDiffusionModel, SimpleDiffusionModel
 from src.trainers.diffusion_trainer import DiffusionTrainer


@@ -38,22 +38,24 @@ data_config.NOMINAL_NET_POSITION = True
 data_config = task.connect(data_config, name="data_features")

 data_processor = DataProcessor(data_config, path="", lstm=False)
-data_processor.set_batch_size(8192)
+data_processor.set_batch_size(64)
 data_processor.set_full_day_skip(True)

 inputDim = data_processor.get_input_size()
+print("Input dim: ", inputDim)

 model_parameters = {
    "epochs": 5000,
    "learning_rate": 0.0001,
-    "hidden_sizes": [512, 512, 512],
-    "time_dim": 64,
+    "hidden_sizes": [128, 128],
+    "time_dim": 8,
 }

 model_parameters = task.connect(model_parameters, name="model_parameters")

 #### Model ####
 model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"])
+# model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=128)

 print("Starting training ...")

--- a/src/utils/clearml.py
+++ b/src/utils/clearml.py
@@ -10,6 +10,6 @@ class ClearMLHelper:
        Task.ignore_requirements("torchvision")
        Task.ignore_requirements("tensorboard")
        task = Task.init(project_name=self.project_name, task_name=task_name, continue_last_task=False)
-        task.set_base_docker(f"docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime --env GIT_SSL_NO_VERIFY=true --env CLEARML_AGENT_GIT_USER=VictorMylle --env CLEARML_AGENT_GIT_PASS=Voetballer1" )
+        task.set_base_docker(f"docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime")
        task.set_packages("requirements.txt")
        return task
Author	SHA1	Message	Date
Victor Mylle	174a82fab2	Plots to compare between quantile regression and diffusion	2024-02-18 19:21:59 +01:00
Victor Mylle	bd250a664b	Fixed diffusion confidence interval plot	2024-02-18 16:01:18 +01:00
Victor Mylle	7bd0476085	Added plots thresholds densities	2024-02-14 18:12:11 +00:00
Victor Mylle	d10f8a5ff6	Clamping diffusion output	2024-02-12 09:54:56 +00:00
Victor Mylle	77be7371df	Track large files with Git LFS	2024-02-05 16:22:22 +00:00
Victor Mylle	acaad2710a	Changed steps in diffusion model	2024-01-20 09:44:14 +00:00
Victor Mylle	c6fa17fa40	Fixed sampling for GRU and reduced batch size	2024-01-19 00:10:12 +00:00
Victor Mylle	e8e53ab185	Updated training script for GRU model	2024-01-18 23:21:57 +00:00
Victor Mylle	32de50b87e	Added GRU diffusion model	2024-01-18 23:21:01 +00:00