Added notebook to sample from predictions and load model

2023-12-18 21:12:14 +00:00
parent c437c23566
commit fee948cc09
11 changed files with 1226 additions and 236 deletions
--- a/src/data/dataset.py
+++ b/src/data/dataset.py
@@ -1,3 +1,4 @@
+import datetime
 import torch
 from torch.utils.data import Dataset, DataLoader
 import pandas as pd
@@ -29,7 +30,6 @@ class NrvDataset(Dataset):
            range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
        )
        self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
-        print(len(self.valid_indices))

        self.history_features = []
        if self.data_config.LOAD_HISTORY:
@@ -69,7 +69,9 @@ class NrvDataset(Dataset):
            self.time_feature = None
    
        self.nrv = torch.tensor(dataframe["nrv"].values).float().reshape(-1)
+        self.datetime = dataframe["datetime"]

+        print(dataframe.columns)
        self.history_features, self.forecast_features = self.preprocess_data(dataframe)

    def skip_samples(self, dataframe):
@@ -203,3 +205,6 @@ class NrvDataset(Dataset):
            return None, torch.stack(targets)

        return torch.stack(features), torch.stack(targets)
+
+    def get_idx_for_date(self, date: datetime.date):
+        return self.datetime[self.datetime.dt.date == date].index[0]
--- a/src/notebooks/loss_test.ipynb
+++ b/src/notebooks/loss_test.ipynb
--- a/src/notebooks/training.ipynb
+++ b/src/notebooks/training.ipynb
@@ -187,22 +187,56 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "ClearML Task: created new task id=6b50442e1cec4bf9b3bd5a34077b4217\n",
-      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/6b50442e1cec4bf9b3bd5a34077b4217/output/log\n",
-      "151780\n",
+      "ClearML Task: created new task id=d19c767120a24f97b3231f0e8ac9f2b5\n",
+      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/d19c767120a24f97b3231f0e8ac9f2b5/output/log\n",
+      "151780\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "ERROR: Unexpected bus error encountered in worker. This might be caused by insufficient shared memory (shm).\n",
+      "\u0000Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f37044039a0>\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py\", line 1478, in __del__\n",
+      "    self._shutdown_workers()\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py\", line 1442, in _shutdown_workers\n",
+      "    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)\n",
+      "  File \"/opt/conda/lib/python3.10/multiprocessing/process.py\", line 149, in join\n",
+      "    res = self._popen.wait(timeout)\n",
+      "  File \"/opt/conda/lib/python3.10/multiprocessing/popen_fork.py\", line 40, in wait\n",
+      "    if not wait([self.sentinel], timeout):\n",
+      "  File \"/opt/conda/lib/python3.10/multiprocessing/connection.py\", line 931, in wait\n",
+      "    ready = selector.select(timeout)\n",
+      "  File \"/opt/conda/lib/python3.10/selectors.py\", line 416, in select\n",
+      "    fd_event_list = self._selector.poll(timeout)\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/signal_handling.py\", line 66, in handler\n",
+      "    _error_if_any_worker_fails()\n",
+      "RuntimeError: DataLoader worker (pid 326715) is killed by signal: Bus error. It is possible that dataloader's workers are out of shared memory. Please try to raise your shared memory limit.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
      "151780\n",
      "24979\n",
      "151780\n",
      "24979\n",
      "151780\n",
      "24979\n",
+      "Using get_plot_error\n",
+      "Using get_plot_error\n",
+      "Using get_plot_error\n",
+      "Using get_plot_error\n",
      "Early stopping triggered\n",
      "151780\n",
      "24979\n"
@@ -210,7 +244,7 @@
    }
   ],
   "source": [
-    "task = clearml_helper.get_task(task_name=\"Autoregressive Non Linear Quantile Regression + Quarter + DoW\")\n",
+    "task = clearml_helper.get_task(task_name=\"Autoregressive Non Linear Quantile Regression + Quarter + DoW + Net\")\n",
    "data_config = task.connect(data_config, name=\"data_features\")\n",
    "\n",
    "#### Hyperparameters ####\n",
@@ -227,8 +261,8 @@
    "        quantiles = eval(quantiles)\n",
    "\n",
    "model_parameters = {\n",
-    "    \"learning_rate\": 0.0001,\n",
-    "    \"hidden_size\": 512,\n",
+    "    \"learning_rate\": 0.0002,\n",
+    "    \"hidden_size\": 1024,\n",
    "    \"num_layers\": 3,\n",
    "    \"dropout\": 0.2,\n",
    "    \"time_feature_embedding\": 2,\n",
@@ -256,7 +290,7 @@
    "    [PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]\n",
    ")\n",
    "trainer.early_stopping(patience=10)\n",
-    "trainer.plot_every(15)\n",
+    "trainer.plot_every(25)\n",
    "trainer.train(task=task, epochs=epochs, remotely=False)"
   ]
  },
--- a/src/policies/policy_tester.ipynb
+++ b/src/policies/policy_tester.ipynb
--- a/src/policies/simple_baseline.py
+++ b/src/policies/simple_baseline.py
@@ -17,32 +17,18 @@ class Battery:
        self.power = power
        self.charge_cycles = 0

-        self.charging = False
-        self.discharging = False
-
-    def simulate(self):
-        """
-        Simulate the battery for one time step (one quarter of an hour)
-        """
-        if self.charging:
-            return self.charge()
-        elif self.discharging:
-            return self.discharge()
-        return 0
-
    def discharge(self):
        """
        Discharge the battery by one time step (one quarter of an hour)
        """
        if self.current_charge == 0:
            return 0
-        self.discharging = True
        self.current_charge -= self.power / 4

+        self.charge_cycles += 1/16
+
        if self.current_charge <= 0:
            self.current_charge = 0
-            self.discharging = False
-            self.charge_cycles += 1
        
        return self.power / 4

@@ -52,12 +38,12 @@ class Battery:
        """
        if self.current_charge == self.capacity:
            return 0
-        self.charging = True
        self.current_charge += self.power / 4
+
+        self.charge_cycles += 1/16
        
        if self.current_charge >= self.capacity:
            self.current_charge = self.capacity
-            self.charging = False

        return self.power / 4
    
@@ -89,7 +75,7 @@ class BaselinePolicy():
            imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year >= 2020]
        else:
            imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year == 2023]
-        imbalance_prices = imbalance_prices.sort_values(by=['DateTime'])
+        imbalance_prices = imbalance_prices.sort_values(by=['DateTime'], ascending=True)
        return imbalance_prices
    
    def get_train_score(self, charge_treshold, discharge_treshold):
@@ -98,42 +84,53 @@ class BaselinePolicy():
    def get_test_score(self, charge_treshold, discharge_treshold):
        return self.get_score(self.test_data, charge_treshold, discharge_treshold)

-    # if price is below treshold -> charge battery: total_profit -= charge * price
+    # if price is below charging treshold (cheap charging) -> charge battery: total_profit -= charge * price
    # if price is above treshold -> discharge battery: total_profit += discharge * price
    def get_score(self, df, charge_treshold, discharge_treshold):
        self.battery.reset()
-        total_profit = 0
+        total_charging_cost = 0
+        total_discharging_profit = 0
+        mean_charging_price = 0
+        mean_discharging_price = 0
+        number_of_charges = 0
+        number_of_discharges = 0
    
        for index, row in df.iterrows():
-            if self.battery.charging:
-                total_profit -= self.battery.simulate() * row['Positive imbalance price']
-            elif self.battery.discharging:
-                total_profit += self.battery.simulate() * row['Positive imbalance price']
-            else:
-                if row['Positive imbalance price'] < charge_treshold:
-                    total_profit -= self.battery.charge() * row['Positive imbalance price']
-                elif row['Positive imbalance price'] > discharge_treshold:
-                    total_profit += self.battery.discharge() * row['Positive imbalance price']
+            if row['Positive imbalance price'] < charge_treshold:
+                total_charging_cost += self.battery.charge() * row['Positive imbalance price']
+                mean_charging_price += row['Positive imbalance price']
+                number_of_charges += 1
+            elif row['Positive imbalance price'] > discharge_treshold:
+                total_discharging_profit += self.battery.discharge() * row['Positive imbalance price']
+                mean_discharging_price += row['Positive imbalance price']
+                number_of_discharges += 1

-        return total_profit, self.battery.charge_cycles
+        return total_charging_cost, total_discharging_profit, self.battery.charge_cycles, mean_charging_price / number_of_charges, mean_discharging_price / number_of_discharges

    def treshold_scores(self, charge_tresholds, discharge_tresholds):
-        df = pd.DataFrame(columns=["Charge treshold", "Discharge treshold", "Total Profit", "Charge cycles"])
-
+        df = pd.DataFrame(columns=["Charge treshold", "Discharge treshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])
+        df_test = pd.DataFrame(columns=["Charge treshold", "Discharge treshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])
        for charge_treshold, discharge_treshold in tqdm(itertools.product(charge_tresholds, discharge_tresholds)):
-                total_profit, charge_cycles = self.get_train_score(charge_treshold, discharge_treshold)
-                df = pd.concat([df, pd.DataFrame([[charge_treshold, discharge_treshold, total_profit, charge_cycles]], columns=["Charge treshold", "Discharge treshold", "Total Profit", "Charge cycles"])])
+                total_charging_cost, total_discharge_profit, charge_cycles, mean_charging_price, mean_discharging_price = self.get_train_score(charge_treshold, discharge_treshold)
+                df = pd.concat([df, pd.DataFrame([[charge_treshold, discharge_treshold, total_charging_cost, total_discharge_profit, total_discharge_profit - total_charging_cost, charge_cycles, mean_charging_price, mean_discharging_price]], columns=["Charge treshold", "Discharge treshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])])
+
+                total_charging_cost, total_discharge_profit, charge_cycles, mean_charging_price, mean_discharging_price = self.get_test_score(charge_treshold, discharge_treshold)
+                df_test = pd.concat([df_test, pd.DataFrame([[charge_treshold, discharge_treshold, total_charging_cost, total_discharge_profit, total_discharge_profit - total_charging_cost, charge_cycles, mean_charging_price, mean_discharging_price]], columns=["Charge treshold", "Discharge treshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])])

        df = df.sort_values(by=['Total Profit'], ascending=False)
-        return df
+
+        return df, df_test
                
    
 battery = Battery(2, 1)
 policy = BaselinePolicy(battery)

-# charge_tresholds = [0, 50, 100, 150, 200, 250, 300, 350]
-# discharge_tresholds = [0, 50, 100, 150, 200, 250, 300, 350]
-# df = policy.treshold_scores(charge_tresholds, discharge_tresholds)
-# print(df.to_markdown())
+charge_tresholds = np.arange(-100, 250, 50)
+discharge_tresholds = np.arange(-100, 250, 50)

-print(policy.get_test_score(150, 100))
+df, df_test = policy.treshold_scores(charge_tresholds, discharge_tresholds)
+print(df.to_markdown())
+
+print(df_test.to_markdown())
+
+# print(policy.get_test_score(150, 100))
--- a/src/training_scripts/autoregressive_quantiles.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -1,16 +1,12 @@
-from src.models.lstm_model import LSTMModel, GRUModel
+from src.models.lstm_model import GRUModel
 from src.data import DataProcessor, DataConfig
-from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression
-from src.trainers.probabilistic_baseline import ProbabilisticBaselineTrainer
-from src.trainers.autoregressive_trainer import AutoRegressiveTrainer
+from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer
 from src.trainers.trainer import Trainer
 from src.utils.clearml import ClearMLHelper
 from src.models import *
 from src.losses import *
 import torch
-import numpy as np
 from torch.nn import MSELoss, L1Loss
-from datetime import datetime
 import torch.nn as nn
 from src.models.time_embedding_layer import TimeEmbedding

--- a/src/utils/bid_ladder.ipynb
+++ b/src/utils/bid_ladder.ipynb
--- a/src/utils/cdf_pdf.py
+++ b/src/utils/cdf_pdf.py
@@ -1,8 +0,0 @@
-import numpy as np
-import matplotlib.pyplot as plt
-
-# Given lists of quantiles and their corresponding probabilities
-quantiles = [-0.23013, -0.19831, -0.15217, -0.13654, -0.05726,
-             0.011687, 0.015129, 0.043187, 0.047704]
-probs = [0.025, 0.05, 0.1, 0.15, 0.5, 0.85, 0.9, 0.95, 0.975]
-
--- a/src/utils/imbalance_price_calculator.py
+++ b/src/utils/imbalance_price_calculator.py
@@ -83,8 +83,8 @@ class ImbalancePriceCalculator:
        fig.add_trace(go.Scatter(x=x_dec_interpolated, y=y_dec_interpolated, mode='lines+markers', name="dec"))
        fig.update_layout(
            title='Bid ladder',
-            xaxis_title='Volume',
-            yaxis_title='Price',
+            xaxis_title='Volume [MWh]',
+            yaxis_title='Price [EUR/MWh]',
            hovermode='x unified'
        )