Compare commits
1 Commits
174a82fab2
...
TSDiff-S4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
281f0d6f46 |
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +0,0 @@
|
|||||||
*.csv filter=lfs diff=lfs merge=lfs -text
|
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
|
|
||||||
|
|
||||||
|
FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-devel
|
||||||
|
#FROM getkeops/keops-full:2.1-geomloss0.2.5-cuda11.8-pytorch2.0.0-python3.10
|
||||||
|
# FROM pytorch/pytorch:2.1.0-cuda11.8-cudnn8-devel
|
||||||
RUN apt-get update
|
RUN apt-get update
|
||||||
RUN apt-get install -y git
|
RUN apt-get install -y git
|
||||||
|
|
||||||
|
|||||||
@@ -144,14 +144,5 @@ Test data: 01-01-2023 until 08-10–2023
|
|||||||
- [x] Profit penalty parameter als over charge cycles voor een dag -> parameter bepalen op training data (convex probleem) (< 400 charge cycles per jaar) (over een dag kijken hoeveel charge cycles -> profit - penalty * charge cycles erover, (misschien belonen als eronder charge cycles))
|
- [x] Profit penalty parameter als over charge cycles voor een dag -> parameter bepalen op training data (convex probleem) (< 400 charge cycles per jaar) (over een dag kijken hoeveel charge cycles -> profit - penalty * charge cycles erover, (misschien belonen als eronder charge cycles))
|
||||||
|
|
||||||
- [ ] Meer verschil bekijken tussen GRU en diffusion
|
- [ ] Meer verschil bekijken tussen GRU en diffusion
|
||||||
- [ ] (In Progress) Andere lagen voor diffusion model (GRU, kijken naar TSDiff)
|
- [ ] Andere lagen voor diffusion model (GRU, kijken naar TSDiff)
|
||||||
- [x] Policies met andere modellen (Linear, Non Linear)
|
- [x] Policies met andere modellen (Linear, Non Linear)
|
||||||
|
|
||||||
- [ ] Visualize the policies over the whole test set -> thresholds plotten voor elke dag (elke policy) -> mss distribution om overzichtelijk te houden (mean and std)
|
|
||||||
- [ ] Probleem met diffusion model (activation function? waarom direct grote waardes?)
|
|
||||||
|
|
||||||
- [ ] Autoregressive confidence problem -> Quantiles zelf uit elkaar halen (helpt dit?)
|
|
||||||
|
|
||||||
- [ ] time steps reducing for diffusion model (UNet activation functions?)
|
|
||||||
|
|
||||||
- [ ] (State space model? S4)
|
|
||||||
@@ -11,4 +11,6 @@ clearml
|
|||||||
properscoring
|
properscoring
|
||||||
nbconvert
|
nbconvert
|
||||||
torchinfo
|
torchinfo
|
||||||
tabulate
|
tabulate
|
||||||
|
einops
|
||||||
|
opt_einsum
|
||||||
@@ -25,19 +25,12 @@ class NrvDataset(Dataset):
|
|||||||
self.sequence_length = sequence_length
|
self.sequence_length = sequence_length
|
||||||
self.predict_sequence_length = predict_sequence_length
|
self.predict_sequence_length = predict_sequence_length
|
||||||
|
|
||||||
self.samples_to_skip = self.skip_samples(dataframe=dataframe, full_day_skip=self.full_day_skip)
|
self.samples_to_skip = self.skip_samples(dataframe=dataframe)
|
||||||
total_indices = set(
|
total_indices = set(
|
||||||
range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
|
range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
|
||||||
)
|
)
|
||||||
self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
|
self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
|
||||||
|
|
||||||
# full day indices
|
|
||||||
full_day_skipped_samples = self.skip_samples(dataframe=dataframe, full_day_skip=True)
|
|
||||||
full_day_total_indices = set(
|
|
||||||
range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
|
|
||||||
)
|
|
||||||
self.full_day_valid_indices = sorted(list(full_day_total_indices - set(full_day_skipped_samples)))
|
|
||||||
|
|
||||||
self.history_features = []
|
self.history_features = []
|
||||||
if self.data_config.LOAD_HISTORY:
|
if self.data_config.LOAD_HISTORY:
|
||||||
self.history_features.append("total_load")
|
self.history_features.append("total_load")
|
||||||
@@ -80,7 +73,7 @@ class NrvDataset(Dataset):
|
|||||||
|
|
||||||
self.history_features, self.forecast_features = self.preprocess_data(dataframe)
|
self.history_features, self.forecast_features = self.preprocess_data(dataframe)
|
||||||
|
|
||||||
def skip_samples(self, dataframe, full_day_skip):
|
def skip_samples(self, dataframe):
|
||||||
nan_rows = dataframe[dataframe.isnull().any(axis=1)]
|
nan_rows = dataframe[dataframe.isnull().any(axis=1)]
|
||||||
nan_indices = nan_rows.index
|
nan_indices = nan_rows.index
|
||||||
skip_indices = [
|
skip_indices = [
|
||||||
@@ -98,7 +91,7 @@ class NrvDataset(Dataset):
|
|||||||
|
|
||||||
# add indices that are not the start of a day (00:15) to the skip indices (use datetime column)
|
# add indices that are not the start of a day (00:15) to the skip indices (use datetime column)
|
||||||
# get indices of all 00:15 timestamps
|
# get indices of all 00:15 timestamps
|
||||||
if full_day_skip:
|
if self.full_day_skip:
|
||||||
start_of_day_indices = dataframe[
|
start_of_day_indices = dataframe[
|
||||||
dataframe["datetime"].dt.time != pd.Timestamp("00:00:00").time()
|
dataframe["datetime"].dt.time != pd.Timestamp("00:00:00").time()
|
||||||
].index
|
].index
|
||||||
|
|||||||
@@ -45,53 +45,3 @@ class SimpleDiffusionModel(DiffusionModel):
|
|||||||
self.layers.append(nn.ReLU())
|
self.layers.append(nn.ReLU())
|
||||||
|
|
||||||
self.layers.append(nn.Linear(hidden_sizes[-1] + time_dim + other_inputs_dim, input_size))
|
self.layers.append(nn.Linear(hidden_sizes[-1] + time_dim + other_inputs_dim, input_size))
|
||||||
|
|
||||||
class GRUDiffusionModel(DiffusionModel):
    """Diffusion denoiser built from a 3-layer GRU followed by an MLP head.

    The GRU consumes, per time step, the noisy sample, an encoding of the
    diffusion step and the conditioning inputs concatenated on the feature
    axis. The final hidden state of the last GRU layer is mapped through
    fully connected layers to a vector of size ``input_size``.

    Args:
        input_size: Size of the vector produced by the model. Because
            ``forward`` repeats every value of ``x`` ``seq_len`` times along
            the feature axis, the GRU's declared input width only matches
            when ``seq_len == input_size``.
        hidden_sizes: Widths of the hidden linear layers after the GRU.
        other_inputs_dim: Feature width of the conditioning ``inputs``.
        gru_hidden_size: Hidden size of the GRU.
        time_dim: Width of the diffusion-step encoding, forwarded to the
            base class constructor.
    """

    def __init__(
        self,
        input_size: int,
        hidden_sizes: list,
        other_inputs_dim: int,
        gru_hidden_size: int,
        time_dim: int = 64,
    ):
        super(GRUDiffusionModel, self).__init__(time_dim)

        self.other_inputs_dim = other_inputs_dim
        self.gru_hidden_size = gru_hidden_size

        # GRU layer
        self.gru = nn.GRU(
            input_size=input_size + time_dim + other_inputs_dim,
            hidden_size=gru_hidden_size,
            num_layers=3,
            batch_first=True,
        )

        # Fully connected layers after GRU
        self.fc_layers = nn.ModuleList()
        prev_size = gru_hidden_size
        for hidden_size in hidden_sizes:
            self.fc_layers.append(nn.Linear(prev_size, hidden_size))
            self.fc_layers.append(nn.ReLU())
            prev_size = hidden_size

        # Final output layer
        self.fc_layers.append(nn.Linear(prev_size, input_size))

    def forward(self, x, t, inputs):
        """Predict from noisy sample ``x`` at diffusion step ``t``.

        Args:
            x: 2-D tensor ``[batch_size, seq_len]`` (unpacking enforces rank 2).
            t: Diffusion steps, one per batch element.
            inputs: Conditioning tensor concatenated on the last axis; it must
                be ``[batch_size, seq_len, other_inputs_dim]`` for the
                concatenation and the GRU input width to line up.

        Returns:
            Tensor ``[batch_size, input_size]``.
        """
        _, seq_len = x.shape
        # [batch_size, seq_len] -> [batch_size, seq_len, seq_len]: every time
        # step carries its own value repeated seq_len times as features.
        x = x.unsqueeze(-1).repeat(1, 1, seq_len)

        # Encode the diffusion step.
        # NOTE(review): assumes the base class's pos_encoding returns
        # [batch_size, time_dim] and that it sets self.time_dim - confirm.
        t = t.unsqueeze(-1).type(torch.float)
        t = self.pos_encoding(t, self.time_dim)

        # Repeat the step encoding for every time step:
        # [batch_size, time_dim] -> [batch_size, seq_len, time_dim]
        t = t.unsqueeze(1).repeat(1, seq_len, 1)

        # Concatenate x, t, and inputs along the feature dimension
        x = torch.cat((x, t, inputs), dim=-1)

        # Pass through GRU; hidden is [num_layers, batch_size, gru_hidden_size]
        _, hidden = self.gru(x)

        # Final hidden state of the last GRU layer: [batch_size, gru_hidden_size]
        x = hidden[-1]

        # MLP head applied to that single summary vector
        for layer in self.fc_layers:
            x = layer(x)

        return x
|
|
||||||
172
src/models/tsdiff_s4/backbones.py
Normal file
172
src/models/tsdiff_s4/backbones.py
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
import math
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from torch import nn
|
||||||
|
|
||||||
|
from src.models.tsdiff_s4.s4 import S4
|
||||||
|
|
||||||
|
|
||||||
|
class SinusoidalPositionEmbeddings(nn.Module):
    """Sinusoidal embedding of scalar (diffusion) steps.

    For a 1-D tensor ``time`` of length ``B`` the module returns a
    ``(B, 2 * (dim // 2))`` tensor: the sines of ``time * freq`` followed by
    the cosines, where the frequencies decay geometrically from 1 towards
    1/10000.

    Args:
        dim: Requested embedding width. For odd ``dim`` the output has
            ``dim - 1`` columns, because sines and cosines are emitted in
            equal numbers.
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, time):
        """Return the embedding of ``time``, a 1-D tensor of steps."""
        half_dim = self.dim // 2
        # With dim in (2, 3) half_dim is 1 and the usual ``half_dim - 1``
        # divisor is zero; clamp it so a single frequency of 1.0 is used
        # instead of raising ZeroDivisionError. Unchanged for dim >= 4.
        scale = math.log(10000) / max(half_dim - 1, 1)
        freqs = torch.exp(
            torch.arange(half_dim, device=time.device) * -scale
        )
        # (B, 1) * (1, half_dim) -> (B, half_dim)
        angles = time[:, None] * freqs[None, :]
        return torch.cat((angles.sin(), angles.cos()), dim=-1)
|
||||||
|
|
||||||
|
|
||||||
|
class S4Layer(nn.Module):
    """Pre-norm residual wrapper around a single bidirectional ``S4`` layer.

    Args:
        d_model: Channel width; used for both the S4 layer and the LayerNorm.
        dropout: Dropout probability. Passed to ``S4`` and, when positive,
            also applied to the S4 output through ``nn.Dropout1d``.
    """

    def __init__(
        self,
        d_model,
        dropout=0.0,
    ):
        super().__init__()
        self.layer = S4(
            d_model=d_model,
            d_state=128,
            bidirectional=True,
            dropout=dropout,
            transposed=True,
            postact=None,
        )
        self.norm = nn.LayerNorm(d_model)
        self.dropout = (
            nn.Dropout1d(dropout) if dropout > 0.0 else nn.Identity()
        )

    def _prenorm(self, x):
        """Apply LayerNorm over the channel axis of a channels-first tensor."""
        # LayerNorm normalises the last axis, so swap channels to the end
        # and back again.
        return self.norm(x.transpose(-1, -2)).transpose(-1, -2)

    def forward(self, x):
        """
        Input x is shape (B, d_input, L)

        Returns ``(x + dropout(S4(norm(x))), None)``; the second element is
        always ``None``.
        """
        # Apply layer: we ignore the state input and output for training
        z, _ = self.layer(self._prenorm(x))
        # Dropout on the output of the layer
        z = self.dropout(z)
        # Residual connection
        return z + x, None

    def default_state(self, *args, **kwargs):
        """Forward to ``S4.default_state`` of the wrapped layer."""
        return self.layer.default_state(*args, **kwargs)

    def step(self, x, state, **kwargs):
        """Advance the wrapped layer by one step and return ``(x, state)``.

        Unlike ``forward`` no dropout is applied here.
        NOTE(review): the pre-norm swaps the last two axes exactly as in
        ``forward``; confirm this matches the shape ``S4.step`` expects.
        """
        z, state = self.layer.step(self._prenorm(x), state, **kwargs)
        # Residual connection
        return z + x, state
|
||||||
|
|
||||||
|
|
||||||
|
class S4Block(nn.Module):
    """Residual block: S4 layer, gated activation and two 1x1 convolutions.

    Args:
        d_model: Channel width of the block.
        dropout: Dropout probability forwarded to ``S4Layer``.
        expand: Accepted for interface compatibility; not used by this block.
        num_features: Channel count of the optional conditioning features
            consumed by ``feature_encoder``.
    """

    def __init__(self, d_model, dropout=0.0, expand=2, num_features=0):
        super().__init__()
        self.s4block = S4Layer(d_model, dropout=dropout)

        self.time_linear = nn.Linear(d_model, d_model)
        self.tanh = nn.Tanh()
        self.sigm = nn.Sigmoid()
        self.out_linear1 = nn.Conv1d(
            in_channels=d_model, out_channels=d_model, kernel_size=1
        )
        self.out_linear2 = nn.Conv1d(
            in_channels=d_model, out_channels=d_model, kernel_size=1
        )
        self.feature_encoder = nn.Conv1d(num_features, d_model, kernel_size=1)

    def forward(self, x, t, features=None):
        """Run the block.

        Args:
            x: Channels-first activations; ``x.shape[2]`` is the sequence
                axis, so ``(B, d_model, L)``.
            t: Step embedding ``(B, d_model)``.
            features: Optional channels-first conditioning tensor
                ``(B, num_features, L)``.

        Returns:
            ``(residual, skip)``: ``out_linear1(h) + x`` and ``out_linear2(h)``,
            where ``h`` is the gated S4 output.
        """
        # (B, d_model) -> (B, d_model, 1); broadcasting over the sequence
        # axis adds the same step embedding to every position.
        t = self.time_linear(t)[:, :, None]
        out, _ = self.s4block(x + t)
        if features is not None:
            out = out + self.feature_encoder(features)
        # Gated activation unit
        out = self.tanh(out) * self.sigm(out)
        out1 = self.out_linear1(out)
        out2 = self.out_linear2(out)
        return out1 + x, out2
|
||||||
|
|
||||||
|
|
||||||
|
def Conv1dKaiming(in_channels, out_channels, kernel_size):
    """Build an ``nn.Conv1d`` whose weight is Kaiming-normal initialised.

    Only the weight is re-initialised; the bias keeps the default
    ``nn.Conv1d`` initialisation.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Convolution kernel size.

    Returns:
        The initialised ``nn.Conv1d`` layer.
    """
    layer = nn.Conv1d(in_channels, out_channels, kernel_size)
    nn.init.kaiming_normal_(layer.weight)
    return layer
|
||||||
|
|
||||||
|
|
||||||
|
class BackboneModel(nn.Module):
    """Denoising backbone: a stack of residual S4 blocks with summed skips.

    Args:
        input_dim: Feature width of the model input.
        hidden_dim: Channel width used inside the residual blocks.
        output_dim: Feature width of the output. When ``init_skip`` is true
            the input is added to the output, so the two must be
            broadcast-compatible.
        step_emb: Width of the sinusoidal diffusion-step embedding.
        num_residual_blocks: Number of residual blocks. ``forward`` stacks
            the per-block skip outputs, so at least one block is needed.
        num_features: Channel count of the optional conditioning features.
        residual_block: Block type; only ``"s4"`` is supported.
        dropout: Dropout probability forwarded to every residual block.
        init_skip: Whether to add the raw input to the output.

    Raises:
        ValueError: If ``residual_block`` is not ``"s4"``.
    """

    def __init__(
        self,
        input_dim,
        hidden_dim,
        output_dim,
        step_emb,
        num_residual_blocks,
        num_features,
        residual_block="s4",
        dropout=0.0,
        init_skip=True,
    ):
        super().__init__()
        if residual_block != "s4":
            raise ValueError(f"Unknown residual block {residual_block}")
        block_cls = S4Block

        # Sub-modules are created in a fixed order so that state_dict keys
        # and parameter initialisation order stay stable.
        self.input_init = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
        )
        self.time_init = nn.Sequential(
            nn.Linear(step_emb, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
        )
        self.out_linear = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )
        self.residual_blocks = nn.ModuleList(
            [
                block_cls(hidden_dim, num_features=num_features, dropout=dropout)
                for _ in range(num_residual_blocks)
            ]
        )
        self.step_embedding = SinusoidalPositionEmbeddings(step_emb)
        self.init_skip = init_skip

    def forward(self, input, t, features=None):
        """Run the backbone.

        Args:
            input: Tensor laid out as (B, L, C) with ``C == input_dim``.
            t: 1-D tensor of diffusion steps, one per batch element.
            features: Optional conditioning tensor; its last two axes are
                swapped before it is handed to the residual blocks.

        Returns:
            Tensor whose last axis has size ``output_dim``; ``input`` is
            added to it when ``init_skip`` is true.
        """
        x = self.input_init(input)  # B, L ,C
        t = self.time_init(self.step_embedding(t))
        # Residual blocks work channels-first.
        x = x.transpose(-1, -2)
        if features is not None:
            features = features.transpose(-1, -2)

        skips = []
        for layer in self.residual_blocks:
            x, skip = layer(x, t, features)
            skips.append(skip)

        # Sum the skip outputs of all blocks, then return to channels-last.
        skip = torch.stack(skips).sum(0)
        skip = skip.transpose(-1, -2)
        out = self.out_linear(skip)
        if self.init_skip:
            out = out + input
        return out
|
||||||
1836
src/models/tsdiff_s4/s4.py
Normal file
1836
src/models/tsdiff_s4/s4.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4,18 +4,39 @@
|
|||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/opt/conda/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||||
|
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import sys\n",
|
"import sys\n",
|
||||||
"sys.path.append('../..')\n",
|
"sys.path.append('../..')\n",
|
||||||
"import torch"
|
"import torch\n",
|
||||||
|
"\n",
|
||||||
|
"%load_ext autoreload\n",
|
||||||
|
"%autoreload 2"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"The autoreload extension is already loaded. To reload it, use:\n",
|
||||||
|
" %reload_ext autoreload\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"from src.data import DataProcessor, DataConfig\n",
|
"from src.data import DataProcessor, DataConfig\n",
|
||||||
"from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression\n",
|
"from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression\n",
|
||||||
@@ -31,7 +52,7 @@
|
|||||||
"from datetime import datetime\n",
|
"from datetime import datetime\n",
|
||||||
"import torch.nn as nn\n",
|
"import torch.nn as nn\n",
|
||||||
"from src.models.time_embedding_layer import TimeEmbedding\n",
|
"from src.models.time_embedding_layer import TimeEmbedding\n",
|
||||||
"from src.models.diffusion_model import SimpleDiffusionModel, GRUDiffusionModel\n",
|
"from src.models.diffusion_model import SimpleDiffusionModel\n",
|
||||||
"from src.trainers.diffusion_trainer import DiffusionTrainer\n",
|
"from src.trainers.diffusion_trainer import DiffusionTrainer\n",
|
||||||
"from torchinfo import summary\n",
|
"from torchinfo import summary\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -44,7 +65,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 8,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -63,98 +84,29 @@
|
|||||||
"data_config.NOMINAL_NET_POSITION = True\n",
|
"data_config.NOMINAL_NET_POSITION = True\n",
|
||||||
"\n",
|
"\n",
|
||||||
"data_processor = DataProcessor(data_config, path=\"../../\", lstm=True)\n",
|
"data_processor = DataProcessor(data_config, path=\"../../\", lstm=True)\n",
|
||||||
"data_processor.set_batch_size(1024)\n",
|
"data_processor.set_batch_size(128)\n",
|
||||||
"data_processor.set_full_day_skip(True)"
|
"data_processor.set_full_day_skip(True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"torch.Size([1024, 96, 96])\n"
|
"ClearML Task: created new task id=b71216825809432682ea3c7841c07612\n",
|
||||||
|
"ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/b71216825809432682ea3c7841c07612/output/log\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "stderr",
|
"name": "stderr",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([1024, 96])) that is different to the input size (torch.Size([2, 1024, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
|
"500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n",
|
||||||
" return F.mse_loss(input, target, reduction=self.reduction)\n"
|
"Selected model `Autoregressive Non Linear Quantile Regression + Quarter + DoW + Net` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n"
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"torch.Size([556, 96, 96])\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([556, 96])) that is different to the input size (torch.Size([2, 556, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
|
|
||||||
" return F.mse_loss(input, target, reduction=self.reduction)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n",
|
|
||||||
"torch.Size([1024, 96, 96])\n",
|
|
||||||
"torch.Size([556, 96, 96])\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\n",
|
|
||||||
"KeyboardInterrupt\n",
|
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -164,15 +116,14 @@
|
|||||||
"epochs=150\n",
|
"epochs=150\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#### Model ####\n",
|
"#### Model ####\n",
|
||||||
"# model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n",
|
"model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n",
|
||||||
"model = GRUDiffusionModel(96, [256, 256], other_inputs_dim=inputDim[2], time_dim=64, gru_hidden_size=128)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"#### ClearML ####\n",
|
"#### ClearML ####\n",
|
||||||
"# task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n",
|
"task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#### Trainer ####\n",
|
"#### Trainer ####\n",
|
||||||
"trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n",
|
"trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n",
|
||||||
"trainer.train(epochs, learningRate, None)"
|
"trainer.train(epochs, learningRate, task)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -292,6 +243,165 @@
|
|||||||
"sample_diffusion(new_model, 1, inputs)"
|
"sample_diffusion(new_model, 1, inputs)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Trying out BackboneModel using S4 state space model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[KeOps] Compiling cuda jit compiler engine ... \n",
|
||||||
|
"[KeOps] Warning : There were warnings or errors compiling formula :\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
|
||||||
|
"\n",
|
||||||
|
"OK\n",
|
||||||
|
"[pyKeOps] Compiling nvrtc binder for python ... \n",
|
||||||
|
"[KeOps] Warning : There were warnings or errors compiling formula :\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
|
||||||
|
"/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
|
||||||
|
"\n",
|
||||||
|
"OK\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"sys.path.append('../..')\n",
|
||||||
|
"import torch\n",
|
||||||
|
"\n",
|
||||||
|
"%load_ext autoreload\n",
|
||||||
|
"%autoreload 2\n",
|
||||||
|
"from src.models.tsdiff_s4.backbones import BackboneModel\n",
|
||||||
|
"from src.trainers.diffusion_trainer import DiffusionTrainer\n",
|
||||||
|
"\n",
|
||||||
|
"backbone = BackboneModel(\n",
|
||||||
|
" input_dim=1,\n",
|
||||||
|
" hidden_dim=512,\n",
|
||||||
|
" output_dim=1,\n",
|
||||||
|
" step_emb=128,\n",
|
||||||
|
" num_residual_blocks=3,\n",
|
||||||
|
" num_features=2\n",
|
||||||
|
")\n",
|
||||||
|
"backbone = backbone.to(\"cuda\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[KeOps] Generating code for formula Sum_Reduction(ComplexMult(Real2Complex(1/ComplexSquareAbs(ComplexMult(Var(1,2,0)-Var(2,2,1),Var(1,2,0)-Conj(Var(2,2,1))))),ComplexMult(Var(1,2,0)*ComplexReal(Var(0,2,1))-Real2Complex(Sum(Var(0,2,1)*Var(2,2,1))),Conj(ComplexMult(Var(1,2,0)-Var(2,2,1),Var(1,2,0)-Conj(Var(2,2,1)))))),0) ... "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ename": "",
|
||||||
|
"evalue": "",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# now lets find out what the input shape of the featues and input must be\n",
|
||||||
|
"\n",
|
||||||
|
"# input: (B, L, C)\n",
|
||||||
|
"# features: (B, L, F)\n",
|
||||||
|
"# time: (B, 1)\n",
|
||||||
|
"\n",
|
||||||
|
"# output: (B, L, C)? \n",
|
||||||
|
"\n",
|
||||||
|
"input = torch.randn(2, 96, 1).to(\"cuda\")\n",
|
||||||
|
"features = torch.randn(2, 96, 2).to(\"cuda\")\n",
|
||||||
|
"times = torch.randn(2).to(\"cuda\")\n",
|
||||||
|
"\n",
|
||||||
|
"backbone(input, times, features).shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "KeyError",
|
||||||
|
"evalue": "'nvrtc'",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[0;32mIn[3], line 13\u001b[0m\n\u001b[1;32m 10\u001b[0m times \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor([\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m2\u001b[39m)\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 11\u001b[0m features \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mrandn(\u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m96\u001b[39m, \u001b[38;5;241m2\u001b[39m)\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 13\u001b[0m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mshape\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
|
||||||
|
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/backbones.py:164\u001b[0m, in \u001b[0;36mBackboneModel.forward\u001b[0;34m(self, input, t, features)\u001b[0m\n\u001b[1;32m 162\u001b[0m skips \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 163\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m layer \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresidual_blocks:\n\u001b[0;32m--> 164\u001b[0m x, skip \u001b[38;5;241m=\u001b[39m \u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m skips\u001b[38;5;241m.\u001b[39mappend(skip)\n\u001b[1;32m 167\u001b[0m skip \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mstack(skips)\u001b[38;5;241m.\u001b[39msum(\u001b[38;5;241m0\u001b[39m)\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
|
||||||
|
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/backbones.py:96\u001b[0m, in \u001b[0;36mS4Block.forward\u001b[0;34m(self, x, t, features)\u001b[0m\n\u001b[1;32m 94\u001b[0m t \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtime_linear(t)[:, \u001b[38;5;28;01mNone\u001b[39;00m, :]\u001b[38;5;241m.\u001b[39mrepeat(\u001b[38;5;241m1\u001b[39m, x\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m2\u001b[39m], \u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 95\u001b[0m t \u001b[38;5;241m=\u001b[39m t\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[0;32m---> 96\u001b[0m out, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ms4block\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m features \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 98\u001b[0m out \u001b[38;5;241m=\u001b[39m out \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfeature_encoder(features)\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
|
||||||
|
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/backbones.py:56\u001b[0m, in \u001b[0;36mS4Layer.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 54\u001b[0m z \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnorm(z\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m))\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# Apply layer: we ignore the state input and output for training\u001b[39;00m\n\u001b[0;32m---> 56\u001b[0m z, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mz\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# Dropout on the output of the layer\u001b[39;00m\n\u001b[1;32m 58\u001b[0m z \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdropout(z)\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
|
||||||
|
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/s4.py:1761\u001b[0m, in \u001b[0;36mS4.forward\u001b[0;34m(self, u, state, rate, lengths, **kwargs)\u001b[0m\n\u001b[1;32m 1759\u001b[0m \u001b[38;5;66;03m# Compute SS Kernel\u001b[39;00m\n\u001b[1;32m 1760\u001b[0m L_kernel \u001b[38;5;241m=\u001b[39m L \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mL \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mmin\u001b[39m(L, \u001b[38;5;28mround\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mL \u001b[38;5;241m/\u001b[39m rate))\n\u001b[0;32m-> 1761\u001b[0m k, k_state \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkernel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1762\u001b[0m \u001b[43m \u001b[49m\u001b[43mL\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mL_kernel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstate\u001b[49m\n\u001b[1;32m 1763\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# (C H L) (B C H L)\u001b[39;00m\n\u001b[1;32m 1765\u001b[0m \u001b[38;5;66;03m# Convolution\u001b[39;00m\n\u001b[1;32m 1766\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbidirectional:\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
|
||||||
|
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/s4.py:1549\u001b[0m, in \u001b[0;36mSSKernel.forward\u001b[0;34m(self, state, L, rate)\u001b[0m\n\u001b[1;32m 1548\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, state\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, L\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, rate\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1.0\u001b[39m):\n\u001b[0;32m-> 1549\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkernel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mL\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mL\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrate\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
|
||||||
|
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/s4.py:925\u001b[0m, in \u001b[0;36mSSKernelNPLR.forward\u001b[0;34m(self, state, rate, L)\u001b[0m\n\u001b[1;32m 923\u001b[0m r \u001b[38;5;241m=\u001b[39m cauchy_mult(v, z, w, symmetric\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 924\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m has_pykeops:\n\u001b[0;32m--> 925\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43mcauchy_conj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mz\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 926\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 927\u001b[0m r \u001b[38;5;241m=\u001b[39m cauchy_naive(v, z, w)\n",
|
||||||
|
"File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/s4.py:89\u001b[0m, in \u001b[0;36mcauchy_conj\u001b[0;34m(v, z, w)\u001b[0m\n\u001b[1;32m 86\u001b[0m z \u001b[38;5;241m=\u001b[39m _c2r(z)\n\u001b[1;32m 87\u001b[0m w \u001b[38;5;241m=\u001b[39m _c2r(w)\n\u001b[0;32m---> 89\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m2\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[43mcauchy_mult\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mz\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mw\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbackend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGPU\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _r2c(r)\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pykeops/torch/generic/generic_red.py:688\u001b[0m, in \u001b[0;36mGenred.__call__\u001b[0;34m(self, backend, device_id, ranges, out, *args)\u001b[0m\n\u001b[1;32m 686\u001b[0m params\u001b[38;5;241m.\u001b[39mny \u001b[38;5;241m=\u001b[39m ny\n\u001b[1;32m 687\u001b[0m params\u001b[38;5;241m.\u001b[39mout \u001b[38;5;241m=\u001b[39m out\n\u001b[0;32m--> 688\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mGenredAutograd_fun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 690\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m postprocess(out, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtorch\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreduction_op, nout, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mopt_arg, dtype)\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pykeops/torch/generic/generic_red.py:384\u001b[0m, in \u001b[0;36mGenredAutograd_fun\u001b[0;34m(*inputs)\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mGenredAutograd_fun\u001b[39m(\u001b[38;5;241m*\u001b[39minputs):\n\u001b[0;32m--> 384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mGenredAutograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/autograd/function.py:506\u001b[0m, in \u001b[0;36mFunction.apply\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 503\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_C\u001b[38;5;241m.\u001b[39m_are_functorch_transforms_active():\n\u001b[1;32m 504\u001b[0m \u001b[38;5;66;03m# See NOTE: [functorch vjp and autograd interaction]\u001b[39;00m\n\u001b[1;32m 505\u001b[0m args \u001b[38;5;241m=\u001b[39m _functorch\u001b[38;5;241m.\u001b[39mutils\u001b[38;5;241m.\u001b[39munwrap_dead_wrappers(args)\n\u001b[0;32m--> 506\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 508\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39msetup_context \u001b[38;5;241m==\u001b[39m _SingleLevelFunction\u001b[38;5;241m.\u001b[39msetup_context:\n\u001b[1;32m 509\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 510\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mIn order to use an autograd.Function with functorch transforms \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 511\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m(vmap, grad, jvp, jacrev, ...), it must override the setup_context \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 512\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstaticmethod. 
For more details, please see \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 513\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://pytorch.org/docs/master/notes/extending.func.html\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pykeops/torch/generic/generic_red.py:295\u001b[0m, in \u001b[0;36mGenredAutograd.forward\u001b[0;34m(*inputs)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;129m@staticmethod\u001b[39m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39minputs):\n\u001b[0;32m--> 295\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mGenredAutograd_base\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pykeops/torch/generic/generic_red.py:91\u001b[0m, in \u001b[0;36mGenredAutograd_base._forward\u001b[0;34m(params, *args)\u001b[0m\n\u001b[1;32m 85\u001b[0m device_id, device_args \u001b[38;5;241m=\u001b[39m set_device(\n\u001b[1;32m 86\u001b[0m tagCPUGPU, tagHostDevice, params\u001b[38;5;241m.\u001b[39mdevice_id_request, \u001b[38;5;241m*\u001b[39margs\n\u001b[1;32m 87\u001b[0m )\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpykeops\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcommon\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mkeops_io\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m keops_binder\n\u001b[0;32m---> 91\u001b[0m myconv \u001b[38;5;241m=\u001b[39m \u001b[43mkeops_binder\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnvrtc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtagCPUGPU\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcpp\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m(\n\u001b[1;32m 92\u001b[0m tagCPUGPU,\n\u001b[1;32m 93\u001b[0m tag1D2D,\n\u001b[1;32m 94\u001b[0m tagHostDevice,\n\u001b[1;32m 95\u001b[0m use_ranges,\n\u001b[1;32m 96\u001b[0m device_id,\n\u001b[1;32m 97\u001b[0m params\u001b[38;5;241m.\u001b[39mformula,\n\u001b[1;32m 98\u001b[0m params\u001b[38;5;241m.\u001b[39maliases,\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28mlen\u001b[39m(args),\n\u001b[1;32m 100\u001b[0m params\u001b[38;5;241m.\u001b[39mdtype,\n\u001b[1;32m 101\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtorch\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 102\u001b[0m params\u001b[38;5;241m.\u001b[39moptional_flags,\n\u001b[1;32m 103\u001b[0m 
)\u001b[38;5;241m.\u001b[39mimport_module()\n\u001b[1;32m 105\u001b[0m \u001b[38;5;66;03m# N.B.: KeOps C++ expects contiguous data arrays\u001b[39;00m\n\u001b[1;32m 106\u001b[0m test_contig \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mall\u001b[39m(arg\u001b[38;5;241m.\u001b[39mis_contiguous() \u001b[38;5;28;01mfor\u001b[39;00m arg \u001b[38;5;129;01min\u001b[39;00m args)\n",
|
||||||
|
"\u001b[0;31mKeyError\u001b[0m: 'nvrtc'"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# inputDim = data_processor.get_input_size()\n",
|
||||||
|
"learningRate = 0.0001\n",
|
||||||
|
"epochs=150\n",
|
||||||
|
"\n",
|
||||||
|
"#### Model ####\n",
|
||||||
|
"model = BackboneModel(1, 512, output_dim=1, step_emb=64, num_residual_blocks=4, num_features=2)\n",
|
||||||
|
"model.to(\"cuda\")\n",
|
||||||
|
"\n",
|
||||||
|
"inputs = torch.randn(2, 96, 1).to(\"cuda\")\n",
|
||||||
|
"times = torch.tensor([0]*2).to(\"cuda\")\n",
|
||||||
|
"features = torch.randn(2, 96, 2).to(\"cuda\")\n",
|
||||||
|
"\n",
|
||||||
|
"model(inputs, times, features).shape\n",
|
||||||
|
"\n",
|
||||||
|
"#### Trainer ####\n",
|
||||||
|
"# trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n",
|
||||||
|
"# trainer.train(epochs, learningRate, None)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
|||||||
@@ -1197,7 +1197,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.8"
|
"version": "3.10.11"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -8,8 +8,7 @@ import pandas as pd
|
|||||||
import datetime
|
import datetime
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from src.utils.imbalance_price_calculator import ImbalancePriceCalculator
|
from src.utils.imbalance_price_calculator import ImbalancePriceCalculator
|
||||||
import seaborn as sns
|
import time
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import plotly.express as px
|
import plotly.express as px
|
||||||
|
|
||||||
### import functions ###
|
### import functions ###
|
||||||
@@ -17,7 +16,7 @@ from src.trainers.quantile_trainer import auto_regressive as quantile_auto_regre
|
|||||||
from src.trainers.diffusion_trainer import sample_diffusion
|
from src.trainers.diffusion_trainer import sample_diffusion
|
||||||
from src.utils.clearml import ClearMLHelper
|
from src.utils.clearml import ClearMLHelper
|
||||||
|
|
||||||
### Arguments ###
|
# argparse to parse task id and model type
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('--task_id', type=str, default=None)
|
parser.add_argument('--task_id', type=str, default=None)
|
||||||
parser.add_argument('--model_type', type=str, default=None)
|
parser.add_argument('--model_type', type=str, default=None)
|
||||||
@@ -28,7 +27,6 @@ assert args.task_id is not None, "Please specify task id"
|
|||||||
assert args.model_type is not None, "Please specify model type"
|
assert args.model_type is not None, "Please specify model type"
|
||||||
assert args.model_name is not None, "Please specify model name"
|
assert args.model_name is not None, "Please specify model name"
|
||||||
|
|
||||||
### Baseline Policy ###
|
|
||||||
battery = Battery(2, 1)
|
battery = Battery(2, 1)
|
||||||
baseline_policy = BaselinePolicy(battery, data_path="")
|
baseline_policy = BaselinePolicy(battery, data_path="")
|
||||||
|
|
||||||
@@ -126,9 +124,6 @@ def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc,
|
|||||||
predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
|
predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
|
||||||
baseline_profits_cycles = {i: [0, 0] for i in penalties}
|
baseline_profits_cycles = {i: [0, 0] for i in penalties}
|
||||||
|
|
||||||
_charge_thresholds = {}
|
|
||||||
_discharge_thresholds = {}
|
|
||||||
|
|
||||||
initial, nrvs, target = predict_NRV(model, date, data_processor, test_loader)
|
initial, nrvs, target = predict_NRV(model, date, data_processor, test_loader)
|
||||||
|
|
||||||
initial = np.repeat(initial, nrvs.shape[0])
|
initial = np.repeat(initial, nrvs.shape[0])
|
||||||
@@ -144,10 +139,6 @@ def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc,
|
|||||||
|
|
||||||
for penalty in penalties:
|
for penalty in penalties:
|
||||||
found_charge_thresholds, found_discharge_thresholds = baseline_policy.get_optimal_thresholds(reconstructed_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
|
found_charge_thresholds, found_discharge_thresholds = baseline_policy.get_optimal_thresholds(reconstructed_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
|
||||||
|
|
||||||
_charge_thresholds[penalty] = found_charge_thresholds
|
|
||||||
_discharge_thresholds[penalty] = found_discharge_thresholds
|
|
||||||
|
|
||||||
next_day_charge_threshold = found_charge_thresholds.mean(axis=0)
|
next_day_charge_threshold = found_charge_thresholds.mean(axis=0)
|
||||||
next_day_discharge_threshold = found_discharge_thresholds.mean(axis=0)
|
next_day_discharge_threshold = found_discharge_thresholds.mean(axis=0)
|
||||||
yesterday_charge_thresholds, yesterday_discharge_thresholds = baseline_policy.get_optimal_thresholds(yesterday_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
|
yesterday_charge_thresholds, yesterday_discharge_thresholds = baseline_policy.get_optimal_thresholds(yesterday_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
|
||||||
@@ -162,26 +153,23 @@ def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc,
|
|||||||
baseline_profits_cycles[penalty][0] += yesterday_profit.item()
|
baseline_profits_cycles[penalty][0] += yesterday_profit.item()
|
||||||
baseline_profits_cycles[penalty][1] += yesterday_charge_cycles.item()
|
baseline_profits_cycles[penalty][1] += yesterday_charge_cycles.item()
|
||||||
|
|
||||||
return predicted_nrv_profits_cycles, baseline_profits_cycles, _charge_thresholds, _discharge_thresholds
|
return predicted_nrv_profits_cycles, baseline_profits_cycles
|
||||||
|
|
||||||
def next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV: callable):
|
def next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV: callable):
|
||||||
penalties = [0, 50, 250, 500, 1000, 1500]
|
penalties = [0, 10, 50, 150, 300, 500, 600, 800, 1000, 1500, 2000, 2500]
|
||||||
predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
|
predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
|
||||||
baseline_profits_cycles = {i: [0, 0] for i in penalties}
|
baseline_profits_cycles = {i: [0, 0] for i in penalties}
|
||||||
|
|
||||||
charge_thresholds = {}
|
# get all dates in test set
|
||||||
discharge_thresholds = {}
|
|
||||||
|
|
||||||
dates = baseline_policy.test_data["DateTime"].dt.date.unique()
|
dates = baseline_policy.test_data["DateTime"].dt.date.unique()
|
||||||
|
|
||||||
|
# dates back to datetime
|
||||||
dates = pd.to_datetime(dates)
|
dates = pd.to_datetime(dates)
|
||||||
|
|
||||||
for date in tqdm(dates):
|
for date in tqdm(dates):
|
||||||
try:
|
try:
|
||||||
new_predicted_nrv_profits_cycles, new_baseline_profits_cycles, new_charge_thresholds, new_discharge_thresholds = get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV, penalties)
|
new_predicted_nrv_profits_cycles, new_baseline_profits_cycles = get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV, penalties)
|
||||||
|
|
||||||
charge_thresholds[date] = new_charge_thresholds
|
|
||||||
discharge_thresholds[date] = new_discharge_thresholds
|
|
||||||
|
|
||||||
for penalty in penalties:
|
for penalty in penalties:
|
||||||
predicted_nrv_profits_cycles[penalty][0] += new_predicted_nrv_profits_cycles[penalty][0]
|
predicted_nrv_profits_cycles[penalty][0] += new_predicted_nrv_profits_cycles[penalty][0]
|
||||||
predicted_nrv_profits_cycles[penalty][1] += new_predicted_nrv_profits_cycles[penalty][1]
|
predicted_nrv_profits_cycles[penalty][1] += new_predicted_nrv_profits_cycles[penalty][1]
|
||||||
@@ -190,15 +178,16 @@ def next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV: call
|
|||||||
baseline_profits_cycles[penalty][1] += new_baseline_profits_cycles[penalty][1]
|
baseline_profits_cycles[penalty][1] += new_baseline_profits_cycles[penalty][1]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error for date {date}")
|
# print(f"Error for date {date}")
|
||||||
|
continue
|
||||||
|
|
||||||
return predicted_nrv_profits_cycles, baseline_profits_cycles, charge_thresholds, discharge_thresholds
|
return predicted_nrv_profits_cycles, baseline_profits_cycles
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
|
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
|
||||||
task = clearml_helper.get_task(task_name="Policy Test")
|
task = clearml_helper.get_task(task_name="Policy Test")
|
||||||
|
|
||||||
# task.execute_remotely(queue_name="default", exit_process=True)
|
task.execute_remotely(queue_name="default", exit_process=True)
|
||||||
|
|
||||||
configuration, model, data_processor, test_loader = load_model(args.task_id)
|
configuration, model, data_processor, test_loader = load_model(args.task_id)
|
||||||
|
|
||||||
@@ -216,92 +205,7 @@ def main():
|
|||||||
|
|
||||||
ipc = ImbalancePriceCalculator(data_path="")
|
ipc = ImbalancePriceCalculator(data_path="")
|
||||||
|
|
||||||
predicted_nrv_profits_cycles, baseline_profits_cycles, charge_thresholds, discharge_thresholds = next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV)
|
predicted_nrv_profits_cycles, baseline_profits_cycles = next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV)
|
||||||
# the charge_thresholds is a dictionary with date as key. The values of the dictionary is another dictionary with keys as penalties and values as the charge thresholds
|
|
||||||
# create density plot that shows a density plot of the charge thresholds for each penalty (use seaborn displot) (One plot with a different color for each penalty)
|
|
||||||
|
|
||||||
charge_thresholds_for_penalty = {}
|
|
||||||
for d in charge_thresholds.values():
|
|
||||||
for penalty, thresholds in d.items():
|
|
||||||
if penalty not in charge_thresholds_for_penalty:
|
|
||||||
charge_thresholds_for_penalty[penalty] = []
|
|
||||||
charge_thresholds_for_penalty[penalty].extend(thresholds)
|
|
||||||
|
|
||||||
discharge_thresholds_for_penalty = {}
|
|
||||||
for d in discharge_thresholds.values():
|
|
||||||
for penalty, thresholds in d.items():
|
|
||||||
if penalty not in discharge_thresholds_for_penalty:
|
|
||||||
discharge_thresholds_for_penalty[penalty] = []
|
|
||||||
discharge_thresholds_for_penalty[penalty].extend(thresholds)
|
|
||||||
|
|
||||||
def plot_threshold_distribution(thresholds: dict, title: str):
    """Log a KDE plot of the threshold values, with one curve per penalty.

    Args:
        thresholds: Maps each penalty to an iterable of threshold values.
            Every value must expose ``.item()``.
        title: Series name under which the figure is reported. It also
            drives the plot title and the x-axis label, so each plot is
            labelled with the quantity it actually shows.
    """
    # One row per individual threshold value, tagged with its penalty.
    rows = [
        {'Penalty': penalty, 'Value': value.item()}
        for penalty, values in thresholds.items()
        for value in values
    ]
    df = pd.DataFrame(rows)

    # One distinct color per penalty.
    palette = sns.color_palette("bright", len(thresholds))
    # NOTE(review): seaborn documents displot as figure-level (it returns a
    # grid object rather than a matplotlib Figure) - confirm that the logger
    # call below accepts it as `figure`.
    fig = sns.displot(data=df, x="Value", hue="Penalty", kind="kde", palette=palette)

    # The labels used to be hard-coded to "Charge ...", which mislabelled the
    # plot when this function was called for the discharge thresholds. They
    # are now derived from `title`; for "Charge Thresholds" the resulting
    # text is identical to the previous hard-coded strings.
    axis_label = title[:-1] if title.endswith("s") else title
    plt.title(f'Density of {title} by Penalty')
    plt.xlabel(axis_label)
    plt.ylabel('Density')
    plt.legend(title='Penalty')

    # `task` is taken from the enclosing scope.
    task.get_logger().report_matplotlib_figure(
        "Policy Results",
        title,
        iteration=0,
        figure=fig,
    )
    # Close the current figure so consecutive calls do not draw on top of
    # each other.
    plt.close()
|
|
||||||
|
|
||||||
### Plot charge thresholds distribution ###
|
|
||||||
plot_threshold_distribution(charge_thresholds_for_penalty, "Charge Thresholds")
|
|
||||||
|
|
||||||
### Plot discharge thresholds distribution ###
|
|
||||||
plot_threshold_distribution(discharge_thresholds_for_penalty, "Discharge Thresholds")
|
|
||||||
|
|
||||||
def plot_thresholds_per_day(thresholds: dict, title: str):
    """Log a line chart of the mean threshold per day, one line per penalty.

    Args:
        thresholds: Maps a date to a ``{penalty: values}`` dictionary. Each
            ``values`` object must provide ``mean()`` and ``std()`` whose
            results expose ``.item()``.
        title: Chart title, also used as the series name of the report.

    The mean and standard deviation of every (date, penalty) pair are
    printed; only the mean is drawn.
    """
    records = []
    for date, per_penalty in thresholds.items():
        for penalty, samples in per_penalty.items():
            mean_val = samples.mean().item()
            std_val = samples.std().item()  # spread of the thresholds on that day
            records.append(
                {'Date': date, 'Penalty': penalty, 'Mean': mean_val, 'StdDev': std_val}
            )
            print(f"Date: {date}, Penalty: {penalty}, Mean: {mean_val}, StdDev: {std_val}")

    frame = pd.DataFrame(records)
    # Parse the dates so the x-axis is a real time axis.
    frame["Date"] = pd.to_datetime(frame["Date"])

    line_options = dict(
        x="Date",
        y="Mean",
        color="Penalty",
        title=title,
        labels={"Mean": "Threshold", "Date": "Date"},
        markers=True,  # draw a marker on every data point
        hover_data=["Penalty"],  # show the penalty in the hover box
    )
    chart = px.line(frame, **line_options)

    # Fixed figure size and a horizontally centered title.
    chart.update_layout(width=1000, height=600, title_x=0.5)

    # `task` is taken from the enclosing scope.
    logger = task.get_logger()
    logger.report_plotly("Thresholds per Day", title, iteration=0, figure=chart)
|
|
||||||
|
|
||||||
### Plot mean charge thresholds per day ###
|
|
||||||
plot_thresholds_per_day(charge_thresholds, "Mean Charge Thresholds per Day")
|
|
||||||
|
|
||||||
### Plot mean discharge thresholds per day ###
|
|
||||||
plot_thresholds_per_day(discharge_thresholds, "Mean Discharge Thresholds per Day")
|
|
||||||
|
|
||||||
|
|
||||||
# create dataframe with columns "name", "penalty", "profit", "cycles"
|
# create dataframe with columns "name", "penalty", "profit", "cycles"
|
||||||
df = pd.DataFrame(columns=["name", "penalty", "profit", "cycles"])
|
df = pd.DataFrame(columns=["name", "penalty", "profit", "cycles"])
|
||||||
|
|||||||
@@ -33,29 +33,67 @@ class AutoRegressiveTrainer(Trainer):
|
|||||||
self.model.output_size = 1
|
self.model.output_size = 1
|
||||||
|
|
||||||
def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
|
def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch):
|
||||||
for actual_idx, idx in sample_indices.items():
|
num_samples = len(sample_indices)
|
||||||
auto_regressive_output = self.auto_regressive(data_loader.dataset, [idx]*1000)
|
rows = num_samples # One row per sample since we only want one column
|
||||||
|
|
||||||
|
# check if self has get_plot_error
|
||||||
|
if hasattr(self, "get_plot_error"):
|
||||||
|
cols = 2
|
||||||
|
print("Using get_plot_error")
|
||||||
|
else:
|
||||||
|
cols = 1
|
||||||
|
print("Using get_plot")
|
||||||
|
|
||||||
|
fig = make_subplots(
|
||||||
|
rows=rows,
|
||||||
|
cols=cols,
|
||||||
|
subplot_titles=[f"Sample {i+1}" for i in range(num_samples)],
|
||||||
|
)
|
||||||
|
|
||||||
|
for i, idx in enumerate(sample_indices):
|
||||||
|
auto_regressive_output = self.auto_regressive(data_loader.dataset, [idx])
|
||||||
if len(auto_regressive_output) == 3:
|
if len(auto_regressive_output) == 3:
|
||||||
initial, predictions, target = auto_regressive_output
|
initial, predictions, target = auto_regressive_output
|
||||||
else:
|
else:
|
||||||
initial, _, predictions, target = auto_regressive_output
|
initial, predictions, _, target = auto_regressive_output
|
||||||
|
|
||||||
|
initial = initial.squeeze(0)
|
||||||
# keep one initial
|
predictions = predictions.squeeze(0)
|
||||||
initial = initial[0]
|
target = target.squeeze(0)
|
||||||
target = target[0]
|
|
||||||
|
|
||||||
predictions = predictions
|
sub_fig = self.get_plot(initial, target, predictions, show_legend=(i == 0))
|
||||||
|
|
||||||
fig = self.get_plot(initial, target, predictions, show_legend=(0 == 0))
|
row = i + 1
|
||||||
|
col = 1
|
||||||
|
|
||||||
task.get_logger().report_matplotlib_figure(
|
for trace in sub_fig.data:
|
||||||
title="Training" if train else "Testing",
|
fig.add_trace(trace, row=row, col=col)
|
||||||
series=f'Sample {actual_idx}',
|
|
||||||
iteration=epoch,
|
if cols == 2:
|
||||||
figure=fig,
|
error_sub_fig = self.get_plot_error(
|
||||||
|
target, predictions
|
||||||
|
)
|
||||||
|
for trace in error_sub_fig.data:
|
||||||
|
fig.add_trace(trace, row=row, col=col + 1)
|
||||||
|
|
||||||
|
loss = self.criterion(
|
||||||
|
predictions.to(self.device), target.to(self.device)
|
||||||
|
).item()
|
||||||
|
|
||||||
|
fig["layout"]["annotations"][i].update(
|
||||||
|
text=f"{self.criterion.__class__.__name__}: {loss:.6f}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# y axis same for all plots
|
||||||
|
# fig.update_yaxes(range=[-1, 1], col=1)
|
||||||
|
|
||||||
|
fig.update_layout(height=1000 * rows)
|
||||||
|
task.get_logger().report_plotly(
|
||||||
|
title=f"{'Training' if train else 'Test'} Samples",
|
||||||
|
series="full_day",
|
||||||
|
iteration=epoch,
|
||||||
|
figure=fig,
|
||||||
|
)
|
||||||
|
|
||||||
def auto_regressive(self, data_loader, idx, sequence_length: int = 96):
|
def auto_regressive(self, data_loader, idx, sequence_length: int = 96):
|
||||||
self.model.eval()
|
self.model.eval()
|
||||||
|
|||||||
@@ -19,11 +19,7 @@ def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_
|
|||||||
alpha = 1. - beta
|
alpha = 1. - beta
|
||||||
alpha_hat = torch.cumprod(alpha, dim=0)
|
alpha_hat = torch.cumprod(alpha, dim=0)
|
||||||
|
|
||||||
if len(inputs.shape) == 2:
|
inputs = inputs.repeat(n, 1).to(device)
|
||||||
inputs = inputs.repeat(n, 1)
|
|
||||||
elif len(inputs.shape) == 3:
|
|
||||||
inputs = inputs.repeat(n, 1, 1)
|
|
||||||
|
|
||||||
model.eval()
|
model.eval()
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
x = torch.randn(inputs.shape[0], ts_length).to(device)
|
x = torch.randn(inputs.shape[0], ts_length).to(device)
|
||||||
@@ -40,17 +36,17 @@ def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_
|
|||||||
noise = torch.zeros_like(x)
|
noise = torch.zeros_like(x)
|
||||||
|
|
||||||
x = 1/torch.sqrt(_alpha) * (x-((1-_alpha) / (torch.sqrt(1 - _alpha_hat))) * predicted_noise) + torch.sqrt(_beta) * noise
|
x = 1/torch.sqrt(_alpha) * (x-((1-_alpha) / (torch.sqrt(1 - _alpha_hat))) * predicted_noise) + torch.sqrt(_beta) * noise
|
||||||
x = torch.clamp(x, -1.0, 1.0)
|
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DiffusionTrainer:
|
class DiffusionTrainer:
|
||||||
def __init__(self, model: nn.Module, data_processor: DataProcessor, device: torch.device):
|
def __init__(self, model: nn.Module, data_processor: DataProcessor, device: torch.device):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.device = device
|
self.device = device
|
||||||
|
|
||||||
self.noise_steps = 30
|
self.noise_steps = 1000
|
||||||
self.beta_start = 0.0001
|
self.beta_start = 1e-4
|
||||||
self.beta_end = 0.02
|
self.beta_end = 0.02
|
||||||
self.ts_length = 96
|
self.ts_length = 96
|
||||||
|
|
||||||
@@ -96,16 +92,7 @@ class DiffusionTrainer:
|
|||||||
else:
|
else:
|
||||||
loader = test_loader
|
loader = test_loader
|
||||||
|
|
||||||
# set seed
|
indices = np.random.randint(0, len(loader.dataset) - 1, size=num_samples)
|
||||||
np.random.seed(42)
|
|
||||||
|
|
||||||
actual_indices = np.random.choice(loader.dataset.full_day_valid_indices, num_samples, replace=False)
|
|
||||||
indices = {}
|
|
||||||
for i in actual_indices:
|
|
||||||
indices[i] = loader.dataset.valid_indices.index(i)
|
|
||||||
|
|
||||||
print(actual_indices)
|
|
||||||
|
|
||||||
return indices
|
return indices
|
||||||
|
|
||||||
def init_clearml_task(self, task):
|
def init_clearml_task(self, task):
|
||||||
@@ -114,12 +101,8 @@ class DiffusionTrainer:
|
|||||||
|
|
||||||
input_data = torch.randn(1024, 96).to(self.device)
|
input_data = torch.randn(1024, 96).to(self.device)
|
||||||
time_steps = torch.randn(1024).long().to(self.device)
|
time_steps = torch.randn(1024).long().to(self.device)
|
||||||
|
other_input_data = torch.randn(1024, self.model.other_inputs_dim).to(self.device)
|
||||||
|
|
||||||
if self.data_processor.lstm:
|
|
||||||
inputDim = self.data_processor.get_input_size()
|
|
||||||
other_input_data = torch.randn(1024, inputDim[1], self.model.other_inputs_dim).to(self.device)
|
|
||||||
else:
|
|
||||||
other_input_data = torch.randn(1024, self.model.other_inputs_dim).to(self.device)
|
|
||||||
task.set_configuration_object("model", str(summary(self.model, input_data=[input_data, time_steps, other_input_data])))
|
task.set_configuration_object("model", str(summary(self.model, input_data=[input_data, time_steps, other_input_data])))
|
||||||
|
|
||||||
self.data_processor = task.connect(self.data_processor, name="data_processor")
|
self.data_processor = task.connect(self.data_processor, name="data_processor")
|
||||||
@@ -137,8 +120,8 @@ class DiffusionTrainer:
|
|||||||
predict_sequence_length=self.ts_length
|
predict_sequence_length=self.ts_length
|
||||||
)
|
)
|
||||||
|
|
||||||
train_sample_indices = self.random_samples(train=True, num_samples=5)
|
train_sample_indices = self.random_samples(train=True, num_samples=10)
|
||||||
test_sample_indices = self.random_samples(train=False, num_samples=5)
|
test_sample_indices = self.random_samples(train=False, num_samples=10)
|
||||||
|
|
||||||
for epoch in range(epochs):
|
for epoch in range(epochs):
|
||||||
running_loss = 0.0
|
running_loss = 0.0
|
||||||
@@ -149,6 +132,8 @@ class DiffusionTrainer:
|
|||||||
|
|
||||||
t = self.sample_timesteps(time_series.shape[0]).to(self.device)
|
t = self.sample_timesteps(time_series.shape[0]).to(self.device)
|
||||||
x_t, noise = self.noise_time_series(time_series, t)
|
x_t, noise = self.noise_time_series(time_series, t)
|
||||||
|
x_t = x_t.unsqueeze(-1)
|
||||||
|
print(x_t.shape, t.shape, base_pattern.shape)
|
||||||
predicted_noise = self.model(x_t, t, base_pattern)
|
predicted_noise = self.model(x_t, t, base_pattern)
|
||||||
loss = criterion(predicted_noise, noise)
|
loss = criterion(predicted_noise, noise)
|
||||||
|
|
||||||
@@ -160,7 +145,7 @@ class DiffusionTrainer:
|
|||||||
|
|
||||||
running_loss /= len(train_loader.dataset)
|
running_loss /= len(train_loader.dataset)
|
||||||
|
|
||||||
if epoch % 40 == 0 and epoch != 0:
|
if epoch % 20 == 0 and epoch != 0:
|
||||||
self.test(test_loader, epoch, task)
|
self.test(test_loader, epoch, task)
|
||||||
|
|
||||||
if task:
|
if task:
|
||||||
@@ -171,7 +156,7 @@ class DiffusionTrainer:
|
|||||||
value=loss.item(),
|
value=loss.item(),
|
||||||
)
|
)
|
||||||
|
|
||||||
if epoch % 150 == 0 and epoch != 0:
|
if epoch % 100 == 0 and epoch != 0:
|
||||||
self.debug_plots(task, True, train_loader, train_sample_indices, epoch)
|
self.debug_plots(task, True, train_loader, train_sample_indices, epoch)
|
||||||
self.debug_plots(task, False, test_loader, test_sample_indices, epoch)
|
self.debug_plots(task, False, test_loader, test_sample_indices, epoch)
|
||||||
|
|
||||||
@@ -180,30 +165,26 @@ class DiffusionTrainer:
|
|||||||
|
|
||||||
|
|
||||||
def debug_plots(self, task, training: bool, data_loader, sample_indices, epoch):
|
def debug_plots(self, task, training: bool, data_loader, sample_indices, epoch):
|
||||||
for actual_idx, idx in sample_indices.items():
|
for i, idx in enumerate(sample_indices):
|
||||||
features, target, _ = data_loader.dataset[idx]
|
features, target, _ = data_loader.dataset[idx]
|
||||||
|
|
||||||
features = features.to(self.device)
|
features = features.to(self.device)
|
||||||
features = features.unsqueeze(0)
|
|
||||||
|
|
||||||
self.model.eval()
|
self.model.eval()
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
samples = self.sample(self.model, 100, features).cpu().numpy()
|
samples = self.sample(self.model, 100, features).cpu().numpy()
|
||||||
samples = self.data_processor.inverse_transform(samples)
|
|
||||||
target = self.data_processor.inverse_transform(target)
|
|
||||||
|
|
||||||
ci_99_upper = np.quantile(samples, 0.995, axis=0)
|
ci_99_upper = np.quantile(samples, 0.99, axis=0)
|
||||||
ci_99_lower = np.quantile(samples, 0.005, axis=0)
|
ci_99_lower = np.quantile(samples, 0.01, axis=0)
|
||||||
|
|
||||||
ci_95_upper = np.quantile(samples, 0.975, axis=0)
|
ci_95_upper = np.quantile(samples, 0.95, axis=0)
|
||||||
ci_95_lower = np.quantile(samples, 0.025, axis=0)
|
ci_95_lower = np.quantile(samples, 0.05, axis=0)
|
||||||
|
|
||||||
ci_90_upper = np.quantile(samples, 0.95, axis=0)
|
ci_90_upper = np.quantile(samples, 0.9, axis=0)
|
||||||
ci_90_lower = np.quantile(samples, 0.05, axis=0)
|
ci_90_lower = np.quantile(samples, 0.1, axis=0)
|
||||||
|
|
||||||
ci_50_lower = np.quantile(samples, 0.25, axis=0)
|
|
||||||
ci_50_upper = np.quantile(samples, 0.75, axis=0)
|
|
||||||
|
|
||||||
|
ci_50_upper = np.quantile(samples, 0.5, axis=0)
|
||||||
|
ci_50_lower = np.quantile(samples, 0.5, axis=0)
|
||||||
|
|
||||||
sns.set_theme()
|
sns.set_theme()
|
||||||
time_steps = np.arange(0, 96)
|
time_steps = np.arange(0, 96)
|
||||||
@@ -229,7 +210,7 @@ class DiffusionTrainer:
|
|||||||
|
|
||||||
task.get_logger().report_matplotlib_figure(
|
task.get_logger().report_matplotlib_figure(
|
||||||
title="Training" if training else "Testing",
|
title="Training" if training else "Testing",
|
||||||
series=f'Sample {actual_idx}',
|
series=f'Sample {i}',
|
||||||
iteration=epoch,
|
iteration=epoch,
|
||||||
figure=fig,
|
figure=fig,
|
||||||
)
|
)
|
||||||
@@ -243,7 +224,7 @@ class DiffusionTrainer:
|
|||||||
|
|
||||||
number_of_samples = 100
|
number_of_samples = 100
|
||||||
sample = self.sample(self.model, number_of_samples, inputs)
|
sample = self.sample(self.model, number_of_samples, inputs)
|
||||||
|
|
||||||
# reduce samples from (batch_size*number_of_samples, time_steps) to (batch_size, number_of_samples, time_steps)
|
# reduce samples from (batch_size*number_of_samples, time_steps) to (batch_size, number_of_samples, time_steps)
|
||||||
samples_batched = sample.reshape(inputs.shape[0], number_of_samples, 96)
|
samples_batched = sample.reshape(inputs.shape[0], number_of_samples, 96)
|
||||||
|
|
||||||
|
|||||||
@@ -10,9 +10,7 @@ import plotly.graph_objects as go
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from scipy.interpolate import CubicSpline
|
from scipy.interpolate import CubicSpline
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import seaborn as sns
|
|
||||||
import matplotlib.patches as mpatches
|
|
||||||
|
|
||||||
def sample_from_dist(quantiles, preds):
|
def sample_from_dist(quantiles, preds):
|
||||||
if isinstance(preds, torch.Tensor):
|
if isinstance(preds, torch.Tensor):
|
||||||
@@ -263,35 +261,35 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
|||||||
name="test_CRPS_from_samples_transformed", value=np.mean(crps_from_samples_metric)
|
name="test_CRPS_from_samples_transformed", value=np.mean(crps_from_samples_metric)
|
||||||
)
|
)
|
||||||
|
|
||||||
# def get_plot_error(
|
def get_plot_error(
|
||||||
# self,
|
self,
|
||||||
# next_day,
|
next_day,
|
||||||
# predictions,
|
predictions,
|
||||||
# ):
|
):
|
||||||
# metric = PinballLoss(quantiles=self.quantiles)
|
metric = PinballLoss(quantiles=self.quantiles)
|
||||||
# fig = go.Figure()
|
fig = go.Figure()
|
||||||
|
|
||||||
# next_day_np = next_day.view(-1).cpu().numpy()
|
next_day_np = next_day.view(-1).cpu().numpy()
|
||||||
# predictions_np = predictions.cpu().numpy()
|
predictions_np = predictions.cpu().numpy()
|
||||||
|
|
||||||
# if True:
|
if True:
|
||||||
# next_day_np = self.data_processor.inverse_transform(next_day_np)
|
next_day_np = self.data_processor.inverse_transform(next_day_np)
|
||||||
# predictions_np = self.data_processor.inverse_transform(predictions_np)
|
predictions_np = self.data_processor.inverse_transform(predictions_np)
|
||||||
|
|
||||||
# # for each time step, calculate the error using the metric
|
# for each time step, calculate the error using the metric
|
||||||
# errors = []
|
errors = []
|
||||||
# for i in range(96):
|
for i in range(96):
|
||||||
|
|
||||||
# target_tensor = torch.tensor(next_day_np[i]).unsqueeze(0)
|
target_tensor = torch.tensor(next_day_np[i]).unsqueeze(0)
|
||||||
# prediction_tensor = torch.tensor(predictions_np[i]).unsqueeze(0)
|
prediction_tensor = torch.tensor(predictions_np[i]).unsqueeze(0)
|
||||||
|
|
||||||
# errors.append(metric(prediction_tensor, target_tensor))
|
errors.append(metric(prediction_tensor, target_tensor))
|
||||||
|
|
||||||
# # plot the error
|
# plot the error
|
||||||
# fig.add_trace(go.Scatter(x=np.arange(96), y=errors, name=metric.__class__.__name__))
|
fig.add_trace(go.Scatter(x=np.arange(96), y=errors, name=metric.__class__.__name__))
|
||||||
# fig.update_layout(title=f"Error of {metric.__class__.__name__} for each time step")
|
fig.update_layout(title=f"Error of {metric.__class__.__name__} for each time step")
|
||||||
|
|
||||||
# return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def get_plot(
|
def get_plot(
|
||||||
@@ -314,59 +312,26 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
|||||||
next_day_np = self.data_processor.inverse_transform(next_day_np)
|
next_day_np = self.data_processor.inverse_transform(next_day_np)
|
||||||
predictions_np = self.data_processor.inverse_transform(predictions_np)
|
predictions_np = self.data_processor.inverse_transform(predictions_np)
|
||||||
|
|
||||||
ci_99_upper = np.quantile(predictions_np, 0.995, axis=0)
|
|
||||||
ci_99_lower = np.quantile(predictions_np, 0.005, axis=0)
|
|
||||||
|
|
||||||
ci_95_upper = np.quantile(predictions_np, 0.975, axis=0)
|
|
||||||
ci_95_lower = np.quantile(predictions_np, 0.025, axis=0)
|
|
||||||
|
|
||||||
ci_90_upper = np.quantile(predictions_np, 0.95, axis=0)
|
|
||||||
ci_90_lower = np.quantile(predictions_np, 0.05, axis=0)
|
|
||||||
|
|
||||||
ci_50_lower = np.quantile(predictions_np, 0.25, axis=0)
|
|
||||||
ci_50_upper = np.quantile(predictions_np, 0.75, axis=0)
|
|
||||||
|
|
||||||
# Add traces for current and next day
|
# Add traces for current and next day
|
||||||
# fig.add_trace(go.Scatter(x=np.arange(96), y=current_day_np, name="Current Day"))
|
fig.add_trace(go.Scatter(x=np.arange(96), y=current_day_np, name="Current Day"))
|
||||||
# fig.add_trace(go.Scatter(x=96 + np.arange(96), y=next_day_np, name="Next Day"))
|
fig.add_trace(go.Scatter(x=96 + np.arange(96), y=next_day_np, name="Next Day"))
|
||||||
|
|
||||||
# for i, q in enumerate(self.quantiles):
|
for i, q in enumerate(self.quantiles):
|
||||||
# fig.add_trace(
|
fig.add_trace(
|
||||||
# go.Scatter(
|
go.Scatter(
|
||||||
# x=96 + np.arange(96),
|
x=96 + np.arange(96),
|
||||||
# y=predictions_np[:, i],
|
y=predictions_np[:, i],
|
||||||
# name=f"Prediction (Q={q})",
|
name=f"Prediction (Q={q})",
|
||||||
# line=dict(dash="dash"),
|
line=dict(dash="dash"),
|
||||||
# )
|
)
|
||||||
# )
|
)
|
||||||
|
|
||||||
# # Update the layout
|
# Update the layout
|
||||||
# fig.update_layout(
|
fig.update_layout(
|
||||||
# title="Predictions and Quantiles of the Linear Model",
|
title="Predictions and Quantiles of the Linear Model",
|
||||||
# showlegend=show_legend,
|
showlegend=show_legend,
|
||||||
# )
|
)
|
||||||
|
|
||||||
sns.set_theme()
|
|
||||||
time_steps = np.arange(0, 96)
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=(20, 10))
|
|
||||||
ax.plot(time_steps, predictions_np.mean(axis=0), label="Mean of NRV samples", linewidth=3)
|
|
||||||
# ax.fill_between(time_steps, ci_lower, ci_upper, color='b', alpha=0.2, label='Full Interval')
|
|
||||||
|
|
||||||
ax.fill_between(time_steps, ci_99_lower, ci_99_upper, color='b', alpha=0.2, label='99% Interval')
|
|
||||||
ax.fill_between(time_steps, ci_95_lower, ci_95_upper, color='b', alpha=0.2, label='95% Interval')
|
|
||||||
ax.fill_between(time_steps, ci_90_lower, ci_90_upper, color='b', alpha=0.2, label='90% Interval')
|
|
||||||
ax.fill_between(time_steps, ci_50_lower, ci_50_upper, color='b', alpha=0.2, label='50% Interval')
|
|
||||||
|
|
||||||
ax.plot(next_day_np, label="Real NRV", linewidth=3)
|
|
||||||
# full_interval_patch = mpatches.Patch(color='b', alpha=0.2, label='Full Interval')
|
|
||||||
ci_99_patch = mpatches.Patch(color='b', alpha=0.3, label='99% Interval')
|
|
||||||
ci_95_patch = mpatches.Patch(color='b', alpha=0.4, label='95% Interval')
|
|
||||||
ci_90_patch = mpatches.Patch(color='b', alpha=0.5, label='90% Interval')
|
|
||||||
ci_50_patch = mpatches.Patch(color='b', alpha=0.6, label='50% Interval')
|
|
||||||
|
|
||||||
|
|
||||||
ax.legend(handles=[ci_99_patch, ci_95_patch, ci_90_patch, ci_50_patch, ax.lines[0], ax.lines[1]])
|
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96):
|
def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96):
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ class Trainer:
|
|||||||
|
|
||||||
def random_samples(self, train: bool = True, num_samples: int = 10):
|
def random_samples(self, train: bool = True, num_samples: int = 10):
|
||||||
train_loader, test_loader = self.data_processor.get_dataloaders(
|
train_loader, test_loader = self.data_processor.get_dataloaders(
|
||||||
predict_sequence_length=96
|
predict_sequence_length=self.model.output_size
|
||||||
)
|
)
|
||||||
|
|
||||||
if train:
|
if train:
|
||||||
@@ -94,14 +94,7 @@ class Trainer:
|
|||||||
else:
|
else:
|
||||||
loader = test_loader
|
loader = test_loader
|
||||||
|
|
||||||
np.random.seed(42)
|
indices = np.random.randint(0, len(loader.dataset) - 1, size=num_samples)
|
||||||
actual_indices = np.random.choice(loader.dataset.full_day_valid_indices, num_samples, replace=False)
|
|
||||||
indices = {}
|
|
||||||
for i in actual_indices:
|
|
||||||
indices[i] = loader.dataset.valid_indices.index(i)
|
|
||||||
|
|
||||||
print(actual_indices)
|
|
||||||
|
|
||||||
return indices
|
return indices
|
||||||
|
|
||||||
def train(self, epochs: int, remotely: bool = False, task: Task = None):
|
def train(self, epochs: int, remotely: bool = False, task: Task = None):
|
||||||
@@ -114,8 +107,8 @@ class Trainer:
|
|||||||
predict_sequence_length=self.model.output_size
|
predict_sequence_length=self.model.output_size
|
||||||
)
|
)
|
||||||
|
|
||||||
train_samples = self.random_samples(train=True, num_samples=5)
|
train_samples = self.random_samples(train=True)
|
||||||
test_samples = self.random_samples(train=False, num_samples=5)
|
test_samples = self.random_samples(train=False)
|
||||||
|
|
||||||
self.init_clearml_task(task)
|
self.init_clearml_task(task)
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from torch.nn import MSELoss, L1Loss
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from src.models.time_embedding_layer import TimeEmbedding
|
from src.models.time_embedding_layer import TimeEmbedding
|
||||||
from src.models.diffusion_model import GRUDiffusionModel, SimpleDiffusionModel
|
from src.models.diffusion_model import SimpleDiffusionModel
|
||||||
from src.trainers.diffusion_trainer import DiffusionTrainer
|
from src.trainers.diffusion_trainer import DiffusionTrainer
|
||||||
|
|
||||||
|
|
||||||
@@ -38,24 +38,22 @@ data_config.NOMINAL_NET_POSITION = True
|
|||||||
data_config = task.connect(data_config, name="data_features")
|
data_config = task.connect(data_config, name="data_features")
|
||||||
|
|
||||||
data_processor = DataProcessor(data_config, path="", lstm=False)
|
data_processor = DataProcessor(data_config, path="", lstm=False)
|
||||||
data_processor.set_batch_size(64)
|
data_processor.set_batch_size(8192)
|
||||||
data_processor.set_full_day_skip(True)
|
data_processor.set_full_day_skip(True)
|
||||||
|
|
||||||
inputDim = data_processor.get_input_size()
|
inputDim = data_processor.get_input_size()
|
||||||
print("Input dim: ", inputDim)
|
|
||||||
|
|
||||||
model_parameters = {
|
model_parameters = {
|
||||||
"epochs": 5000,
|
"epochs": 5000,
|
||||||
"learning_rate": 0.0001,
|
"learning_rate": 0.0001,
|
||||||
"hidden_sizes": [128, 128],
|
"hidden_sizes": [512, 512, 512],
|
||||||
"time_dim": 8,
|
"time_dim": 64,
|
||||||
}
|
}
|
||||||
|
|
||||||
model_parameters = task.connect(model_parameters, name="model_parameters")
|
model_parameters = task.connect(model_parameters, name="model_parameters")
|
||||||
|
|
||||||
#### Model ####
|
#### Model ####
|
||||||
model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"])
|
model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"])
|
||||||
# model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=128)
|
|
||||||
|
|
||||||
print("Starting training ...")
|
print("Starting training ...")
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,6 @@ class ClearMLHelper:
|
|||||||
Task.ignore_requirements("torchvision")
|
Task.ignore_requirements("torchvision")
|
||||||
Task.ignore_requirements("tensorboard")
|
Task.ignore_requirements("tensorboard")
|
||||||
task = Task.init(project_name=self.project_name, task_name=task_name, continue_last_task=False)
|
task = Task.init(project_name=self.project_name, task_name=task_name, continue_last_task=False)
|
||||||
task.set_base_docker(f"docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime")
|
task.set_base_docker(f"docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime --env GIT_SSL_NO_VERIFY=true --env CLEARML_AGENT_GIT_USER=VictorMylle --env CLEARML_AGENT_GIT_PASS=Voetballer1" )
|
||||||
task.set_packages("requirements.txt")
|
task.set_packages("requirements.txt")
|
||||||
return task
|
return task
|
||||||
Reference in New Issue
Block a user