Rewrote the NRVDataset to be cleaner
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import torch
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
class NrvDataset(Dataset):
|
||||
@@ -19,37 +20,44 @@ class NrvDataset(Dataset):
|
||||
# reset dataframe index
|
||||
self.dataframe.reset_index(drop=True, inplace=True)
|
||||
|
||||
self.nrv = torch.tensor(dataframe["nrv"].to_numpy(), dtype=torch.float32)
|
||||
self.load_forecast = torch.tensor(
|
||||
dataframe["load_forecast"].to_numpy(), dtype=torch.float32
|
||||
)
|
||||
self.total_load = torch.tensor(
|
||||
dataframe["total_load"].to_numpy(), dtype=torch.float32
|
||||
)
|
||||
self.pv_gen_forecast = torch.tensor(
|
||||
dataframe["pv_forecast"].to_numpy(), dtype=torch.float32
|
||||
)
|
||||
self.wind_gen_forecast = torch.tensor(
|
||||
dataframe["wind_forecast"].to_numpy(), dtype=torch.float32
|
||||
)
|
||||
|
||||
self.quarter = torch.tensor(
|
||||
dataframe["quarter"].to_numpy(), dtype=torch.float32
|
||||
)
|
||||
|
||||
self.day_of_week = torch.tensor(
|
||||
dataframe["day_of_week"].to_numpy(), dtype=torch.float32
|
||||
)
|
||||
|
||||
self.sequence_length = sequence_length
|
||||
self.predict_sequence_length = predict_sequence_length
|
||||
|
||||
self.samples_to_skip = self.skip_samples()
|
||||
total_indices = set(
|
||||
range(len(self.nrv) - self.sequence_length - self.predict_sequence_length)
|
||||
range(len(self.dataframe) - self.sequence_length - self.predict_sequence_length)
|
||||
)
|
||||
self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
|
||||
|
||||
self.history_features = []
|
||||
if self.data_config.LOAD_HISTORY:
|
||||
self.history_features.append("total_load")
|
||||
if self.data_config.PV_HISTORY:
|
||||
self.history_features.append("pv_gen_forecast")
|
||||
if self.data_config.WIND_HISTORY:
|
||||
self.history_features.append("wind_gen_forecast")
|
||||
|
||||
self.forecast_features = []
|
||||
if self.data_config.LOAD_FORECAST:
|
||||
self.forecast_features.append("load_forecast")
|
||||
if self.data_config.PV_FORECAST:
|
||||
self.forecast_features.append("pv_gen_forecast")
|
||||
if self.data_config.WIND_FORECAST:
|
||||
self.forecast_features.append("wind_gen_forecast")
|
||||
|
||||
# add time feature to dataframe
|
||||
time_feature = np.array([0] * len(self.dataframe))
|
||||
if self.data_config.QUARTER:
|
||||
time_feature += self.dataframe["quarter"]
|
||||
|
||||
if self.data_config.DAY_OF_WEEK:
|
||||
d_w = self.dataframe["day_of_week"]
|
||||
if self.data_config.QUARTER:
|
||||
d_w *= 96
|
||||
time_feature += d_w
|
||||
|
||||
self.dataframe["time_feature"] = time_feature
|
||||
|
||||
def skip_samples(self):
|
||||
nan_rows = self.dataframe[self.dataframe.isnull().any(axis=1)]
|
||||
nan_indices = nan_rows.index
|
||||
@@ -80,88 +88,41 @@ class NrvDataset(Dataset):
|
||||
def __len__(self):
    """Return the number of usable samples.

    The count is taken from ``self.valid_indices`` (the window start
    positions left after the skipped samples are removed), not from the
    number of rows in the underlying data.
    """
    return len(self.valid_indices)
|
||||
|
||||
def _get__all_data(self, idx: int):
    """Slice the history and forecast windows that start at row ``idx``.

    Args:
        idx: Positional (``iloc``) offset of the first history row in
            ``self.dataframe``.

    Returns:
        A ``(history_df, forecast_df)`` tuple. ``history_df`` holds the
        ``sequence_length`` rows starting at ``idx``; ``forecast_df`` holds
        the ``predict_sequence_length`` rows that follow directly after.
    """
    # The forecast window begins exactly where the history window ends.
    history_end = idx + self.sequence_length
    forecast_end = history_end + self.predict_sequence_length

    history_df = self.dataframe.iloc[idx:history_end]
    forecast_df = self.dataframe.iloc[history_end:forecast_end]
    return history_df, forecast_df
|
||||
|
||||
def __getitem__(self, idx):
|
||||
actual_idx = self.valid_indices[idx]
|
||||
features = []
|
||||
|
||||
history_df, forecast_df = self._get__all_data(actual_idx)
|
||||
|
||||
# get nrv history features
|
||||
nrv_features = torch.tensor(history_df[["nrv"]].values).reshape(-1)
|
||||
|
||||
if self.data_config.NRV_HISTORY:
|
||||
nrv = self.nrv[actual_idx : actual_idx + self.sequence_length]
|
||||
features.append(nrv.view(-1))
|
||||
# get history features
|
||||
history_features = history_df[self.history_features].values
|
||||
|
||||
if self.data_config.LOAD_HISTORY:
|
||||
load_history = self.total_load[
|
||||
actual_idx : actual_idx + self.sequence_length
|
||||
]
|
||||
features.append(load_history.view(-1))
|
||||
# combine the history features to one tensor (first one feature, then the next one, etc.)
|
||||
history_features = torch.tensor(history_features).reshape(-1)
|
||||
|
||||
if self.data_config.PV_HISTORY:
|
||||
pv_history = self.pv_gen_forecast[
|
||||
actual_idx : actual_idx + self.sequence_length
|
||||
]
|
||||
features.append(pv_history.view(-1))
|
||||
# get forecast features
|
||||
forecast_features = forecast_df[self.forecast_features].values
|
||||
forecast_features = torch.tensor(forecast_features).view(-1)
|
||||
|
||||
if self.data_config.WIND_HISTORY:
|
||||
wind_history = self.wind_gen_forecast[
|
||||
actual_idx : actual_idx + self.sequence_length
|
||||
]
|
||||
features.append(wind_history.view(-1))
|
||||
# add last time feature of the history
|
||||
time_feature = history_df["time_feature"].iloc[-1]
|
||||
|
||||
if self.data_config.LOAD_FORECAST:
|
||||
load_forecast = self.load_forecast[
|
||||
actual_idx
|
||||
+ self.sequence_length : actual_idx
|
||||
+ self.sequence_length
|
||||
+ self.predict_sequence_length
|
||||
]
|
||||
features.append(load_forecast.view(-1))
|
||||
|
||||
if self.data_config.PV_FORECAST:
|
||||
pv_forecast = self.pv_gen_forecast[
|
||||
actual_idx
|
||||
+ self.sequence_length : actual_idx
|
||||
+ self.sequence_length
|
||||
+ self.predict_sequence_length
|
||||
]
|
||||
features.append(pv_forecast.view(-1))
|
||||
|
||||
if self.data_config.WIND_FORECAST:
|
||||
wind_forecast = self.wind_gen_forecast[
|
||||
actual_idx
|
||||
+ self.sequence_length : actual_idx
|
||||
+ self.sequence_length
|
||||
+ self.predict_sequence_length
|
||||
]
|
||||
features.append(wind_forecast.view(-1))
|
||||
|
||||
### Time Features ###
|
||||
time_feature = 0
|
||||
if self.data_config.QUARTER:
|
||||
time_feature += self.quarter[actual_idx].item()
|
||||
|
||||
if self.data_config.DAY_OF_WEEK:
|
||||
d_w = self.day_of_week[actual_idx].item()
|
||||
if self.data_config.QUARTER:
|
||||
d_w *= 96
|
||||
time_feature += d_w
|
||||
|
||||
if time_feature is not None:
|
||||
features.append(torch.tensor([time_feature]))
|
||||
|
||||
if not features:
|
||||
raise ValueError(
|
||||
"No features are configured to be included in the dataset."
|
||||
)
|
||||
|
||||
# Concatenate along dimension 0 to create a one-dimensional feature vector
|
||||
all_features = torch.cat(features, dim=0)
|
||||
## all features
|
||||
all_features = torch.cat(
|
||||
[nrv_features, history_features, forecast_features, torch.tensor([time_feature])], dim=0
|
||||
)
|
||||
|
||||
# Target sequence, flattened if necessary
|
||||
nrv_target = self.nrv[
|
||||
actual_idx
|
||||
+ self.sequence_length : actual_idx
|
||||
+ self.sequence_length
|
||||
+ self.predict_sequence_length
|
||||
].view(-1)
|
||||
nrv_target = forecast_df["nrv"].values
|
||||
|
||||
# check if nan values are present
|
||||
if torch.isnan(all_features).any():
|
||||
@@ -169,78 +130,21 @@ class NrvDataset(Dataset):
|
||||
print(f"Actual index: {actual_idx}")
|
||||
raise ValueError("There are nan values in the features.")
|
||||
|
||||
# all features and target to float
|
||||
all_features = all_features.float()
|
||||
|
||||
# to tensors
|
||||
nrv_target = torch.tensor(nrv_target).float()
|
||||
return all_features, nrv_target, idx
|
||||
|
||||
def random_day_autoregressive(self, idx: int):
    """Return one sample with the leading NRV-history slice removed.

    Args:
        idx: Dataset index. It is passed unchanged to ``__getitem__``,
            which already maps it through ``self.valid_indices``; mapping
            it here as well would select the wrong window.

    Returns:
        A ``(features, nrv_target)`` tuple, where ``features`` is the
        feature vector from ``__getitem__`` without its first
        ``sequence_length`` entries (the NRV history, which is placed
        first in that vector) and ``nrv_target`` is returned unchanged.
    """
    all_features, nrv_target, _ = self[idx]

    # Drop the NRV history block at the front of the feature vector.
    # NOTE(review): presumably the autoregressive caller supplies its own
    # NRV history built from earlier predictions -- confirm against caller.
    all_features = all_features[self.sequence_length :]

    return all_features, nrv_target
|
||||
|
||||
def get_batch(self, idx: list):
|
||||
features = []
|
||||
|
||||
@@ -1,30 +1,28 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
import torch
|
||||
from properscoring import crps_ensemble
|
||||
|
||||
|
||||
class CRPSLoss(nn.Module):
    """CRPS metric computed with ``properscoring.crps_ensemble``.

    The values along the last axis of ``preds`` are handed to
    ``crps_ensemble`` as the ensemble for the matching ``target`` value,
    and the resulting scores are averaged.

    Tensor inputs are detached and moved to the CPU before scoring, so the
    result carries no gradient.
    """

    def __init__(self, quantiles=None):
        """Create the metric.

        Args:
            quantiles: Not used by the computation. Accepted, and kept on
                ``self.quantiles_tensor``, so that callers which still
                pass the quantile levels keep working. ``None`` when
                omitted.
        """
        super().__init__()

        if quantiles is not None and not torch.is_tensor(quantiles):
            quantiles = torch.tensor(quantiles, dtype=torch.float32)
        self.quantiles_tensor = quantiles

    def forward(self, preds, target):
        """Return the mean CRPS of ``preds`` against ``target``.

        Args:
            preds: Predictions of shape ``[batch_size, num_quantiles]``.
            target: Observed values; a trailing singleton dimension is
                squeezed away before scoring.

        Returns:
            The mean of the scores returned by ``crps_ensemble``.
        """
        # crps_ensemble runs outside torch, so tensors leave the graph/GPU.
        if isinstance(preds, torch.Tensor):
            preds = preds.detach().cpu()

        if isinstance(target, torch.Tensor):
            target = target.detach().cpu()

        # Align target with preds minus its last (ensemble) axis.
        target = target.squeeze(-1)

        scores = crps_ensemble(target, preds)

        # Mean over the batch.
        return scores.mean()
|
||||
|
||||
@@ -16,7 +16,7 @@ from src.models.time_embedding_layer import TimeEmbedding
|
||||
|
||||
#### ClearML ####
|
||||
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
|
||||
task = clearml_helper.get_task(task_name="None")
|
||||
task = clearml_helper.get_task(task_name="Autoregressive Quantile Regression")
|
||||
|
||||
|
||||
#### Data Processor ####
|
||||
@@ -63,8 +63,8 @@ trainer = AutoRegressiveQuantileTrainer(
|
||||
debug=True,
|
||||
)
|
||||
trainer.add_metrics_to_track(
|
||||
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
|
||||
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss()]
|
||||
)
|
||||
trainer.early_stopping(patience=10)
|
||||
trainer.plot_every(5)
|
||||
trainer.train(task=task, epochs=epochs, remotely=True)
|
||||
trainer.train(task=task, epochs=epochs, remotely=False)
|
||||
Reference in New Issue
Block a user