Rewrote the NRVDataset to be cleaner

This commit is contained in:
Victor Mylle
2023-11-28 15:35:35 +00:00
parent f9e8f9e69f
commit ffa19592f9
3 changed files with 83 additions and 181 deletions

View File

@@ -1,6 +1,7 @@
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
class NrvDataset(Dataset):
@@ -19,37 +20,44 @@ class NrvDataset(Dataset):
# reset dataframe index
self.dataframe.reset_index(drop=True, inplace=True)
self.nrv = torch.tensor(dataframe["nrv"].to_numpy(), dtype=torch.float32)
self.load_forecast = torch.tensor(
dataframe["load_forecast"].to_numpy(), dtype=torch.float32
)
self.total_load = torch.tensor(
dataframe["total_load"].to_numpy(), dtype=torch.float32
)
self.pv_gen_forecast = torch.tensor(
dataframe["pv_forecast"].to_numpy(), dtype=torch.float32
)
self.wind_gen_forecast = torch.tensor(
dataframe["wind_forecast"].to_numpy(), dtype=torch.float32
)
self.quarter = torch.tensor(
dataframe["quarter"].to_numpy(), dtype=torch.float32
)
self.day_of_week = torch.tensor(
dataframe["day_of_week"].to_numpy(), dtype=torch.float32
)
self.sequence_length = sequence_length
self.predict_sequence_length = predict_sequence_length
self.samples_to_skip = self.skip_samples()
total_indices = set(
range(len(self.nrv) - self.sequence_length - self.predict_sequence_length)
range(len(self.dataframe) - self.sequence_length - self.predict_sequence_length)
)
self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
self.history_features = []
if self.data_config.LOAD_HISTORY:
self.history_features.append("total_load")
if self.data_config.PV_HISTORY:
self.history_features.append("pv_gen_forecast")
if self.data_config.WIND_HISTORY:
self.history_features.append("wind_gen_forecast")
self.forecast_features = []
if self.data_config.LOAD_FORECAST:
self.forecast_features.append("load_forecast")
if self.data_config.PV_FORECAST:
self.forecast_features.append("pv_gen_forecast")
if self.data_config.WIND_FORECAST:
self.forecast_features.append("wind_gen_forecast")
# add time feature to dataframe
time_feature = np.array([0] * len(self.dataframe))
if self.data_config.QUARTER:
time_feature += self.dataframe["quarter"]
if self.data_config.DAY_OF_WEEK:
d_w = self.dataframe["day_of_week"]
if self.data_config.QUARTER:
d_w *= 96
time_feature += d_w
self.dataframe["time_feature"] = time_feature
def skip_samples(self):
nan_rows = self.dataframe[self.dataframe.isnull().any(axis=1)]
nan_indices = nan_rows.index
@@ -80,88 +88,41 @@ class NrvDataset(Dataset):
def __len__(self):
return len(self.valid_indices)
def _get__all_data(self, idx: int):
history_df = self.dataframe.iloc[idx : idx + self.sequence_length]
forecast_df = self.dataframe.iloc[
idx + self.sequence_length : idx + self.sequence_length + self.predict_sequence_length
]
return history_df, forecast_df
def __getitem__(self, idx):
actual_idx = self.valid_indices[idx]
features = []
history_df, forecast_df = self._get__all_data(actual_idx)
# get nrv history features
nrv_features = torch.tensor(history_df[["nrv"]].values).reshape(-1)
if self.data_config.NRV_HISTORY:
nrv = self.nrv[actual_idx : actual_idx + self.sequence_length]
features.append(nrv.view(-1))
# get history features
history_features = history_df[self.history_features].values
if self.data_config.LOAD_HISTORY:
load_history = self.total_load[
actual_idx : actual_idx + self.sequence_length
]
features.append(load_history.view(-1))
# flatten the history features into one tensor (row-major: every feature of the first time step, then every feature of the next time step, etc.)
history_features = torch.tensor(history_features).reshape(-1)
if self.data_config.PV_HISTORY:
pv_history = self.pv_gen_forecast[
actual_idx : actual_idx + self.sequence_length
]
features.append(pv_history.view(-1))
# get forecast features
forecast_features = forecast_df[self.forecast_features].values
forecast_features = torch.tensor(forecast_features).view(-1)
if self.data_config.WIND_HISTORY:
wind_history = self.wind_gen_forecast[
actual_idx : actual_idx + self.sequence_length
]
features.append(wind_history.view(-1))
# add last time feature of the history
time_feature = history_df["time_feature"].iloc[-1]
if self.data_config.LOAD_FORECAST:
load_forecast = self.load_forecast[
actual_idx
+ self.sequence_length : actual_idx
+ self.sequence_length
+ self.predict_sequence_length
]
features.append(load_forecast.view(-1))
if self.data_config.PV_FORECAST:
pv_forecast = self.pv_gen_forecast[
actual_idx
+ self.sequence_length : actual_idx
+ self.sequence_length
+ self.predict_sequence_length
]
features.append(pv_forecast.view(-1))
if self.data_config.WIND_FORECAST:
wind_forecast = self.wind_gen_forecast[
actual_idx
+ self.sequence_length : actual_idx
+ self.sequence_length
+ self.predict_sequence_length
]
features.append(wind_forecast.view(-1))
### Time Features ###
time_feature = 0
if self.data_config.QUARTER:
time_feature += self.quarter[actual_idx].item()
if self.data_config.DAY_OF_WEEK:
d_w = self.day_of_week[actual_idx].item()
if self.data_config.QUARTER:
d_w *= 96
time_feature += d_w
if time_feature is not None:
features.append(torch.tensor([time_feature]))
if not features:
raise ValueError(
"No features are configured to be included in the dataset."
)
# Concatenate along dimension 0 to create a one-dimensional feature vector
all_features = torch.cat(features, dim=0)
## all features
all_features = torch.cat(
[nrv_features, history_features, forecast_features, torch.tensor([time_feature])], dim=0
)
# Target sequence, flattened if necessary
nrv_target = self.nrv[
actual_idx
+ self.sequence_length : actual_idx
+ self.sequence_length
+ self.predict_sequence_length
].view(-1)
nrv_target = forecast_df["nrv"].values
# check if nan values are present
if torch.isnan(all_features).any():
@@ -169,78 +130,21 @@ class NrvDataset(Dataset):
print(f"Actual index: {actual_idx}")
raise ValueError("There are nan values in the features.")
# all features and target to float
all_features = all_features.float()
# to tensors
nrv_target = torch.tensor(nrv_target).float()
return all_features, nrv_target, idx
def random_day_autoregressive(self, idx: int):
idx = self.valid_indices[idx]
features = []
all_features, nrv_target, _ = self.__getitem__(idx)
# we already have the NRV history with the newly predicted values, so we don't need to include the last 96 values
if self.data_config.LOAD_HISTORY:
load_history = self.total_load[idx : idx + self.sequence_length]
features.append(load_history.view(-1))
# remove the first 96 values of the features (the nrv history)
all_features = all_features[self.sequence_length :]
if self.data_config.PV_HISTORY:
pv_history = self.pv_gen_forecast[idx : idx + self.sequence_length]
features.append(pv_history.view(-1))
return all_features, nrv_target
if self.data_config.WIND_HISTORY:
wind_history = self.wind_gen_forecast[idx : idx + self.sequence_length]
features.append(wind_history.view(-1))
if self.data_config.LOAD_FORECAST:
load_forecast = self.load_forecast[
idx
+ self.sequence_length : idx
+ self.sequence_length
+ self.predict_sequence_length
]
features.append(load_forecast.view(-1))
if self.data_config.PV_FORECAST:
pv_forecast = self.pv_gen_forecast[
idx
+ self.sequence_length : idx
+ self.sequence_length
+ self.predict_sequence_length
]
features.append(pv_forecast.view(-1))
if self.data_config.WIND_FORECAST:
wind_forecast = self.wind_gen_forecast[
idx
+ self.sequence_length : idx
+ self.sequence_length
+ self.predict_sequence_length
]
features.append(wind_forecast.view(-1))
### Time Features ###
time_feature = 0
if self.data_config.QUARTER:
time_feature += self.quarter[idx]
if self.data_config.DAY_OF_WEEK:
d_w = self.day_of_week[idx].item()
if self.data_config.QUARTER:
d_w *= 96
time_feature += d_w
if time_feature is not None:
features.append(torch.tensor([time_feature]))
target = self.nrv[
idx
+ self.sequence_length : idx
+ self.sequence_length
+ self.predict_sequence_length
]
if len(features) == 0:
return None, target
all_features = torch.cat(features, dim=0)
return all_features, target
def get_batch(self, idx: list):
features = []

View File

@@ -1,30 +1,28 @@
import torch
from torch import nn
import torch
from properscoring import crps_ensemble
class CRPSLoss(nn.Module):
    """Continuous Ranked Probability Score metric backed by ``properscoring``.

    The predictions of each sample are passed to ``crps_ensemble`` as the
    forecast ensemble and the per-sample scores are averaged.
    """

    def __init__(self, quantiles=None):
        """Create the metric.

        Args:
            quantiles: Optional and not used by ``forward``. It is accepted so
                that both ``CRPSLoss()`` and ``CRPSLoss(quantiles)`` keep
                working. When given, it is stored as a float32 tensor in
                ``self.quantiles_tensor``; otherwise that attribute is ``None``.
        """
        super().__init__()

        if quantiles is not None and not torch.is_tensor(quantiles):
            quantiles = torch.tensor(quantiles, dtype=torch.float32)
        self.quantiles_tensor = quantiles

    def forward(self, preds, target):
        """Return the mean CRPS of ``preds`` against ``target``.

        Args:
            preds: Predicted values; presumably shaped
                ``[batch_size, num_quantiles]`` (TODO confirm against the
                trainer). The last axis is what ``crps_ensemble`` treats as the
                ensemble by default.
            target: Observed values. A trailing singleton dimension is squeezed
                away before scoring.

        Returns:
            The mean of the scores returned by ``crps_ensemble``.
        """
        # Tensors are detached and moved to the CPU before being handed to
        # properscoring.
        if isinstance(preds, torch.Tensor):
            preds = preds.detach().cpu()

        if isinstance(target, torch.Tensor):
            target = target.detach().cpu()

        # Squeeze the last dimension so target lines up with preds minus its
        # ensemble axis.
        target = target.squeeze(-1)

        scores = crps_ensemble(target, preds)

        # mean over batch
        return scores.mean()

View File

@@ -16,7 +16,7 @@ from src.models.time_embedding_layer import TimeEmbedding
#### ClearML ####
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
task = clearml_helper.get_task(task_name="None")
task = clearml_helper.get_task(task_name="Autoregressive Quantile Regression")
#### Data Processor ####
@@ -63,8 +63,8 @@ trainer = AutoRegressiveQuantileTrainer(
debug=True,
)
trainer.add_metrics_to_track(
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss()]
)
trainer.early_stopping(patience=10)
trainer.plot_every(5)
trainer.train(task=task, epochs=epochs, remotely=True)
trainer.train(task=task, epochs=epochs, remotely=False)