Quarter embedding using trigonometry + more thesis writing
This commit is contained in:
@@ -46,7 +46,7 @@ class NrvDataset(Dataset):
|
||||
if self.data_config.LOAD_HISTORY:
|
||||
self.history_features.append("total_load")
|
||||
if self.data_config.PV_HISTORY:
|
||||
self.history_features.append("pv_gen_forecast")
|
||||
self.history_features.append("pv_history")
|
||||
if self.data_config.WIND_HISTORY:
|
||||
self.history_features.append("wind_history")
|
||||
if self.data_config.NOMINAL_NET_POSITION:
|
||||
@@ -56,7 +56,7 @@ class NrvDataset(Dataset):
|
||||
if self.data_config.LOAD_FORECAST:
|
||||
self.forecast_features.append("load_forecast")
|
||||
if self.data_config.PV_FORECAST:
|
||||
self.forecast_features.append("pv_gen_forecast")
|
||||
self.forecast_features.append("pv_forecast")
|
||||
if self.data_config.WIND_FORECAST:
|
||||
self.forecast_features.append("wind_forecast")
|
||||
if self.data_config.NOMINAL_NET_POSITION:
|
||||
|
||||
@@ -40,7 +40,7 @@ class DataConfig:
|
||||
|
||||
|
||||
class DataProcessor:
|
||||
def __init__(self, data_config: DataConfig, lstm: bool = False, path:str="./"):
|
||||
def __init__(self, data_config: DataConfig, lstm: bool = False, path: str = "./"):
|
||||
self.batch_size = 2048
|
||||
self.path = path
|
||||
self.lstm = lstm
|
||||
@@ -55,20 +55,21 @@ class DataProcessor:
|
||||
|
||||
self.history_features = self.get_nrv_history()
|
||||
self.future_features = self.get_load_forecast()
|
||||
# self.pv_forecast = self.get_pv_forecast()
|
||||
self.pv_forecast = self.get_pv_forecast()
|
||||
self.wind_forecast = self.get_wind_forecast()
|
||||
|
||||
self.all_features = self.history_features.merge(
|
||||
self.future_features, on="datetime", how="left"
|
||||
)
|
||||
# self.all_features = self.all_features.merge(
|
||||
# self.pv_forecast, on="datetime", how="left"
|
||||
# )
|
||||
|
||||
self.all_features = self.all_features.merge(
|
||||
self.pv_forecast, on="datetime", how="left"
|
||||
)
|
||||
|
||||
self.all_features = self.all_features.merge(
|
||||
self.wind_forecast, on="datetime", how="left"
|
||||
)
|
||||
|
||||
|
||||
self.all_features = self.all_features.merge(
|
||||
self.get_nominal_net_position(), on="datetime", how="left"
|
||||
)
|
||||
@@ -86,6 +87,7 @@ class DataProcessor:
|
||||
|
||||
self.nrv_scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
self.load_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
self.pv_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
self.wind_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
self.nominal_net_position_scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
|
||||
@@ -151,10 +153,19 @@ class DataProcessor:
|
||||
def get_pv_forecast(self):
|
||||
df = pd.read_csv(self.path + pv_forecast_data_path, delimiter=";")
|
||||
|
||||
df = df[df["region"] == "Belgium"]
|
||||
|
||||
df = df.rename(
|
||||
columns={"dayahead11hforecast": "pv_forecast", "Datetime": "datetime"}
|
||||
columns={
|
||||
"dayahead11hforecast": "pv_forecast",
|
||||
"Datetime": "datetime",
|
||||
"measured": "pv_history",
|
||||
}
|
||||
)
|
||||
df = df[["datetime", "pv_forecast"]]
|
||||
df = df[["datetime", "pv_forecast", "pv_history"]]
|
||||
|
||||
# replace nan by zero
|
||||
df = df.fillna(0)
|
||||
|
||||
df = df.groupby("datetime").mean().reset_index()
|
||||
df["datetime"] = pd.to_datetime(df["datetime"], utc=True)
|
||||
@@ -165,7 +176,11 @@ class DataProcessor:
|
||||
df = pd.read_csv(self.path + wind_forecast_data_path, delimiter=";")
|
||||
|
||||
df = df.rename(
|
||||
columns={"measured": "wind_history", "dayaheadforecast": "wind_forecast", "datetime": "datetime"}
|
||||
columns={
|
||||
"measured": "wind_history",
|
||||
"dayaheadforecast": "wind_forecast",
|
||||
"datetime": "datetime",
|
||||
}
|
||||
)
|
||||
df = df[["datetime", "wind_forecast", "wind_history"]]
|
||||
|
||||
@@ -198,8 +213,6 @@ class DataProcessor:
|
||||
df = df.set_index("datetime").resample("15min").ffill().reset_index()
|
||||
return df
|
||||
|
||||
|
||||
|
||||
def set_batch_size(self, batch_size: int):
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -233,15 +246,26 @@ class DataProcessor:
|
||||
train_df["total_load"] = self.load_forecast_scaler.transform(
|
||||
train_df["total_load"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
|
||||
train_df["pv_forecast"] = self.pv_forecast_scaler.fit_transform(
|
||||
train_df["pv_forecast"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
|
||||
train_df["pv_history"] = self.pv_forecast_scaler.transform(
|
||||
train_df["pv_history"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
|
||||
train_df["wind_forecast"] = self.wind_forecast_scaler.fit_transform(
|
||||
train_df["wind_forecast"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
train_df["wind_history"] = self.wind_forecast_scaler.transform(
|
||||
train_df["wind_history"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
train_df["nominal_net_position"] = self.nominal_net_position_scaler.fit_transform(
|
||||
train_df["nominal_net_position"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
train_df["nominal_net_position"] = (
|
||||
self.nominal_net_position_scaler.fit_transform(
|
||||
train_df["nominal_net_position"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
)
|
||||
|
||||
train_dataset = NrvDataset(
|
||||
train_df,
|
||||
@@ -253,7 +277,10 @@ class DataProcessor:
|
||||
return self.get_dataloader(train_dataset, shuffle=shuffle)
|
||||
|
||||
def get_test_dataloader(
|
||||
self, transform: bool = True, predict_sequence_length: int = 96, full_day_skip: bool = False
|
||||
self,
|
||||
transform: bool = True,
|
||||
predict_sequence_length: int = 96,
|
||||
full_day_skip: bool = False,
|
||||
):
|
||||
test_df = self.all_features.copy()
|
||||
|
||||
@@ -273,16 +300,26 @@ class DataProcessor:
|
||||
test_df["total_load"] = self.load_forecast_scaler.transform(
|
||||
test_df["total_load"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
|
||||
test_df["pv_forecast"] = self.pv_forecast_scaler.transform(
|
||||
test_df["pv_forecast"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
|
||||
test_df["pv_history"] = self.pv_forecast_scaler.transform(
|
||||
test_df["pv_history"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
|
||||
test_df["wind_forecast"] = self.wind_forecast_scaler.transform(
|
||||
test_df["wind_forecast"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
test_df["wind_history"] = self.wind_forecast_scaler.transform(
|
||||
test_df["wind_history"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
test_df["nominal_net_position"] = self.nominal_net_position_scaler.transform(
|
||||
test_df["nominal_net_position"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
|
||||
test_df["nominal_net_position"] = (
|
||||
self.nominal_net_position_scaler.transform(
|
||||
test_df["nominal_net_position"].values.reshape(-1, 1)
|
||||
).reshape(-1)
|
||||
)
|
||||
|
||||
test_dataset = NrvDataset(
|
||||
test_df,
|
||||
@@ -294,12 +331,17 @@ class DataProcessor:
|
||||
return self.get_dataloader(test_dataset, shuffle=False)
|
||||
|
||||
def get_dataloaders(
    self,
    transform: bool = True,
    predict_sequence_length: int = 96,
    full_day_skip: bool = False,
):
    """Return the ``(train_loader, test_loader)`` pair.

    Args:
        transform: Forwarded unchanged to both loader builders.
        predict_sequence_length: Forwarded unchanged to both loader builders.
        full_day_skip: Forwarded to the test loader builder only.

    Returns:
        A 2-tuple: the result of ``get_train_dataloader`` followed by the
        result of ``get_test_dataloader``.
    """
    # Keyword arguments that both builders receive.
    common_kwargs = {
        "transform": transform,
        "predict_sequence_length": predict_sequence_length,
    }

    # The train loader is built before the test loader, as in the original
    # tuple expression.
    # NOTE(review): the train path appears to fit the scalers that the test
    # path then reuses, so this order presumably matters - confirm before
    # reordering.
    train_loader = self.get_train_dataloader(**common_kwargs)
    test_loader = self.get_test_dataloader(
        **common_kwargs, full_day_skip=full_day_skip
    )
    return train_loader, test_loader
|
||||
|
||||
def inverse_transform(self, input_data):
|
||||
@@ -338,7 +380,7 @@ class DataProcessor:
|
||||
time_feature_size *= 96
|
||||
if self.data_config.DAY_OF_WEEK:
|
||||
time_feature_size *= 7
|
||||
|
||||
|
||||
if time_feature_size == 1:
|
||||
return 0
|
||||
return time_feature_size
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from torch import nn
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
|
||||
class TimeEmbedding(nn.Module):
|
||||
def __init__(self, time_features: int, embedding_dim: int):
|
||||
@@ -17,9 +19,10 @@ class TimeEmbedding(nn.Module):
|
||||
# Embed these time features
|
||||
embedded_time = self.embedding(time_feature)
|
||||
# Concatenate the embedded features with the original input (minus the last 'time feature')
|
||||
return torch.cat((x[..., :-1], embedded_time), dim=-1) # Use -1 to specify the last dimension
|
||||
return torch.cat(
|
||||
(x[..., :-1], embedded_time), dim=-1
|
||||
) # Use -1 to specify the last dimension
|
||||
|
||||
|
||||
def output_dim(self, input_dim):
|
||||
if self.time_features == 0:
|
||||
return input_dim
|
||||
@@ -30,3 +33,32 @@ class TimeEmbedding(nn.Module):
|
||||
# Convert the list back to a torch.Size object
|
||||
output_dim = torch.Size(input_dim_list)
|
||||
return output_dim
|
||||
|
||||
|
||||
class TrigonometricTimeEmbedding(nn.Module):
    """Swap the trailing time-index feature for a ``(sin, cos)`` pair.

    The last entry along the final dimension of the input is read as an
    index into a cycle of ``time_features`` steps and is replaced by the sine
    and cosine of the corresponding angle. The layer registers no parameters.
    """

    def __init__(self, time_features: int):
        """Store the cycle length; ``0`` makes the layer a pass-through.

        Args:
            time_features: Number of steps in one full cycle (the divisor of
                the angle computed in ``forward``).
        """
        super().__init__()
        self.time_features = time_features

    def forward(self, x):
        """Return ``x`` with its last feature replaced by two trig features.

        Args:
            x: Tensor whose last entry along the final dimension holds the
                time index.

        Returns:
            ``x`` itself when ``time_features`` is 0; otherwise a tensor whose
            final dimension is one larger than that of ``x``: all features
            except the last, followed by ``sin`` and ``cos`` of the angle.
        """
        if self.time_features == 0:
            return x

        # Truncate to an integer index first, then return to float for the
        # trigonometric functions.
        time_index = x[..., -1].int().float()

        # Compute the angle once and reuse it for both sine and cosine.
        angle = 2 * np.pi * time_index / self.time_features
        time_embedding = torch.stack((torch.sin(angle), torch.cos(angle)), dim=-1)

        # Drop the raw time index and append the two trig features.
        return torch.cat((x[..., :-1], time_embedding), dim=-1)

    def output_dim(self, input_dim):
        """Return the shape produced by ``forward`` for an input of ``input_dim``.

        Args:
            input_dim: Iterable shape of the input (converted with ``list``).

        Returns:
            ``input_dim`` unchanged when ``time_features`` is 0; otherwise a
            ``torch.Size`` whose last entry is one larger.
        """
        if self.time_features == 0:
            return input_dim

        dims = list(input_dim)
        # One feature (the time index) is removed, two (sin, cos) are added.
        dims[-1] = dims[-1] - 1 + 2
        return torch.Size(dims)
|
||||
|
||||
@@ -262,13 +262,11 @@ class Trainer:
|
||||
self.model.eval()
|
||||
|
||||
# set full day skip
|
||||
_, test_loader = self.data_processor.get_dataloaders(
|
||||
train_loader, test_loader = self.data_processor.get_dataloaders(
|
||||
predict_sequence_length=self.model.output_size
|
||||
)
|
||||
|
||||
# if not hasattr(self, "plot_quantile_percentages"):
|
||||
# self.log_final_metrics(task, train_loader, train=True)
|
||||
|
||||
self.log_final_metrics(task, train_loader, train=True)
|
||||
self.log_final_metrics(task, test_loader, train=False)
|
||||
|
||||
def test(self, test_loader: torch.utils.data.DataLoader):
|
||||
|
||||
@@ -2,7 +2,7 @@ from src.utils.clearml import ClearMLHelper
|
||||
|
||||
#### ClearML ####
|
||||
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
|
||||
task = clearml_helper.get_task(task_name="AQR: Linear Baseline")
|
||||
task = clearml_helper.get_task(task_name="AQR: Linear Baseline + Quarter Trigonometric")
|
||||
task.execute_remotely(queue_name="default", exit_process=True)
|
||||
|
||||
from src.policies.PolicyEvaluator import PolicyEvaluator
|
||||
@@ -20,7 +20,7 @@ from src.losses import *
|
||||
import torch
|
||||
from torch.nn import MSELoss, L1Loss
|
||||
import torch.nn as nn
|
||||
from src.models.time_embedding_layer import TimeEmbedding
|
||||
from src.models.time_embedding_layer import TimeEmbedding, TrigonometricTimeEmbedding
|
||||
|
||||
|
||||
#### Data Processor ####
|
||||
@@ -30,18 +30,21 @@ data_config.NRV_HISTORY = True
|
||||
data_config.LOAD_HISTORY = True
|
||||
data_config.LOAD_FORECAST = True
|
||||
|
||||
data_config.WIND_FORECAST = False
|
||||
data_config.WIND_HISTORY = False
|
||||
data_config.WIND_FORECAST = True
|
||||
data_config.WIND_HISTORY = True
|
||||
|
||||
data_config.QUARTER = False
|
||||
data_config.PV_FORECAST = True
|
||||
data_config.PV_HISTORY = True
|
||||
|
||||
data_config.QUARTER = True
|
||||
data_config.DAY_OF_WEEK = False
|
||||
|
||||
data_config.NOMINAL_NET_POSITION = False
|
||||
data_config.NOMINAL_NET_POSITION = True
|
||||
|
||||
|
||||
data_config = task.connect(data_config, name="data_features")
|
||||
|
||||
data_processor = DataProcessor(data_config, path="", lstm=True)
|
||||
data_processor = DataProcessor(data_config, path="", lstm=False)
|
||||
data_processor.set_batch_size(512)
|
||||
data_processor.set_full_day_skip(False)
|
||||
|
||||
@@ -67,7 +70,7 @@ model_parameters = {
|
||||
"hidden_size": 256,
|
||||
"num_layers": 2,
|
||||
"dropout": 0.2,
|
||||
"time_feature_embedding": 8,
|
||||
"time_feature_embedding": 2,
|
||||
}
|
||||
|
||||
model_parameters = task.connect(model_parameters, name="model_parameters")
|
||||
@@ -76,6 +79,8 @@ model_parameters = task.connect(model_parameters, name="model_parameters")
|
||||
# data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"]
|
||||
# )
|
||||
|
||||
time_embedding = TrigonometricTimeEmbedding(data_processor.get_time_feature_size())
|
||||
|
||||
# lstm_model = GRUModel(
|
||||
# time_embedding.output_dim(inputDim),
|
||||
# len(quantiles),
|
||||
@@ -92,11 +97,11 @@ model_parameters = task.connect(model_parameters, name="model_parameters")
|
||||
# dropout=model_parameters["dropout"],
|
||||
# )
|
||||
|
||||
# linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
|
||||
linear_model = LinearRegression(inputDim, len(quantiles))
|
||||
linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
|
||||
# linear_model = LinearRegression(inputDim, len(quantiles))
|
||||
|
||||
# model = nn.Sequential(time_embedding, lstm_model)
|
||||
model = linear_model
|
||||
model = nn.Sequential(time_embedding, linear_model)
|
||||
# model = linear_model
|
||||
model.output_size = 1
|
||||
optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"])
|
||||
|
||||
@@ -121,7 +126,7 @@ trainer.add_metrics_to_track(
|
||||
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
|
||||
)
|
||||
trainer.early_stopping(patience=5)
|
||||
trainer.plot_every(2)
|
||||
trainer.plot_every(15)
|
||||
trainer.train(task=task, epochs=epochs, remotely=True)
|
||||
|
||||
### Policy Evaluation ###
|
||||
|
||||
Reference in New Issue
Block a user