diff --git a/src/data/dataset.py b/src/data/dataset.py
index 91ffcf9..5bb82cd 100644
--- a/src/data/dataset.py
+++ b/src/data/dataset.py
@@ -12,10 +12,12 @@ class NrvDataset(Dataset):
         full_day_skip: bool = False,
         sequence_length=96,
         predict_sequence_length=96,
+        lstm: bool = False,
     ):
         self.data_config = data_config
         self.dataframe = dataframe
         self.full_day_skip = full_day_skip
+        self.lstm = lstm
 
         # reset dataframe index
         self.dataframe.reset_index(drop=True, inplace=True)
@@ -107,19 +109,26 @@ class NrvDataset(Dataset):
         history_features = history_df[self.history_features].values
 
         # combine the history features into one tensor (first one feature, then the next one, etc.)
-        history_features = torch.tensor(history_features).reshape(-1)
+        history_features = torch.tensor(history_features)
 
         # get forecast features
         forecast_features = forecast_df[self.forecast_features].values
-        forecast_features = torch.tensor(forecast_features).view(-1)
+        forecast_features = torch.tensor(forecast_features)
 
         # add last time feature of the history
         time_feature = history_df["time_feature"].iloc[-1]
 
         ## all features
-        all_features = torch.cat(
-            [nrv_features, history_features, forecast_features, torch.tensor([time_feature])], dim=0
-        )
+        if not self.lstm:
+            all_features = torch.cat(
+                [nrv_features, history_features.reshape(-1), forecast_features.reshape(-1), torch.tensor([time_feature])], dim=0
+            )
+        else:
+            time_features = torch.tensor(history_df["time_feature"].values).reshape(-1, 1)
+            # stack the (96, 1) nrv column with the (96, 1) time-feature column into a (96, 2) sequence
+            all_features = torch.cat(
+                [nrv_features.unsqueeze(1), time_features], dim=1
+            )
 
         # Target sequence, flattened if necessary
         nrv_target = forecast_df["nrv"].values
@@ -133,7 +142,7 @@ class NrvDataset(Dataset):
         # all features and target to float
         all_features = all_features.float()
 
-        # to tens&éazzaéaz"ezéors
+        # to tensors
         nrv_target = torch.tensor(nrv_target).float()
 
         return all_features, nrv_target, idx
diff --git a/src/data/preprocessing.py b/src/data/preprocessing.py
index d5c113b..81ccac6 100644
--- a/src/data/preprocessing.py
+++ b/src/data/preprocessing.py
@@ -36,9 +36,10 @@ class DataConfig:
 
 
 class DataProcessor:
-    def __init__(self, data_config: DataConfig, path: str = "./"):
+    def __init__(self, data_config: DataConfig, lstm: bool = False, path: str = "./"):
        self.batch_size = 2048
        self.path = path
+       self.lstm = lstm
 
        self.train_range = (
            -np.inf,
@@ -204,6 +205,7 @@ class DataProcessor:
            data_config=self.data_config,
            full_day_skip=self.full_day_skip,
            predict_sequence_length=predict_sequence_length,
+           lstm=self.lstm,
        )
 
        return self.get_dataloader(train_dataset, shuffle=shuffle)
@@ -234,6 +236,7 @@ class DataProcessor:
            data_config=self.data_config,
            full_day_skip=self.full_day_skip,
            predict_sequence_length=predict_sequence_length,
+           lstm=self.lstm,
        )
 
        return self.get_dataloader(test_dataset, shuffle=False)
@@ -274,7 +277,7 @@ class DataProcessor:
            predict_sequence_length=self.output_size
        )
        input, _, _ = next(iter(data_loader))
-       return input.shape[-1]
+       return input.shape
 
    def get_time_feature_size(self):
        time_feature_size = 1
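With lstm=True the dataset emits a per-step feature matrix instead of one flat vector, so batches come out as (batch, seq_len, features) and get_input_size() now returns that full shape rather than a single integer. A minimal sketch of the difference, using illustrative stand-ins for the real nrv/time columns:

import torch

seq_len = 96
nrv = torch.randn(seq_len)                              # (96,)
time_features = torch.arange(seq_len).reshape(-1, 1)    # (96, 1), integer time feature

# lstm=False: features are flattened into one long vector
flat = torch.cat([nrv, time_features.reshape(-1).float()], dim=0)
print(flat.shape)   # torch.Size([192])

# lstm=True: feature columns are stacked per time step
seq = torch.cat([nrv.unsqueeze(1), time_features.float()], dim=1)
print(seq.shape)    # torch.Size([96, 2]) -> batched by the DataLoader to (batch, 96, 2)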
diff --git a/src/models/lstm_model.py b/src/models/lstm_model.py
new file mode 100644
index 0000000..88754bf
--- /dev/null
+++ b/src/models/lstm_model.py
@@ -0,0 +1,45 @@
+import torch
+
+class LSTMModel(torch.nn.Module):
+    def __init__(self, inputSize, output_size, num_layers: int, hidden_size: int, dropout: float = 0.2):
+        super(LSTMModel, self).__init__()
+        self.inputSize = inputSize
+        self.output_size = output_size
+
+        self.num_layers = num_layers
+        self.hidden_size = hidden_size
+        self.dropout = dropout
+
+        self.lstm = torch.nn.LSTM(input_size=inputSize[-1], hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, batch_first=True)
+        self.linear = torch.nn.Linear(hidden_size, output_size)
+
+    def forward(self, x):
+        # Forward pass through the LSTM layers
+        _, (hidden_state, _) = self.lstm(x)
+
+        # Use the final hidden state of the last stacked layer for the output
+        output = self.linear(hidden_state[-1])
+
+        return output
+
+class GRUModel(torch.nn.Module):
+    def __init__(self, inputSize, output_size, num_layers: int, hidden_size: int, dropout: float = 0.2):
+        super(GRUModel, self).__init__()
+        self.inputSize = inputSize
+        self.output_size = output_size
+
+        self.num_layers = num_layers
+        self.hidden_size = hidden_size
+        self.dropout = dropout
+
+        self.gru = torch.nn.GRU(input_size=inputSize[-1], hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, batch_first=True)
+        self.linear = torch.nn.Linear(hidden_size, output_size)
+
+    def forward(self, x):
+        # Forward pass through the GRU layers
+        _, hidden_state = self.gru(x)
+
+        # Use the final hidden state of the last stacked layer for the output
+        output = self.linear(hidden_state[-1])
+
+        return output
diff --git a/src/models/time_embedding_layer.py b/src/models/time_embedding_layer.py
index a2fea62..b81ef19 100644
--- a/src/models/time_embedding_layer.py
+++ b/src/models/time_embedding_layer.py
@@ -10,19 +10,20 @@ class TimeEmbedding(nn.Module):
 
     def forward(self, x):
         # Extract the last 'time_features' from the input
-        time_feature = x[:, -1]
+        time_feature = x[..., -1]  # use an ellipsis so any leading batch/sequence dims are preserved
 
         # convert to int
         time_feature = time_feature.int()
 
         # Embed these time features
-        # print max value of time_feature
-        if time_feature.max() > self.time_features:
-            # print the row from x that includes the max value in the last column
-            print(x[time_feature == time_feature.max()])
-            print("time feature max value is greater than time features")
-
         embedded_time = self.embedding(time_feature)
 
         # Concatenate the embedded features with the original input (minus the last 'time feature')
-        return torch.cat((x[:, :-1], embedded_time), dim=1)
+        return torch.cat((x[..., :-1], embedded_time), dim=-1)  # concatenate along the last dimension
+
     def output_dim(self, input_dim):
-        return input_dim + self.embedding.embedding_dim - 1
+        # Build the output shape from the input shape
+        input_dim_list = list(input_dim)
+        # Modify the last dimension: the integer time feature is replaced by its embedding
+        input_dim_list[-1] = input_dim_list[-1] - 1 + self.embedding.embedding_dim
+        # Convert the list back to a torch.Size object
+        output_dim = torch.Size(input_dim_list)
+        return output_dim
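A quick standalone shape check for the new recurrent heads (the sizes here are illustrative, not the experiment settings):

import torch

from src.models.lstm_model import GRUModel

# inputSize is now a full shape, e.g. (batch, seq, features); the model only reads its last entry
model = GRUModel(torch.Size([512, 96, 2]), output_size=9, num_layers=2, hidden_size=64)

x = torch.randn(8, 96, 2)   # (batch, sequence_length, features), batch_first=True
out = model(x)              # hidden_state[-1]: final hidden state of the last stacked layer
print(out.shape)            # torch.Size([8, 9]) -- one output per quantile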
diff --git a/src/training_scripts/autoregressive_quantiles.py b/src/training_scripts/autoregressive_quantiles.py
index 28a90b8..e790bde 100644
--- a/src/training_scripts/autoregressive_quantiles.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -1,3 +1,4 @@
+from src.models.lstm_model import LSTMModel, GRUModel
 from src.data import DataProcessor, DataConfig
 from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression
 from src.trainers.probabilistic_baseline import ProbabilisticBaselineTrainer
@@ -28,19 +29,21 @@
 data_config.LOAD_FORECAST = True
 data_config.QUARTER = True
 data_config.DAY_OF_WEEK = True
 
-data_config = task.connect(data_config, name="data_features")
+# data_config = task.connect(data_config, name="data_features")
 
-data_processor = DataProcessor(data_config, path="")
-data_processor.set_batch_size(1024)
+data_processor = DataProcessor(data_config, path="", lstm=True)
+data_processor.set_batch_size(512)
 data_processor.set_full_day_skip(False)
 
 #### Hyperparameters ####
 data_processor.set_output_size(1)
 inputDim = data_processor.get_input_size()
-learningRate = 0.0001
+learningRate = 0.001
 epochs = 100
 
+print("Input dim: ", inputDim)
+
 # add parameters to clearml
 quantiles = task.get_parameter("general/quantiles", cast=True)
 if quantiles is None:
@@ -49,8 +52,9 @@
 # model = LinearRegression(inputDim, len(quantiles))
 time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)
-non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)
-model = nn.Sequential(time_embedding, non_linear_regression_model)
+# non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)
+lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=512, num_layers=2)
+model = nn.Sequential(time_embedding, lstm_model)
 optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
 
 #### Trainer ####
@@ -62,9 +66,10 @@
     "cuda",
     debug=True,
 )
+
 trainer.add_metrics_to_track(
     [PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss()]
 )
 trainer.early_stopping(patience=10)
-trainer.plot_every(5)
-trainer.train(task=task, epochs=epochs, remotely=False)
\ No newline at end of file
+trainer.plot_every(100)
+trainer.train(task=task, epochs=epochs, remotely=True)
\ No newline at end of file
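Putting the two modules together, the embedding widens the feature dimension before the recurrent head. A hedged end-to-end sketch (assumes TimeEmbedding(num_time_values, embedding_dim) with 96 distinct time-feature values, matching how it is called above; batch and quantile counts are illustrative):

import torch
import torch.nn as nn

from src.models.lstm_model import GRUModel
from src.models.time_embedding_layer import TimeEmbedding

inputDim = torch.Size([512, 96, 2])     # (batch, seq, [nrv, time_feature]) from get_input_size()
time_embedding = TimeEmbedding(96, 4)   # assumed args: 96 distinct time values, embedding_dim=4
model = nn.Sequential(
    time_embedding,
    GRUModel(time_embedding.output_dim(inputDim), 3, num_layers=2, hidden_size=512),  # 3 = len(quantiles)
)

# one nrv column plus one integer time column, as built by NrvDataset with lstm=True
x = torch.cat([torch.randn(8, 96, 1), torch.randint(0, 96, (8, 96, 1)).float()], dim=-1)
print(model(x).shape)   # torch.Size([8, 3]): the time column becomes a 4-dim embedding, so GRU input_size = 2 - 1 + 4 = 5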
diff --git a/src/training_scripts/hyperparameter_optimizer.py b/src/training_scripts/hyperparameter_optimizer.py
index f73c96f..fee49da 100644
--- a/src/training_scripts/hyperparameter_optimizer.py
+++ b/src/training_scripts/hyperparameter_optimizer.py
@@ -5,7 +5,6 @@
 from clearml.automation.optuna import OptimizerOptuna
 from clearml.automation import (
     DiscreteParameterRange, HyperParameterOptimizer, RandomSearch, UniformIntegerParameterRange)
-from src.data.preprocessing import DataConfig
 
 # trying to load Bayesian optimizer package
 try:
@@ -21,17 +20,28 @@
 except ImportError as ex:
           'we will be using RandomSearch strategy instead')
     aSearchStrategy = RandomSearch
 
-# input task id to optimize
-input_task_id = input("Please enter the task id to optimize: ")
+# input task id to optimize using argparse
+import argparse
+parser = argparse.ArgumentParser()
+parser.add_argument("--task_id", help="Task ID to optimize", type=str)
+args = parser.parse_args()
+input_task_id = args.task_id
 
 # check if task id is valid
 if not Task.get_task(task_id=input_task_id):
     raise ValueError("Invalid task id")
 
+Task.add_requirements("requirements.txt")
+Task.ignore_requirements("torch")
+Task.ignore_requirements("torchvision")
+Task.ignore_requirements("tensorboard")
-task = Task.init(project_name='Hyper-Parameter Optimization',
-                 task_name='Automatic Hyper-Parameter Optimization',
+task = Task.init(project_name='Thesis/NrvForecast',
+                 task_name='Autoregressive Quantile Regression Hyper-Parameter Optimization',
                  task_type=Task.TaskTypes.optimizer,
                  reuse_last_task_id=False)
+# git credentials are supplied through the agent's environment; never hardcode them here
+task.set_base_docker("docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime --env GIT_SSL_NO_VERIFY=true")
+task.set_packages("requirements.txt")
 
 execution_queue = "default"
@@ -40,36 +50,42 @@
 
 #### Quantiles ####
 quantile_lists = [
     [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],  # Deciles
-    [0.25, 0.5, 0.75],  # Quartiles
     [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],  # 10% Increments, Excluding Extremes
     [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99],  # Combining Deciles with Extremes
     [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],  # Including 0 and 1
     [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99],  # Mixed Small and Large Increments
-    [0.2, 0.4, 0.6, 0.8],  # 20% Increments
     [0.125, 0.375, 0.625, 0.875],  # Eighths
-    [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90],  # 10% Increments
-    [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.15, 0.2, 0.3, 0.5]  # Mixed Fine and Coarser Increments
+    [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 0.95],  # 10% Increments plus 0.95
 ]
-
-
 quantiles_range = DiscreteParameterRange("general/quantiles", values=quantile_lists)
 
+#### Data Config ####
+quarter_range = DiscreteParameterRange("data_features/quarter", values=[True, False])
+day_of_week_range = DiscreteParameterRange("data_features/day_of_week", values=[True, False])
+
+load_forecast_range = DiscreteParameterRange("data_features/load_forecast", values=[True, False])
+load_history_range = DiscreteParameterRange("data_features/load_history", values=[True, False])
 
 ### OPTIMIZER OBJECT ###
 optimizer = HyperParameterOptimizer(
     base_task_id=input_task_id,
-    objective_metric_title="PinballLoss",
-    objective_metric_series="test",
+    objective_metric_title="Summary",
+    objective_metric_series="test_CRPSLoss",
     objective_metric_sign="min",
     execution_queue=execution_queue,
     max_number_of_concurrent_tasks=1,
     optimizer_class=aSearchStrategy,
+    max_iteration_per_job=50,
     # save_top_k_tasks_only=3,
     pool_period_min=0.2,
     total_max_jobs=15,
     hyper_parameters=[
         quantiles_range,
+        quarter_range,
+        day_of_week_range,
+        load_forecast_range,
+        load_history_range,
     ]
 )
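With the interactive prompt replaced by argparse, the optimizer can now be launched non-interactively, e.g. `python src/training_scripts/hyperparameter_optimizer.py --task_id <base_task_id>` (the id is a placeholder for the ClearML task to clone). One caveat: the new `data_features/...` ranges are applied by overriding that parameter section on clones of the base task, so they only take effect if the base training task still connects its DataConfig under `name="data_features"`, a call the training script above currently comments out.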