From f9e8f9e69fc984dba4132814904a90f8ac5f3f93 Mon Sep 17 00:00:00 2001
From: Victor Mylle
Date: Mon, 27 Nov 2023 16:06:05 +0000
Subject: [PATCH] Added hyperparameter optimization script

---
 src/trainers/trainer.py                            |   3 +-
 .../autoregressive_quantiles.py                    |  12 +--
 .../hyperparameter_optimizer.py                    | 100 ++++++++++++++++++
 3 files changed, 106 insertions(+), 9 deletions(-)
 create mode 100644 src/training_scripts/hyperparameter_optimizer.py

diff --git a/src/trainers/trainer.py b/src/trainers/trainer.py
index bc4a895..80d9e38 100644
--- a/src/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -78,8 +78,7 @@ class Trainer:
         self.data_processor = task.connect(self.data_processor, name="data_processor")
         self = task.connect(self, name="trainer")
 
-        task.delete_parameter("trainer/quantiles")
-        task.connect(self.data_processor.data_config, name="data_features")
+        task.delete_parameter("trainer/quantiles", force=True)
 
     def random_samples(self, train: bool = True, num_samples: int = 10):
         train_loader, test_loader = self.data_processor.get_dataloaders(
diff --git a/src/training_scripts/autoregressive_quantiles.py b/src/training_scripts/autoregressive_quantiles.py
index e9e05e2..779d912 100644
--- a/src/training_scripts/autoregressive_quantiles.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -22,14 +22,13 @@ task = clearml_helper.get_task(task_name="None")
 #### Data Processor ####
 data_config = DataConfig()
 data_config.NRV_HISTORY = True
-data_config.LOAD_HISTORY = False
-data_config.LOAD_FORECAST = False
-
-data_config.WIND_FORECAST = False
-data_config.WIND_HISTORY = False
+data_config.LOAD_HISTORY = True
+data_config.LOAD_FORECAST = True
 
 data_config.QUARTER = True
-data_config.DAY_OF_WEEK = False
+data_config.DAY_OF_WEEK = True
+
+data_config = task.connect(data_config, name="data_features")
 
 data_processor = DataProcessor(data_config, path="")
 data_processor.set_batch_size(1024)
@@ -48,7 +47,6 @@ if quantiles is None:
     quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5,
                  0.6, 0.7, 0.85, 0.9, 0.95, 0.99]
 task.set_parameter("general/quantiles", quantiles)
-
 # model = LinearRegression(inputDim, len(quantiles))
 time_embedding = TimeEmbedding(data_processor.get_time_feature_size(), 4)
 non_linear_regression_model = NonLinearRegression(time_embedding.output_dim(inputDim), len(quantiles), hiddenSize=1024, numLayers=5)
diff --git a/src/training_scripts/hyperparameter_optimizer.py b/src/training_scripts/hyperparameter_optimizer.py
new file mode 100644
index 0000000..f73c96f
--- /dev/null
+++ b/src/training_scripts/hyperparameter_optimizer.py
@@ -0,0 +1,100 @@
+import logging
+from clearml import Task
+from clearml.automation import HyperParameterOptimizer
+# NOTE: OptimizerOptuna is imported lazily in the try/except below, so a missing package falls back cleanly
+from clearml.automation import (
+    DiscreteParameterRange, RandomSearch,
+    UniformIntegerParameterRange)
+from src.data.preprocessing import DataConfig
+
+# Pick the best available search strategy: Optuna > BOHB > RandomSearch
+try:
+    from clearml.automation.optuna import OptimizerOptuna  # noqa
+    aSearchStrategy = OptimizerOptuna
+except ImportError:
+    try:
+        from clearml.automation.hpbandster import OptimizerBOHB  # noqa
+        aSearchStrategy = OptimizerBOHB
+    except ImportError:
+        logging.getLogger().warning(
+            'Apologies, it seems you do not have \'optuna\' or \'hpbandster\' installed, '
+            'we will be using RandomSearch strategy instead')
+        aSearchStrategy = RandomSearch
+
+# input task id to optimize
+input_task_id = input("Please enter the task id to optimize: ")
+
+# check if task id is valid
+if not Task.get_task(task_id=input_task_id):
+    raise ValueError("Invalid task id")
+
+task = Task.init(project_name='Hyper-Parameter Optimization',
+                 task_name='Automatic Hyper-Parameter Optimization',
+                 task_type=Task.TaskTypes.optimizer,
+                 reuse_last_task_id=False)
+
+execution_queue = "default"
+
+
+### HYPER PARAMETERS ###
+#### Quantiles ####
+quantile_lists = [
+    [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],  # Deciles
+    [0.25, 0.5, 0.75],  # Quartiles
+    [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],  # 10% Increments, Excluding Extremes
+    [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99],  # Combining Deciles with Extremes
+    [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],  # Including 0 and 1
+    [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99],  # Mixed Small and Large Increments
+    [0.2, 0.4, 0.6, 0.8],  # 20% Increments
+    [0.125, 0.375, 0.625, 0.875],  # Eighths
+    [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90],  # 10% Increments
+    [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.15, 0.2, 0.3, 0.5]  # Mixed Fine and Coarser Increments
+]
+
+
+quantiles_range = DiscreteParameterRange("general/quantiles", values=quantile_lists)
+
+
+### OPTIMIZER OBJECT ###
+optimizer = HyperParameterOptimizer(
+    base_task_id=input_task_id,
+    objective_metric_title="PinballLoss",
+    objective_metric_series="test",
+    objective_metric_sign="min",
+    execution_queue=execution_queue,
+    max_number_of_concurrent_tasks=1,
+    optimizer_class=aSearchStrategy,
+    # save_top_k_tasks_only=3,
+    pool_period_min=0.2,
+    total_max_jobs=15,
+
+    hyper_parameters=[
+        quantiles_range,
+    ]
+
+)
+
+task.execute_remotely(queue_name="hypertuning", exit_process=True)
+
+optimizer.set_report_period(0.2)
+
+def job_complete_callback(
+    job_id,  # type: str
+    objective_value,  # type: float
+    objective_iteration,  # type: int
+    job_parameters,  # type: dict
+    top_performance_job_id  # type: str
+):
+    print('Job completed!', job_id, objective_value, objective_iteration, job_parameters)
+    if job_id == top_performance_job_id:
+        print('WOOT WOOT we broke the record! Objective reached {}'.format(objective_value))
+
+optimizer.start(job_complete_callback=job_complete_callback)
+optimizer.set_time_limit(in_minutes=120.0)
+optimizer.wait()
+top_exp = optimizer.get_top_experiments(top_k=3)
+print([t.id for t in top_exp])
+# make sure background optimization stopped
+optimizer.stop()
+
+print('We are done, good bye')