"""ClearML driver script: hyper-parameter optimization for the autoregressive
quantile-regression forecasting task.

Reads a base task id from the command line, clones it under an Optuna (or
BOHB, or random-search) strategy, and runs up to 40 trials on the configured
execution queue. Runs remotely: ``execute_remotely`` re-enqueues this script
on the "hypertuning" queue and exits the local process.
"""
import argparse
import logging

from clearml import Task
from clearml.automation import (
    DiscreteParameterRange,
    HyperParameterOptimizer,
    RandomSearch,
    UniformIntegerParameterRange,  # noqa: F401  kept for easy search-space edits
)

# Pick the best available search strategy: Optuna > BOHB > RandomSearch.
# NOTE: OptimizerOptuna must only be imported inside this try block — an
# unconditional top-level import would raise ImportError on machines without
# optuna and defeat the fallback chain below.
try:
    from clearml.automation.optuna import OptimizerOptuna  # noqa
    aSearchStrategy = OptimizerOptuna
except ImportError:
    try:
        from clearml.automation.hpbandster import OptimizerBOHB  # noqa
        aSearchStrategy = OptimizerBOHB
    except ImportError:
        logging.getLogger().warning(
            'Apologies, it seems you do not have \'optuna\' or \'hpbandster\' installed, '
            'we will be using RandomSearch strategy instead')
        aSearchStrategy = RandomSearch

# --- Command line: id of the base (template) task to optimize -----------------
parser = argparse.ArgumentParser()
parser.add_argument("--task_id", help="Task ID to optimize", type=str)
args = parser.parse_args()
input_task_id = args.task_id

# Fail fast on a bad/unknown task id before creating the optimizer task.
if not Task.get_task(task_id=input_task_id):
    raise ValueError("Invalid task id")

# Heavy GPU packages are provided by the docker image; don't reinstall them.
Task.add_requirements("requirements.txt")
Task.ignore_requirements("torch")
Task.ignore_requirements("torchvision")
Task.ignore_requirements("tensorboard")

task = Task.init(project_name='Thesis/NrvForecast',
                 task_name='Autoregressive Quantile Regression Hyper-Parameter Optimization',
                 task_type=Task.TaskTypes.optimizer,
                 reuse_last_task_id=False)
# FIXME(security): git username/password are hardcoded in this docker argument
# string. Move them to agent-side vault/environment configuration and rotate
# the exposed password.
task.set_base_docker(
    "docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime --env GIT_SSL_NO_VERIFY=true --env CLEARML_AGENT_GIT_USER=VictorMylle --env CLEARML_AGENT_GIT_PASS=Voetballer1"
)
task.set_packages("requirements.txt")

# Queue on which the individual trial tasks are executed.
execution_queue = "default"

### HYPER PARAMETERS ###

#### Quantiles ####
quantile_lists = [
    [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],  # Deciles
    [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],  # 10% Increments, Excluding Extremes
    [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99],  # Combining Deciles with Extremes
    [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],  # Including 0 and 1
    [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99],  # Mixed Small and Large Increments
    [0.125, 0.375, 0.625, 0.875],  # Eighths
    [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 0.95],  # 10% Increments
]
# NOTE(review): quantiles_range is built but never added to hyper_parameters
# below — confirm whether the quantile search was intentionally disabled.
quantiles_range = DiscreteParameterRange("general/quantiles", values=quantile_lists)

#### Data Config ####
# Single-value ranges: these features are pinned on for every trial.
quarter_range = DiscreteParameterRange("data_features/quarter", values=[True])
day_of_week_range = DiscreteParameterRange("data_features/day_of_week", values=[True])
load_forecast_range = DiscreteParameterRange("data_features/load_forecast", values=[True])

#### Model Parameters ####
learning_rate = DiscreteParameterRange("model_parameters/learning_rate",
                                       values=[0.00001, 0.00005, 0.0001, 0.0005, 0.001])
hidden_size = DiscreteParameterRange("model_parameters/hidden_size",
                                     values=[64, 128, 256, 512, 1024, 2048])
num_layers = DiscreteParameterRange("model_parameters/num_layers",
                                    values=[1, 2, 3, 4, 5, 6])
dropout = DiscreteParameterRange("model_parameters/dropout",
                                 values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5])
time_feature_embedding = DiscreteParameterRange("model_parameters/time_feature_embedding",
                                                values=[1, 2, 3, 4, 5, 6])

### OPTIMIZER OBJECT ###
optimizer = HyperParameterOptimizer(
    base_task_id=input_task_id,
    # Minimize the CRPS test loss reported under the "Summary" plot.
    objective_metric_title="Summary",
    objective_metric_series="test_CRPSLoss",
    objective_metric_sign="min",
    execution_queue=execution_queue,
    max_number_of_concurrent_tasks=1,
    optimizer_class=aSearchStrategy,
    max_iteration_per_job=300,
    # save_top_k_tasks_only=3,
    pool_period_min=1,
    total_max_jobs=40,
    hyper_parameters=[
        quarter_range, day_of_week_range, load_forecast_range,
        learning_rate, hidden_size, num_layers, dropout, time_feature_embedding
    ]
)

# Re-enqueue this controller script on the hypertuning queue and exit locally.
task.execute_remotely(queue_name="hypertuning", exit_process=True)

optimizer.set_report_period(1)


def job_complete_callback(
        job_id,                 # type: str
        objective_value,        # type: float
        objective_iteration,    # type: int
        job_parameters,         # type: dict
        top_performance_job_id  # type: str
):
    """Log every finished trial; celebrate when it sets a new best objective."""
    print('Job completed!', job_id, objective_value, objective_iteration, job_parameters)
    if job_id == top_performance_job_id:
        print('WOOT WOOT we broke the record! Objective reached {}'.format(objective_value))


optimizer.start(job_complete_callback=job_complete_callback)
# Hard wall-clock budget for the whole optimization: 16 hours.
optimizer.set_time_limit(in_minutes=120.0 * 8)
optimizer.wait()

top_exp = optimizer.get_top_experiments(top_k=5)
print([t.id for t in top_exp])

# make sure background optimization stopped
optimizer.stop()
print('We are done, good bye')