124 lines
4.8 KiB
Python
124 lines
4.8 KiB
Python
import logging
|
|
from clearml import Task
|
|
from clearml.automation import HyperParameterOptimizer
|
|
from clearml.automation.optuna import OptimizerOptuna
|
|
from clearml.automation import (
|
|
DiscreteParameterRange, HyperParameterOptimizer, RandomSearch,
|
|
UniformIntegerParameterRange)
|
|
|
|
# Pick the best available search strategy, in order of preference:
# Optuna -> BOHB (hpbandster) -> plain RandomSearch.
# NOTE(review): the unconditional `from clearml.automation.optuna import
# OptimizerOptuna` in the file header will raise before this fallback can
# run when optuna is missing — consider removing that top-level import.
try:
    from clearml.automation.optuna import OptimizerOptuna  # noqa
    aSearchStrategy = OptimizerOptuna
except ImportError:  # `as ex` was bound but never used (and shadowed below)
    try:
        from clearml.automation.hpbandster import OptimizerBOHB  # noqa
        aSearchStrategy = OptimizerBOHB
    except ImportError:
        logging.getLogger().warning(
            'Apologies, it seems you do not have \'optuna\' or \'hpbandster\' installed, '
            'we will be using RandomSearch strategy instead')
        aSearchStrategy = RandomSearch
|
|
|
|
# Resolve the base task to optimize from the command line.
import argparse

parser = argparse.ArgumentParser()
# required=True: previously an omitted --task_id silently passed None into
# Task.get_task and failed with a confusing error deep inside clearml.
parser.add_argument("--task_id", help="Task ID to optimize", type=str, required=True)
args = parser.parse_args()
input_task_id = args.task_id

# Fail fast if the id does not resolve to an existing ClearML task.
if not Task.get_task(task_id=input_task_id):
    raise ValueError("Invalid task id")
|
|
|
|
# Register this HPO controller as its own ClearML task.  The heavy ML
# packages are excluded from the captured requirements because the agent's
# docker image below already ships them.
Task.add_requirements("requirements.txt")
Task.ignore_requirements("torch")
Task.ignore_requirements("torchvision")
Task.ignore_requirements("tensorboard")
task = Task.init(project_name='Thesis/NrvForecast',
                 task_name='Autoregressive Quantile Regression Hyper-Parameter Optimization',
                 task_type=Task.TaskTypes.optimizer,
                 reuse_last_task_id=False)

# SECURITY: git credentials were previously hard-coded in this call and are
# now in the repository history — rotate that password immediately.  They
# are read from the environment instead of being committed in source.
import os

_git_user = os.environ.get("CLEARML_AGENT_GIT_USER", "")
_git_pass = os.environ.get("CLEARML_AGENT_GIT_PASS", "")
task.set_base_docker(
    "docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime"
    " --env GIT_SSL_NO_VERIFY=true"
    f" --env CLEARML_AGENT_GIT_USER={_git_user}"
    f" --env CLEARML_AGENT_GIT_PASS={_git_pass}"
)
task.set_packages("requirements.txt")

# Queue on which the individual trial tasks will be executed.
execution_queue = "default"
|
|
|
|
|
|
### HYPER PARAMETERS ###

#### Quantiles ####
# Candidate quantile sets the forecaster can be trained on.
quantile_lists = [
    [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],  # Deciles
    [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],  # 10% Increments, Excluding Extremes
    [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99],  # Combining Deciles with Extremes
    [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],  # Including 0 and 1
    [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99],  # Mixed Small and Large Increments
    [0.125, 0.375, 0.625, 0.875],  # Eighths
    [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 0.95],  # 10% Increments
]
quantiles_range = DiscreteParameterRange("general/quantiles", values=quantile_lists)

#### Data Config ####
# Feature toggles are single-valued: every trial keeps them enabled.
quarter_range = DiscreteParameterRange("data_features/quarter", values=[True])
day_of_week_range = DiscreteParameterRange("data_features/day_of_week", values=[True])
load_forecast_range = DiscreteParameterRange("data_features/load_forecast", values=[True])

#### Model Config ####
learning_rate = DiscreteParameterRange(
    "model_parameters/learning_rate",
    values=[1e-05, 5e-05, 1e-04, 5e-04, 1e-03],
)
# Powers of two from 64 up to 2048.
hidden_size = DiscreteParameterRange(
    "model_parameters/hidden_size",
    values=[2 ** k for k in range(6, 12)],
)
num_layers = DiscreteParameterRange(
    "model_parameters/num_layers",
    values=list(range(1, 7)),
)
# Dropout probabilities 0.0 .. 0.5 in steps of 0.1.
dropout = DiscreteParameterRange(
    "model_parameters/dropout",
    values=[k / 10 for k in range(6)],
)
time_feature_embedding = DiscreteParameterRange(
    "model_parameters/time_feature_embedding",
    values=list(range(1, 7)),
)
|
|
|
|
### OPTIMIZER OBJECT ###
# Parameters explored per trial.
# NOTE(review): quantiles_range is defined above but not included in this
# search space — confirm that is intentional.
search_space = [
    quarter_range,
    day_of_week_range,
    load_forecast_range,
    learning_rate,
    hidden_size,
    num_layers,
    dropout,
    time_feature_embedding,
]

# Minimize the CRPS test loss reported under the "Summary" title.
optimizer = HyperParameterOptimizer(
    base_task_id=input_task_id,
    objective_metric_title="Summary",
    objective_metric_series="test_CRPSLoss",
    objective_metric_sign="min",
    execution_queue=execution_queue,
    max_number_of_concurrent_tasks=1,
    optimizer_class=aSearchStrategy,
    max_iteration_per_job=300,
    # save_top_k_tasks_only=3,
    pool_period_min=1,
    total_max_jobs=40,
    hyper_parameters=search_space,
)
|
|
|
|
# Hand this controller over to an agent listening on the "hypertuning"
# queue; the local process exits here and everything below runs remotely.
task.execute_remotely(queue_name="hypertuning", exit_process=True)

# Report optimizer progress every minute.
optimizer.set_report_period(1)
|
|
|
|
def job_complete_callback(
    job_id: str,
    objective_value: float,
    objective_iteration: int,
    job_parameters: dict,
    top_performance_job_id: str,
) -> None:
    """Log every finished trial and celebrate when it sets a new best.

    Passed to ``optimizer.start`` below; the optimizer invokes it once per
    completed trial.  ``top_performance_job_id`` is the id of the best job
    seen so far, so equality with ``job_id`` means a new record.
    """
    print('Job completed!', job_id, objective_value, objective_iteration, job_parameters)
    if job_id == top_performance_job_id:
        print('WOOT WOOT we broke the record! Objective reached {}'.format(objective_value))
|
|
|
|
# Launch the background optimization; the callback fires per finished trial.
optimizer.start(job_complete_callback=job_complete_callback)
# Hard stop after 120 * 8 minutes (16 hours) of optimization.
optimizer.set_time_limit(in_minutes=120.0*8)
# Block until the time limit is reached or all jobs have completed.
optimizer.wait()
# Report the five best trials found.
top_exp = optimizer.get_top_experiments(top_k=5)
print([t.id for t in top_exp])
# make sure background optimization stopped
optimizer.stop()

print('We are done, good bye')