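🐛 Bug fixes: set Hydra config-group parameters on the base task, express HPO budgets in total_timesteps, and drop training keys unknown to TrainerConfig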

This commit is contained in:
2026-03-11 23:07:37 +01:00
parent 4115447022
commit 1a822bd82e
4 changed files with 34 additions and 7 deletions

View File

@@ -157,6 +157,14 @@ def _create_base_task(
add_task_init_call=False,
)
# Explicitly set Hydra config-group choices so cloned tasks
# pick up the correct env / runner / training groups.
# Task.create() does not populate the Hydra parameter section
# because Hydra never actually runs during creation.
base_task.set_parameter("Hydra/env", env)
base_task.set_parameter("Hydra/runner", runner)
base_task.set_parameter("Hydra/training", training)
# Set docker config
base_task.set_base_docker(
"registry.kube.optimize/worker-image:latest",
@@ -198,12 +206,12 @@ def main() -> None:
help="Total HPO trial budget",
)
parser.add_argument(
"--min-budget", type=int, default=3,
help="Minimum budget (epochs) per trial",
"--min-budget", type=int, default=50_000,
help="Minimum budget (total_timesteps) per trial",
)
parser.add_argument(
"--max-budget", type=int, default=81,
help="Maximum budget (epochs) for promoted trials",
"--max-budget", type=int, default=500_000,
help="Maximum budget (total_timesteps) for promoted trials",
)
parser.add_argument("--eta", type=int, default=3, help="Successive halving reduction factor")
parser.add_argument(
@@ -303,6 +311,7 @@ def main() -> None:
pool_period_min=1,
time_limit_per_job=240, # 4 hours per trial max
eta=args.eta,
budget_param_name="Hydra/training.total_timesteps",
)
# Send this HPO controller to a remote services worker

View File

@@ -101,6 +101,12 @@ def main(cfg: DictConfig) -> None:
training_dict.pop("hpo", None) # HPO range metadata — not a TrainerConfig field
task = _init_clearml(choices, remote=remote)
# Drop keys not recognised by TrainerConfig (e.g. ClearML-injected
# resume_from_task_id or any future additions)
import dataclasses as _dc
_valid_keys = {f.name for f in _dc.fields(TrainerConfig)}
training_dict = {k: v for k, v in training_dict.items() if k in _valid_keys}
env_name = choices.get("env", "cartpole")
env = build_env(env_name, cfg)
runner = _build_runner(choices.get("runner", "mujoco"), env, cfg)