From 1a822bd82ea7bdb8f9dfb2e04d70d4d811ae41e6 Mon Sep 17 00:00:00 2001
From: Victor Mylle <victor.mylle@hotmail.com>
Date: Wed, 11 Mar 2026 23:07:37 +0100
Subject: [PATCH] :bug: bug fixes

---
 scripts/hpo.py   | 17 +++++++++++++----
 scripts/train.py |  6 ++++++
 src/hpo/smac3.py | 12 +++++++++---
 train.py         |  6 ++++++
 4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/scripts/hpo.py b/scripts/hpo.py
index 270ace2..69e23fd 100644
--- a/scripts/hpo.py
+++ b/scripts/hpo.py
@@ -157,6 +157,14 @@ def _create_base_task(
         add_task_init_call=False,
     )
 
+    # Explicitly set Hydra config-group choices so cloned tasks
+    # pick up the correct env / runner / training groups.
+    # Task.create() does not populate the Hydra parameter section
+    # because Hydra never actually runs during creation.
+    base_task.set_parameter("Hydra/env", env)
+    base_task.set_parameter("Hydra/runner", runner)
+    base_task.set_parameter("Hydra/training", training)
+
     # Set docker config
     base_task.set_base_docker(
         "registry.kube.optimize/worker-image:latest",
@@ -198,12 +206,12 @@ def main() -> None:
         help="Total HPO trial budget",
     )
     parser.add_argument(
-        "--min-budget", type=int, default=3,
-        help="Minimum budget (epochs) per trial",
+        "--min-budget", type=int, default=50_000,
+        help="Minimum budget (total_timesteps) per trial",
     )
     parser.add_argument(
-        "--max-budget", type=int, default=81,
-        help="Maximum budget (epochs) for promoted trials",
+        "--max-budget", type=int, default=500_000,
+        help="Maximum budget (total_timesteps) for promoted trials",
     )
     parser.add_argument("--eta", type=int, default=3, help="Successive halving reduction factor")
     parser.add_argument(
@@ -303,6 +311,7 @@ def main() -> None:
         pool_period_min=1,
         time_limit_per_job=240,  # 4 hours per trial max
         eta=args.eta,
+        budget_param_name="Hydra/training.total_timesteps",
     )
 
     # Send this HPO controller to a remote services worker
diff --git a/scripts/train.py b/scripts/train.py
index 84f5b1b..7ed00e4 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -101,6 +101,12 @@ def main(cfg: DictConfig) -> None:
     training_dict.pop("hpo", None)  # HPO range metadata — not a TrainerConfig field
     task = _init_clearml(choices, remote=remote)
 
+    # Drop keys not recognised by TrainerConfig (e.g. ClearML-injected
+    # resume_from_task_id or any future additions)
+    import dataclasses as _dc
+    _valid_keys = {f.name for f in _dc.fields(TrainerConfig)}
+    training_dict = {k: v for k, v in training_dict.items() if k in _valid_keys}
+
     env_name = choices.get("env", "cartpole")
     env = build_env(env_name, cfg)
     runner = _build_runner(choices.get("runner", "mujoco"), env, cfg)
diff --git a/src/hpo/smac3.py b/src/hpo/smac3.py
index b9bb10b..ded1061 100644
--- a/src/hpo/smac3.py
+++ b/src/hpo/smac3.py
@@ -175,6 +175,12 @@ class OptimizerSMAC(SearchStrategy):
             deterministic=True,
         )
 
+        # Configurable budget parameter name
+        # Default: Hydra/training.total_timesteps (RL-Framework convention)
+        self.budget_param_name = smac_kwargs.pop(
+            "budget_param_name", "Hydra/training.total_timesteps"
+        )
+
         # build the Successive Halving intensifier (NOT Hyperband!)
         # Hyperband runs multiple brackets with different starting budgets - wasteful
         # Successive Halving: ALL configs start at min_budget, only best get promoted
@@ -262,11 +268,11 @@ class OptimizerSMAC(SearchStrategy):
             else:
                 param_value = v
             clone.set_parameter(original_name, param_value)
-        # Override epochs budget if multi-fidelity
+        # Override budget parameter (e.g. total_timesteps) for multi-fidelity
         if self.max_iterations != self.min_iterations:
-            clone.set_parameter("Hydra/training.max_epochs", int(budget))
+            clone.set_parameter(self.budget_param_name, int(budget))
         else:
-            clone.set_parameter("Hydra/training.max_epochs", int(self.max_iterations))
+            clone.set_parameter(self.budget_param_name, int(self.max_iterations))
 
         # If we have a previous task, pass its ID so the worker can download the checkpoint
         if prev_task_id:
diff --git a/train.py b/train.py
index 6547e38..d59d225 100644
--- a/train.py
+++ b/train.py
@@ -99,6 +99,12 @@ def main(cfg: DictConfig) -> None:
     training_dict.pop("hpo", None)  # HPO range metadata — not a TrainerConfig field
     task = _init_clearml(choices, remote=remote)
 
+    # Drop keys not recognised by TrainerConfig (e.g. ClearML-injected
+    # resume_from_task_id or any future additions)
+    import dataclasses as _dc
+    _valid_keys = {f.name for f in _dc.fields(TrainerConfig)}
+    training_dict = {k: v for k, v in training_dict.items() if k in _valid_keys}
+
     env_name = choices.get("env", "cartpole")
     env = build_env(env_name, cfg)
     runner = _build_runner(choices.get("runner", "mujoco"), env, cfg)