♻️ Refactor: add SMAC3 + Successive Halving hyperparameter optimization (ClearML integration)

This commit is contained in:
2026-03-11 22:52:01 +01:00
parent 35223b3560
commit 4115447022
34 changed files with 4255 additions and 102 deletions

1
src/hpo/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Hyperparameter optimization — SMAC3 + ClearML Successive Halving."""

636
src/hpo/smac3.py Normal file
View File

@@ -0,0 +1,636 @@
# Requires: pip install smac==2.0.0 ConfigSpace==0.4.20
import contextlib
import time
from collections.abc import Sequence
from functools import wraps
from typing import Any
from clearml import Task
from clearml.automation.optimization import Objective, SearchStrategy
from clearml.automation.parameters import Parameter
from clearml.backend_interface.session import SendError
from ConfigSpace import (
CategoricalHyperparameter,
ConfigurationSpace,
UniformFloatHyperparameter,
UniformIntegerHyperparameter,
)
from smac import MultiFidelityFacade
from smac.intensifier.successive_halving import SuccessiveHalving
from smac.runhistory.dataclasses import TrialInfo, TrialValue
from smac.scenario import Scenario
def retry_on_error(max_retries=5, initial_delay=2.0, backoff=2.0, exceptions=(Exception,)):
    """Decorator factory: retry the wrapped callable with exponential backoff.

    Each caught exception triggers another attempt after a pause that is
    multiplied by ``backoff`` every round.  Once ``max_retries`` attempts
    have failed, the wrapper gives up and returns ``None`` — it never
    re-raises, by design (callers treat ``None`` as "operation failed").
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            pause = initial_delay
            attempts_left = max_retries
            while attempts_left > 0:
                attempts_left -= 1
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    if attempts_left == 0:
                        break  # exhausted — swallow the error, fall through
                    time.sleep(pause)
                    pause *= backoff
            return None
        return wrapper
    return decorator
def _encode_param_name(name: str) -> str:
"""Encode parameter name for ConfigSpace (replace / with __SLASH__)"""
return name.replace("/", "__SLASH__")
def _decode_param_name(name: str) -> str:
"""Decode parameter name back to original (replace __SLASH__ with /)"""
return name.replace("__SLASH__", "/")
def _convert_param_to_cs(param: Parameter):
    """
    Translate a ClearML ``Parameter`` into the matching ConfigSpace
    hyperparameter, targeting the ConfigSpace>=1.x API (which dropped
    the old 'q' quantization argument).

    Raises:
        ValueError: if the parameter exposes neither ``values`` nor a
            ``min_value``/``max_value`` pair.
    """
    # '/' in parameter names confuses ConfigSpace, so escape it up front.
    cs_name = _encode_param_name(param.name)

    # Discrete choice parameter.
    if hasattr(param, "values"):
        return CategoricalHyperparameter(name=cs_name, choices=list(param.values))

    # Numeric range parameter (integer or float).
    if hasattr(param, "min_value") and hasattr(param, "max_value"):
        lo = param.min_value
        hi = param.max_value
        use_log = getattr(param, "log_scale", False)
        if isinstance(lo, int) and isinstance(hi, int):
            # Integer range.  A step_size != 1 is emulated with an explicit
            # categorical list of values, since 'q' no longer exists.
            if hasattr(param, "step_size"):
                step = int(param.step_size)
                if step != 1:
                    return CategoricalHyperparameter(
                        name=cs_name, choices=list(range(lo, hi + 1, step))
                    )
            return UniformIntegerHyperparameter(name=cs_name, lower=lo, upper=hi, log=use_log)
        # Anything else is treated as a float range.
        return UniformFloatHyperparameter(
            name=cs_name, lower=float(lo), upper=float(hi), log=use_log
        )

    raise ValueError(f"Unsupported Parameter type: {type(param)}")
class OptimizerSMAC(SearchStrategy):
    """
    SMAC3-based hyperparameter optimizer, matching OptimizerBOHB interface.

    Runs a *manual* Successive Halving schedule on top of SMAC's suggestion
    engine: every configuration starts at ``min_iteration_per_job`` epochs,
    and only the top ``1/eta`` of each rung is promoted to the next
    (``eta``-times larger) budget, resuming from the previous trial's
    checkpoint via a ``resume_from_task_id`` parameter on the cloned task.
    """
    def __init__(
        self,
        base_task_id: str,
        hyper_parameters: Sequence[Parameter],
        objective_metric: Objective,
        execution_queue: str,
        num_concurrent_workers: int,
        min_iteration_per_job: int,
        max_iteration_per_job: int,
        total_max_jobs: int,
        pool_period_min: float = 2.0,
        time_limit_per_job: float | None = None,
        compute_time_limit: float | None = None,
        **smac_kwargs: Any,
    ):
        """Configure the search strategy, ConfigSpace, and SMAC facade.

        Args:
            base_task_id: ClearML task to clone for each trial.
            hyper_parameters: ClearML ``Parameter`` definitions to optimize.
            objective_metric: Objective (title/series/sign) to read from trials.
            execution_queue: ClearML queue trials are enqueued to.
            num_concurrent_workers: Max trials running at once.
            min_iteration_per_job: Minimum (starting) epoch budget.
            max_iteration_per_job: Maximum (final) epoch budget.
            total_max_jobs: Overall trial budget for the optimization.
            pool_period_min: Polling period, in minutes.
            time_limit_per_job: Optional per-trial wall-clock limit (minutes).
            compute_time_limit: Optional total wall-clock limit (minutes).
            **smac_kwargs: Extra kwargs for the SuccessiveHalving intensifier;
                ``eta`` (reduction factor, default 3) is popped from here.
        """
        # Initialize base SearchStrategy
        super().__init__(
            base_task_id=base_task_id,
            hyper_parameters=hyper_parameters,
            objective_metric=objective_metric,
            execution_queue=execution_queue,
            num_concurrent_workers=num_concurrent_workers,
            pool_period_min=pool_period_min,
            time_limit_per_job=time_limit_per_job,
            compute_time_limit=compute_time_limit,
            min_iteration_per_job=min_iteration_per_job,
            max_iteration_per_job=max_iteration_per_job,
            total_max_jobs=total_max_jobs,
        )
        # Expose for internal use (access private attributes from base class)
        self.execution_queue = self._execution_queue
        self.min_iterations = min_iteration_per_job
        self.max_iterations = max_iteration_per_job
        self.num_concurrent_workers = self._num_concurrent_workers  # Fix: access private attribute
        # Objective details
        # Handle both single objective (string) and multi-objective (list) cases
        if isinstance(self._objective_metric.title, list):
            self.metric_title = self._objective_metric.title[0]  # Use first objective
        else:
            self.metric_title = self._objective_metric.title
        if isinstance(self._objective_metric.series, list):
            self.metric_series = self._objective_metric.series[0]  # Use first series
        else:
            self.metric_series = self._objective_metric.series
        # ClearML Objective stores sign as a list, e.g., ['max'] or ['min']
        objective_sign = getattr(self._objective_metric, "sign", None) or getattr(self._objective_metric, "order", None)
        # Handle list case - extract first element
        if isinstance(objective_sign, list):
            objective_sign = objective_sign[0] if objective_sign else "max"
        # Default to max if nothing found
        if objective_sign is None:
            objective_sign = "max"
        self.maximize_metric = str(objective_sign).lower() in ("max", "max_global")
        # Build ConfigSpace
        self.config_space = ConfigurationSpace(seed=42)
        for p in self._hyper_parameters:  # Access private attribute correctly
            cs_hp = _convert_param_to_cs(p)
            self.config_space.add(cs_hp)
        # Configure SMAC Scenario
        # NOTE(review): walltime_limit=None when compute_time_limit is unset —
        # confirm the installed SMAC Scenario accepts None (newer versions
        # default to inf, not None).
        scenario = Scenario(
            configspace=self.config_space,
            n_trials=self.total_max_jobs,
            min_budget=float(self.min_iterations),
            max_budget=float(self.max_iterations),
            walltime_limit=(self.compute_time_limit * 60) if self.compute_time_limit else None,
            deterministic=True,
        )
        # build the Successive Halving intensifier (NOT Hyperband!)
        # Hyperband runs multiple brackets with different starting budgets - wasteful
        # Successive Halving: ALL configs start at min_budget, only best get promoted
        # eta controls the reduction factor (default 3 means keep top 1/3 each round)
        # eta can be overridden via smac_kwargs from HyperParameterOptimizer
        eta = smac_kwargs.pop("eta", 3)  # Default to 3 if not specified
        intensifier = SuccessiveHalving(scenario=scenario, eta=eta, **smac_kwargs)
        # now pass that intensifier instance into the facade
        # target_function is a dummy: we drive SMAC via ask/tell, never run it.
        self.smac = MultiFidelityFacade(
            scenario=scenario,
            target_function=lambda config, budget, seed: 0.0,
            intensifier=intensifier,
            overwrite=True,
        )
        # Bookkeeping
        self.running_tasks = {}  # task_id -> trial info
        self.task_start_times = {}  # task_id -> start time (for timeout)
        self.completed_results = []
        self.best_score_so_far = float("-inf") if self.maximize_metric else float("inf")
        self.time_limit_per_job = time_limit_per_job  # Store time limit (minutes)
        # Checkpoint continuation tracking: config_key -> {budget: task_id}
        # Used to find the previous task's checkpoint when promoting a config
        self.config_to_tasks = {}  # config_key -> {budget: task_id}
        # Manual Successive Halving control
        self.eta = eta
        self.current_budget = float(self.min_iterations)
        self.configs_at_budget = {}  # budget -> list of (config, score, trial)
        self.pending_configs = []  # configs waiting to be evaluated at current_budget - list of (trial, prev_task_id)
        self.evaluated_at_budget = []  # (config, score, trial, task_id) for current budget
        self.smac_asked_configs = set()  # track which configs SMAC has given us
        # Calculate initial rung size for proper Successive Halving
        # With eta=3: rung sizes are n, n/3, n/9, ...
        # Total trials = n * (1 + 1/eta + 1/eta^2 + ...) = n * eta/(eta-1) for infinite series
        # For finite rungs, calculate exactly
        num_rungs = 1
        b = float(self.min_iterations)
        while b * eta <= self.max_iterations:
            num_rungs += 1
            b *= eta
        # Sum of geometric series: 1 + 1/eta + 1/eta^2 + ... (num_rungs terms)
        series_sum = sum(1.0 / (eta**i) for i in range(num_rungs))
        self.initial_rung_size = int(self.total_max_jobs / series_sum)
        self.initial_rung_size = max(self.initial_rung_size, self.num_concurrent_workers)  # at least num_workers
        self.configs_needed_for_rung = self.initial_rung_size  # how many configs we still need for current rung
        self.rung_closed = False  # whether we've collected all configs for current rung

    @retry_on_error(max_retries=3, initial_delay=5.0, exceptions=(ValueError, SendError, ConnectionError))
    def _get_task_safe(self, task_id: str):
        """Safely get a task with retry logic. Returns None after repeated failures."""
        return Task.get_task(task_id=task_id)

    @retry_on_error(max_retries=3, initial_delay=5.0, exceptions=(ValueError, SendError, ConnectionError))
    def _launch_task(self, config: dict, budget: float, prev_task_id: str | None = None):
        """Launch a task with retry logic for robustness.

        Clones the base task, overrides its hyperparameters and epoch budget,
        optionally wires up checkpoint continuation, and enqueues it.

        Args:
            config: Hyperparameter configuration dict
            budget: Number of epochs to train
            prev_task_id: Optional task ID from previous budget to continue from (checkpoint)

        Returns:
            The cloned ClearML Task, or None if the base task could not be fetched.
        """
        base = self._get_task_safe(task_id=self._base_task_id)
        if base is None:
            return None
        clone = Task.clone(
            source_task=base,
            name=f"HPO Trial - {base.name}",
            parent=Task.current_task().id,  # Set the current HPO task as parent
        )
        # Override hyperparameters
        for k, v in config.items():
            # Decode parameter name back to original (with slashes)
            original_name = _decode_param_name(k)
            # Convert numpy types to Python built-in types
            if hasattr(v, "item"):  # numpy scalar
                param_value = v.item()
            elif isinstance(v, int | float | str | bool):
                param_value = type(v)(v)  # Ensure it's the built-in type
            else:
                param_value = v
            clone.set_parameter(original_name, param_value)
        # Override epochs budget if multi-fidelity
        if self.max_iterations != self.min_iterations:
            clone.set_parameter("Hydra/training.max_epochs", int(budget))
        else:
            clone.set_parameter("Hydra/training.max_epochs", int(self.max_iterations))
        # If we have a previous task, pass its ID so the worker can download the checkpoint
        if prev_task_id:
            clone.set_parameter("Hydra/training.resume_from_task_id", prev_task_id)
        Task.enqueue(task=clone, queue_name=self.execution_queue)
        # Track start time for timeout enforcement
        self.task_start_times[clone.id] = time.time()
        return clone

    def start(self):
        """Run the Successive Halving loop until the trial budget is exhausted.

        Main loop: close finished rungs and promote the top 1/eta configs,
        ask SMAC for new configs while the first rung is open, launch pending
        trials up to the concurrency limit, then poll ClearML for completed /
        timed-out / unreachable tasks and feed their costs back to SMAC.

        Returns:
            list[dict]: one record per finished trial (task_id, config,
            budget, value, plus failure/timeout flags where applicable).
        """
        controller = Task.current_task()
        total_launched = 0
        # Keep launching & collecting until budget exhausted
        while total_launched < self.total_max_jobs:
            # Check if current budget rung is complete BEFORE asking for new trials
            # (no running tasks, no pending configs, and we have results for this budget)
            if not self.running_tasks and not self.pending_configs and self.evaluated_at_budget:
                # Rung complete! Promote top performers to next budget
                # Store results for this budget
                self.configs_at_budget[self.current_budget] = self.evaluated_at_budget.copy()
                # Sort by score (best first)
                sorted_configs = sorted(
                    self.evaluated_at_budget,
                    key=lambda x: x[1],  # score
                    reverse=self.maximize_metric,
                )
                # Move to next budget?
                next_budget = self.current_budget * self.eta
                if next_budget <= self.max_iterations:
                    # How many to promote (top 1/eta)
                    n_promote = max(1, len(sorted_configs) // self.eta)
                    promoted = sorted_configs[:n_promote]
                    # Update budget and reset for next rung
                    self.current_budget = next_budget
                    self.evaluated_at_budget = []
                    self.configs_needed_for_rung = 0  # promoted configs are all we need
                    self.rung_closed = True  # rung is pre-filled with promoted configs
                    # Re-queue promoted configs with new budget
                    # Include the previous task ID for checkpoint continuation
                    for _cfg, _score, old_trial, prev_task_id in promoted:
                        new_trial = TrialInfo(
                            config=old_trial.config,
                            instance=old_trial.instance,
                            seed=old_trial.seed,
                            budget=self.current_budget,
                        )
                        # Store as tuple: (trial, prev_task_id)
                        self.pending_configs.append((new_trial, prev_task_id))
                else:
                    # All budgets complete
                    break
            # Fill pending_configs with new trials ONLY if we haven't closed this rung yet
            # For the first rung: ask SMAC for initial_rung_size configs total
            # For subsequent rungs: only use promoted configs (rung is already closed)
            while (
                not self.rung_closed
                and len(self.pending_configs) + len(self.evaluated_at_budget) + len(self.running_tasks)
                < self.initial_rung_size
                and total_launched < self.total_max_jobs
            ):
                trial = self.smac.ask()
                if trial is None:
                    self.rung_closed = True
                    break
                # Create new trial with forced budget (TrialInfo is frozen, can't modify)
                trial_with_budget = TrialInfo(
                    config=trial.config,
                    instance=trial.instance,
                    seed=trial.seed,
                    budget=self.current_budget,
                )
                cfg_key = str(sorted(trial.config.items()))
                if cfg_key not in self.smac_asked_configs:
                    self.smac_asked_configs.add(cfg_key)
                    # Store as tuple: (trial, None) - no previous task for new configs
                    self.pending_configs.append((trial_with_budget, None))
            # Check if we've collected enough configs for this rung
            if (
                not self.rung_closed
                and len(self.pending_configs) + len(self.evaluated_at_budget) + len(self.running_tasks)
                >= self.initial_rung_size
            ):
                self.rung_closed = True
            # Launch pending configs up to concurrent limit
            while self.pending_configs and len(self.running_tasks) < self.num_concurrent_workers:
                # Unpack tuple: (trial, prev_task_id)
                trial, prev_task_id = self.pending_configs.pop(0)
                t = self._launch_task(trial.config, self.current_budget, prev_task_id=prev_task_id)
                if t is None:
                    # Launch failed: report the worst possible result to SMAC.
                    # SMAC always *minimizes* cost, so +inf is the worst value in
                    # both directions (the previous `-inf` for the minimize case
                    # wrongly marked failed launches as the best result seen).
                    self.smac.tell(trial, TrialValue(cost=float("inf")))
                    total_launched += 1
                    continue
                self.running_tasks[t.id] = trial
                # Track which task ID was used for this config at this budget
                cfg_key = str(sorted(trial.config.items()))
                if cfg_key not in self.config_to_tasks:
                    self.config_to_tasks[cfg_key] = {}
                self.config_to_tasks[cfg_key][self.current_budget] = t.id
                total_launched += 1
            if not self.running_tasks and not self.pending_configs:
                break
            # Poll for finished or timed out
            done = []
            timed_out = []
            failed_to_check = []
            for tid, _tri in self.running_tasks.items():
                try:
                    task = self._get_task_safe(task_id=tid)
                    if task is None:
                        failed_to_check.append(tid)
                        continue
                    st = task.get_status()
                    # Check if task completed normally
                    if st == Task.TaskStatusEnum.completed or st in (
                        Task.TaskStatusEnum.failed,
                        Task.TaskStatusEnum.stopped,
                    ):
                        done.append(tid)
                    # Check for timeout (if time limit is set)
                    elif self.time_limit_per_job and tid in self.task_start_times:
                        elapsed_minutes = (time.time() - self.task_start_times[tid]) / 60.0
                        if elapsed_minutes > self.time_limit_per_job:
                            with contextlib.suppress(Exception):
                                task.mark_stopped(force=True)
                            timed_out.append(tid)
                except Exception:
                    # Don't mark as failed immediately, might be transient
                    # Only mark failed after multiple consecutive failures
                    if not hasattr(self, "_task_check_failures"):
                        self._task_check_failures = {}
                    self._task_check_failures[tid] = self._task_check_failures.get(tid, 0) + 1
                    if self._task_check_failures[tid] >= 5:  # 5 consecutive failures
                        failed_to_check.append(tid)
                        del self._task_check_failures[tid]
            # Process tasks that failed to check
            for tid in failed_to_check:
                tri = self.running_tasks.pop(tid)
                if tid in self.task_start_times:
                    del self.task_start_times[tid]
                # Tell SMAC this trial failed with worst possible score
                res = float("-inf") if self.maximize_metric else float("inf")
                cost = -res if self.maximize_metric else res  # +inf either way
                self.smac.tell(tri, TrialValue(cost=cost))
                self.completed_results.append(
                    {
                        "task_id": tid,
                        "config": tri.config,
                        "budget": tri.budget,
                        "value": res,
                        "failed": True,
                    }
                )
                # Store result with task_id for checkpoint tracking
                self.evaluated_at_budget.append((tri.config, res, tri, tid))
            # Process completed tasks
            for tid in done:
                tri = self.running_tasks.pop(tid)
                if tid in self.task_start_times:
                    del self.task_start_times[tid]
                # Clear any accumulated failures for this task
                if hasattr(self, "_task_check_failures") and tid in self._task_check_failures:
                    del self._task_check_failures[tid]
                task = self._get_task_safe(task_id=tid)
                if task is None:
                    res = float("-inf") if self.maximize_metric else float("inf")
                else:
                    res = self._get_objective(task)
                    if res is None or res == float("-inf") or res == float("inf"):
                        res = float("-inf") if self.maximize_metric else float("inf")
                cost = -res if self.maximize_metric else res
                self.smac.tell(tri, TrialValue(cost=cost))
                self.completed_results.append(
                    {
                        "task_id": tid,
                        "config": tri.config,
                        "budget": tri.budget,
                        "value": res,
                    }
                )
                # Store result for this budget rung with task_id for checkpoint tracking
                self.evaluated_at_budget.append((tri.config, res, tri, tid))
                iteration = len(self.completed_results)
                # Always report the trial score (even if it's bad)
                if res is not None and res != float("-inf") and res != float("inf"):
                    controller.get_logger().report_scalar(
                        title="Optimization", series="trial_score", value=res, iteration=iteration
                    )
                controller.get_logger().report_scalar(
                    title="Optimization",
                    series="trial_budget",
                    value=tri.budget or self.max_iterations,
                    iteration=iteration,
                )
                # Update best score tracking based on actual results
                if res is not None and res != float("-inf") and res != float("inf"):
                    if self.maximize_metric:
                        self.best_score_so_far = max(self.best_score_so_far, res)
                    elif res < self.best_score_so_far:
                        self.best_score_so_far = res
                # Always report best score so far (shows flat line when no improvement)
                if self.best_score_so_far != float("-inf") and self.best_score_so_far != float("inf"):
                    controller.get_logger().report_scalar(
                        title="Optimization", series="best_score", value=self.best_score_so_far, iteration=iteration
                    )
                # Report running statistics
                valid_scores = [
                    r["value"]
                    for r in self.completed_results
                    if r["value"] is not None and r["value"] != float("-inf") and r["value"] != float("inf")
                ]
                if valid_scores:
                    controller.get_logger().report_scalar(
                        title="Optimization",
                        series="mean_score",
                        value=sum(valid_scores) / len(valid_scores),
                        iteration=iteration,
                    )
                controller.get_logger().report_scalar(
                    title="Progress",
                    series="completed_trials",
                    value=len(self.completed_results),
                    iteration=iteration,
                )
                controller.get_logger().report_scalar(
                    title="Progress", series="running_tasks", value=len(self.running_tasks), iteration=iteration
                )
            # Process timed out tasks (treat as failed with current objective value)
            for tid in timed_out:
                tri = self.running_tasks.pop(tid)
                if tid in self.task_start_times:
                    del self.task_start_times[tid]
                # Clear any accumulated failures for this task
                if hasattr(self, "_task_check_failures") and tid in self._task_check_failures:
                    del self._task_check_failures[tid]
                # Try to get the last objective value before timeout
                task = self._get_task_safe(task_id=tid)
                if task is None:
                    res = float("-inf") if self.maximize_metric else float("inf")
                else:
                    res = self._get_objective(task)
                    if res is None:
                        res = float("-inf") if self.maximize_metric else float("inf")
                cost = -res if self.maximize_metric else res
                self.smac.tell(tri, TrialValue(cost=cost))
                self.completed_results.append(
                    {
                        "task_id": tid,
                        "config": tri.config,
                        "budget": tri.budget,
                        "value": res,
                        "timed_out": True,
                    }
                )
                # Store timed out result for this budget rung with task_id
                self.evaluated_at_budget.append((tri.config, res, tri, tid))
            time.sleep(self.pool_period_minutes * 60)  # Fix: use correct attribute name from base class
            # NOTE(review): assumes Task.get_runtime() returns elapsed seconds — confirm
            if self.compute_time_limit and controller.get_runtime() > self.compute_time_limit * 60:
                break
        # Finalize
        self._finalize()
        return self.completed_results

    @retry_on_error(max_retries=3, initial_delay=2.0, exceptions=(SendError, ConnectionError, KeyError))
    def _get_objective(self, task: Task):
        """Get objective metric value with retry logic for robustness.

        Returns the metric's 'max' ('min' when minimizing) over all logged
        iterations, falling back to 'last'; None when the metric is missing.
        """
        if task is None:
            return None
        try:
            m = task.get_last_scalar_metrics()
            if not m:
                return None
            metric_data = m[self.metric_title][self.metric_series]
            # ClearML returns dict with 'last', 'min', 'max' keys representing
            # the last/min/max values of this series over ALL logged iterations.
            # For snake_length/train_max: 'last' is the last logged train_max value,
            # 'max' is the highest train_max ever logged during training.
            # Use 'max' if maximizing (we want the best performance achieved),
            # 'min' if minimizing, fallback to 'last'
            if self.maximize_metric and "max" in metric_data:
                result = metric_data["max"]
            elif not self.maximize_metric and "min" in metric_data:
                result = metric_data["min"]
            else:
                result = metric_data["last"]
            return result
        except Exception:  # Exception already covers KeyError; missing metric -> None
            return None

    def _finalize(self):
        """Log the final best score and upload the best config as an artifact.

        Reports our own best-score tracking, then tries to cross-check it
        against SMAC's incumbent (best configuration in its runhistory).
        """
        controller = Task.current_task()
        # Report final best score
        controller.get_logger().report_text(f"Final best score: {self.best_score_so_far}")
        # Also try to get SMAC's incumbent for comparison
        try:
            incumbent = self.smac.intensifier.get_incumbent()
            if incumbent is not None:
                runhistory = self.smac.runhistory
                # Try different ways to get the cost
                incumbent_cost = None
                try:
                    incumbent_cost = runhistory.get_cost(incumbent)
                except Exception:
                    # Fallback: search through runhistory manually
                    for trial_key, trial_value in runhistory.items():
                        trial_config = runhistory.get_config(trial_key.config_id)
                        if trial_config == incumbent and (incumbent_cost is None or trial_value.cost < incumbent_cost):
                            incumbent_cost = trial_value.cost
                if incumbent_cost is not None:
                    # Costs were negated on the way in when maximizing; undo that here.
                    score = -incumbent_cost if self.maximize_metric else incumbent_cost
                    controller.get_logger().report_text(f"SMAC incumbent: {incumbent}, score: {score}")
                    controller.upload_artifact(
                        "best_config",
                        {"config": dict(incumbent), "score": score, "our_best_score": self.best_score_so_far},
                    )
            else:
                controller.upload_artifact("best_config", {"our_best_score": self.best_score_so_far})
        except Exception as e:
            controller.get_logger().report_text(f"Error getting SMAC incumbent: {e}")
            controller.upload_artifact("best_config", {"our_best_score": self.best_score_so_far})