♻️ cleanup

This commit is contained in:
2026-03-11 23:16:42 +01:00
parent 3db68255f0
commit 23801857f4
6 changed files with 69 additions and 258 deletions

View File

@@ -170,9 +170,12 @@ def _create_base_task(
"registry.kube.optimize/worker-image:latest",
docker_setup_bash_script=(
"apt-get update && apt-get install -y --no-install-recommends "
-"libosmesa6 libgl1-mesa-glx libglfw3 && rm -rf /var/lib/apt/lists/* "
-"&& pip install 'jax[cuda12]' mujoco-mjx"
+"libosmesa6-dev libgl1-mesa-glx libglfw3 && rm -rf /var/lib/apt/lists/* "
+"&& pip install 'jax[cuda12]' mujoco-mjx PyOpenGL PyOpenGL-accelerate"
),
+docker_arguments=[
+"-e", "MUJOCO_GL=osmesa",
+],
)
req_file = Path(__file__).resolve().parent.parent / "requirements.txt"
@@ -214,6 +217,10 @@ def main() -> None:
help="Maximum budget (total_timesteps) for promoted trials",
)
parser.add_argument("--eta", type=int, default=3, help="Successive halving reduction factor")
+parser.add_argument(
+"--max-consecutive-failures", type=int, default=3,
+help="Abort HPO after N consecutive trial failures (0 = never abort)",
+)
parser.add_argument(
"--time-limit-hours", type=float, default=72,
help="Total wall-clock time limit in hours",
@@ -312,6 +319,7 @@ def main() -> None:
time_limit_per_job=240, # 4 hours per trial max
eta=args.eta,
budget_param_name="Hydra/training.total_timesteps",
+max_consecutive_failures=args.max_consecutive_failures,
)
# Send this HPO controller to a remote services worker

View File

@@ -8,8 +8,9 @@ _PROJECT_ROOT = str(pathlib.Path(__file__).resolve().parent.parent)
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
-# Headless rendering: use OSMesa on Linux servers (must be set before mujoco import)
-if sys.platform == "linux" and "DISPLAY" not in os.environ:
+# Headless rendering: use OSMesa on Linux servers (must be set before mujoco import).
+# Always default on Linux — Docker containers may have DISPLAY set without a real X server.
+if sys.platform == "linux":
os.environ.setdefault("MUJOCO_GL", "osmesa")
import hydra
@@ -73,9 +74,12 @@ def _init_clearml(choices: dict[str, str], remote: bool = False) -> Task:
"registry.kube.optimize/worker-image:latest",
docker_setup_bash_script=(
"apt-get update && apt-get install -y --no-install-recommends "
-"libosmesa6 libgl1-mesa-glx libglfw3 && rm -rf /var/lib/apt/lists/* "
-"&& pip install 'jax[cuda12]' mujoco-mjx"
+"libosmesa6-dev libgl1-mesa-glx libglfw3 && rm -rf /var/lib/apt/lists/* "
+"&& pip install 'jax[cuda12]' mujoco-mjx PyOpenGL PyOpenGL-accelerate"
),
+docker_arguments=[
+"-e", "MUJOCO_GL=osmesa",
+],
)
req_file = pathlib.Path(hydra_utils.get_original_cwd()) / "requirements.txt"