♻️ cleanup
This commit is contained in:
@@ -170,9 +170,12 @@ def _create_base_task(
|
||||
"registry.kube.optimize/worker-image:latest",
|
||||
docker_setup_bash_script=(
|
||||
"apt-get update && apt-get install -y --no-install-recommends "
|
||||
"libosmesa6 libgl1-mesa-glx libglfw3 && rm -rf /var/lib/apt/lists/* "
|
||||
"&& pip install 'jax[cuda12]' mujoco-mjx"
|
||||
"libosmesa6-dev libgl1-mesa-glx libglfw3 && rm -rf /var/lib/apt/lists/* "
|
||||
"&& pip install 'jax[cuda12]' mujoco-mjx PyOpenGL PyOpenGL-accelerate"
|
||||
),
|
||||
docker_arguments=[
|
||||
"-e", "MUJOCO_GL=osmesa",
|
||||
],
|
||||
)
|
||||
|
||||
req_file = Path(__file__).resolve().parent.parent / "requirements.txt"
|
||||
@@ -214,6 +217,10 @@ def main() -> None:
|
||||
help="Maximum budget (total_timesteps) for promoted trials",
|
||||
)
|
||||
parser.add_argument("--eta", type=int, default=3, help="Successive halving reduction factor")
|
||||
parser.add_argument(
|
||||
"--max-consecutive-failures", type=int, default=3,
|
||||
help="Abort HPO after N consecutive trial failures (0 = never abort)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--time-limit-hours", type=float, default=72,
|
||||
help="Total wall-clock time limit in hours",
|
||||
@@ -312,6 +319,7 @@ def main() -> None:
|
||||
time_limit_per_job=240, # 4 hours per trial max
|
||||
eta=args.eta,
|
||||
budget_param_name="Hydra/training.total_timesteps",
|
||||
max_consecutive_failures=args.max_consecutive_failures,
|
||||
)
|
||||
|
||||
# Send this HPO controller to a remote services worker
|
||||
|
||||
Reference in New Issue
Block a user