♻️ full agent refactor

2026-06-10 21:15:34 +02:00
parent a98e86ef66
commit 1e0836e1bc
49 changed files with 1309 additions and 829 deletions
--- a/configs/runner/mjx.yaml
+++ b/configs/runner/mjx.yaml
@@ -2,9 +2,7 @@ num_envs: 1024       # MJX shines with many parallel envs
 device: auto         # auto = cuda if available, else cpu
 dt: 0.002
 substeps: 10
-history_length: 10   # RMA-style: 10-step window of (obs, action) pairs
-
-rma_mode: "none"    # "none" | "teacher" | "deploy"
+history_length: 10   # window of past (obs, action) pairs, in steps, for implicit adaptation

 # ── Domain randomization (sim-to-real) ──────────────────────────────
 # Full DR on GPU: latency + sensor noise + per-env dynamics scales
--- a/configs/runner/mujoco.yaml
+++ b/configs/runner/mujoco.yaml
@@ -2,9 +2,7 @@ num_envs: 64
 device: auto  # auto = cuda if available, else cpu
 dt: 0.002
 substeps: 10
-history_length: 10   # must match training.history_length (DR + embedding)
-
-rma_mode: "none"     # "none" | "teacher" | "deploy"
+history_length: 10   # window of past (obs, action) pairs, in steps, for implicit adaptation

 # ── Domain randomization (sim-to-real) ──────────────────────────────
 # Noise/delay levels anchored to the real recordings (~50 Hz, ~0.5 rad/s
--- a/configs/runner/mujoco_single.yaml
+++ b/configs/runner/mujoco_single.yaml
@@ -7,8 +7,6 @@ dt: 0.002
 substeps: 10
 history_length: 10

-rma_mode: "none"    # "none" | "teacher" | "deploy"
-
 # Clean by default (deterministic eval).  Confirming-experiment example —
 # re-eval an existing checkpoint in sim with a fixed 1-step action delay:
 #   mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \
--- a/configs/runner/serial.yaml
+++ b/configs/runner/serial.yaml
@@ -9,5 +9,3 @@ baud: 115200
 dt: 0.02                    # control loop period (50 Hz, matches training)
 no_data_timeout: 2.0        # seconds of silence before declaring disconnect
 history_length: 10           # must match training runner
-
-rma_mode: "none"    # "none" | "teacher" | "deploy"