♻️ full agent refactor

This commit is contained in:
2026-06-10 21:15:34 +02:00
parent a98e86ef66
commit 1e0836e1bc
49 changed files with 1309 additions and 829 deletions

View File

@@ -2,9 +2,7 @@ num_envs: 1024 # MJX shines with many parallel envs
device: auto # auto = cuda if available, else cpu
dt: 0.002
substeps: 10
history_length: 10 # RMA-style: 10-step window of (obs, action) pairs
rma_mode: "none" # "none" | "teacher" | "deploy"
history_length: 10 # (obs, action) window for implicit adaptation
# ── Domain randomization (sim-to-real) ──────────────────────────────
# Full DR on GPU: latency + sensor noise + per-env dynamics scales

View File

@@ -2,9 +2,7 @@ num_envs: 64
device: auto # auto = cuda if available, else cpu
dt: 0.002
substeps: 10
history_length: 10 # must match training.history_length (DR + embedding)
rma_mode: "none" # "none" | "teacher" | "deploy"
history_length: 10 # (obs, action) window for implicit adaptation
# ── Domain randomization (sim-to-real) ──────────────────────────────
# Noise/delay levels anchored to the real recordings (~50 Hz, ~0.5 rad/s

View File

@@ -7,8 +7,6 @@ dt: 0.002
substeps: 10
history_length: 10
rma_mode: "none" # "none" | "teacher" | "deploy"
# Clean by default (deterministic eval). Confirming-experiment example —
# re-eval an existing checkpoint in sim with a fixed 1-step action delay:
# mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \

View File

@@ -9,5 +9,3 @@ baud: 115200
dt: 0.02 # control loop period (50 Hz, matches training)
no_data_timeout: 2.0 # seconds of silence before declaring disconnect
history_length: 10 # must match training runner
rma_mode: "none" # "none" | "teacher" | "deploy"