♻️ full agent refactor
This commit is contained in:
@@ -2,9 +2,7 @@ num_envs: 1024 # MJX shines with many parallel envs
|
||||
device: auto # auto = cuda if available, else cpu
|
||||
dt: 0.002
|
||||
substeps: 10
|
||||
history_length: 10 # RMA-style: 10-step window of (obs, action) pairs
|
||||
|
||||
rma_mode: "none" # "none" | "teacher" | "deploy"
|
||||
history_length: 10 # (obs, action) window for implicit adaptation
|
||||
|
||||
# ── Domain randomization (sim-to-real) ──────────────────────────────
|
||||
# Full DR on GPU: latency + sensor noise + per-env dynamics scales
|
||||
|
||||
@@ -2,9 +2,7 @@ num_envs: 64
|
||||
device: auto # auto = cuda if available, else cpu
|
||||
dt: 0.002
|
||||
substeps: 10
|
||||
history_length: 10 # must match training.history_length (DR + embedding)
|
||||
|
||||
rma_mode: "none" # "none" | "teacher" | "deploy"
|
||||
history_length: 10 # (obs, action) window for implicit adaptation
|
||||
|
||||
# ── Domain randomization (sim-to-real) ──────────────────────────────
|
||||
# Noise/delay levels anchored to the real recordings (~50 Hz, ~0.5 rad/s
|
||||
|
||||
@@ -7,8 +7,6 @@ dt: 0.002
|
||||
substeps: 10
|
||||
history_length: 10
|
||||
|
||||
rma_mode: "none" # "none" | "teacher" | "deploy"
|
||||
|
||||
# Clean by default (deterministic eval). Confirming-experiment example —
|
||||
# re-eval an existing checkpoint in sim with a fixed 1-step action delay:
|
||||
# mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \
|
||||
|
||||
@@ -9,5 +9,3 @@ baud: 115200
|
||||
dt: 0.02 # control loop period (50 Hz, matches training)
|
||||
no_data_timeout: 2.0 # seconds of silence before declaring disconnect
|
||||
history_length: 10 # must match training runner
|
||||
|
||||
rma_mode: "none" # "none" | "teacher" | "deploy"
|
||||
|
||||
Reference in New Issue
Block a user