# Top-level simulation / environment settings.
num_envs: 1024       # number of parallel envs; MJX shines with many parallel envs
device: auto         # auto = cuda if available, else cpu
# dt * substeps = 0.002 * 10 = 0.02, i.e. a 20 ms control step if dt is the
# physics timestep in seconds and substeps is physics steps per control step
# (consistent with the "0–40 ms" for 0–2 delay steps under domain_rand).
# NOTE(review): units/roles inferred from that comment — confirm in the runner.
dt: 0.002
substeps: 10
history_length: 10   # RMA-style: 10-step window of (obs, action) pairs

# Allowed values are listed inline; their exact effect is defined by the
# consuming code (not visible in this file).
rma_mode: "none"    # "none" | "teacher" | "deploy"

# ── Domain randomization (sim-to-real) ──────────────────────────────
# NOTE: on MJX, action delay and sensor noise are applied, but the
# per-env dynamics *scales* (friction/damping/torque) are NOT yet wired
# into the JIT step. For scale randomization use runner=mujoco; on MJX,
# keep this block to delay + noise only.
# Sensor-noise and action-latency randomization parameters.
domain_rand:
  qpos_noise_std: 0.01         # rad   — std of encoder angle noise
  qvel_noise_std: 0.5          # rad/s — std of velocity-estimate noise (measured)
  # Two-element range expressed in control steps; 2 steps ↔ 40 ms implies a
  # 20 ms control step. NOTE(review): presumably [min, max] inclusive and
  # sampled per env — confirm against the code that reads this key.
  action_delay_steps: [0, 2]   # control-step latency (0–40 ms)