# Single-env MuJoCo runner — mimics real hardware timing.
# dt × substeps = 0.002 × 10 = 0.02 s → 50 Hz control, same as serial runner.

num_envs: 1
device: cpu
dt: 0.002  # seconds per substep
substeps: 10  # substeps per control step (dt × substeps = 0.02 s)
history_length: 10
rma_mode: "none"  # "none" | "teacher" | "deploy"

# Clean by default (deterministic eval). Confirming-experiment example —
# re-eval an existing checkpoint in sim with a fixed 1-step action delay:
#   mjpython scripts/eval.py env=rotary_cartpole runner=mujoco_single \
#     checkpoint=runs/.../agent_XXXX.pt \
#     '++runner.domain_rand.action_delay_steps=[1,1]'
domain_rand: {}