feat: RMA-style history-conditioned policy for sim2real adaptation
Added a temporal observation history buffer and 1D-CNN encoder so the
policy can implicitly infer environment parameters (mass, friction,
gear ratios, etc.) from recent (obs, action) dynamics.
Architecture:
history window [(obs₀,a₀), ..., (obs_{H-1},a_{H-1})]
→ 1D-CNN HistoryEncoder → embedding (32-dim)
→ concat [current_obs, embedding] → MLP → action
Components:
- BaseRunner: history ring buffer, _push_history/_reset_history,
augmented obs space (6 + H×7 = 76 with H=10)
- HistoryEncoder (src/models/mlp.py): 2-layer temporal Conv1d + global average pooling (GAP)
- SharedMLP: optional history_length/raw_obs_dim/embedding_dim params;
splits augmented obs, encodes history, feeds [obs, emb] to MLP
- TrainerConfig: history_length, embedding_dim fields
- All runner configs: history_length=10 by default (note: the tests assert that a default-constructed MuJoCoRunnerConfig has history_length == 0)
- Tests: encoder shape, model with/without history, config defaults
This commit is contained in:
@@ -1,11 +1,14 @@
|
||||
"""Unit tests for MuJoCoRunner domain randomization."""
|
||||
"""Unit tests for MuJoCoRunner domain randomization and history buffer."""
|
||||
|
||||
import dataclasses
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import torch
|
||||
from gymnasium import spaces
|
||||
|
||||
from src.runners.mujoco import DomainRandConfig, MuJoCoRunnerConfig
|
||||
from src.models.mlp import SharedMLP, HistoryEncoder
|
||||
|
||||
|
||||
class TestDomainRandConfig:
|
||||
@@ -37,3 +40,55 @@ class TestMuJoCoRunnerConfig:
|
||||
assert isinstance(cfg.domain_rand, DomainRandConfig)
|
||||
assert cfg.domain_rand.mass_frac == 0.2
|
||||
assert cfg.domain_rand.friction_frac == 0.3
|
||||
|
||||
def test_history_length_default(self) -> None:
    """A default-constructed MuJoCoRunnerConfig reports history_length == 0."""
    default_cfg = MuJoCoRunnerConfig()
    assert default_cfg.history_length == 0
|
||||
|
||||
|
||||
class TestHistoryEncoder:
    """Output-shape checks for the HistoryEncoder forward pass."""

    def test_output_shape(self) -> None:
        """A (4, 10, 7) history batch maps to a (4, 32) embedding."""
        batch, window, step_dim, emb_dim = 4, 10, 7, 32
        encoder = HistoryEncoder(
            history_length=window,
            step_dim=step_dim,
            embedding_dim=emb_dim,
        )
        history = torch.randn(batch, window, step_dim)
        embedding = encoder(history)
        assert embedding.shape == (batch, emb_dim)

    def test_different_embedding_dim(self) -> None:
        """A (2, 5, 7) history batch maps to a (2, 16) embedding."""
        batch, window, step_dim, emb_dim = 2, 5, 7, 16
        encoder = HistoryEncoder(
            history_length=window,
            step_dim=step_dim,
            embedding_dim=emb_dim,
        )
        history = torch.randn(batch, window, step_dim)
        embedding = encoder(history)
        assert embedding.shape == (batch, emb_dim)
|
||||
|
||||
|
||||
class TestSharedMLPWithHistory:
    """SharedMLP forward-pass checks with and without a history encoder."""

    def test_no_history(self) -> None:
        """Without history, model works as before."""
        observation_space = spaces.Box(low=-1.0, high=1.0, shape=(6,))
        action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))
        model = SharedMLP(
            observation_space,
            action_space,
            torch.device("cpu"),
            hidden_sizes=(32, 32),
        )
        # No history arguments were passed, so no encoder should be attached.
        assert model.history_encoder is None

        batch = {"states": torch.randn(4, 6)}
        mean, _log_std, _ = model.compute(batch, role="policy")
        assert mean.shape == (4, 1)

    def test_with_history(self) -> None:
        """With history, model splits obs and encodes history."""
        raw_obs_dim = 6
        act_dim = 1
        H = 10
        # Each history step is one (obs, action) pair: 6 + 1 = 7 values.
        step_dim = raw_obs_dim + act_dim
        # Augmented observation = current obs + flattened history: 6 + 70 = 76.
        aug_dim = raw_obs_dim + H * step_dim

        observation_space = spaces.Box(low=-1.0, high=1.0, shape=(aug_dim,))
        action_space = spaces.Box(low=-1.0, high=1.0, shape=(act_dim,))
        model = SharedMLP(
            observation_space,
            action_space,
            torch.device("cpu"),
            hidden_sizes=(32, 32),
            history_length=H,
            raw_obs_dim=raw_obs_dim,
            embedding_dim=32,
        )
        assert model.history_encoder is not None

        batch = {"states": torch.randn(4, aug_dim)}

        # Policy role: one mean per action dimension.
        mean, _log_std, _ = model.compute(batch, role="policy")
        assert mean.shape == (4, act_dim)

        # Value role: a single scalar per batch element.
        value, _ = model.compute(batch, role="value")
        assert value.shape == (4, 1)
|
||||
|
||||
Reference in New Issue
Block a user