import abc import dataclasses from typing import TypeVar, Generic, Any from gymnasium import spaces import torch import pathlib T = TypeVar("T") @dataclasses.dataclass class ActuatorConfig: """Actuator definition — maps a joint to a motor with gear ratio and control limits. Kept in the env config (not runner config) because actuators define what the robot can do, which determines action space — a task-level concept. This mirrors Isaac Lab's pattern of separating actuator config from the robot file.""" joint: str = "" gear: float = 1.0 ctrl_range: tuple[float, float] = (-1.0, 1.0) damping: float = 0.05 # joint damping — limits max speed: vel_max ≈ torque / damping @dataclasses.dataclass class BaseEnvConfig: max_steps: int = 1000 model_path: pathlib.Path | None = None actuators: list[ActuatorConfig] = dataclasses.field(default_factory=list) class BaseEnv(abc.ABC, Generic[T]): def __init__(self, config: BaseEnvConfig): self.config = config @property @abc.abstractmethod def observation_space(self) -> spaces.Space: ... @property @abc.abstractmethod def action_space(self) -> spaces.Space: ... @abc.abstractmethod def build_state(self, qpos: torch.Tensor, qvel: torch.Tensor) -> Any: ... @abc.abstractmethod def compute_observations(self, state: Any) -> torch.Tensor: ... @abc.abstractmethod def compute_rewards(self, state: Any, actions: torch.Tensor) -> torch.Tensor: ... @abc.abstractmethod def compute_terminations(self, state: Any) -> torch.Tensor: ... def compute_truncations(self, step_counts: torch.Tensor) -> torch.Tensor: return step_counts >= self.config.max_steps def get_default_qpos(self, nq: int) -> list[float] | None: """Return the default joint positions for reset. Override in subclass if the URDF zero pose doesn't match the desired initial state (e.g. pendulum hanging down). Returns None to use the URDF default (all zeros).""" return None