RL-Sim-Framework/src/core/env.py

import abc
import dataclasses
from typing import TypeVar, Generic, Any
from gymnasium import spaces
import torch
import pathlib

T = TypeVar("T")


@dataclasses.dataclass
class ActuatorConfig:
    """Actuator definition — maps a joint to a motor with gear ratio and control limits.
    Kept in the env config (not runner config) because actuators define what the robot
    can do, which determines action space — a task-level concept.
    This mirrors Isaac Lab's pattern of separating actuator config from the robot file."""
    joint: str = ""
    gear: float = 1.0
    ctrl_range: tuple[float, float] = (-1.0, 1.0)
    damping: float = 0.05  # joint damping — limits max speed: vel_max ≈ torque / damping


@dataclasses.dataclass
class BaseEnvConfig:
    max_steps: int = 1000
    model_path: pathlib.Path | None = None
    actuators: list[ActuatorConfig] = dataclasses.field(default_factory=list)

class BaseEnv(abc.ABC, Generic[T]):
    def __init__(self, config: BaseEnvConfig):
        self.config = config

    @property
    @abc.abstractmethod
    def observation_space(self) -> spaces.Space:
        ...

    @property
    @abc.abstractmethod
    def action_space(self) -> spaces.Space:
        ...

    @abc.abstractmethod
    def build_state(self, qpos: torch.Tensor, qvel: torch.Tensor) -> Any:
        ...

    @abc.abstractmethod
    def compute_observations(self, state: Any) -> torch.Tensor:
        ...

    @abc.abstractmethod
    def compute_rewards(self, state: Any, actions: torch.Tensor) -> torch.Tensor:
        ...

    @abc.abstractmethod
    def compute_terminations(self, state: Any) -> torch.Tensor:
        ...

    def compute_truncations(self, step_counts: torch.Tensor) -> torch.Tensor:
        return step_counts >= self.config.max_steps

    def get_default_qpos(self, nq: int) -> list[float] | None:
        """Return the default joint positions for reset.
        Override in subclass if the URDF zero pose doesn't match
        the desired initial state (e.g. pendulum hanging down).
        Returns None to use the URDF default (all zeros)."""
        return None