Files
RL-Sim-Framework/src/core/env.py
2026-03-09 20:39:02 +01:00

68 lines
2.1 KiB
Python

import abc
import dataclasses
from typing import TypeVar, Generic, Any
from gymnasium import spaces
import torch
import pathlib
T = TypeVar("T")
@dataclasses.dataclass
class ActuatorConfig:
"""Actuator definition — maps a joint to a motor with gear ratio and control limits.
Kept in the env config (not runner config) because actuators define what the robot
can do, which determines action space — a task-level concept.
This mirrors Isaac Lab's pattern of separating actuator config from the robot file."""
joint: str = ""
gear: float = 1.0
ctrl_range: tuple[float, float] = (-1.0, 1.0)
damping: float = 0.05 # joint damping — limits max speed: vel_max ≈ torque / damping
@dataclasses.dataclass
class BaseEnvConfig:
max_steps: int = 1000
model_path: pathlib.Path | None = None
actuators: list[ActuatorConfig] = dataclasses.field(default_factory=list)
class BaseEnv(abc.ABC, Generic[T]):
def __init__(self, config: BaseEnvConfig):
self.config = config
@property
@abc.abstractmethod
def observation_space(self) -> spaces.Space:
...
@property
@abc.abstractmethod
def action_space(self) -> spaces.Space:
...
@abc.abstractmethod
def build_state(self, qpos: torch.Tensor, qvel: torch.Tensor) -> Any:
...
@abc.abstractmethod
def compute_observations(self, state: Any) -> torch.Tensor:
...
@abc.abstractmethod
def compute_rewards(self, state: Any, actions: torch.Tensor) -> torch.Tensor:
...
@abc.abstractmethod
def compute_terminations(self, state: Any) -> torch.Tensor:
...
def compute_truncations(self, step_counts: torch.Tensor) -> torch.Tensor:
return step_counts >= self.config.max_steps
def get_default_qpos(self, nq: int) -> list[float] | None:
"""Return the default joint positions for reset.
Override in subclass if the URDF zero pose doesn't match
the desired initial state (e.g. pendulum hanging down).
Returns None to use the URDF default (all zeros)."""
return None