✨ initial commit

2026-03-06 22:19:44 +01:00
commit c8f28ffbcc
17 changed files with 811 additions and 0 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -0,0 +1,5 @@
+defaults:  # Hydra-style defaults list (assumes Hydra composes this file — confirm)
+  - env: cartpole  # selects the cartpole option of the env group (configs/env/cartpole.yaml)
+  - runner: mujoco  # selects the mujoco option of the runner group (configs/runner/mujoco.yaml)
+  - training: ppo  # selects the ppo option of the training group (configs/training/ppo.yaml)
+  - _self_  # listed last: under Hydra, keys set in this file would override the groups above
--- a/configs/env/cartpole.yaml
+++ b/configs/env/cartpole.yaml
@@ -0,0 +1,11 @@
+max_steps: 500  # presumably the per-episode step cap — confirm in the env code
+angle_threshold: 0.418  # presumably radians (0.418 rad is about 24 degrees) — TODO confirm unit
+cart_limit: 2.4  # presumably the max |cart position| allowed — TODO confirm unit and use
+reward_alive: 1.0  # reward_* keys: names suggest alive bonus, upright bonus, action penalty
+reward_pole_upright_scale: 1.0
+reward_action_penalty_scale: 0.01  # NOTE(review): sign is presumably applied by the env — verify
+model_path: assets/cartpole/cartpole.urdf  # relative path; the base directory is not shown here
+actuators:  # list of actuator entries; only cart_joint is listed
+  - joint: cart_joint
+    gear: 10.0  # presumably a MuJoCo-style actuator gear (control-to-force scale) — confirm
+    ctrl_range: [-1.0, 1.0]  # presumably the control clamp as [min, max] — confirm
--- a/configs/runner/mujoco.yaml
+++ b/configs/runner/mujoco.yaml
@@ -0,0 +1,4 @@
+num_envs: 16  # presumably the number of environments run in parallel — confirm
+device: cpu  # presumably a torch-style device string — verify against the runner
+dt: 0.02  # presumably a step duration in seconds (0.02 s would be 50 Hz) — TODO confirm
+substeps: 2  # NOTE(review): unclear whether the physics step is dt or dt/substeps — confirm
--- a/configs/training/ppo.yaml
+++ b/configs/training/ppo.yaml
@@ -0,0 +1,13 @@
+hidden_sizes: [128, 128]  # presumably MLP hidden layer widths — confirm which networks use it
+total_timesteps: 1000000  # NOTE(review): unclear if counted per env or summed over all envs
+rollout_steps: 1024  # presumably steps collected between updates — confirm per-env or total
+learning_epochs: 4  # presumably optimisation passes over each collected rollout
+mini_batches: 4  # presumably the number of mini-batches each rollout is split into
+discount_factor: 0.99  # gamma, going by the usual PPO naming — confirm
+gae_lambda: 0.95  # lambda for generalized advantage estimation, going by the name
+learning_rate: 0.0003
+clip_ratio: 0.2  # presumably the PPO surrogate clipping epsilon
+value_loss_scale: 0.5  # presumably the weight on the value-loss term
+entropy_loss_scale: 0.01  # presumably the weight on the entropy term
+log_interval: 10  # NOTE(review): unit (updates? episodes?) is not shown here — confirm
+clearml_project: RL-Framework  # presumably the ClearML project name for experiment tracking