# Training hyperparameters, one key per line.
# NOTE(review): key names suggest a PPO-style trainer; confirm the exact
# meaning and units of each value against the code that loads this file.
hidden_sizes: [128, 128]
total_timesteps: 5000000
rollout_steps: 1024
learning_epochs: 4
mini_batches: 4
discount_factor: 0.99
gae_lambda: 0.95
learning_rate: 0.0003
clip_ratio: 0.2
value_loss_scale: 0.5
entropy_loss_scale: 0.05

# Logging / checkpoint cadence.
# NOTE(review): units (timesteps vs. updates) are not visible here; verify.
log_interval: 1000
checkpoint_interval: 50000

# ClearML remote execution (GPU worker)
remote: false