diff --git a/src/policies/PolicyEvaluator.py b/src/policies/PolicyEvaluator.py
index 80691c2..2fef398 100644
--- a/src/policies/PolicyEvaluator.py
+++ b/src/policies/PolicyEvaluator.py
@@ -92,12 +92,16 @@ class PolicyEvaluator:
         test_loader,
         initial_penalty,
         target_charge_cycles,
-        learning_rate=2,
+        initial_learning_rate=2,
         max_iterations=10,
         tolerance=10,
+        learning_rate_decay=0.9,  # Factor to shrink the learning rate when the gradient flips sign
     ):
         self.cache = {}
         penalty = initial_penalty
+        learning_rate = initial_learning_rate
+        previous_gradient = None  # Previous gradient, kept to detect sign flips
+
         for iteration in range(max_iterations):
             # Calculate profit and charge cycles for the current penalty
             simulated_profit, simulated_charge_cycles = (
@@ -105,19 +109,29 @@ class PolicyEvaluator:
             )

             print(
-                f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}"
+                f"Iteration {iteration}: Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}, Learning Rate: {learning_rate}"
             )

             # Calculate the gradient (difference) between the simulated and target charge cycles
             gradient = simulated_charge_cycles - target_charge_cycles

+            # Decay the learning rate when the gradient changes sign, to damp oscillation around the target
+            if previous_gradient is not None and gradient * previous_gradient < 0:
+                learning_rate *= learning_rate_decay
+
             # Update the penalty parameter in the direction of the gradient
-            penalty += learning_rate * gradient
+            penalty += (
+                learning_rate * gradient
+            )  # A positive gradient (too many charge cycles) raises the penalty to discourage cycling
+
+            # Update the previous gradient
+            previous_gradient = gradient

             # Check if the charge cycles are close enough to the target
             if abs(gradient) < tolerance:
                 print(f"Optimal penalty found after {iteration+1} iterations")
                 break
+
         else:
             print(
                 f"Reached max iterations ({max_iterations}) without converging to the target charge cycles"
diff --git a/src/policies/baselines/PerfectBaseline.py b/src/policies/baselines/PerfectBaseline.py
new file mode 100644
index 0000000..b2b928b
--- /dev/null
+++ b/src/policies/baselines/PerfectBaseline.py
@@ -0,0 +1,49 @@
+from clearml import Task
+from src.policies.simple_baseline import BaselinePolicy
+from src.policies.baselines.YesterdayBaselinePolicyExecutor import (
+    YesterdayBaselinePolicyEvaluator,
+)
+import torch
+import numpy as np
+
+
+class PerfectBaseline(YesterdayBaselinePolicyEvaluator):
+    """Perfect-foresight baseline: picks thresholds against the realized imbalance prices."""
+
+    def __init__(self, baseline_policy: BaselinePolicy, task: Task = None):
+        super().__init__(baseline_policy, task)
+
+    def evaluate_for_date(
+        self,
+        date,
+        charge_thresholds=np.arange(-100, 250, 25),
+        discharge_thresholds=np.arange(-100, 250, 25),
+        penalty: int = 0,
+        current_state_of_charge=0.0,
+    ):
+        real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
+
+        best_charge_thresholds, best_discharge_thresholds = (
+            self.baseline_policy.get_optimal_thresholds(
+                real_imbalance_prices,
+                charge_thresholds,
+                discharge_thresholds,
+                penalty,
+                battery_state_of_charge=current_state_of_charge,
+            )
+        )
+
+        best_profit, best_charge_cycles, new_state_of_charge = (
+            self.baseline_policy.simulate(
+                torch.tensor([[real_imbalance_prices]]),
+                torch.tensor([best_charge_thresholds.mean(axis=0)]),
+                torch.tensor([best_discharge_thresholds.mean(axis=0)]),
+                battery_state_of_charge=torch.tensor([current_state_of_charge]),
+            )
+        )
+
+        return (
+            best_profit[0][0].item(),
+            best_charge_cycles[0][0].item(),
+            new_state_of_charge.squeeze(0).item(),
+        )
diff --git a/src/policies/baselines/perfect_baseline.py b/src/policies/baselines/perfect_baseline.py
new file mode 100644
index 0000000..fee6ccb
--- /dev/null
+++ b/src/policies/baselines/perfect_baseline.py
@@ -0,0 +1,54 @@
+from src.utils.clearml import ClearMLHelper
+
+#### ClearML ####
+clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
+task = clearml_helper.get_task(task_name="Perfect Baseline")
+task.execute_remotely(queue_name="default", exit_process=True)
+
+from src.policies.simple_baseline import BaselinePolicy, Battery
+from src.data import DataProcessor, DataConfig
+from src.policies.baselines.PerfectBaseline import PerfectBaseline
+
+### Data Processor ###
+data_config = DataConfig()
+data_config.NRV_HISTORY = True
+data_config.LOAD_HISTORY = True
+data_config.LOAD_FORECAST = True
+
+data_config.WIND_FORECAST = True
+data_config.WIND_HISTORY = True
+
+data_config.QUARTER = False
+data_config.DAY_OF_WEEK = False
+
+data_config.NOMINAL_NET_POSITION = True
+
+data_processor = DataProcessor(data_config, path="", lstm=False)
+data_processor.set_batch_size(64)
+data_processor.set_full_day_skip(True)
+
+### Policy Evaluator ###
+battery = Battery(2, 1)
+baseline_policy = BaselinePolicy(battery, data_path="")
+policy_evaluator = PerfectBaseline(baseline_policy, task)
+
+penalty, profit, charge_cycles = (
+    policy_evaluator.optimize_penalty_for_target_charge_cycles(
+        data_processor=data_processor,
+        initial_penalty=0,
+        target_charge_cycles=283,
+        initial_learning_rate=2,
+        max_iterations=100,
+        tolerance=1,
+    )
+)
+# policy_evaluator.plot_profits_table()
+print()
+print("Test Set Results")
+print(f"Penalty: {penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}")
+
+task.get_logger().report_single_value(name="Optimal Penalty", value=penalty)
+task.get_logger().report_single_value(name="Optimal Profit", value=profit)
+task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles)
+
+task.close()
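
For reference, the new tuning logic in PolicyEvaluator boils down to the following self-contained sketch: a proportional update on the penalty whose step size decays whenever the gradient changes sign. This is a minimal sketch, not the evaluator itself; simulate_charge_cycles is a made-up linear stand-in for the real simulation call (which this diff does not show), and its constants are illustrative only.

def simulate_charge_cycles(penalty: float) -> float:
    # Toy stand-in: assume a higher penalty discourages cycling linearly.
    return 500.0 - 0.9 * penalty


def tune_penalty(
    target_charge_cycles: float = 283,
    initial_penalty: float = 0.0,
    initial_learning_rate: float = 2.0,
    learning_rate_decay: float = 0.9,
    max_iterations: int = 100,
    tolerance: float = 1.0,
):
    penalty = initial_penalty
    learning_rate = initial_learning_rate
    previous_gradient = None
    for iteration in range(max_iterations):
        cycles = simulate_charge_cycles(penalty)
        gradient = cycles - target_charge_cycles  # positive -> too many cycles
        if previous_gradient is not None and gradient * previous_gradient < 0:
            # The gradient flipped sign, so the last step overshot: damp the step size.
            learning_rate *= learning_rate_decay
        penalty += learning_rate * gradient  # too many cycles -> raise the penalty
        previous_gradient = gradient
        if abs(gradient) < tolerance:
            return penalty, cycles, iteration + 1
    return penalty, cycles, max_iterations


if __name__ == "__main__":
    penalty, cycles, iterations = tune_penalty()
    print(f"Penalty: {penalty:.1f}, Charge Cycles: {cycles:.1f}, Iterations: {iterations}")

With these toy numbers the loop overshoots the target on early iterations; the sign-flip decay shrinks the step each time the loop crosses the target, so it settles noticeably faster than a fixed learning rate would.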
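A hypothetical usage sketch for the new evaluate_for_date: the third element of the returned tuple is the battery's end-of-day state of charge, which suggests chaining consecutive days by feeding it back in. Whether the evaluator's own test loop threads state this way is not visible in this diff; the construction mirrors perfect_baseline.py, and the dates are arbitrary examples.

import datetime

from src.policies.simple_baseline import BaselinePolicy, Battery
from src.policies.baselines.PerfectBaseline import PerfectBaseline

# Same construction as in perfect_baseline.py (no ClearML task needed here).
battery = Battery(2, 1)
baseline_policy = BaselinePolicy(battery, data_path="")
policy_evaluator = PerfectBaseline(baseline_policy)

state_of_charge = 0.0
total_profit = 0.0
for offset in range(3):  # three consecutive example days
    date = datetime.datetime(2024, 1, 1) + datetime.timedelta(days=offset)
    profit, charge_cycles, state_of_charge = policy_evaluator.evaluate_for_date(
        date,
        penalty=0,
        current_state_of_charge=state_of_charge,  # carry the end-of-day state forward
    )
    total_profit += profit

print(f"Total profit over 3 days: {total_profit}")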