From ca120e57153ccdea0a9680a7db95aeda54b1a732 Mon Sep 17 00:00:00 2001 From: Victor Mylle Date: Mon, 26 Feb 2024 18:20:53 +0100 Subject: [PATCH] Finished baseline policy evaluator --- src/policies/PolicyEvaluator.py | 20 ++- .../baselines/BaselinePolicyEvaluator.py | 151 ++++++++++++++++-- .../baselines/global_threshold_baseline.py | 9 +- 3 files changed, 158 insertions(+), 22 deletions(-) diff --git a/src/policies/PolicyEvaluator.py b/src/policies/PolicyEvaluator.py index 1a3106a..71ec211 100644 --- a/src/policies/PolicyEvaluator.py +++ b/src/policies/PolicyEvaluator.py @@ -121,9 +121,6 @@ class PolicyEvaluator: ], ) - print("Profits calculated") - print(self.profits.head()) - def plot_profits_table(self): # Check if task or penalties are not set if ( @@ -157,7 +154,11 @@ class PolicyEvaluator: ) # Rename columns to match expected output - final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"] + final_df.columns = [ + "Penalty", + "Total Profit (per year)", + "Total Charge Cycles (per year)", + ] # Profits till 400 profits_till_400 = self.get_profits_till_400() @@ -167,7 +168,7 @@ class PolicyEvaluator: # Log the final results table self.task.get_logger().report_table( - "Policy Results", "Policy Results", iteration=0, table_plot=final_df + "Test Set Results", "Profits per Penalty", iteration=0, table_plot=final_df ) def plot_thresholds_per_day(self): @@ -213,16 +214,19 @@ class PolicyEvaluator: final_df.columns = ["Penalty", "Total Profit", "Total Charge Cycles"] return final_df - def get_profits_till_400(self): + def get_profits_till_400(self, profits: pd.DataFrame = None): + if profits is None: + profits = self.profits + # calculates profits until 400 charge cycles per year are reached - number_of_days = len(self.profits["Date"].unique()) + number_of_days = len(profits["Date"].unique()) usable_charge_cycles = (400 / 365) * number_of_days # now sum the profit until the usable charge cycles are reached penalty_profits = {} penalty_charge_cycles = {} - for index, row in self.profits.iterrows(): + for index, row in profits.iterrows(): penalty = row["Penalty"] profit = row["Profit"] charge_cycles = row["Charge Cycles"] diff --git a/src/policies/baselines/BaselinePolicyEvaluator.py b/src/policies/baselines/BaselinePolicyEvaluator.py index 88770ee..ac2e5f6 100644 --- a/src/policies/baselines/BaselinePolicyEvaluator.py +++ b/src/policies/baselines/BaselinePolicyEvaluator.py @@ -9,11 +9,9 @@ import torch class BaselinePolicyEvaluator(PolicyEvaluator): def __init__(self, baseline_policy: BaselinePolicy, task: Task = None): - super(baseline_policy, task) - self.dates = baseline_policy.train_data["DateTime"].dt.date.unique() - self.dates = pd.to_datetime(self.dates) - self.penalties = [0, 100, 300, 500, 800, 1000, 1500] - self.profits = [] + super(BaselinePolicyEvaluator, self).__init__(baseline_policy, task) + + self.train_profits = [] def determine_thresholds_for_date(self, date): charge_thresholds = np.arange(-100, 250, 25) @@ -31,8 +29,8 @@ class BaselinePolicyEvaluator(PolicyEvaluator): ) ) - best_charge_threshold = found_charge_thresholds.item() - best_discharge_threshold = found_discharge_thresholds.item() + best_charge_threshold = found_charge_thresholds + best_discharge_threshold = found_discharge_thresholds simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate( torch.tensor([[real_imbalance_prices]]), @@ -40,7 +38,7 @@ class BaselinePolicyEvaluator(PolicyEvaluator): torch.tensor([best_discharge_threshold]), ) - self.profits.append( + self.train_profits.append( [ date, penalty, @@ -52,16 +50,18 @@ class BaselinePolicyEvaluator(PolicyEvaluator): ) def determine_best_thresholds(self): - self.profits = [] + self.train_profits = [] + dates = self.baseline_policy.train_data["DateTime"].dt.date.unique() + dates = pd.to_datetime(dates) try: - for date in tqdm(self.dates): + for date in tqdm(dates): self.determine_thresholds_for_date(date) except Exception as e: print(e) pass - self.profits = pd.DataFrame( - self.profits, + self.train_profits = pd.DataFrame( + self.train_profits, columns=[ "Date", "Penalty", @@ -71,3 +71,130 @@ class BaselinePolicyEvaluator(PolicyEvaluator): "Discharge Threshold", ], ) + + number_of_days = len(self.train_profits["Date"].unique()) + usable_charge_cycles = (400 / 365) * number_of_days + + intermediate_values = {penalty: {} for penalty in self.penalties} + + # find the best threshold combination for each penalty based on the total profit on the data + for penalty in self.penalties: + profits_for_penalty = self.train_profits[ + self.train_profits["Penalty"] == penalty + ] + + for index, row in profits_for_penalty.iterrows(): + charge_threshold = row["Charge Threshold"] + discharge_threshold = row["Discharge Threshold"] + + if (charge_threshold, discharge_threshold) not in intermediate_values[ + penalty + ]: + intermediate_values[penalty][ + (charge_threshold, discharge_threshold) + ] = (0, 0) + + new_charge_cycles = ( + intermediate_values[penalty][ + (charge_threshold, discharge_threshold) + ][1] + + row["Charge Cycles"] + ) + new_profit = ( + intermediate_values[penalty][ + (charge_threshold, discharge_threshold) + ][0] + + row["Profit"] + ) + + if new_charge_cycles <= usable_charge_cycles: + intermediate_values[penalty][ + (charge_threshold, discharge_threshold) + ] = (new_profit, new_charge_cycles) + + best_thresholds = {penalty: [0, 0, 0, 0] for penalty in self.penalties} + + for penalty in self.penalties: + best_profit = 0 + for threshold, values in intermediate_values[penalty].items(): + if values[0] > best_profit: + best_profit = values[0] + best_thresholds[penalty][0] = threshold[0] + best_thresholds[penalty][1] = threshold[1] + best_thresholds[penalty][2] = best_profit + best_thresholds[penalty][3] = values[1] + + # create dataframe from best_thresholds with columns, Penalty, Charge Threshold, Discharge Threshold, Profit + data = [ + (penalty, values[0], values[1], values[2], values[3]) + for penalty, values in best_thresholds.items() + ] + + best_thresholds_df = pd.DataFrame( + data, + columns=[ + "Penalty", + "Charge Threshold", + "Discharge Threshold", + "Profit (training data)", + f"Charge Cycles (training data: max {usable_charge_cycles})", + ], + ) + + if self.task: + self.task.get_logger().report_table( + "Baseline Train Data", + "Best Thresholds for each Penalty on Training Data (up to 400 cycles / year)", + iteration=0, + table_plot=best_thresholds_df, + ) + + return best_thresholds + + def evaluate_test_set(self, thresholds: dict): + """Evaluate the test set using the given thresholds (multiple penalties) + + Args: + thresholds (dict): Dictionary with penalties as keys and the corresponding thresholds tuple as values + """ + self.profits = [] + try: + for date in tqdm(self.dates): + real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) + for penalty in thresholds.keys(): + charge_threshold = thresholds[penalty][0] + discharge_threshold = thresholds[penalty][1] + + simulated_profit, simulated_charge_cycles = ( + self.baseline_policy.simulate( + torch.tensor([[real_imbalance_prices]]), + torch.tensor([charge_threshold]), + torch.tensor([discharge_threshold]), + ) + ) + + self.profits.append( + [ + date, + penalty, + simulated_profit[0][0].item(), + simulated_charge_cycles[0][0].item(), + charge_threshold, + discharge_threshold, + ] + ) + + self.profits = pd.DataFrame( + self.profits, + columns=[ + "Date", + "Penalty", + "Profit", + "Charge Cycles", + "Charge Threshold", + "Discharge Threshold", + ], + ) + except Exception as e: + print(e) + pass diff --git a/src/policies/baselines/global_threshold_baseline.py b/src/policies/baselines/global_threshold_baseline.py index f2b75e4..b68df6a 100644 --- a/src/policies/baselines/global_threshold_baseline.py +++ b/src/policies/baselines/global_threshold_baseline.py @@ -2,7 +2,7 @@ from src.utils.clearml import ClearMLHelper #### ClearML #### clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast") -task = clearml_helper.get_task(task_name="Global Thresholds Baselien") +task = clearml_helper.get_task(task_name="Global Thresholds Baseline") task.execute_remotely(queue_name="default", exit_process=True) from src.policies.baselines.BaselinePolicyEvaluator import BaselinePolicyEvaluator @@ -13,4 +13,9 @@ battery = Battery(2, 1) baseline_policy = BaselinePolicy(battery, data_path="") policy_evaluator = BaselinePolicyEvaluator(baseline_policy, task) -policy_evaluator.determine_best_thresholds() +thresholds = policy_evaluator.determine_best_thresholds() +policy_evaluator.evaluate_test_set(thresholds) + +policy_evaluator.plot_profits_table() + +task.close()