From 1a8e735cbc8976966d9e18fb02928699470a0220 Mon Sep 17 00:00:00 2001 From: Victor Mylle Date: Mon, 18 Mar 2024 12:15:06 +0100 Subject: [PATCH] Updated training scripts --- src/policies/PolicyEvaluator.py | 202 +++++++++++---- .../baselines/BaselinePolicyEvaluator.py | 229 +++++++----------- .../YesterdayBaselinePolicyExecutor.py | 2 +- .../baselines/global_threshold_baseline.py | 15 +- src/trainers/autoregressive_trainer.py | 23 +- src/trainers/diffusion_trainer.py | 106 ++++++-- src/trainers/quantile_trainer.py | 137 ++++++----- src/trainers/trainer.py | 3 +- .../autoregressive_quantiles.py | 53 ++-- src/training_scripts/diffusion_training.py | 25 +- 10 files changed, 487 insertions(+), 308 deletions(-) diff --git a/src/policies/PolicyEvaluator.py b/src/policies/PolicyEvaluator.py index 38d09c2..ba72be9 100644 --- a/src/policies/PolicyEvaluator.py +++ b/src/policies/PolicyEvaluator.py @@ -5,6 +5,7 @@ import pandas as pd import numpy as np import torch import plotly.express as px +from functools import lru_cache from src.utils.imbalance_price_calculator import ImbalancePriceCalculator @@ -24,11 +25,14 @@ class PolicyEvaluator: ) self.imbalance_prices = imbalance_prices.sort_values(by=["DateTime"]) - self.penalties = [0, 100, 300, 500, 800, 1000, 1500] + self.penalties = [0, 1000, 1500] self.profits = [] self.task = task + self.cache = {} + + @lru_cache(maxsize=None) def get_imbanlance_prices_for_date(self, date): imbalance_prices_day = self.imbalance_prices[ self.imbalance_prices["DateTime"].dt.date == date @@ -40,69 +44,152 @@ class PolicyEvaluator: date, idx_samples, test_loader, - charge_thresholds=np.arange(-100, 250, 25), - discharge_thresholds=np.arange(-100, 250, 25), + charge_thresholds=np.arange(-1500, 1500, 50), + discharge_thresholds=np.arange(-1500, 1500, 50), + penalty: int = 0, ): - idx = test_loader.dataset.get_idx_for_date(date.date()) - - if idx not in idx_samples: - print("No samples for idx: ", idx, date) - (initial, samples) = idx_samples[idx] - - if len(initial.shape) == 2: - initial = initial.cpu().numpy()[0][-1] + if date in self.cache: + (reconstructed_imbalance_prices, real_imbalance_prices) = self.cache[date] else: - initial = initial.cpu().numpy()[-1] - samples = samples.cpu().numpy() + idx = test_loader.dataset.get_idx_for_date(date.date()) - initial = np.repeat(initial, samples.shape[0]) - combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1) + if idx not in idx_samples: + print("No samples for idx: ", idx, date) + (initial, samples) = idx_samples[idx] - reconstructed_imbalance_prices = ( - self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined) - ) - reconstructed_imbalance_prices = torch.tensor( - reconstructed_imbalance_prices, device="cuda" + if len(initial.shape) == 2: + initial = initial.cpu().numpy()[0][-1] + else: + initial = initial.cpu().numpy()[-1] + samples = samples.cpu().numpy() + + initial = np.repeat(initial, samples.shape[0]) + combined = np.concatenate((initial.reshape(-1, 1), samples), axis=1) + + reconstructed_imbalance_prices = ( + self.ipc.get_imbalance_prices_2023_for_date_vectorized(date, combined) + ) + reconstructed_imbalance_prices = torch.tensor( + reconstructed_imbalance_prices, device="cuda" + ) + + real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) + self.cache[date] = (reconstructed_imbalance_prices, real_imbalance_prices) + + return self.profit_for_penalty( + reconstructed_imbalance_prices, + real_imbalance_prices, + penalty, + charge_thresholds, + discharge_thresholds, ) - 
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) - - for penalty in self.penalties: - found_charge_thresholds, found_discharge_thresholds = ( - self.baseline_policy.get_optimal_thresholds( - reconstructed_imbalance_prices, - charge_thresholds, - discharge_thresholds, - penalty, - ) + def optimize_penalty_for_target_charge_cycles( + self, + idx_samples, + test_loader, + initial_penalty, + target_charge_cycles, + learning_rate=2, + max_iterations=10, + tolerance=10, + ): + self.cache = {} + penalty = initial_penalty + for iteration in range(max_iterations): + # Calculate profit and charge cycles for the current penalty + simulated_profit, simulated_charge_cycles = ( + self.evaluate_test_set_for_penalty(idx_samples, test_loader, penalty) ) - predicted_charge_threshold = found_charge_thresholds.mean(axis=0) - predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0) + print( + f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}" + ) - ### Determine Profits and Charge Cycles ### - simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate( - torch.tensor([[real_imbalance_prices]]), - torch.tensor([predicted_charge_threshold]), - torch.tensor([predicted_discharge_threshold]), + # Calculate the gradient (difference) between the simulated and target charge cycles + gradient = simulated_charge_cycles - target_charge_cycles + + # Update the penalty parameter in the direction of the gradient + penalty += learning_rate * gradient + + # Check if the charge cycles are close enough to the target + if abs(gradient) < tolerance: + print(f"Optimal penalty found after {iteration+1} iterations") + break + else: + print( + f"Reached max iterations ({max_iterations}) without converging to the target charge cycles" ) - self.profits.append( - [ - date, - penalty, - simulated_profit[0][0].item(), - simulated_charge_cycles[0][0].item(), - predicted_charge_threshold.item(), - predicted_discharge_threshold.item(), - ] + + # Re-calculate profit and charge cycles for the final penalty to return accurate results + profit, charge_cycles = self.evaluate_test_set_for_penalty( + idx_samples, test_loader, penalty + ) + + return penalty, profit, charge_cycles + + def profit_for_penalty( + self, + reconstructed_imbalance_prices, + real_imbalance_prices, + penalty: int, + charge_thresholds, + discharge_thresholds, + ): + """_summary_ + + Args: + date (_type_): date to evaluate + reconstructed_imbalance_prices (_type_): predicted imbalance price + real_imbalance_prices (_type_): real imbalance price + penalty (int): penalty parameter to take into account + charge_thresholds (_type_): list of charge thresholds + discharge_thresholds (_type_): list of discharge thresholds + + Returns: + _type_: returns the simulated profit, charge cycles, the found charge threshold and discharge threshold + """ + found_charge_thresholds, found_discharge_thresholds = ( + self.baseline_policy.get_optimal_thresholds( + reconstructed_imbalance_prices, + charge_thresholds, + discharge_thresholds, + penalty, ) + ) + + predicted_charge_threshold = found_charge_thresholds.mean(axis=0) + predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0) + + ### Determine Profits and Charge Cycles ### + simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate( + torch.tensor([[real_imbalance_prices]]), + torch.tensor([predicted_charge_threshold]), + torch.tensor([predicted_discharge_threshold]), + ) + return ( + simulated_profit[0][0].item(), + 
simulated_charge_cycles[0][0].item(), + predicted_charge_threshold.item(), + predicted_discharge_threshold.item(), + ) def evaluate_test_set(self, idx_samples, test_loader): self.profits = [] + self.cache = {} for date in tqdm(self.dates): try: - self.evaluate_for_date(date, idx_samples, test_loader) + for penalty in self.penalties: + self.profits.append( + [ + date, + penalty, + *self.evaluate_for_date( + date, idx_samples, test_loader, penalty=penalty + ), + ] + ) except KeyboardInterrupt: print("Interrupted") raise KeyboardInterrupt @@ -123,6 +210,27 @@ class PolicyEvaluator: ], ) + def evaluate_test_set_for_penalty(self, idx_samples, test_loader, penalty): + total_profit = 0 + total_charge_cycles = 0 + + for date in tqdm(self.dates): + try: + profit, charge_cycles, _, _ = self.evaluate_for_date( + date, idx_samples, test_loader, penalty=penalty + ) + total_profit += profit + total_charge_cycles += charge_cycles + except KeyboardInterrupt: + print("Interrupted") + raise KeyboardInterrupt + + except Exception as e: + print(e) + pass + + return total_profit, total_charge_cycles + def plot_profits_table(self): # Check if task or penalties are not set if ( diff --git a/src/policies/baselines/BaselinePolicyEvaluator.py b/src/policies/baselines/BaselinePolicyEvaluator.py index 846d527..c091b06 100644 --- a/src/policies/baselines/BaselinePolicyEvaluator.py +++ b/src/policies/baselines/BaselinePolicyEvaluator.py @@ -13,49 +13,46 @@ class BaselinePolicyEvaluator(PolicyEvaluator): self.train_profits = [] - def determine_thresholds_for_date(self, date): - charge_thresholds = np.arange(-100, 250, 25) - discharge_thresholds = np.arange(-100, 250, 25) + def determine_thresholds_for_date(self, date, penalty): + charge_thresholds = np.arange(-500, 500, 25) + discharge_thresholds = np.arange(-500, 500, 25) real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) - for penalty in self.penalties: - found_charge_thresholds, found_discharge_thresholds = ( - self.baseline_policy.get_optimal_thresholds( - torch.tensor([real_imbalance_prices]), - charge_thresholds, - discharge_thresholds, - penalty, - ) + found_charge_thresholds, found_discharge_thresholds = ( + self.baseline_policy.get_optimal_thresholds( + torch.tensor([real_imbalance_prices]), + charge_thresholds, + discharge_thresholds, + penalty, ) + ) - best_charge_threshold = found_charge_thresholds - best_discharge_threshold = found_discharge_thresholds + best_charge_threshold = found_charge_thresholds + best_discharge_threshold = found_discharge_thresholds - simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate( - torch.tensor([[real_imbalance_prices]]), - torch.tensor([best_charge_threshold]), - torch.tensor([best_discharge_threshold]), - ) + simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate( + torch.tensor([[real_imbalance_prices]]), + torch.tensor([best_charge_threshold]), + torch.tensor([best_discharge_threshold]), + ) - self.train_profits.append( - [ - date, - penalty, - simulated_profit[0][0].item(), - simulated_charge_cycles[0][0].item(), - best_charge_threshold.item(), - best_discharge_threshold.item(), - ] - ) + self.train_profits.append( + [ + simulated_profit[0][0].item(), + simulated_charge_cycles[0][0].item(), + best_charge_threshold.item(), + best_discharge_threshold.item(), + ] + ) - def determine_best_thresholds(self): + def determine_best_thresholds(self, penalty): self.train_profits = [] dates = self.baseline_policy.train_data["DateTime"].dt.date.unique() dates = 
pd.to_datetime(dates) try: for date in tqdm(dates): - self.determine_thresholds_for_date(date) + self.determine_thresholds_for_date(date, penalty) except Exception as e: print(e) pass @@ -63,8 +60,6 @@ class BaselinePolicyEvaluator(PolicyEvaluator): self.train_profits = pd.DataFrame( self.train_profits, columns=[ - "Date", - "Penalty", "Profit", "Charge Cycles", "Charge Threshold", @@ -72,91 +67,18 @@ class BaselinePolicyEvaluator(PolicyEvaluator): ], ) - number_of_days = len(self.train_profits["Date"].unique()) - usable_charge_cycles = (400 / 365) * number_of_days + # get the best thresholds combination based on the sum of profits + best_thresholds = self.train_profits.groupby( + ["Charge Threshold", "Discharge Threshold"] + ).sum()["Profit"] - intermediate_values = {penalty: {} for penalty in self.penalties} + best_thresholds = best_thresholds.idxmax() + return (best_thresholds[0], best_thresholds[1]) - # find the best threshold combination for each penalty based on the total profit on the data - for penalty in self.penalties: - profits_for_penalty = self.train_profits[ - self.train_profits["Penalty"] == penalty - ] - - for index, row in profits_for_penalty.iterrows(): - charge_threshold = row["Charge Threshold"] - discharge_threshold = row["Discharge Threshold"] - - if (charge_threshold, discharge_threshold) not in intermediate_values[ - penalty - ]: - intermediate_values[penalty][ - (charge_threshold, discharge_threshold) - ] = (0, 0) - - new_charge_cycles = ( - intermediate_values[penalty][ - (charge_threshold, discharge_threshold) - ][1] - + row["Charge Cycles"] - ) - new_profit = ( - intermediate_values[penalty][ - (charge_threshold, discharge_threshold) - ][0] - + row["Profit"] - ) - - if new_charge_cycles <= usable_charge_cycles: - intermediate_values[penalty][ - (charge_threshold, discharge_threshold) - ] = (new_profit, new_charge_cycles) - - best_thresholds = {penalty: [0, 0, 0, 0] for penalty in self.penalties} - - for penalty in self.penalties: - best_profit = 0 - for threshold, values in intermediate_values[penalty].items(): - if values[0] > best_profit: - best_profit = values[0] - best_thresholds[penalty][0] = threshold[0] - best_thresholds[penalty][1] = threshold[1] - best_thresholds[penalty][2] = best_profit - best_thresholds[penalty][3] = values[1] - - # create dataframe from best_thresholds with columns, Penalty, Charge Threshold, Discharge Threshold, Profit - data = [ - (penalty, values[0], values[1], values[2], values[3]) - for penalty, values in best_thresholds.items() - ] - - best_thresholds_df = pd.DataFrame( - data, - columns=[ - "Penalty", - "Charge Threshold", - "Discharge Threshold", - "Profit (training data)", - f"Charge Cycles (training data: max {usable_charge_cycles})", - ], - ) - - if self.task: - self.task.get_logger().report_table( - "Baseline Train Data", - "Best Thresholds for each Penalty on Training Data (up to 400 cycles / year)", - iteration=0, - table_plot=best_thresholds_df, - ) - - return best_thresholds - - def evaluate_test_set(self, thresholds: dict, data_processor=None): - """Evaluate the test set using the given thresholds (multiple penalties) - - Args: - thresholds (dict): Dictionary with penalties as keys and the corresponding thresholds tuple as values - """ + def evaluate_test_set( + self, charge_threshold, discharge_threshold, data_processor=None + ): + """Evaluate the test set using the given thresholds""" self.profits = [] if data_processor: @@ -173,40 +95,63 @@ class BaselinePolicyEvaluator(PolicyEvaluator): try: for date in 
tqdm(self.dates): real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) - for penalty in thresholds.keys(): - charge_threshold = thresholds[penalty][0] - discharge_threshold = thresholds[penalty][1] - simulated_profit, simulated_charge_cycles = ( - self.baseline_policy.simulate( - torch.tensor([[real_imbalance_prices]]), - torch.tensor([charge_threshold]), - torch.tensor([discharge_threshold]), - ) + simulated_profit, simulated_charge_cycles = ( + self.baseline_policy.simulate( + torch.tensor([[real_imbalance_prices]]), + torch.tensor([charge_threshold]), + torch.tensor([discharge_threshold]), ) + ) - self.profits.append( - [ - date, - penalty, - simulated_profit[0][0].item(), - simulated_charge_cycles[0][0].item(), - charge_threshold, - discharge_threshold, - ] - ) + self.profits.append( + [ + date, + simulated_profit[0][0].item(), + simulated_charge_cycles[0][0].item(), + ] + ) self.profits = pd.DataFrame( self.profits, - columns=[ - "Date", - "Penalty", - "Profit", - "Charge Cycles", - "Charge Threshold", - "Discharge Threshold", - ], + columns=["Date", "Profit", "Charge Cycles"], ) except Exception as e: print(e) pass + + # return the total profit and total charge cycles + return self.profits["Profit"].sum(), self.profits["Charge Cycles"].sum() + + def optimize_penalty_for_target_charge_cycles( + self, + initial_penalty, + target_charge_cycles, + learning_rate=2, + max_iterations=10, + tolerance=10, + ): + penalty = initial_penalty + + for i in range(max_iterations): + charge_threshold, discharge_threshold = self.determine_best_thresholds( + penalty + ) + total_profit, total_charge_cycles = self.evaluate_test_set( + charge_threshold, discharge_threshold + ) + + gradient = total_charge_cycles - target_charge_cycles + penalty += learning_rate * gradient + + print( + f"Iteration {i+1}: Penalty: {penalty}, Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}, Gradient: {gradient}, Charge Threshold: {charge_threshold}, Discharge Threshold: {discharge_threshold}" + ) + + if abs(gradient) < tolerance: + print(f"Optimal penalty found after {i+1} iterations") + break + else: + print(f"Optimal penalty not found after {max_iterations} iterations") + + return penalty, total_profit, total_charge_cycles diff --git a/src/policies/baselines/YesterdayBaselinePolicyExecutor.py b/src/policies/baselines/YesterdayBaselinePolicyExecutor.py index a12b5d7..486f242 100644 --- a/src/policies/baselines/YesterdayBaselinePolicyExecutor.py +++ b/src/policies/baselines/YesterdayBaselinePolicyExecutor.py @@ -17,6 +17,7 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator): date, charge_thresholds=np.arange(-100, 250, 25), discharge_thresholds=np.arange(-100, 250, 25), + penalty: int = 0 ): real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date()) @@ -27,7 +28,6 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator): np.array([yesterday_imbalance_prices]), device="cpu" ) - for penalty in self.penalties: yesterday_charge_thresholds, yesterday_discharge_thresholds = ( self.baseline_policy.get_optimal_thresholds( yesterday_imbalance_prices, diff --git a/src/policies/baselines/global_threshold_baseline.py b/src/policies/baselines/global_threshold_baseline.py index 64f6e8e..bda7cff 100644 --- a/src/policies/baselines/global_threshold_baseline.py +++ b/src/policies/baselines/global_threshold_baseline.py @@ -32,9 +32,14 @@ battery = Battery(2, 1) baseline_policy = BaselinePolicy(battery, data_path="") policy_evaluator = BaselinePolicyEvaluator(baseline_policy, 
task) -thresholds = policy_evaluator.determine_best_thresholds() -policy_evaluator.evaluate_test_set(thresholds, data_processor=data_processor) - -policy_evaluator.plot_profits_table() - +total_profit, total_charge_cycles = ( + policy_evaluator.optimize_penalty_for_target_charge_cycles( + initial_penalty=100, + target_charge_cycles=283, + learning_rate=0.2, + max_iterations=150, + tolerance=1, + ) +) +print(f"Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}") task.close() diff --git a/src/trainers/autoregressive_trainer.py b/src/trainers/autoregressive_trainer.py index 37c489f..78bae6d 100644 --- a/src/trainers/autoregressive_trainer.py +++ b/src/trainers/autoregressive_trainer.py @@ -9,6 +9,8 @@ import plotly.subplots as sp from plotly.subplots import make_subplots from src.trainers.trainer import Trainer from tqdm import tqdm +import matplotlib.pyplot as plt + class AutoRegressiveTrainer(Trainer): def __init__( @@ -34,28 +36,41 @@ class AutoRegressiveTrainer(Trainer): def debug_plots(self, task, train: bool, data_loader, sample_indices, epoch): for actual_idx, idx in sample_indices.items(): - auto_regressive_output = self.auto_regressive(data_loader.dataset, [idx]*1000) + print(f"Plotting sample {actual_idx}") + auto_regressive_output = self.auto_regressive( + data_loader.dataset, [idx] * 1000 + ) if len(auto_regressive_output) == 3: initial, predictions, target = auto_regressive_output else: initial, _, predictions, target = auto_regressive_output - # keep one initial initial = initial[0] target = target[0] predictions = predictions - fig = self.get_plot(initial, target, predictions, show_legend=(0 == 0)) + fig, fig2 = self.get_plot( + initial, target, predictions, show_legend=(0 == 0) + ) task.get_logger().report_matplotlib_figure( title="Training" if train else "Testing", - series=f'Sample {actual_idx}', + series=f"Sample {actual_idx}", iteration=epoch, figure=fig, ) + task.get_logger().report_matplotlib_figure( + title="Training Samples" if train else "Testing Samples", + series=f"Sample {actual_idx} samples", + iteration=epoch, + figure=fig2, + report_interactive=False, + ) + + plt.close() def auto_regressive(self, data_loader, idx, sequence_length: int = 96): self.model.eval() diff --git a/src/trainers/diffusion_trainer.py b/src/trainers/diffusion_trainer.py index 9b1dd5c..7d69e13 100644 --- a/src/trainers/diffusion_trainer.py +++ b/src/trainers/diffusion_trainer.py @@ -85,6 +85,8 @@ class DiffusionTrainer: self.best_score = None self.policy_evaluator = policy_evaluator + self.prev_optimal_penalty = 0 + def noise_time_series(self, x: torch.tensor, t: int): """Add noise to time series Args: @@ -206,8 +208,8 @@ class DiffusionTrainer: running_loss /= len(train_loader.dataset) - if epoch % 40 == 0 and epoch != 0: - crps = self.test(test_loader, epoch, task) + if epoch % 150 == 0 and epoch != 0: + crps, _ = self.test(test_loader, epoch, task) if best_crps is None or crps < best_crps: best_crps = crps @@ -215,7 +217,7 @@ class DiffusionTrainer: else: early_stopping += 1 - if early_stopping > 5: + if early_stopping > 15: break if task: @@ -238,8 +240,32 @@ class DiffusionTrainer: self.model = torch.load("checkpoint.pt") self.model.to(self.device) - self.test(test_loader, None, task) - self.policy_evaluator.plot_profits_table() + _, generated_sampels = self.test(test_loader, None, task) + # self.policy_evaluator.plot_profits_table() + + optimal_penalty, profit, charge_cycles = ( + self.policy_evaluator.optimize_penalty_for_target_charge_cycles( + 
idx_samples=generated_sampels, + test_loader=test_loader, + initial_penalty=900, + target_charge_cycles=283, + learning_rate=1, + max_iterations=50, + tolerance=1, + ) + ) + + print( + f"Optimal Penalty: {optimal_penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}" + ) + + task.get_logger().report_single_value( + name="Optimal Penalty", value=optimal_penalty + ) + task.get_logger().report_single_value(name="Optimal Profit", value=profit) + task.get_logger().report_single_value( + name="Optimal Charge Cycles", value=charge_cycles + ) if task: task.close() @@ -332,6 +358,8 @@ class DiffusionTrainer: ] ) + ax.set_ylim([-1500, 1500]) + task.get_logger().report_matplotlib_figure( title="Training" if training else "Testing", series=f"Sample {actual_idx}", @@ -341,6 +369,25 @@ class DiffusionTrainer: plt.close() + # plot some samples for the nrv genearations (10 samples) (scale -1500 to 1500) + fig, ax = plt.subplots(figsize=(20, 10)) + for i in range(10): + ax.plot(samples[i], label=f"Sample {i}") + + ax.plot(target, label="Real NRV", linewidth=3) + ax.legend() + ax.set_ylim([-1500, 1500]) + + task.get_logger().report_matplotlib_figure( + title="Training Samples" if training else "Testing Samples", + series=f"Sample {actual_idx} samples", + iteration=epoch, + figure=fig, + report_interactive=False, + ) + + plt.close() + def test( self, data_loader: torch.utils.data.DataLoader, epoch: int, task: Task = None ): @@ -385,28 +432,39 @@ class DiffusionTrainer: predict_sequence_length=self.ts_length, full_day_skip=True ) - self.policy_evaluator.evaluate_test_set(generated_samples, test_loader) - - df = self.policy_evaluator.get_profits_as_scalars() - - for idx, row in df.iterrows(): - task.get_logger().report_scalar( - title="Profit", - series=f"penalty_{row['Penalty']}", - value=row["Total Profit"], - iteration=epoch, + optimal_penalty, profit, charge_cycles = ( + self.policy_evaluator.optimize_penalty_for_target_charge_cycles( + idx_samples=generated_samples, + test_loader=test_loader, + initial_penalty=self.prev_optimal_penalty, + target_charge_cycles=283, + learning_rate=1, + max_iterations=50, + tolerance=1, ) + ) - df = self.policy_evaluator.get_profits_till_400() - for idx, row in df.iterrows(): - task.get_logger().report_scalar( - title="Profit_till_400", - series=f"penalty_{row['Penalty']}", - value=row["Profit_till_400"], - iteration=epoch, - ) + self.prev_optimal_penalty = optimal_penalty - return mean_crps + task.get_logger().report_scalar( + title="Optimal Penalty", + series="test", + value=optimal_penalty, + iteration=epoch, + ) + + task.get_logger().report_scalar( + title="Optimal Profit", series="test", value=profit, iteration=epoch + ) + + task.get_logger().report_scalar( + title="Optimal Charge Cycles", + series="test", + value=charge_cycles, + iteration=epoch, + ) + + return mean_crps, generated_samples def save_checkpoint(self, val_loss, task, iteration: int): torch.save(self.model, "checkpoint.pt") diff --git a/src/trainers/quantile_trainer.py b/src/trainers/quantile_trainer.py index 403bd1b..38bfe66 100644 --- a/src/trainers/quantile_trainer.py +++ b/src/trainers/quantile_trainer.py @@ -155,18 +155,6 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer): generated_samples = {} with torch.no_grad(): - total_samples = len(dataloader.dataset) - print( - "Full day valid indices: ", - len(dataloader.dataset.full_day_valid_indices), - ) - print( - "Valid indices: ", - len(dataloader.dataset.valid_indices), - ) - - print(dataloader.dataset.valid_indices) - for i in 
tqdm(dataloader.dataset.full_day_valid_indices): idx = dataloader.dataset.valid_indices.index(i) @@ -188,74 +176,64 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer): crps_from_samples_metric.append(crps[0].mean().item()) - task.get_logger().report_scalar( - title="CRPS_from_samples", - series="test", - value=np.mean(crps_from_samples_metric), - iteration=epoch, - ) + if epoch is not None: + task.get_logger().report_scalar( + title="CRPS_from_samples", + series="test", + value=np.mean(crps_from_samples_metric), + iteration=epoch, + ) - # using the policy evaluator, evaluate the policy with the generated samples - if self.policy_evaluator is not None: - self.policy_evaluator.evaluate_test_set(generated_samples, dataloader) - df = self.policy_evaluator.get_profits_as_scalars() + # using the policy evaluator, evaluate the policy with the generated samples + if self.policy_evaluator is not None: + optimal_penalty, profit, charge_cycles = ( + self.policy_evaluator.optimize_penalty_for_target_charge_cycles( + idx_samples=generated_samples, + test_loader=dataloader, + initial_penalty=900, + target_charge_cycles=283, + learning_rate=2, + max_iterations=100, + tolerance=1, + ) + ) + + print( + f"Optimal Penalty: {optimal_penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}" + ) - # for each row, report the profits - for idx, row in df.iterrows(): task.get_logger().report_scalar( - title="Profit", - series=f"penalty_{row['Penalty']}", - value=row["Total Profit"], + title="Optimal Penalty", + series="test", + value=optimal_penalty, iteration=epoch, ) - df = self.policy_evaluator.get_profits_till_400() - for idx, row in df.iterrows(): task.get_logger().report_scalar( - title="Profit_till_400", - series=f"penalty_{row['Penalty']}", - value=row["Profit_till_400"], + title="Optimal Profit", series="test", value=profit, iteration=epoch + ) + + task.get_logger().report_scalar( + title="Optimal Charge Cycles", + series="test", + value=charge_cycles, iteration=epoch, ) + return np.mean(crps_from_samples_metric), generated_samples + def log_final_metrics(self, task, dataloader, train: bool = True): metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track} transformed_metrics = { metric.__class__.__name__: 0.0 for metric in self.metrics_to_track } - crps_from_samples_metric = [] - with torch.no_grad(): total_samples = len(dataloader.dataset) - 96 batches = 0 for _, _, idx_batch in tqdm(dataloader): idx_batch = [idx for idx in idx_batch if idx < total_samples] - if len(idx_batch) == 0: - continue - - if train == False: - for idx in tqdm(idx_batch): - computed_idx_batch = [idx] * 250 - initial, outputs, samples, targets = self.auto_regressive( - dataloader.dataset, idx_batch=computed_idx_batch - ) - - # save the samples for the idx, these will be used for evaluating the policy - self.test_set_samples[idx.item()] = ( - self.data_processor.inverse_transform(initial), - self.data_processor.inverse_transform(samples), - ) - - samples = samples.unsqueeze(0) - targets = targets.squeeze(-1) - targets = targets[0].unsqueeze(0) - - crps = crps_from_samples(samples, targets) - - crps_from_samples_metric.append(crps[0].mean().item()) - _, outputs, samples, targets = self.auto_regressive( dataloader.dataset, idx_batch=idx_batch ) @@ -308,6 +286,9 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer): task.get_logger().report_single_value(name=name, value=metric_value) if train == False: + crps_from_samples_metric, self.test_set_samples = ( + 
self.calculate_crps_from_samples(None, dataloader, None) + ) task.get_logger().report_single_value( name="test_CRPS_from_samples_transformed", value=np.mean(crps_from_samples_metric), @@ -320,6 +301,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer): predictions, show_legend: bool = True, retransform: bool = True, + task=None, ): fig = go.Figure() @@ -427,7 +409,19 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer): ax.lines[1], ] ) - return fig + + ax.set_ylim(-1500, 1500) + + fig2, ax2 = plt.subplots(figsize=(20, 10)) + for i in range(10): + ax2.plot(predictions_np[i], label=f"Sample {i}") + + ax2.plot(next_day_np, label="Real NRV", linewidth=3) + ax2.legend() + + ax2.set_ylim(-1500, 1500) + + return fig, fig2 def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96): return auto_regressive( @@ -646,6 +640,21 @@ class NonAutoRegressiveQuantileRegression(Trainer): figure=fig, ) + fig, ax = plt.subplots(figsize=(20, 10)) + for i in range(10): + ax.plot(samples[i], label=f"Sample {i}") + + ax.plot(target, label="Real NRV", linewidth=3) + ax.legend() + task.get_logger().report_matplotlib_figure( + title="Training" if train else "Testing", + series=f"Sample {actual_idx} Samples", + iteration=epoch, + figure=fig, + ) + + plt.close() + def get_plot( self, current_day, @@ -740,7 +749,15 @@ class NonAutoRegressiveQuantileRegression(Trainer): ax.lines[1], ] ) - return fig + + fig2, ax2 = plt.subplots(figsize=(20, 10)) + for i in range(10): + ax2.plot(predictions_np[i], label=f"Sample {i}") + + ax2.plot(next_day_np, label="Real NRV", linewidth=3) + ax2.legend() + + return fig, fig2 def calculate_crps_from_samples(self, task, dataloader, epoch: int): crps_from_samples_metric = [] diff --git a/src/trainers/trainer.py b/src/trainers/trainer.py index 73f05d2..c01b2ba 100644 --- a/src/trainers/trainer.py +++ b/src/trainers/trainer.py @@ -261,8 +261,7 @@ class Trainer: self.model.eval() # set full day skip - self.data_processor.set_full_day_skip(True) - train_loader, test_loader = self.data_processor.get_dataloaders( + _, test_loader = self.data_processor.get_dataloaders( predict_sequence_length=self.model.output_size ) diff --git a/src/training_scripts/autoregressive_quantiles.py b/src/training_scripts/autoregressive_quantiles.py index 4b8f2fe..1ea553c 100644 --- a/src/training_scripts/autoregressive_quantiles.py +++ b/src/training_scripts/autoregressive_quantiles.py @@ -2,9 +2,7 @@ from src.utils.clearml import ClearMLHelper #### ClearML #### clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast") -task = clearml_helper.get_task( - task_name="Non Autoregressive Quantile Regression: Non Linear" -) +task = clearml_helper.get_task(task_name="AQR: Non Linear") task.execute_remotely(queue_name="default", exit_process=True) from src.policies.PolicyEvaluator import PolicyEvaluator @@ -60,16 +58,16 @@ if quantiles is None: quantiles = [0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99] task.set_parameter("general/quantiles", quantiles) else: - # if string, convert to list "[0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]"" + # if string, convert to list "[0.01, 0.05, 0.1, 0.15, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9, 0.95, 0.99]" if isinstance(quantiles, str): quantiles = eval(quantiles) model_parameters = { "learning_rate": 0.0001, - "hidden_size": 256, - "num_layers": 4, + "hidden_size": 512, + "num_layers": 5, "dropout": 0.2, - "time_feature_embedding": 16, + "time_feature_embedding": 8, } model_parameters = 
task.connect(model_parameters, name="model_parameters") @@ -77,7 +75,14 @@ model_parameters = task.connect(model_parameters, name="model_parameters") time_embedding = TimeEmbedding( data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"] ) -# lstm_model = GRUModel(time_embedding.output_dim(inputDim), len(quantiles), hidden_size=model_parameters["hidden_size"], num_layers=model_parameters["num_layers"], dropout=model_parameters["dropout"]) +# lstm_model = GRUModel( +# time_embedding.output_dim(inputDim), +# len(quantiles), +# hidden_size=model_parameters["hidden_size"], +# num_layers=model_parameters["num_layers"], +# dropout=model_parameters["dropout"], +# ) + non_linear_model = NonLinearRegression( time_embedding.output_dim(inputDim), len(quantiles), @@ -85,10 +90,11 @@ non_linear_model = NonLinearRegression( numLayers=model_parameters["num_layers"], dropout=model_parameters["dropout"], ) + # linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles)) model = nn.Sequential(time_embedding, non_linear_model) -model.output_size = 96 +model.output_size = 1 optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"]) ### Policy Evaluator ### @@ -122,18 +128,37 @@ trainer = AutoRegressiveQuantileTrainer( trainer.add_metrics_to_track( [PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)] ) -trainer.early_stopping(patience=30) +trainer.early_stopping(patience=10) trainer.plot_every(5) trainer.train(task=task, epochs=epochs, remotely=True) ### Policy Evaluation ### idx_samples = trainer.test_set_samples _, test_loader = trainer.data_processor.get_dataloaders( - predict_sequence_length=trainer.model.output_size, full_day_skip=True + predict_sequence_length=trainer.model.output_size, full_day_skip=False ) -policy_evaluator.evaluate_test_set(idx_samples, test_loader) -policy_evaluator.plot_profits_table() -policy_evaluator.plot_thresholds_per_day() +# policy_evaluator.evaluate_test_set(idx_samples, test_loader) +# policy_evaluator.plot_profits_table() +# policy_evaluator.plot_thresholds_per_day() + +optimal_penalty, profit, charge_cycles = ( + policy_evaluator.optimize_penalty_for_target_charge_cycles( + idx_samples=idx_samples, + test_loader=test_loader, + initial_penalty=1000, + target_charge_cycles=283, + learning_rate=15, + max_iterations=150, + tolerance=1, + ) +) + +print( + f"Optimal Penalty: {optimal_penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}" +) +task.get_logger().report_single_value(name="Optimal Penalty", value=optimal_penalty) +task.get_logger().report_single_value(name="Optimal Profit", value=profit) +task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles) task.close() diff --git a/src/training_scripts/diffusion_training.py b/src/training_scripts/diffusion_training.py index fe5029f..e48b511 100644 --- a/src/training_scripts/diffusion_training.py +++ b/src/training_scripts/diffusion_training.py @@ -2,7 +2,7 @@ from src.utils.clearml import ClearMLHelper clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast") task = clearml_helper.get_task( - task_name="Diffusion Training: hidden_sizes=[64, 64], lr=0.0001, time_dim=8" + task_name="Diffusion Training: hidden_sizes=[256, 256], lr=0.0001, time_dim=8" ) task.execute_remotely(queue_name="default", exit_process=True) @@ -18,16 +18,16 @@ from src.policies.PolicyEvaluator import PolicyEvaluator #### Data Processor #### data_config = DataConfig() data_config.NRV_HISTORY = True 
-data_config.LOAD_HISTORY = True -data_config.LOAD_FORECAST = True +data_config.LOAD_HISTORY = False +data_config.LOAD_FORECAST = False -data_config.WIND_FORECAST = True -data_config.WIND_HISTORY = True +data_config.WIND_FORECAST = False +data_config.WIND_HISTORY = False data_config.QUARTER = False data_config.DAY_OF_WEEK = False -data_config.NOMINAL_NET_POSITION = True +data_config.NOMINAL_NET_POSITION = False data_config = task.connect(data_config, name="data_features") @@ -39,9 +39,9 @@ inputDim = data_processor.get_input_size() print("Input dim: ", inputDim) model_parameters = { - "epochs": 8000, + "epochs": 15000, "learning_rate": 0.0001, - "hidden_sizes": [64, 64], + "hidden_sizes": [256, 256], "time_dim": 8, } @@ -54,7 +54,14 @@ model = SimpleDiffusionModel( other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"], ) -# model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=128) + +# model = GRUDiffusionModel( +# 96, +# model_parameters["hidden_sizes"], +# other_inputs_dim=inputDim[2], +# time_dim=model_parameters["time_dim"], +# gru_hidden_size=128, +# ) ### Policy Evaluator ### battery = Battery(2, 1)
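# Illustrative sketch (not part of the patch): the recurring change in this
# commit is a proportional-feedback loop that tunes the cycle penalty until the
# simulated charge cycles hit a yearly target (target_charge_cycles=283 in the
# training scripts). The same scheme appears in
# PolicyEvaluator.optimize_penalty_for_target_charge_cycles,
# BaselinePolicyEvaluator and the diffusion/quantile trainers. The stand-alone
# version below is a sketch under assumed names: `evaluate` stands in for
# evaluate_test_set_for_penalty(), and the toy evaluator in __main__ is
# invented purely to show convergence.

def tune_penalty(evaluate, initial_penalty, target_cycles,
                 learning_rate=2.0, max_iterations=10, tolerance=10.0):
    """Adjust the penalty in proportion to the charge-cycle error."""
    penalty = initial_penalty
    profit = cycles = None
    for _ in range(max_iterations):
        profit, cycles = evaluate(penalty)      # simulate with the current penalty
        gradient = cycles - target_cycles       # > 0 means the battery cycles too often
        penalty += learning_rate * gradient     # raise the penalty to discourage cycling
        if abs(gradient) < tolerance:           # close enough to the target
            break
    return penalty, profit, cycles


if __name__ == "__main__":
    # Toy evaluator: cycles shrink linearly as the penalty grows.
    toy = lambda p: (10_000 - 2.0 * p, max(0.0, 400 - 0.5 * p))
    print(tune_penalty(toy, initial_penalty=100, target_cycles=283,
                       learning_rate=0.2, max_iterations=150, tolerance=1.0))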