Not resetting state of charge

This commit is contained in:
2024-03-23 19:18:55 +01:00
parent e780b46af7
commit 65ec8fcd54
8 changed files with 470 additions and 184 deletions

View File

@@ -47,6 +47,7 @@ class PolicyEvaluator:
charge_thresholds=np.arange(-1500, 1500, 50),
discharge_thresholds=np.arange(-1500, 1500, 50),
penalty: int = 0,
state_of_charge: float = 0.0,
):
if date in self.cache:
(reconstructed_imbalance_prices, real_imbalance_prices) = self.cache[date]
@@ -82,6 +83,7 @@ class PolicyEvaluator:
penalty,
charge_thresholds,
discharge_thresholds,
state_of_charge=state_of_charge,
)
def optimize_penalty_for_target_charge_cycles(
@@ -135,6 +137,7 @@ class PolicyEvaluator:
penalty: int,
charge_thresholds,
discharge_thresholds,
state_of_charge=0.0,
):
"""_summary_
@@ -155,6 +158,7 @@ class PolicyEvaluator:
charge_thresholds,
discharge_thresholds,
penalty,
battery_state_of_charge=state_of_charge,
)
)
@@ -162,16 +166,20 @@ class PolicyEvaluator:
predicted_discharge_threshold = found_discharge_thresholds.mean(axis=0)
### Determine Profits and Charge Cycles ###
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([predicted_charge_threshold]),
torch.tensor([predicted_discharge_threshold]),
simulated_profit, simulated_charge_cycles, new_state_of_charge = (
self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([predicted_charge_threshold]),
torch.tensor([predicted_discharge_threshold]),
battery_state_of_charge=torch.tensor([state_of_charge]),
)
)
return (
simulated_profit[0][0].item(),
simulated_charge_cycles[0][0].item(),
predicted_charge_threshold.item(),
predicted_discharge_threshold.item(),
new_state_of_charge.squeeze(0).item(),
)
def evaluate_test_set(self, idx_samples, test_loader):
@@ -213,12 +221,20 @@ class PolicyEvaluator:
def evaluate_test_set_for_penalty(self, idx_samples, test_loader, penalty):
total_profit = 0
total_charge_cycles = 0
state_of_charge = 0.0
for date in tqdm(self.dates):
try:
profit, charge_cycles, _, _ = self.evaluate_for_date(
date, idx_samples, test_loader, penalty=penalty
profit, charge_cycles, _, _, new_state_of_charge = (
self.evaluate_for_date(
date,
idx_samples,
test_loader,
penalty=penalty,
state_of_charge=state_of_charge,
)
)
state_of_charge = new_state_of_charge
total_profit += profit
total_charge_cycles += charge_cycles
except KeyboardInterrupt:

View File

@@ -11,70 +11,80 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
def __init__(self, baseline_policy: BaselinePolicy, task: Task = None):
super(BaselinePolicyEvaluator, self).__init__(baseline_policy, task)
self.current_state_of_charge = 0.0
self.train_profits = []
def determine_thresholds_for_date(self, date, penalty):
charge_thresholds = np.arange(-500, 500, 25)
discharge_thresholds = np.arange(-500, 500, 25)
self.charge_discharge_thresholds = [
(charge_threshold, discharge_threshold)
for charge_threshold in charge_thresholds
for discharge_threshold in discharge_thresholds
if charge_threshold < discharge_threshold
]
# state of charge to zero for all thresholds
self.current_state_of_charge = torch.zeros(
len(self.charge_discharge_thresholds)
)
self.profits = torch.zeros(len(self.charge_discharge_thresholds))
self.charge_cycles = torch.zeros(len(self.charge_discharge_thresholds))
def determine_thresholds_for_date(self, date):
# all combinations where charge_threshold is less than discharge_threshold
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
found_charge_thresholds, found_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds(
torch.tensor([real_imbalance_prices]),
charge_thresholds,
discharge_thresholds,
penalty,
simulated_profit, simulated_charge_cycles, simulated_state_of_charge = (
self.baseline_policy.simulate(
torch.tensor(
[[real_imbalance_prices] * len(self.charge_discharge_thresholds)]
),
torch.tensor([c for c, _ in self.charge_discharge_thresholds]),
torch.tensor([d for _, d in self.charge_discharge_thresholds]),
battery_state_of_charge=self.current_state_of_charge,
)
)
best_charge_threshold = found_charge_thresholds
best_discharge_threshold = found_discharge_thresholds
self.current_state_of_charge = simulated_state_of_charge.squeeze(0)
self.profits += simulated_profit.squeeze(0)
self.charge_cycles += simulated_charge_cycles.squeeze(0)
simulated_profit, simulated_charge_cycles = self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([best_charge_threshold]),
torch.tensor([best_discharge_threshold]),
def determine_best_thresholds(self):
self.current_state_of_charge = torch.zeros(
len(self.charge_discharge_thresholds)
)
self.train_profits.append(
[
simulated_profit[0][0].item(),
simulated_charge_cycles[0][0].item(),
best_charge_threshold.item(),
best_discharge_threshold.item(),
]
)
self.profits = torch.zeros(len(self.charge_discharge_thresholds))
self.charge_cycles = torch.zeros(len(self.charge_discharge_thresholds))
def determine_best_thresholds(self, penalty):
self.train_profits = []
dates = self.baseline_policy.train_data["DateTime"].dt.date.unique()
dates = pd.to_datetime(dates)
total_dates = 0
try:
for date in tqdm(dates):
self.determine_thresholds_for_date(date, penalty)
self.determine_thresholds_for_date(date)
total_dates += 1
except Exception as e:
print(e)
pass
self.train_profits = pd.DataFrame(
self.train_profits,
columns=[
"Profit",
"Charge Cycles",
"Charge Threshold",
"Discharge Threshold",
],
wanted_charge_cycles = 400 / 365 * total_dates
best_idx = torch.argmin(
torch.abs(self.charge_cycles - wanted_charge_cycles)
).item()
return (
self.charge_discharge_thresholds[best_idx],
self.profits[best_idx].item(),
self.charge_cycles[best_idx].item(),
)
# get the best thresholds combination based on the sum of profits
best_thresholds = self.train_profits.groupby(
["Charge Threshold", "Discharge Threshold"]
).sum()["Profit"]
best_thresholds = best_thresholds.idxmax()
return (best_thresholds[0], best_thresholds[1])
def evaluate_test_set(
self, charge_threshold, discharge_threshold, data_processor=None
):
@@ -93,17 +103,23 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
self.dates = filtered_dates
try:
battery_state_of_charge = torch.zeros(1)
for date in tqdm(self.dates):
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
simulated_profit, simulated_charge_cycles = (
self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([charge_threshold]),
torch.tensor([discharge_threshold]),
)
(
simulated_profit,
simulated_charge_cycles,
simulated_battery_state_of_charge,
) = self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([charge_threshold]),
torch.tensor([discharge_threshold]),
battery_state_of_charge=battery_state_of_charge,
)
battery_state_of_charge = simulated_battery_state_of_charge.squeeze(0)
self.profits.append(
[
date,
@@ -123,35 +139,45 @@ class BaselinePolicyEvaluator(PolicyEvaluator):
# return the total profit and total charge cycles
return self.profits["Profit"].sum(), self.profits["Charge Cycles"].sum()
def optimize_penalty_for_target_charge_cycles(
self,
initial_penalty,
target_charge_cycles,
learning_rate=2,
max_iterations=10,
tolerance=10,
):
penalty = initial_penalty
def determine_best_thresholds_test_set(self, data_processor=None):
self.current_state_of_charge = torch.zeros(
len(self.charge_discharge_thresholds)
)
for i in range(max_iterations):
charge_threshold, discharge_threshold = self.determine_best_thresholds(
penalty
)
total_profit, total_charge_cycles = self.evaluate_test_set(
charge_threshold, discharge_threshold
)
self.profits = torch.zeros(len(self.charge_discharge_thresholds))
self.charge_cycles = torch.zeros(len(self.charge_discharge_thresholds))
gradient = total_charge_cycles - target_charge_cycles
penalty += learning_rate * gradient
dates = self.baseline_policy.train_data["DateTime"].dt.date.unique()
dates = pd.to_datetime(dates)
print(
f"Iteration {i+1}: Penalty: {penalty}, Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}, Gradient: {gradient}, Charge Threshold: {charge_threshold}, Discharge Threshold: {discharge_threshold}"
)
if data_processor:
filtered_dates = []
_, test_loader = data_processor.get_dataloaders()
for date in self.dates:
try:
test_loader.dataset.get_idx_for_date(date.date())
filtered_dates.append(date)
except:
pass
dates = filtered_dates
if abs(gradient) < tolerance:
print(f"Optimal penalty found after {i+1} iterations")
break
else:
print(f"Optimal penalty not found after {max_iterations} iterations")
total_dates = 0
try:
for date in tqdm(dates):
self.determine_thresholds_for_date(date)
total_dates += 1
except Exception as e:
print(e)
pass
return penalty, total_profit, total_charge_cycles
wanted_charge_cycles = 400 / 365 * total_dates
best_idx = torch.argmin(
torch.abs(self.charge_cycles - wanted_charge_cycles)
).item()
return (
self.charge_discharge_thresholds[best_idx],
self.profits[best_idx].item(),
self.charge_cycles[best_idx].item(),
)

View File

@@ -17,7 +17,8 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
date,
charge_thresholds=np.arange(-100, 250, 25),
discharge_thresholds=np.arange(-100, 250, 25),
penalty: int = 0
penalty: int = 0,
current_state_of_charge=0.0,
):
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
@@ -28,33 +29,32 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
np.array([yesterday_imbalance_prices]), device="cpu"
)
yesterday_charge_thresholds, yesterday_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds(
yesterday_imbalance_prices,
charge_thresholds,
discharge_thresholds,
penalty,
)
yesterday_charge_thresholds, yesterday_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds(
yesterday_imbalance_prices,
charge_thresholds,
discharge_thresholds,
penalty,
battery_state_of_charge=current_state_of_charge,
)
)
yesterday_profit, yesterday_charge_cycles = self.baseline_policy.simulate(
yesterday_profit, yesterday_charge_cycles, new_state_of_charge = (
self.baseline_policy.simulate(
torch.tensor([[real_imbalance_prices]]),
torch.tensor([yesterday_charge_thresholds.mean(axis=0)]),
torch.tensor([yesterday_discharge_thresholds.mean(axis=0)]),
battery_state_of_charge=torch.tensor([current_state_of_charge]),
)
)
self.profits.append(
[
date,
penalty,
yesterday_profit[0][0].item(),
yesterday_charge_cycles[0][0].item(),
yesterday_charge_thresholds.mean(axis=0).item(),
yesterday_discharge_thresholds.mean(axis=0).item(),
]
)
return (
yesterday_profit[0][0].item(),
yesterday_charge_cycles[0][0].item(),
new_state_of_charge.squeeze(0).item(),
)
def evaluate_test_set(self, data_processor):
def evaluate_test_set_for_penalty(self, data_processor, penalty: int = 0):
if data_processor:
filtered_dates = []
@@ -67,22 +67,65 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
pass
self.dates = filtered_dates
self.profits = []
profit = 0
charge_cycles = 0
state_of_charge = 0.0
for date in tqdm(self.dates):
try:
self.evaluate_for_date(date)
new_profit, new_charge_cycles, new_state_of_charge = (
self.evaluate_for_date(
date, penalty=penalty, current_state_of_charge=state_of_charge
)
)
profit += new_profit
charge_cycles += new_charge_cycles
state_of_charge = new_state_of_charge
except Exception as e:
print(e)
pass
self.profits = pd.DataFrame(
self.profits,
columns=[
"Date",
"Penalty",
"Profit",
"Charge Cycles",
"Charge Threshold",
"Discharge Threshold",
],
return profit, charge_cycles
def optimize_penalty_for_target_charge_cycles(
    self,
    data_processor,
    initial_penalty,
    target_charge_cycles,
    learning_rate=2,
    max_iterations=10,
    tolerance=10,
):
    """Tune the charge-cycle penalty until the simulated cycles approach a target.

    Each iteration evaluates the test set with the current penalty via
    ``evaluate_test_set_for_penalty`` and moves the penalty proportionally
    to the difference between simulated and target charge cycles.

    Args:
        data_processor: Forwarded unchanged to ``evaluate_test_set_for_penalty``.
        initial_penalty: Penalty used for the first evaluation.
        target_charge_cycles: Desired total number of charge cycles.
        learning_rate: Step size multiplied with the charge-cycle difference.
        max_iterations: Maximum number of penalty updates.
        tolerance: Stop as soon as ``abs(simulated - target) < tolerance``.

    Returns:
        Tuple ``(penalty, profit, charge_cycles)``. The profit and charge
        cycles always belong to the returned penalty.
    """
    self.cache = {}
    penalty = initial_penalty

    for iteration in range(max_iterations):
        # Calculate profit and charge cycles for the current penalty
        simulated_profit, simulated_charge_cycles = (
            self.evaluate_test_set_for_penalty(data_processor, penalty)
        )
        print(
            f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}"
        )

        # Difference between the simulated and target charge cycles
        gradient = simulated_charge_cycles - target_charge_cycles

        # Check convergence BEFORE touching the penalty: the penalty that
        # produced an in-tolerance result is the one to report, and its
        # profit / charge cycles are already known, so no extra evaluation
        # pass is needed.
        if abs(gradient) < tolerance:
            print(f"Optimal penalty found after {iteration+1} iterations")
            return penalty, simulated_profit, simulated_charge_cycles

        # Update the penalty parameter in the direction of the gradient
        penalty += learning_rate * gradient

    print(
        f"Reached max iterations ({max_iterations}) without converging to the target charge cycles"
    )

    # The penalty was updated after the last evaluation (or never evaluated
    # when max_iterations is 0), so evaluate it once to return matching results.
    profit, charge_cycles = self.evaluate_test_set_for_penalty(
        data_processor, penalty
    )
    return penalty, profit, charge_cycles

View File

@@ -32,14 +32,36 @@ battery = Battery(2, 1)
baseline_policy = BaselinePolicy(battery, data_path="")
policy_evaluator = BaselinePolicyEvaluator(baseline_policy, task)
total_profit, total_charge_cycles = (
policy_evaluator.optimize_penalty_for_target_charge_cycles(
initial_penalty=100,
target_charge_cycles=283,
learning_rate=0.2,
max_iterations=150,
tolerance=1,
)
charge_discharge_threshold, total_profit, total_charge_cycles = (
policy_evaluator.determine_best_thresholds()
)
print(f"Total Profit: {total_profit}, Total Charge Cycles: {total_charge_cycles}")
print(f"Training set results:")
print(f"Best Charge Discharge Threshold: {charge_discharge_threshold}")
print(f"Total Profit: {total_profit}")
print(f"Total Charge Cycles: {total_charge_cycles}")
profit, charge_cycles = policy_evaluator.evaluate_test_set(
charge_discharge_threshold[0],
charge_discharge_threshold[1],
data_processor=data_processor,
)
print()
print("Test Set Results")
print(f"Profit: {profit}, Charge Cycles: {charge_cycles}")
# Thresholds determined on test set
charge_discharge_threshold, total_profit, total_charge_cycles = (
policy_evaluator.determine_best_thresholds_test_set(data_processor)
)
task.get_logger().report_single_value(name="Optimal Profit", value=profit)
task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles)
task.get_logger().report_single_value(
name="Optimal Charge Threshold", value=charge_discharge_threshold[0]
)
task.get_logger().report_single_value(
name="Optimal Discharge Threshold", value=charge_discharge_threshold[1]
)
task.close()

View File

@@ -35,7 +35,23 @@ battery = Battery(2, 1)
baseline_policy = BaselinePolicy(battery, data_path="")
policy_evaluator = YesterdayBaselinePolicyEvaluator(baseline_policy, task)
policy_evaluator.evaluate_test_set(data_processor=data_processor)
policy_evaluator.plot_profits_table()
penalty, profit, charge_cycles = (
policy_evaluator.optimize_penalty_for_target_charge_cycles(
data_processor=data_processor,
initial_penalty=0,
target_charge_cycles=283,
learning_rate=2,
max_iterations=100,
tolerance=1,
)
)
# policy_evaluator.plot_profits_table()
print()
print("Test Set Results")
print(f"Penalty: {penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}")
task.get_logger().report_single_value(name="Optimal Penalty", value=penalty)
task.get_logger().report_single_value(name="Optimal Profit", value=profit)
task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles)
task.close()

View File

@@ -6,6 +6,7 @@ import torch
imbalance_prices = "data/imbalance_prices.csv"
class Battery:
def __init__(self, capacity: float, power: float):
"""
@@ -26,11 +27,11 @@ class Battery:
return 0
self.current_charge -= self.power / 4
self.charge_cycles += 1/16
self.charge_cycles += 1 / 16
if self.current_charge <= 0:
self.current_charge = 0
return self.power / 4
def charge(self):
@@ -41,13 +42,13 @@ class Battery:
return 0
self.current_charge += self.power / 4
self.charge_cycles += 1/16
self.charge_cycles += 1 / 16
if self.current_charge >= self.capacity:
self.current_charge = self.capacity
return self.power / 4
def reset(self):
"""
Reset the battery to its initial state
@@ -57,32 +58,47 @@ class Battery:
self.discharging = False
self.charge_cycles = 0
class BaselinePolicy():
class BaselinePolicy:
def __init__(self, battery: Battery, data_path: str = ""):
self.data_path = data_path
self.battery = battery
self.train_data = self.load_imbalance_prices(train=True)
self.train_data = self.load_imbalance_prices(train=True)
self.test_data = self.load_imbalance_prices(train=False)
# print first datetime of train and test data
print(f"Training range: {self.train_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.train_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}")
print(f"Test range: {self.test_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.test_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}")
print(
f"Training range: {self.train_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.train_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}"
)
print(
f"Test range: {self.test_data.iloc[0]['DateTime'].strftime('%d-%m-%Y')} - {self.test_data.iloc[-1]['DateTime'].strftime('%d-%m-%Y')}"
)
def load_imbalance_prices(self, train: bool = True):
imbalance_prices = pd.read_csv(self.data_path + 'data/imbalance_prices.csv', parse_dates=True, sep=";")
imbalance_prices = imbalance_prices[['DateTime', 'Positive imbalance price']]
imbalance_prices['DateTime'] = pd.to_datetime(imbalance_prices['DateTime'], utc=True)
imbalance_prices = pd.read_csv(
self.data_path + "data/imbalance_prices.csv", parse_dates=True, sep=";"
)
imbalance_prices = imbalance_prices[["DateTime", "Positive imbalance price"]]
imbalance_prices["DateTime"] = pd.to_datetime(
imbalance_prices["DateTime"], utc=True
)
if train:
imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year < 2023]
imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year >= 2020]
imbalance_prices = imbalance_prices.loc[
imbalance_prices["DateTime"].dt.year < 2023
]
imbalance_prices = imbalance_prices.loc[
imbalance_prices["DateTime"].dt.year >= 2020
]
else:
imbalance_prices = imbalance_prices.loc[imbalance_prices['DateTime'].dt.year == 2023]
imbalance_prices = imbalance_prices.sort_values(by=['DateTime'], ascending=True)
imbalance_prices = imbalance_prices.loc[
imbalance_prices["DateTime"].dt.year == 2023
]
imbalance_prices = imbalance_prices.sort_values(by=["DateTime"], ascending=True)
return imbalance_prices
def get_train_score(self, charge_threshold, discharge_threshold):
    """Score a charge/discharge threshold pair on the training data.

    Delegates to ``get_score`` with ``self.train_data`` and returns its
    result unchanged.
    """
    training_frame = self.train_data
    return self.get_score(training_frame, charge_threshold, discharge_threshold)
def get_test_score(self, charge_threshold, discharge_threshold):
    """Score a charge/discharge threshold pair on the test data.

    Delegates to ``get_score`` with ``self.test_data`` and returns its
    result unchanged.
    """
    held_out_frame = self.test_data
    return self.get_score(held_out_frame, charge_threshold, discharge_threshold)
@@ -96,38 +112,145 @@ class BaselinePolicy():
mean_discharging_price = 0
number_of_charges = 0
number_of_discharges = 0
for index, row in df.iterrows():
if row['Positive imbalance price'] < charge_threshold:
total_charging_cost += self.battery.charge() * row['Positive imbalance price']
mean_charging_price += row['Positive imbalance price']
if row["Positive imbalance price"] < charge_threshold:
total_charging_cost += (
self.battery.charge() * row["Positive imbalance price"]
)
mean_charging_price += row["Positive imbalance price"]
number_of_charges += 1
elif row['Positive imbalance price'] > discharge_threshold:
total_discharging_profit += self.battery.discharge() * row['Positive imbalance price']
mean_discharging_price += row['Positive imbalance price']
elif row["Positive imbalance price"] > discharge_threshold:
total_discharging_profit += (
self.battery.discharge() * row["Positive imbalance price"]
)
mean_discharging_price += row["Positive imbalance price"]
number_of_discharges += 1
return total_charging_cost, total_discharging_profit, self.battery.charge_cycles, mean_charging_price / number_of_charges, mean_discharging_price / number_of_discharges
return (
total_charging_cost,
total_discharging_profit,
self.battery.charge_cycles,
mean_charging_price / number_of_charges,
mean_discharging_price / number_of_discharges,
)
def threshold_scores(self, charge_thresholds, discharge_thresholds):
df = pd.DataFrame(columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])
df_test = pd.DataFrame(columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])
df = pd.DataFrame(
columns=[
"Charge threshold",
"Discharge threshold",
"Charging Cost",
"Discharging Profit",
"Total Profit",
"Charge cycles",
"Mean charging price",
"Mean discharging price",
]
)
df_test = pd.DataFrame(
columns=[
"Charge threshold",
"Discharge threshold",
"Charging Cost",
"Discharging Profit",
"Total Profit",
"Charge cycles",
"Mean charging price",
"Mean discharging price",
]
)
threshold_pairs = itertools.product(charge_thresholds, discharge_thresholds)
threshold_pairs = filter(lambda x: x[0] < x[1], threshold_pairs)
for charge_threshold, discharge_threshold in tqdm(threshold_pairs):
total_charging_cost, total_discharge_profit, charge_cycles, mean_charging_price, mean_discharging_price = self.get_train_score(charge_threshold, discharge_threshold)
df = pd.concat([df, pd.DataFrame([[charge_threshold, discharge_threshold, total_charging_cost, total_discharge_profit, total_discharge_profit - total_charging_cost, charge_cycles, mean_charging_price, mean_discharging_price]], columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])])
(
total_charging_cost,
total_discharge_profit,
charge_cycles,
mean_charging_price,
mean_discharging_price,
) = self.get_train_score(charge_threshold, discharge_threshold)
df = pd.concat(
[
df,
pd.DataFrame(
[
[
charge_threshold,
discharge_threshold,
total_charging_cost,
total_discharge_profit,
total_discharge_profit - total_charging_cost,
charge_cycles,
mean_charging_price,
mean_discharging_price,
]
],
columns=[
"Charge threshold",
"Discharge threshold",
"Charging Cost",
"Discharging Profit",
"Total Profit",
"Charge cycles",
"Mean charging price",
"Mean discharging price",
],
),
]
)
total_charging_cost, total_discharge_profit, charge_cycles, mean_charging_price, mean_discharging_price = self.get_test_score(charge_threshold, discharge_threshold)
df_test = pd.concat([df_test, pd.DataFrame([[charge_threshold, discharge_threshold, total_charging_cost, total_discharge_profit, total_discharge_profit - total_charging_cost, charge_cycles, mean_charging_price, mean_discharging_price]], columns=["Charge threshold", "Discharge threshold", "Charging Cost", "Discharging Profit", "Total Profit", "Charge cycles", "Mean charging price", "Mean discharging price"])])
(
total_charging_cost,
total_discharge_profit,
charge_cycles,
mean_charging_price,
mean_discharging_price,
) = self.get_test_score(charge_threshold, discharge_threshold)
df_test = pd.concat(
[
df_test,
pd.DataFrame(
[
[
charge_threshold,
discharge_threshold,
total_charging_cost,
total_discharge_profit,
total_discharge_profit - total_charging_cost,
charge_cycles,
mean_charging_price,
mean_discharging_price,
]
],
columns=[
"Charge threshold",
"Discharge threshold",
"Charging Cost",
"Discharging Profit",
"Total Profit",
"Charge cycles",
"Mean charging price",
"Mean discharging price",
],
),
]
)
df = df.sort_values(by=['Total Profit'], ascending=False)
df = df.sort_values(by=["Total Profit"], ascending=False)
return df, df_test
def get_optimal_thresholds(self, imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty: float = 0):
def get_optimal_thresholds(
self,
imbalance_prices,
charge_thresholds,
discharge_thresholds,
charge_cycles_penalty: float = 0,
battery_state_of_charge: float = 0,
):
threshold_pairs = itertools.product(charge_thresholds, discharge_thresholds)
threshold_pairs = filter(lambda x: x[0] < x[1], threshold_pairs)
@@ -143,9 +266,19 @@ class BaselinePolicy():
next_day_charge_thresholds, next_day_discharge_thresholds = [], []
# imbalance_prices: (1000, 96) -> (1000, threshold_pairs, 96)
imbalance_prices = imbalance_prices.unsqueeze(1).expand(-1, len(threshold_pairs), -1)
imbalance_prices = imbalance_prices.unsqueeze(1).expand(
-1, len(threshold_pairs), -1
)
profits, charge_cycles = self.simulate(imbalance_prices, charge_thresholds, discharge_thresholds, charge_cycles_penalty=charge_cycles_penalty)
profits, charge_cycles, state_of_charge = self.simulate(
imbalance_prices,
charge_thresholds,
discharge_thresholds,
charge_cycles_penalty=charge_cycles_penalty,
battery_state_of_charge=torch.tensor(
[battery_state_of_charge] * len(charge_thresholds)
),
)
# get the index of the best threshold pair for each day (1000, 96) -> (1000)
best_threshold_indices = torch.argmax(profits, dim=1)
@@ -159,16 +292,27 @@ class BaselinePolicy():
return next_day_charge_thresholds, next_day_discharge_thresholds
def simulate(self, price_matrix, charge_thresholds: torch.tensor, discharge_thresholds: torch.tensor, charge_cycles_penalty: float = 0):
def simulate(
self,
price_matrix,
charge_thresholds: torch.tensor,
discharge_thresholds: torch.tensor,
charge_cycles_penalty: float = 0,
battery_state_of_charge: float = 0,
):
# make sure all on the same device
charge_thresholds = charge_thresholds.to(price_matrix.device)
discharge_thresholds = discharge_thresholds.to(price_matrix.device)
batch_size, num_thresholds, num_time_steps = price_matrix.shape
# Reshape thresholds for broadcasting
charge_thresholds = charge_thresholds.view(1, num_thresholds, 1).expand(batch_size, -1, num_time_steps)
discharge_thresholds = discharge_thresholds.view(1, num_thresholds, 1).expand(batch_size, -1, num_time_steps)
charge_thresholds = charge_thresholds.view(1, num_thresholds, 1).expand(
batch_size, -1, num_time_steps
)
discharge_thresholds = discharge_thresholds.view(1, num_thresholds, 1).expand(
batch_size, -1, num_time_steps
)
charge_matrix = torch.zeros_like(price_matrix)
@@ -176,6 +320,11 @@ class BaselinePolicy():
charge_matrix[price_matrix > discharge_thresholds] = -1
battery_states = torch.zeros(batch_size, num_thresholds)
battery_states = battery_state_of_charge.view(1, num_thresholds).expand(
batch_size, -1
)
profits = torch.zeros_like(battery_states)
charge_cycles = torch.zeros_like(battery_states)
@@ -185,23 +334,36 @@ class BaselinePolicy():
for i in range(num_time_steps):
discharge_mask = ~((charge_matrix[:, :, i] == -1) & (battery_states == 0))
charge_mask = ~((charge_matrix[:, :, i] == 1) & (battery_states == self.battery.capacity))
charge_mask = ~(
(charge_matrix[:, :, i] == 1)
& (battery_states == self.battery.capacity)
)
mask = discharge_mask & charge_mask
battery_states[mask] += charge_matrix[:, :, i][mask] * self.battery.power / 4
profits[mask] += -charge_matrix[:, :, i][mask] * price_matrix[:, :, i][mask] * self.battery.power / 4
charge_cycles[mask] += torch.abs(charge_matrix[:, :, i][mask]) * (self.battery.power / 4) / self.battery.capacity / 2
battery_states[mask] += (
charge_matrix[:, :, i][mask] * self.battery.power / 4
)
profits[mask] += (
-charge_matrix[:, :, i][mask]
* price_matrix[:, :, i][mask]
* self.battery.power
/ 4
)
charge_cycles[mask] += (
torch.abs(charge_matrix[:, :, i][mask])
* (self.battery.power / 4)
/ self.battery.capacity
/ 2
)
# penalize for excess charge cycles
excess_charge_cycles = (charge_cycles - 400/365).clamp(min=0)
profits -= excess_charge_cycles * charge_cycles_penalty
excess_charge_cycles = (charge_cycles - 400 / 365).clamp(min=0)
profits -= excess_charge_cycles * charge_cycles_penalty
return profits, charge_cycles, battery_states
return profits, charge_cycles
# battery = Battery(2, 1)
# policy = BaselinePolicy(battery)
@@ -214,4 +376,4 @@ class BaselinePolicy():
# print(df_test.to_markdown())
# # print(policy.get_test_score(150, 100))
# # print(policy.get_test_score(150, 100))

View File

@@ -557,6 +557,9 @@ class NonAutoRegressiveQuantileRegression(Trainer):
inputs, targets = inputs.to(self.device), targets.to(self.device)
outputs = self.model(inputs)
outputs = outputs.reshape(-1, len(self.quantiles))
outputted_samples = [
sample_from_dist(self.quantiles, output.cpu()) for output in outputs
]