Policy evaluation during training
This commit is contained in:
@@ -15,6 +15,7 @@ import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import matplotlib.patches as mpatches
|
||||
|
||||
|
||||
def sample_from_dist(quantiles, preds):
|
||||
if isinstance(preds, torch.Tensor):
|
||||
preds = preds.detach().cpu()
|
||||
@@ -31,10 +32,11 @@ def sample_from_dist(quantiles, preds):
|
||||
|
||||
# random probabilities of (1000, 1)
|
||||
import random
|
||||
|
||||
probs = np.array([random.random() for _ in range(1000)])
|
||||
|
||||
spline = CubicSpline(quantiles, preds, axis=1)
|
||||
|
||||
|
||||
samples = spline(probs)
|
||||
|
||||
# get the diagonal
|
||||
@@ -42,6 +44,7 @@ def sample_from_dist(quantiles, preds):
|
||||
|
||||
return samples
|
||||
|
||||
|
||||
def auto_regressive(dataset, model, quantiles, idx_batch, sequence_length: int = 96):
|
||||
device = next(model.parameters()).device
|
||||
prev_features, targets = dataset.get_batch(idx_batch)
|
||||
@@ -65,7 +68,7 @@ def auto_regressive(dataset, model, quantiles, idx_batch, sequence_length: int =
|
||||
predictions_full = new_predictions_full.unsqueeze(1)
|
||||
|
||||
for i in range(sequence_length - 1):
|
||||
if len(list(prev_features.shape)) == 2:
|
||||
if len(list(prev_features.shape)) == 2:
|
||||
new_features = torch.cat(
|
||||
(prev_features[:, 1:96], samples), dim=1
|
||||
) # (batch_size, 96)
|
||||
@@ -102,9 +105,7 @@ def auto_regressive(dataset, model, quantiles, idx_batch, sequence_length: int =
|
||||
) # (batch_size, sequence_length)
|
||||
|
||||
with torch.no_grad():
|
||||
new_predictions_full = model(
|
||||
prev_features
|
||||
) # (batch_size, quantiles)
|
||||
new_predictions_full = model(prev_features) # (batch_size, quantiles)
|
||||
predictions_full = torch.cat(
|
||||
(predictions_full, new_predictions_full.unsqueeze(1)), dim=1
|
||||
) # (batch_size, sequence_length, quantiles)
|
||||
@@ -123,6 +124,7 @@ def auto_regressive(dataset, model, quantiles, idx_batch, sequence_length: int =
|
||||
target_full.unsqueeze(-1),
|
||||
)
|
||||
|
||||
|
||||
class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
def __init__(
|
||||
self,
|
||||
@@ -162,40 +164,58 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
|
||||
if len(idx_batch) == 0:
|
||||
continue
|
||||
|
||||
|
||||
for idx in tqdm(idx_batch):
|
||||
computed_idx_batch = [idx] * 100
|
||||
initial, _, samples, targets = self.auto_regressive(
|
||||
dataloader.dataset, idx_batch=computed_idx_batch
|
||||
)
|
||||
|
||||
generated_samples[idx.item()] = (initial, self.data_processor.inverse_transform(samples))
|
||||
generated_samples[idx.item()] = (
|
||||
self.data_processor.inverse_transform(initial),
|
||||
self.data_processor.inverse_transform(samples),
|
||||
)
|
||||
|
||||
samples = samples.unsqueeze(0)
|
||||
targets = targets.squeeze(-1)
|
||||
targets = targets[0].unsqueeze(0)
|
||||
|
||||
|
||||
crps = crps_from_samples(samples, targets)
|
||||
|
||||
crps_from_samples_metric.append(crps[0].mean().item())
|
||||
|
||||
task.get_logger().report_scalar(
|
||||
title="CRPS_from_samples", series="test", value=np.mean(crps_from_samples_metric), iteration=epoch
|
||||
title="CRPS_from_samples",
|
||||
series="test",
|
||||
value=np.mean(crps_from_samples_metric),
|
||||
iteration=epoch,
|
||||
)
|
||||
|
||||
# using the policy evaluator, evaluate the policy with the generated samples
|
||||
if self.policy_evaluator is not None:
|
||||
_, test_loader = self.data_processor.get_dataloaders(
|
||||
predict_sequence_length=self.model.output_size)
|
||||
predict_sequence_length=self.model.output_size, full_day_skip=True
|
||||
)
|
||||
self.policy_evaluator.evaluate_test_set(generated_samples, test_loader)
|
||||
df = self.policy_evaluator.get_profits_as_scalars()
|
||||
|
||||
# for each row, report the profits
|
||||
for idx, row in df.iterrows():
|
||||
task.get_logger().report_scalar(
|
||||
title="Profit", series=f"penalty_{row['Penalty']}", value=row["Total Profit"], iteration=epoch
|
||||
title="Profit",
|
||||
series=f"penalty_{row['Penalty']}",
|
||||
value=row["Total Profit"],
|
||||
iteration=epoch,
|
||||
)
|
||||
|
||||
df = self.policy_evaluator.get_profits_till_400()
|
||||
for idx, row in df.iterrows():
|
||||
task.get_logger().report_scalar(
|
||||
title="Profit_till_400",
|
||||
series=f"penalty_{row['Penalty']}",
|
||||
value=row["Profit_till_400"],
|
||||
iteration=epoch,
|
||||
)
|
||||
|
||||
def log_final_metrics(self, task, dataloader, train: bool = True):
|
||||
metrics = {metric.__class__.__name__: 0.0 for metric in self.metrics_to_track}
|
||||
@@ -222,17 +242,19 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
)
|
||||
|
||||
# save the samples for the idx, these will be used for evaluating the policy
|
||||
self.test_set_samples[idx.item()] = (initial, self.data_processor.inverse_transform(samples))
|
||||
self.test_set_samples[idx.item()] = (
|
||||
self.data_processor.inverse_transform(initial),
|
||||
self.data_processor.inverse_transform(samples),
|
||||
)
|
||||
|
||||
samples = samples.unsqueeze(0)
|
||||
targets = targets.squeeze(-1)
|
||||
targets = targets[0].unsqueeze(0)
|
||||
|
||||
|
||||
crps = crps_from_samples(samples, targets)
|
||||
|
||||
crps_from_samples_metric.append(crps[0].mean().item())
|
||||
|
||||
|
||||
_, outputs, samples, targets = self.auto_regressive(
|
||||
dataloader.dataset, idx_batch=idx_batch
|
||||
)
|
||||
@@ -286,7 +308,8 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
|
||||
if train == False:
|
||||
task.get_logger().report_single_value(
|
||||
name="test_CRPS_from_samples_transformed", value=np.mean(crps_from_samples_metric)
|
||||
name="test_CRPS_from_samples_transformed",
|
||||
value=np.mean(crps_from_samples_metric),
|
||||
)
|
||||
|
||||
# def get_plot_error(
|
||||
@@ -313,13 +336,12 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
|
||||
# errors.append(metric(prediction_tensor, target_tensor))
|
||||
|
||||
# # plot the error
|
||||
# # plot the error
|
||||
# fig.add_trace(go.Scatter(x=np.arange(96), y=errors, name=metric.__class__.__name__))
|
||||
# fig.update_layout(title=f"Error of {metric.__class__.__name__} for each time step")
|
||||
|
||||
# return fig
|
||||
|
||||
|
||||
def get_plot(
|
||||
self,
|
||||
current_day,
|
||||
@@ -376,30 +398,78 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
time_steps = np.arange(0, 96)
|
||||
|
||||
fig, ax = plt.subplots(figsize=(20, 10))
|
||||
ax.plot(time_steps, predictions_np.mean(axis=0), label="Mean of NRV samples", linewidth=3)
|
||||
ax.plot(
|
||||
time_steps,
|
||||
predictions_np.mean(axis=0),
|
||||
label="Mean of NRV samples",
|
||||
linewidth=3,
|
||||
)
|
||||
# ax.fill_between(time_steps, ci_lower, ci_upper, color='b', alpha=0.2, label='Full Interval')
|
||||
|
||||
ax.fill_between(time_steps, ci_99_lower, ci_99_upper, color='b', alpha=0.2, label='99% Interval')
|
||||
ax.fill_between(time_steps, ci_95_lower, ci_95_upper, color='b', alpha=0.2, label='95% Interval')
|
||||
ax.fill_between(time_steps, ci_90_lower, ci_90_upper, color='b', alpha=0.2, label='90% Interval')
|
||||
ax.fill_between(time_steps, ci_50_lower, ci_50_upper, color='b', alpha=0.2, label='50% Interval')
|
||||
ax.fill_between(
|
||||
time_steps,
|
||||
ci_99_lower,
|
||||
ci_99_upper,
|
||||
color="b",
|
||||
alpha=0.2,
|
||||
label="99% Interval",
|
||||
)
|
||||
ax.fill_between(
|
||||
time_steps,
|
||||
ci_95_lower,
|
||||
ci_95_upper,
|
||||
color="b",
|
||||
alpha=0.2,
|
||||
label="95% Interval",
|
||||
)
|
||||
ax.fill_between(
|
||||
time_steps,
|
||||
ci_90_lower,
|
||||
ci_90_upper,
|
||||
color="b",
|
||||
alpha=0.2,
|
||||
label="90% Interval",
|
||||
)
|
||||
ax.fill_between(
|
||||
time_steps,
|
||||
ci_50_lower,
|
||||
ci_50_upper,
|
||||
color="b",
|
||||
alpha=0.2,
|
||||
label="50% Interval",
|
||||
)
|
||||
|
||||
ax.plot(next_day_np, label="Real NRV", linewidth=3)
|
||||
# full_interval_patch = mpatches.Patch(color='b', alpha=0.2, label='Full Interval')
|
||||
ci_99_patch = mpatches.Patch(color='b', alpha=0.3, label='99% Interval')
|
||||
ci_95_patch = mpatches.Patch(color='b', alpha=0.4, label='95% Interval')
|
||||
ci_90_patch = mpatches.Patch(color='b', alpha=0.5, label='90% Interval')
|
||||
ci_50_patch = mpatches.Patch(color='b', alpha=0.6, label='50% Interval')
|
||||
ci_99_patch = mpatches.Patch(color="b", alpha=0.3, label="99% Interval")
|
||||
ci_95_patch = mpatches.Patch(color="b", alpha=0.4, label="95% Interval")
|
||||
ci_90_patch = mpatches.Patch(color="b", alpha=0.5, label="90% Interval")
|
||||
ci_50_patch = mpatches.Patch(color="b", alpha=0.6, label="50% Interval")
|
||||
|
||||
|
||||
ax.legend(handles=[ci_99_patch, ci_95_patch, ci_90_patch, ci_50_patch, ax.lines[0], ax.lines[1]])
|
||||
ax.legend(
|
||||
handles=[
|
||||
ci_99_patch,
|
||||
ci_95_patch,
|
||||
ci_90_patch,
|
||||
ci_50_patch,
|
||||
ax.lines[0],
|
||||
ax.lines[1],
|
||||
]
|
||||
)
|
||||
return fig
|
||||
|
||||
def auto_regressive(self, dataset, idx_batch, sequence_length: int = 96):
|
||||
return auto_regressive(dataset, self.model, self.quantiles, idx_batch, sequence_length)
|
||||
return auto_regressive(
|
||||
dataset, self.model, self.quantiles, idx_batch, sequence_length
|
||||
)
|
||||
|
||||
def plot_quantile_percentages(
|
||||
self, task, data_loader, train: bool = True, iteration: int = None, full_day: bool = False
|
||||
self,
|
||||
task,
|
||||
data_loader,
|
||||
train: bool = True,
|
||||
iteration: int = None,
|
||||
full_day: bool = False,
|
||||
):
|
||||
quantiles = self.quantiles
|
||||
total = 0
|
||||
@@ -429,20 +499,18 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
|
||||
else:
|
||||
inputs = inputs.to(self.device)
|
||||
outputs = self.model(inputs).cpu().numpy() # (batch_size, num_quantiles)
|
||||
targets = targets.squeeze(-1).cpu().numpy() # (batch_size, 1)
|
||||
outputs = (
|
||||
self.model(inputs).cpu().numpy()
|
||||
) # (batch_size, num_quantiles)
|
||||
targets = targets.squeeze(-1).cpu().numpy() # (batch_size, 1)
|
||||
|
||||
for i, q in enumerate(quantiles):
|
||||
quantile_counter[q] += np.sum(
|
||||
targets < outputs[:, i]
|
||||
)
|
||||
quantile_counter[q] += np.sum(targets < outputs[:, i])
|
||||
|
||||
total += len(targets)
|
||||
|
||||
# to numpy array of length len(quantiles)
|
||||
percentages = np.array(
|
||||
[quantile_counter[q] / total for q in quantiles]
|
||||
)
|
||||
percentages = np.array([quantile_counter[q] / total for q in quantiles])
|
||||
|
||||
bar_width = 0.35
|
||||
index = np.arange(len(quantiles))
|
||||
@@ -450,9 +518,7 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
|
||||
# Plotting the bars
|
||||
fig, ax = plt.subplots(figsize=(15, 10))
|
||||
|
||||
bar1 = ax.bar(
|
||||
index, quantiles, bar_width, label="Ideal", color="brown"
|
||||
)
|
||||
bar1 = ax.bar(index, quantiles, bar_width, label="Ideal", color="brown")
|
||||
bar2 = ax.bar(
|
||||
index + bar_width, percentages, bar_width, label="NN model", color="blue"
|
||||
)
|
||||
@@ -502,7 +568,6 @@ class NonAutoRegressiveQuantileRegression(Trainer):
|
||||
):
|
||||
self.quantiles = quantiles
|
||||
|
||||
|
||||
criterion = NonAutoRegressivePinballLoss(quantiles=quantiles)
|
||||
super().__init__(
|
||||
model=model,
|
||||
|
||||
Reference in New Issue
Block a user