Policy evaluation during training

This commit is contained in:
2024-02-25 22:13:00 +01:00
parent 90751866a4
commit f1b54df2c9
5 changed files with 450 additions and 158 deletions

View File

@@ -1,6 +1,7 @@
from clearml import Task
import torch
import torch.nn as nn
from src.policies.PolicyEvaluator import PolicyEvaluator
from torchinfo import summary
from src.losses.crps_metric import crps_from_samples
from src.data.preprocessing import DataProcessor
@@ -13,10 +14,18 @@ import seaborn as sns
import matplotlib.patches as mpatches
def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_steps=1000, beta_start=1e-4, beta_end=0.02, ts_length=96):
def sample_diffusion(
model: DiffusionModel,
n: int,
inputs: torch.tensor,
noise_steps=1000,
beta_start=1e-4,
beta_end=0.02,
ts_length=96,
):
device = next(model.parameters()).device
beta = torch.linspace(beta_start, beta_end, noise_steps).to(device)
alpha = 1. - beta
alpha = 1.0 - beta
alpha_hat = torch.cumprod(alpha, dim=0)
if len(inputs.shape) == 2:
@@ -39,13 +48,24 @@ def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_
else:
noise = torch.zeros_like(x)
x = 1/torch.sqrt(_alpha) * (x-((1-_alpha) / (torch.sqrt(1 - _alpha_hat))) * predicted_noise) + torch.sqrt(_beta) * noise
x = (
1
/ torch.sqrt(_alpha)
* (x - ((1 - _alpha) / (torch.sqrt(1 - _alpha_hat))) * predicted_noise)
+ torch.sqrt(_beta) * noise
)
x = torch.clamp(x, -1.0, 1.0)
return x
class DiffusionTrainer:
def __init__(self, model: nn.Module, data_processor: DataProcessor, device: torch.device):
def __init__(
self,
model: nn.Module,
data_processor: DataProcessor,
device: torch.device,
policy_evaluator: PolicyEvaluator = None,
):
self.model = model
self.device = device
@@ -53,39 +73,49 @@ class DiffusionTrainer:
self.beta_start = 0.0001
self.beta_end = 0.02
self.ts_length = 96
self.data_processor = data_processor
self.beta = torch.linspace(self.beta_start, self.beta_end, self.noise_steps).to(self.device)
self.alpha = 1. - self.beta
self.beta = torch.linspace(self.beta_start, self.beta_end, self.noise_steps).to(
self.device
)
self.alpha = 1.0 - self.beta
self.alpha_hat = torch.cumprod(self.alpha, dim=0)
self.best_score = None
self.policy_evaluator = policy_evaluator
def noise_time_series(self, x: torch.Tensor, t: torch.Tensor):
    """Add noise to time series.

    Computes ``sqrt(alpha_hat[t]) * x + sqrt(1 - alpha_hat[t]) * noise``,
    where ``noise`` is standard-normal and has the same shape as ``x``.

    Args:
        x (torch.Tensor): shape (batch_size, time_steps)
        t (torch.Tensor): 1-D integer tensor of time-step indices, one per
            batch row. Indexing ``self.alpha_hat`` with it must give a 1-D
            result, because the coefficients are expanded with
            ``[:, None]``; a plain Python ``int`` would fail at that step.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: the noised series and the noise
        that was added, both shaped like ``x``.
    """
    alpha_hat_t = self.alpha_hat[t]
    # Trailing axis lets the per-row coefficients broadcast across time steps.
    sqrt_alpha_hat = torch.sqrt(alpha_hat_t)[:, None]
    sqrt_one_minus_alpha_hat = torch.sqrt(1.0 - alpha_hat_t)[:, None]
    noise = torch.randn_like(x)
    return sqrt_alpha_hat * x + sqrt_one_minus_alpha_hat * noise, noise
def sample_timesteps(self, n: int):
    """Sample timesteps for noise.

    Args:
        n (int): number of samples

    Returns:
        torch.Tensor: integer tensor of shape ``(n,)`` with values drawn
        from ``1`` up to ``self.noise_steps - 1`` inclusive
        (``torch.randint`` excludes its ``high`` bound).
    """
    return torch.randint(low=1, high=self.noise_steps, size=(n,))
def sample(self, model: DiffusionModel, n: int, inputs: torch.tensor):
    """Draw samples from ``model`` using this trainer's noise settings.

    Delegates to ``sample_diffusion`` exactly once, passing
    ``noise_steps``, ``beta_start``, ``beta_end`` and ``ts_length`` from
    the trainer.

    Args:
        model (DiffusionModel): network handed to ``sample_diffusion``.
        n (int): forwarded as the ``n`` argument of ``sample_diffusion``
            (presumably the number of samples per input; confirm there).
        inputs (torch.tensor): conditioning inputs, forwarded unchanged.

    Returns:
        Whatever ``sample_diffusion`` returns for these arguments.
    """
    x = sample_diffusion(
        model,
        n,
        inputs,
        self.noise_steps,
        self.beta_start,
        self.beta_end,
        self.ts_length,
    )
    # The model is always left in training mode after sampling.
    # NOTE(review): presumably undoes an eval() inside sample_diffusion; that
    # part of the function is not visible here.
    model.train()
    return x
def random_samples(self, train: bool = True, num_samples: int = 10):
train_loader, test_loader = self.data_processor.get_dataloaders(
predict_sequence_length=96
@@ -99,15 +129,17 @@ class DiffusionTrainer:
# set seed
np.random.seed(42)
actual_indices = np.random.choice(loader.dataset.full_day_valid_indices, num_samples, replace=False)
actual_indices = np.random.choice(
loader.dataset.full_day_valid_indices, num_samples, replace=False
)
indices = {}
for i in actual_indices:
indices[i] = loader.dataset.valid_indices.index(i)
print(actual_indices)
return indices
def init_clearml_task(self, task):
task.add_tags(self.model.__class__.__name__)
task.add_tags(self.__class__.__name__)
@@ -117,13 +149,24 @@ class DiffusionTrainer:
if self.data_processor.lstm:
inputDim = self.data_processor.get_input_size()
other_input_data = torch.randn(1024, inputDim[1], self.model.other_inputs_dim).to(self.device)
other_input_data = torch.randn(
1024, inputDim[1], self.model.other_inputs_dim
).to(self.device)
else:
other_input_data = torch.randn(1024, self.model.other_inputs_dim).to(self.device)
task.set_configuration_object("model", str(summary(self.model, input_data=[input_data, time_steps, other_input_data])))
other_input_data = torch.randn(1024, self.model.other_inputs_dim).to(
self.device
)
task.set_configuration_object(
"model",
str(
summary(
self.model, input_data=[input_data, time_steps, other_input_data]
)
),
)
self.data_processor = task.connect(self.data_processor, name="data_processor")
def train(self, epochs: int, learning_rate: float, task: Task = None):
self.best_score = None
optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
@@ -157,7 +200,7 @@ class DiffusionTrainer:
optimizer.zero_grad()
loss.backward()
optimizer.step()
running_loss /= len(train_loader.dataset)
if epoch % 40 == 0 and epoch != 0:
@@ -166,19 +209,22 @@ class DiffusionTrainer:
if task:
task.get_logger().report_scalar(
title=criterion.__class__.__name__,
series='train',
series="train",
iteration=epoch,
value=loss.item(),
)
if epoch % 150 == 0 and epoch != 0:
self.debug_plots(task, True, train_loader, train_sample_indices, epoch)
self.debug_plots(task, False, test_loader, test_sample_indices, epoch)
self.debug_plots(
task, True, train_loader, train_sample_indices, epoch
)
self.debug_plots(
task, False, test_loader, test_sample_indices, epoch
)
if task:
task.close()
def debug_plots(self, task, training: bool, data_loader, sample_indices, epoch):
for actual_idx, idx in sample_indices.items():
features, target, _ = data_loader.dataset[idx]
@@ -191,7 +237,7 @@ class DiffusionTrainer:
samples = self.sample(self.model, 100, features).cpu().numpy()
samples = self.data_processor.inverse_transform(samples)
target = self.data_processor.inverse_transform(target)
ci_99_upper = np.quantile(samples, 0.995, axis=0)
ci_99_lower = np.quantile(samples, 0.005, axis=0)
@@ -204,49 +250,100 @@ class DiffusionTrainer:
ci_50_lower = np.quantile(samples, 0.25, axis=0)
ci_50_upper = np.quantile(samples, 0.75, axis=0)
sns.set_theme()
time_steps = np.arange(0, 96)
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(time_steps, samples.mean(axis=0), label="Mean of NRV samples", linewidth=3)
ax.plot(
time_steps,
samples.mean(axis=0),
label="Mean of NRV samples",
linewidth=3,
)
# ax.fill_between(time_steps, ci_lower, ci_upper, color='b', alpha=0.2, label='Full Interval')
ax.fill_between(time_steps, ci_99_lower, ci_99_upper, color='b', alpha=0.2, label='99% Interval')
ax.fill_between(time_steps, ci_95_lower, ci_95_upper, color='b', alpha=0.2, label='95% Interval')
ax.fill_between(time_steps, ci_90_lower, ci_90_upper, color='b', alpha=0.2, label='90% Interval')
ax.fill_between(time_steps, ci_50_lower, ci_50_upper, color='b', alpha=0.2, label='50% Interval')
ax.fill_between(
time_steps,
ci_99_lower,
ci_99_upper,
color="b",
alpha=0.2,
label="99% Interval",
)
ax.fill_between(
time_steps,
ci_95_lower,
ci_95_upper,
color="b",
alpha=0.2,
label="95% Interval",
)
ax.fill_between(
time_steps,
ci_90_lower,
ci_90_upper,
color="b",
alpha=0.2,
label="90% Interval",
)
ax.fill_between(
time_steps,
ci_50_lower,
ci_50_upper,
color="b",
alpha=0.2,
label="50% Interval",
)
ax.plot(target, label="Real NRV", linewidth=3)
# full_interval_patch = mpatches.Patch(color='b', alpha=0.2, label='Full Interval')
ci_99_patch = mpatches.Patch(color='b', alpha=0.3, label='99% Interval')
ci_95_patch = mpatches.Patch(color='b', alpha=0.4, label='95% Interval')
ci_90_patch = mpatches.Patch(color='b', alpha=0.5, label='90% Interval')
ci_50_patch = mpatches.Patch(color='b', alpha=0.6, label='50% Interval')
ci_99_patch = mpatches.Patch(color="b", alpha=0.3, label="99% Interval")
ci_95_patch = mpatches.Patch(color="b", alpha=0.4, label="95% Interval")
ci_90_patch = mpatches.Patch(color="b", alpha=0.5, label="90% Interval")
ci_50_patch = mpatches.Patch(color="b", alpha=0.6, label="50% Interval")
ax.legend(handles=[ci_99_patch, ci_95_patch, ci_90_patch, ci_50_patch, ax.lines[0], ax.lines[1]])
ax.legend(
handles=[
ci_99_patch,
ci_95_patch,
ci_90_patch,
ci_50_patch,
ax.lines[0],
ax.lines[1],
]
)
task.get_logger().report_matplotlib_figure(
title="Training" if training else "Testing",
series=f'Sample {actual_idx}',
series=f"Sample {actual_idx}",
iteration=epoch,
figure=fig,
)
plt.close()
def test(self, data_loader: torch.utils.data.DataLoader, epoch: int, task: Task = None):
def test(
self, data_loader: torch.utils.data.DataLoader, epoch: int, task: Task = None
):
all_crps = []
for inputs, targets, _ in data_loader:
generated_samples = {}
for inputs, targets, idx_batch in data_loader:
inputs, targets = inputs.to(self.device), targets.to(self.device)
print(inputs.shape, targets.shape)
number_of_samples = 100
sample = self.sample(self.model, number_of_samples, inputs)
# reduce samples from (batch_size*number_of_samples, time_steps) to (batch_size, number_of_samples, time_steps)
samples_batched = sample.reshape(inputs.shape[0], number_of_samples, 96)
# add samples to generated_samples generated_samples[idx.item()] = (initial, samples)
for i, (idx, samples) in enumerate(zip(idx_batch, samples_batched)):
generated_samples[idx.item()] = (
self.data_processor.inverse_transform(inputs[i][:96]),
self.data_processor.inverse_transform(samples),
)
# calculate crps
crps = crps_from_samples(samples_batched, targets)
crps_mean = crps.mean(axis=1)
@@ -262,16 +359,38 @@ class DiffusionTrainer:
if task:
task.get_logger().report_scalar(
title="CRPS",
series='test',
value=mean_crps,
iteration=epoch
title="CRPS", series="test", value=mean_crps, iteration=epoch
)
if self.policy_evaluator:
_, test_loader = self.data_processor.get_dataloaders(
predict_sequence_length=self.ts_length, full_day_skip=True
)
self.policy_evaluator.evaluate_test_set(generated_samples, test_loader)
df = self.policy_evaluator.get_profits_as_scalars()
for idx, row in df.iterrows():
task.get_logger().report_scalar(
title="Profit",
series=f"penalty_{row['Penalty']}",
value=row["Total Profit"],
iteration=epoch,
)
df = self.policy_evaluator.get_profits_till_400()
for idx, row in df.iterrows():
task.get_logger().report_scalar(
title="Profit_till_400",
series=f"penalty_{row['Penalty']}",
value=row["Profit_till_400"],
iteration=epoch,
)
def save_checkpoint(self, val_loss, task, iteration: int):
    """Save the model to ``checkpoint.pt`` and record the new best score.

    Args:
        val_loss: score of the checkpoint being saved; stored in
            ``self.best_score``.
        task: ClearML task that receives the checkpoint as its output
            model, or ``None`` to save locally only.
        iteration (int): iteration number attached to the output model.
    """
    # Saves the whole module object, not just its state_dict.
    torch.save(self.model, "checkpoint.pt")
    # The rest of the trainer treats the task as optional, so running without
    # one must not crash here.
    if task is not None:
        task.update_output_model(
            model_path="checkpoint.pt", iteration=iteration, auto_delete_file=False
        )
    self.best_score = val_loss