Merge remote-tracking branch 'hina/feature/val-loss' into validation-loss-upstream

Modified the implementation of process_batch and cleaned up validation recording
2026-04-08 22:35:09 +00:00 · 2025-01-03 00:48:08 -05:00
parent 7f6e124c7c cb89e0284e
commit d23c7322ee
85 changed files with 23666 additions and 1552 deletions
--- a/library/custom_train_functions.py
+++ b/library/custom_train_functions.py
@@ -98,10 +98,13 @@ def add_v_prediction_like_loss(loss: torch.Tensor, timesteps: torch.IntTensor, n
    return loss


-def apply_debiased_estimation(loss: torch.Tensor, timesteps: torch.IntTensor, noise_scheduler: DDPMScheduler):
+def apply_debiased_estimation(loss: torch.Tensor, timesteps: torch.IntTensor, noise_scheduler: DDPMScheduler, v_prediction=False):
    snr_t = torch.stack([noise_scheduler.all_snr[t] for t in timesteps])  # batch_size
    snr_t = torch.minimum(snr_t, torch.ones_like(snr_t) * 1000)  # if timestep is 0, snr_t is inf, so limit it to 1000
-    weight = 1 / torch.sqrt(snr_t)
+    if v_prediction:
+        weight = 1 / (snr_t + 1)
+    else:
+        weight = 1 / torch.sqrt(snr_t)
    loss = weight * loss
    return loss

@@ -482,12 +485,20 @@ def apply_noise_offset(latents, noise, noise_offset, adaptive_noise_scale):


 def apply_masked_loss(loss, batch):
-    # mask image is -1 to 1. we need to convert it to 0 to 1
-    mask_image = batch["conditioning_images"].to(dtype=loss.dtype)[:, 0].unsqueeze(1)  # use R channel
+    if "conditioning_images" in batch:
+        # conditioning image is -1 to 1. we need to convert it to 0 to 1
+        mask_image = batch["conditioning_images"].to(dtype=loss.dtype)[:, 0].unsqueeze(1)  # use R channel
+        mask_image = mask_image / 2 + 0.5
+        # print(f"conditioning_image: {mask_image.shape}")
+    elif "alpha_masks" in batch and batch["alpha_masks"] is not None:
+        # alpha mask is 0 to 1
+        mask_image = batch["alpha_masks"].to(dtype=loss.dtype).unsqueeze(1) # add channel dimension
+        # print(f"mask_image: {mask_image.shape}, {mask_image.mean()}")
+    else:
+        return loss

    # resize to the same size as the loss
    mask_image = torch.nn.functional.interpolate(mask_image, size=loss.shape[2:], mode="area")
-    mask_image = mask_image / 2 + 0.5
    loss = loss * mask_image
    return loss