mirror of https://github.com/kohya-ss/sd-scripts.git
Min-SNR Weighting Strategy: Refactored and added to all trainers
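This commit factors the Min-SNR loss weighting into a shared helper, library/custom_train_functions.py, and wires it into each trainer's train() and setup_parser(). The strategy follows Hang et al., "Efficient Diffusion Training via Min-SNR Weighting Strategy" (arXiv:2303.09556): each timestep's loss is scaled by min(gamma / SNR(t), 1), which caps the contribution of low-noise (high-SNR) timesteps so that easy denoising steps stop dominating training. A minimal sketch of just that weighting rule, with made-up SNR values (illustrative, not code from this commit):

    import torch

    snr = torch.tensor([0.5, 1.0, 5.0, 25.0])   # hypothetical per-sample SNR values
    gamma = 5.0                                  # the paper's recommended setting
    weight = torch.clamp(gamma / snr, max=1.0)   # min(gamma / SNR, 1)
    print(weight)                                # tensor([1.0000, 1.0000, 1.0000, 0.2000])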
@@ -19,7 +19,8 @@ from library.config_util import (
     ConfigSanitizer,
     BlueprintGenerator,
 )
+import library.custom_train_functions as custom_train_functions
+from library.custom_train_functions import apply_snr_weight
 
 def collate_fn(examples):
     return examples[0]
@@ -304,6 +305,9 @@ def train(args):
 
                 loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="mean")
 
+                if args.min_snr_gamma:
+                    loss = apply_snr_weight(loss, latents, noisy_latents, args.min_snr_gamma)
+
                 accelerator.backward(loss)
                 if accelerator.sync_gradients and args.max_grad_norm != 0.0:
                     params_to_clip = []
@@ -396,6 +400,8 @@ def setup_parser() -> argparse.ArgumentParser:
     train_util.add_sd_saving_arguments(parser)
     train_util.add_optimizer_arguments(parser)
     config_util.add_config_arguments(parser)
+    custom_train_functions.add_custom_train_arguments(parser)
+
     parser.add_argument("--diffusers_xformers", action="store_true", help="use xformers by diffusers / Diffusersでxformersを使用する")
     parser.add_argument("--train_text_encoder", action="store_true", help="train text encoder / text encoderも学習する")
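Each trainer's setup_parser() now calls custom_train_functions.add_custom_train_arguments(parser), defined in the new library/custom_train_functions.py shown below, which registers the --min_snr_gamma flag. A minimal sketch of that wiring on a standalone parser (illustrative, not repository code):

    import argparse
    import library.custom_train_functions as custom_train_functions

    parser = argparse.ArgumentParser()
    custom_train_functions.add_custom_train_arguments(parser)
    args = parser.parse_args(["--min_snr_gamma", "5"])
    print(args.min_snr_gamma)  # 5.0; the default of 0 leaves the weighting disabled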
library/custom_train_functions.py (new file, 17 lines)
@@ -0,0 +1,17 @@
+import torch
+import argparse
+
+def apply_snr_weight(loss, latents, noisy_latents, gamma):
+    sigma = torch.sub(noisy_latents, latents)  # find the noise as applied by the scheduler
+    zeros = torch.zeros_like(sigma)
+    alpha_mean_sq = torch.nn.functional.mse_loss(latents.float(), zeros.float(), reduction="none").mean([1, 2, 3])  # trick to get the mean square / second moment
+    sigma_mean_sq = torch.nn.functional.mse_loss(sigma.float(), zeros.float(), reduction="none").mean([1, 2, 3])  # trick to get the mean square / second moment
+    snr = torch.div(alpha_mean_sq, sigma_mean_sq)  # signal-to-noise ratio = ratio of mean squares
+    gamma_over_snr = torch.div(torch.ones_like(snr) * gamma, snr)
+    snr_weight = torch.minimum(gamma_over_snr, torch.ones_like(gamma_over_snr)).float()  # min(gamma / SNR, 1), from the paper
+    loss = loss * snr_weight
+    print(snr_weight)
+    return loss
+
+def add_custom_train_arguments(parser: argparse.ArgumentParser):
+    parser.add_argument("--min_snr_gamma", type=float, default=0, help="gamma for reducing the weight of high loss timesteps. Lower numbers have stronger effect. 5 is recommended by paper.")
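For readers who want to try the helper in isolation, a minimal usage sketch with dummy tensors; noisy_latents here is a crude stand-in for the scheduler's add_noise() output, and shapes follow the [batch, channels, height, width] latent convention:

    import torch
    from library.custom_train_functions import apply_snr_weight

    latents = torch.randn(4, 4, 64, 64)   # clean VAE latents
    noise = torch.randn_like(latents)
    noisy_latents = latents + noise       # stand-in for scheduler.add_noise(...)
    per_sample_loss = torch.rand(4)       # e.g. MSE reduced over dims [1, 2, 3]

    weighted = apply_snr_weight(per_sample_loss, latents, noisy_latents, 5.0)
    print(weighted.shape)                 # torch.Size([4])

Because snr_weight has shape [batch], the helper composes most naturally with a per-sample loss (reduction="none" followed by .mean([1, 2, 3])), as in the train-loop hunks below. Note also that apply_snr_weight prints snr_weight on every call, which reads like a debugging leftover.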
@@ -1963,7 +1963,7 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth:
     parser.add_argument(
         "--prior_loss_weight", type=float, default=1.0, help="loss weight for regularization images / 正則化画像のlossの重み"
     )
-    parser.add_argument("--min_snr_gamma", type=float, default=0, help="gamma for reducing the weight of high loss timesteps. Lower numbers have stronger effect. 5 is recommended by paper.")
+
 
 def verify_training_args(args: argparse.Namespace):
     if args.v_parameterization and not args.v2:
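Two details in the train_util.py hunk above: the --min_snr_gamma registration moves out of add_training_arguments, where an earlier commit had apparently placed it, into the shared add_custom_train_arguments; and since the flag defaults to 0, the if args.min_snr_gamma: guards in the train loops keep the weighting disabled unless a positive gamma is passed.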
@@ -21,7 +21,8 @@ from library.config_util import (
     ConfigSanitizer,
     BlueprintGenerator,
 )
+import library.custom_train_functions as custom_train_functions
+from library.custom_train_functions import apply_snr_weight
 
 def collate_fn(examples):
     return examples[0]
@@ -291,6 +292,9 @@ def train(args):
                 loss_weights = batch["loss_weights"]  # per-sample weight
                 loss = loss * loss_weights
 
+                if args.min_snr_gamma:
+                    loss = apply_snr_weight(loss, latents, noisy_latents, args.min_snr_gamma)
+
                 loss = loss.mean()  # mean over the batch, so no need to divide by batch_size
 
                 accelerator.backward(loss)
@@ -390,6 +394,7 @@ def setup_parser() -> argparse.ArgumentParser:
     train_util.add_sd_saving_arguments(parser)
     train_util.add_optimizer_arguments(parser)
     config_util.add_config_arguments(parser)
+    custom_train_functions.add_custom_train_arguments(parser)
 
     parser.add_argument(
         "--no_token_padding",
@@ -23,7 +23,8 @@ from library.config_util import (
     ConfigSanitizer,
     BlueprintGenerator,
 )
+import library.custom_train_functions as custom_train_functions
+from library.custom_train_functions import apply_snr_weight
 
 def collate_fn(examples):
     return examples[0]
@@ -548,16 +549,9 @@ def train(args):
 
                 loss_weights = batch["loss_weights"]  # per-sample weight
                 loss = loss * loss_weights
-                gamma = args.min_snr_gamma
-                if gamma:
-                    sigma = torch.sub(noisy_latents, latents)  # find noise as applied
-                    zeros = torch.zeros_like(sigma)
-                    alpha_mean_sq = torch.nn.functional.mse_loss(latents.float(), zeros.float(), reduction="none").mean([1, 2, 3])  # trick to get the mean square
-                    sigma_mean_sq = torch.nn.functional.mse_loss(sigma.float(), zeros.float(), reduction="none").mean([1, 2, 3])  # trick to get the mean square
-                    snr = torch.div(alpha_mean_sq, sigma_mean_sq)  # signal-to-noise ratio = ratio of mean squares
-                    gamma_over_snr = torch.div(torch.ones_like(snr) * gamma, snr)
-                    snr_weight = torch.minimum(gamma_over_snr, torch.ones_like(gamma_over_snr)).float()  # from paper
-                    loss = loss * snr_weight
+
+                if args.min_snr_gamma:
+                    loss = apply_snr_weight(loss, latents, noisy_latents, args.min_snr_gamma)
 
                 loss = loss.mean()  # mean over the batch, so no need to divide by batch_size
 
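apply_snr_weight estimates SNR empirically from latents and noisy_latents rather than reading it off the noise schedule. For comparison, the paper defines SNR(t) = alpha_bar_t / (1 - alpha_bar_t) directly from the schedule. A sketch of that schedule-based variant, assuming access to the scheduler's alphas_cumprod tensor (diffusers' DDPMScheduler exposes one); this is an alternative illustration, not this commit's code:

    import torch

    def snr_weight_from_schedule(timesteps, alphas_cumprod, gamma):
        # timesteps: LongTensor of sampled steps, shape [batch]
        # alphas_cumprod: 1-D tensor of cumulative alphas, e.g. scheduler.alphas_cumprod
        alpha_bar = alphas_cumprod[timesteps].float()
        snr = alpha_bar / (1.0 - alpha_bar)       # SNR(t) for the DDPM forward process
        return torch.clamp(gamma / snr, max=1.0)  # min(gamma / SNR(t), 1)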
@@ -662,6 +656,7 @@ def setup_parser() -> argparse.ArgumentParser:
     train_util.add_training_arguments(parser, True)
     train_util.add_optimizer_arguments(parser)
     config_util.add_config_arguments(parser)
+    custom_train_functions.add_custom_train_arguments(parser)
 
     parser.add_argument("--no_metadata", action="store_true", help="do not save metadata in output model / メタデータを出力先モデルに保存しない")
     parser.add_argument(
@@ -17,6 +17,8 @@ from library.config_util import (
     ConfigSanitizer,
     BlueprintGenerator,
 )
+import library.custom_train_functions as custom_train_functions
+from library.custom_train_functions import apply_snr_weight
 
 imagenet_templates_small = [
     "a photo of a {}",
@@ -378,6 +380,9 @@ def train(args):
                 loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none")
                 loss = loss.mean([1, 2, 3])
 
+                if args.min_snr_gamma:
+                    loss = apply_snr_weight(loss, latents, noisy_latents, args.min_snr_gamma)
+
                 loss_weights = batch["loss_weights"]  # per-sample weight
                 loss = loss * loss_weights
 
@@ -534,6 +539,7 @@ def setup_parser() -> argparse.ArgumentParser:
     train_util.add_training_arguments(parser, True)
     train_util.add_optimizer_arguments(parser)
     config_util.add_config_arguments(parser)
+    custom_train_functions.add_custom_train_arguments(parser)
 
     parser.add_argument(
         "--save_model_as",