From 9dc7997803d70c718969526352e88908e827f091 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Tue, 9 Jul 2024 20:37:00 +0900 Subject: [PATCH] fix typo --- library/sd3_models.py | 2 +- library/sd3_train_utils.py | 2 +- sd3_train.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/library/sd3_models.py b/library/sd3_models.py index e4c0790d..a1ff1e75 100644 --- a/library/sd3_models.py +++ b/library/sd3_models.py @@ -1643,7 +1643,7 @@ class T5LayerNorm(torch.nn.Module): # copy from transformers' T5LayerNorm def forward(self, hidden_states): # T5 uses a layer_norm which only scales and doesn't shift, which is also known as Root Mean - # Square Layer Normalization https://arxiv.org/abs/1910.07467 thus varience is calculated + # Square Layer Normalization https://arxiv.org/abs/1910.07467 thus variance is calculated # w/o mean and there is no bias. Additionally we want to make sure that the accumulation for # half-precision inputs is done in fp32 variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True) diff --git a/library/sd3_train_utils.py b/library/sd3_train_utils.py index 98ee66bf..66034210 100644 --- a/library/sd3_train_utils.py +++ b/library/sd3_train_utils.py @@ -279,7 +279,7 @@ def sample_images(*args, **kwargs): return train_util.sample_images_common(SdxlStableDiffusionLongPromptWeightingPipeline, *args, **kwargs) -class Sd3LatensCachingStrategy(train_util.LatentsCachingStrategy): +class Sd3LatentsCachingStrategy(train_util.LatentsCachingStrategy): SD3_LATENTS_NPZ_SUFFIX = "_sd3.npz" def __init__(self, vae: sd3_models.SDVAE, cache_to_disk: bool, batch_size: int, skip_disk_cache_validity_check: bool) -> None: diff --git a/sd3_train.py b/sd3_train.py index 10cc5d57..30d994c7 100644 --- a/sd3_train.py +++ b/sd3_train.py @@ -217,7 +217,7 @@ def train(args): file_suffix="_sd3.npz", ) else: - strategy = sd3_train_utils.Sd3LatensCachingStrategy( + strategy = sd3_train_utils.Sd3LatentsCachingStrategy( vae, args.cache_latents_to_disk, args.vae_batch_size, args.skip_latents_validity_check ) train_dataset_group.new_cache_latents(accelerator.is_main_process, strategy)