From de830b89416f0671d7a1364a9262fa850c0669df Mon Sep 17 00:00:00 2001 From: rockerBOO Date: Wed, 29 Jan 2025 00:02:45 -0500 Subject: [PATCH 1/4] Move progress bar to account for sampling image first --- train_network.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/train_network.py b/train_network.py index c3879531..2deb736d 100644 --- a/train_network.py +++ b/train_network.py @@ -1163,10 +1163,6 @@ class NetworkTrainer: args.max_train_steps > initial_step ), f"max_train_steps should be greater than initial step / max_train_stepsは初期ステップより大きい必要があります: {args.max_train_steps} vs {initial_step}" - progress_bar = tqdm( - range(args.max_train_steps - initial_step), smoothing=0, disable=not accelerator.is_local_main_process, desc="steps" - ) - epoch_to_start = 0 if initial_step > 0: if args.skip_until_initial_step: @@ -1271,6 +1267,10 @@ class NetworkTrainer: clean_memory_on_device(accelerator.device) + progress_bar = tqdm( + range(args.max_train_steps - initial_step), smoothing=0, disable=not accelerator.is_local_main_process, desc="steps" + ) + for epoch in range(epoch_to_start, num_train_epochs): accelerator.print(f"\nepoch {epoch+1}/{num_train_epochs}\n") current_epoch.value = epoch + 1 From 4a71687d20787d78a30b7a0df327067f5c402999 Mon Sep 17 00:00:00 2001 From: tsukimiya <71832+tsukimiya@users.noreply.github.com> Date: Tue, 4 Feb 2025 00:42:27 +0900 Subject: [PATCH 2/4] =?UTF-8?q?=E4=B8=8D=E8=A6=81=E3=81=AA=E8=AD=A6?= =?UTF-8?q?=E5=91=8A=E3=81=AE=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (おそらく https://github.com/kohya-ss/sd-scripts/commit/be14c062674973d0e4fee1eb4527e04707bb72b8 の修正漏れ ) --- library/sdxl_train_util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/library/sdxl_train_util.py b/library/sdxl_train_util.py index b74bea91..f78d9424 100644 --- a/library/sdxl_train_util.py +++ b/library/sdxl_train_util.py @@ -345,8 +345,6 @@ def 
add_sdxl_training_arguments(parser: argparse.ArgumentParser): def verify_sdxl_training_args(args: argparse.Namespace, supportTextEncoderCaching: bool = True): assert not args.v2, "v2 cannot be enabled in SDXL training / SDXL学習ではv2を有効にすることはできません" - if args.v_parameterization: - logger.warning("v_parameterization will be unexpected / SDXL学習ではv_parameterizationは想定外の動作になります") if args.clip_skip is not None: logger.warning("clip_skip will be unexpected / SDXL学習ではclip_skipは動作しません") From 13df47516dda6e350b6aa79373b5a0e7287648b5 Mon Sep 17 00:00:00 2001 From: Yidi Date: Thu, 20 Feb 2025 04:49:51 -0500 Subject: [PATCH 3/4] Remove position_ids for V2 The position_ids cause errors with newer versions of transformers. This has already been fixed in convert_ldm_clip_checkpoint_v1() but not in v2. The new code applies the same fix to convert_ldm_clip_checkpoint_v2(). --- library/model_util.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/library/model_util.py b/library/model_util.py index be410a02..9918c7b2 100644 --- a/library/model_util.py +++ b/library/model_util.py @@ -643,16 +643,15 @@ def convert_ldm_clip_checkpoint_v2(checkpoint, max_length): new_sd[key_pfx + "k_proj" + key_suffix] = values[1] new_sd[key_pfx + "v_proj" + key_suffix] = values[2] - # rename or add position_ids + # remove position_ids for newer transformer, which causes error :( ANOTHER_POSITION_IDS_KEY = "text_model.encoder.text_model.embeddings.position_ids" if ANOTHER_POSITION_IDS_KEY in new_sd: # waifu diffusion v1.4 - position_ids = new_sd[ANOTHER_POSITION_IDS_KEY] del new_sd[ANOTHER_POSITION_IDS_KEY] - else: - position_ids = torch.Tensor([list(range(max_length))]).to(torch.int64) - new_sd["text_model.embeddings.position_ids"] = position_ids + if "text_model.embeddings.position_ids" in new_sd: + del new_sd["text_model.embeddings.position_ids"] + return new_sd From ae409e83c939f2c4a997cfb1679bd7cd364baf7e Mon Sep 17 00:00:00 2001 From: Kohya S Date: Wed, 26 Feb 2025 20:56:32
+0900 Subject: [PATCH 4/4] fix: FLUX/SD3 network training not working without caching latents closes #1954 --- flux_train_network.py | 11 ++++++++--- sd3_train_network.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/flux_train_network.py b/flux_train_network.py index ae4b62f5..26503df1 100644 --- a/flux_train_network.py +++ b/flux_train_network.py @@ -36,7 +36,12 @@ class FluxNetworkTrainer(train_network.NetworkTrainer): self.is_schnell: Optional[bool] = None self.is_swapping_blocks: bool = False - def assert_extra_args(self, args, train_dataset_group: Union[train_util.DatasetGroup, train_util.MinimalDataset], val_dataset_group: Optional[train_util.DatasetGroup]): + def assert_extra_args( + self, + args, + train_dataset_group: Union[train_util.DatasetGroup, train_util.MinimalDataset], + val_dataset_group: Optional[train_util.DatasetGroup], + ): super().assert_extra_args(args, train_dataset_group, val_dataset_group) # sdxl_train_util.verify_sdxl_training_args(args) @@ -323,7 +328,7 @@ class FluxNetworkTrainer(train_network.NetworkTrainer): self.noise_scheduler_copy = copy.deepcopy(noise_scheduler) return noise_scheduler - def encode_images_to_latents(self, args, accelerator, vae, images): + def encode_images_to_latents(self, args, vae, images): return vae.encode(images) def shift_scale_latents(self, args, latents): @@ -341,7 +346,7 @@ class FluxNetworkTrainer(train_network.NetworkTrainer): network, weight_dtype, train_unet, - is_train=True + is_train=True, ): # Sample noise that we'll add to the latents noise = torch.randn_like(latents) diff --git a/sd3_train_network.py b/sd3_train_network.py index 2f457949..9438bc7b 100644 --- a/sd3_train_network.py +++ b/sd3_train_network.py @@ -26,7 +26,12 @@ class Sd3NetworkTrainer(train_network.NetworkTrainer): super().__init__() self.sample_prompts_te_outputs = None - def assert_extra_args(self, args, train_dataset_group: Union[train_util.DatasetGroup, train_util.MinimalDataset], 
val_dataset_group: Optional[train_util.DatasetGroup]): + def assert_extra_args( + self, + args, + train_dataset_group: Union[train_util.DatasetGroup, train_util.MinimalDataset], + val_dataset_group: Optional[train_util.DatasetGroup], + ): # super().assert_extra_args(args, train_dataset_group) # sdxl_train_util.verify_sdxl_training_args(args) @@ -299,7 +304,7 @@ class Sd3NetworkTrainer(train_network.NetworkTrainer): noise_scheduler = sd3_train_utils.FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=args.training_shift) return noise_scheduler - def encode_images_to_latents(self, args, accelerator, vae, images): + def encode_images_to_latents(self, args, vae, images): return vae.encode(images) def shift_scale_latents(self, args, latents): @@ -317,7 +322,7 @@ class Sd3NetworkTrainer(train_network.NetworkTrainer): network, weight_dtype, train_unet, - is_train=True + is_train=True, ): # Sample noise that we'll add to the latents noise = torch.randn_like(latents)