From 0778dd9b1df0d6aa33287ded3ce4195f3d03251b Mon Sep 17 00:00:00 2001 From: Kohya S Date: Mon, 27 Jan 2025 22:03:42 +0900 Subject: [PATCH] fix Text Encoder only LoRA training --- flux_train_network.py | 2 +- sd3_train_network.py | 2 +- train_network.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flux_train_network.py b/flux_train_network.py index 5cd1b9d5..ae4b62f5 100644 --- a/flux_train_network.py +++ b/flux_train_network.py @@ -378,7 +378,7 @@ class FluxNetworkTrainer(train_network.NetworkTrainer): def call_dit(img, img_ids, t5_out, txt_ids, l_pooled, timesteps, guidance_vec, t5_attn_mask): # if not args.split_mode: # normal forward - with torch.set_grad_enabled(is_train and train_unet), accelerator.autocast(): + with torch.set_grad_enabled(is_train), accelerator.autocast(): # YiYi notes: divide it by 1000 for now because we scale it by 1000 in the transformer model (we should not keep it but I want to keep the inputs same for the model for testing) model_pred = unet( img=img, diff --git a/sd3_train_network.py b/sd3_train_network.py index dcf497f5..2f457949 100644 --- a/sd3_train_network.py +++ b/sd3_train_network.py @@ -345,7 +345,7 @@ class Sd3NetworkTrainer(train_network.NetworkTrainer): t5_attn_mask = None # call model - with torch.set_grad_enabled(is_train and train_unet), accelerator.autocast(): + with torch.set_grad_enabled(is_train), accelerator.autocast(): # TODO support attention mask model_pred = unet(noisy_model_input, timesteps, context=context, y=lg_pooled) diff --git a/train_network.py b/train_network.py index 2c3bb2aa..c3879531 100644 --- a/train_network.py +++ b/train_network.py @@ -233,7 +233,7 @@ class NetworkTrainer: t.requires_grad_(True) # Predict the noise residual - with torch.set_grad_enabled(is_train and train_unet), accelerator.autocast(): + with torch.set_grad_enabled(is_train), accelerator.autocast(): noise_pred = self.call_unet( args, accelerator,