From 0778dd9b1df0d6aa33287ded3ce4195f3d03251b Mon Sep 17 00:00:00 2001
From: Kohya S <ykumeykume@gmail.com>
Date: Mon, 27 Jan 2025 22:03:42 +0900
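Subject: [PATCH] Fix Text Encoder-only LoRA training

When only the Text Encoder is trained (train_unet is False), the
U-Net/DiT forward pass ran under torch.set_grad_enabled(False), so no
autograd graph was built through the model and the loss could not
backpropagate to the Text Encoder. Enable gradients whenever is_train
is set, regardless of train_unet.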

---
 flux_train_network.py | 2 +-
 sd3_train_network.py  | 2 +-
 train_network.py      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/flux_train_network.py b/flux_train_network.py
index 5cd1b9d5..ae4b62f5 100644
--- a/flux_train_network.py
+++ b/flux_train_network.py
@@ -378,7 +378,7 @@ class FluxNetworkTrainer(train_network.NetworkTrainer):
         def call_dit(img, img_ids, t5_out, txt_ids, l_pooled, timesteps, guidance_vec, t5_attn_mask):
             # if not args.split_mode:
             # normal forward
-            with torch.set_grad_enabled(is_train and train_unet), accelerator.autocast():
+            with torch.set_grad_enabled(is_train), accelerator.autocast():
                 # YiYi notes: divide it by 1000 for now because we scale it by 1000 in the transformer model (we should not keep it but I want to keep the inputs same for the model for testing)
                 model_pred = unet(
                     img=img,
diff --git a/sd3_train_network.py b/sd3_train_network.py
index dcf497f5..2f457949 100644
--- a/sd3_train_network.py
+++ b/sd3_train_network.py
@@ -345,7 +345,7 @@ class Sd3NetworkTrainer(train_network.NetworkTrainer):
             t5_attn_mask = None
 
         # call model
-        with torch.set_grad_enabled(is_train and train_unet), accelerator.autocast():
+        with torch.set_grad_enabled(is_train), accelerator.autocast():
             # TODO support attention mask
             model_pred = unet(noisy_model_input, timesteps, context=context, y=lg_pooled)
 
diff --git a/train_network.py b/train_network.py
index 2c3bb2aa..c3879531 100644
--- a/train_network.py
+++ b/train_network.py
@@ -233,7 +233,7 @@ class NetworkTrainer:
                 t.requires_grad_(True)
 
         # Predict the noise residual
-        with torch.set_grad_enabled(is_train and train_unet), accelerator.autocast():
+        with torch.set_grad_enabled(is_train), accelerator.autocast():
             noise_pred = self.call_unet(
                 args,
                 accelerator,