From 92e7600cc2fea604321004f260e7db76c764f388 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 1 Sep 2024 18:57:07 +0900 Subject: [PATCH] Move freeze_blocks to sd3_train because it's only for sd3 --- README.md | 3 +++ library/train_util.py | 21 --------------------- sd3_train.py | 22 ++++++++++++++++++++-- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 30264e73..d9636719 100644 --- a/README.md +++ b/README.md @@ -309,6 +309,9 @@ resolution = [512, 512] SD3 training is done with `sd3_train.py`. +__Sep 1, 2024__: +- `--num_last_block_to_freeze` is added to `sd3_train.py`. This option is to freeze the last n blocks of the MMDiT. See [#1417](https://github.com/kohya-ss/sd-scripts/pull/1417) for details. Thanks to sdbds! + __Jul 27, 2024__: - Latents and text encoder outputs caching mechanism is refactored significantly. - Existing cache files for SD3 need to be recreated. Please delete the previous cache files. diff --git a/library/train_util.py b/library/train_util.py index 74aae0a7..989758ad 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3246,12 +3246,6 @@ def add_sd_models_arguments(parser: argparse.ArgumentParser): default=None, help="directory for caching Tokenizer (for offline training) / Tokenizerをキャッシュするディレクトリ(ネット接続なしでの学習のため)", ) - parser.add_argument( - "--num_last_block_to_freeze", - type=int, - default=None, - help="num_last_block_to_freeze", - ) def add_optimizer_arguments(parser: argparse.ArgumentParser): @@ -5764,21 +5758,6 @@ def sample_image_inference( pass -def freeze_blocks(model, num_last_block_to_freeze, block_name="x_block"): - - filtered_blocks = [(name, param) for name, param in model.named_parameters() if block_name in name] - print(f"filtered_blocks: {len(filtered_blocks)}") - - num_blocks_to_freeze = min(len(filtered_blocks), num_last_block_to_freeze) - - print(f"freeze_blocks: {num_blocks_to_freeze}") - - start_freezing_from = max(0, len(filtered_blocks) - num_blocks_to_freeze) - - for i in range(start_freezing_from, len(filtered_blocks)): - _, param = filtered_blocks[i] - param.requires_grad = False - # endregion diff --git a/sd3_train.py b/sd3_train.py index ce9500b0..87011b21 100644 --- a/sd3_train.py +++ b/sd3_train.py @@ -373,7 +373,20 @@ def train(args): mmdit.to(accelerator.device, dtype=weight_dtype) # because of unet is not prepared if args.num_last_block_to_freeze: - train_util.freeze_blocks(mmdit,num_last_block_to_freeze=args.num_last_block_to_freeze) + # freeze last n blocks of MM-DIT + block_name = "x_block" + filtered_blocks = [(name, param) for name, param in mmdit.named_parameters() if block_name in name] + accelerator.print(f"filtered_blocks: {len(filtered_blocks)}") + + num_blocks_to_freeze = min(len(filtered_blocks), args.num_last_block_to_freeze) + + accelerator.print(f"freeze_blocks: {num_blocks_to_freeze}") + + start_freezing_from = max(0, len(filtered_blocks) - num_blocks_to_freeze) + + for i in range(start_freezing_from, len(filtered_blocks)): + _, param = filtered_blocks[i] + param.requires_grad = False training_models = [] params_to_optimize = [] @@ -1033,12 +1046,17 @@ def setup_parser() -> argparse.ArgumentParser: default=None, help="number of optimizers for fused backward pass and optimizer step / fused backward passとoptimizer stepのためのoptimizer数", ) - parser.add_argument( "--skip_latents_validity_check", action="store_true", help="skip latents validity check / latentsの正当性チェックをスキップする", ) + parser.add_argument( + "--num_last_block_to_freeze", + type=int, + default=None, + help="freeze last n blocks of MM-DIT / MM-DITの最後のnブロックを凍結する", + ) return parser