Move freeze_blocks to sd3_train because it's only for sd3

This commit is contained in:
Kohya S
2024-09-01 18:57:07 +09:00
parent ef510b3cb9
commit 92e7600cc2
3 changed files with 23 additions and 23 deletions

View File

@@ -309,6 +309,9 @@ resolution = [512, 512]
SD3 training is done with `sd3_train.py`.
__Sep 1, 2024__:
- `--num_last_block_to_freeze` is added to `sd3_train.py`. This option is to freeze the last n blocks of the MMDiT. See [#1417](https://github.com/kohya-ss/sd-scripts/pull/1417) for details. Thanks to sdbds!
__Jul 27, 2024__:
- Latents and text encoder outputs caching mechanism is refactored significantly.
- Existing cache files for SD3 need to be recreated. Please delete the previous cache files.

View File

@@ -3246,12 +3246,6 @@ def add_sd_models_arguments(parser: argparse.ArgumentParser):
default=None,
help="directory for caching Tokenizer (for offline training) / Tokenizerをキャッシュするディレクトリネット接続なしでの学習のため",
)
parser.add_argument(
"--num_last_block_to_freeze",
type=int,
default=None,
help="num_last_block_to_freeze",
)
def add_optimizer_arguments(parser: argparse.ArgumentParser):
@@ -5764,21 +5758,6 @@ def sample_image_inference(
pass
def freeze_blocks(model, num_last_block_to_freeze, block_name="x_block"):
    """Turn off gradients for the trailing parameters whose names contain *block_name*.

    Collects every parameter of *model* whose qualified name includes
    *block_name*, then sets ``requires_grad = False`` on the last
    ``num_last_block_to_freeze`` of them (clamped to the number available).

    NOTE(review): this counts individual parameters (weight and bias
    separately), not whole transformer blocks — confirm that is intended.
    """
    matching = [p for n, p in model.named_parameters() if block_name in n]
    print(f"filtered_blocks: {len(matching)}")
    freeze_count = min(len(matching), num_last_block_to_freeze)
    print(f"freeze_blocks: {freeze_count}")
    # Freezing the tail of the list == freezing the last parameters in
    # named_parameters() order, exactly as the original index loop did.
    for param in matching[len(matching) - freeze_count:]:
        param.requires_grad = False
# endregion

View File

@@ -373,7 +373,20 @@ def train(args):
mmdit.to(accelerator.device, dtype=weight_dtype)  # because of unet is not prepared
if args.num_last_block_to_freeze:
train_util.freeze_blocks(mmdit,num_last_block_to_freeze=args.num_last_block_to_freeze) # freeze last n blocks of MM-DIT
block_name = "x_block"
filtered_blocks = [(name, param) for name, param in mmdit.named_parameters() if block_name in name]
accelerator.print(f"filtered_blocks: {len(filtered_blocks)}")
num_blocks_to_freeze = min(len(filtered_blocks), args.num_last_block_to_freeze)
accelerator.print(f"freeze_blocks: {num_blocks_to_freeze}")
start_freezing_from = max(0, len(filtered_blocks) - num_blocks_to_freeze)
for i in range(start_freezing_from, len(filtered_blocks)):
_, param = filtered_blocks[i]
param.requires_grad = False
training_models = []
params_to_optimize = []
@@ -1033,12 +1046,17 @@ def setup_parser() -> argparse.ArgumentParser:
default=None,
help="number of optimizers for fused backward pass and optimizer step / fused backward passとoptimizer stepのためのoptimizer数",
)
parser.add_argument(
    "--skip_latents_validity_check",
    action="store_true",
    help="skip latents validity check / latentsの正当性チェックをスキップする",
)
parser.add_argument(
"--num_last_block_to_freeze",
type=int,
default=None,
help="freeze last n blocks of MM-DIT / MM-DITの最後のnブロックを凍結する",
)
return parser