From b18d0992914d21e794d28000e3e5be53066e9164 Mon Sep 17 00:00:00 2001 From: Pam Date: Tue, 2 May 2023 09:42:17 +0500 Subject: [PATCH 01/10] Multi-Resolution Noise --- fine_tune.py | 4 +++- library/custom_train_functions.py | 13 +++++++++++++ library/train_util.py | 12 ++++++++++++ train_db.py | 4 +++- train_network.py | 6 +++++- train_textual_inversion.py | 4 +++- train_textual_inversion_XTI.py | 4 +++- 7 files changed, 42 insertions(+), 5 deletions(-) diff --git a/fine_tune.py b/fine_tune.py index b6a8d1d7..f0641e85 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -21,7 +21,7 @@ from library.config_util import ( BlueprintGenerator, ) import library.custom_train_functions as custom_train_functions -from library.custom_train_functions import apply_snr_weight, get_weighted_text_embeddings +from library.custom_train_functions import apply_snr_weight, get_weighted_text_embeddings, pyramid_noise_like def train(args): @@ -304,6 +304,8 @@ def train(args): if args.noise_offset: # https://www.crosslabs.org//blog/diffusion-with-offset-noise noise += args.noise_offset * torch.randn((latents.shape[0], latents.shape[1], 1, 1), device=latents.device) + elif args.multires_noise_iterations: + noise = pyramid_noise_like(noise, latents.device, args.multires_noise_iterations, args.multires_noise_discount) # Sample a random timestep for each image timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (b_size,), device=latents.device) diff --git a/library/custom_train_functions.py b/library/custom_train_functions.py index 7eb829fa..70a33e11 100644 --- a/library/custom_train_functions.py +++ b/library/custom_train_functions.py @@ -1,5 +1,6 @@ import torch import argparse +import random import re from typing import List, Optional, Union @@ -342,3 +343,15 @@ def get_weighted_text_embeddings( text_embeddings = text_embeddings * (previous_mean / current_mean).unsqueeze(-1).unsqueeze(-1) return text_embeddings + + +# 
https://wandb.ai/johnowhitaker/multires_noise/reports/Multi-Resolution-Noise-for-Diffusion-Model-Training--VmlldzozNjYyOTU2 +def pyramid_noise_like(noise, device, iterations=6, discount=0.3): + b, c, w, h = noise.shape + u = torch.nn.Upsample(size=(w, h), mode='bilinear').to(device) + for i in range(iterations): + r = random.random()*2+2 # Rather than always going 2x, + w, h = max(1, int(w/(r**i))), max(1, int(h/(r**i))) + noise += u(torch.randn(b, c, w, h).to(device)) * discount**i + if w==1 or h==1: break # Lowest resolution is 1x1 + return noise/noise.std() # Scaled back to roughly unit variance diff --git a/library/train_util.py b/library/train_util.py index 8c6e3437..2c107237 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -2119,6 +2119,18 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: default=None, help="enable noise offset with this value (if enabled, around 0.1 is recommended) / Noise offsetを有効にしてこの値を設定する(有効にする場合は0.1程度を推奨)", ) + parser.add_argument( + "--multires_noise_iterations", + type=int, + default=None, + help="enable multires noise with this number of iterations (if enabled, around 6-10 is recommended)" + ) + parser.add_argument( + "--multires_noise_discount", + type=float, + default=0.3, + help="set discount value for multires noise (has no effect without --multires_noise_iterations)" + ) parser.add_argument( "--lowram", action="store_true", diff --git a/train_db.py b/train_db.py index 178d5cb4..4d054e9a 100644 --- a/train_db.py +++ b/train_db.py @@ -23,7 +23,7 @@ from library.config_util import ( BlueprintGenerator, ) import library.custom_train_functions as custom_train_functions -from library.custom_train_functions import apply_snr_weight, get_weighted_text_embeddings +from library.custom_train_functions import apply_snr_weight, get_weighted_text_embeddings, pyramid_noise_like def train(args): @@ -270,6 +270,8 @@ def train(args): if args.noise_offset: # 
https://www.crosslabs.org//blog/diffusion-with-offset-noise noise += args.noise_offset * torch.randn((latents.shape[0], latents.shape[1], 1, 1), device=latents.device) + elif args.multires_noise_iterations: + noise = pyramid_noise_like(noise, latents.device, args.multires_noise_iterations, args.multires_noise_discount) # Get the text embedding for conditioning with torch.set_grad_enabled(global_step < args.stop_text_encoder_training): diff --git a/train_network.py b/train_network.py index 5c4d5ad1..60007433 100644 --- a/train_network.py +++ b/train_network.py @@ -26,7 +26,7 @@ from library.config_util import ( ) import library.huggingface_util as huggingface_util import library.custom_train_functions as custom_train_functions -from library.custom_train_functions import apply_snr_weight, get_weighted_text_embeddings +from library.custom_train_functions import apply_snr_weight, get_weighted_text_embeddings, pyramid_noise_like # TODO 他のスクリプトと共通化する @@ -366,6 +366,8 @@ def train(args): "ss_seed": args.seed, "ss_lowram": args.lowram, "ss_noise_offset": args.noise_offset, + "ss_multires_noise_iterations": args.multires_noise_iterations, + "ss_multires_noise_discount": args.multires_noise_discount, "ss_training_comment": args.training_comment, # will not be updated after training "ss_sd_scripts_commit_hash": train_util.get_git_revision_hash(), "ss_optimizer": optimizer_name + (f"({optimizer_args})" if len(optimizer_args) > 0 else ""), @@ -612,6 +614,8 @@ def train(args): if args.noise_offset: # https://www.crosslabs.org//blog/diffusion-with-offset-noise noise += args.noise_offset * torch.randn((latents.shape[0], latents.shape[1], 1, 1), device=latents.device) + elif args.multires_noise_iterations: + noise = pyramid_noise_like(noise, latents.device, args.multires_noise_iterations, args.multires_noise_discount) # Sample a random timestep for each image timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (b_size,), device=latents.device) diff --git 
a/train_textual_inversion.py b/train_textual_inversion.py index fb6b6053..d77a8878 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -20,7 +20,7 @@ from library.config_util import ( BlueprintGenerator, ) import library.custom_train_functions as custom_train_functions -from library.custom_train_functions import apply_snr_weight +from library.custom_train_functions import apply_snr_weight, pyramid_noise_like imagenet_templates_small = [ "a photo of a {}", @@ -386,6 +386,8 @@ def train(args): if args.noise_offset: # https://www.crosslabs.org//blog/diffusion-with-offset-noise noise += args.noise_offset * torch.randn((latents.shape[0], latents.shape[1], 1, 1), device=latents.device) + elif args.multires_noise_iterations: + noise = pyramid_noise_like(noise, latents.device, args.multires_noise_iterations, args.multires_noise_discount) # Sample a random timestep for each image timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (b_size,), device=latents.device) diff --git a/train_textual_inversion_XTI.py b/train_textual_inversion_XTI.py index 69ec3eb1..27c5c2df 100644 --- a/train_textual_inversion_XTI.py +++ b/train_textual_inversion_XTI.py @@ -20,7 +20,7 @@ from library.config_util import ( BlueprintGenerator, ) import library.custom_train_functions as custom_train_functions -from library.custom_train_functions import apply_snr_weight +from library.custom_train_functions import apply_snr_weight, pyramid_noise_like from XTI_hijack import unet_forward_XTI, downblock_forward_XTI, upblock_forward_XTI imagenet_templates_small = [ @@ -425,6 +425,8 @@ def train(args): if args.noise_offset: # https://www.crosslabs.org//blog/diffusion-with-offset-noise noise += args.noise_offset * torch.randn((latents.shape[0], latents.shape[1], 1, 1), device=latents.device) + elif args.multires_noise_iterations: + noise = pyramid_noise_like(noise, latents.device, args.multires_noise_iterations, args.multires_noise_discount) # Sample a random timestep 
for each image timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (b_size,), device=latents.device) From 335b2f960e24be4a4ae4a258cf210318502f9de9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=92=E9=BE=8D=E8=81=96=E8=80=85=40bdsqlsz?= <865105819@qq.com> Date: Wed, 3 May 2023 09:22:40 +0800 Subject: [PATCH 02/10] Support for Lion8bit (#447) * ADD libbitsandbytes.dll for 0.38.1 * Delete libbitsandbytes_cuda116.dll * Delete cextension.py * add main.py * Update requirements.txt for bitsandbytes 0.38.1 * Update README.md for bitsandbytes-windows * Update README-ja.md for bitsandbytes 0.38.1 * Update main.py for return cuda118 * Update train_util.py for lion8bit * Update train_README-ja.md for lion8bit * Update train_util.py for add DAdaptAdan and DAdaptSGD * Update train_util.py for DAdaptadam * Update train_network.py for dadapt * Update train_README-ja.md for DAdapt * Update train_util.py for DAdapt * Update train_network.py for DAdaptAdaGrad * Update train_db.py for DAdapt * Update fine_tune.py for DAdapt * Update train_textual_inversion.py for DAdapt * Update train_textual_inversion_XTI.py for DAdapt * Revert "Merge branch 'qinglong' into main" This reverts commit b65c023083d6d1e8a30eb42eddd603d1aac97650, reversing changes made to f6fda20caf5e773d56bcfb5c4575c650bb85362b. * Revert "Update requirements.txt for bitsandbytes 0.38.1" This reverts commit 83abc60dfaddb26845f54228425b98dd67997528. * Revert "Delete cextension.py" This reverts commit 3ba4dfe046874393f2a022a4cbef3628ada35391. * Revert "Update README.md for bitsandbytes-windows" This reverts commit 4642c52086b5e9791233007e2fdfd97f832cd897. * Revert "Update README-ja.md for bitsandbytes 0.38.1" This reverts commit fa6d7485ac067ebc49e6f381afdb8dd2f12caa8f. * Revert "ADD libbitsandbytes.dll for 0.38.1" This reverts commit bee1e6f731d2428dacb34b61997f06143c69c278. * Revert "Delete libbitsandbytes_cuda116.dll" This reverts commit 891c7e92623dab92f3767663982627cca6a26724. 
* reverse main.py * Reverse main.py --- library/train_util.py | 13 +++++++++++-- train_README-ja.md | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index 8c6e3437..a8ee260c 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1883,7 +1883,7 @@ def add_optimizer_arguments(parser: argparse.ArgumentParser): "--optimizer_type", type=str, default="", - help="Optimizer to use / オプティマイザの種類: AdamW (default), AdamW8bit, Lion, SGDNesterov, SGDNesterov8bit, DAdaptation, AdaFactor", + help="Optimizer to use / オプティマイザの種類: AdamW (default), AdamW8bit, Lion, Lion8bit,SGDNesterov, SGDNesterov8bit, DAdaptation, AdaFactor", ) # backward compatibility @@ -2448,7 +2448,7 @@ def resume_from_local_or_hf_if_specified(accelerator, args): def get_optimizer(args, trainable_params): - # "Optimizer to use: AdamW, AdamW8bit, Lion, SGDNesterov, SGDNesterov8bit, DAdaptation, Adafactor" + # "Optimizer to use: AdamW, AdamW8bit, Lion, Lion8bit, SGDNesterov, SGDNesterov8bit, DAdaptation, Adafactor" optimizer_type = args.optimizer_type if args.use_8bit_adam: @@ -2525,6 +2525,15 @@ def get_optimizer(args, trainable_params): print(f"use Lion optimizer | {optimizer_kwargs}") optimizer_class = lion_pytorch.Lion optimizer = optimizer_class(trainable_params, lr=lr, **optimizer_kwargs) + + elif optimizer_type == "Lion8bit".lower(): + try: + import bitsandbytes as bnb + except ImportError: + raise ImportError("No bitsand bytes / bitsandbytesがインストールされていないようです") + print(f"use 8-bit Lion optimizer | {optimizer_kwargs}") + optimizer_class = bnb.optim.Lion8bit + optimizer = optimizer_class(trainable_params, lr=lr, **optimizer_kwargs) elif optimizer_type == "SGDNesterov".lower(): print(f"use SGD with Nesterov optimizer | {optimizer_kwargs}") diff --git a/train_README-ja.md b/train_README-ja.md index fd66458a..a155febd 100644 --- a/train_README-ja.md +++ b/train_README-ja.md @@ -563,6 +563,7 @@ masterpiece, best quality, 1boy, in 
business suit, standing at street, looking b - 過去のバージョンの--use_8bit_adam指定時と同じ - Lion : https://github.com/lucidrains/lion-pytorch - 過去のバージョンの--use_lion_optimizer指定時と同じ + - Lion8bit : 引数は同上 - SGDNesterov : [torch.optim.SGD](https://pytorch.org/docs/stable/generated/torch.optim.SGD.html), nesterov=True - SGDNesterov8bit : 引数は同上 - DAdaptation : https://github.com/facebookresearch/dadaptation From a7485e4d9e27455b4dd95235f8c840d905a6623e Mon Sep 17 00:00:00 2001 From: ykume Date: Wed, 3 May 2023 10:35:47 +0900 Subject: [PATCH 03/10] Add error message if no Lion8bit --- library/train_util.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index a8ee260c..6c064738 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -2525,14 +2525,21 @@ def get_optimizer(args, trainable_params): print(f"use Lion optimizer | {optimizer_kwargs}") optimizer_class = lion_pytorch.Lion optimizer = optimizer_class(trainable_params, lr=lr, **optimizer_kwargs) - + elif optimizer_type == "Lion8bit".lower(): try: import bitsandbytes as bnb except ImportError: - raise ImportError("No bitsand bytes / bitsandbytesがインストールされていないようです") + raise ImportError("No bitsandbytes / bitsandbytesがインストールされていないようです") + print(f"use 8-bit Lion optimizer | {optimizer_kwargs}") - optimizer_class = bnb.optim.Lion8bit + try: + optimizer_class = bnb.optim.Lion8bit + except AttributeError: + raise AttributeError( + "No Lion8bit. The version of bitsandbytes installed seems to be old. Please install 0.38.0 or later. 
/ Lion8bitが定義されていません。インストールされているbitsandbytesのバージョンが古いようです。0.38.0以上をインストールしてください" + ) + optimizer = optimizer_class(trainable_params, lr=lr, **optimizer_kwargs) elif optimizer_type == "SGDNesterov".lower(): From e1143caf381a60bcee50d4bf76de26093327059a Mon Sep 17 00:00:00 2001 From: Isotr0py <41363108+Isotr0py@users.noreply.github.com> Date: Wed, 3 May 2023 09:37:47 +0800 Subject: [PATCH 04/10] Fix DDP issues and Support DDP for all training scripts (#448) * Fix DDP bugs * Fix DDP bugs for finetune and db * refactor model loader * fix DDP network * try to fix DDP network in train unet only * remove unuse DDP import * refactor DDP transform * refactor DDP transform * fix sample images bugs * change DDP tranform location * add autocast to train_db * support DDP in XTI * Clear DDP import --- fine_tune.py | 5 ++++- library/train_util.py | 34 +++++++++++++++++++++++++++++++- networks/lora_interrogator.py | 2 +- train_db.py | 8 ++++++-- train_network.py | 36 ++++++---------------------------- train_textual_inversion.py | 5 ++++- train_textual_inversion_XTI.py | 5 ++++- 7 files changed, 58 insertions(+), 37 deletions(-) diff --git a/fine_tune.py b/fine_tune.py index b6a8d1d7..db1c8a23 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -90,7 +90,7 @@ def train(args): weight_dtype, save_dtype = train_util.prepare_dtype(args) # モデルを読み込む - text_encoder, vae, unet, load_stable_diffusion_format = train_util.load_target_model(args, weight_dtype) + text_encoder, vae, unet, load_stable_diffusion_format = train_util.load_target_model(args, weight_dtype, accelerator) # verify load/save model formats if load_stable_diffusion_format: @@ -228,6 +228,9 @@ def train(args): else: unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler) + # transform DDP after prepare + text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet) + # 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする if args.full_fp16: 
train_util.patch_accelerator_for_fp16_training(accelerator) diff --git a/library/train_util.py b/library/train_util.py index 6c064738..1a3b2ed0 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -19,6 +19,7 @@ from typing import ( Union, ) from accelerate import Accelerator +import gc import glob import math import os @@ -30,6 +31,7 @@ import toml from tqdm import tqdm import torch +from torch.nn.parallel import DistributedDataParallel as DDP from torch.optim import Optimizer from torchvision import transforms from transformers import CLIPTokenizer @@ -2866,7 +2868,7 @@ def prepare_dtype(args: argparse.Namespace): return weight_dtype, save_dtype -def load_target_model(args: argparse.Namespace, weight_dtype, device="cpu"): +def _load_target_model(args: argparse.Namespace, weight_dtype, device="cpu"): name_or_path = args.pretrained_model_name_or_path name_or_path = os.readlink(name_or_path) if os.path.islink(name_or_path) else name_or_path load_stable_diffusion_format = os.path.isfile(name_or_path) # determine SD or Diffusers @@ -2895,6 +2897,36 @@ def load_target_model(args: argparse.Namespace, weight_dtype, device="cpu"): return text_encoder, vae, unet, load_stable_diffusion_format +def transform_DDP(text_encoder, unet, network=None): + # Transform text_encoder, unet and network from DistributedDataParallel + return (encoder.module if type(encoder) == DDP else encoder for encoder in [text_encoder, unet, network]) + + +def load_target_model(args, weight_dtype, accelerator): + # load models for each process + for pi in range(accelerator.state.num_processes): + if pi == accelerator.state.local_process_index: + print(f"loading model for process {accelerator.state.local_process_index}/{accelerator.state.num_processes}") + + text_encoder, vae, unet, load_stable_diffusion_format = _load_target_model( + args, weight_dtype, accelerator.device if args.lowram else "cpu" + ) + + # work on low-ram device + if args.lowram: + text_encoder.to(accelerator.device) + 
unet.to(accelerator.device) + vae.to(accelerator.device) + + gc.collect() + torch.cuda.empty_cache() + accelerator.wait_for_everyone() + + text_encoder, unet, _ = transform_DDP(text_encoder, unet, network=None) + + return text_encoder, vae, unet, load_stable_diffusion_format + + def patch_accelerator_for_fp16_training(accelerator): org_unscale_grads = accelerator.scaler._unscale_grads_ diff --git a/networks/lora_interrogator.py b/networks/lora_interrogator.py index beb25181..0dc066fd 100644 --- a/networks/lora_interrogator.py +++ b/networks/lora_interrogator.py @@ -23,7 +23,7 @@ def interrogate(args): print(f"loading SD model: {args.sd_model}") args.pretrained_model_name_or_path = args.sd_model args.vae = None - text_encoder, vae, unet, _ = train_util.load_target_model(args,weights_dtype, DEVICE) + text_encoder, vae, unet, _ = train_util._load_target_model(args,weights_dtype, DEVICE) print(f"loading LoRA: {args.model}") network, weights_sd = lora.create_network_from_weights(1.0, args.model, vae, text_encoder, unet) diff --git a/train_db.py b/train_db.py index 178d5cb4..abe2ecdf 100644 --- a/train_db.py +++ b/train_db.py @@ -92,7 +92,7 @@ def train(args): weight_dtype, save_dtype = train_util.prepare_dtype(args) # モデルを読み込む - text_encoder, vae, unet, load_stable_diffusion_format = train_util.load_target_model(args, weight_dtype) + text_encoder, vae, unet, load_stable_diffusion_format = train_util.load_target_model(args, weight_dtype, accelerator) # verify load/save model formats if load_stable_diffusion_format: @@ -196,6 +196,9 @@ def train(args): else: unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler) + # transform DDP after prepare + text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet) + if not train_text_encoder: text_encoder.to(accelerator.device, dtype=weight_dtype) # to avoid 'cpu' vs 'cuda' error @@ -297,7 +300,8 @@ def train(args): noisy_latents = 
noise_scheduler.add_noise(latents, noise, timesteps) # Predict the noise residual - noise_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample + with accelerator.autocast(): + noise_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample if args.v_parameterization: # v-parameterization training diff --git a/train_network.py b/train_network.py index 5c4d5ad1..c5ec0ebd 100644 --- a/train_network.py +++ b/train_network.py @@ -1,4 +1,3 @@ -from torch.nn.parallel import DistributedDataParallel as DDP import importlib import argparse import gc @@ -144,24 +143,7 @@ def train(args): weight_dtype, save_dtype = train_util.prepare_dtype(args) # モデルを読み込む - for pi in range(accelerator.state.num_processes): - # TODO: modify other training scripts as well - if pi == accelerator.state.local_process_index: - print(f"loading model for process {accelerator.state.local_process_index}/{accelerator.state.num_processes}") - - text_encoder, vae, unet, _ = train_util.load_target_model( - args, weight_dtype, accelerator.device if args.lowram else "cpu" - ) - - # work on low-ram device - if args.lowram: - text_encoder.to(accelerator.device) - unet.to(accelerator.device) - vae.to(accelerator.device) - - gc.collect() - torch.cuda.empty_cache() - accelerator.wait_for_everyone() + text_encoder, vae, unet, _ = train_util.load_target_model(args, weight_dtype, accelerator) # モデルに xformers とか memory efficient attention を組み込む train_util.replace_unet_modules(unet, args.mem_eff_attn, args.xformers) @@ -279,6 +261,9 @@ def train(args): else: network, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(network, optimizer, train_dataloader, lr_scheduler) + # transform DDP after prepare (train_network here only) + text_encoder, unet, network = train_util.transform_DDP(text_encoder, unet, network) + unet.requires_grad_(False) unet.to(accelerator.device, dtype=weight_dtype) text_encoder.requires_grad_(False) @@ -288,20 +273,11 @@ def train(args): text_encoder.train() # 
set top parameter requires_grad = True for gradient checkpointing works - if type(text_encoder) == DDP: - text_encoder.module.text_model.embeddings.requires_grad_(True) - else: - text_encoder.text_model.embeddings.requires_grad_(True) + text_encoder.text_model.embeddings.requires_grad_(True) else: unet.eval() text_encoder.eval() - - # support DistributedDataParallel - if type(text_encoder) == DDP: - text_encoder = text_encoder.module - unet = unet.module - network = network.module - + network.prepare_grad_etc(text_encoder, unet) if not cache_latents: diff --git a/train_textual_inversion.py b/train_textual_inversion.py index fb6b6053..c13fcf9f 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -98,7 +98,7 @@ def train(args): weight_dtype, save_dtype = train_util.prepare_dtype(args) # モデルを読み込む - text_encoder, vae, unet, _ = train_util.load_target_model(args, weight_dtype) + text_encoder, vae, unet, _ = train_util.load_target_model(args, weight_dtype, accelerator) # Convert the init_word to token_id if args.init_word is not None: @@ -280,6 +280,9 @@ def train(args): text_encoder, optimizer, train_dataloader, lr_scheduler ) + # transform DDP after prepare + text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet) + index_no_updates = torch.arange(len(tokenizer)) < token_ids[0] # print(len(index_no_updates), torch.sum(index_no_updates)) orig_embeds_params = unwrap_model(text_encoder).get_input_embeddings().weight.data.detach().clone() diff --git a/train_textual_inversion_XTI.py b/train_textual_inversion_XTI.py index 69ec3eb1..67d48023 100644 --- a/train_textual_inversion_XTI.py +++ b/train_textual_inversion_XTI.py @@ -104,7 +104,7 @@ def train(args): weight_dtype, save_dtype = train_util.prepare_dtype(args) # モデルを読み込む - text_encoder, vae, unet, _ = train_util.load_target_model(args, weight_dtype) + text_encoder, vae, unet, _ = train_util.load_target_model(args, weight_dtype, accelerator) # Convert the init_word to token_id if 
args.init_word is not None: @@ -314,6 +314,9 @@ def train(args): text_encoder, optimizer, train_dataloader, lr_scheduler ) + # transform DDP after prepare + text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet) + index_no_updates = torch.arange(len(tokenizer)) < token_ids_XTI[0] # print(len(index_no_updates), torch.sum(index_no_updates)) orig_embeds_params = unwrap_model(text_encoder).get_input_embeddings().weight.data.detach().clone() From 2fcbfec17873eedd70cd728f1205836dfca9ceab Mon Sep 17 00:00:00 2001 From: ykume Date: Wed, 3 May 2023 11:07:29 +0900 Subject: [PATCH 05/10] make transform_DDP more intuitive --- fine_tune.py | 2 +- library/train_util.py | 6 +++--- train_db.py | 2 +- train_network.py | 2 +- train_textual_inversion.py | 2 +- train_textual_inversion_XTI.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fine_tune.py b/fine_tune.py index db1c8a23..9d42c873 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -229,7 +229,7 @@ def train(args): unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler) # transform DDP after prepare - text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet) + text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet) # 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする if args.full_fp16: diff --git a/library/train_util.py b/library/train_util.py index 1a3b2ed0..cac4cdc5 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -2897,9 +2897,9 @@ def _load_target_model(args: argparse.Namespace, weight_dtype, device="cpu"): return text_encoder, vae, unet, load_stable_diffusion_format -def transform_DDP(text_encoder, unet, network=None): +def transform_if_model_is_DDP(text_encoder, unet, network=None): # Transform text_encoder, unet and network from DistributedDataParallel - return (encoder.module if type(encoder) == DDP else encoder for encoder in [text_encoder, unet, network]) + return 
(model.module if type(model) == DDP else model for model in [text_encoder, unet, network] if model is not None) def load_target_model(args, weight_dtype, accelerator): @@ -2922,7 +2922,7 @@ def load_target_model(args, weight_dtype, accelerator): torch.cuda.empty_cache() accelerator.wait_for_everyone() - text_encoder, unet, _ = transform_DDP(text_encoder, unet, network=None) + text_encoder, unet = transform_if_model_is_DDP(text_encoder, unet) return text_encoder, vae, unet, load_stable_diffusion_format diff --git a/train_db.py b/train_db.py index abe2ecdf..ad7a317e 100644 --- a/train_db.py +++ b/train_db.py @@ -197,7 +197,7 @@ def train(args): unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler) # transform DDP after prepare - text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet) + text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet) if not train_text_encoder: text_encoder.to(accelerator.device, dtype=weight_dtype) # to avoid 'cpu' vs 'cuda' error diff --git a/train_network.py b/train_network.py index c5ec0ebd..3f95c5f7 100644 --- a/train_network.py +++ b/train_network.py @@ -262,7 +262,7 @@ def train(args): network, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(network, optimizer, train_dataloader, lr_scheduler) # transform DDP after prepare (train_network here only) - text_encoder, unet, network = train_util.transform_DDP(text_encoder, unet, network) + text_encoder, unet, network = train_util.transform_if_model_is_DDP(text_encoder, unet, network) unet.requires_grad_(False) unet.to(accelerator.device, dtype=weight_dtype) diff --git a/train_textual_inversion.py b/train_textual_inversion.py index c13fcf9f..c11a199f 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -281,7 +281,7 @@ def train(args): ) # transform DDP after prepare - text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet) + text_encoder, unet 
= train_util.transform_if_model_is_DDP(text_encoder, unet) index_no_updates = torch.arange(len(tokenizer)) < token_ids[0] # print(len(index_no_updates), torch.sum(index_no_updates)) diff --git a/train_textual_inversion_XTI.py b/train_textual_inversion_XTI.py index 67d48023..5342a695 100644 --- a/train_textual_inversion_XTI.py +++ b/train_textual_inversion_XTI.py @@ -315,7 +315,7 @@ def train(args): ) # transform DDP after prepare - text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet) + text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet) index_no_updates = torch.arange(len(tokenizer)) < token_ids_XTI[0] # print(len(index_no_updates), torch.sum(index_no_updates)) From f6556f79727a5dce174b300a11693d33ccc735db Mon Sep 17 00:00:00 2001 From: ykume Date: Wed, 3 May 2023 11:31:13 +0900 Subject: [PATCH 06/10] add ja help message for mutires noise --- library/train_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index f25e6065..5098f63d 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -2125,13 +2125,13 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: "--multires_noise_iterations", type=int, default=None, - help="enable multires noise with this number of iterations (if enabled, around 6-10 is recommended)" + help="enable multires noise with this number of iterations (if enabled, around 6-10 is recommended) / Multires noiseを有効にしてこのイテレーション数を設定する(有効にする場合は6-10程度を推奨)" ) parser.add_argument( "--multires_noise_discount", type=float, default=0.3, - help="set discount value for multires noise (has no effect without --multires_noise_iterations)" + help="set discount value for multires noise (has no effect without --multires_noise_iterations) / Multires noiseのdiscount値を設定する(--multires_noise_iterations指定時のみ有効)", ) parser.add_argument( "--lowram", From e25164cfedc52e251034c660395ecc2737e54b40 Mon Sep 17 00:00:00 2001 
From: ykume Date: Wed, 3 May 2023 11:48:59 +0900 Subject: [PATCH 07/10] explicit import for BinaryIO, will fix #405 --- library/huggingface_util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/library/huggingface_util.py b/library/huggingface_util.py index 41031b1f..2d0e1980 100644 --- a/library/huggingface_util.py +++ b/library/huggingface_util.py @@ -1,9 +1,8 @@ -from typing import * +from typing import Union, BinaryIO from huggingface_hub import HfApi from pathlib import Path import argparse import os - from library.utils import fire_in_thread From 1cba4471021105c9d476bab24bc3f1660e68cf99 Mon Sep 17 00:00:00 2001 From: ykume Date: Wed, 3 May 2023 14:06:51 +0900 Subject: [PATCH 08/10] fix unet cfg is different in saving diffuser model --- library/model_util.py | 23 +++++++++++++---------- tools/convert_diffusers20_original_sd.py | 10 +++++----- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/library/model_util.py b/library/model_util.py index 35b0b6af..8eea76da 100644 --- a/library/model_util.py +++ b/library/model_util.py @@ -22,6 +22,7 @@ UNET_PARAMS_OUT_CHANNELS = 4 UNET_PARAMS_NUM_RES_BLOCKS = 2 UNET_PARAMS_CONTEXT_DIM = 768 UNET_PARAMS_NUM_HEADS = 8 +UNET_PARAMS_USE_LINEAR_PROJECTION = False VAE_PARAMS_Z_CHANNELS = 4 VAE_PARAMS_RESOLUTION = 256 @@ -34,6 +35,7 @@ VAE_PARAMS_NUM_RES_BLOCKS = 2 # V2 V2_UNET_PARAMS_ATTENTION_HEAD_DIM = [5, 10, 20, 20] V2_UNET_PARAMS_CONTEXT_DIM = 1024 +V2_UNET_PARAMS_USE_LINEAR_PROJECTION = True # Diffusersの設定を読み込むための参照モデル DIFFUSERS_REF_MODEL_ID_V1 = "runwayml/stable-diffusion-v1-5" @@ -207,13 +209,13 @@ def conv_attn_to_linear(checkpoint): checkpoint[key] = checkpoint[key][:, :, 0] -def linear_transformer_to_conv(checkpoint): - keys = list(checkpoint.keys()) - tf_keys = ["proj_in.weight", "proj_out.weight"] - for key in keys: - if ".".join(key.split(".")[-2:]) in tf_keys: - if checkpoint[key].ndim == 2: - checkpoint[key] = checkpoint[key].unsqueeze(2).unsqueeze(2) +# def 
linear_transformer_to_conv(checkpoint): +# keys = list(checkpoint.keys()) +# tf_keys = ["proj_in.weight", "proj_out.weight"] +# for key in keys: +# if ".".join(key.split(".")[-2:]) in tf_keys: +# if checkpoint[key].ndim == 2: +# checkpoint[key] = checkpoint[key].unsqueeze(2).unsqueeze(2) def convert_ldm_unet_checkpoint(v2, checkpoint, config): @@ -357,9 +359,9 @@ def convert_ldm_unet_checkpoint(v2, checkpoint, config): new_checkpoint[new_path] = unet_state_dict[old_path] - # SDのv2では1*1のconv2dがlinearに変わっているので、linear->convに変換する - if v2: - linear_transformer_to_conv(new_checkpoint) + # SDのv2では1*1のconv2dがlinearに変わっているが、Diffusers側も同じなので、変換不要 + # if v2: + # linear_transformer_to_conv(new_checkpoint) return new_checkpoint @@ -500,6 +502,7 @@ def create_unet_diffusers_config(v2): layers_per_block=UNET_PARAMS_NUM_RES_BLOCKS, cross_attention_dim=UNET_PARAMS_CONTEXT_DIM if not v2 else V2_UNET_PARAMS_CONTEXT_DIM, attention_head_dim=UNET_PARAMS_NUM_HEADS if not v2 else V2_UNET_PARAMS_ATTENTION_HEAD_DIM, + use_linear_projection=UNET_PARAMS_USE_LINEAR_PROJECTION if not v2 else V2_UNET_PARAMS_USE_LINEAR_PROJECTION, ) return config diff --git a/tools/convert_diffusers20_original_sd.py b/tools/convert_diffusers20_original_sd.py index 15a9ca4a..130eff1f 100644 --- a/tools/convert_diffusers20_original_sd.py +++ b/tools/convert_diffusers20_original_sd.py @@ -24,9 +24,9 @@ def convert(args): is_save_ckpt = len(os.path.splitext(args.model_to_save)[1]) > 0 assert not is_load_ckpt or args.v1 != args.v2, f"v1 or v2 is required to load checkpoint / checkpointの読み込みにはv1/v2指定が必要です" - assert ( - is_save_ckpt or args.reference_model is not None - ), f"reference model is required to save as Diffusers / Diffusers形式での保存には参照モデルが必要です" + # assert ( + # is_save_ckpt or args.reference_model is not None + # ), f"reference model is required to save as Diffusers / Diffusers形式での保存には参照モデルが必要です" # モデルを読み込む msg = "checkpoint" if is_load_ckpt else ("Diffusers" + (" as fp16" if args.fp16 else "")) @@ -61,7 +61,7 
@@ def convert(args): ) print(f"model saved. total converted state_dict keys: {key_count}") else: - print(f"copy scheduler/tokenizer config from: {args.reference_model}") + print(f"copy scheduler/tokenizer config from: {args.reference_model if args.reference_model is not None else 'default model'}") model_util.save_diffusers_checkpoint( v2_model, args.model_to_save, text_encoder, unet, args.reference_model, vae, args.use_safetensors ) @@ -100,7 +100,7 @@ def setup_parser() -> argparse.ArgumentParser: "--reference_model", type=str, default=None, - help="reference model for schduler/tokenizer, required in saving Diffusers, copy schduler/tokenizer from this / scheduler/tokenizerのコピー元のDiffusersモデル、Diffusers形式で保存するときに必要", + help="scheduler/tokenizerのコピー元Diffusersモデル、Diffusers形式で保存するときに使用される、省略時は`runwayml/stable-diffusion-v1-5` または `stabilityai/stable-diffusion-2-1` / reference Diffusers model to copy scheduler/tokenizer config from, used when saving as Diffusers format, default is `runwayml/stable-diffusion-v1-5` or `stabilityai/stable-diffusion-2-1`", ) parser.add_argument( "--use_safetensors", From 758a1e7f666a9c081cee9702c9c5288c6607a59e Mon Sep 17 00:00:00 2001 From: ykume Date: Wed, 3 May 2023 16:05:15 +0900 Subject: [PATCH 09/10] Revert unet config, add option to convert script --- library/model_util.py | 35 +++++++++++++----------- tools/convert_diffusers20_original_sd.py | 5 +++- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/library/model_util.py b/library/model_util.py index 8eea76da..26f72235 100644 --- a/library/model_util.py +++ b/library/model_util.py @@ -22,7 +22,7 @@ UNET_PARAMS_OUT_CHANNELS = 4 UNET_PARAMS_NUM_RES_BLOCKS = 2 UNET_PARAMS_CONTEXT_DIM = 768 UNET_PARAMS_NUM_HEADS = 8 -UNET_PARAMS_USE_LINEAR_PROJECTION = False +# UNET_PARAMS_USE_LINEAR_PROJECTION = False VAE_PARAMS_Z_CHANNELS = 4 VAE_PARAMS_RESOLUTION = 256 @@ -35,7 +35,7 @@ VAE_PARAMS_NUM_RES_BLOCKS = 2 # V2 V2_UNET_PARAMS_ATTENTION_HEAD_DIM = [5, 10, 20, 20] 
V2_UNET_PARAMS_CONTEXT_DIM = 1024 -V2_UNET_PARAMS_USE_LINEAR_PROJECTION = True +# V2_UNET_PARAMS_USE_LINEAR_PROJECTION = True # Diffusersの設定を読み込むための参照モデル DIFFUSERS_REF_MODEL_ID_V1 = "runwayml/stable-diffusion-v1-5" @@ -209,13 +209,13 @@ def conv_attn_to_linear(checkpoint): checkpoint[key] = checkpoint[key][:, :, 0] -# def linear_transformer_to_conv(checkpoint): -# keys = list(checkpoint.keys()) -# tf_keys = ["proj_in.weight", "proj_out.weight"] -# for key in keys: -# if ".".join(key.split(".")[-2:]) in tf_keys: -# if checkpoint[key].ndim == 2: -# checkpoint[key] = checkpoint[key].unsqueeze(2).unsqueeze(2) +def linear_transformer_to_conv(checkpoint): + keys = list(checkpoint.keys()) + tf_keys = ["proj_in.weight", "proj_out.weight"] + for key in keys: + if ".".join(key.split(".")[-2:]) in tf_keys: + if checkpoint[key].ndim == 2: + checkpoint[key] = checkpoint[key].unsqueeze(2).unsqueeze(2) def convert_ldm_unet_checkpoint(v2, checkpoint, config): @@ -359,9 +359,10 @@ def convert_ldm_unet_checkpoint(v2, checkpoint, config): new_checkpoint[new_path] = unet_state_dict[old_path] - # SDのv2では1*1のconv2dがlinearに変わっているが、Diffusers側も同じなので、変換不要 - # if v2: - # linear_transformer_to_conv(new_checkpoint) + # SDのv2では1*1のconv2dがlinearに変わっている + # 誤って Diffusers 側を conv2d のままにしてしまったので、変換必要 + if v2 and not config.get('use_linear_projection', False): + linear_transformer_to_conv(new_checkpoint) return new_checkpoint @@ -470,7 +471,7 @@ def convert_ldm_vae_checkpoint(checkpoint, config): return new_checkpoint -def create_unet_diffusers_config(v2): +def create_unet_diffusers_config(v2, use_linear_projection_in_v2=False): """ Creates a config for the diffusers based on the config of the LDM model. 
""" @@ -502,8 +503,10 @@ def create_unet_diffusers_config(v2): layers_per_block=UNET_PARAMS_NUM_RES_BLOCKS, cross_attention_dim=UNET_PARAMS_CONTEXT_DIM if not v2 else V2_UNET_PARAMS_CONTEXT_DIM, attention_head_dim=UNET_PARAMS_NUM_HEADS if not v2 else V2_UNET_PARAMS_ATTENTION_HEAD_DIM, - use_linear_projection=UNET_PARAMS_USE_LINEAR_PROJECTION if not v2 else V2_UNET_PARAMS_USE_LINEAR_PROJECTION, + # use_linear_projection=UNET_PARAMS_USE_LINEAR_PROJECTION if not v2 else V2_UNET_PARAMS_USE_LINEAR_PROJECTION, ) + if v2 and use_linear_projection_in_v2: + config["use_linear_projection"] = True return config @@ -849,11 +852,11 @@ def load_checkpoint_with_text_encoder_conversion(ckpt_path, device="cpu"): # TODO dtype指定の動作が怪しいので確認する text_encoderを指定形式で作れるか未確認 -def load_models_from_stable_diffusion_checkpoint(v2, ckpt_path, device="cpu", dtype=None): +def load_models_from_stable_diffusion_checkpoint(v2, ckpt_path, device="cpu", dtype=None, unet_use_linear_projection_in_v2=False): _, state_dict = load_checkpoint_with_text_encoder_conversion(ckpt_path, device) # Convert the UNet2DConditionModel model. 
- unet_config = create_unet_diffusers_config(v2) + unet_config = create_unet_diffusers_config(v2, unet_use_linear_projection_in_v2) converted_unet_checkpoint = convert_ldm_unet_checkpoint(v2, state_dict, unet_config) unet = UNet2DConditionModel(**unet_config).to(device) diff --git a/tools/convert_diffusers20_original_sd.py b/tools/convert_diffusers20_original_sd.py index 130eff1f..b9365b51 100644 --- a/tools/convert_diffusers20_original_sd.py +++ b/tools/convert_diffusers20_original_sd.py @@ -34,7 +34,7 @@ def convert(args): if is_load_ckpt: v2_model = args.v2 - text_encoder, vae, unet = model_util.load_models_from_stable_diffusion_checkpoint(v2_model, args.model_to_load) + text_encoder, vae, unet = model_util.load_models_from_stable_diffusion_checkpoint(v2_model, args.model_to_load, unet_use_linear_projection_in_v2=args.unet_use_linear_projection) else: pipe = StableDiffusionPipeline.from_pretrained( args.model_to_load, torch_dtype=load_dtype, tokenizer=None, safety_checker=None @@ -76,6 +76,9 @@ def setup_parser() -> argparse.ArgumentParser: parser.add_argument( "--v2", action="store_true", help="load v2.0 model (v1 or v2 is required to load checkpoint) / 2.0のモデルを読み込む" ) + parser.add_argument( + "--unet_use_linear_projection", action="store_true", help="When saving v2 model as Diffusers, set U-Net config to `use_linear_projection=true` (to match stabilityai's model) / Diffusers形式でv2モデルを保存するときにU-Netの設定を`use_linear_projection=true`にする(stabilityaiのモデルと合わせる)" + ) parser.add_argument( "--fp16", action="store_true", From b271a6bd89a59bc7bd179b8e32ae2f6661424172 Mon Sep 17 00:00:00 2001 From: ykume Date: Wed, 3 May 2023 16:22:32 +0900 Subject: [PATCH 10/10] update readme --- README-ja.md | 10 ++++++++++ README.md | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/README-ja.md b/README-ja.md index 47aaf16e..e052d6bf 100644 --- a/README-ja.md +++ b/README-ja.md @@ -115,6 +115,16 @@ accelerate configの質問には以下のように答えてください。(bf1 
他のバージョンでは学習がうまくいかない場合があるようです。特に他の理由がなければ指定のバージョンをお使いください。 +### オプション:Lion8bitを使う + +Lion8bitを使う場合には`bitsandbytes`を0.38.0以降にアップグレードする必要があります。`bitsandbytes`をアンインストールし、Windows環境では例えば[こちら](https://github.com/jllllll/bitsandbytes-windows-webui)などからWindows版のwhlファイルをインストールしてください。たとえば以下のような手順になります。 + +```powershell +pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl +``` + +アップグレード時には`pip install .`でこのリポジトリを更新し、必要に応じて他のパッケージもアップグレードしてください。 + ## アップグレード 新しいリリースがあった場合、以下のコマンドで更新できます。 diff --git a/README.md b/README.md index 88492719..5d2486d6 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,16 @@ note: Some user reports ``ValueError: fp16 mixed precision requires a GPU`` is o Other versions of PyTorch and xformers seem to have problems with training. If there is no other reason, please install the specified version. +### Optional: Use Lion8bit + +For Lion8bit, you need to upgrade `bitsandbytes` to 0.38.0 or later. Uninstall `bitsandbytes`, and for Windows, install the Windows version whl file from [here](https://github.com/jllllll/bitsandbytes-windows-webui) or other sources, like: + +```powershell +pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl +``` + +For upgrading, upgrade this repo with `pip install .`, and upgrade necessary packages manually. + ## Upgrade When a new release comes out you can upgrade your repo with the following command: @@ -128,6 +138,28 @@ The majority of scripts is licensed under ASL 2.0 (including codes from Diffuser ## Change History +### 3 May 2023, 2023/05/03 + +- When saving v2 models in Diffusers format in training scripts and conversion scripts, it was found that the U-Net configuration is different from those of Hugging Face's stabilityai models (this repository is `"use_linear_projection": false`, stabilityai is `true`). 
Please note that the weight shapes are different, so be careful when using the weight files directly. We apologize for the inconvenience. + - Since the U-Net model is created based on the configuration, it should not cause any problems in training or inference. + - Added `--unet_use_linear_projection` option to `convert_diffusers20_original_sd.py` script. If you specify this option, you can save a Diffusers format model with the same configuration as stabilityai's model from an SD format model (a single `*.safetensors` or `*.ckpt` file). Unfortunately, it is not possible to convert a Diffusers format model to the same format. + +- Lion8bit optimizer is supported. [PR #447](https://github.com/kohya-ss/sd-scripts/pull/447) Thanks to sdbds! + - Currently it is optional because you need to update the `bitsandbytes` version. See "Optional: Use Lion8bit" in installation instructions to use it. +- Multi-GPU training with DDP is supported in each training script. [PR #448](https://github.com/kohya-ss/sd-scripts/pull/448) Thanks to Isotr0py! +- Multi-resolution noise (pyramid noise) is supported in each training script. [PR #471](https://github.com/kohya-ss/sd-scripts/pull/471) Thanks to pamparamm! + - See PR and this page [Multi-Resolution Noise for Diffusion Model Training](https://wandb.ai/johnowhitaker/multires_noise/reports/Multi-Resolution-Noise-for-Diffusion-Model-Training--VmlldzozNjYyOTU2) for details. 

+ +- 学習スクリプトや変換スクリプトでDiffusers形式でv2モデルを保存するとき、U-Netの設定がHugging Faceのstabilityaiのモデルと異なることがわかりました(当リポジトリでは `"use_linear_projection": false`、stabilityaiは`true`)。重みの形状が異なるため、直接重みファイルを利用する場合にはご注意ください。ご不便をお掛けし申し訳ありません。 + - U-Netのモデルは設定に基づいて作成されるため、通常、学習や推論で問題になることはないと思われます。 + - `convert_diffusers20_original_sd.py`スクリプトに`--unet_use_linear_projection`オプションを追加しました。これを指定するとSD形式のモデル(単一の`*.safetensors`または`*.ckpt`ファイル)から、stabilityaiのモデルと同じ形状の重みファイルを持つDiffusers形式モデルが保存できます。なお、Diffusers形式のモデルを同形式に変換することはできません。 + +- Lion8bitオプティマイザがサポートされました。[PR #447](https://github.com/kohya-ss/sd-scripts/pull/447) sdbds氏に感謝します。 + - `bitsandbytes`のバージョンを更新する必要があるため、現在はオプションです。使用するにはインストール手順の「[オプション:Lion8bitを使う](./README-ja.md#オプションlion8bitを使う)」を参照してください。 +- 各学習スクリプトでDDPによるマルチGPU学習がサポートされました。[PR #448](https://github.com/kohya-ss/sd-scripts/pull/448) Isotr0py氏に感謝します。 +- Multi resolution noise (pyramid noise) が各学習スクリプトでサポートされました。[PR #471](https://github.com/kohya-ss/sd-scripts/pull/471) pamparamm氏に感謝します。 + - 詳細はPRおよびこちらのページ [Multi-Resolution Noise for Diffusion Model Training](https://wandb.ai/johnowhitaker/multires_noise/reports/Multi-Resolution-Noise-for-Diffusion-Model-Training--VmlldzozNjYyOTU2) を参照してください。 + ### 30 Apr. 2023, 2023/04/30 - Added Chinese translation of [DreamBooth guide](./train_db_README-zh.md) and [LoRA guide](./train_network_README-zh.md). [PR #459](https://github.com/kohya-ss/sd-scripts/pull/459) Thanks to tomj2ee!