From 95bc6e8749dde79b66b7150a86b7f84e6c6ec7fc Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 3 Sep 2023 12:46:40 +0900 Subject: [PATCH 1/4] update readme --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ada3b3c9..524e63aa 100644 --- a/README.md +++ b/README.md @@ -22,11 +22,21 @@ __Stable Diffusion web UI now seems to support LoRA trained by ``sd-scripts``.__ The feature of SDXL training is now available in sdxl branch as an experimental feature. -Aug 13, 2023: The feature will be merged into the main branch soon. Following are the changes from the previous version. +Sep 3, 2023: The feature will be merged into the main branch soon. Following are the changes from the previous version. + +- ControlNet-LLLite is added. See [documenation](./docs/train_lllite_README.md) for details. +- JPEG XL is supported. [#786](./pull/786) +- Peak memory usage is reduced. [#791](./pull/791) +- Input perturbation noise is added. See [#798](./pull/798) for details. +- Dataset subset now has `caption_prefix` and `caption_suffix` options. The strings are added to the beginning and the end of the captions before shuffling. You can specify the options in `.toml`. +- Other minor changes. +- Thanks for contributions from Isotr0py, vvern999, lansing and others! + +Aug 13, 2023: - LoRA-FA is added experimentally. Specify `--network_module networks.lora_fa` option instead of `--network_module networks.lora`. The trained model can be used as a normal LoRA model. -Aug 12, 2023: Following are the changes from the previous version. +Aug 12, 2023: - The default value of noise offset when omitted has been changed to 0 from 0.0357. - The different learning rates for each U-Net block are now supported. Specify with `--block_lr` option. Specify 23 values separated by commas like `--block_lr 1e-3,1e-3 ... 1e-3`. From 5f08a21d123380c72a7acf8bde4d61f9fcfe16bc Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 3 Sep 2023 12:48:35 +0900 Subject: [PATCH 2/4] update readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 524e63aa..0d932f1d 100644 --- a/README.md +++ b/README.md @@ -25,9 +25,9 @@ The feature of SDXL training is now available in sdxl branch as an experimental Sep 3, 2023: The feature will be merged into the main branch soon. Following are the changes from the previous version. - ControlNet-LLLite is added. See [documenation](./docs/train_lllite_README.md) for details. -- JPEG XL is supported. [#786](./pull/786) -- Peak memory usage is reduced. [#791](./pull/791) -- Input perturbation noise is added. See [#798](./pull/798) for details. +- JPEG XL is supported. [#786](https://github.com/kohya-ss/sd-scripts/pull/786) +- Peak memory usage is reduced. [#791](https://github.com/kohya-ss/sd-scripts/pull/791) +- Input perturbation noise is added. See [#798](https://github.com/kohya-ss/sd-scripts/pull/798) for details. - Dataset subset now has `caption_prefix` and `caption_suffix` options. The strings are added to the beginning and the end of the captions before shuffling. You can specify the options in `.toml`. - Other minor changes. - Thanks for contributions from Isotr0py, vvern999, lansing and others! From 867e7d3238b7f6d5400bd3f44399d65c42ef8b39 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 3 Sep 2023 12:49:51 +0900 Subject: [PATCH 3/4] fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0d932f1d..7d46dc9d 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ The feature of SDXL training is now available in sdxl branch as an experimental Sep 3, 2023: The feature will be merged into the main branch soon. Following are the changes from the previous version. -- ControlNet-LLLite is added. See [documenation](./docs/train_lllite_README.md) for details. +- ControlNet-LLLite is added. See [documentation](./docs/train_lllite_README.md) for details. - JPEG XL is supported. [#786](https://github.com/kohya-ss/sd-scripts/pull/786) - Peak memory usage is reduced. [#791](https://github.com/kohya-ss/sd-scripts/pull/791) - Input perturbation noise is added. See [#798](https://github.com/kohya-ss/sd-scripts/pull/798) for details. From 207fc8b2561f459e0e7a8e9fa7b88fd7b0eda69c Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 3 Sep 2023 17:50:27 +0900 Subject: [PATCH 4/4] fix to work regional LoRA --- networks/lora.py | 15 ++++++++++----- sdxl_gen_img.py | 10 ++++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/networks/lora.py b/networks/lora.py index cd73cbe7..0c75cd42 100644 --- a/networks/lora.py +++ b/networks/lora.py @@ -241,9 +241,13 @@ class LoRAInfModule(LoRAModule): else: area = x.size()[1] - mask = self.network.mask_dic[area] + mask = self.network.mask_dic.get(area, None) if mask is None: - raise ValueError(f"mask is None for resolution {area}") + # raise ValueError(f"mask is None for resolution {area}") + # emb_layers in SDXL doesn't have mask + # print(f"mask is None for resolution {area}, {x.size()}") + mask_size = (1, x.size()[1]) if len(x.size()) == 2 else (1, *x.size()[1:-1], 1) + return torch.ones(mask_size, dtype=x.dtype, device=x.device) / self.network.num_sub_prompts if len(x.size()) != 4: mask = torch.reshape(mask, (1, -1, 1)) return mask @@ -348,9 +352,10 @@ class LoRAInfModule(LoRAModule): out[-self.network.batch_size :] = x[-self.network.batch_size :] # real_uncond # print("to_out_forward", self.lora_name, self.network.sub_prompt_index, self.network.num_sub_prompts) - # for i in range(len(masks)): - # if masks[i] is None: - # masks[i] = torch.zeros_like(masks[-1]) + # if num_sub_prompts > num of LoRAs, fill with zero + for i in range(len(masks)): + if masks[i] is None: + masks[i] = torch.zeros_like(masks[0]) mask = torch.cat(masks) mask_sum = torch.sum(mask, dim=0) + 1e-4 diff --git a/sdxl_gen_img.py b/sdxl_gen_img.py index 35bd6c61..c506ad3f 100755 --- a/sdxl_gen_img.py +++ b/sdxl_gen_img.py @@ -451,10 +451,11 @@ class PipelineLike: tes_text_embs = [] tes_uncond_embs = [] tes_real_uncond_embs = [] - # use last pool + for tokenizer, text_encoder in zip(self.tokenizers, self.text_encoders): token_replacer = self.get_token_replacer(tokenizer) + # use last text_pool, because it is from text encoder 2 text_embeddings, text_pool, uncond_embeddings, uncond_pool, _ = get_weighted_text_embeddings( tokenizer, text_encoder, @@ -529,6 +530,11 @@ class PipelineLike: c_vector = torch.cat([emb1, emb2, emb3], dim=1).to(self.device, dtype=text_embeddings.dtype).repeat(batch_size, 1) uc_vector = torch.cat([uc_emb1, emb2, emb3], dim=1).to(self.device, dtype=text_embeddings.dtype).repeat(batch_size, 1) + if reginonal_network: + # use last pool for conditioning + num_sub_prompts = len(text_pool) // batch_size + text_pool = text_pool[num_sub_prompts - 1 :: num_sub_prompts] # last subprompt + c_vector = torch.cat([text_pool, c_vector], dim=1) uc_vector = torch.cat([uncond_pool, uc_vector], dim=1) @@ -762,7 +768,7 @@ class PipelineLike: image = image.cpu().permute(0, 2, 3, 1).float().numpy() if torch.cuda.is_available(): - torch.cuda.empty_cache() + torch.cuda.empty_cache() if output_type == "pil": # image = self.numpy_to_pil(image)