mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-08 22:35:09 +00:00
use original ControlNet instead of Diffusers
This commit is contained in:
89
gen_img.py
89
gen_img.py
@@ -43,8 +43,8 @@ from diffusers import (
|
||||
)
|
||||
from einops import rearrange
|
||||
from tqdm import tqdm
|
||||
from torchvision import transforms
|
||||
from transformers import CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection, CLIPImageProcessor
|
||||
from accelerate import init_empty_weights
|
||||
import PIL
|
||||
from PIL import Image
|
||||
from PIL.PngImagePlugin import PngInfo
|
||||
@@ -58,6 +58,7 @@ import tools.original_control_net as original_control_net
|
||||
from tools.original_control_net import ControlNetInfo
|
||||
from library.original_unet import UNet2DConditionModel, InferUNet2DConditionModel
|
||||
from library.sdxl_original_unet import InferSdxlUNet2DConditionModel
|
||||
from library.sdxl_original_control_net import SdxlControlNet
|
||||
from library.original_unet import FlashAttentionFunction
|
||||
from networks.control_net_lllite import ControlNetLLLite
|
||||
from library.utils import GradualLatent, EulerAncestralDiscreteSchedulerGL
|
||||
@@ -352,8 +353,8 @@ class PipelineLike:
|
||||
self.token_replacements_list.append({})
|
||||
|
||||
# ControlNet
|
||||
self.control_nets: List[ControlNetInfo] = [] # only for SD 1.5
|
||||
self.control_net_lllites: List[ControlNetLLLite] = []
|
||||
self.control_nets: List[Union[ControlNetInfo, Tuple[SdxlControlNet, float]]] = []
|
||||
self.control_net_lllites: List[Tuple[ControlNetLLLite, float]] = []
|
||||
self.control_net_enabled = True # control_netsが空ならTrueでもFalseでもControlNetは動作しない
|
||||
|
||||
self.gradual_latent: GradualLatent = None
|
||||
@@ -542,7 +543,7 @@ class PipelineLike:
|
||||
else:
|
||||
text_embeddings = torch.cat([uncond_embeddings, text_embeddings, real_uncond_embeddings])
|
||||
|
||||
if self.control_net_lllites:
|
||||
if self.control_net_lllites or (self.control_nets and self.is_sdxl):
|
||||
# ControlNetのhintにguide imageを流用する。ControlNetの場合はControlNet側で行う
|
||||
if isinstance(clip_guide_images, PIL.Image.Image):
|
||||
clip_guide_images = [clip_guide_images]
|
||||
@@ -731,7 +732,12 @@ class PipelineLike:
|
||||
num_latent_input = (3 if negative_scale is not None else 2) if do_classifier_free_guidance else 1
|
||||
|
||||
if self.control_nets:
|
||||
guided_hints = original_control_net.get_guided_hints(self.control_nets, num_latent_input, batch_size, clip_guide_images)
|
||||
if not self.is_sdxl:
|
||||
guided_hints = original_control_net.get_guided_hints(
|
||||
self.control_nets, num_latent_input, batch_size, clip_guide_images
|
||||
)
|
||||
else:
|
||||
clip_guide_images = clip_guide_images * 0.5 + 0.5 # [-1, 1] => [0, 1]
|
||||
each_control_net_enabled = [self.control_net_enabled] * len(self.control_nets)
|
||||
|
||||
if self.control_net_lllites:
|
||||
@@ -793,7 +799,7 @@ class PipelineLike:
|
||||
latent_model_input = latents.repeat((num_latent_input, 1, 1, 1))
|
||||
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
|
||||
|
||||
# disable ControlNet-LLLite if ratio is set. ControlNet is disabled in ControlNetInfo
|
||||
# disable ControlNet-LLLite or SDXL ControlNet if ratio is set. ControlNet is disabled in ControlNetInfo
|
||||
if self.control_net_lllites:
|
||||
for j, ((control_net, ratio), enabled) in enumerate(zip(self.control_net_lllites, each_control_net_enabled)):
|
||||
if not enabled or ratio >= 1.0:
|
||||
@@ -802,9 +808,16 @@ class PipelineLike:
|
||||
logger.info(f"ControlNetLLLite {j} is disabled (ratio={ratio} at {i} / {len(timesteps)})")
|
||||
control_net.set_cond_image(None)
|
||||
each_control_net_enabled[j] = False
|
||||
if self.control_nets and self.is_sdxl:
|
||||
for j, ((control_net, ratio), enabled) in enumerate(zip(self.control_nets, each_control_net_enabled)):
|
||||
if not enabled or ratio >= 1.0:
|
||||
continue
|
||||
if ratio < i / len(timesteps):
|
||||
logger.info(f"ControlNet {j} is disabled (ratio={ratio} at {i} / {len(timesteps)})")
|
||||
each_control_net_enabled[j] = False
|
||||
|
||||
# predict the noise residual
|
||||
if self.control_nets and self.control_net_enabled:
|
||||
if self.control_nets and self.control_net_enabled and not self.is_sdxl:
|
||||
if regional_network:
|
||||
num_sub_and_neg_prompts = len(text_embeddings) // batch_size
|
||||
text_emb_last = text_embeddings[num_sub_and_neg_prompts - 2 :: num_sub_and_neg_prompts] # last subprompt
|
||||
@@ -823,6 +836,31 @@ class PipelineLike:
|
||||
text_embeddings,
|
||||
text_emb_last,
|
||||
).sample
|
||||
elif self.control_nets:
|
||||
input_resi_add_list = []
|
||||
mid_add_list = []
|
||||
for (control_net, _), enbld in zip(self.control_nets, each_control_net_enabled):
|
||||
if not enbld:
|
||||
continue
|
||||
input_resi_add, mid_add = control_net(
|
||||
latent_model_input, t, text_embeddings, vector_embeddings, clip_guide_images
|
||||
)
|
||||
input_resi_add_list.append(input_resi_add)
|
||||
mid_add_list.append(mid_add)
|
||||
if len(input_resi_add_list) == 0:
|
||||
noise_pred = self.unet(latent_model_input, t, text_embeddings, vector_embeddings)
|
||||
else:
|
||||
if len(input_resi_add_list) > 1:
|
||||
# get mean of input_resi_add_list and mid_add_list
|
||||
input_resi_add_mean = []
|
||||
for i in range(len(input_resi_add_list[0])):
|
||||
input_resi_add_mean.append(
|
||||
torch.mean(torch.stack([input_resi_add_list[j][i] for j in range(len(input_resi_add_list))], dim=0))
|
||||
)
|
||||
input_resi_add = input_resi_add_mean
|
||||
mid_add = torch.mean(torch.stack(mid_add_list), dim=0)
|
||||
|
||||
noise_pred = self.unet(latent_model_input, t, text_embeddings, vector_embeddings, input_resi_add, mid_add)
|
||||
elif self.is_sdxl:
|
||||
noise_pred = self.unet(latent_model_input, t, text_embeddings, vector_embeddings)
|
||||
else:
|
||||
@@ -1827,16 +1865,37 @@ def main(args):
|
||||
upscaler.to(dtype).to(device)
|
||||
|
||||
# ControlNetの処理
|
||||
control_nets: List[ControlNetInfo] = []
|
||||
control_nets: List[Union[ControlNetInfo, Tuple[SdxlControlNet, float]]] = []
|
||||
if args.control_net_models:
|
||||
for i, model in enumerate(args.control_net_models):
|
||||
prep_type = None if not args.control_net_preps or len(args.control_net_preps) <= i else args.control_net_preps[i]
|
||||
weight = 1.0 if not args.control_net_weights or len(args.control_net_weights) <= i else args.control_net_weights[i]
|
||||
ratio = 1.0 if not args.control_net_ratios or len(args.control_net_ratios) <= i else args.control_net_ratios[i]
|
||||
if not is_sdxl:
|
||||
for i, model in enumerate(args.control_net_models):
|
||||
prep_type = None if not args.control_net_preps or len(args.control_net_preps) <= i else args.control_net_preps[i]
|
||||
weight = 1.0 if not args.control_net_weights or len(args.control_net_weights) <= i else args.control_net_weights[i]
|
||||
ratio = 1.0 if not args.control_net_ratios or len(args.control_net_ratios) <= i else args.control_net_ratios[i]
|
||||
|
||||
ctrl_unet, ctrl_net = original_control_net.load_control_net(args.v2, unet, model)
|
||||
prep = original_control_net.load_preprocess(prep_type)
|
||||
control_nets.append(ControlNetInfo(ctrl_unet, ctrl_net, prep, weight, ratio))
|
||||
ctrl_unet, ctrl_net = original_control_net.load_control_net(args.v2, unet, model)
|
||||
prep = original_control_net.load_preprocess(prep_type)
|
||||
control_nets.append(ControlNetInfo(ctrl_unet, ctrl_net, prep, weight, ratio))
|
||||
else:
|
||||
for i, model_file in enumerate(args.control_net_models):
|
||||
multiplier = (
|
||||
1.0
|
||||
if not args.control_net_multipliers or len(args.control_net_multipliers) <= i
|
||||
else args.control_net_multipliers[i]
|
||||
)
|
||||
ratio = 1.0 if not args.control_net_ratios or len(args.control_net_ratios) <= i else args.control_net_ratios[i]
|
||||
|
||||
logger.info(f"loading SDXL ControlNet: {model_file}")
|
||||
from safetensors.torch import load_file
|
||||
|
||||
state_dict = load_file(model_file)
|
||||
|
||||
logger.info(f"Initalizing SDXL ControlNet with multiplier: {multiplier}")
|
||||
with init_empty_weights():
|
||||
control_net = SdxlControlNet(multiplier=multiplier)
|
||||
control_net.load_state_dict(state_dict)
|
||||
control_net.to(dtype).to(device)
|
||||
control_nets.append((control_net, ratio))
|
||||
|
||||
control_net_lllites: List[Tuple[ControlNetLLLite, float]] = []
|
||||
if args.control_net_lllite_models:
|
||||
|
||||
Reference in New Issue
Block a user