diff --git a/gen_img_diffusers.py b/gen_img_diffusers.py
index fb7866fc..6428ef8a 100644
--- a/gen_img_diffusers.py
+++ b/gen_img_diffusers.py
@@ -107,6 +107,7 @@ import tools.original_control_net as original_control_net
 from tools.original_control_net import ControlNetInfo
 from library.original_unet import UNet2DConditionModel, InferUNet2DConditionModel
 from library.original_unet import FlashAttentionFunction
+from library.utils import GradualLatent, EulerAncestralDiscreteSchedulerGL
 
 from XTI_hijack import unet_forward_XTI, downblock_forward_XTI, upblock_forward_XTI
 
@@ -454,6 +455,8 @@ class PipelineLike:
         self.control_nets: List[ControlNetInfo] = []
         self.control_net_enabled = True  # control_netsが空ならTrueでもFalseでもControlNetは動作しない
 
+        self.gradual_latent: GradualLatent = None
+
     # Textual Inversion
     def add_token_replacement(self, target_token_id, rep_token_ids):
         self.token_replacements[target_token_id] = rep_token_ids
@@ -968,13 +971,13 @@ class PipelineLike:
 
         enable_gradual_latent = False
         if self.gradual_latent:
-            if not hasattr(self.scheduler, "set_resized_size"):
+            if not hasattr(self.scheduler, "set_gradual_latent_params"):
                 print("gradual_latent is not supported for this scheduler. Ignoring.")
                 print(self.scheduler.__class__.__name__)
             else:
                 enable_gradual_latent = True
-                current_ratio, start_timesteps, every_n_steps, ratio_step = self.gradual_latent
                 step_elapsed = 1000
+                current_ratio = self.gradual_latent.ratio
 
                 # first, we downscale the latents to the specified ratio / 最初に指定された比率にlatentsをダウンスケールする
                 height, width = latents.shape[-2:]
@@ -985,20 +988,28 @@ class PipelineLike:
                     latents, scale_factor=current_ratio, mode="bicubic", align_corners=False
                 ).to(org_dtype)
 
+                # apply unsharp mask / アンシャープマスクを適用する
+                if self.gradual_latent.gaussian_blur_ksize:
+                    latents = self.gradual_latent.apply_unsharp_mask(latents)
+
         for i, t in enumerate(tqdm(timesteps)):
             resized_size = None
             if enable_gradual_latent:
                 # gradually upscale the latents / latentsを徐々にアップスケールする
-                if t < start_timesteps and current_ratio < 1.0 and step_elapsed >= every_n_steps:
-                    print("upscale")
-                    current_ratio = min(current_ratio + ratio_step, 1.0)
-                    h = int(height * current_ratio)  # // 8 * 8
-                    w = int(width * current_ratio)  # // 8 * 8
+                if (
+                    t < self.gradual_latent.start_timesteps
+                    and current_ratio < 1.0
+                    and step_elapsed >= self.gradual_latent.every_n_steps
+                ):
+                    current_ratio = min(current_ratio + self.gradual_latent.ratio_step, 1.0)
+                    # make divisible by 8 because size of latents must be divisible at bottom of UNet
+                    h = int(height * current_ratio) // 8 * 8
+                    w = int(width * current_ratio) // 8 * 8
                     resized_size = (h, w)
-                    self.scheduler.set_resized_size(resized_size)
+                    self.scheduler.set_gradual_latent_params(resized_size, self.gradual_latent)
                     step_elapsed = 0
                 else:
-                    self.scheduler.set_resized_size(None)
+                    self.scheduler.set_gradual_latent_params(None, None)
                 step_elapsed += 1
 
             # expand the latents if we are doing classifier free guidance
@@ -2154,133 +2165,6 @@ def handle_dynamic_prompt_variants(prompt, repeat_count):
     return prompts
 
 
-# endregion
-
-# region Gradual Latent hires fix
-
-import diffusers.schedulers.scheduling_euler_ancestral_discrete
-from diffusers.schedulers.scheduling_euler_ancestral_discrete import EulerAncestralDiscreteSchedulerOutput
-
-
-class EulerAncestralDiscreteSchedulerGL(EulerAncestralDiscreteScheduler):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.resized_size = None
-
-    def set_resized_size(self, size):
-        self.resized_size = size
-
-    def step(
-        self,
-        model_output: torch.FloatTensor,
-        timestep: Union[float, torch.FloatTensor],
-        sample: torch.FloatTensor,
-        generator: Optional[torch.Generator] = None,
-        return_dict: bool = True,
-    ) -> Union[EulerAncestralDiscreteSchedulerOutput, Tuple]:
-        """
-        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
-        process from the learned model outputs (most often the predicted noise).
-
-        Args:
-            model_output (`torch.FloatTensor`):
-                The direct output from learned diffusion model.
-            timestep (`float`):
-                The current discrete timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
-                A current instance of a sample created by the diffusion process.
-            generator (`torch.Generator`, *optional*):
-                A random number generator.
-            return_dict (`bool`):
-                Whether or not to return a
-                [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or tuple.
-
-        Returns:
-            [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or `tuple`:
-                If return_dict is `True`,
-                [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] is returned,
-                otherwise a tuple is returned where the first element is the sample tensor.
-
-        """
-
-        if isinstance(timestep, int) or isinstance(timestep, torch.IntTensor) or isinstance(timestep, torch.LongTensor):
-            raise ValueError(
-                (
-                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
-                    " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass"
-                    " one of the `scheduler.timesteps` as a timestep."
-                ),
-            )
-
-        if not self.is_scale_input_called:
-            logger.warning(
-                "The `scale_model_input` function should be called before `step` to ensure correct denoising. "
-                "See `StableDiffusionPipeline` for a usage example."
-            )
-
-        if self.step_index is None:
-            self._init_step_index(timestep)
-
-        sigma = self.sigmas[self.step_index]
-
-        # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
-        if self.config.prediction_type == "epsilon":
-            pred_original_sample = sample - sigma * model_output
-        elif self.config.prediction_type == "v_prediction":
-            # * c_out + input * c_skip
-            pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
-        elif self.config.prediction_type == "sample":
-            raise NotImplementedError("prediction_type not implemented yet: sample")
-        else:
-            raise ValueError(f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`")
-
-        sigma_from = self.sigmas[self.step_index]
-        sigma_to = self.sigmas[self.step_index + 1]
-        sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5
-        sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
-
-        # 2. Convert to an ODE derivative
-        derivative = (sample - pred_original_sample) / sigma
-
-        dt = sigma_down - sigma
-
-        prev_sample = sample + derivative * dt
-
-        device = model_output.device
-        if self.resized_size is None:
-            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
-                model_output.shape, dtype=model_output.dtype, device=device, generator=generator
-            )
-        else:
-            print(
-                "resized_size", self.resized_size, "model_output.shape", model_output.shape, "prev_sample.shape", prev_sample.shape
-            )
-            org_dtype = prev_sample.dtype
-            if org_dtype == torch.bfloat16:
-                prev_sample = prev_sample.float()
-
-            prev_sample = torch.nn.functional.interpolate(
-                prev_sample.float(), size=self.resized_size, mode="bicubic", align_corners=False
-            ).to(dtype=org_dtype)
-
-            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
-                (model_output.shape[0], model_output.shape[1], self.resized_size[0], self.resized_size[1]),
-                dtype=model_output.dtype,
-                device=device,
-                generator=generator,
-            )
-
-        prev_sample = prev_sample + noise * sigma_up
-
-        # upon completion increase step index by one
-        self._step_index += 1
-
-        if not return_dict:
-            return (prev_sample,)
-
-        return EulerAncestralDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
-
-
 # endregion
 
 
@@ -2683,12 +2567,22 @@ def main(args):
         unet.set_deep_shrink(args.ds_depth_1, args.ds_timesteps_1, args.ds_depth_2, args.ds_timesteps_2, args.ds_ratio)
 
     # Gradual Latent
-    if args.gradual_latent_ratio is not None:
-        gradual_latent = (
+    if args.gradual_latent_timesteps is not None:
+        if args.gradual_latent_unsharp_params:
+            ksize, sigma, strength = [float(v) for v in args.gradual_latent_unsharp_params.split(",")]
+            ksize = int(ksize)
+        else:
+            ksize, sigma, strength = None, None, None
+
+        gradual_latent = GradualLatent(
             args.gradual_latent_ratio,
             args.gradual_latent_timesteps,
             args.gradual_latent_every_n_steps,
             args.gradual_latent_ratio_step,
+            args.gradual_latent_s_noise,
+            ksize,
+            sigma,
+            strength,
         )
         pipe.set_gradual_latent(gradual_latent)
 
@@ -3288,6 +3182,8 @@ def main(args):
                 gl_ratio = args.gradual_latent_ratio
                 gl_every_n_steps = args.gradual_latent_every_n_steps
                 gl_ratio_step = args.gradual_latent_ratio_step
+                gl_s_noise = args.gradual_latent_s_noise
+                gl_unsharp_params = args.gradual_latent_unsharp_params
 
                 prompt_args = raw_prompt.strip().split(" --")
                 prompt = prompt_args[0]
@@ -3423,6 +3319,20 @@ def main(args):
                        print(f"gradual latent ratio step: {gl_ratio_step}")
                        continue
 
+                    m = re.match(r"glsn ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent s noise
+                        gl_s_noise = float(m.group(1))
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent s noise: {gl_s_noise}")
+                        continue
+
+                    m = re.match(r"glus ([\d\.\-,]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent unsharp params
+                        gl_unsharp_params = m.group(1)
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent unsharp params: {gl_unsharp_params}")
+                        continue
+
                 except ValueError as ex:
                     print(f"Exception in parsing / 解析エラー: {parg}")
                     print(ex)
@@ -3434,10 +3344,18 @@ def main(args):
                 unet.set_deep_shrink(ds_depth_1, ds_timesteps_1, ds_depth_2, ds_timesteps_2, ds_ratio)
 
             # override Gradual Latent
-            if gl_ratio is not None:
-                if gl_timesteps is None:
+            if gl_timesteps is not None:
+                if gl_timesteps < 0:
                     gl_timesteps = args.gradual_latent_timesteps or 650
-                pipe.set_gradual_latent((gl_ratio, gl_timesteps, gl_every_n_steps, gl_ratio_step))
+                if gl_unsharp_params is not None:
+                    ksize, sigma, strength = [float(v) for v in gl_unsharp_params.split(",")]
+                    ksize = int(ksize)
+                else:
+                    ksize, sigma, strength = None, None, None
+                gradual_latent = GradualLatent(
+                    gl_ratio, gl_timesteps, gl_every_n_steps, gl_ratio_step, gl_s_noise, ksize, sigma, strength
+                )
+                pipe.set_gradual_latent(gradual_latent)
 
             # prepare seed
             if seeds is not None:  # given in prompt
@@ -3837,6 +3755,19 @@ def setup_parser() -> argparse.ArgumentParser:
         default=3,
         help="steps to increase size of latents every this steps for Gradual Latent / Gradual Latentでlatentsのサイズをこのステップごとに上げる",
     )
+    parser.add_argument(
+        "--gradual_latent_s_noise",
+        type=float,
+        default=1.0,
+        help="s_noise for Gradual Latent / Gradual Latentのs_noise",
+    )
+    parser.add_argument(
+        "--gradual_latent_unsharp_params",
+        type=str,
+        default=None,
+        help="unsharp mask parameters for Gradual Latent: ksize, sigma, strength. `3,0.5,0.5` is recommended /"
+        + " Gradual Latentのunsharp maskのパラメータ: ksize, sigma, strength. `3,0.5,0.5` が推奨",
+    )
 
     return parser
 
diff --git a/library/utils.py b/library/utils.py
index 7d801a67..1ccb141c 100644
--- a/library/utils.py
+++ b/library/utils.py
@@ -1,6 +1,186 @@
 import threading
+import torch
+from torchvision import transforms
 from typing import *
+from diffusers import EulerAncestralDiscreteScheduler
+import diffusers.schedulers.scheduling_euler_ancestral_discrete
+from diffusers.schedulers.scheduling_euler_ancestral_discrete import EulerAncestralDiscreteSchedulerOutput
 
 
 def fire_in_thread(f, *args, **kwargs):
-    threading.Thread(target=f, args=args, kwargs=kwargs).start()
\ No newline at end of file
+    threading.Thread(target=f, args=args, kwargs=kwargs).start()
+
+
+# TODO make inf_utils.py
+
+
+# region Gradual Latent hires fix
+
+
+class GradualLatent:
+    def __init__(
+        self,
+        ratio,
+        start_timesteps,
+        every_n_steps,
+        ratio_step,
+        s_noise=1.0,
+        gaussian_blur_ksize=None,
+        gaussian_blur_sigma=0.5,
+        gaussian_blur_strength=0.5,
+    ):
+        self.ratio = ratio
+        self.start_timesteps = start_timesteps
+        self.every_n_steps = every_n_steps
+        self.ratio_step = ratio_step
+        self.s_noise = s_noise
+        self.gaussian_blur_ksize = gaussian_blur_ksize
+        self.gaussian_blur_sigma = gaussian_blur_sigma
+        self.gaussian_blur_strength = gaussian_blur_strength
+
+    def __str__(self) -> str:
+        return (
+            f"GradualLatent(ratio={self.ratio}, start_timesteps={self.start_timesteps}, "
+            + f"every_n_steps={self.every_n_steps}, ratio_step={self.ratio_step}, s_noise={self.s_noise}, "
+            + f"gaussian_blur_ksize={self.gaussian_blur_ksize}, gaussian_blur_sigma={self.gaussian_blur_sigma}, gaussian_blur_strength={self.gaussian_blur_strength})"
+        )
+
+    def apply_unsharp_mask(self, x: torch.Tensor):
+        if self.gaussian_blur_ksize is None:
+            return x
+        blurred = transforms.functional.gaussian_blur(x, self.gaussian_blur_ksize, self.gaussian_blur_sigma)
+        # mask = torch.sigmoid((x - blurred) * self.gaussian_blur_strength)
+        mask = (x - blurred) * self.gaussian_blur_strength
+        sharpened = x + mask
+        return sharpened
+
+
+class EulerAncestralDiscreteSchedulerGL(EulerAncestralDiscreteScheduler):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.resized_size = None
+        self.gradual_latent = None
+
+    def set_gradual_latent_params(self, size, gradual_latent: GradualLatent):
+        self.resized_size = size
+        self.gradual_latent = gradual_latent
+
+    def step(
+        self,
+        model_output: torch.FloatTensor,
+        timestep: Union[float, torch.FloatTensor],
+        sample: torch.FloatTensor,
+        generator: Optional[torch.Generator] = None,
+        return_dict: bool = True,
+    ) -> Union[EulerAncestralDiscreteSchedulerOutput, Tuple]:
+        """
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+        process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`float`):
+                The current discrete timestep in the diffusion chain.
+            sample (`torch.FloatTensor`):
+                A current instance of a sample created by the diffusion process.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`):
+                Whether or not to return a
+                [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or tuple.
+
+        Returns:
+            [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or `tuple`:
+                If return_dict is `True`,
+                [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] is returned,
+                otherwise a tuple is returned where the first element is the sample tensor.
+
+        """
+
+        if isinstance(timestep, int) or isinstance(timestep, torch.IntTensor) or isinstance(timestep, torch.LongTensor):
+            raise ValueError(
+                (
+                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
+                    " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass"
+                    " one of the `scheduler.timesteps` as a timestep."
+                ),
+            )
+
+        if not self.is_scale_input_called:
+            # logger.warning(
+            print(
+                "The `scale_model_input` function should be called before `step` to ensure correct denoising. "
+                "See `StableDiffusionPipeline` for a usage example."
+            )
+
+        if self.step_index is None:
+            self._init_step_index(timestep)
+
+        sigma = self.sigmas[self.step_index]
+
+        # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
+        if self.config.prediction_type == "epsilon":
+            pred_original_sample = sample - sigma * model_output
+        elif self.config.prediction_type == "v_prediction":
+            # * c_out + input * c_skip
+            pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
+        elif self.config.prediction_type == "sample":
+            raise NotImplementedError("prediction_type not implemented yet: sample")
+        else:
+            raise ValueError(f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`")
+
+        sigma_from = self.sigmas[self.step_index]
+        sigma_to = self.sigmas[self.step_index + 1]
+        sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5
+        sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
+
+        # 2. Convert to an ODE derivative
+        derivative = (sample - pred_original_sample) / sigma
+
+        dt = sigma_down - sigma
+
+        prev_sample = sample + derivative * dt
+
+        device = model_output.device
+        if self.resized_size is None:
+            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=device, generator=generator
+            )
+            s_noise = 1.0
+        else:
+            print(
+                "resized_size", self.resized_size, "model_output.shape", model_output.shape, "prev_sample.shape", prev_sample.shape
+            )
+            org_dtype = prev_sample.dtype
+            if org_dtype == torch.bfloat16:
+                prev_sample = prev_sample.float()
+
+            prev_sample = torch.nn.functional.interpolate(
+                prev_sample.float(), size=self.resized_size, mode="bicubic", align_corners=False
+            ).to(dtype=org_dtype)
+
+            # apply unsharp mask / アンシャープマスクを適用する
+            if self.gradual_latent.gaussian_blur_ksize:
+                prev_sample = self.gradual_latent.apply_unsharp_mask(prev_sample)
+
+            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
+                (model_output.shape[0], model_output.shape[1], self.resized_size[0], self.resized_size[1]),
+                dtype=model_output.dtype,
+                device=device,
+                generator=generator,
+            )
+            s_noise = self.gradual_latent.s_noise
+
+        prev_sample = prev_sample + noise * sigma_up * s_noise
+
+        # upon completion increase step index by one
+        self._step_index += 1
+
+        if not return_dict:
+            return (prev_sample,)
+
+        return EulerAncestralDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
+
+
+# endregion
diff --git a/sdxl_gen_img.py b/sdxl_gen_img.py
index 9e94d4f6..88031803 100755
--- a/sdxl_gen_img.py
+++ b/sdxl_gen_img.py
@@ -60,6 +60,7 @@ from networks.lora import LoRANetwork
 from library.sdxl_original_unet import InferSdxlUNet2DConditionModel
 from library.original_unet import FlashAttentionFunction
 from networks.control_net_lllite import ControlNetLLLite
+from library.utils import GradualLatent, EulerAncestralDiscreteSchedulerGL
 
 # scheduler:
 SCHEDULER_LINEAR_START = 0.00085
@@ -345,7 +346,7 @@ class PipelineLike:
         self.control_nets: List[ControlNetLLLite] = []
         self.control_net_enabled = True  # control_netsが空ならTrueでもFalseでもControlNetは動作しない
 
-        self.gradual_latent = None
+        self.gradual_latent: GradualLatent = None
 
     # Textual Inversion
     def add_token_replacement(self, text_encoder_index, target_token_id, rep_token_ids):
@@ -785,13 +786,13 @@ class PipelineLike:
 
         enable_gradual_latent = False
         if self.gradual_latent:
-            if not hasattr(self.scheduler, "set_resized_size"):
+            if not hasattr(self.scheduler, "set_gradual_latent_params"):
                 print("gradual_latent is not supported for this scheduler. Ignoring.")
Ignoring.") print(self.scheduler.__class__.__name__) else: enable_gradual_latent = True - current_ratio, start_timesteps, every_n_steps, ratio_step = self.gradual_latent step_elapsed = 1000 + current_ratio = self.gradual_latent.ratio # first, we downscale the latents to the specified ratio / 最初に指定された比率にlatentsをダウンスケールする height, width = latents.shape[-2:] @@ -803,25 +804,27 @@ class PipelineLike: ).to(org_dtype) # apply unsharp mask / アンシャープマスクを適用する - blurred = torchvision.transforms.transforms.GaussianBlur(3, sigma=(0.5, 0.5))(latents) - latents = latents + (latents - blurred) * 0.5 + if self.gradual_latent.gaussian_blur_ksize: + latents = self.gradual_latent.apply_unshark_mask(latents) for i, t in enumerate(tqdm(timesteps)): resized_size = None if enable_gradual_latent: # gradually upscale the latents / latentsを徐々にアップスケールする - if t < start_timesteps and current_ratio < 1.0 and step_elapsed >= every_n_steps: - print("upscale") - current_ratio = min(current_ratio + ratio_step, 1.0) - h = ( - int(height * current_ratio) // 8 * 8 - ) # make divisible by 8 because size of latents must be divisible at bottom of UNet + if ( + t < self.gradual_latent.start_timesteps + and current_ratio < 1.0 + and step_elapsed >= self.gradual_latent.every_n_steps + ): + current_ratio = min(current_ratio + self.gradual_latent.ratio_step, 1.0) + # make divisible by 8 because size of latents must be divisible at bottom of UNet + h = int(height * current_ratio) // 8 * 8 w = int(width * current_ratio) // 8 * 8 resized_size = (h, w) - self.scheduler.set_resized_size(resized_size) + self.scheduler.set_gradual_latent_params(resized_size, self.gradual_latent) step_elapsed = 0 else: - self.scheduler.set_resized_size(None) + self.scheduler.set_gradual_latent_params(None, None) step_elapsed += 1 # expand the latents if we are doing classifier free guidance @@ -1427,141 +1430,6 @@ def handle_dynamic_prompt_variants(prompt, repeat_count): # endregion -# region Gradual Latent hires fix - -import diffusers.schedulers.scheduling_euler_ancestral_discrete -from diffusers.schedulers.scheduling_euler_ancestral_discrete import EulerAncestralDiscreteSchedulerOutput - - -class EulerAncestralDiscreteSchedulerGL(EulerAncestralDiscreteScheduler): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.resized_size = None - - def set_resized_size(self, size, s_noise=0.5): - self.resized_size = size - self.s_noise = s_noise - - def step( - self, - model_output: torch.FloatTensor, - timestep: Union[float, torch.FloatTensor], - sample: torch.FloatTensor, - generator: Optional[torch.Generator] = None, - return_dict: bool = True, - ) -> Union[EulerAncestralDiscreteSchedulerOutput, Tuple]: - """ - Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion - process from the learned model outputs (most often the predicted noise). - - Args: - model_output (`torch.FloatTensor`): - The direct output from learned diffusion model. - timestep (`float`): - The current discrete timestep in the diffusion chain. - sample (`torch.FloatTensor`): - A current instance of a sample created by the diffusion process. - generator (`torch.Generator`, *optional*): - A random number generator. - return_dict (`bool`): - Whether or not to return a - [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or tuple. 
-
-        Returns:
-            [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or `tuple`:
-                If return_dict is `True`,
-                [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] is returned,
-                otherwise a tuple is returned where the first element is the sample tensor.
-
-        """
-
-        if isinstance(timestep, int) or isinstance(timestep, torch.IntTensor) or isinstance(timestep, torch.LongTensor):
-            raise ValueError(
-                (
-                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
-                    " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass"
-                    " one of the `scheduler.timesteps` as a timestep."
-                ),
-            )
-
-        if not self.is_scale_input_called:
-            logger.warning(
-                "The `scale_model_input` function should be called before `step` to ensure correct denoising. "
-                "See `StableDiffusionPipeline` for a usage example."
-            )
-
-        if self.step_index is None:
-            self._init_step_index(timestep)
-
-        sigma = self.sigmas[self.step_index]
-
-        # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
-        if self.config.prediction_type == "epsilon":
-            pred_original_sample = sample - sigma * model_output
-        elif self.config.prediction_type == "v_prediction":
-            # * c_out + input * c_skip
-            pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
-        elif self.config.prediction_type == "sample":
-            raise NotImplementedError("prediction_type not implemented yet: sample")
-        else:
-            raise ValueError(f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`")
-
-        sigma_from = self.sigmas[self.step_index]
-        sigma_to = self.sigmas[self.step_index + 1]
-        sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5
-        sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
-
-        # 2. Convert to an ODE derivative
-        derivative = (sample - pred_original_sample) / sigma
-
-        dt = sigma_down - sigma
-
-        prev_sample = sample + derivative * dt
-
-        device = model_output.device
-        if self.resized_size is None:
-            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
-                model_output.shape, dtype=model_output.dtype, device=device, generator=generator
-            )
-            s_noise = 1.0
-        else:
-            print(
-                "resized_size", self.resized_size, "model_output.shape", model_output.shape, "prev_sample.shape", prev_sample.shape
-            )
-            org_dtype = prev_sample.dtype
-            if org_dtype == torch.bfloat16:
-                prev_sample = prev_sample.float()
-
-            prev_sample = torch.nn.functional.interpolate(
-                prev_sample.float(), size=self.resized_size, mode="bicubic", align_corners=False
-            ).to(dtype=org_dtype)
-
-            # apply unsharp mask / アンシャープマスクを適用する
-            blurred = torchvision.transforms.transforms.GaussianBlur(3, sigma=(0.5, 0.5))(prev_sample)
-            prev_sample = prev_sample + (prev_sample - blurred) * 0.5
-
-            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
-                (model_output.shape[0], model_output.shape[1], self.resized_size[0], self.resized_size[1]),
-                dtype=model_output.dtype,
-                device=device,
-                generator=generator,
-            )
-            s_noise = self.s_noise
-
-        prev_sample = prev_sample + noise * sigma_up * s_noise
-
-        # upon completion increase step index by one
-        self._step_index += 1
-
-        if not return_dict:
-            return (prev_sample,)
-
-        return EulerAncestralDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
-
-
-# endregion
-
-
 # def load_clip_l14_336(dtype):
 #     print(f"loading CLIP: {CLIP_ID_L14_336}")
 #     text_encoder = CLIPTextModel.from_pretrained(CLIP_ID_L14_336, torch_dtype=dtype)
@@ -1960,11 +1828,21 @@ def main(args):
 
     # Gradual Latent
     if args.gradual_latent_timesteps is not None:
-        gradual_latent = (
+        if args.gradual_latent_unsharp_params:
+            ksize, sigma, strength = [float(v) for v in args.gradual_latent_unsharp_params.split(",")]
+            ksize = int(ksize)
+        else:
+            ksize, sigma, strength = None, None, None
+
+        gradual_latent = GradualLatent(
             args.gradual_latent_ratio,
             args.gradual_latent_timesteps,
             args.gradual_latent_every_n_steps,
             args.gradual_latent_ratio_step,
+            args.gradual_latent_s_noise,
+            ksize,
+            sigma,
+            strength,
         )
         pipe.set_gradual_latent(gradual_latent)
 
@@ -2570,6 +2448,8 @@ def main(args):
                 gl_ratio = args.gradual_latent_ratio
                 gl_every_n_steps = args.gradual_latent_every_n_steps
                 gl_ratio_step = args.gradual_latent_ratio_step
+                gl_s_noise = args.gradual_latent_s_noise
+                gl_unsharp_params = args.gradual_latent_unsharp_params
 
                 prompt_args = raw_prompt.strip().split(" --")
                 prompt = prompt_args[0]
@@ -2741,6 +2621,20 @@ def main(args):
                        print(f"gradual latent ratio step: {gl_ratio_step}")
                        continue
 
+                    m = re.match(r"glsn ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent s noise
+                        gl_s_noise = float(m.group(1))
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent s noise: {gl_s_noise}")
+                        continue
+
+                    m = re.match(r"glus ([\d\.\-,]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent unsharp params
+                        gl_unsharp_params = m.group(1)
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent unsharp params: {gl_unsharp_params}")
+                        continue
+
                 except ValueError as ex:
                     print(f"Exception in parsing / 解析エラー: {parg}")
                     print(ex)
@@ -2755,7 +2649,15 @@ def main(args):
             if gl_timesteps is not None:
                 if gl_timesteps < 0:
                     gl_timesteps = args.gradual_latent_timesteps or 650
-                pipe.set_gradual_latent((gl_ratio, gl_timesteps, gl_every_n_steps, gl_ratio_step))
+                if gl_unsharp_params is not None:
+                    ksize, sigma, strength = [float(v) for v in gl_unsharp_params.split(",")]
+                    ksize = int(ksize)
+                else:
+                    ksize, sigma, strength = None, None, None
+                gradual_latent = GradualLatent(
+                    gl_ratio, gl_timesteps, gl_every_n_steps, gl_ratio_step, gl_s_noise, ksize, sigma, strength
+                )
+                pipe.set_gradual_latent(gradual_latent)
 
             # prepare seed
             if seeds is not None:  # given in prompt
@@ -3144,6 +3046,19 @@ def setup_parser() -> argparse.ArgumentParser:
         default=3,
         help="steps to increase size of latents every this steps for Gradual Latent / Gradual Latentでlatentsのサイズをこのステップごとに上げる",
     )
+    parser.add_argument(
+        "--gradual_latent_s_noise",
+        type=float,
+        default=1.0,
+        help="s_noise for Gradual Latent / Gradual Latentのs_noise",
+    )
+    parser.add_argument(
+        "--gradual_latent_unsharp_params",
+        type=str,
+        default=None,
+        help="unsharp mask parameters for Gradual Latent: ksize, sigma, strength. `3,0.5,0.5` is recommended /"
+        + " Gradual Latentのunsharp maskのパラメータ: ksize, sigma, strength. `3,0.5,0.5` が推奨",
+    )
 
     #
     # parser.add_argument(
     #     "--control_net_image_path", type=str, default=None, nargs="*", help="image for ControlNet guidance / ControlNetでガイドに使う画像"
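
The pieces added in this patch fit together as sketched below. This is an illustrative note rather than part of the diff: `pipe` stands in for a `PipelineLike` instance built in `main()`, and the `ratio`/`ratio_step` values are placeholders (their argparse defaults are not shown here). `start_timesteps=650` mirrors the fallback used in the override path, `every_n_steps=3` and `s_noise=1.0` mirror the argparse defaults above, and `3,0.5,0.5` is the unsharp-mask setting recommended in the `--gradual_latent_unsharp_params` help text.

from library.utils import GradualLatent, EulerAncestralDiscreteSchedulerGL

# Gradual Latent only activates when the scheduler exposes set_gradual_latent_params(),
# so swap in the GL variant of the Euler Ancestral scheduler.
pipe.scheduler = EulerAncestralDiscreteSchedulerGL.from_config(pipe.scheduler.config)

pipe.set_gradual_latent(
    GradualLatent(
        0.5,  # ratio: initial downscale of the latents (placeholder value)
        650,  # start_timesteps: upscaling starts once t drops below this
        3,  # every_n_steps: upscale at most every N steps (argparse default)
        0.125,  # ratio_step: ratio increment per upscale event (placeholder value)
        s_noise=1.0,  # scales the ancestral noise added on resize steps (argparse default)
        gaussian_blur_ksize=3,  # unsharp mask ksize, sigma, strength;
        gaussian_blur_sigma=0.5,  # `3,0.5,0.5` per the help text
        gaussian_blur_strength=0.5,
    )
)

# CLI equivalent, using the options added in this diff:
#   --gradual_latent_timesteps 650 --gradual_latent_ratio 0.5 --gradual_latent_ratio_step 0.125 \
#   --gradual_latent_every_n_steps 3 --gradual_latent_s_noise 1.0 --gradual_latent_unsharp_params 3,0.5,0.5
# Per-prompt overrides parsed above: "--glsn 1.0" and "--glus 3,0.5,0.5".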