mirror of https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-09 06:45:09 +00:00
add gradual latent
README.md (32 lines changed)
@@ -1,3 +1,35 @@
# Gradual Latent について

latentのサイズを徐々に大きくしていくHires fixです。`sdxl_gen_img.py` に以下のオプションが追加されています。

- `--gradual_latent_timesteps` : latentのサイズを大きくし始めるタイムステップを指定します。デフォルトは None で、Gradual Latentを使用しません。
- `--gradual_latent_ratio` : latentの初期サイズを指定します。デフォルトは 0.5 で、デフォルトの latent サイズの半分のサイズから始めます。
- `--gradual_latent_ratio_step`: latentのサイズを大きくする割合を指定します。デフォルトは 0.125 で、latentのサイズを 0.625, 0.75, 0.875, 1.0 と徐々に大きくします。
- `--gradual_latent_every_n_steps`: latentのサイズを大きくする間隔を指定します。デフォルトは 3 で、3ステップごとに latent のサイズを大きくします。

それぞれのオプションは、プロンプトオプション `--glt`、`--glr`、`--gls`、`--gle` でも指定できます。

__サンプラーに `euler_a` を指定してください。__ 他のサンプラーでは動作しません。

`gen_img_diffusers.py` にも同様のオプションが追加されていますが、試した範囲ではどうやっても乱れた画像しか生成できませんでした。
# About Gradual Latent

Gradual Latent is a Hires fix that gradually increases the size of the latent. The following options have been added to `sdxl_gen_img.py`.

- `--gradual_latent_timesteps`: Specifies the timestep at which to start increasing the size of the latent. The default is None, which means Gradual Latent is not used.
- `--gradual_latent_ratio`: Specifies the initial size of the latent. The default is 0.5, which means it starts at half the default latent size.
- `--gradual_latent_ratio_step`: Specifies the ratio by which to increase the size of the latent. The default is 0.125, which means the latent size is gradually increased to 0.625, 0.75, 0.875, and 1.0.
- `--gradual_latent_every_n_steps`: Specifies the interval at which to increase the size of the latent. The default is 3, which means the latent size is increased every 3 steps.

Each option can also be specified as a prompt option: `--glt`, `--glr`, `--gls`, and `--gle`.
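For example (illustrative values, not from the commit), appending `--glt 750 --glr 0.5 --gls 0.125 --gle 3` to a prompt line enables the fix from timestep 750 with a schedule equivalent to the defaults; the prompt parser added below matches `glt`, `glr`, `gls`, and `gle` followed by a number.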
__Specify `euler_a` as the sampler.__ It does not work with other samplers.

`gen_img_diffusers.py` has the same options added, but in my testing it produced only distorted images no matter what I tried.
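As a rough sketch (not part of the commit), this is how the default `ratio`, `ratio_step`, and `every_n_steps` values translate into a resize schedule once the timestep falls below `--gradual_latent_timesteps`; the latent size and step numbering are hypothetical, and the divisible-by-8 rounding done in `sdxl_gen_img.py` is ignored.

    # Hypothetical walk-through of the default Gradual Latent schedule.
    ratio, ratio_step, every_n_steps = 0.5, 0.125, 3  # documented defaults
    height, width = 128, 128                          # e.g. latent size for a 1024x1024 image
    schedule, step = [], 0
    while ratio < 1.0:
        step += every_n_steps                         # upscale every N sampler steps
        ratio = min(ratio + ratio_step, 1.0)
        schedule.append((step, int(height * ratio), int(width * ratio)))
    print(schedule)  # [(3, 80, 80), (6, 96, 96), (9, 112, 112), (12, 128, 128)]

A hedged example invocation (other arguments such as the model path and prompt are assumed, not shown): `python sdxl_gen_img.py ... --sampler euler_a --gradual_latent_timesteps 750 --gradual_latent_ratio 0.5 --gradual_latent_ratio_step 0.125 --gradual_latent_every_n_steps 3`.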
---

__SDXL is now supported. The sdxl branch has been merged into the main branch. If you update the repository, please follow the upgrade instructions. Also, the version of accelerate has been updated, so please run accelerate config again.__ The documentation for SDXL training is [here](./README.md#sdxl-training).

This repository contains training, generation and utility scripts for Stable Diffusion.
gen_img_diffusers.py
@@ -484,6 +484,14 @@ class PipelineLike:
     def set_control_nets(self, ctrl_nets):
         self.control_nets = ctrl_nets
 
+    def set_gradual_latent(self, gradual_latent):
+        if gradual_latent is None:
+            print("gradual_latent is disabled")
+            self.gradual_latent = None
+        else:
+            print(f"gradual_latent is enabled: {gradual_latent}")
+            self.gradual_latent = gradual_latent  # (ds_ratio, start_timesteps, every_n_steps, ratio_step)
+
     # region xformersとか使う部分:独自に書き換えるので関係なし
 
     def enable_xformers_memory_efficient_attention(self):
@@ -958,7 +966,41 @@ class PipelineLike:
         else:
             text_emb_last = text_embeddings
 
+        enable_gradual_latent = False
+        if self.gradual_latent:
+            if not hasattr(self.scheduler, "set_resized_size"):
+                print("gradual_latent is not supported for this scheduler. Ignoring.")
+                print(self.scheduler.__class__.__name__)
+            else:
+                enable_gradual_latent = True
+                current_ratio, start_timesteps, every_n_steps, ratio_step = self.gradual_latent
+                step_elapsed = 1000
+
+                # first, we downscale the latents to the specified ratio / 最初に指定された比率にlatentsをダウンスケールする
+                height, width = latents.shape[-2:]
+                org_dtype = latents.dtype
+                if org_dtype == torch.bfloat16:
+                    latents = latents.float()
+                latents = torch.nn.functional.interpolate(
+                    latents, scale_factor=current_ratio, mode="bicubic", align_corners=False
+                ).to(org_dtype)
+
         for i, t in enumerate(tqdm(timesteps)):
+            resized_size = None
+            if enable_gradual_latent:
+                # gradually upscale the latents / latentsを徐々にアップスケールする
+                if t < start_timesteps and current_ratio < 1.0 and step_elapsed >= every_n_steps:
+                    print("upscale")
+                    current_ratio = min(current_ratio + ratio_step, 1.0)
+                    h = int(height * current_ratio)  # // 8 * 8
+                    w = int(width * current_ratio)  # // 8 * 8
+                    resized_size = (h, w)
+                    self.scheduler.set_resized_size(resized_size)
+                    step_elapsed = 0
+                else:
+                    self.scheduler.set_resized_size(None)
+                step_elapsed += 1
+
             # expand the latents if we are doing classifier free guidance
             latent_model_input = latents.repeat((num_latent_input, 1, 1, 1))
             latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
@@ -2112,6 +2154,133 @@ def handle_dynamic_prompt_variants(prompt, repeat_count):
     return prompts
 
 
+# endregion
+
+# region Gradual Latent hires fix
+
+import diffusers.schedulers.scheduling_euler_ancestral_discrete
+from diffusers.schedulers.scheduling_euler_ancestral_discrete import EulerAncestralDiscreteSchedulerOutput
+
+
+class EulerAncestralDiscreteSchedulerGL(EulerAncestralDiscreteScheduler):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.resized_size = None
+
+    def set_resized_size(self, size):
+        self.resized_size = size
+
+    def step(
+        self,
+        model_output: torch.FloatTensor,
+        timestep: Union[float, torch.FloatTensor],
+        sample: torch.FloatTensor,
+        generator: Optional[torch.Generator] = None,
+        return_dict: bool = True,
+    ) -> Union[EulerAncestralDiscreteSchedulerOutput, Tuple]:
+        """
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+        process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`float`):
+                The current discrete timestep in the diffusion chain.
+            sample (`torch.FloatTensor`):
+                A current instance of a sample created by the diffusion process.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`):
+                Whether or not to return a
+                [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or tuple.
+
+        Returns:
+            [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or `tuple`:
+                If return_dict is `True`,
+                [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] is returned,
+                otherwise a tuple is returned where the first element is the sample tensor.
+
+        """
+
+        if isinstance(timestep, int) or isinstance(timestep, torch.IntTensor) or isinstance(timestep, torch.LongTensor):
+            raise ValueError(
+                (
+                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
+                    " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass"
+                    " one of the `scheduler.timesteps` as a timestep."
+                ),
+            )
+
+        if not self.is_scale_input_called:
+            logger.warning(
+                "The `scale_model_input` function should be called before `step` to ensure correct denoising. "
+                "See `StableDiffusionPipeline` for a usage example."
+            )
+
+        if self.step_index is None:
+            self._init_step_index(timestep)
+
+        sigma = self.sigmas[self.step_index]
+
+        # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
+        if self.config.prediction_type == "epsilon":
+            pred_original_sample = sample - sigma * model_output
+        elif self.config.prediction_type == "v_prediction":
+            # * c_out + input * c_skip
+            pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
+        elif self.config.prediction_type == "sample":
+            raise NotImplementedError("prediction_type not implemented yet: sample")
+        else:
+            raise ValueError(f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`")
+
+        sigma_from = self.sigmas[self.step_index]
+        sigma_to = self.sigmas[self.step_index + 1]
+        sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5
+        sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
+
+        # 2. Convert to an ODE derivative
+        derivative = (sample - pred_original_sample) / sigma
+
+        dt = sigma_down - sigma
+
+        prev_sample = sample + derivative * dt
+
+        device = model_output.device
+        if self.resized_size is None:
+            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=device, generator=generator
+            )
+        else:
+            print(
+                "resized_size", self.resized_size, "model_output.shape", model_output.shape, "prev_sample.shape", prev_sample.shape
+            )
+            org_dtype = prev_sample.dtype
+            if org_dtype == torch.bfloat16:
+                prev_sample = prev_sample.float()
+
+            prev_sample = torch.nn.functional.interpolate(
+                prev_sample.float(), size=self.resized_size, mode="bicubic", align_corners=False
+            ).to(dtype=org_dtype)
+
+            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
+                (model_output.shape[0], model_output.shape[1], self.resized_size[0], self.resized_size[1]),
+                dtype=model_output.dtype,
+                device=device,
+                generator=generator,
+            )
+
+        prev_sample = prev_sample + noise * sigma_up
+
+        # upon completion increase step index by one
+        self._step_index += 1
+
+        if not return_dict:
+            return (prev_sample,)
+
+        return EulerAncestralDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
+
+
 # endregion
 
 
@@ -2249,7 +2418,7 @@ def main(args):
         scheduler_cls = EulerDiscreteScheduler
         scheduler_module = diffusers.schedulers.scheduling_euler_discrete
     elif args.sampler == "euler_a" or args.sampler == "k_euler_a":
-        scheduler_cls = EulerAncestralDiscreteScheduler
+        scheduler_cls = EulerAncestralDiscreteSchedulerGL
         scheduler_module = diffusers.schedulers.scheduling_euler_ancestral_discrete
     elif args.sampler == "dpmsolver" or args.sampler == "dpmsolver++":
         scheduler_cls = DPMSolverMultistepScheduler
@@ -2505,6 +2674,16 @@ def main(args):
     if args.ds_depth_1 is not None:
         unet.set_deep_shrink(args.ds_depth_1, args.ds_timesteps_1, args.ds_depth_2, args.ds_timesteps_2, args.ds_ratio)
 
+    # Gradual Latent
+    if args.gradual_latent_ratio is not None:
+        gradual_latent = (
+            args.gradual_latent_ratio,
+            args.gradual_latent_timesteps,
+            args.gradual_latent_every_n_steps,
+            args.gradual_latent_ratio_step,
+        )
+        pipe.set_gradual_latent(gradual_latent)
+
     # Extended Textual Inversion および Textual Inversionを処理する
     if args.XTI_embeddings:
         diffusers.models.UNet2DConditionModel.forward = unet_forward_XTI
@@ -3096,6 +3275,12 @@ def main(args):
             ds_timesteps_2 = args.ds_timesteps_2
             ds_ratio = args.ds_ratio
 
+            # Gradual Latent
+            gl_timesteps = None  # means no override
+            gl_ratio = args.gradual_latent_ratio
+            gl_every_n_steps = args.gradual_latent_every_n_steps
+            gl_ratio_step = args.gradual_latent_ratio_step
+
             prompt_args = raw_prompt.strip().split(" --")
             prompt = prompt_args[0]
             print(f"prompt {prompt_index+1}/{len(prompt_list)}: {prompt}")
@@ -3202,6 +3387,34 @@ def main(args):
                         print(f"deep shrink ratio: {ds_ratio}")
                         continue
 
+                    # Gradual Latent
+                    m = re.match(r"glt ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent timesteps
+                        gl_timesteps = int(m.group(1))
+                        print(f"gradual latent timesteps: {gl_timesteps}")
+                        continue
+
+                    m = re.match(r"glr ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent ratio
+                        gl_ratio = float(m.group(1))
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent ratio: {gl_ratio}")
+                        continue
+
+                    m = re.match(r"gle ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent every n steps
+                        gl_every_n_steps = int(m.group(1))
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent every n steps: {gl_every_n_steps}")
+                        continue
+
+                    m = re.match(r"gls ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent ratio step
+                        gl_ratio_step = float(m.group(1))
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent ratio step: {gl_ratio_step}")
+                        continue
+
                 except ValueError as ex:
                     print(f"Exception in parsing / 解析エラー: {parg}")
                     print(ex)
@@ -3212,6 +3425,12 @@ def main(args):
                 ds_depth_1 = args.ds_depth_1 or 3
                 unet.set_deep_shrink(ds_depth_1, ds_timesteps_1, ds_depth_2, ds_timesteps_2, ds_ratio)
 
+            # override Gradual Latent
+            if gl_ratio is not None:
+                if gl_timesteps is None:
+                    gl_timesteps = args.gradual_latent_timesteps or 650
+                pipe.set_gradual_latent((gl_ratio, gl_timesteps, gl_every_n_steps, gl_ratio_step))
+
             # prepare seed
             if seeds is not None:  # given in prompt
                 # 数が足りないなら前のをそのまま使う
@@ -3585,6 +3804,32 @@ def setup_parser() -> argparse.ArgumentParser:
         "--ds_ratio", type=float, default=0.5, help="Deep Shrink ratio for downsampling / Deep Shrinkのdownsampling比率"
     )
 
+    # gradual latent
+    parser.add_argument(
+        "--gradual_latent_timesteps",
+        type=int,
+        default=None,
+        help="enable Gradual Latent hires fix and apply upscaling from this timesteps / Gradual Latent hires fixをこのtimestepsで有効にし、このtimestepsからアップスケーリングを適用する",
+    )
+    parser.add_argument(
+        "--gradual_latent_ratio",
+        type=float,
+        default=0.5,
+        help=" this size ratio, 0.5 means 1/2 / Gradual Latent hires fixをこのサイズ比率で有効にする、0.5は1/2を意味する",
+    )
+    parser.add_argument(
+        "--gradual_latent_ratio_step",
+        type=float,
+        default=0.125,
+        help="step to increase ratio for Gradual Latent / Gradual Latentのratioをどのくらいずつ上げるか",
+    )
+    parser.add_argument(
+        "--gradual_latent_every_n_steps",
+        type=int,
+        default=3,
+        help="steps to increase size of latents every this steps for Gradual Latent / Gradual Latentでlatentsのサイズをこのステップごとに上げる",
+    )
+
     return parser
sdxl_gen_img.py (318 lines changed)
@@ -345,6 +345,8 @@ class PipelineLike:
         self.control_nets: List[ControlNetLLLite] = []
         self.control_net_enabled = True  # control_netsが空ならTrueでもFalseでもControlNetは動作しない
 
+        self.gradual_latent = None
+
     # Textual Inversion
     def add_token_replacement(self, text_encoder_index, target_token_id, rep_token_ids):
         self.token_replacements_list[text_encoder_index][target_token_id] = rep_token_ids
@@ -375,6 +377,14 @@ class PipelineLike:
     def set_control_nets(self, ctrl_nets):
         self.control_nets = ctrl_nets
 
+    def set_gradual_latent(self, gradual_latent):
+        if gradual_latent is None:
+            print("gradual_latent is disabled")
+            self.gradual_latent = None
+        else:
+            print(f"gradual_latent is enabled: {gradual_latent}")
+            self.gradual_latent = gradual_latent  # (ds_ratio, start_timesteps, every_n_steps, ratio_step)
+
     @torch.no_grad()
     def __call__(
         self,
@@ -706,7 +716,108 @@ class PipelineLike:
                 control_net.set_cond_image(None)
 
         each_control_net_enabled = [self.control_net_enabled] * len(self.control_nets)
 
+        # # first, we downscale the latents to the half of the size
+        # # 最初に1/2に縮小する
+        # height, width = latents.shape[-2:]
+        # # latents = torch.nn.functional.interpolate(latents.float(), scale_factor=0.5, mode="bicubic", align_corners=False).to(
+        # #     latents.dtype
+        # # )
+        # latents = latents[:, :, ::2, ::2]
+        # current_scale = 0.5
+
+        # # how much to increase the scale at each step: .125 seems to work well (because it's 1/8?)
+        # # 各ステップに拡大率をどのくらい増やすか:.125がよさそう(たぶん1/8なので)
+        # scale_step = 0.125
+
+        # # timesteps at which to start increasing the scale: 1000 seems to be enough
+        # # 拡大を開始するtimesteps: 1000で十分そうである
+        # start_timesteps = 1000
+
+        # # how many steps to wait before increasing the scale again
+        # # small values leads to blurry images (because the latents are blurry after the upscale, so some denoising might be needed)
+        # # large values leads to flat images
+
+        # # 何ステップごとに拡大するか
+        # # 小さいとボケる(拡大後のlatentsはボケた感じになるので、そこから数stepのdenoiseが必要と思われる)
+        # # 大きすぎると細部が書き込まれずのっぺりした感じになる
+        # every_n_steps = 5
+
+        # scale_step = input("scale step:")
+        # scale_step = float(scale_step)
+        # start_timesteps = input("start timesteps:")
+        # start_timesteps = int(start_timesteps)
+        # every_n_steps = input("every n steps:")
+        # every_n_steps = int(every_n_steps)
+
+        # # for i, t in enumerate(tqdm(timesteps)):
+        # i = 0
+        # last_step = 0
+        # while i < len(timesteps):
+        #     t = timesteps[i]
+        #     print(f"[{i}] t={t}")
+
+        #     print(i, t, current_scale, latents.shape)
+        #     if t < start_timesteps and current_scale < 1.0 and i % every_n_steps == 0:
+        #         if i == last_step:
+        #             pass
+        #         else:
+        #             print("upscale")
+        #             current_scale = min(current_scale + scale_step, 1.0)
+
+        #             h = int(height * current_scale) // 8 * 8
+        #             w = int(width * current_scale) // 8 * 8
+
+        #             latents = torch.nn.functional.interpolate(latents.float(), size=(h, w), mode="bicubic", align_corners=False).to(
+        #                 latents.dtype
+        #             )
+        #             last_step = i
+        #             i = max(0, i - every_n_steps + 1)
+
+        #             diff = timesteps[i] - timesteps[last_step]
+        #             # resized_init_noise = torch.nn.functional.interpolate(
+        #             #     init_noise.float(), size=(h, w), mode="bicubic", align_corners=False
+        #             # ).to(latents.dtype)
+        #             # latents = self.scheduler.add_noise(latents, resized_init_noise, diff)
+        #             latents = self.scheduler.add_noise(latents, torch.randn_like(latents), diff * 4)
+        #             # latents += torch.randn_like(latents) / 100 * diff
+        #     continue
+
+        enable_gradual_latent = False
+        if self.gradual_latent:
+            if not hasattr(self.scheduler, "set_resized_size"):
+                print("gradual_latent is not supported for this scheduler. Ignoring.")
+                print(self.scheduler.__class__.__name__)
+            else:
+                enable_gradual_latent = True
+                current_ratio, start_timesteps, every_n_steps, ratio_step = self.gradual_latent
+                step_elapsed = 1000
+
+                # first, we downscale the latents to the specified ratio / 最初に指定された比率にlatentsをダウンスケールする
+                height, width = latents.shape[-2:]
+                org_dtype = latents.dtype
+                if org_dtype == torch.bfloat16:
+                    latents = latents.float()
+                latents = torch.nn.functional.interpolate(
+                    latents, scale_factor=current_ratio, mode="bicubic", align_corners=False
+                ).to(org_dtype)
+
         for i, t in enumerate(tqdm(timesteps)):
+            resized_size = None
+            if enable_gradual_latent:
+                # gradually upscale the latents / latentsを徐々にアップスケールする
+                if t < start_timesteps and current_ratio < 1.0 and step_elapsed >= every_n_steps:
+                    print("upscale")
+                    current_ratio = min(current_ratio + ratio_step, 1.0)
+                    h = int(height * current_ratio) // 8 * 8  # make divisible by 8 because size of latents must be divisible at bottom of UNet
+                    w = int(width * current_ratio) // 8 * 8
+                    resized_size = (h, w)
+                    self.scheduler.set_resized_size(resized_size)
+                    step_elapsed = 0
+                else:
+                    self.scheduler.set_resized_size(None)
+                step_elapsed += 1
+
             # expand the latents if we are doing classifier free guidance
             latent_model_input = latents.repeat((num_latent_input, 1, 1, 1))
             latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
@@ -775,6 +886,8 @@ class PipelineLike:
             if is_cancelled_callback is not None and is_cancelled_callback():
                 return None
 
+            i += 1
+
         if return_latents:
             return latents
 
@@ -1306,6 +1419,133 @@ def handle_dynamic_prompt_variants(prompt, repeat_count):
     return prompts
 
 
+# endregion
+
+# region Gradual Latent hires fix
+
+import diffusers.schedulers.scheduling_euler_ancestral_discrete
+from diffusers.schedulers.scheduling_euler_ancestral_discrete import EulerAncestralDiscreteSchedulerOutput
+
+
+class EulerAncestralDiscreteSchedulerGL(EulerAncestralDiscreteScheduler):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.resized_size = None
+
+    def set_resized_size(self, size):
+        self.resized_size = size
+
+    def step(
+        self,
+        model_output: torch.FloatTensor,
+        timestep: Union[float, torch.FloatTensor],
+        sample: torch.FloatTensor,
+        generator: Optional[torch.Generator] = None,
+        return_dict: bool = True,
+    ) -> Union[EulerAncestralDiscreteSchedulerOutput, Tuple]:
+        """
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+        process from the learned model outputs (most often the predicted noise).
+
+        Args:
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`float`):
+                The current discrete timestep in the diffusion chain.
+            sample (`torch.FloatTensor`):
+                A current instance of a sample created by the diffusion process.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`):
+                Whether or not to return a
+                [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or tuple.
+
+        Returns:
+            [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or `tuple`:
+                If return_dict is `True`,
+                [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] is returned,
+                otherwise a tuple is returned where the first element is the sample tensor.
+
+        """
+
+        if isinstance(timestep, int) or isinstance(timestep, torch.IntTensor) or isinstance(timestep, torch.LongTensor):
+            raise ValueError(
+                (
+                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
+                    " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass"
+                    " one of the `scheduler.timesteps` as a timestep."
+                ),
+            )
+
+        if not self.is_scale_input_called:
+            logger.warning(
+                "The `scale_model_input` function should be called before `step` to ensure correct denoising. "
+                "See `StableDiffusionPipeline` for a usage example."
+            )
+
+        if self.step_index is None:
+            self._init_step_index(timestep)
+
+        sigma = self.sigmas[self.step_index]
+
+        # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
+        if self.config.prediction_type == "epsilon":
+            pred_original_sample = sample - sigma * model_output
+        elif self.config.prediction_type == "v_prediction":
+            # * c_out + input * c_skip
+            pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
+        elif self.config.prediction_type == "sample":
+            raise NotImplementedError("prediction_type not implemented yet: sample")
+        else:
+            raise ValueError(f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`")
+
+        sigma_from = self.sigmas[self.step_index]
+        sigma_to = self.sigmas[self.step_index + 1]
+        sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5
+        sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
+
+        # 2. Convert to an ODE derivative
+        derivative = (sample - pred_original_sample) / sigma
+
+        dt = sigma_down - sigma
+
+        prev_sample = sample + derivative * dt
+
+        device = model_output.device
+        if self.resized_size is None:
+            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=device, generator=generator
+            )
+        else:
+            print(
+                "resized_size", self.resized_size, "model_output.shape", model_output.shape, "prev_sample.shape", prev_sample.shape
+            )
+            org_dtype = prev_sample.dtype
+            if org_dtype == torch.bfloat16:
+                prev_sample = prev_sample.float()
+
+            prev_sample = torch.nn.functional.interpolate(
+                prev_sample.float(), size=self.resized_size, mode="bicubic", align_corners=False
+            ).to(dtype=org_dtype)
+
+            noise = diffusers.schedulers.scheduling_euler_ancestral_discrete.randn_tensor(
+                (model_output.shape[0], model_output.shape[1], self.resized_size[0], self.resized_size[1]),
+                dtype=model_output.dtype,
+                device=device,
+                generator=generator,
+            )
+
+        prev_sample = prev_sample + noise * sigma_up
+
+        # upon completion increase step index by one
+        self._step_index += 1
+
+        if not return_dict:
+            return (prev_sample,)
+
+        return EulerAncestralDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
+
+
 # endregion
 
 
@@ -1407,7 +1647,7 @@ def main(args):
         scheduler_module = diffusers.schedulers.scheduling_euler_discrete
         has_clip_sample = False
     elif args.sampler == "euler_a" or args.sampler == "k_euler_a":
-        scheduler_cls = EulerAncestralDiscreteScheduler
+        scheduler_cls = EulerAncestralDiscreteSchedulerGL
         scheduler_module = diffusers.schedulers.scheduling_euler_ancestral_discrete
         has_clip_sample = False
     elif args.sampler == "dpmsolver" or args.sampler == "dpmsolver++":
@@ -1700,6 +1940,16 @@ def main(args):
     if args.ds_depth_1 is not None:
         unet.set_deep_shrink(args.ds_depth_1, args.ds_timesteps_1, args.ds_depth_2, args.ds_timesteps_2, args.ds_ratio)
 
+    # Gradual Latent
+    if args.gradual_latent_ratio is not None:
+        gradual_latent = (
+            args.gradual_latent_ratio,
+            args.gradual_latent_timesteps,
+            args.gradual_latent_every_n_steps,
+            args.gradual_latent_ratio_step,
+        )
+        pipe.set_gradual_latent(gradual_latent)
+
     # Textual Inversionを処理する
     if args.textual_inversion_embeddings:
         token_ids_embeds1 = []
@@ -2297,6 +2547,12 @@ def main(args):
             ds_timesteps_2 = args.ds_timesteps_2
             ds_ratio = args.ds_ratio
 
+            # Gradual Latent
+            gl_timesteps = None  # means no override
+            gl_ratio = args.gradual_latent_ratio
+            gl_every_n_steps = args.gradual_latent_every_n_steps
+            gl_ratio_step = args.gradual_latent_ratio_step
+
             prompt_args = raw_prompt.strip().split(" --")
             prompt = prompt_args[0]
             print(f"prompt {prompt_index+1}/{len(prompt_list)}: {prompt}")
@@ -2439,6 +2695,34 @@ def main(args):
                         print(f"deep shrink ratio: {ds_ratio}")
                         continue
 
+                    # Gradual Latent
+                    m = re.match(r"glt ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent timesteps
+                        gl_timesteps = int(m.group(1))
+                        print(f"gradual latent timesteps: {gl_timesteps}")
+                        continue
+
+                    m = re.match(r"glr ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent ratio
+                        gl_ratio = float(m.group(1))
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent ratio: {gl_ratio}")
+                        continue
+
+                    m = re.match(r"gle ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent every n steps
+                        gl_every_n_steps = int(m.group(1))
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent every n steps: {gl_every_n_steps}")
+                        continue
+
+                    m = re.match(r"gls ([\d\.]+)", parg, re.IGNORECASE)
+                    if m:  # gradual latent ratio step
+                        gl_ratio_step = float(m.group(1))
+                        gl_timesteps = gl_timesteps if gl_timesteps is not None else -1  # -1 means override
+                        print(f"gradual latent ratio step: {gl_ratio_step}")
+                        continue
+
                 except ValueError as ex:
                     print(f"Exception in parsing / 解析エラー: {parg}")
                     print(ex)
@@ -2449,6 +2733,12 @@ def main(args):
                 ds_depth_1 = args.ds_depth_1 or 3
                 unet.set_deep_shrink(ds_depth_1, ds_timesteps_1, ds_depth_2, ds_timesteps_2, ds_ratio)
 
+            # override Gradual Latent
+            if gl_ratio is not None:
+                if gl_timesteps is None:
+                    gl_timesteps = args.gradual_latent_timesteps or 650
+                pipe.set_gradual_latent((gl_ratio, gl_timesteps, gl_every_n_steps, gl_ratio_step))
+
             # prepare seed
             if seeds is not None:  # given in prompt
                 # 数が足りないなら前のをそのまま使う
@@ -2811,6 +3101,32 @@ def setup_parser() -> argparse.ArgumentParser:
         "--ds_ratio", type=float, default=0.5, help="Deep Shrink ratio for downsampling / Deep Shrinkのdownsampling比率"
     )
 
+    # gradual latent
+    parser.add_argument(
+        "--gradual_latent_timesteps",
+        type=int,
+        default=None,
+        help="enable Gradual Latent hires fix and apply upscaling from this timesteps / Gradual Latent hires fixをこのtimestepsで有効にし、このtimestepsからアップスケーリングを適用する",
+    )
+    parser.add_argument(
+        "--gradual_latent_ratio",
+        type=float,
+        default=0.5,
+        help=" this size ratio, 0.5 means 1/2 / Gradual Latent hires fixをこのサイズ比率で有効にする、0.5は1/2を意味する",
+    )
+    parser.add_argument(
+        "--gradual_latent_ratio_step",
+        type=float,
+        default=0.125,
+        help="step to increase ratio for Gradual Latent / Gradual Latentのratioをどのくらいずつ上げるか",
+    )
+    parser.add_argument(
+        "--gradual_latent_every_n_steps",
+        type=int,
+        default=3,
+        help="steps to increase size of latents every this steps for Gradual Latent / Gradual Latentでlatentsのサイズをこのステップごとに上げる",
+    )
+
     # # parser.add_argument(
     #     "--control_net_image_path", type=str, default=None, nargs="*", help="image for ControlNet guidance / ControlNetでガイドに使う画像"
     # )