mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-08 06:28:48 +00:00
feat: add guidance rescale options for Adaptive Projected Guidance in inference
This commit is contained in:
@@ -454,6 +454,9 @@ python hunyuan_image_minimal_inference.py \
|
|||||||
- `--flow_shift`: Flow matching shift parameter (default: 5.0)
|
- `--flow_shift`: Flow matching shift parameter (default: 5.0)
|
||||||
- `--text_encoder_cpu`: Run the text encoders on CPU to reduce VRAM usage
|
- `--text_encoder_cpu`: Run the text encoders on CPU to reduce VRAM usage
|
||||||
- `--vae_chunk_size`: Chunk size for VAE decoding to reduce memory usage (default: None, no chunking). 16 is recommended if enabled.
|
- `--vae_chunk_size`: Chunk size for VAE decoding to reduce memory usage (default: None, no chunking). 16 is recommended if enabled.
|
||||||
|
- `--apg_start_step_general` and `--apg_start_step_ocr`: Start steps for APG (Adaptive Projected Guidance) if using APG during inference. `5` and `38` are the official recommended values for 50 steps. If this value exceeds `--infer_steps`, APG will not be applied.
|
||||||
|
- `--guidance_rescale`: Rescales the guidance for steps before APG starts. Default is `0.0` (no rescaling). If you use this option, a value around `0.5` might be a good starting point.
|
||||||
|
- `--guidance_rescale_apg`: Rescales the guidance for APG. Default is `0.0` (no rescaling). This option doesn't seem to have a large effect, but if you use it, a value around `0.5` might be a good starting point.
|
||||||
|
|
||||||
`--split_attn` is not supported (since inference is done one at a time). `--fp8_vl` is not supported, please use CPU for the text encoder if VRAM is insufficient.
|
`--split_attn` is not supported (since inference is done one at a time). `--fp8_vl` is not supported, please use CPU for the text encoder if VRAM is insufficient.
|
||||||
|
|
||||||
@@ -470,6 +473,9 @@ python hunyuan_image_minimal_inference.py \
|
|||||||
- `--flow_shift`: Flow Matchingシフトパラメータ(デフォルト: 5.0)
|
- `--flow_shift`: Flow Matchingシフトパラメータ(デフォルト: 5.0)
|
||||||
- `--text_encoder_cpu`: テキストエンコーダをCPUで実行してVRAM使用量削減
|
- `--text_encoder_cpu`: テキストエンコーダをCPUで実行してVRAM使用量削減
|
||||||
- `--vae_chunk_size`: VAEデコーディングのチャンクサイズ(デフォルト: None、チャンク処理なし)。有効にする場合は16を推奨。
|
- `--vae_chunk_size`: VAEデコーディングのチャンクサイズ(デフォルト: None、チャンク処理なし)。有効にする場合は16を推奨。
|
||||||
|
- `--apg_start_step_general` と `--apg_start_step_ocr`: 推論中にAPGを使用する場合の開始ステップ。50ステップの場合、公式推奨値はそれぞれ5と38です。この値が`--infer_steps`を超えると、APGは適用されません。
|
||||||
|
- `--guidance_rescale`: APG開始前のステップに対するガイダンスのリスケーリング。デフォルトは0.0(リスケーリングなし)。使用する場合、0.5程度から始めて調整してください。
|
||||||
|
- `--guidance_rescale_apg`: APGに対するガイダンスのリスケーリング。デフォルトは0.0(リスケーリングなし)。このオプションは大きな効果はないようですが、使用する場合は0.5程度から始めて調整してください。
|
||||||
|
|
||||||
`--split_attn`はサポートされていません(1件ずつ推論するため)。`--fp8_vl`もサポートされていません。VRAMが不足する場合はテキストエンコーダをCPUで実行してください。
|
`--split_attn`はサポートされていません(1件ずつ推論するため)。`--fp8_vl`もサポートされていません。VRAMが不足する場合はテキストエンコーダをCPUで実行してください。
|
||||||
|
|
||||||
|
|||||||
@@ -85,7 +85,13 @@ def parse_args() -> argparse.Namespace:
|
|||||||
"--guidance_rescale",
|
"--guidance_rescale",
|
||||||
type=float,
|
type=float,
|
||||||
default=0.0,
|
default=0.0,
|
||||||
help="Guidance rescale factor for steps without APG, 0.0 to 1.0. Default is 0.0 (no rescale)."
|
help="Guidance rescale factor for steps without APG, 0.0 to 1.0. Default is 0.0 (no rescale).",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--guidance_rescale_apg",
|
||||||
|
type=float,
|
||||||
|
default=0.0,
|
||||||
|
help="Guidance rescale factor for steps with APG, 0.0 to 1.0. Default is 0.0 (no rescale).",
|
||||||
)
|
)
|
||||||
parser.add_argument("--prompt", type=str, default=None, help="prompt for generation")
|
parser.add_argument("--prompt", type=str, default=None, help="prompt for generation")
|
||||||
parser.add_argument("--negative_prompt", type=str, default="", help="negative prompt for generation, default is empty string")
|
parser.add_argument("--negative_prompt", type=str, default="", help="negative prompt for generation, default is empty string")
|
||||||
@@ -695,10 +701,18 @@ def generate_body(
|
|||||||
|
|
||||||
# Prepare Guider
|
# Prepare Guider
|
||||||
cfg_guider_ocr = hunyuan_image_utils.AdaptiveProjectedGuidance(
|
cfg_guider_ocr = hunyuan_image_utils.AdaptiveProjectedGuidance(
|
||||||
guidance_scale=10.0, eta=0.0, adaptive_projected_guidance_rescale=10.0, adaptive_projected_guidance_momentum=-0.5
|
guidance_scale=10.0,
|
||||||
|
eta=0.0,
|
||||||
|
adaptive_projected_guidance_rescale=10.0,
|
||||||
|
adaptive_projected_guidance_momentum=-0.5,
|
||||||
|
guidance_rescale=args.guidance_rescale_apg,
|
||||||
)
|
)
|
||||||
cfg_guider_general = hunyuan_image_utils.AdaptiveProjectedGuidance(
|
cfg_guider_general = hunyuan_image_utils.AdaptiveProjectedGuidance(
|
||||||
guidance_scale=10.0, eta=0.0, adaptive_projected_guidance_rescale=10.0, adaptive_projected_guidance_momentum=-0.5
|
guidance_scale=10.0,
|
||||||
|
eta=0.0,
|
||||||
|
adaptive_projected_guidance_rescale=10.0,
|
||||||
|
adaptive_projected_guidance_momentum=-0.5,
|
||||||
|
guidance_rescale=args.guidance_rescale_apg,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Denoising loop
|
# Denoising loop
|
||||||
|
|||||||
@@ -401,8 +401,6 @@ class AdaptiveProjectedGuidance:
|
|||||||
guidance_rescale: float = 0.0,
|
guidance_rescale: float = 0.0,
|
||||||
use_original_formulation: bool = False,
|
use_original_formulation: bool = False,
|
||||||
):
|
):
|
||||||
assert guidance_rescale == 0.0, "guidance_rescale > 0.0 not supported."
|
|
||||||
|
|
||||||
self.guidance_scale = guidance_scale
|
self.guidance_scale = guidance_scale
|
||||||
self.adaptive_projected_guidance_momentum = adaptive_projected_guidance_momentum
|
self.adaptive_projected_guidance_momentum = adaptive_projected_guidance_momentum
|
||||||
self.adaptive_projected_guidance_rescale = adaptive_projected_guidance_rescale
|
self.adaptive_projected_guidance_rescale = adaptive_projected_guidance_rescale
|
||||||
@@ -425,6 +423,10 @@ class AdaptiveProjectedGuidance:
|
|||||||
self.use_original_formulation,
|
self.use_original_formulation,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if self.guidance_rescale > 0.0:
|
||||||
|
print(f"Applying guidance rescale with factor {self.guidance_rescale} at step {step}")
|
||||||
|
pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
|
||||||
|
|
||||||
return pred
|
return pred
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user