mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-08 06:28:48 +00:00
feat: add guidance rescale options for Adaptive Projected Guidance in inference
This commit is contained in:
@@ -454,6 +454,9 @@ python hunyuan_image_minimal_inference.py \
|
||||
- `--flow_shift`: Flow matching shift parameter (default: 5.0)
|
||||
- `--text_encoder_cpu`: Run the text encoders on CPU to reduce VRAM usage
|
||||
- `--vae_chunk_size`: Chunk size for VAE decoding to reduce memory usage (default: None, no chunking). 16 is recommended if enabled.
|
||||
- `--apg_start_step_general` and `--apg_start_step_ocr`: Start steps for APG (Adaptive Projected Guidance) if using APG during inference. `5` and `38` are the official recommended values for 50 steps. If this value exceeds `--infer_steps`, APG will not be applied.
|
||||
- `--guidance_rescale`: Rescales the guidance for steps before APG starts. Default is `0.0` (no rescaling). If you use this option, a value around `0.5` might be a good starting point.
|
||||
- `--guidance_rescale_apg`: Rescales the guidance for APG. Default is `0.0` (no rescaling). This option doesn't seem to have a large effect, but if you use it, a value around `0.5` might be a good starting point.
|
||||
|
||||
`--split_attn` is not supported (since inference is done one at a time). `--fp8_vl` is not supported either; if VRAM is insufficient, run the text encoders on the CPU with `--text_encoder_cpu`.
|
||||
|
||||
@@ -470,6 +473,9 @@ python hunyuan_image_minimal_inference.py \
|
||||
- `--flow_shift`: Flow Matchingシフトパラメータ(デフォルト: 5.0)
|
||||
- `--text_encoder_cpu`: テキストエンコーダをCPUで実行してVRAM使用量削減
|
||||
- `--vae_chunk_size`: VAEデコーディングのチャンクサイズ(デフォルト: None、チャンク処理なし)。有効にする場合は16を推奨。
|
||||
- `--apg_start_step_general` と `--apg_start_step_ocr`: 推論中にAPGを使用する場合の開始ステップ。50ステップの場合、公式推奨値はそれぞれ5と38です。この値が`--infer_steps`を超えると、APGは適用されません。
|
||||
- `--guidance_rescale`: APG開始前のステップに対するガイダンスのリスケーリング。デフォルトは0.0(リスケーリングなし)。使用する場合、0.5程度から始めて調整してください。
|
||||
- `--guidance_rescale_apg`: APGに対するガイダンスのリスケーリング。デフォルトは0.0(リスケーリングなし)。このオプションは大きな効果はないようですが、使用する場合は0.5程度から始めて調整してください。
|
||||
|
||||
`--split_attn`はサポートされていません(1件ずつ推論するため)。`--fp8_vl`もサポートされていません。VRAMが不足する場合はテキストエンコーダをCPUで実行してください。
|
||||
|
||||
|
||||
@@ -85,7 +85,13 @@ def parse_args() -> argparse.Namespace:
|
||||
"--guidance_rescale",
|
||||
type=float,
|
||||
default=0.0,
|
||||
help="Guidance rescale factor for steps without APG, 0.0 to 1.0. Default is 0.0 (no rescale)."
|
||||
help="Guidance rescale factor for steps without APG, 0.0 to 1.0. Default is 0.0 (no rescale).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--guidance_rescale_apg",
|
||||
type=float,
|
||||
default=0.0,
|
||||
help="Guidance rescale factor for steps with APG, 0.0 to 1.0. Default is 0.0 (no rescale).",
|
||||
)
|
||||
parser.add_argument("--prompt", type=str, default=None, help="prompt for generation")
|
||||
parser.add_argument("--negative_prompt", type=str, default="", help="negative prompt for generation, default is empty string")
|
||||
@@ -695,10 +701,18 @@ def generate_body(
|
||||
|
||||
# Prepare Guider
|
||||
cfg_guider_ocr = hunyuan_image_utils.AdaptiveProjectedGuidance(
|
||||
guidance_scale=10.0, eta=0.0, adaptive_projected_guidance_rescale=10.0, adaptive_projected_guidance_momentum=-0.5
|
||||
guidance_scale=10.0,
|
||||
eta=0.0,
|
||||
adaptive_projected_guidance_rescale=10.0,
|
||||
adaptive_projected_guidance_momentum=-0.5,
|
||||
guidance_rescale=args.guidance_rescale_apg,
|
||||
)
|
||||
cfg_guider_general = hunyuan_image_utils.AdaptiveProjectedGuidance(
|
||||
guidance_scale=10.0, eta=0.0, adaptive_projected_guidance_rescale=10.0, adaptive_projected_guidance_momentum=-0.5
|
||||
guidance_scale=10.0,
|
||||
eta=0.0,
|
||||
adaptive_projected_guidance_rescale=10.0,
|
||||
adaptive_projected_guidance_momentum=-0.5,
|
||||
guidance_rescale=args.guidance_rescale_apg,
|
||||
)
|
||||
|
||||
# Denoising loop
|
||||
|
||||
@@ -401,8 +401,6 @@ class AdaptiveProjectedGuidance:
|
||||
guidance_rescale: float = 0.0,
|
||||
use_original_formulation: bool = False,
|
||||
):
|
||||
assert guidance_rescale == 0.0, "guidance_rescale > 0.0 not supported."
|
||||
|
||||
self.guidance_scale = guidance_scale
|
||||
self.adaptive_projected_guidance_momentum = adaptive_projected_guidance_momentum
|
||||
self.adaptive_projected_guidance_rescale = adaptive_projected_guidance_rescale
|
||||
@@ -425,6 +423,10 @@ class AdaptiveProjectedGuidance:
|
||||
self.use_original_formulation,
|
||||
)
|
||||
|
||||
if self.guidance_rescale > 0.0:
|
||||
print(f"Applying guidance rescale with factor {self.guidance_rescale} at step {step}")
|
||||
pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
|
||||
|
||||
return pred
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user