work cn load and validation

2026-04-10 06:54:17 +00:00 · 2024-11-18 12:47:01 +00:00
parent 35778f0218
commit 4dd4cd6ec8
4 changed files with 37 additions and 32 deletions
--- a/library/flux_train_utils.py
+++ b/library/flux_train_utils.py
@@ -175,10 +175,6 @@ def sample_image_inference(

    # if negative_prompt is None:
    #     negative_prompt = ""
-    if controlnet_image is not None:
-        controlnet_image = Image.open(controlnet_image).convert("RGB")
-        controlnet_image = controlnet_image.resize((width, height), Image.LANCZOS)
-
    height = max(64, height - height % 16)  # round to divisible by 16
    width = max(64, width - width % 16)  # round to divisible by 16
    logger.info(f"prompt: {prompt}")
@@ -232,6 +228,12 @@ def sample_image_inference(
    img_ids = flux_utils.prepare_img_ids(1, packed_latent_height, packed_latent_width).to(accelerator.device, weight_dtype)
    t5_attn_mask = t5_attn_mask.to(accelerator.device) if args.apply_t5_attn_mask else None

+    if controlnet_image is not None:
+        controlnet_image = Image.open(controlnet_image).convert("RGB")
+        controlnet_image = controlnet_image.resize((width, height), Image.LANCZOS)
+        controlnet_image = torch.from_numpy((np.array(controlnet_image) / 127.5) - 1)
+        controlnet_image = controlnet_image.permute(2, 0, 1).unsqueeze(0).to(weight_dtype).to(accelerator.device)
+
    with accelerator.autocast(), torch.no_grad():
        x = denoise(flux, noise, img_ids, t5_out, txt_ids, l_pooled, timesteps=timesteps, guidance=scale, t5_attn_mask=t5_attn_mask, controlnet=controlnet, controlnet_img=controlnet_image)

@@ -315,6 +317,8 @@ def denoise(
 ):
    # this is ignored for schnell
    guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype)
+
+
    for t_curr, t_prev in zip(tqdm(timesteps[:-1]), timesteps[1:]):
        t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
        model.prepare_block_swap_before_forward()
@@ -560,6 +564,12 @@ def add_flux_train_arguments(parser: argparse.ArgumentParser):
        help="path to t5xxl (*.sft or *.safetensors), should be float16 / t5xxlのパス（*.sftまたは*.safetensors）、float16が前提",
    )
    parser.add_argument("--ae", type=str, help="path to ae (*.sft or *.safetensors) / aeのパス（*.sftまたは*.safetensors）")
+    parser.add_argument(
+        "--controlnet",
+        type=str,
+        default=None,
+        help="path to controlnet (*.sft or *.safetensors) / aeのパス（*.sftまたは*.safetensors）"
+    )
    parser.add_argument(
        "--t5xxl_max_token_length",
        type=int,