Merge branch 'dev' into sd3

2026-04-08 22:35:09 +00:00 · 2024-09-07 10:59:22 +09:00
parent 2889108d85 62ec3e6424
commit ce144476cf
8 changed files with 53 additions and 22 deletions
--- a/library/train_util.py
+++ b/library/train_util.py
@@ -73,7 +73,7 @@ import library.model_util as model_util
 import library.huggingface_util as huggingface_util
 import library.sai_model_spec as sai_model_spec
 import library.deepspeed_utils as deepspeed_utils
-from library.utils import setup_logging
+from library.utils import setup_logging, pil_resize

 setup_logging()
 import logging
@@ -1708,7 +1708,7 @@ class DreamBoothDataset(BaseDataset):
        def load_dreambooth_dir(subset: DreamBoothSubset):
            if not os.path.isdir(subset.image_dir):
                logger.warning(f"not directory: {subset.image_dir}")
-                return [], []
+                return [], [], []

            info_cache_file = os.path.join(subset.image_dir, self.IMAGE_INFO_CACHE_FILE)
            use_cached_info_for_subset = subset.cache_info
@@ -2263,9 +2263,7 @@ class ControlNetDataset(BaseDataset):
                # ), f"image size is small / 画像サイズが小さいようです: {image_info.absolute_path}"
                # resize to target
                if cond_img.shape[0] != target_size_hw[0] or cond_img.shape[1] != target_size_hw[1]:
-                    cond_img = cv2.resize(
-                        cond_img, (int(target_size_hw[1]), int(target_size_hw[0])), interpolation=cv2.INTER_LANCZOS4
-                    )
+                    cond_img = pil_resize(cond_img, (int(target_size_hw[1]), int(target_size_hw[0])))

            if flipped:
                cond_img = cond_img[:, ::-1, :].copy()  # copy to avoid negative stride
@@ -2659,7 +2657,10 @@ def trim_and_resize_if_required(

    if image_width != resized_size[0] or image_height != resized_size[1]:
        # リサイズする
-        image = cv2.resize(image, resized_size, interpolation=cv2.INTER_AREA)  # INTER_AREAでやりたいのでcv2でリサイズ
+        if image_width > resized_size[0] and image_height > resized_size[1]:
+            image = cv2.resize(image, resized_size, interpolation=cv2.INTER_AREA)  # INTER_AREAでやりたいのでcv2でリサイズ
+        else:
+            image = pil_resize(image, resized_size)

    image_height, image_width = image.shape[0:2]

@@ -5657,7 +5658,7 @@ def sample_images_common(
    clean_memory_on_device(accelerator.device)

    torch.set_rng_state(rng_state)
-    if cuda_rng_state is not None:
+    if torch.cuda.is_available() and cuda_rng_state is not None:
        torch.cuda.set_rng_state(cuda_rng_state)
    vae.to(org_vae_device)

@@ -5691,11 +5692,13 @@ def sample_image_inference(

    if seed is not None:
        torch.manual_seed(seed)
-        torch.cuda.manual_seed(seed)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed(seed)
    else:
        # True random sample image generation
        torch.seed()
-        torch.cuda.seed()
+        if torch.cuda.is_available():
+            torch.cuda.seed()

    scheduler = get_my_scheduler(
        sample_sampler=sampler_name,
@@ -5730,8 +5733,9 @@ def sample_image_inference(
            controlnet_image=controlnet_image,
        )

-    with torch.cuda.device(torch.cuda.current_device()):
-        torch.cuda.empty_cache()
+    if torch.cuda.is_available():
+        with torch.cuda.device(torch.cuda.current_device()):
+            torch.cuda.empty_cache()

    image = pipeline.latents_to_image(latents)[0]

--- a/library/utils.py
+++ b/library/utils.py
@@ -10,6 +10,9 @@ from torchvision import transforms
 from diffusers import EulerAncestralDiscreteScheduler
 import diffusers.schedulers.scheduling_euler_ancestral_discrete
 from diffusers.schedulers.scheduling_euler_ancestral_discrete import EulerAncestralDiscreteSchedulerOutput
+import cv2
+from PIL import Image
+import numpy as np


 def fire_in_thread(f, *args, **kwargs):
@@ -301,6 +304,17 @@ class MemoryEfficientSafeOpen:
            # return byte_tensor.view(torch.uint8).to(torch.float16).reshape(shape)
            raise ValueError(f"Unsupported float8 type: {dtype_str} (upgrade PyTorch to support float8 types)")

+def pil_resize(image, size, interpolation=Image.LANCZOS):
+    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+    # use Pillow resize
+    resized_pil = pil_image.resize(size, interpolation)
+
+    # return cv2 image
+    resized_cv2 = cv2.cvtColor(np.array(resized_pil), cv2.COLOR_RGB2BGR)
+
+    return resized_cv2
+

 # TODO make inf_utils.py