use PIL for lanczos and box

2026-04-09 06:45:09 +00:00 · 2025-03-30 20:40:29 +09:00
parent 9e9a13aa8a
commit 1f432e2c0e
4 changed files with 22 additions and 8 deletions
--- a/docs/config_README-en.md
+++ b/docs/config_README-en.md
@@ -152,6 +152,7 @@ These options are related to subset configuration.
 | `keep_tokens_separator` | `“|||”` | o | o | o |
 | `secondary_separator` | `“;;;”` | o | o | o |
 | `enable_wildcard` | `true` | o | o | o |
 | `resize_interpolation` | (not specified) | o | o | o |
 * `num_repeats`
    * Specifies the number of repeats for images in a subset. This is equivalent to `--dataset_repeats` in fine-tuning but can be specified for any training method.
@@ -165,6 +166,8 @@ These options are related to subset configuration.
    * Specifies an additional separator. The part separated by this separator is treated as one tag and is shuffled and dropped. It is then replaced by `caption_separator`. For example, if you specify `aaa;;;bbb;;;ccc`, it will be replaced by `aaa,bbb,ccc` or dropped together.
 * `enable_wildcard`
    * Enables wildcard notation. This will be explained later.
 * `resize_interpolation`
    * Specifies the interpolation method used when resizing images. Normally, there is no need to specify this. The following options can be specified: `lanczos`, `nearest`, `bilinear`, `linear`, `bicubic`, `cubic`, `area`, `box`. By default (when not specified), `area` is used for downscaling, and `lanczos` is used for upscaling. If this option is specified, the same interpolation method will be used for both upscaling and downscaling. When `lanczos` or `box` is specified, PIL is used; for other options, OpenCV is used.
 ### DreamBooth-specific options
--- a/docs/config_README-ja.md
+++ b/docs/config_README-ja.md
@@ -144,6 +144,7 @@ DreamBooth の手法と fine tuning の手法の両方とも利用可能な学
 | `keep_tokens_separator` | `“|||”` | o | o | o |
 | `secondary_separator` | `“;;;”` | o | o | o |
 | `enable_wildcard` | `true` | o | o | o |
 | `resize_interpolation` | （通常は設定しません） | o | o | o |
 * `num_repeats`
    * サブセットの画像の繰り返し回数を指定します。fine tuning における `--dataset_repeats` に相当しますが、`num_repeats` はどの学習方法でも指定可能です。
@@ -162,6 +163,9 @@ DreamBooth の手法と fine tuning の手法の両方とも利用可能な学
 * `enable_wildcard`
    * ワイルドカード記法および複数行キャプションを有効にします。ワイルドカード記法、複数行キャプションについては後述します。
 * `resize_interpolation`
    * 画像のリサイズ時に使用する補間方法を指定します。通常は指定しなくて構いません。`lanczos`, `nearest`, `bilinear`, `linear`, `bicubic`, `cubic`, `area`, `box` が指定可能です。デフォルト（未指定時）は、縮小時は `area`、拡大時は `lanczos` になります。このオプションを指定すると、拡大時・縮小時とも同じ補間方法が使用されます。`lanczos`、`box`を指定するとPILが、それ以外を指定するとOpenCVが使用されます。
 ### DreamBooth 方式専用のオプション
 DreamBooth 方式のオプションは、サブセット向けオプションのみ存在します。
--- a/library/train_util.py
+++ b/library/train_util.py
@@ -74,7 +74,7 @@ import library.model_util as model_util
 import library.huggingface_util as huggingface_util
 import library.sai_model_spec as sai_model_spec
 import library.deepspeed_utils as deepspeed_utils
-from library.utils import setup_logging, resize_image
+from library.utils import setup_logging, resize_image, validate_interpolation_fn
 setup_logging()
 import logging
--- a/library/utils.py
+++ b/library/utils.py
@@ -400,7 +400,7 @@ def pil_resize(image, size, interpolation):
 def resize_image(image: np.ndarray, width: int, height: int, resized_width: int, resized_height: int, resize_interpolation: Optional[str] = None):
    """
-    Resize image with resize interpolation. Default interpolation to AREA if image is smaller, else LANCZOS
+    Resize image with resize interpolation. Default interpolation to AREA if image is smaller, else LANCZOS.
    Args:
        image: numpy.ndarray
@@ -413,14 +413,21 @@ def resize_image(image: np.ndarray, width: int, height: int, resized_width: int,
    Returns:
        image
    """
-    interpolation = get_cv2_interpolation(resize_interpolation)
+    if resize_interpolation is None:
        resize_interpolation = "lanczos" if width > resized_width and height > resized_height else "area"
    # we use PIL for lanczos (for backward compatibility) and box, cv2 for others
    use_pil = resize_interpolation in ["lanczos", "lanczos4", "box"]
    resized_size = (resized_width, resized_height)
-    if width > resized_width and height > resized_width:
+    if use_pil:
-        image = cv2.resize(image, resized_size, interpolation=interpolation if interpolation is not None else cv2.INTER_AREA)  # INTER_AREAでやりたいのでcv2でリサイズ
+        interpolation = get_pil_interpolation(resize_interpolation)
-        logger.debug(f"resize image using {resize_interpolation}")
+        image = pil_resize(image, resized_size, interpolation=interpolation)
        logger.debug(f"resize image using {resize_interpolation} (PIL)")
    else:
-        image = cv2.resize(image, resized_size, interpolation=interpolation if interpolation is not None else cv2.INTER_LANCZOS4)  # INTER_AREAでやりたいのでcv2でリサイズ
+        interpolation = get_cv2_interpolation(resize_interpolation)
-        logger.debug(f"resize image using {resize_interpolation}")
+        image = cv2.resize(image, resized_size, interpolation=interpolation)
        logger.debug(f"resize image using {resize_interpolation} (cv2)")
    return image