diff --git a/docs/config_README-en.md b/docs/config_README-en.md index 66a50dc0..8c55903d 100644 --- a/docs/config_README-en.md +++ b/docs/config_README-en.md @@ -152,6 +152,7 @@ These options are related to subset configuration. | `keep_tokens_separator` | `“|||”` | o | o | o | | `secondary_separator` | `“;;;”` | o | o | o | | `enable_wildcard` | `true` | o | o | o | +| `resize_interpolation` | (not specified) | o | o | o | * `num_repeats` * Specifies the number of repeats for images in a subset. This is equivalent to `--dataset_repeats` in fine-tuning but can be specified for any training method. @@ -165,6 +166,8 @@ These options are related to subset configuration. * Specifies an additional separator. The part separated by this separator is treated as one tag and is shuffled and dropped. It is then replaced by `caption_separator`. For example, if you specify `aaa;;;bbb;;;ccc`, it will be replaced by `aaa,bbb,ccc` or dropped together. * `enable_wildcard` * Enables wildcard notation. This will be explained later. +* `resize_interpolation` + * Specifies the interpolation method used when resizing images. Normally, there is no need to specify this. The following options can be specified: `lanczos`, `nearest`, `bilinear`, `linear`, `bicubic`, `cubic`, `area`, `box`. By default (when not specified), `area` is used for downscaling, and `lanczos` is used for upscaling. If this option is specified, the same interpolation method will be used for both upscaling and downscaling. When `lanczos` or `box` is specified, PIL is used; for other options, OpenCV is used. 
### DreamBooth-specific options diff --git a/docs/config_README-ja.md b/docs/config_README-ja.md index 0ed95e0e..aec0eca5 100644 --- a/docs/config_README-ja.md +++ b/docs/config_README-ja.md @@ -144,6 +144,7 @@ DreamBooth の手法と fine tuning の手法の両方とも利用可能な学 | `keep_tokens_separator` | `“|||”` | o | o | o | | `secondary_separator` | `“;;;”` | o | o | o | | `enable_wildcard` | `true` | o | o | o | +| `resize_interpolation` |(通常は設定しません) | o | o | o | * `num_repeats` * サブセットの画像の繰り返し回数を指定します。fine tuning における `--dataset_repeats` に相当しますが、`num_repeats` はどの学習方法でも指定可能です。 @@ -162,6 +163,9 @@ DreamBooth の手法と fine tuning の手法の両方とも利用可能な学 * `enable_wildcard` * ワイルドカード記法および複数行キャプションを有効にします。ワイルドカード記法、複数行キャプションについては後述します。 +* `resize_interpolation` + * 画像のリサイズ時に使用する補間方法を指定します。通常は指定しなくて構いません。`lanczos`, `nearest`, `bilinear`, `linear`, `bicubic`, `cubic`, `area`, `box` が指定可能です。デフォルト(未指定時)は、縮小時は `area`、拡大時は `lanczos` になります。このオプションを指定すると、拡大時・縮小時とも同じ補間方法が使用されます。`lanczos`、`box`を指定するとPILが、それ以外を指定するとOpenCVが使用されます。 + ### DreamBooth 方式専用のオプション DreamBooth 方式のオプションは、サブセット向けオプションのみ存在します。 diff --git a/library/train_util.py b/library/train_util.py index e9c50688..1ed1d3c2 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -74,7 +74,7 @@ import library.model_util as model_util import library.huggingface_util as huggingface_util import library.sai_model_spec as sai_model_spec import library.deepspeed_utils as deepspeed_utils -from library.utils import setup_logging, resize_image +from library.utils import setup_logging, resize_image, validate_interpolation_fn setup_logging() import logging diff --git a/library/utils.py b/library/utils.py index 4fbc2627..0f535a87 100644 --- a/library/utils.py +++ b/library/utils.py @@ -400,7 +400,7 @@ def pil_resize(image, size, interpolation): def resize_image(image: np.ndarray, width: int, height: int, resized_width: int, resized_height: int, resize_interpolation: Optional[str] = None): """ - Resize image with resize interpolation. 
Default interpolation to AREA if image is smaller, else LANCZOS + Resize image with resize interpolation. Defaults to AREA when downscaling (source larger than target in both dimensions), otherwise LANCZOS. Args: image: numpy.ndarray @@ -413,14 +413,21 @@ def resize_image(image: np.ndarray, width: int, height: int, resized_width: int, Returns: image """ - interpolation = get_cv2_interpolation(resize_interpolation) + if resize_interpolation is None: + resize_interpolation = "area" if width > resized_width and height > resized_height else "lanczos" + + # we use PIL for lanczos (for backward compatibility) and box, cv2 for others + use_pil = resize_interpolation in ["lanczos", "lanczos4", "box"] + resized_size = (resized_width, resized_height) - if width > resized_width and height > resized_width: - image = cv2.resize(image, resized_size, interpolation=interpolation if interpolation is not None else cv2.INTER_AREA) # INTER_AREAでやりたいのでcv2でリサイズ - logger.debug(f"resize image using {resize_interpolation}") + if use_pil: + interpolation = get_pil_interpolation(resize_interpolation) + image = pil_resize(image, resized_size, interpolation=interpolation) + logger.debug(f"resize image using {resize_interpolation} (PIL)") else: - image = cv2.resize(image, resized_size, interpolation=interpolation if interpolation is not None else cv2.INTER_LANCZOS4) # INTER_AREAでやりたいのでcv2でリサイズ - logger.debug(f"resize image using {resize_interpolation}") + interpolation = get_cv2_interpolation(resize_interpolation) + image = cv2.resize(image, resized_size, interpolation=interpolation) + logger.debug(f"resize image using {resize_interpolation} (cv2)") return image