From 89ad69b6a0d35791627cb58630a711befc6bb3b5 Mon Sep 17 00:00:00 2001 From: gesen2egee <79357052+gesen2egee@users.noreply.github.com> Date: Thu, 11 Apr 2024 08:42:31 +0800 Subject: [PATCH] Update train_util.py --- library/train_util.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index b143e85a..8bf6823b 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1511,17 +1511,6 @@ class DreamBoothDataset(BaseDataset): logger.warning(f"not directory: {subset.image_dir}") return [], [] - img_paths = glob_images(subset.image_dir, "*") - if self.validation_split > 0.0: - img_paths = split_train_val(img_paths, self.is_train, self.validation_split, self.validation_seed) - logger.info(f"found directory {subset.image_dir} contains {len(img_paths)} image files") - - # 画像ファイルごとにプロンプトを読み込み、もしあればそちらを使う - captions = [] - missing_captions = [] - for img_path in img_paths: - cap_for_img = read_caption(img_path, subset.caption_extension) - if cap_for_img is None and subset.class_tokens is None: info_cache_file = os.path.join(subset.image_dir, self.IMAGE_INFO_CACHE_FILE) use_cached_info_for_subset = subset.cache_info if use_cached_info_for_subset: @@ -1545,6 +1534,8 @@ class DreamBoothDataset(BaseDataset): # we may need to check image size and existence of image files, but it takes time, so user should check it before training else: img_paths = glob_images(subset.image_dir, "*") + if self.validation_split > 0.0: + img_paths = split_train_val(img_paths, self.is_train, self.validation_split, self.validation_seed) sizes = [None] * len(img_paths) logger.info(f"found directory {subset.image_dir} contains {len(img_paths)} image files")