From 5a1a14f9fc096ad5c2c7cdfa15d0e23320f69ab1 Mon Sep 17 00:00:00 2001 From: TingTingin <36141041+TingTingin@users.noreply.github.com> Date: Tue, 23 May 2023 01:57:35 -0400 Subject: [PATCH 1/4] Update train_util.py Added feature to add "." if missing in caption_extension Added warning on training without captions --- library/train_util.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index 41afc13b..05ec7f84 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -348,6 +348,8 @@ class DreamBoothSubset(BaseSubset): self.is_reg = is_reg self.class_tokens = class_tokens self.caption_extension = caption_extension + if self.caption_extension and not self.caption_extension.startswith("."): + self.caption_extension = "." + self.caption_extension def __eq__(self, other) -> bool: if not isinstance(other, DreamBoothSubset): @@ -1069,7 +1071,7 @@ class DreamBoothDataset(BaseDataset): assert len(lines) > 0, f"caption file is empty / キャプションファイルが空です: {cap_path}" caption = lines[0].strip() break - return caption + return caption def load_dreambooth_dir(subset: DreamBoothSubset): if not os.path.isdir(subset.image_dir): @@ -1081,16 +1083,33 @@ class DreamBoothDataset(BaseDataset): # 画像ファイルごとにプロンプトを読み込み、もしあればそちらを使う captions = [] + missing_captions = [] for img_path in img_paths: cap_for_img = read_caption(img_path, subset.caption_extension) if cap_for_img is None and subset.class_tokens is None: print(f"neither caption file nor class tokens are found. use empty caption for {img_path}") captions.append("") else: - captions.append(subset.class_tokens if cap_for_img is None else cap_for_img) + if cap_for_img is None: + captions.append(subset.class_tokens) + missing_captions.append(img_path) + else: + captions.append(cap_for_img) self.set_tag_frequency(os.path.basename(subset.image_dir), captions) # タグ頻度を記録 + if missing_captions: + number_of_missing_captions = len(missing_captions) + number_of_missing_captions_to_show = 5 + remaining_missing_captions = number_of_missing_captions - number_of_missing_captions_to_show + + print(f"No caption file found for {number_of_missing_captions} images. Training will continue without captions for these images") + for i, missing_caption in enumerate(missing_captions): + if i >= number_of_missing_captions_to_show: + print(missing_caption+f"... and {remaining_missing_captions} more") + break + print(missing_caption) + time.sleep(5) return img_paths, captions print("prepare images.") From d859a3a9259dd04d03a41816e17c8fd8bb0189ee Mon Sep 17 00:00:00 2001 From: TingTingin <36141041+TingTingin@users.noreply.github.com> Date: Tue, 23 May 2023 02:00:33 -0400 Subject: [PATCH 2/4] Update train_util.py fix mistake --- library/train_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/train_util.py b/library/train_util.py index 05ec7f84..576fc5d8 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1071,7 +1071,7 @@ class DreamBoothDataset(BaseDataset): assert len(lines) > 0, f"caption file is empty / キャプションファイルが空です: {cap_path}" caption = lines[0].strip() break - return caption + return caption def load_dreambooth_dir(subset: DreamBoothSubset): if not os.path.isdir(subset.image_dir): From 061e1571910d704e92e610bc48f90d9f21996afe Mon Sep 17 00:00:00 2001 From: TingTingin <36141041+TingTingin@users.noreply.github.com> Date: Tue, 23 May 2023 02:02:39 -0400 Subject: [PATCH 3/4] Update train_util.py --- library/train_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/train_util.py b/library/train_util.py index 576fc5d8..55eeb316 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1071,7 +1071,7 @@ class DreamBoothDataset(BaseDataset): assert len(lines) > 0, f"caption file is empty / キャプションファイルが空です: {cap_path}" caption = lines[0].strip() break - return caption + return caption def load_dreambooth_dir(subset: DreamBoothSubset): if not os.path.isdir(subset.image_dir): From db756e9a343bcbc0efb2de077520233b70b5810a Mon Sep 17 00:00:00 2001 From: TingTingin <36141041+TingTingin@users.noreply.github.com> Date: Fri, 26 May 2023 08:08:34 -0400 Subject: [PATCH 4/4] Update train_util.py I removed the sleep since it triggers per subset and if someone had a lot of subsets it would trigger multiple times --- library/train_util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/library/train_util.py b/library/train_util.py index 55eeb316..09e6a366 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1109,7 +1109,6 @@ class DreamBoothDataset(BaseDataset): print(missing_caption+f"... and {remaining_missing_captions} more") break print(missing_caption) - time.sleep(5) return img_paths, captions print("prepare images.")