mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-08 22:35:09 +00:00
Merge pull request #533 from TingTingin/main
Added warning on training without captions
This commit is contained in:
@@ -348,6 +348,8 @@ class DreamBoothSubset(BaseSubset):
|
|||||||
self.is_reg = is_reg
|
self.is_reg = is_reg
|
||||||
self.class_tokens = class_tokens
|
self.class_tokens = class_tokens
|
||||||
self.caption_extension = caption_extension
|
self.caption_extension = caption_extension
|
||||||
|
if self.caption_extension and not self.caption_extension.startswith("."):
|
||||||
|
self.caption_extension = "." + self.caption_extension
|
||||||
|
|
||||||
def __eq__(self, other) -> bool:
|
def __eq__(self, other) -> bool:
|
||||||
if not isinstance(other, DreamBoothSubset):
|
if not isinstance(other, DreamBoothSubset):
|
||||||
@@ -1081,16 +1083,32 @@ class DreamBoothDataset(BaseDataset):
|
|||||||
|
|
||||||
# 画像ファイルごとにプロンプトを読み込み、もしあればそちらを使う
|
# 画像ファイルごとにプロンプトを読み込み、もしあればそちらを使う
|
||||||
captions = []
|
captions = []
|
||||||
|
missing_captions = []
|
||||||
for img_path in img_paths:
|
for img_path in img_paths:
|
||||||
cap_for_img = read_caption(img_path, subset.caption_extension)
|
cap_for_img = read_caption(img_path, subset.caption_extension)
|
||||||
if cap_for_img is None and subset.class_tokens is None:
|
if cap_for_img is None and subset.class_tokens is None:
|
||||||
print(f"neither caption file nor class tokens are found. use empty caption for {img_path}")
|
print(f"neither caption file nor class tokens are found. use empty caption for {img_path}")
|
||||||
captions.append("")
|
captions.append("")
|
||||||
else:
|
else:
|
||||||
captions.append(subset.class_tokens if cap_for_img is None else cap_for_img)
|
if cap_for_img is None:
|
||||||
|
captions.append(subset.class_tokens)
|
||||||
|
missing_captions.append(img_path)
|
||||||
|
else:
|
||||||
|
captions.append(cap_for_img)
|
||||||
|
|
||||||
self.set_tag_frequency(os.path.basename(subset.image_dir), captions) # タグ頻度を記録
|
self.set_tag_frequency(os.path.basename(subset.image_dir), captions) # タグ頻度を記録
|
||||||
|
|
||||||
|
if missing_captions:
|
||||||
|
number_of_missing_captions = len(missing_captions)
|
||||||
|
number_of_missing_captions_to_show = 5
|
||||||
|
remaining_missing_captions = number_of_missing_captions - number_of_missing_captions_to_show
|
||||||
|
|
||||||
|
print(f"No caption file found for {number_of_missing_captions} images. Training will continue without captions for these images")
|
||||||
|
for i, missing_caption in enumerate(missing_captions):
|
||||||
|
if i >= number_of_missing_captions_to_show:
|
||||||
|
print(missing_caption+f"... and {remaining_missing_captions} more")
|
||||||
|
break
|
||||||
|
print(missing_caption)
|
||||||
return img_paths, captions
|
return img_paths, captions
|
||||||
|
|
||||||
print("prepare images.")
|
print("prepare images.")
|
||||||
|
|||||||
Reference in New Issue
Block a user