mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-09 06:45:09 +00:00
Merge pull request #913 from KohakuBlueleaf/custom-seperator
Add custom separator for shuffle caption
This commit is contained in:
@@ -51,6 +51,7 @@ class BaseSubsetParams:
|
|||||||
image_dir: Optional[str] = None
|
image_dir: Optional[str] = None
|
||||||
num_repeats: int = 1
|
num_repeats: int = 1
|
||||||
shuffle_caption: bool = False
|
shuffle_caption: bool = False
|
||||||
|
# Default must be a plain string: a trailing comma after the literal would
# turn it into the 1-tuple (",",), which str.split() rejects with TypeError.
caption_separator: str = ","
|
||||||
keep_tokens: int = 0
|
keep_tokens: int = 0
|
||||||
color_aug: bool = False
|
color_aug: bool = False
|
||||||
flip_aug: bool = False
|
flip_aug: bool = False
|
||||||
|
|||||||
@@ -349,6 +349,7 @@ class BaseSubset:
|
|||||||
image_dir: Optional[str],
|
image_dir: Optional[str],
|
||||||
num_repeats: int,
|
num_repeats: int,
|
||||||
shuffle_caption: bool,
|
shuffle_caption: bool,
|
||||||
|
caption_separator: str,
|
||||||
keep_tokens: int,
|
keep_tokens: int,
|
||||||
color_aug: bool,
|
color_aug: bool,
|
||||||
flip_aug: bool,
|
flip_aug: bool,
|
||||||
@@ -365,6 +366,7 @@ class BaseSubset:
|
|||||||
self.image_dir = image_dir
|
self.image_dir = image_dir
|
||||||
self.num_repeats = num_repeats
|
self.num_repeats = num_repeats
|
||||||
self.shuffle_caption = shuffle_caption
|
self.shuffle_caption = shuffle_caption
|
||||||
|
self.caption_separator = caption_separator
|
||||||
self.keep_tokens = keep_tokens
|
self.keep_tokens = keep_tokens
|
||||||
self.color_aug = color_aug
|
self.color_aug = color_aug
|
||||||
self.flip_aug = flip_aug
|
self.flip_aug = flip_aug
|
||||||
@@ -391,6 +393,7 @@ class DreamBoothSubset(BaseSubset):
|
|||||||
caption_extension: str,
|
caption_extension: str,
|
||||||
num_repeats,
|
num_repeats,
|
||||||
shuffle_caption,
|
shuffle_caption,
|
||||||
|
caption_separator: str,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
color_aug,
|
color_aug,
|
||||||
flip_aug,
|
flip_aug,
|
||||||
@@ -410,6 +413,7 @@ class DreamBoothSubset(BaseSubset):
|
|||||||
image_dir,
|
image_dir,
|
||||||
num_repeats,
|
num_repeats,
|
||||||
shuffle_caption,
|
shuffle_caption,
|
||||||
|
caption_separator,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
color_aug,
|
color_aug,
|
||||||
flip_aug,
|
flip_aug,
|
||||||
@@ -443,6 +447,7 @@ class FineTuningSubset(BaseSubset):
|
|||||||
metadata_file: str,
|
metadata_file: str,
|
||||||
num_repeats,
|
num_repeats,
|
||||||
shuffle_caption,
|
shuffle_caption,
|
||||||
|
caption_separator,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
color_aug,
|
color_aug,
|
||||||
flip_aug,
|
flip_aug,
|
||||||
@@ -462,6 +467,7 @@ class FineTuningSubset(BaseSubset):
|
|||||||
image_dir,
|
image_dir,
|
||||||
num_repeats,
|
num_repeats,
|
||||||
shuffle_caption,
|
shuffle_caption,
|
||||||
|
caption_separator,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
color_aug,
|
color_aug,
|
||||||
flip_aug,
|
flip_aug,
|
||||||
@@ -492,6 +498,7 @@ class ControlNetSubset(BaseSubset):
|
|||||||
caption_extension: str,
|
caption_extension: str,
|
||||||
num_repeats,
|
num_repeats,
|
||||||
shuffle_caption,
|
shuffle_caption,
|
||||||
|
caption_separator,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
color_aug,
|
color_aug,
|
||||||
flip_aug,
|
flip_aug,
|
||||||
@@ -511,6 +518,7 @@ class ControlNetSubset(BaseSubset):
|
|||||||
image_dir,
|
image_dir,
|
||||||
num_repeats,
|
num_repeats,
|
||||||
shuffle_caption,
|
shuffle_caption,
|
||||||
|
caption_separator,
|
||||||
keep_tokens,
|
keep_tokens,
|
||||||
color_aug,
|
color_aug,
|
||||||
flip_aug,
|
flip_aug,
|
||||||
@@ -646,7 +654,7 @@ class BaseDataset(torch.utils.data.Dataset):
|
|||||||
caption = ""
|
caption = ""
|
||||||
else:
|
else:
|
||||||
if subset.shuffle_caption or subset.token_warmup_step > 0 or subset.caption_tag_dropout_rate > 0:
|
if subset.shuffle_caption or subset.token_warmup_step > 0 or subset.caption_tag_dropout_rate > 0:
|
||||||
tokens = [t.strip() for t in caption.strip().split(",")]
|
tokens = [t.strip() for t in caption.strip().split(subset.caption_separator)]
|
||||||
if subset.token_warmup_step < 1: # 初回に上書きする
|
if subset.token_warmup_step < 1: # 初回に上書きする
|
||||||
subset.token_warmup_step = math.floor(subset.token_warmup_step * self.max_train_steps)
|
subset.token_warmup_step = math.floor(subset.token_warmup_step * self.max_train_steps)
|
||||||
if subset.token_warmup_step and self.current_step < subset.token_warmup_step:
|
if subset.token_warmup_step and self.current_step < subset.token_warmup_step:
|
||||||
@@ -3105,7 +3113,10 @@ def add_dataset_arguments(
|
|||||||
# dataset common
|
# dataset common
|
||||||
parser.add_argument("--train_data_dir", type=str, default=None, help="directory for train images / 学習画像データのディレクトリ")
|
parser.add_argument("--train_data_dir", type=str, default=None, help="directory for train images / 学習画像データのディレクトリ")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--shuffle_caption", action="store_true", help="shuffle comma-separated caption / コンマで区切られたcaptionの各要素をshuffleする"
|
"--shuffle_caption", action="store_true", help="shuffle separated caption / 区切られたcaptionの各要素をshuffleする"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--caption_separator", type=str, default=",", help="separator for caption / captionの区切り文字"
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--caption_extension", type=str, default=".caption", help="extension of caption files / 読み込むcaptionファイルの拡張子"
|
"--caption_extension", type=str, default=".caption", help="extension of caption files / 読み込むcaptionファイルの拡張子"
|
||||||
|
|||||||
Reference in New Issue
Block a user