From 5dc2a0d3fd1a0cccf653aebf00ae17711f221008 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Mon, 30 Oct 2023 19:55:30 +0800 Subject: [PATCH 1/3] Add custom seperator --- library/config_util.py | 1 + library/train_util.py | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/library/config_util.py b/library/config_util.py index e8e0fda7..af4eedaa 100644 --- a/library/config_util.py +++ b/library/config_util.py @@ -51,6 +51,7 @@ class BaseSubsetParams: image_dir: Optional[str] = None num_repeats: int = 1 shuffle_caption: bool = False + caption_seperator: str = ',', keep_tokens: int = 0 color_aug: bool = False flip_aug: bool = False diff --git a/library/train_util.py b/library/train_util.py index 51610e70..c04ad9a9 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -341,6 +341,7 @@ class BaseSubset: image_dir: Optional[str], num_repeats: int, shuffle_caption: bool, + caption_seperator: str, keep_tokens: int, color_aug: bool, flip_aug: bool, @@ -357,6 +358,7 @@ class BaseSubset: self.image_dir = image_dir self.num_repeats = num_repeats self.shuffle_caption = shuffle_caption + self.caption_seperator = caption_seperator self.keep_tokens = keep_tokens self.color_aug = color_aug self.flip_aug = flip_aug @@ -383,6 +385,7 @@ class DreamBoothSubset(BaseSubset): caption_extension: str, num_repeats, shuffle_caption, + caption_seperator: str, keep_tokens, color_aug, flip_aug, @@ -402,6 +405,7 @@ class DreamBoothSubset(BaseSubset): image_dir, num_repeats, shuffle_caption, + caption_seperator, keep_tokens, color_aug, flip_aug, @@ -435,6 +439,7 @@ class FineTuningSubset(BaseSubset): metadata_file: str, num_repeats, shuffle_caption, + caption_seperator, keep_tokens, color_aug, flip_aug, @@ -454,6 +459,7 @@ class FineTuningSubset(BaseSubset): image_dir, num_repeats, shuffle_caption, + caption_seperator, keep_tokens, color_aug, flip_aug, @@ -484,6 +490,7 @@ class 
ControlNetSubset(BaseSubset): caption_extension: str, num_repeats, shuffle_caption, + caption_seperator, keep_tokens, color_aug, flip_aug, @@ -503,6 +510,7 @@ class ControlNetSubset(BaseSubset): image_dir, num_repeats, shuffle_caption, + caption_seperator, keep_tokens, color_aug, flip_aug, @@ -638,7 +646,7 @@ class BaseDataset(torch.utils.data.Dataset): caption = "" else: if subset.shuffle_caption or subset.token_warmup_step > 0 or subset.caption_tag_dropout_rate > 0: - tokens = [t.strip() for t in caption.strip().split(",")] + tokens = [t.strip() for t in caption.strip().split(subset.caption_seperator)] if subset.token_warmup_step < 1: # 初回に上書きする subset.token_warmup_step = math.floor(subset.token_warmup_step * self.max_train_steps) if subset.token_warmup_step and self.current_step < subset.token_warmup_step: @@ -3091,7 +3099,10 @@ def add_dataset_arguments( # dataset common parser.add_argument("--train_data_dir", type=str, default=None, help="directory for train images / 学習画像データのディレクトリ") parser.add_argument( - "--shuffle_caption", action="store_true", help="shuffle comma-separated caption / コンマで区切られたcaptionの各要素をshuffleする" + "--shuffle_caption", action="store_true", help="shuffle separated caption / 区切られたcaptionの各要素をshuffleする" + ) + parser.add_argument( + "--caption_seperator", type=str, default=",", help="seperator for caption / captionの区切り文字" ) parser.add_argument( "--caption_extension", type=str, default=".caption", help="extension of caption files / 読み込むcaptionファイルの拡張子" From 583e2b2d0174e9a8bebcabfa178295e2980d334c Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Mon, 30 Oct 2023 20:02:04 +0800 Subject: [PATCH 2/3] Fix typo --- library/train_util.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index c04ad9a9..35391e80 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -341,7 +341,7 @@ class BaseSubset: 
image_dir: Optional[str], num_repeats: int, shuffle_caption: bool, - caption_seperator: str, + caption_separator: str, keep_tokens: int, color_aug: bool, flip_aug: bool, @@ -358,7 +358,7 @@ class BaseSubset: self.image_dir = image_dir self.num_repeats = num_repeats self.shuffle_caption = shuffle_caption - self.caption_seperator = caption_seperator + self.caption_separator = caption_separator self.keep_tokens = keep_tokens self.color_aug = color_aug self.flip_aug = flip_aug @@ -385,7 +385,7 @@ class DreamBoothSubset(BaseSubset): caption_extension: str, num_repeats, shuffle_caption, - caption_seperator: str, + caption_separator: str, keep_tokens, color_aug, flip_aug, @@ -405,7 +405,7 @@ class DreamBoothSubset(BaseSubset): image_dir, num_repeats, shuffle_caption, - caption_seperator, + caption_separator, keep_tokens, color_aug, flip_aug, @@ -439,7 +439,7 @@ class FineTuningSubset(BaseSubset): metadata_file: str, num_repeats, shuffle_caption, - caption_seperator, + caption_separator, keep_tokens, color_aug, flip_aug, @@ -459,7 +459,7 @@ class FineTuningSubset(BaseSubset): image_dir, num_repeats, shuffle_caption, - caption_seperator, + caption_separator, keep_tokens, color_aug, flip_aug, @@ -490,7 +490,7 @@ class ControlNetSubset(BaseSubset): caption_extension: str, num_repeats, shuffle_caption, - caption_seperator, + caption_separator, keep_tokens, color_aug, flip_aug, @@ -510,7 +510,7 @@ class ControlNetSubset(BaseSubset): image_dir, num_repeats, shuffle_caption, - caption_seperator, + caption_separator, keep_tokens, color_aug, flip_aug, @@ -646,7 +646,7 @@ class BaseDataset(torch.utils.data.Dataset): caption = "" else: if subset.shuffle_caption or subset.token_warmup_step > 0 or subset.caption_tag_dropout_rate > 0: - tokens = [t.strip() for t in caption.strip().split(subset.caption_seperator)] + tokens = [t.strip() for t in caption.strip().split(subset.caption_separator)] if subset.token_warmup_step < 1: # 初回に上書きする subset.token_warmup_step = 
math.floor(subset.token_warmup_step * self.max_train_steps) if subset.token_warmup_step and self.current_step < subset.token_warmup_step: @@ -3102,7 +3102,7 @@ def add_dataset_arguments( "--shuffle_caption", action="store_true", help="shuffle separated caption / 区切られたcaptionの各要素をshuffleする" ) parser.add_argument( - "--caption_seperator", type=str, default=",", help="seperator for caption / captionの区切り文字" + "--caption_separator", type=str, default=",", help="separator for caption / captionの区切り文字" ) parser.add_argument( "--caption_extension", type=str, default=".caption", help="extension of caption files / 読み込むcaptionファイルの拡張子" From 489b728dbc7f85e22ca5e0fe4e7c91e2fb56c5f9 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Mon, 30 Oct 2023 20:19:51 +0800 Subject: [PATCH 3/3] Fix typo again --- library/config_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/config_util.py b/library/config_util.py index af4eedaa..ab90fb63 100644 --- a/library/config_util.py +++ b/library/config_util.py @@ -51,7 +51,7 @@ class BaseSubsetParams: image_dir: Optional[str] = None num_repeats: int = 1 shuffle_caption: bool = False - caption_seperator: str = ',', + caption_separator: str = ',' keep_tokens: int = 0 color_aug: bool = False flip_aug: bool = False