mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-17 17:24:21 +00:00
Add detailed dataset configuration via an extra config file (#227)
* add config file schema * change config file specification * refactor config utility * unify batch_size to train_batch_size * fix indent size * use batch_size instead of train_batch_size * make cache_latents configurable on subset * rename options * bucket_repo_range * shuffle_keep_tokens * update readme * revert to min_bucket_reso & max_bucket_reso * use subset structure in dataset * format import lines * split mode specific options * use only valid subset * change valid subsets name * manage multiple datasets by dataset group * update config file sanitizer * prune redundant validation * add comments * update type annotation * rename json_file_name to metadata_file * ignore when image dir is invalid * fix tag shuffle and dropout * ignore duplicated subset * add method to check latent cachability * fix format * fix bug * update caption dropout default values * update annotation * fix bug * add option to enable bucket shuffle across dataset * update blueprint generate function * use blueprint generator for dataset initialization * delete duplicated function * update config readme * delete debug print * print dataset and subset info as info * enable bucket_shuffle_across_dataset option * update config readme for clarification * compensate quotes for string option example * fix bug of bad usage of join * conserve trained metadata backward compatibility * enable shuffle in data loader by default * delete resolved TODO * add comment for image data handling * fix reference bug * fix undefined variable bug * prevent raise overwriting * assert image_dir and metadata_file validity * add debug message for ignoring subset * fix inconsistent import statement * loosen too strict validation on float value * sanitize argument parser separately * make image_dir optional for fine tuning dataset * fix import * fix trailing characters in print * parse flexible dataset config deterministically * use relative import * print supplementary message for parsing error * add note about 
different methods * add note of benefit of separate dataset * add error example * add note for english readme plan --------- Co-authored-by: Kohya S <52813779+kohya-ss@users.noreply.github.com>
This commit is contained in:
181
train_network.py
181
train_network.py
@@ -14,7 +14,14 @@ from accelerate.utils import set_seed
|
||||
from diffusers import DDPMScheduler
|
||||
|
||||
import library.train_util as train_util
|
||||
from library.train_util import DreamBoothDataset, FineTuningDataset
|
||||
from library.train_util import (
|
||||
DreamBoothDataset,
|
||||
)
|
||||
import library.config_util as config_util
|
||||
from library.config_util import (
|
||||
ConfigSanitizer,
|
||||
BlueprintGenerator,
|
||||
)
|
||||
|
||||
|
||||
def collate_fn(examples):
|
||||
@@ -47,6 +54,7 @@ def train(args):
|
||||
|
||||
cache_latents = args.cache_latents
|
||||
use_dreambooth_method = args.in_json is None
|
||||
use_user_config = args.config_file is not None
|
||||
|
||||
if args.seed is not None:
|
||||
set_seed(args.seed)
|
||||
@@ -54,35 +62,45 @@ def train(args):
|
||||
tokenizer = train_util.load_tokenizer(args)
|
||||
|
||||
# データセットを準備する
|
||||
if use_dreambooth_method:
|
||||
print("Use DreamBooth method.")
|
||||
train_dataset = DreamBoothDataset(args.train_batch_size, args.train_data_dir, args.reg_data_dir,
|
||||
tokenizer, args.max_token_length, args.caption_extension, args.shuffle_caption, args.keep_tokens,
|
||||
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
||||
args.bucket_reso_steps, args.bucket_no_upscale,
|
||||
args.prior_loss_weight, args.flip_aug, args.color_aug, args.face_crop_aug_range,
|
||||
args.random_crop, args.debug_dataset)
|
||||
blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, True, True))
|
||||
if use_user_config:
|
||||
print(f"Load config file from {args.config_file}")
|
||||
user_config = config_util.load_user_config(args.config_file)
|
||||
ignored = ["train_data_dir", "reg_data_dir", "in_json"]
|
||||
if any(getattr(args, attr) is not None for attr in ignored):
|
||||
print("ignore following options because config file is found: {0} / 設定ファイルが利用されるため以下のオプションは無視されます: {0}".format(', '.join(ignored)))
|
||||
else:
|
||||
print("Train with captions.")
|
||||
train_dataset = FineTuningDataset(args.in_json, args.train_batch_size, args.train_data_dir,
|
||||
tokenizer, args.max_token_length, args.shuffle_caption, args.keep_tokens,
|
||||
args.resolution, args.enable_bucket, args.min_bucket_reso, args.max_bucket_reso,
|
||||
args.bucket_reso_steps, args.bucket_no_upscale,
|
||||
args.flip_aug, args.color_aug, args.face_crop_aug_range, args.random_crop,
|
||||
args.dataset_repeats, args.debug_dataset)
|
||||
if use_dreambooth_method:
|
||||
print("Use DreamBooth method.")
|
||||
user_config = {
|
||||
"datasets": [{
|
||||
"subsets": config_util.generate_dreambooth_subsets_config_by_subdirs(args.train_data_dir, args.reg_data_dir)
|
||||
}]
|
||||
}
|
||||
else:
|
||||
print("Train with captions.")
|
||||
user_config = {
|
||||
"datasets": [{
|
||||
"subsets": [{
|
||||
"image_dir": args.train_data_dir,
|
||||
"metadata_file": args.in_json,
|
||||
}]
|
||||
}]
|
||||
}
|
||||
|
||||
# 学習データのdropout率を設定する
|
||||
train_dataset.set_caption_dropout(args.caption_dropout_rate, args.caption_dropout_every_n_epochs, args.caption_tag_dropout_rate)
|
||||
|
||||
train_dataset.make_buckets()
|
||||
blueprint = blueprint_generator.generate(user_config, args, tokenizer=tokenizer)
|
||||
train_dataset_group = config_util.generate_dataset_group_by_blueprint(blueprint.dataset_group)
|
||||
|
||||
if args.debug_dataset:
|
||||
train_util.debug_dataset(train_dataset)
|
||||
train_util.debug_dataset(train_dataset_group)
|
||||
return
|
||||
if len(train_dataset) == 0:
|
||||
if len(train_dataset_group) == 0:
|
||||
print("No data found. Please verify arguments (train_data_dir must be the parent of folders with images) / 画像がありません。引数指定を確認してください(train_data_dirには画像があるフォルダではなく、画像があるフォルダの親フォルダを指定する必要があります)")
|
||||
return
|
||||
|
||||
if cache_latents:
|
||||
assert train_dataset_group.is_latent_cachable(), "when caching latents, either color_aug or random_crop cannot be used / latentをキャッシュするときはcolor_augとrandom_cropは使えません"
|
||||
|
||||
# acceleratorを準備する
|
||||
print("prepare accelerator")
|
||||
accelerator, unwrap_model = train_util.prepare_accelerator(args)
|
||||
@@ -107,7 +125,7 @@ def train(args):
|
||||
vae.requires_grad_(False)
|
||||
vae.eval()
|
||||
with torch.no_grad():
|
||||
train_dataset.cache_latents(vae)
|
||||
train_dataset_group.cache_latents(vae)
|
||||
vae.to("cpu")
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
@@ -151,7 +169,7 @@ def train(args):
|
||||
# DataLoaderのプロセス数:0はメインプロセスになる
|
||||
n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで
|
||||
train_dataloader = torch.utils.data.DataLoader(
|
||||
train_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn, num_workers=n_workers, persistent_workers=args.persistent_data_loader_workers)
|
||||
train_dataset_group, batch_size=1, shuffle=True, collate_fn=collate_fn, num_workers=n_workers, persistent_workers=args.persistent_data_loader_workers)
|
||||
|
||||
# 学習ステップ数を計算する
|
||||
if args.max_train_epochs is not None:
|
||||
@@ -229,14 +247,15 @@ def train(args):
|
||||
args.save_every_n_epochs = math.floor(num_train_epochs / args.save_n_epoch_ratio) or 1
|
||||
|
||||
# 学習する
|
||||
# TODO: find a way to handle total batch size when there are multiple datasets
|
||||
total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
|
||||
print("running training / 学習開始")
|
||||
print(f" num train images * repeats / 学習画像の数×繰り返し回数: {train_dataset.num_train_images}")
|
||||
print(f" num reg images / 正則化画像の数: {train_dataset.num_reg_images}")
|
||||
print(f" num train images * repeats / 学習画像の数×繰り返し回数: {train_dataset_group.num_train_images}")
|
||||
print(f" num reg images / 正則化画像の数: {train_dataset_group.num_reg_images}")
|
||||
print(f" num batches per epoch / 1epochのバッチ数: {len(train_dataloader)}")
|
||||
print(f" num epochs / epoch数: {num_train_epochs}")
|
||||
print(f" batch size per device / バッチサイズ: {args.train_batch_size}")
|
||||
print(f" total train batch size (with parallel & distributed & accumulation) / 総バッチサイズ(並列学習、勾配合計含む): {total_batch_size}")
|
||||
print(f" batch size per device / バッチサイズ: {', '.join([str(d.batch_size) for d in train_dataset_group.datasets])}")
|
||||
#print(f" total train batch size (with parallel & distributed & accumulation) / 総バッチサイズ(並列学習、勾配合計含む): {total_batch_size}")
|
||||
print(f" gradient accumulation steps / 勾配を合計するステップ数 = {args.gradient_accumulation_steps}")
|
||||
print(f" total optimization steps / 学習ステップ数: {args.max_train_steps}")
|
||||
|
||||
@@ -247,12 +266,10 @@ def train(args):
|
||||
"ss_learning_rate": args.learning_rate,
|
||||
"ss_text_encoder_lr": args.text_encoder_lr,
|
||||
"ss_unet_lr": args.unet_lr,
|
||||
"ss_num_train_images": train_dataset.num_train_images, # includes repeating
|
||||
"ss_num_reg_images": train_dataset.num_reg_images,
|
||||
"ss_num_train_images": train_dataset_group.num_train_images,
|
||||
"ss_num_reg_images": train_dataset_group.num_reg_images,
|
||||
"ss_num_batches_per_epoch": len(train_dataloader),
|
||||
"ss_num_epochs": num_train_epochs,
|
||||
"ss_batch_size_per_device": args.train_batch_size,
|
||||
"ss_total_batch_size": total_batch_size,
|
||||
"ss_gradient_checkpointing": args.gradient_checkpointing,
|
||||
"ss_gradient_accumulation_steps": args.gradient_accumulation_steps,
|
||||
"ss_max_train_steps": args.max_train_steps,
|
||||
@@ -264,26 +281,12 @@ def train(args):
|
||||
"ss_mixed_precision": args.mixed_precision,
|
||||
"ss_full_fp16": bool(args.full_fp16),
|
||||
"ss_v2": bool(args.v2),
|
||||
"ss_resolution": args.resolution,
|
||||
"ss_clip_skip": args.clip_skip,
|
||||
"ss_max_token_length": args.max_token_length,
|
||||
"ss_color_aug": bool(args.color_aug),
|
||||
"ss_flip_aug": bool(args.flip_aug),
|
||||
"ss_random_crop": bool(args.random_crop),
|
||||
"ss_shuffle_caption": bool(args.shuffle_caption),
|
||||
"ss_cache_latents": bool(args.cache_latents),
|
||||
"ss_enable_bucket": bool(train_dataset.enable_bucket),
|
||||
"ss_bucket_no_upscale": bool(train_dataset.bucket_no_upscale),
|
||||
"ss_min_bucket_reso": train_dataset.min_bucket_reso,
|
||||
"ss_max_bucket_reso": train_dataset.max_bucket_reso,
|
||||
"ss_seed": args.seed,
|
||||
"ss_lowram": args.lowram,
|
||||
"ss_keep_tokens": args.keep_tokens,
|
||||
"ss_noise_offset": args.noise_offset,
|
||||
"ss_dataset_dirs": json.dumps(train_dataset.dataset_dirs_info),
|
||||
"ss_reg_dataset_dirs": json.dumps(train_dataset.reg_dataset_dirs_info),
|
||||
"ss_tag_frequency": json.dumps(train_dataset.tag_frequency),
|
||||
"ss_bucket_info": json.dumps(train_dataset.bucket_info),
|
||||
"ss_training_comment": args.training_comment, # will not be updated after training
|
||||
"ss_sd_scripts_commit_hash": train_util.get_git_revision_hash(),
|
||||
"ss_optimizer": optimizer_name + (f"({optimizer_args})" if len(optimizer_args) > 0 else ""),
|
||||
@@ -295,6 +298,89 @@ def train(args):
|
||||
"ss_prior_loss_weight": args.prior_loss_weight,
|
||||
}
|
||||
|
||||
if use_user_config:
|
||||
# save metadata of multiple datasets
|
||||
# NOTE: the whole "ss_datasets" value is packed as JSON once,
|
||||
# or should the nested collections also be packed as JSON individually?
|
||||
datasets_metadata = []
|
||||
|
||||
for dataset in train_dataset_group.datasets:
|
||||
is_dreambooth_dataset = isinstance(dataset, DreamBoothDataset)
|
||||
dataset_metadata = {
|
||||
"is_dreambooth": is_dreambooth_dataset,
|
||||
"batch_size_per_device": dataset.batch_size,
|
||||
"num_train_images": dataset.num_train_images, # includes repeating
|
||||
"num_reg_images": dataset.num_reg_images,
|
||||
"resolution": (dataset.width, dataset.height),
|
||||
"enable_bucket": bool(dataset.enable_bucket),
|
||||
"min_bucket_reso": dataset.min_bucket_reso,
|
||||
"max_bucket_reso": dataset.max_bucket_reso,
|
||||
"tag_frequency": dataset.tag_frequency,
|
||||
"bucket_info": dataset.bucket_info,
|
||||
}
|
||||
|
||||
subsets_metadata = []
|
||||
for subset in dataset.subsets:
|
||||
subset_metadata = {
|
||||
"image_dir": os.path.basename(subset.image_dir),
|
||||
"img_count": subset.img_count,
|
||||
"num_repeats": subset.num_repeats,
|
||||
"color_aug": bool(subset.color_aug),
|
||||
"flip_aug": bool(subset.flip_aug),
|
||||
"random_crop": bool(subset.random_crop),
|
||||
"shuffle_caption": bool(subset.shuffle_caption),
|
||||
"keep_tokens": subset.keep_tokens,
|
||||
}
|
||||
if is_dreambooth_dataset:
|
||||
subset_metadata["class_tokens"] = subset.class_tokens
|
||||
subset_metadata["is_reg"] = subset.is_reg
|
||||
subsets_metadata.append(subset_metadata)
|
||||
|
||||
dataset_metadata["subsets"] = subsets_metadata
|
||||
datasets_metadata.append(dataset_metadata)
|
||||
|
||||
metadata["ss_datasets"] = json.dumps(datasets_metadata)
|
||||
else:
|
||||
# preserve backward compatibility of the metadata when train_data_dir and reg_data_dir are used
|
||||
assert len(train_dataset_group.datasets) == 1, f"There should be a single dataset but {len(train_dataset_group.datasets)} found. This seems to be a bug. / データセットは1個だけ存在するはずですが、実際には{len(train_dataset_group.datasets)}個でした。プログラムのバグかもしれません。"
|
||||
|
||||
dataset = train_dataset_group.datasets[0]
|
||||
|
||||
dataset_dirs_info = {}
|
||||
reg_dataset_dirs_info = {}
|
||||
if use_dreambooth_method:
|
||||
for subset in dataset.subsets:
|
||||
info = reg_dataset_dirs_info if subset.is_reg else dataset_dirs_info
|
||||
info[os.path.basename(subset.image_dir)] = {
|
||||
"n_repeats": subset.num_repeats,
|
||||
"img_count": subset.img_count
|
||||
}
|
||||
else:
|
||||
for subset in dataset.subsets:
|
||||
dataset_dirs_info[os.path.basename(subset.metadata_file)] = {
|
||||
"n_repeats": subset.num_repeats,
|
||||
"img_count": subset.img_count
|
||||
}
|
||||
|
||||
metadata |= {
|
||||
"ss_batch_size_per_device": args.train_batch_size,
|
||||
"ss_total_batch_size": total_batch_size,
|
||||
"ss_resolution": args.resolution,
|
||||
"ss_color_aug": bool(args.color_aug),
|
||||
"ss_flip_aug": bool(args.flip_aug),
|
||||
"ss_random_crop": bool(args.random_crop),
|
||||
"ss_shuffle_caption": bool(args.shuffle_caption),
|
||||
"ss_enable_bucket": bool(dataset.enable_bucket),
|
||||
"ss_bucket_no_upscale": bool(dataset.bucket_no_upscale),
|
||||
"ss_min_bucket_reso": dataset.min_bucket_reso,
|
||||
"ss_max_bucket_reso": dataset.max_bucket_reso,
|
||||
"ss_keep_tokens": args.keep_tokens,
|
||||
"ss_dataset_dirs": json.dumps(dataset_dirs_info),
|
||||
"ss_reg_dataset_dirs": json.dumps(reg_dataset_dirs_info),
|
||||
"ss_tag_frequency": json.dumps(dataset.tag_frequency),
|
||||
"ss_bucket_info": json.dumps(dataset.bucket_info),
|
||||
}
|
||||
|
||||
# uncomment if another network is added
|
||||
# for key, value in net_kwargs.items():
|
||||
# metadata["ss_arg_" + key] = value
|
||||
@@ -330,7 +416,7 @@ def train(args):
|
||||
loss_total = 0.0
|
||||
for epoch in range(num_train_epochs):
|
||||
print(f"epoch {epoch+1}/{num_train_epochs}")
|
||||
train_dataset.set_current_epoch(epoch + 1)
|
||||
train_dataset_group.set_current_epoch(epoch + 1)
|
||||
|
||||
metadata["ss_epoch"] = str(epoch+1)
|
||||
|
||||
@@ -482,6 +568,7 @@ if __name__ == '__main__':
|
||||
train_util.add_dataset_arguments(parser, True, True, True)
|
||||
train_util.add_training_arguments(parser, True)
|
||||
train_util.add_optimizer_arguments(parser)
|
||||
config_util.add_config_arguments(parser)
|
||||
|
||||
parser.add_argument("--no_metadata", action='store_true', help="do not save metadata in output model / メタデータを出力先モデルに保存しない")
|
||||
parser.add_argument("--save_model_as", type=str, default="safetensors", choices=[None, "ckpt", "pt", "safetensors"],
|
||||
|
||||
Reference in New Issue
Block a user