Mirror of https://github.com/kohya-ss/sd-scripts.git (synced 2026-04-08 22:35:09 +00:00)
make transform_DDP more intuitive
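This commit renames transform_DDP to transform_if_model_is_DDP and makes the helper skip arguments that are None. Call sites that do not pass a network can therefore unpack exactly two values (text_encoder, unet) instead of discarding a trailing placeholder with _.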
@@ -229,7 +229,7 @@ def train(args):
     unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler)
 
     # transform DDP after prepare
-    text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
+    text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
 
     # 実験的機能：勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする
     if args.full_fp16:
@@ -2897,9 +2897,9 @@ def _load_target_model(args: argparse.Namespace, weight_dtype, device="cpu"):
     return text_encoder, vae, unet, load_stable_diffusion_format
 
 
-def transform_DDP(text_encoder, unet, network=None):
+def transform_if_model_is_DDP(text_encoder, unet, network=None):
     # Transform text_encoder, unet and network from DistributedDataParallel
-    return (encoder.module if type(encoder) == DDP else encoder for encoder in [text_encoder, unet, network])
+    return (model.module if type(model) == DDP else model for model in [text_encoder, unet, network] if model is not None)
 
 
 def load_target_model(args, weight_dtype, accelerator):
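A minimal, self-contained sketch of how the renamed helper behaves. The function body is the one introduced above; the torch.nn.Linear models and the calls around it are placeholder stand-ins, not the actual training setup, and no distributed initialization is assumed because non-DDP modules simply pass through unchanged.

# Sketch only: placeholder models, not the real text encoder / U-Net / network.
import torch
from torch.nn.parallel import DistributedDataParallel as DDP

def transform_if_model_is_DDP(text_encoder, unet, network=None):
    # unwrap .module from DDP-wrapped models and drop arguments that are None
    return (model.module if type(model) == DDP else model for model in [text_encoder, unet, network] if model is not None)

text_encoder = torch.nn.Linear(4, 4)  # placeholder for the text encoder
unet = torch.nn.Linear(4, 4)          # placeholder for the U-Net

# No network passed: the generator yields exactly two items, so two names unpack cleanly.
text_encoder, unet = transform_if_model_is_DDP(text_encoder, unet)

# With a network (as in the train_network hunk below): three non-None models in, three out.
network = torch.nn.Linear(4, 4)       # placeholder for the network module
text_encoder, unet, network = transform_if_model_is_DDP(text_encoder, unet, network)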
@@ -2922,7 +2922,7 @@ def load_target_model(args, weight_dtype, accelerator):
     torch.cuda.empty_cache()
     accelerator.wait_for_everyone()
 
-    text_encoder, unet, _ = transform_DDP(text_encoder, unet, network=None)
+    text_encoder, unet = transform_if_model_is_DDP(text_encoder, unet)
 
     return text_encoder, vae, unet, load_stable_diffusion_format
 
@@ -197,7 +197,7 @@ def train(args):
     unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler)
 
     # transform DDP after prepare
-    text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
+    text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
 
     if not train_text_encoder:
         text_encoder.to(accelerator.device, dtype=weight_dtype)  # to avoid 'cpu' vs 'cuda' error
@@ -262,7 +262,7 @@ def train(args):
     network, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(network, optimizer, train_dataloader, lr_scheduler)
 
     # transform DDP after prepare (train_network here only)
-    text_encoder, unet, network = train_util.transform_DDP(text_encoder, unet, network)
+    text_encoder, unet, network = train_util.transform_if_model_is_DDP(text_encoder, unet, network)
 
     unet.requires_grad_(False)
     unet.to(accelerator.device, dtype=weight_dtype)
@@ -281,7 +281,7 @@ def train(args):
     )
 
     # transform DDP after prepare
-    text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
+    text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
 
     index_no_updates = torch.arange(len(tokenizer)) < token_ids[0]
     # print(len(index_no_updates), torch.sum(index_no_updates))
@@ -315,7 +315,7 @@ def train(args):
     )
 
     # transform DDP after prepare
-    text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
+    text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
 
     index_no_updates = torch.arange(len(tokenizer)) < token_ids_XTI[0]
     # print(len(index_no_updates), torch.sum(index_no_updates))