diff --git a/library/train_util.py b/library/train_util.py index a20edbe1..676652e9 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3964,6 +3964,8 @@ def prepare_accelerator(args: argparse.Namespace): zero3_init_flag=args.zero3_init_flag, zero3_save_16bit_model=args.zero3_save_16bit_model, ) deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu'] = args.train_batch_size + deepspeed_plugin.deepspeed_config['train_batch_size'] = \ + args.train_batch_size * args.gradient_accumulation_steps * int(os.environ.get('WORLD_SIZE', 1)) accelerator = Accelerator( gradient_accumulation_steps=args.gradient_accumulation_steps, diff --git a/sdxl_train.py b/sdxl_train.py index e8680828..ef3ead38 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -391,6 +391,12 @@ def train(args): if args.deepspeed: # Wrapping model for DeepSpeed + import deepspeed + if args.offload_optimizer_device is not None: + accelerator.print('[DeepSpeed] start to manually build cpu_adam.') + deepspeed.ops.op_builder.CPUAdamBuilder().load() + accelerator.print('[DeepSpeed] building cpu_adam done.') + class DeepSpeedModel(torch.nn.Module): def __init__(self, unet, text_encoder) -> None: super().__init__()