Found the reason gradient accumulation steps were not working: it was because of my accelerate settings.

This commit is contained in:
BootsofLagrangian
2024-02-09 17:47:49 +09:00
parent a98fecaeb1
commit 03f0816f86
4 changed files with 14 additions and 7 deletions

View File

@@ -3975,7 +3975,11 @@ def prepare_accelerator(args: argparse.Namespace):
if args.mixed_precision.lower() == "fp16":
deepspeed_plugin.deepspeed_config['fp16']['initial_scale_power'] = 0
if args.full_fp16 or args.fp16_master_weights_and_gradients:
deepspeed_plugin.deepspeed_config['fp16_master_weights_and_gradients'] = True
if args.offload_optimizer_device == "cpu":
deepspeed_plugin.deepspeed_config['fp16']['fp16_master_weights_and_grads'] = True
print("[DeepSpeed] full fp16 enable.")
else:
print("full fp16, fp16_master_weights_and_grads currently only supported using ZeRO-Offload with DeepSpeedCPUAdam.")
accelerator = Accelerator(
gradient_accumulation_steps=args.gradient_accumulation_steps,