From 5dad64b6846b93154fdfb67d506c9f5edb119ec8 Mon Sep 17 00:00:00 2001 From: tsukimiya <71832+tsukimiya@users.noreply.github.com> Date: Mon, 13 Mar 2023 14:37:28 +0900 Subject: [PATCH 1/2] Fixed an issue where max_train_steps was not set correctly when max_train_epochs was specified and gradient_accumulation_steps was set to 2 or more. --- train_network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_network.py b/train_network.py index 5aa8af48..31d6397a 100644 --- a/train_network.py +++ b/train_network.py @@ -178,7 +178,7 @@ def train(args): # 学習ステップ数を計算する if args.max_train_epochs is not None: - args.max_train_steps = args.max_train_epochs * math.ceil(len(train_dataloader) / accelerator.num_processes) + args.max_train_steps = args.max_train_epochs * math.ceil(len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps) if is_main_process: print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") From a167a592e2bcfbbf701420ac7b8e5891dbf3e79f Mon Sep 17 00:00:00 2001 From: tsukimiya <71832+tsukimiya@users.noreply.github.com> Date: Mon, 13 Mar 2023 14:37:28 +0900 Subject: [PATCH 2/2] Fixed an issue where max_train_steps was not set correctly when max_train_epochs was specified and gradient_accumulation_steps was set to 2 or more. --- train_network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_network.py b/train_network.py index 7f910df4..2945eefe 100644 --- a/train_network.py +++ b/train_network.py @@ -196,7 +196,7 @@ def train(args): # 学習ステップ数を計算する if args.max_train_epochs is not None: - args.max_train_steps = args.max_train_epochs * math.ceil(len(train_dataloader) / accelerator.num_processes) + args.max_train_steps = args.max_train_epochs * math.ceil(len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps) if is_main_process: print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}")