Fix to work PIECEWISE_CONSTANT, update requirement.txt and README #1393

Kohya S
2024-09-11 21:44:36 +09:00
parent fd68703f37
commit 6dbfd47a59
3 changed files with 54 additions and 25 deletions

@@ -42,7 +42,10 @@ from torch.optim import Optimizer
 from torchvision import transforms
 from transformers import CLIPTokenizer, CLIPTextModel, CLIPTextModelWithProjection
 import transformers
-from diffusers.optimization import SchedulerType as DiffusersSchedulerType, TYPE_TO_SCHEDULER_FUNCTION as DIFFUSERS_TYPE_TO_SCHEDULER_FUNCTION
+from diffusers.optimization import (
+    SchedulerType as DiffusersSchedulerType,
+    TYPE_TO_SCHEDULER_FUNCTION as DIFFUSERS_TYPE_TO_SCHEDULER_FUNCTION,
+)
 from transformers.optimization import SchedulerType, TYPE_TO_SCHEDULER_FUNCTION
 from diffusers import (
     StableDiffusionPipeline,
@@ -2974,7 +2977,7 @@ def add_sd_models_arguments(parser: argparse.ArgumentParser):
 def add_optimizer_arguments(parser: argparse.ArgumentParser):
     def int_or_float(value):
-        if value.endswith('%'):
+        if value.endswith("%"):
             try:
                 return float(value[:-1]) / 100.0
             except ValueError:
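
The hunk cuts off mid-helper, so for reference, here is a minimal self-contained sketch of how such an `int_or_float` argparse type can be completed, assuming it should accept percent strings, ratios below 1, and absolute step counts (error messages are illustrative, not the repository's):

    import argparse

    def int_or_float(value):
        # "25%" -> 0.25, a ratio of total training steps
        if value.endswith("%"):
            try:
                return float(value[:-1]) / 100.0
            except ValueError:
                raise argparse.ArgumentTypeError(f"{value!r} is not a valid percentage")
        # "0.1" -> 0.1 (ratio); "500" -> 500 (absolute step count)
        try:
            float_value = float(value)
            return int(float_value) if float_value >= 1 else float_value
        except ValueError:
            raise argparse.ArgumentTypeError(f"{value!r} is not an int or float")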
@@ -3041,13 +3044,15 @@ def add_optimizer_arguments(parser: argparse.ArgumentParser):
"--lr_warmup_steps",
type=int_or_float,
default=0,
help="Int number of steps for the warmup in the lr scheduler (default is 0) or float with ratio of train steps / 学習率のスケジューラをウォームアップするステップ数デフォルト0",
help="Int number of steps for the warmup in the lr scheduler (default is 0) or float with ratio of train steps"
" / 学習率のスケジューラをウォームアップするステップ数デフォルト0、または学習ステップの比率1未満のfloat値の場合",
)
parser.add_argument(
"--lr_decay_steps",
type=int_or_float,
default=0,
help="Int number of steps for the decay in the lr scheduler (default is 0) or float with ratio of train steps",
help="Int number of steps for the decay in the lr scheduler (default is 0) or float (<1) with ratio of train steps"
" / 学習率のスケジューラを減衰させるステップ数デフォルト0、または学習ステップの比率1未満のfloat値の場合",
)
parser.add_argument(
"--lr_scheduler_num_cycles",
@@ -3071,13 +3076,16 @@ def add_optimizer_arguments(parser: argparse.ArgumentParser):
"--lr_scheduler_timescale",
type=int,
default=None,
help="Inverse sqrt timescale for inverse sqrt scheduler,defaults to `num_warmup_steps`",
help="Inverse sqrt timescale for inverse sqrt scheduler,defaults to `num_warmup_steps`"
" / 逆平方根スケジューラのタイムスケール、デフォルトは`num_warmup_steps`",
,
)
parser.add_argument(
"--lr_scheduler_min_lr_ratio",
type=float,
default=None,
help="The minimum learning rate as a ratio of the initial learning rate for cosine with min lr scheduler and warmup decay scheduler",
help="The minimum learning rate as a ratio of the initial learning rate for cosine with min lr scheduler and warmup decay scheduler"
" / 初期学習率の比率としての最小学習率を指定する、cosine with min lr と warmup decay スケジューラ で有効",
)
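
The min-lr ratio is a plain multiplier on the initial learning rate; a worked example with hypothetical values:

    initial_lr = 1e-4
    min_lr_ratio = 0.1                   # --lr_scheduler_min_lr_ratio
    min_lr = initial_lr * min_lr_ratio   # 1e-5: the floor the schedule decays toward instead of 0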
@@ -4327,8 +4335,12 @@ def get_scheduler_fix(args, optimizer: Optimizer, num_processes: int):
"""
name = args.lr_scheduler
num_training_steps = args.max_train_steps * num_processes # * args.gradient_accumulation_steps
num_warmup_steps: Optional[int] = int(args.lr_warmup_steps * num_training_steps) if isinstance(args.lr_warmup_steps, float) else args.lr_warmup_steps
num_decay_steps: Optional[int] = int(args.lr_decay_steps * num_training_steps) if isinstance(args.lr_decay_steps, float) else args.lr_decay_steps
num_warmup_steps: Optional[int] = (
int(args.lr_warmup_steps * num_training_steps) if isinstance(args.lr_warmup_steps, float) else args.lr_warmup_steps
)
num_decay_steps: Optional[int] = (
int(args.lr_decay_steps * num_training_steps) if isinstance(args.lr_decay_steps, float) else args.lr_decay_steps
)
num_stable_steps = num_training_steps - num_warmup_steps - num_decay_steps
num_cycles = args.lr_scheduler_num_cycles
power = args.lr_scheduler_power
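
To make the float-vs-int handling concrete, a worked example with hypothetical values, mirroring the expressions above:

    max_train_steps, num_processes = 1000, 1
    num_training_steps = max_train_steps * num_processes  # 1000

    lr_warmup_steps = 0.1  # float -> ratio of training steps
    lr_decay_steps = 100   # int   -> absolute step count

    num_warmup_steps = int(lr_warmup_steps * num_training_steps)  # 100
    num_decay_steps = lr_decay_steps  # 100
    num_stable_steps = num_training_steps - num_warmup_steps - num_decay_steps  # 800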
@@ -4369,15 +4381,17 @@ def get_scheduler_fix(args, optimizer: Optimizer, num_processes: int):
         # logger.info(f"adafactor scheduler init lr {initial_lr}")
         return wrap_check_needless_num_warmup_steps(transformers.optimization.AdafactorSchedule(optimizer, initial_lr))
-    name = SchedulerType(name) or DiffusersSchedulerType(name)
-    schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name] or DIFFUSERS_TYPE_TO_SCHEDULER_FUNCTION[name]
+    if name == DiffusersSchedulerType.PIECEWISE_CONSTANT.value:
+        name = DiffusersSchedulerType(name)
+        schedule_func = DIFFUSERS_TYPE_TO_SCHEDULER_FUNCTION[name]
+        return schedule_func(optimizer, **lr_scheduler_kwargs)  # step_rules and last_epoch are given as kwargs
+    name = SchedulerType(name)
+    schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name]
     if name == SchedulerType.CONSTANT:
         return wrap_check_needless_num_warmup_steps(schedule_func(optimizer, **lr_scheduler_kwargs))
-    if name == DiffusersSchedulerType.PIECEWISE_CONSTANT:
-        return schedule_func(optimizer, **lr_scheduler_kwargs)  # step_rules and last_epoch are given as kwargs
     # All other schedulers require `num_warmup_steps`
     if num_warmup_steps is None:
         raise ValueError(f"{name} requires `num_warmup_steps`, please provide that argument.")
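
The removed lines show the bug: `SchedulerType("piecewise_constant")` raises ValueError immediately, so the `or DiffusersSchedulerType(name)` fallback was never reached (`or` short-circuits on falsy values, it does not catch exceptions), and the same applies to the table lookup. The fix therefore checks for the diffusers-only name before touching the transformers enum. The failure mode in isolation:

    from transformers.optimization import SchedulerType
    from diffusers.optimization import SchedulerType as DiffusersSchedulerType

    DiffusersSchedulerType("piecewise_constant")  # OK: PIECEWISE_CONSTANT exists in diffusers
    SchedulerType("piecewise_constant")           # ValueError: no such transformers scheduler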
@@ -4408,11 +4422,11 @@ def get_scheduler_fix(args, optimizer: Optimizer, num_processes: int):
     if name == SchedulerType.COSINE_WITH_MIN_LR:
         return schedule_func(
             optimizer,
             num_warmup_steps=num_warmup_steps,
             num_training_steps=num_training_steps,
             num_cycles=num_cycles / 2,
             min_lr_rate=min_lr_ratio,
             **lr_scheduler_kwargs,
         )
@@ -4421,16 +4435,22 @@ def get_scheduler_fix(args, optimizer: Optimizer, num_processes: int):
raise ValueError(f"{name} requires `num_decay_steps`, please provide that argument.")
if name == SchedulerType.WARMUP_STABLE_DECAY:
return schedule_func(
optimizer,
num_warmup_steps=num_warmup_steps,
num_stable_steps=num_stable_steps,
num_decay_steps=num_decay_steps,
num_cycles=num_cycles / 2,
optimizer,
num_warmup_steps=num_warmup_steps,
num_stable_steps=num_stable_steps,
num_decay_steps=num_decay_steps,
num_cycles=num_cycles / 2,
min_lr_ratio=min_lr_ratio if min_lr_ratio is not None else 0.0,
**lr_scheduler_kwargs,
)
return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps, num_decay_steps=num_decay_steps, **lr_scheduler_kwargs)
return schedule_func(
optimizer,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_decay_steps=num_decay_steps,
**lr_scheduler_kwargs,
)
def prepare_dataset_args(args: argparse.Namespace, support_metadata: bool):
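
For reference, a minimal sketch of what the new `piecewise_constant` branch ultimately resolves to, calling diffusers' `get_piecewise_constant_schedule` directly; the optimizer and step rules here are illustrative, and in the training scripts `step_rules` arrives through `lr_scheduler_kwargs`:

    import torch
    from diffusers.optimization import get_piecewise_constant_schedule

    optimizer = torch.optim.AdamW([torch.nn.Parameter(torch.zeros(1))], lr=1e-4)
    # Multiply the base lr by 1.0 until step 100, by 0.1 until step 200, then by 0.01.
    scheduler = get_piecewise_constant_schedule(optimizer, step_rules="1:100,0.1:200,0.01")

    for _ in range(5):
        optimizer.step()
        scheduler.step()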