diff --git a/library/train_util.py b/library/train_util.py index d5e72323..eaf6ec00 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -5875,7 +5875,7 @@ def get_huber_threshold(args, timesteps: torch.Tensor, noise_scheduler) -> torch alpha = -math.log(args.huber_c) / noise_scheduler.config.num_train_timesteps result = torch.exp(-alpha * timesteps) * args.huber_scale elif args.huber_schedule == "snr": - if noise_scheduler is None or not hasattr(noise_scheduler, "alphas_cumprod"): + if not hasattr(noise_scheduler, "alphas_cumprod"): raise NotImplementedError("Huber schedule 'snr' is not supported with the current model.") alphas_cumprod = torch.index_select(noise_scheduler.alphas_cumprod, 0, timesteps.cpu()) sigmas = ((1.0 - alphas_cumprod) / alphas_cumprod) ** 0.5 diff --git a/sd3_train.py b/sd3_train.py index 909c5ead..73a68aa6 100644 --- a/sd3_train.py +++ b/sd3_train.py @@ -675,8 +675,8 @@ def train(args): progress_bar = tqdm(range(args.max_train_steps), smoothing=0, disable=not accelerator.is_local_main_process, desc="steps") global_step = 0 - # noise_scheduler = sd3_train_utils.FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3.0) - # noise_scheduler_copy = copy.deepcopy(noise_scheduler) + # only used to get timesteps, etc. TODO manage timesteps etc. separately + dummy_scheduler = sd3_train_utils.FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3.0) if accelerator.is_main_process: init_kwargs = {} @@ -844,9 +844,7 @@ def train(args): # 1, # ) # calculate loss - loss = train_util.conditional_loss( - args, model_pred.float(), target.float(), timesteps, "none", None - ) + loss = train_util.conditional_loss(args, model_pred.float(), target.float(), timesteps, "none", dummy_scheduler) if args.masked_loss or ("alpha_masks" in batch and batch["alpha_masks"] is not None): loss = apply_masked_loss(loss, batch) loss = loss.mean([1, 2, 3])