Removed the mention of Adafactor from the warning message, since this situation can now apply to other optimizers too.

Author: araleza
Date: 2025-08-24 16:25:25 +01:00
Parent: 225ea36285
Commit: f583e35673


@@ -385,7 +385,7 @@ def train(args):
         optimizer_eval_fn = lambda: None # dummy function
         if (args.optimizer_type not in fused_optimizers_supported) and args.full_bf16:
-            logger.warning("Use of --blockwise_fused_optimizers with Adafactor optimizer prevents stochastic/Kahan weight updates.")
+            logger.warning("Use of --blockwise_fused_optimizers is preventing stochastic/Kahan weight updates.")
     else:
         _, _, optimizer = train_util.get_optimizer(args, trainable_params=params_to_optimize)
         optimizer_train_fn, optimizer_eval_fn = train_util.get_optimizer_train_eval_fn(optimizer, args)
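
For context on the warning itself: with --full_bf16 the weights stay in bfloat16, so an optimizer that does not apply stochastic-rounding or Kahan-compensated updates will silently drop any step far smaller than the weight's magnitude. The condition above makes the warning fire for any optimizer outside fused_optimizers_supported, not just Adafactor, which is what the commit message describes. Below is a minimal, hypothetical sketch of a stochastically rounded bf16 weight update, not the implementation used by the supported fused optimizers; the function name add_stochastic_bf16_ and the example tensors are made up for illustration.

import torch

def add_stochastic_bf16_(param: torch.Tensor, update: torch.Tensor) -> None:
    # Do the arithmetic in fp32 so a tiny update is not lost outright.
    result = param.float() + update.float()

    # Reinterpret the fp32 bits as int32, add uniform noise to the 16 low bits
    # that truncation to bf16 would discard, then clear those bits. The value
    # rounds up or down with probability proportional to the discarded
    # fraction, so the update survives in expectation.
    bits = result.view(torch.int32)
    noise = torch.randint_like(bits, 0, 1 << 16)
    rounded = ((bits + noise) >> 16) << 16
    param.copy_(rounded.view(torch.float32).to(torch.bfloat16))

# Usage sketch: a step of 1e-6 is below bf16 resolution around 1.0 and would
# be dropped by a plain bf16 addition, but survives on average here.
w = torch.full((4,), 1.0, dtype=torch.bfloat16)
add_stochastic_bf16_(w, torch.full((4,), -1e-6))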