Add LossRecorder and use moving average in all places

commit 3d2bb1a8f1
parent 2a23713f71
Author: Yuta Hayashibe
Date:   2023-10-27 17:49:49 +09:00

5 changed files with 33 additions and 30 deletions


@@ -459,7 +459,7 @@ def train(args):
         for m in training_models:
             m.train()
-        loss_total = 0
+        loss_recorder = train_util.LossRecorder()
         for step, batch in enumerate(train_dataloader):
             current_step.value = global_step
             with accelerator.accumulate(training_models[0]):  # doesn't seem to support multiple models, but leave it like this for now
@@ -632,9 +632,8 @@ def train(args):
                 accelerator.log(logs, step=global_step)
-            # TODO: use a moving average
-            loss_total += current_loss
-            avr_loss = loss_total / (step + 1)
+            loss_recorder.add(epoch=epoch, step=step, loss=current_loss)
+            avr_loss: float = loss_recorder.get_moving_average()
            logs = {"loss": avr_loss}  # , "lr": lr_scheduler.get_last_lr()[0]}
            progress_bar.set_postfix(**logs)
@@ -642,7 +641,7 @@ def train(args):
                break
        if args.logging_dir is not None:
-            logs = {"loss/epoch": loss_total / len(train_dataloader)}
+            logs = {"loss/epoch": loss_recorder.get_moving_average()}
            accelerator.log(logs, step=epoch + 1)
        accelerator.wait_for_everyone()
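
The definition of `train_util.LossRecorder` lives in `train_util.py` (one of the five changed files) and is not shown in the hunks above. Based solely on the API visible here, `add(epoch=..., step=..., loss=...)` and `get_moving_average()`, a minimal compatible sketch might look like the following. The windowing strategy (one slot per step of the most recent epoch, replaced in place on later epochs) is an assumption, not necessarily the actual implementation:

```python
from typing import List


class LossRecorder:
    """Hypothetical sketch of a moving-average loss recorder.

    Matches the add()/get_moving_average() API used in the diff above;
    the real implementation in train_util.py may differ.
    """

    def __init__(self) -> None:
        self.loss_list: List[float] = []
        self.loss_total: float = 0.0

    def add(self, *, epoch: int, step: int, loss: float) -> None:
        if epoch == 0 or len(self.loss_list) <= step:
            # First epoch (or a longer dataloader): grow the window
            # by one slot per step.
            self.loss_list.append(loss)
        else:
            # Later epochs: replace the loss recorded at the same step,
            # so the average covers the last len(loss_list) steps instead
            # of everything since the start of the current epoch.
            self.loss_total -= self.loss_list[step]
            self.loss_list[step] = loss
        self.loss_total += loss

    def get_moving_average(self) -> float:
        if not self.loss_list:
            return 0.0
        return self.loss_total / len(self.loss_list)
```

Under this sketch, the behavior of the old code is preserved during the first epoch (`loss_total / (step + 1)` over all steps so far), while from the second epoch on the reported value becomes a rolling average over a full epoch's worth of steps. This also explains why the per-epoch log at line 641 can drop `loss_total / len(train_dataloader)` in favor of the same `get_moving_average()` call.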