Fix initialization location of loss_recorder

This commit is contained in:
Yuta Hayashibe
2023-10-27 21:13:29 +09:00
parent 9d00c8eea2
commit 63992b81c8
2 changed files with 2 additions and 2 deletions

View File

@@ -288,6 +288,7 @@ def train(args):
     init_kwargs = toml.load(args.log_tracker_config)
     accelerator.init_trackers("finetuning" if args.log_tracker_name is None else args.log_tracker_name, init_kwargs=init_kwargs)
+    loss_recorder = train_util.LossRecorder()
     for epoch in range(num_train_epochs):
         accelerator.print(f"\nepoch {epoch+1}/{num_train_epochs}")
         current_epoch.value = epoch + 1
@@ -295,7 +296,6 @@ def train(args):
         for m in training_models:
             m.train()
-        loss_recorder = train_util.LossRecorder()
         for step, batch in enumerate(train_dataloader):
             current_step.value = global_step
             with accelerator.accumulate(training_models[0]):  # 複数モデルに対応していない模様だがとりあえずこうしておく

View File

@@ -452,6 +452,7 @@ def train(args):
     init_kwargs = toml.load(args.log_tracker_config)
     accelerator.init_trackers("finetuning" if args.log_tracker_name is None else args.log_tracker_name, init_kwargs=init_kwargs)
+    loss_recorder = train_util.LossRecorder()
     for epoch in range(num_train_epochs):
         accelerator.print(f"\nepoch {epoch+1}/{num_train_epochs}")
         current_epoch.value = epoch + 1
@@ -459,7 +460,6 @@ def train(args):
         for m in training_models:
             m.train()
-        loss_recorder = train_util.LossRecorder()
         for step, batch in enumerate(train_dataloader):
             current_step.value = global_step
             with accelerator.accumulate(training_models[0]):  # 複数モデルに対応していない模様だがとりあえずこうしておく