diff --git a/library/train_util.py b/library/train_util.py
index b65f06b9..672aa597 100644
--- a/library/train_util.py
+++ b/library/train_util.py
@@ -6253,10 +6253,14 @@ def append_lr_to_logs_with_names(logs, lr_scheduler, optimizer_type, names):
         name = names[lr_index]
         logs["lr/" + name] = float(lrs[lr_index])
 
-        if optimizer_type.lower().startswith("DAdapt".lower()) or optimizer_type.lower() == "Prodigy".lower():
+        if optimizer_type.lower().startswith("DAdapt".lower()) or optimizer_type.lower().startswith("Prodigy".lower()):
             logs["lr/d*lr/" + name] = (
                 lr_scheduler.optimizers[-1].param_groups[lr_index]["d"] * lr_scheduler.optimizers[-1].param_groups[lr_index]["lr"]
             )
+            if "effective_lr" in lr_scheduler.optimizers[-1].param_groups[lr_index]:
+                logs["lr/d*eff_lr/" + name] = (
+                    lr_scheduler.optimizers[-1].param_groups[lr_index]["d"] * lr_scheduler.optimizers[-1].param_groups[lr_index]["effective_lr"]
+                )
 
 
 # scheduler:
diff --git a/train_network.py b/train_network.py
index 2ee671e9..6b8ed9bd 100644
--- a/train_network.py
+++ b/train_network.py
@@ -90,40 +90,23 @@ class NetworkTrainer:
             if lr_descriptions is not None:
                 lr_desc = lr_descriptions[i]
             else:
-                idx = i - (0 if args.network_train_unet_only else -1)
+                idx = i - (0 if args.network_train_unet_only else 1)
                 if idx == -1:
                     lr_desc = "textencoder"
                 else:
                     if len(lrs) > 2:
-                        lr_desc = f"group{idx}"
+                        lr_desc = f"group{i}"
                     else:
                         lr_desc = "unet"
 
             logs[f"lr/{lr_desc}"] = lr
 
-            if args.optimizer_type.lower().startswith("DAdapt".lower()) or args.optimizer_type.lower() == "Prodigy".lower():
-                # tracking d*lr value
-                logs[f"lr/d*lr/{lr_desc}"] = (
-                    lr_scheduler.optimizers[-1].param_groups[i]["d"] * lr_scheduler.optimizers[-1].param_groups[i]["lr"]
-                )
-            if (
-                args.optimizer_type.lower().endswith("ProdigyPlusScheduleFree".lower()) and optimizer is not None
-            ):  # tracking d*lr value of unet.
-                logs["lr/d*lr"] = optimizer.param_groups[0]["d"] * optimizer.param_groups[0]["lr"]
-        else:
-            idx = 0
-            if not args.network_train_unet_only:
-                logs["lr/textencoder"] = float(lrs[0])
-                idx = 1
-
-            for i in range(idx, len(lrs)):
-                logs[f"lr/group{i}"] = float(lrs[i])
-                if args.optimizer_type.lower().startswith("DAdapt".lower()) or args.optimizer_type.lower() == "Prodigy".lower():
-                    logs[f"lr/d*lr/group{i}"] = (
-                        lr_scheduler.optimizers[-1].param_groups[i]["d"] * lr_scheduler.optimizers[-1].param_groups[i]["lr"]
-                    )
-                if args.optimizer_type.lower().endswith("ProdigyPlusScheduleFree".lower()) and optimizer is not None:
-                    logs[f"lr/d*lr/group{i}"] = optimizer.param_groups[i]["d"] * optimizer.param_groups[i]["lr"]
+            if args.optimizer_type.lower().startswith("DAdapt".lower()) or args.optimizer_type.lower().startswith("Prodigy".lower()):
+                opt = lr_scheduler.optimizers[-1] if hasattr(lr_scheduler, "optimizers") else optimizer
+                if opt is not None:
+                    logs[f"lr/d*lr/{lr_desc}"] = opt.param_groups[i]["d"] * opt.param_groups[i]["lr"]
+                    if "effective_lr" in opt.param_groups[i]:
+                        logs[f"lr/d*eff_lr/{lr_desc}"] = opt.param_groups[i]["d"] * opt.param_groups[i]["effective_lr"]
 
         return logs
 
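A minimal sketch of the consolidated logging path this diff introduces, runnable in isolation. The `DummyProdigy` and `DummyScheduler` classes and the `log_adaptive_lrs` helper are hypothetical stand-ins, not part of sd-scripts; the `"d"` and `"effective_lr"` param-group keys and the `lr/d*lr` / `lr/d*eff_lr` log names are taken from the diff above:

```python
# Sketch of the new d*lr / d*eff_lr logging, under the assumption that a
# Prodigy-style optimizer exposes "d" (and optionally "effective_lr") in
# its param_groups, as the diff's key lookups imply.

class DummyProdigy:
    # Hypothetical stand-in for a Prodigy-style optimizer.
    def __init__(self, groups):
        self.param_groups = groups


class DummyScheduler:
    # Hypothetical stand-in mimicking the scheduler wrapper's `optimizers` list.
    def __init__(self, optimizer):
        self.optimizers = [optimizer]


def log_adaptive_lrs(logs, lr_scheduler, optimizer, optimizer_type, lr_desc, i):
    # Mirrors the new train_network.py logic: one branch covers both DAdapt*
    # and Prodigy* (including ProdigyPlusScheduleFree, via startswith), and the
    # scheduler-wrapped optimizer is preferred with the bare one as fallback.
    if optimizer_type.lower().startswith("dadapt") or optimizer_type.lower().startswith("prodigy"):
        opt = lr_scheduler.optimizers[-1] if hasattr(lr_scheduler, "optimizers") else optimizer
        if opt is not None:
            group = opt.param_groups[i]
            logs[f"lr/d*lr/{lr_desc}"] = group["d"] * group["lr"]
            # Only optimizers that expose "effective_lr" get the extra metric.
            if "effective_lr" in group:
                logs[f"lr/d*eff_lr/{lr_desc}"] = group["d"] * group["effective_lr"]
    return logs


opt = DummyProdigy([{"lr": 1.0, "d": 5e-5, "effective_lr": 0.8}])
sched = DummyScheduler(opt)
print(log_adaptive_lrs({}, sched, opt, "ProdigyPlusScheduleFree", "unet", 0))
# -> {'lr/d*lr/unet': 5e-05, 'lr/d*eff_lr/unet': 4e-05}
```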