Merge branch 'dev' into sd3

2026-04-06 13:47:06 +00:00 · 2024-10-25 19:03:27 +09:00
parent 138dac4aea b1e6504007
commit 5fba6f514a
13 changed files with 16 additions and 18 deletions
--- a/README.md
+++ b/README.md
@@ -779,6 +779,9 @@ The majority of scripts is licensed under ASL 2.0 (including codes from Diffuser
  - bitsandbytes, transformers, accelerate and huggingface_hub are updated. 
  - If you encounter any issues, please report them.

+- Fixed a bug where the loss weight was incorrect when `--debiased_estimation_loss` was specified with `--v_parameterization`. PR [#1715](https://github.com/kohya-ss/sd-scripts/pull/1715) Thanks to catboxanon! See [the PR](https://github.com/kohya-ss/sd-scripts/pull/1715) for details.
+  - Removed the warning when `--v_parameterization` is specified in SDXL and SD1.5. PR [#1717](https://github.com/kohya-ss/sd-scripts/pull/1717)
+
 - There was a bug where the min_bucket_reso/max_bucket_reso in the dataset configuration did not create the correct resolution bucket if it was not divisible by bucket_reso_steps. These values are now warned and automatically rounded to a divisible value. Thanks to Maru-mee for raising the issue. Related PR [#1632](https://github.com/kohya-ss/sd-scripts/pull/1632)

 - `bitsandbytes` is updated to 0.44.0. Now you can use `AdEMAMix8bit` and `PagedAdEMAMix8bit` in the training script. PR [#1640](https://github.com/kohya-ss/sd-scripts/pull/1640) Thanks to sdbds!
--- a/fine_tune.py
+++ b/fine_tune.py
@@ -406,7 +406,7 @@ def train(args):
                    if args.scale_v_pred_loss_like_noise_pred:
                        loss = scale_v_prediction_loss_like_noise_prediction(loss, timesteps, noise_scheduler)
                    if args.debiased_estimation_loss:
-                        loss = apply_debiased_estimation(loss, timesteps, noise_scheduler)
+                        loss = apply_debiased_estimation(loss, timesteps, noise_scheduler, args.v_parameterization)

                    loss = loss.mean()  # mean over batch dimension
                else:
--- a/gen_img.py
+++ b/gen_img.py
@@ -1533,8 +1533,6 @@ def main(args):
    highres_fix = args.highres_fix_scale is not None
    # assert not highres_fix or args.image_path is None, f"highres_fix doesn't work with img2img / highres_fixはimg2imgと同時に使えません"

-    if args.v_parameterization and not args.v2:
-        logger.warning("v_parameterization should be with v2 / v1でv_parameterizationを使用することは想定されていません")
    if args.v2 and args.clip_skip is not None:
        logger.warning("v2 with clip_skip will be unexpected / v2でclip_skipを使用することは想定されていません")

--- a/gen_img_diffusers.py
+++ b/gen_img_diffusers.py
@@ -2216,8 +2216,6 @@ def main(args):
    highres_fix = args.highres_fix_scale is not None
    # assert not highres_fix or args.image_path is None, f"highres_fix doesn't work with img2img / highres_fixはimg2imgと同時に使えません"

-    if args.v_parameterization and not args.v2:
-        logger.warning("v_parameterization should be with v2 / v1でv_parameterizationを使用することは想定されていません")
    if args.v2 and args.clip_skip is not None:
        logger.warning("v2 with clip_skip will be unexpected / v2でclip_skipを使用することは想定されていません")

--- a/library/custom_train_functions.py
+++ b/library/custom_train_functions.py
@@ -96,9 +96,12 @@ def add_v_prediction_like_loss(loss, timesteps, noise_scheduler, v_pred_like_los
    return loss


-def apply_debiased_estimation(loss, timesteps, noise_scheduler):
+def apply_debiased_estimation(loss, timesteps, noise_scheduler, v_prediction=False):
    snr_t = torch.stack([noise_scheduler.all_snr[t] for t in timesteps])  # batch_size
    snr_t = torch.minimum(snr_t, torch.ones_like(snr_t) * 1000)  # if timestep is 0, snr_t is inf, so limit it to 1000
+    if v_prediction:
+        weight = 1 / (snr_t + 1)
+    else:
        weight = 1 / torch.sqrt(snr_t)
    loss = weight * loss
    return loss
--- a/library/train_util.py
+++ b/library/train_util.py
@@ -4115,10 +4115,6 @@ def verify_training_args(args: argparse.Namespace):
    """
    enable_high_vram(args)

-    if args.v_parameterization and not args.v2:
-        logger.warning(
-            "v_parameterization should be with v2 not v1 or sdxl / v1やsdxlでv_parameterizationを使用することは想定されていません"
-        )
    if args.v2 and args.clip_skip is not None:
        logger.warning("v2 with clip_skip will be unexpected / v2でclip_skipを使用することは想定されていません")

--- a/sdxl_train.py
+++ b/sdxl_train.py
@@ -733,7 +733,7 @@ def train(args):
                    if args.v_pred_like_loss:
                        loss = add_v_prediction_like_loss(loss, timesteps, noise_scheduler, args.v_pred_like_loss)
                    if args.debiased_estimation_loss:
-                        loss = apply_debiased_estimation(loss, timesteps, noise_scheduler)
+                        loss = apply_debiased_estimation(loss, timesteps, noise_scheduler, args.v_parameterization)

                    loss = loss.mean()  # mean over batch dimension
                else:
--- a/sdxl_train_control_net_lllite.py
+++ b/sdxl_train_control_net_lllite.py
@@ -499,7 +499,7 @@ def train(args):
                if args.v_pred_like_loss:
                    loss = add_v_prediction_like_loss(loss, timesteps, noise_scheduler, args.v_pred_like_loss)
                if args.debiased_estimation_loss:
-                    loss = apply_debiased_estimation(loss, timesteps, noise_scheduler)
+                    loss = apply_debiased_estimation(loss, timesteps, noise_scheduler, args.v_parameterization)

                loss = loss.mean()  # 平均なのでbatch_sizeで割る必要なし

--- a/sdxl_train_control_net_lllite_old.py
+++ b/sdxl_train_control_net_lllite_old.py
@@ -439,7 +439,7 @@ def train(args):
                if args.v_pred_like_loss:
                    loss = add_v_prediction_like_loss(loss, timesteps, noise_scheduler, args.v_pred_like_loss)
                if args.debiased_estimation_loss:
-                    loss = apply_debiased_estimation(loss, timesteps, noise_scheduler)
+                    loss = apply_debiased_estimation(loss, timesteps, noise_scheduler, args.v_parameterization)

                loss = loss.mean()  # 平均なのでbatch_sizeで割る必要なし

--- a/train_db.py
+++ b/train_db.py
@@ -399,7 +399,7 @@ def train(args):
                if args.scale_v_pred_loss_like_noise_pred:
                    loss = scale_v_prediction_loss_like_noise_prediction(loss, timesteps, noise_scheduler)
                if args.debiased_estimation_loss:
-                    loss = apply_debiased_estimation(loss, timesteps, noise_scheduler)
+                    loss = apply_debiased_estimation(loss, timesteps, noise_scheduler, args.v_parameterization)

                loss = loss.mean()  # 平均なのでbatch_sizeで割る必要なし

--- a/train_network.py
+++ b/train_network.py
@@ -252,7 +252,7 @@ class NetworkTrainer:
        if args.v_pred_like_loss:
            loss = add_v_prediction_like_loss(loss, timesteps, noise_scheduler, args.v_pred_like_loss)
        if args.debiased_estimation_loss:
-            loss = apply_debiased_estimation(loss, timesteps, noise_scheduler)
+            loss = apply_debiased_estimation(loss, timesteps, noise_scheduler, args.v_parameterization)
        return loss

    def get_sai_model_spec(self, args):
--- a/train_textual_inversion.py
+++ b/train_textual_inversion.py
@@ -618,7 +618,7 @@ class TextualInversionTrainer:
                    if args.v_pred_like_loss:
                        loss = add_v_prediction_like_loss(loss, timesteps, noise_scheduler, args.v_pred_like_loss)
                    if args.debiased_estimation_loss:
-                        loss = apply_debiased_estimation(loss, timesteps, noise_scheduler)
+                        loss = apply_debiased_estimation(loss, timesteps, noise_scheduler, args.v_parameterization)

                    loss = loss.mean()  # 平均なのでbatch_sizeで割る必要なし

--- a/train_textual_inversion_XTI.py
+++ b/train_textual_inversion_XTI.py
@@ -486,7 +486,7 @@ def train(args):
                if args.scale_v_pred_loss_like_noise_pred:
                    loss = scale_v_prediction_loss_like_noise_prediction(loss, timesteps, noise_scheduler)
                if args.debiased_estimation_loss:
-                    loss = apply_debiased_estimation(loss, timesteps, noise_scheduler)
+                    loss = apply_debiased_estimation(loss, timesteps, noise_scheduler, args.v_parameterization)

                loss = loss.mean()  # 平均なのでbatch_sizeで割る必要なし