Merge branch 'dev' into deep-speed

2026-04-06 13:47:06 +00:00 · 2024-03-17 19:30:42 +09:00
parent 97524f1bda 443f02942c
commit 86e40fabbc
2 changed files with 15 additions and 3 deletions
--- a/README.md
+++ b/README.md
@@ -355,6 +355,16 @@ It becomes `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best
 `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` や `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` などになります。


+### Mar 15, 2024 / 2024/3/15: v0.8.5
+
+- Fixed a bug where the value of the timestep embedding was incorrect during SDXL training.
+  - Inference with the generation script has also been fixed.
+  - The impact is unknown, but please update if you train SDXL models.
+
+- SDXL 学習時の timestep embedding の値が誤っていたのを修正しました。
+  - 生成スクリプトでの推論時についてもあわせて修正しました。
+  - 影響の度合いは不明ですが、SDXL の学習時にはアップデートをお願いいたします。
+
 ### Feb 24, 2024 / 2024/2/24: v0.8.4

 - The log output has been improved. PR [#905](https://github.com/kohya-ss/sd-scripts/pull/905) Thanks to shirayu!
--- a/library/sdxl_original_unet.py
+++ b/library/sdxl_original_unet.py
@@ -31,8 +31,10 @@ from torch import nn
 from torch.nn import functional as F
 from einops import rearrange
 from .utils import setup_logging
+
 setup_logging()
 import logging
+
 logger = logging.getLogger(__name__)

 IN_CHANNELS: int = 4
@@ -1074,7 +1076,7 @@ class SdxlUNet2DConditionModel(nn.Module):
        timesteps = timesteps.expand(x.shape[0])

        hs = []
-        t_emb = get_timestep_embedding(timesteps, self.model_channels)  # , repeat_only=False)
+        t_emb = get_timestep_embedding(timesteps, self.model_channels, downscale_freq_shift=0)  # , repeat_only=False)
        t_emb = t_emb.to(x.dtype)
        emb = self.time_embed(t_emb)

@@ -1132,7 +1134,7 @@ class InferSdxlUNet2DConditionModel:
    # call original model's methods
    def __getattr__(self, name):
        return getattr(self.delegate, name)
-    
+
    def __call__(self, *args, **kwargs):
        return self.delegate(*args, **kwargs)

@@ -1164,7 +1166,7 @@ class InferSdxlUNet2DConditionModel:
        timesteps = timesteps.expand(x.shape[0])

        hs = []
-        t_emb = get_timestep_embedding(timesteps, _self.model_channels)  # , repeat_only=False)
+        t_emb = get_timestep_embedding(timesteps, _self.model_channels, downscale_freq_shift=0)  # , repeat_only=False)
        t_emb = t_emb.to(x.dtype)
        emb = _self.time_embed(t_emb)