fix to work with torch 2.0

commit a7ef6422b6
parent 9cfa68c92f
Author: Kohya S
Date:   2024-01-20 10:00:30 +09:00

@@ -393,11 +393,9 @@ class NetworkTrainer:
         unet_weight_dtype = te_weight_dtype = weight_dtype
         # Experimental Feature: Put base model into fp8 to save vram
         if args.fp8_base:
-            assert (
-                torch.__version__ >= '2.1.0'
-            ), "fp8_base requires torch>=2.1.0 / fp8を使う場合はtorch>=2.1.0が必要です。"
+            assert torch.__version__ >= "2.1.0", "fp8_base requires torch>=2.1.0 / fp8を使う場合はtorch>=2.1.0が必要です。"
             assert (
-                args.mixed_precision != 'no'
+                args.mixed_precision != "no"
             ), "fp8_base requires mixed precision='fp16' or 'bf16' / fp8を使う場合はmixed_precision='fp16'または'bf16'が必要です。"
             accelerator.print("enable fp8 training.")
             unet_weight_dtype = torch.float8_e4m3fn
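Note on the hunk above: --fp8_base stores the frozen base model's weights as torch.float8_e4m3fn to save VRAM, and the fp8 dtypes only exist in torch >= 2.1, hence the two asserts. A minimal standalone sketch of the same gating; pick_base_dtype is an assumed name for illustration, not a function in this repo:

    import torch

    def pick_base_dtype(use_fp8: bool, weight_dtype: torch.dtype) -> torch.dtype:
        # fp8 dtypes such as float8_e4m3fn were only added in torch 2.1
        if use_fp8 and hasattr(torch, "float8_e4m3fn"):
            return torch.float8_e4m3fn
        return weight_dtype
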
@@ -407,13 +405,12 @@ class NetworkTrainer:
             unet.to(dtype=unet_weight_dtype)
         for t_enc in text_encoders:
             t_enc.requires_grad_(False)
-            t_enc.to(dtype=te_weight_dtype)
-            # nn.Embedding not support FP8
-            t_enc.text_model.embeddings.to(dtype=(
-                weight_dtype
-                if te_weight_dtype == torch.float8_e4m3fn
-                else te_weight_dtype
-            ))
+
+            # in case of cpu, dtype is already set to fp32 because cpu does not support fp8/fp16/bf16
+            if t_enc.device.type != "cpu":
+                t_enc.to(dtype=te_weight_dtype)
+                # nn.Embedding not support FP8
+                t_enc.text_model.embeddings.to(dtype=(weight_dtype if te_weight_dtype != weight_dtype else te_weight_dtype))

         # acceleratorがなんかよろしくやってくれるらしい / accelerator will do something good
         if train_unet:
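Note on the hunk above; this is the actual torch 2.0 fix. The old embeddings line evaluated torch.float8_e4m3fn on every run, and that attribute does not exist before torch 2.1, so the lookup failed even when --fp8_base was off. The new comparison (te_weight_dtype != weight_dtype) only touches dtypes already bound to variables, and the added device check skips the cast on CPU, where weights stay fp32 because CPU supports none of fp8/fp16/bf16. A standalone sketch of the failure mode and the workaround, with assumed example dtypes:

    import torch

    weight_dtype = torch.float16               # assumed mixed-precision dtype
    has_fp8 = hasattr(torch, "float8_e4m3fn")  # False on torch 2.0.x
    te_weight_dtype = torch.float8_e4m3fn if has_fp8 else weight_dtype

    # Old form evaluated `te_weight_dtype == torch.float8_e4m3fn`, which raises
    # AttributeError on torch 2.0; the new form compares dtypes already in hand.
    # nn.Embedding stays in the higher-precision dtype (no FP8 support).
    emb_dtype = weight_dtype if te_weight_dtype != weight_dtype else te_weight_dtype
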
@@ -805,7 +802,14 @@ class NetworkTrainer:
                 # Predict the noise residual
                 with accelerator.autocast():
                     noise_pred = self.call_unet(
-                        args, accelerator, unet, noisy_latents.requires_grad_(train_unet), timesteps, text_encoder_conds, batch, weight_dtype
+                        args,
+                        accelerator,
+                        unet,
+                        noisy_latents.requires_grad_(train_unet),
+                        timesteps,
+                        text_encoder_conds,
+                        batch,
+                        weight_dtype,
                     )

                 if args.v_parameterization: