mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-06 13:47:06 +00:00
make transform_DDP more intuitive
This commit is contained in:
@@ -229,7 +229,7 @@ def train(args):
|
||||
unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler)
|
||||
|
||||
# transform DDP after prepare
|
||||
text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
|
||||
text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
|
||||
|
||||
# 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする
|
||||
if args.full_fp16:
|
||||
|
||||
@@ -2897,9 +2897,9 @@ def _load_target_model(args: argparse.Namespace, weight_dtype, device="cpu"):
|
||||
return text_encoder, vae, unet, load_stable_diffusion_format
|
||||
|
||||
|
||||
def transform_DDP(text_encoder, unet, network=None):
|
||||
def transform_if_model_is_DDP(text_encoder, unet, network=None):
|
||||
# Transform text_encoder, unet and network from DistributedDataParallel
|
||||
return (encoder.module if type(encoder) == DDP else encoder for encoder in [text_encoder, unet, network])
|
||||
return (model.module if type(model) == DDP else model for model in [text_encoder, unet, network] if model is not None)
|
||||
|
||||
|
||||
def load_target_model(args, weight_dtype, accelerator):
|
||||
@@ -2922,7 +2922,7 @@ def load_target_model(args, weight_dtype, accelerator):
|
||||
torch.cuda.empty_cache()
|
||||
accelerator.wait_for_everyone()
|
||||
|
||||
text_encoder, unet, _ = transform_DDP(text_encoder, unet, network=None)
|
||||
text_encoder, unet = transform_if_model_is_DDP(text_encoder, unet)
|
||||
|
||||
return text_encoder, vae, unet, load_stable_diffusion_format
|
||||
|
||||
|
||||
@@ -197,7 +197,7 @@ def train(args):
|
||||
unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler)
|
||||
|
||||
# transform DDP after prepare
|
||||
text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
|
||||
text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
|
||||
|
||||
if not train_text_encoder:
|
||||
text_encoder.to(accelerator.device, dtype=weight_dtype) # to avoid 'cpu' vs 'cuda' error
|
||||
|
||||
@@ -262,7 +262,7 @@ def train(args):
|
||||
network, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(network, optimizer, train_dataloader, lr_scheduler)
|
||||
|
||||
# transform DDP after prepare (train_network here only)
|
||||
text_encoder, unet, network = train_util.transform_DDP(text_encoder, unet, network)
|
||||
text_encoder, unet, network = train_util.transform_if_model_is_DDP(text_encoder, unet, network)
|
||||
|
||||
unet.requires_grad_(False)
|
||||
unet.to(accelerator.device, dtype=weight_dtype)
|
||||
|
||||
@@ -281,7 +281,7 @@ def train(args):
|
||||
)
|
||||
|
||||
# transform DDP after prepare
|
||||
text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
|
||||
text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
|
||||
|
||||
index_no_updates = torch.arange(len(tokenizer)) < token_ids[0]
|
||||
# print(len(index_no_updates), torch.sum(index_no_updates))
|
||||
|
||||
@@ -315,7 +315,7 @@ def train(args):
|
||||
)
|
||||
|
||||
# transform DDP after prepare
|
||||
text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
|
||||
text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
|
||||
|
||||
index_no_updates = torch.arange(len(tokenizer)) < token_ids_XTI[0]
|
||||
# print(len(index_no_updates), torch.sum(index_no_updates))
|
||||
|
||||
Reference in New Issue
Block a user