make transform_DDP more intuitive

ykume committed 2023-05-03 11:07:29 +09:00
parent e1143caf38
commit 2fcbfec178
6 changed files with 8 additions and 8 deletions

View File

@@ -229,7 +229,7 @@ def train(args):
unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler)
# transform DDP after prepare
-text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
+text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
# Experimental feature: perform fp16 training including gradients (patch PyTorch to enable grad scale in fp16)
if args.full_fp16:

View File

@@ -2897,9 +2897,9 @@ def _load_target_model(args: argparse.Namespace, weight_dtype, device="cpu"):
return text_encoder, vae, unet, load_stable_diffusion_format
-def transform_DDP(text_encoder, unet, network=None):
+def transform_if_model_is_DDP(text_encoder, unet, network=None):
# Transform text_encoder, unet and network from DistributedDataParallel
-return (encoder.module if type(encoder) == DDP else encoder for encoder in [text_encoder, unet, network])
+return (model.module if type(model) == DDP else model for model in [text_encoder, unet, network] if model is not None)
def load_target_model(args, weight_dtype, accelerator):
@@ -2922,7 +2922,7 @@ def load_target_model(args, weight_dtype, accelerator):
torch.cuda.empty_cache()
accelerator.wait_for_everyone()
-text_encoder, unet, _ = transform_DDP(text_encoder, unet, network=None)
+text_encoder, unet = transform_if_model_is_DDP(text_encoder, unet)
return text_encoder, vae, unet, load_stable_diffusion_format
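
A minimal, illustrative sketch of what the renamed helper does (not part of the commit; the nn.Linear stand-ins are hypothetical). A DDP-wrapped model exposes the underlying model as `.module`; plain models pass through unchanged, and None entries are filtered out so call sites unpack exactly as many values as they pass in:

import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP

def transform_if_model_is_DDP(text_encoder, unet, network=None):
    # unwrap any DDP-wrapped model back to the bare module; skip None entries
    return (model.module if type(model) == DDP else model for model in [text_encoder, unet, network] if model is not None)

# In a single-process run nothing is DDP-wrapped, so the models come back as-is;
# with network=None the generator yields two items, so two names unpack cleanly.
text_encoder, unet = transform_if_model_is_DDP(nn.Linear(4, 4), nn.Linear(4, 4))

Because None models are dropped from the result, two-model call sites no longer need a throwaway `_` for the absent network, which is what makes the new signature more intuitive.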

View File

@@ -197,7 +197,7 @@ def train(args):
unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler)
# transform DDP after prepare
-text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
+text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
if not train_text_encoder:
text_encoder.to(accelerator.device, dtype=weight_dtype) # to avoid 'cpu' vs 'cuda' error

View File

@@ -262,7 +262,7 @@ def train(args):
network, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(network, optimizer, train_dataloader, lr_scheduler)
# transform DDP after prepare (train_network here only)
-text_encoder, unet, network = train_util.transform_DDP(text_encoder, unet, network)
+text_encoder, unet, network = train_util.transform_if_model_is_DDP(text_encoder, unet, network)
unet.requires_grad_(False)
unet.to(accelerator.device, dtype=weight_dtype)

View File

@@ -281,7 +281,7 @@ def train(args):
)
# transform DDP after prepare
-text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
+text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
index_no_updates = torch.arange(len(tokenizer)) < token_ids[0]
# print(len(index_no_updates), torch.sum(index_no_updates))

View File

@@ -315,7 +315,7 @@ def train(args):
)
# transform DDP after prepare
-text_encoder, unet, _ = train_util.transform_DDP(text_encoder, unet)
+text_encoder, unet = train_util.transform_if_model_is_DDP(text_encoder, unet)
index_no_updates = torch.arange(len(tokenizer)) < token_ids_XTI[0]
# print(len(index_no_updates), torch.sum(index_no_updates))