From dfe08f395fb7860e5b888f37231ad8564852b95a Mon Sep 17 00:00:00 2001 From: BootsofLagrangian <125134079+BootsofLagrangian@users.noreply.github.com> Date: Sun, 4 Feb 2024 03:12:42 +0900 Subject: [PATCH 01/69] support deepspeed --- fine_tune.py | 41 ++++++++++++++++++++------- library/train_util.py | 54 +++++++++++++++++++++++++++++++++++ sdxl_train.py | 45 ++++++++++++++++++++--------- train_db.py | 39 +++++++++++++++++++------ train_network.py | 66 +++++++++++++++++++++++++++++++------------ 5 files changed, 195 insertions(+), 50 deletions(-) diff --git a/fine_tune.py b/fine_tune.py index 982dc8ae..78dfd169 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -102,6 +102,7 @@ def train(args): # mixed precisionに対応した型を用意しておき適宜castする weight_dtype, save_dtype = train_util.prepare_dtype(args) + vae_dtype = torch.float32 if args.no_half_vae else weight_dtype # モデルを読み込む text_encoder, vae, unet, load_stable_diffusion_format = train_util.load_target_model(args, weight_dtype, accelerator) @@ -152,7 +153,7 @@ def train(args): # 学習を準備する if cache_latents: - vae.to(accelerator.device, dtype=weight_dtype) + vae.to(accelerator.device, dtype=vae_dtype) vae.requires_grad_(False) vae.eval() with torch.no_grad(): @@ -187,7 +188,7 @@ def train(args): if not cache_latents: vae.requires_grad_(False) vae.eval() - vae.to(accelerator.device, dtype=weight_dtype) + vae.to(accelerator.device, dtype=vae_dtype) for m in training_models: m.requires_grad_(True) @@ -214,7 +215,7 @@ def train(args): batch_size=1, shuffle=True, collate_fn=collator, - num_workers=n_workers, + num_workers=n_workers if not args.deepspeed else 1, # To avoid RuntimeError: DataLoader worker exited unexpectedly with exit code 1. 
persistent_workers=args.persistent_data_loader_workers, ) @@ -240,13 +241,33 @@ def train(args): unet.to(weight_dtype) text_encoder.to(weight_dtype) - # acceleratorがなんかよろしくやってくれるらしい - if args.train_text_encoder: - unet, text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( - unet, text_encoder, optimizer, train_dataloader, lr_scheduler - ) - else: - unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler) + if args.deepspeed: + # wrapping model + class DeepSpeedModel(torch.nn.Module): + def __init__(self, unet, text_encoder, vae) -> None: + super().__init__() + self.unet = unet + self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) + self.vae = vae + def get_models(self): + return self.unet, self.text_encoders, self.vae + + unet.to(accelerator.device, dtype=weight_dtype) + [t_enc.to(accelerator.device, dtype=weight_dtype) for t_enc in text_encoders] + ds_model = DeepSpeedModel(unet, text_encoders, vae) + ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) + # Now, ds_model is an instance of DeepSpeedEngine. 
+ unet, text_encoders, vae = ds_model.get_models() # for compatiblility + vae.to(vae_dtype) + text_encoder = text_encoders + + else: # acceleratorがなんかよろしくやってくれるらしい + if args.train_text_encoder: + unet, text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + unet, text_encoder, optimizer, train_dataloader, lr_scheduler + ) + else: + unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler) # 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする if args.full_fp16: diff --git a/library/train_util.py b/library/train_util.py index ba428e50..2d85c977 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -20,6 +20,7 @@ from typing import ( Union, ) from accelerate import Accelerator, InitProcessGroupKwargs, DistributedDataParallelKwargs +from accelerate import DeepSpeedPlugin import gc import glob import math @@ -3124,6 +3125,47 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: "--prior_loss_weight", type=float, default=1.0, help="loss weight for regularization images / 正則化画像のlossの重み" ) + # DeepSpeed Arguments. https://huggingface.co/docs/accelerate/usage_guides/deepspeed + parser.add_argument("--deepspeed", action="store_true", help="enable deepspeed training") + parser.add_argument( + "--zero_stage", + type=int, default=2, + choices=[0, 1, 2, 3], + help="Possible options are 0,1,2,3." + ) + parser.add_argument( + "--offload_optimizer", + type=str, default=None, + choices=[None, "cpu", "nvme"], + help="Possible options are none|cpu|nvme. Only applicable with ZeRO Stages 2 and 3." + ) + parser.add_argument( + "--offload_optimizer_nvme_path", + type=str, default=None, + help="Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3." + ) + parser.add_argument( + "--offload_param_device", + type=str, default=None, + choices=[None, "cpu", "nvme"], + help="Possible options are none|cpu|nvme. 
Only applicable with ZeRO Stage 3." + ) + parser.add_argument( + "--offload_param_nvme_path", + type=str, default=None, + help="Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3." + ) + parser.add_argument( + "--zero3_init_flag", + action="store_true", + help="Flag to indicate whether to enable `deepspeed.zero.Init` for constructing massive models." + "Only applicable with ZeRO Stage-3." + ) + parser.add_argument( + "--zero3_save_16bit_model", + action="store_true", + help="Flag to indicate whether to save 16-bit model. Only applicable with ZeRO Stage-3." + ) def verify_training_args(args: argparse.Namespace): if args.v_parameterization and not args.v2: @@ -3912,6 +3954,17 @@ def prepare_accelerator(args: argparse.Namespace): else None, ) kwargs_handlers = list(filter(lambda x: x is not None, kwargs_handlers)) + deepspeed_plugin = None + if args.deepspeed: + deepspeed_plugin = DeepSpeedPlugin( + zero_stage=args.zero_stage, + gradient_accumulation_steps=args.gradient_accumulation_steps, gradient_clipping=args.max_grad_norm, + offload_optimizer=args.offload_optimizer, offload_optimizer_nvme_path=args.offload_optimizer_nvme_path, + offload_param_device=args.offload_param_device, offload_param_nvme_path=args.offload_param_nvme_path, + zero3_init_flag=args.zero3_init_flag, zero3_save_16bit_model=args.zero3_save_16bit_model, + ) + deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu'] = args.train_batch_size + accelerator = Accelerator( gradient_accumulation_steps=args.gradient_accumulation_steps, mixed_precision=args.mixed_precision, @@ -3919,6 +3972,7 @@ def prepare_accelerator(args: argparse.Namespace): project_dir=logging_dir, kwargs_handlers=kwargs_handlers, dynamo_backend=dynamo_backend, + deepspeed_plugin=deepspeed_plugin, ) return accelerator diff --git a/sdxl_train.py b/sdxl_train.py index a3f6f3a1..6ce6c201 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -354,7 +354,7 @@ def train(args): batch_size=1, shuffle=True, 
collate_fn=collator, - num_workers=n_workers, + num_workers=n_workers if not args.deepspeed else 1, # To avoid RuntimeError: DataLoader worker exited unexpectedly with exit code 1. persistent_workers=args.persistent_data_loader_workers, ) @@ -389,18 +389,37 @@ def train(args): text_encoder1.to(weight_dtype) text_encoder2.to(weight_dtype) - # acceleratorがなんかよろしくやってくれるらしい - if train_unet: - unet = accelerator.prepare(unet) - if train_text_encoder1: - # freeze last layer and final_layer_norm in te1 since we use the output of the penultimate layer - text_encoder1.text_model.encoder.layers[-1].requires_grad_(False) - text_encoder1.text_model.final_layer_norm.requires_grad_(False) - text_encoder1 = accelerator.prepare(text_encoder1) - if train_text_encoder2: - text_encoder2 = accelerator.prepare(text_encoder2) - - optimizer, train_dataloader, lr_scheduler = accelerator.prepare(optimizer, train_dataloader, lr_scheduler) + if args.deepspeed: + # Wrapping model for DeepSpeed + class DeepSpeedModel(torch.nn.Module): + def __init__(self, unet, text_encoder, vae) -> None: + super().__init__() + self.unet = unet + self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) + self.vae = vae + + def get_models(self): + return self.unet, self.text_encoders, self.vae + text_encoders = [text_encoder1, text_encoder2] + unet.to(accelerator.device, dtype=weight_dtype) + [t_enc.to(accelerator.device, dtype=weight_dtype) for t_enc in text_encoders] + ds_model = DeepSpeedModel(unet, text_encoders, vae) + ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) + # Now, ds_model is an instance of DeepSpeedEngine. 
+ unet, text_encoders, vae = ds_model.get_models() # for compatiblility + vae.to(vae_dtype) # to avoid explicitly half-vae + text_encoder1, text_encoder2 = text_encoders[0], text_encoders[1] + else: # acceleratorがなんかよろしくやってくれるらしい + if train_unet: + unet = accelerator.prepare(unet) + if train_text_encoder1: + # freeze last layer and final_layer_norm in te1 since we use the output of the penultimate layer + text_encoder1.text_model.encoder.layers[-1].requires_grad_(False) + text_encoder1.text_model.final_layer_norm.requires_grad_(False) + text_encoder1 = accelerator.prepare(text_encoder1) + if train_text_encoder2: + text_encoder2 = accelerator.prepare(text_encoder2) + optimizer, train_dataloader, lr_scheduler = accelerator.prepare(optimizer, train_dataloader, lr_scheduler) # TextEncoderの出力をキャッシュするときにはCPUへ移動する if args.cache_text_encoder_outputs: diff --git a/train_db.py b/train_db.py index 888cad25..d5f47a17 100644 --- a/train_db.py +++ b/train_db.py @@ -184,7 +184,7 @@ def train(args): batch_size=1, shuffle=True, collate_fn=collator, - num_workers=n_workers, + num_workers=n_workers if not args.deepspeed else 1, # To avoid RuntimeError: DataLoader worker exited unexpectedly with exit code 1. 
persistent_workers=args.persistent_data_loader_workers, ) @@ -214,15 +214,36 @@ def train(args): text_encoder.to(weight_dtype) # acceleratorがなんかよろしくやってくれるらしい - if train_text_encoder: - unet, text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( - unet, text_encoder, optimizer, train_dataloader, lr_scheduler - ) - else: - unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler) + if args.deepspeed: + # wrapping model + class DeepSpeedModel(torch.nn.Module): + def __init__(self, unet, text_encoder, vae) -> None: + super().__init__() + self.unet = unet + self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) + self.vae = vae + + def get_models(self): + return self.unet, self.text_encoders, self.vae - if not train_text_encoder: - text_encoder.to(accelerator.device, dtype=weight_dtype) # to avoid 'cpu' vs 'cuda' error + unet.to(accelerator.device, dtype=weight_dtype) + [t_enc.to(accelerator.device, dtype=weight_dtype) for t_enc in text_encoders] + ds_model = DeepSpeedModel(unet, text_encoders, vae) + ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) + # Now, ds_model is an instance of DeepSpeedEngine. 
+ unet, text_encoders, vae = ds_model.get_models() # for compatiblility + vae.to(vae_dtype) # to avoid explicitly half-vae + text_encoder = text_encoders + else: + if train_text_encoder: + unet, text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + unet, text_encoder, optimizer, train_dataloader, lr_scheduler + ) + else: + unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler) + + if not train_text_encoder: + text_encoder.to(accelerator.device, dtype=weight_dtype) # to avoid 'cpu' vs 'cuda' error # 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする if args.full_fp16: diff --git a/train_network.py b/train_network.py index 8d102ae8..05dbe2de 100644 --- a/train_network.py +++ b/train_network.py @@ -353,18 +353,26 @@ class NetworkTrainer: batch_size=1, shuffle=True, collate_fn=collator, - num_workers=n_workers, + num_workers=n_workers if not args.deepspeed else 1, # To avoid RuntimeError: DataLoader worker exited unexpectedly with exit code 1. persistent_workers=args.persistent_data_loader_workers, ) # 学習ステップ数を計算する if args.max_train_epochs is not None: - args.max_train_steps = args.max_train_epochs * math.ceil( - len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps - ) - accelerator.print( - f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" - ) + if args.deepspeed: + args.max_train_steps = args.max_train_epochs * math.ceil( + len(train_dataloader) / args.gradient_accumulation_steps + ) + accelerator.print( + f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" + ) + else: + args.max_train_steps = args.max_train_epochs * math.ceil( + len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps + ) + accelerator.print( + f"override steps. 
steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" + ) # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) @@ -409,20 +417,42 @@ class NetworkTrainer: t_enc.text_model.embeddings.to(dtype=(weight_dtype if te_weight_dtype != weight_dtype else te_weight_dtype)) # acceleratorがなんかよろしくやってくれるらしい / accelerator will do something good - if train_unet: - unet = accelerator.prepare(unet) + if args.deepspeed: + # wrapping model + class DeepSpeedModel(torch.nn.Module): + def __init__(self, unet, text_encoder, vae, network) -> None: + super().__init__() + self.unet = unet + self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) + self.vae = vae + self.network = network + + def get_models(self): + return self.unet, self.text_encoders, self.vae, self.network + + unet.to(accelerator.device, dtype=unet_weight_dtype) + [t_enc.to(accelerator.device, dtype=te_weight_dtype) for t_enc in text_encoders] + ds_model = DeepSpeedModel(unet, text_encoders, vae, network) + ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) + # Now, ds_model is an instance of DeepSpeedEngine. + unet, text_encoders, vae, network = ds_model.get_models() # for compatiblility + vae.to(vae_dtype) # to avoid explicitly half-vae + text_encoder = text_encoders else: - unet.to(accelerator.device, dtype=unet_weight_dtype) # move to device because unet is not prepared by accelerator - if train_text_encoder: - if len(text_encoders) > 1: - text_encoder = text_encoders = [accelerator.prepare(t_enc) for t_enc in text_encoders] + if train_unet: + unet = accelerator.prepare(unet) else: - text_encoder = accelerator.prepare(text_encoder) - text_encoders = [text_encoder] - else: - pass # if text_encoder is not trained, no need to prepare. 
and device and dtype are already set + unet.to(accelerator.device, dtype=unet_weight_dtype) # move to device because unet is not prepared by accelerator + if train_text_encoder: + if len(text_encoders) > 1: + text_encoder = text_encoders = [accelerator.prepare(t_enc) for t_enc in text_encoders] + else: + text_encoder = accelerator.prepare(text_encoder) + text_encoders = [text_encoder] + else: + pass # if text_encoder is not trained, no need to prepare. and device and dtype are already set - network, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(network, optimizer, train_dataloader, lr_scheduler) + network, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(network, optimizer, train_dataloader, lr_scheduler) if args.gradient_checkpointing: # according to TI example in Diffusers, train is required From 64873c1b4317afad99a1d397454ba0c64c6cb0b1 Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Mon, 5 Feb 2024 17:11:50 +0900 Subject: [PATCH 02/69] fix offload_optimizer_device typo --- library/train_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index 2d85c977..933a34c4 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3134,7 +3134,7 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: help="Possible options are 0,1,2,3." ) parser.add_argument( - "--offload_optimizer", + "--offload_optimizer_device", type=str, default=None, choices=[None, "cpu", "nvme"], help="Possible options are none|cpu|nvme. Only applicable with ZeRO Stages 2 and 3." 
@@ -3959,7 +3959,7 @@ def prepare_accelerator(args: argparse.Namespace): deepspeed_plugin = DeepSpeedPlugin( zero_stage=args.zero_stage, gradient_accumulation_steps=args.gradient_accumulation_steps, gradient_clipping=args.max_grad_norm, - offload_optimizer=args.offload_optimizer, offload_optimizer_nvme_path=args.offload_optimizer_nvme_path, + offload_optimizer_device=args.offload_optimizer_device, offload_optimizer_nvme_path=args.offload_optimizer_nvme_path, offload_param_device=args.offload_param_device, offload_param_nvme_path=args.offload_param_nvme_path, zero3_init_flag=args.zero3_init_flag, zero3_save_16bit_model=args.zero3_save_16bit_model, ) From 2824312d5eb6df118d7585cde7e84d4cdae6f6c6 Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Mon, 5 Feb 2024 20:13:28 +0900 Subject: [PATCH 03/69] fix vae type error during training sdxl --- library/sdxl_train_util.py | 1 - library/train_util.py | 5 ----- sdxl_train.py | 25 +++++++++++-------------- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/library/sdxl_train_util.py b/library/sdxl_train_util.py index 5ad748d1..ff7fef17 100644 --- a/library/sdxl_train_util.py +++ b/library/sdxl_train_util.py @@ -17,7 +17,6 @@ TOKENIZER2_PATH = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" def load_target_model(args, accelerator, model_version: str, weight_dtype): - # load models for each process model_dtype = match_mixed_precision(args, weight_dtype) # prepare fp16/bf16 for pi in range(accelerator.state.num_processes): if pi == accelerator.state.local_process_index: diff --git a/library/train_util.py b/library/train_util.py index 933a34c4..a20edbe1 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -4042,28 +4042,23 @@ def _load_target_model(args: argparse.Namespace, weight_dtype, device="cpu", une def load_target_model(args, weight_dtype, accelerator, unet_use_linear_projection_in_v2=False): - # load models for each process for pi in range(accelerator.state.num_processes): if pi == 
accelerator.state.local_process_index: print(f"loading model for process {accelerator.state.local_process_index}/{accelerator.state.num_processes}") - text_encoder, vae, unet, load_stable_diffusion_format = _load_target_model( args, weight_dtype, accelerator.device if args.lowram else "cpu", unet_use_linear_projection_in_v2=unet_use_linear_projection_in_v2, ) - # work on low-ram device if args.lowram: text_encoder.to(accelerator.device) unet.to(accelerator.device) vae.to(accelerator.device) - gc.collect() torch.cuda.empty_cache() accelerator.wait_for_everyone() - return text_encoder, vae, unet, load_stable_diffusion_format diff --git a/sdxl_train.py b/sdxl_train.py index 6ce6c201..e8680828 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -392,23 +392,20 @@ def train(args): if args.deepspeed: # Wrapping model for DeepSpeed class DeepSpeedModel(torch.nn.Module): - def __init__(self, unet, text_encoder, vae) -> None: + def __init__(self, unet, text_encoder) -> None: super().__init__() self.unet = unet self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) - self.vae = vae def get_models(self): - return self.unet, self.text_encoders, self.vae + return self.unet, self.text_encoders text_encoders = [text_encoder1, text_encoder2] - unet.to(accelerator.device, dtype=weight_dtype) - [t_enc.to(accelerator.device, dtype=weight_dtype) for t_enc in text_encoders] - ds_model = DeepSpeedModel(unet, text_encoders, vae) + ds_model = DeepSpeedModel(unet, text_encoders) ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) # Now, ds_model is an instance of DeepSpeedEngine. 
- unet, text_encoders, vae = ds_model.get_models() # for compatiblility - vae.to(vae_dtype) # to avoid explicitly half-vae - text_encoder1, text_encoder2 = text_encoders[0], text_encoders[1] + unet, text_encoders = ds_model.get_models() # for compatiblility + text_encoder1, text_encoder2 = text_encoder = text_encoders + training_models = [unet, text_encoder1, text_encoder2] else: # acceleratorがなんかよろしくやってくれるらしい if train_unet: unet = accelerator.prepare(unet) @@ -493,10 +490,10 @@ def train(args): for step, batch in enumerate(train_dataloader): current_step.value = global_step with accelerator.accumulate(*training_models): - if "latents" in batch and batch["latents"] is not None: - latents = batch["latents"].to(accelerator.device).to(dtype=weight_dtype) - else: - with torch.no_grad(): + with torch.no_grad(): # why this block differ within train_network.py? + if "latents" in batch and batch["latents"] is not None: + latents = batch["latents"].to(accelerator.device).to(dtype=weight_dtype) + else: # latentに変換 latents = vae.encode(batch["images"].to(vae_dtype)).latent_dist.sample().to(weight_dtype) @@ -504,7 +501,7 @@ def train(args): if torch.any(torch.isnan(latents)): accelerator.print("NaN found in latents, replacing with zeros") latents = torch.nan_to_num(latents, 0, out=latents) - latents = latents * sdxl_model_util.VAE_SCALE_FACTOR + latents = latents * sdxl_model_util.VAE_SCALE_FACTOR if "text_encoder_outputs1_list" not in batch or batch["text_encoder_outputs1_list"] is None: input_ids1 = batch["input_ids"] From 4295f91dcd75a7405aa70d5c5d2c826a618a4bcc Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Mon, 5 Feb 2024 20:19:56 +0900 Subject: [PATCH 04/69] fix all trainer about vae --- fine_tune.py | 29 ++++++++++++++++------------- train_db.py | 29 ++++++++++++++++------------- train_network.py | 15 +++++---------- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/fine_tune.py b/fine_tune.py index 78dfd169..f901ee64 100644 --- a/fine_tune.py +++ 
b/fine_tune.py @@ -221,10 +221,18 @@ def train(args): # 学習ステップ数を計算する if args.max_train_epochs is not None: - args.max_train_steps = args.max_train_epochs * math.ceil( - len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps - ) - accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") + if args.deepspeed: + args.max_train_steps = args.max_train_epochs * math.ceil( + len(train_dataloader) / args.gradient_accumulation_steps + ) + accelerator.print( + f"[DeepSpeed] override steps not dividing by {accelerator.num_processes}. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" + ) + else: + args.max_train_steps = args.max_train_epochs * math.ceil( + len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps + ) + accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) @@ -244,21 +252,16 @@ def train(args): if args.deepspeed: # wrapping model class DeepSpeedModel(torch.nn.Module): - def __init__(self, unet, text_encoder, vae) -> None: + def __init__(self, unet, text_encoder) -> None: super().__init__() self.unet = unet self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) - self.vae = vae def get_models(self): - return self.unet, self.text_encoders, self.vae - - unet.to(accelerator.device, dtype=weight_dtype) - [t_enc.to(accelerator.device, dtype=weight_dtype) for t_enc in text_encoders] - ds_model = DeepSpeedModel(unet, text_encoders, vae) + return self.unet, self.text_encoders + ds_model = DeepSpeedModel(unet, text_encoders) ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) # Now, ds_model is an instance of DeepSpeedEngine. 
- unet, text_encoders, vae = ds_model.get_models() # for compatiblility - vae.to(vae_dtype) + unet, text_encoders = ds_model.get_models() # for compatiblility text_encoder = text_encoders else: # acceleratorがなんかよろしくやってくれるらしい diff --git a/train_db.py b/train_db.py index d5f47a17..fa7f6a8d 100644 --- a/train_db.py +++ b/train_db.py @@ -190,10 +190,18 @@ def train(args): # 学習ステップ数を計算する if args.max_train_epochs is not None: - args.max_train_steps = args.max_train_epochs * math.ceil( - len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps - ) - accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") + if args.deepspeed: + args.max_train_steps = args.max_train_epochs * math.ceil( + len(train_dataloader) / args.gradient_accumulation_steps + ) + accelerator.print( + f"[DeepSpeed] override steps not dividing by {accelerator.num_processes}. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" + ) + else: + args.max_train_steps = args.max_train_epochs * math.ceil( + len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps + ) + accelerator.print(f"override steps. 
steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) @@ -217,22 +225,17 @@ def train(args): if args.deepspeed: # wrapping model class DeepSpeedModel(torch.nn.Module): - def __init__(self, unet, text_encoder, vae) -> None: + def __init__(self, unet, text_encoder) -> None: super().__init__() self.unet = unet self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) - self.vae = vae def get_models(self): - return self.unet, self.text_encoders, self.vae - - unet.to(accelerator.device, dtype=weight_dtype) - [t_enc.to(accelerator.device, dtype=weight_dtype) for t_enc in text_encoders] - ds_model = DeepSpeedModel(unet, text_encoders, vae) + return self.unet, self.text_encoders + ds_model = DeepSpeedModel(unet, text_encoders) ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) # Now, ds_model is an instance of DeepSpeedEngine. - unet, text_encoders, vae = ds_model.get_models() # for compatiblility - vae.to(vae_dtype) # to avoid explicitly half-vae + unet, text_encoders = ds_model.get_models() # for compatiblility text_encoder = text_encoders else: if train_text_encoder: diff --git a/train_network.py b/train_network.py index 05dbe2de..bbda427a 100644 --- a/train_network.py +++ b/train_network.py @@ -364,7 +364,7 @@ class NetworkTrainer: len(train_dataloader) / args.gradient_accumulation_steps ) accelerator.print( - f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" + f"[DeepSpeed] override steps not dividing by {accelerator.num_processes}. 
steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" ) else: args.max_train_steps = args.max_train_epochs * math.ceil( @@ -420,23 +420,18 @@ class NetworkTrainer: if args.deepspeed: # wrapping model class DeepSpeedModel(torch.nn.Module): - def __init__(self, unet, text_encoder, vae, network) -> None: + def __init__(self, unet, text_encoder, network) -> None: super().__init__() self.unet = unet self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) - self.vae = vae self.network = network def get_models(self): - return self.unet, self.text_encoders, self.vae, self.network - - unet.to(accelerator.device, dtype=unet_weight_dtype) - [t_enc.to(accelerator.device, dtype=te_weight_dtype) for t_enc in text_encoders] - ds_model = DeepSpeedModel(unet, text_encoders, vae, network) + return self.unet, self.text_encoders, self.network + ds_model = DeepSpeedModel(unet, text_encoders, network) ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) # Now, ds_model is an instance of DeepSpeedEngine. 
- unet, text_encoders, vae, network = ds_model.get_models() # for compatiblility - vae.to(vae_dtype) # to avoid explicitly half-vae + unet, text_encoders, network = ds_model.get_models() # for compatiblility text_encoder = text_encoders else: if train_unet: From 3970bf40804d9c66e76e0af5e1d0477f19bfa79a Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Mon, 5 Feb 2024 22:40:43 +0900 Subject: [PATCH 05/69] maybe fix branch to run offloading --- library/train_util.py | 2 ++ sdxl_train.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/library/train_util.py b/library/train_util.py index a20edbe1..676652e9 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3964,6 +3964,8 @@ def prepare_accelerator(args: argparse.Namespace): zero3_init_flag=args.zero3_init_flag, zero3_save_16bit_model=args.zero3_save_16bit_model, ) deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu'] = args.train_batch_size + deepspeed_plugin.deepspeed_config['train_batch_size'] = \ + args.train_batch_size * args.gradient_accumulation_steps * int(os.environ['WORLD_SIZE']) accelerator = Accelerator( gradient_accumulation_steps=args.gradient_accumulation_steps, diff --git a/sdxl_train.py b/sdxl_train.py index e8680828..ef3ead38 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -391,6 +391,12 @@ def train(args): if args.deepspeed: # Wrapping model for DeepSpeed + import deepspeed + if args.offload_optimizer_device is not None: + accelerator.print('[DeepSpeed] start to manually build cpu_adam.') + deepspeed.ops.op_builder.CPUAdamBuilder().load() + accelerator.print('[DeepSpeed] building cpu_adam done.') + class DeepSpeedModel(torch.nn.Module): def __init__(self, unet, text_encoder) -> None: super().__init__() From 7d2a9268b9d8d3c9b78068aaa2f9d43eb8b6101b Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Mon, 5 Feb 2024 22:42:06 +0900 Subject: [PATCH 06/69] apply offloading method runable for all trainer --- fine_tune.py | 5 +++++ train_db.py | 5 +++++ 
train_network.py | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/fine_tune.py b/fine_tune.py index f901ee64..85febeaa 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -251,6 +251,11 @@ def train(args): if args.deepspeed: # wrapping model + import deepspeed + if args.offload_optimizer_device is not None: + accelerator.print('[DeepSpeed] start to manually build cpu_adam.') + deepspeed.ops.op_builder.CPUAdamBuilder().load() + accelerator.print('[DeepSpeed] building cpu_adam done.') class DeepSpeedModel(torch.nn.Module): def __init__(self, unet, text_encoder) -> None: super().__init__() diff --git a/train_db.py b/train_db.py index fa7f6a8d..e2661886 100644 --- a/train_db.py +++ b/train_db.py @@ -224,6 +224,11 @@ def train(args): # acceleratorがなんかよろしくやってくれるらしい if args.deepspeed: # wrapping model + import deepspeed + if args.offload_optimizer_device is not None: + accelerator.print('[DeepSpeed] start to manually build cpu_adam.') + deepspeed.ops.op_builder.CPUAdamBuilder().load() + accelerator.print('[DeepSpeed] building cpu_adam done.') class DeepSpeedModel(torch.nn.Module): def __init__(self, unet, text_encoder) -> None: super().__init__() diff --git a/train_network.py b/train_network.py index bbda427a..050a6511 100644 --- a/train_network.py +++ b/train_network.py @@ -419,6 +419,11 @@ class NetworkTrainer: # acceleratorがなんかよろしくやってくれるらしい / accelerator will do something good if args.deepspeed: # wrapping model + import deepspeed + if args.offload_optimizer_device is not None: + accelerator.print('[DeepSpeed] start to manually build cpu_adam.') + deepspeed.ops.op_builder.CPUAdamBuilder().load() + accelerator.print('[DeepSpeed] building cpu_adam done.') class DeepSpeedModel(torch.nn.Module): def __init__(self, unet, text_encoder, network) -> None: super().__init__() From 62556619bdc876c450bfb1445b16683cf3a98699 Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Wed, 7 Feb 2024 16:42:05 +0900 Subject: [PATCH 07/69] fix full_fp16 compatible and train_step --- 
fine_tune.py | 16 ++----- library/train_util.py | 7 +++ sdxl_train.py | 3 +- test_pip_requirements.txt | 96 +++++++++++++++++++++++++++++++++++++++ train_db.py | 16 ++----- train_network.py | 23 ++++------ 6 files changed, 121 insertions(+), 40 deletions(-) create mode 100644 test_pip_requirements.txt diff --git a/fine_tune.py b/fine_tune.py index 85febeaa..eb652742 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -221,18 +221,10 @@ def train(args): # 学習ステップ数を計算する if args.max_train_epochs is not None: - if args.deepspeed: - args.max_train_steps = args.max_train_epochs * math.ceil( - len(train_dataloader) / args.gradient_accumulation_steps - ) - accelerator.print( - f"[DeepSpeed] override steps not dividing by {accelerator.num_processes}. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" - ) - else: - args.max_train_steps = args.max_train_epochs * math.ceil( - len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps - ) - accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") + args.max_train_steps = args.max_train_epochs * math.ceil( + len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps + ) + accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) diff --git a/library/train_util.py b/library/train_util.py index 676652e9..ea626510 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3166,6 +3166,11 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: action="store_true", help="Flag to indicate whether to save 16-bit model. Only applicable with ZeRO Stage-3." 
) + parser.add_argument( + "--fp16_master_weights_and_gradients", + action="store_true", + help="fp16_master_and_gradients requires optimizer to support keeping fp16 master and gradients while keeping the optimizer states in fp32." + ) def verify_training_args(args: argparse.Namespace): if args.v_parameterization and not args.v2: @@ -3966,6 +3971,8 @@ def prepare_accelerator(args: argparse.Namespace): deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu'] = args.train_batch_size deepspeed_plugin.deepspeed_config['train_batch_size'] = \ args.train_batch_size * args.gradient_accumulation_steps * int(os.environ['WORLD_SIZE']) + if args.full_fp16 or args.fp16_master_weights_and_gradients: + deepspeed_plugin.deepspeed_config['fp16_master_weights_and_gradients'] = True accelerator = Accelerator( gradient_accumulation_steps=args.gradient_accumulation_steps, diff --git a/sdxl_train.py b/sdxl_train.py index ef3ead38..54902b87 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -437,7 +437,8 @@ def train(args): text_encoder2.to(accelerator.device) # 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする - if args.full_fp16: + if args.full_fp16 and not args.deepspeed: + # During deepseed training, accelerate not handles fp16/bf16|mixed precision directly via scaler. Let deepspeed engine do. 
train_util.patch_accelerator_for_fp16_training(accelerator) # resumeする diff --git a/test_pip_requirements.txt b/test_pip_requirements.txt new file mode 100644 index 00000000..6abec351 --- /dev/null +++ b/test_pip_requirements.txt @@ -0,0 +1,96 @@ +absl-py==2.1.0 +accelerate==0.25.0 +aiohttp==3.9.3 +aiosignal==1.3.1 +altair==4.2.2 +annotated-types @ file:///home/conda/feedstock_root/build_artifacts/annotated-types_1696634205638/work +async-timeout==4.0.3 +attrs==23.2.0 +bitsandbytes==0.42.0 +Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1695989787169/work +cachetools==5.3.2 +certifi==2022.12.7 +charset-normalizer==2.1.1 +cmake==3.25.0 +deepspeed==0.13.1 +diffusers==0.25.0 +easygui==0.98.3 +einops==0.6.1 +entrypoints==0.4 +filelock==3.9.0 +frozenlist==1.4.1 +fsspec==2024.2.0 +ftfy==6.1.1 +gmpy2 @ file:///home/conda/feedstock_root/build_artifacts/gmpy2_1666808654411/work +google-auth==2.27.0 +google-auth-oauthlib==0.4.6 +grpcio==1.60.1 +hjson==3.1.0 +huggingface-hub==0.20.1 +idna==3.4 +importlib-metadata==7.0.1 +Jinja2==3.1.2 +jsonschema==4.21.1 +jsonschema-specifications==2023.12.1 +-e git+https://github.com/kohya-ss/sd-scripts@cd19df49cd512e13ac90db115c424d19c0e8868a#egg=library +lightning-utilities==0.10.1 +lit==15.0.7 +Markdown==3.5.2 +MarkupSafe==2.1.3 +mpmath==1.3.0 +multidict==6.0.5 +networkx==3.2.1 +ninja==1.11.1.1 +numpy==1.26.3 +oauthlib==3.2.2 +open-clip-torch==2.20.0 +opencv-python==4.7.0.68 +packaging==23.2 +pandas==2.2.0 +pillow==10.2.0 +protobuf==3.19.6 +psutil==5.9.8 +py-cpuinfo @ file:///home/conda/feedstock_root/build_artifacts/py-cpuinfo_1666774466606/work +pyasn1==0.5.1 +pyasn1-modules==0.3.0 +pydantic @ file:///home/conda/feedstock_root/build_artifacts/pydantic_1706543943340/work +pydantic_core @ file:///home/conda/feedstock_root/build_artifacts/pydantic-core_1705674688239/work +pynvml==11.5.0 +PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work +python-dateutil==2.8.2 
+pytorch-lightning==1.9.0 +pytz==2024.1 +PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1695373428874/work +referencing==0.33.0 +regex==2023.12.25 +requests==2.28.1 +requests-oauthlib==1.3.1 +rpds-py==0.17.1 +rsa==4.9 +safetensors==0.4.2 +scipy==1.12.0 +sentencepiece==0.1.99 +six==1.16.0 +sympy==1.12 +tensorboard==2.10.1 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +timm==0.9.12 +tokenizers==0.15.1 +toml==0.10.2 +toolz==0.12.1 +torch==2.0.1+cu118 +torchaudio==2.2.0 +torchmetrics==1.3.0.post0 +torchvision==0.15.2+cu118 +tqdm==4.66.1 +transformers==4.36.2 +triton==2.0.0 +typing_extensions==4.8.0 +tzdata==2023.4 +urllib3==1.26.13 +voluptuous==0.13.1 +wcwidth==0.2.13 +Werkzeug==3.0.1 +yarl==1.9.4 +zipp==3.17.0 diff --git a/train_db.py b/train_db.py index e2661886..58536555 100644 --- a/train_db.py +++ b/train_db.py @@ -190,18 +190,10 @@ def train(args): # 学習ステップ数を計算する if args.max_train_epochs is not None: - if args.deepspeed: - args.max_train_steps = args.max_train_epochs * math.ceil( - len(train_dataloader) / args.gradient_accumulation_steps - ) - accelerator.print( - f"[DeepSpeed] override steps not dividing by {accelerator.num_processes}. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" - ) - else: - args.max_train_steps = args.max_train_epochs * math.ceil( - len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps - ) - accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") + args.max_train_steps = args.max_train_epochs * math.ceil( + len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps + ) + accelerator.print(f"override steps. 
steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) diff --git a/train_network.py b/train_network.py index 050a6511..cc445d39 100644 --- a/train_network.py +++ b/train_network.py @@ -359,20 +359,12 @@ class NetworkTrainer: # 学習ステップ数を計算する if args.max_train_epochs is not None: - if args.deepspeed: - args.max_train_steps = args.max_train_epochs * math.ceil( - len(train_dataloader) / args.gradient_accumulation_steps - ) - accelerator.print( - f"[DeepSpeed] override steps not dividing by {accelerator.num_processes}. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" - ) - else: - args.max_train_steps = args.max_train_epochs * math.ceil( - len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps - ) - accelerator.print( - f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" - ) + args.max_train_steps = args.max_train_epochs * math.ceil( + len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps + ) + accelerator.print( + f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" + ) # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) @@ -479,7 +471,8 @@ class NetworkTrainer: vae.to(accelerator.device, dtype=vae_dtype) # 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする - if args.full_fp16: + if args.full_fp16 and not args.deepspeed: + # During deepseed training, accelerate not handles fp16/bf16|mixed precision directly via scaler. Let deepspeed engine do. 
train_util.patch_accelerator_for_fp16_training(accelerator) # resumeする From 2445a5b74e4c5bb0af24e0b3162c1eaef218b56b Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Wed, 7 Feb 2024 16:48:18 +0900 Subject: [PATCH 08/69] remove test requirements --- test_pip_requirements.txt | 96 --------------------------------------- 1 file changed, 96 deletions(-) delete mode 100644 test_pip_requirements.txt diff --git a/test_pip_requirements.txt b/test_pip_requirements.txt deleted file mode 100644 index 6abec351..00000000 --- a/test_pip_requirements.txt +++ /dev/null @@ -1,96 +0,0 @@ -absl-py==2.1.0 -accelerate==0.25.0 -aiohttp==3.9.3 -aiosignal==1.3.1 -altair==4.2.2 -annotated-types @ file:///home/conda/feedstock_root/build_artifacts/annotated-types_1696634205638/work -async-timeout==4.0.3 -attrs==23.2.0 -bitsandbytes==0.42.0 -Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1695989787169/work -cachetools==5.3.2 -certifi==2022.12.7 -charset-normalizer==2.1.1 -cmake==3.25.0 -deepspeed==0.13.1 -diffusers==0.25.0 -easygui==0.98.3 -einops==0.6.1 -entrypoints==0.4 -filelock==3.9.0 -frozenlist==1.4.1 -fsspec==2024.2.0 -ftfy==6.1.1 -gmpy2 @ file:///home/conda/feedstock_root/build_artifacts/gmpy2_1666808654411/work -google-auth==2.27.0 -google-auth-oauthlib==0.4.6 -grpcio==1.60.1 -hjson==3.1.0 -huggingface-hub==0.20.1 -idna==3.4 -importlib-metadata==7.0.1 -Jinja2==3.1.2 -jsonschema==4.21.1 -jsonschema-specifications==2023.12.1 --e git+https://github.com/kohya-ss/sd-scripts@cd19df49cd512e13ac90db115c424d19c0e8868a#egg=library -lightning-utilities==0.10.1 -lit==15.0.7 -Markdown==3.5.2 -MarkupSafe==2.1.3 -mpmath==1.3.0 -multidict==6.0.5 -networkx==3.2.1 -ninja==1.11.1.1 -numpy==1.26.3 -oauthlib==3.2.2 -open-clip-torch==2.20.0 -opencv-python==4.7.0.68 -packaging==23.2 -pandas==2.2.0 -pillow==10.2.0 -protobuf==3.19.6 -psutil==5.9.8 -py-cpuinfo @ file:///home/conda/feedstock_root/build_artifacts/py-cpuinfo_1666774466606/work -pyasn1==0.5.1 -pyasn1-modules==0.3.0 
-pydantic @ file:///home/conda/feedstock_root/build_artifacts/pydantic_1706543943340/work -pydantic_core @ file:///home/conda/feedstock_root/build_artifacts/pydantic-core_1705674688239/work -pynvml==11.5.0 -PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work -python-dateutil==2.8.2 -pytorch-lightning==1.9.0 -pytz==2024.1 -PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1695373428874/work -referencing==0.33.0 -regex==2023.12.25 -requests==2.28.1 -requests-oauthlib==1.3.1 -rpds-py==0.17.1 -rsa==4.9 -safetensors==0.4.2 -scipy==1.12.0 -sentencepiece==0.1.99 -six==1.16.0 -sympy==1.12 -tensorboard==2.10.1 -tensorboard-data-server==0.6.1 -tensorboard-plugin-wit==1.8.1 -timm==0.9.12 -tokenizers==0.15.1 -toml==0.10.2 -toolz==0.12.1 -torch==2.0.1+cu118 -torchaudio==2.2.0 -torchmetrics==1.3.0.post0 -torchvision==0.15.2+cu118 -tqdm==4.66.1 -transformers==4.36.2 -triton==2.0.0 -typing_extensions==4.8.0 -tzdata==2023.4 -urllib3==1.26.13 -voluptuous==0.13.1 -wcwidth==0.2.13 -Werkzeug==3.0.1 -yarl==1.9.4 -zipp==3.17.0 From a98fecaeb1e818c778c90fe441a71a8bd34615ff Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Wed, 7 Feb 2024 17:19:46 +0900 Subject: [PATCH 09/69] forgot setting mixed_precision for deepspeed. 
sorry --- library/train_util.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/library/train_util.py b/library/train_util.py index ea626510..dbe5a61c 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3971,6 +3971,9 @@ def prepare_accelerator(args: argparse.Namespace): deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu'] = args.train_batch_size deepspeed_plugin.deepspeed_config['train_batch_size'] = \ args.train_batch_size * args.gradient_accumulation_steps * int(os.environ['WORLD_SIZE']) + deepspeed_plugin.set_mixed_precision(args.mixed_precision) + if args.mixed_precision.lower() == "fp16": + deepspeed_plugin.deepspeed_config['fp16']['initial_scale_power'] = 0 if args.full_fp16 or args.fp16_master_weights_and_gradients: deepspeed_plugin.deepspeed_config['fp16_master_weights_and_gradients'] = True From 03f0816f86b2d4d8915d81146242fb6f7f99c5ff Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Fri, 9 Feb 2024 17:47:49 +0900 Subject: [PATCH 10/69] the reason not working grad accum steps found. it was becasue of my accelerate settings --- fine_tune.py | 5 +++-- library/train_util.py | 6 +++++- sdxl_train.py | 5 +++-- train_db.py | 5 +++-- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fine_tune.py b/fine_tune.py index eb652742..741e9c85 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -224,8 +224,9 @@ def train(args): args.max_train_steps = args.max_train_epochs * math.ceil( len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps ) - accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") - + accelerator.print( + f"override steps. 
steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" + ) # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) diff --git a/library/train_util.py b/library/train_util.py index dbe5a61c..61c83624 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3975,7 +3975,11 @@ def prepare_accelerator(args: argparse.Namespace): if args.mixed_precision.lower() == "fp16": deepspeed_plugin.deepspeed_config['fp16']['initial_scale_power'] = 0 if args.full_fp16 or args.fp16_master_weights_and_gradients: - deepspeed_plugin.deepspeed_config['fp16_master_weights_and_gradients'] = True + if args.offload_optimizer_device == "cpu": + deepspeed_plugin.deepspeed_config['fp16']['fp16_master_weights_and_grads'] = True + print("[DeepSpeed] full fp16 enable.") + else: + print("full fp16, fp16_master_weights_and_grads currently only supported using ZeRO-Offload with DeepSpeedCPUAdam.") accelerator = Accelerator( gradient_accumulation_steps=args.gradient_accumulation_steps, diff --git a/sdxl_train.py b/sdxl_train.py index 54902b87..6ffb1bba 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -363,8 +363,9 @@ def train(args): args.max_train_steps = args.max_train_epochs * math.ceil( len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps ) - accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") - + accelerator.print( + f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" + ) # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) diff --git a/train_db.py b/train_db.py index 58536555..c336a1c1 100644 --- a/train_db.py +++ b/train_db.py @@ -193,8 +193,9 @@ def train(args): args.max_train_steps = args.max_train_epochs * math.ceil( len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps ) - accelerator.print(f"override steps. 
steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}") - + accelerator.print( + f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" + ) # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) From 4d5186d1cf0b0fbda20513def793ac3f5e9d5ea0 Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Thu, 22 Feb 2024 16:20:53 +0900 Subject: [PATCH 11/69] refactored codes, some function moved into train_utils.py --- fine_tune.py | 29 +++++++--------- library/train_util.py | 78 +++++++++++++++++++++++++++++++------------ sdxl_train.py | 43 ++++++++++++------------ train_db.py | 31 ++++++++--------- train_network.py | 34 +++++++++---------- 5 files changed, 119 insertions(+), 96 deletions(-) diff --git a/fine_tune.py b/fine_tune.py index 741e9c85..86260754 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -243,24 +243,19 @@ def train(args): text_encoder.to(weight_dtype) if args.deepspeed: - # wrapping model - import deepspeed - if args.offload_optimizer_device is not None: - accelerator.print('[DeepSpeed] start to manually build cpu_adam.') - deepspeed.ops.op_builder.CPUAdamBuilder().load() - accelerator.print('[DeepSpeed] building cpu_adam done.') - class DeepSpeedModel(torch.nn.Module): - def __init__(self, unet, text_encoder) -> None: - super().__init__() - self.unet = unet - self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) - def get_models(self): - return self.unet, self.text_encoders - ds_model = DeepSpeedModel(unet, text_encoders) + training_models_dict = {} + training_models_dict["unet"] = unet + if args.train_text_encoder: training_models_dict["text_encoder"] = text_encoder + + ds_model = train_util.prepare_deepspeed_model(args, **training_models_dict) ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) - # Now, ds_model is an instance of DeepSpeedEngine. 
- unet, text_encoders = ds_model.get_models() # for compatiblility - text_encoder = text_encoders + + training_models = [] + unet = ds_model.models["unet"] + training_models.append(unet) + if args.train_text_encoder: + text_encoder = ds_model.models["text_encoder"] + training_models.append(text_encoder) else: # acceleratorがなんかよろしくやってくれるらしい if args.train_text_encoder: diff --git a/library/train_util.py b/library/train_util.py index 61c83624..334aaa21 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3959,27 +3959,7 @@ def prepare_accelerator(args: argparse.Namespace): else None, ) kwargs_handlers = list(filter(lambda x: x is not None, kwargs_handlers)) - deepspeed_plugin = None - if args.deepspeed: - deepspeed_plugin = DeepSpeedPlugin( - zero_stage=args.zero_stage, - gradient_accumulation_steps=args.gradient_accumulation_steps, gradient_clipping=args.max_grad_norm, - offload_optimizer_device=args.offload_optimizer_device, offload_optimizer_nvme_path=args.offload_optimizer_nvme_path, - offload_param_device=args.offload_param_device, offload_param_nvme_path=args.offload_param_nvme_path, - zero3_init_flag=args.zero3_init_flag, zero3_save_16bit_model=args.zero3_save_16bit_model, - ) - deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu'] = args.train_batch_size - deepspeed_plugin.deepspeed_config['train_batch_size'] = \ - args.train_batch_size * args.gradient_accumulation_steps * int(os.environ['WORLD_SIZE']) - deepspeed_plugin.set_mixed_precision(args.mixed_precision) - if args.mixed_precision.lower() == "fp16": - deepspeed_plugin.deepspeed_config['fp16']['initial_scale_power'] = 0 - if args.full_fp16 or args.fp16_master_weights_and_gradients: - if args.offload_optimizer_device == "cpu": - deepspeed_plugin.deepspeed_config['fp16']['fp16_master_weights_and_grads'] = True - print("[DeepSpeed] full fp16 enable.") - else: - print("full fp16, fp16_master_weights_and_grads currently only supported using ZeRO-Offload with DeepSpeedCPUAdam.") + 
deepspeed_plugin = prepare_deepspeed_plugin(args) accelerator = Accelerator( gradient_accumulation_steps=args.gradient_accumulation_steps, @@ -3992,6 +3972,62 @@ def prepare_accelerator(args: argparse.Namespace): ) return accelerator +def prepare_deepspeed_plugin(args: argparse.Namespace): + if args.deepspeed is None: return None + try: + import deepspeed + except ImportError as e: + print("deepspeed is not installed. please install deepspeed in your environment with following command. DS_BUILD_OPS=0 pip install deepspeed") + exit(1) + + deepspeed_plugin = DeepSpeedPlugin( + zero_stage=args.zero_stage, + gradient_accumulation_steps=args.gradient_accumulation_steps, gradient_clipping=args.max_grad_norm, + offload_optimizer_device=args.offload_optimizer_device, offload_optimizer_nvme_path=args.offload_optimizer_nvme_path, + offload_param_device=args.offload_param_device, offload_param_nvme_path=args.offload_param_nvme_path, + zero3_init_flag=args.zero3_init_flag, zero3_save_16bit_model=args.zero3_save_16bit_model, + ) + deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu'] = args.train_batch_size + deepspeed_plugin.deepspeed_config['train_batch_size'] = \ + args.train_batch_size * args.gradient_accumulation_steps * int(os.environ['WORLD_SIZE']) + deepspeed_plugin.set_mixed_precision(args.mixed_precision) + if args.mixed_precision.lower() == "fp16": + deepspeed_plugin.deepspeed_config['fp16']['initial_scale_power'] = 0 # preventing overflow. 
+ if args.full_fp16 or args.fp16_master_weights_and_gradients: + if args.offload_optimizer_device == "cpu" and args.zero_stage == 2: + deepspeed_plugin.deepspeed_config['fp16']['fp16_master_weights_and_grads'] = True + print("[DeepSpeed] full fp16 enable.") + else: + print("[DeepSpeed]full fp16, fp16_master_weights_and_grads currently only supported using ZeRO-Offload with DeepSpeedCPUAdam on ZeRO-2 stage.") + + if args.offload_optimizer_device is not None: + print('[DeepSpeed] start to manually build cpu_adam.') + deepspeed.ops.op_builder.CPUAdamBuilder().load() + print('[DeepSpeed] building cpu_adam done.') + + return deepspeed_plugin + +def prepare_deepspeed_model(args: argparse.Namespace, **models): + class DeepSpeedWrapper(torch.nn.Module): + def __init__(self, **kw_models) -> None: + super().__init__() + self.models = torch.nn.ModuleDict() + + for key, model in kw_models.items(): + if isinstance(model, list): + model = torch.nn.ModuleList(model) + assert isinstance(model, torch.nn.Module), f"model must be an instance of torch.nn.Module, but got {key} is {type(model)}" + self.models.update( + torch.nn.ModuleDict( + {key: model} + ) + ) + + def get_models(self): + return self.models + + ds_model = DeepSpeedWrapper(**models) + return ds_model def prepare_dtype(args: argparse.Namespace): weight_dtype = torch.float32 diff --git a/sdxl_train.py b/sdxl_train.py index 6ffb1bba..2f1a5ce6 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -391,28 +391,29 @@ def train(args): text_encoder2.to(weight_dtype) if args.deepspeed: - # Wrapping model for DeepSpeed - import deepspeed - if args.offload_optimizer_device is not None: - accelerator.print('[DeepSpeed] start to manually build cpu_adam.') - deepspeed.ops.op_builder.CPUAdamBuilder().load() - accelerator.print('[DeepSpeed] building cpu_adam done.') - - class DeepSpeedModel(torch.nn.Module): - def __init__(self, unet, text_encoder) -> None: - super().__init__() - self.unet = unet - self.text_encoders = self.text_encoder = 
torch.nn.ModuleList(text_encoder) - - def get_models(self): - return self.unet, self.text_encoders - text_encoders = [text_encoder1, text_encoder2] - ds_model = DeepSpeedModel(unet, text_encoders) + training_models_dict = {} + if train_unet: + training_models_dict["unet"] = unet + if train_text_encoder1: + text_encoder1.text_model.encoder.layers[-1].requires_grad_(False) + text_encoder1.text_model.final_layer_norm.requires_grad_(False) + training_models_dict["text_encoder1"] = text_encoder1 + if train_text_encoder2: + training_models_dict["text_encoder2"] = text_encoder2 + ds_model = train_util.prepare_deepspeed_model(args, **training_models_dict) ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) - # Now, ds_model is an instance of DeepSpeedEngine. - unet, text_encoders = ds_model.get_models() # for compatiblility - text_encoder1, text_encoder2 = text_encoder = text_encoders - training_models = [unet, text_encoder1, text_encoder2] + + training_models = [] # override training_models + if train_unet: + unet = ds_model.models["unet"] + training_models.append(unet) + if train_text_encoder1: + text_encoder1 = ds_model.models["text_encoder1"] + training_models.append(text_encoder1) + if train_text_encoder2: + text_encoder2 = ds_model.models["text_encoder2"] + training_models.append(text_encoder2) + else: # acceleratorがなんかよろしくやってくれるらしい if train_unet: unet = accelerator.prepare(unet) diff --git a/train_db.py b/train_db.py index c336a1c1..f188d7bd 100644 --- a/train_db.py +++ b/train_db.py @@ -216,25 +216,20 @@ def train(args): # acceleratorがなんかよろしくやってくれるらしい if args.deepspeed: - # wrapping model - import deepspeed - if args.offload_optimizer_device is not None: - accelerator.print('[DeepSpeed] start to manually build cpu_adam.') - deepspeed.ops.op_builder.CPUAdamBuilder().load() - accelerator.print('[DeepSpeed] building cpu_adam done.') - class DeepSpeedModel(torch.nn.Module): - def __init__(self, 
unet, text_encoder) -> None: - super().__init__() - self.unet = unet - self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) - - def get_models(self): - return self.unet, self.text_encoders - ds_model = DeepSpeedModel(unet, text_encoders) + training_models_dict = {} + training_models_dict["unet"] = unet + if train_text_encoder: training_models_dict["text_encoder"] = text_encoder + + ds_model = train_util.prepare_deepspeed_model(args, **training_models_dict) ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) - # Now, ds_model is an instance of DeepSpeedEngine. - unet, text_encoders = ds_model.get_models() # for compatiblility - text_encoder = text_encoders + + training_models = [] + unet = ds_model.models["unet"] + training_models.append(unet) + if train_text_encoder: + text_encoder = ds_model.models["text_encoder"] + training_models.append(text_encoder) + else: if train_text_encoder: unet, text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( diff --git a/train_network.py b/train_network.py index cc445d39..dfa17eb3 100644 --- a/train_network.py +++ b/train_network.py @@ -410,26 +410,22 @@ class NetworkTrainer: # acceleratorがなんかよろしくやってくれるらしい / accelerator will do something good if args.deepspeed: - # wrapping model - import deepspeed - if args.offload_optimizer_device is not None: - accelerator.print('[DeepSpeed] start to manually build cpu_adam.') - deepspeed.ops.op_builder.CPUAdamBuilder().load() - accelerator.print('[DeepSpeed] building cpu_adam done.') - class DeepSpeedModel(torch.nn.Module): - def __init__(self, unet, text_encoder, network) -> None: - super().__init__() - self.unet = unet - self.text_encoders = self.text_encoder = torch.nn.ModuleList(text_encoder) - self.network = network - - def get_models(self): - return self.unet, self.text_encoders, self.network - ds_model = DeepSpeedModel(unet, text_encoders, network) + 
training_models_dict = {} + if train_unet: training_models_dict["unet"] = unet + if train_text_encoder: training_models_dict["text_encoder"] = text_encoders + training_models_dict["network"] = network + + ds_model = train_util.prepare_deepspeed_model(args, **training_models_dict) ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) - # Now, ds_model is an instance of DeepSpeedEngine. - unet, text_encoders, network = ds_model.get_models() # for compatiblility - text_encoder = text_encoders + + if train_unet: unet = ds_model.models["unet"] + if train_text_encoder: + text_encoder = ds_model.models["text_encoder"] + if len(ds_model.models["text_encoder"]) > 1: + text_encoders = text_encoder + else: + text_encoders = [text_encoder] + else: if train_unet: unet = accelerator.prepare(unet) From 577e9913ca241a56631dbca924e17d9012bde116 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Mon, 26 Feb 2024 20:01:25 +0900 Subject: [PATCH 12/69] add some new dataset settings --- README.md | 161 ++++++++++++++++++++++++++--------------- library/config_util.py | 6 ++ library/train_util.py | 62 +++++++++++++++- train_network.py | 5 ++ 4 files changed, 175 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index e635e5ae..e1b6a26c 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,109 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum ## Change History +### Working in progress + +- `train_network.py` and `sdxl_train_network.py` are modified to record some dataset settings in the metadata of the trained model (`caption_prefix`, `caption_suffix`, `keep_tokens_separator`, `secondary_separator`, `enable_wildcard`). +- Some features are added to the dataset subset settings. + - `secondary_separator` is added to specify the tag separator that is not the target of shuffling or dropping. + - Specify `secondary_separator=";;;"`. 
When you specify `secondary_separator`, the part is not shuffled or dropped. See the example below. + - `enable_wildcard` is added. When set to `true`, the wildcard notation `{aaa|bbb|ccc}` can be used. See the example below. + - `keep_tokens_separator` is updated to be used twice in the caption. When you specify `keep_tokens_separator="|||"`, the part divided by the second `|||` is not shuffled or dropped and remains at the end. + - The existing features `caption_prefix` and `caption_suffix` can be used together. `caption_prefix` and `caption_suffix` are processed first, and then `enable_wildcard`, `keep_tokens_separator`, shuffling and dropping, and `secondary_separator` are processed in order. + - The examples are [shown below](#example-of-dataset-settings--データセット設定の記述例). + +- `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 +- データセットのサブセット設定にいくつかの機能を追加しました。 + - シャッフルの対象とならないタグ分割識別子の指定 `secondary_separator` を追加しました。`secondary_separator=";;;"` のように指定します。`secondary_separator` で区切ることで、その部分はシャッフル、drop 時にまとめて扱われます。詳しくは記述例をご覧ください。 + - `enable_wildcard` を追加しました。`true` にするとワイルドカード記法 `{aaa|bbb|ccc}` が使えます。詳しくは記述例をご覧ください。 + - `keep_tokens_separator` をキャプション内に 2 つ使えるようにしました。たとえば `keep_tokens_separator="|||"` と指定したとき、`1girl, hatsune miku, vocaloid ||| stage, mic ||| best quality, rating: general` とキャプションを指定すると、二番目の `|||` で分割された部分はシャッフル、drop されず末尾に残ります。 + - 既存の機能 `caption_prefix` と `caption_suffix` とあわせて使えます。`caption_prefix` と `caption_suffix` は一番最初に処理され、その後、ワイルドカード、`keep_tokens_separator`、シャッフルおよび drop、`secondary_separator` の順に処理されます。 + +#### Example of dataset settings / データセット設定の記述例: + +```toml +[general] +flip_aug = true +color_aug = false +resolution = [1024, 1024] + +[[datasets]] +batch_size = 6 +enable_bucket = true +bucket_no_upscale = true +caption_extension = ".txt" +keep_tokens_separator= "|||" +shuffle_caption = true 
+caption_tag_dropout_rate = 0.1 +secondary_separator = ";;;" # subset 側に書くこともできます / can be written in the subset side +enable_wildcard = true # 同上 / same as above + + [[datasets.subsets]] + image_dir = "/path/to/image_dir" + num_repeats = 1 + + # ||| の前後はカンマは不要です(自動的に追加されます) / No comma is required before and after ||| (it is added automatically) + caption_prefix = "1girl, hatsune miku, vocaloid |||" + + # ||| の後はシャッフル、drop されず残ります / After |||, it is not shuffled or dropped and remains + # 単純に文字列として連結されるので、カンマなどは自分で入れる必要があります / It is simply concatenated as a string, so you need to put commas yourself + caption_suffix = ", anime screencap ||| masterpiece, rating: general" +``` + +#### Example of caption, secondary_separator notation: `secondary_separator = ";;;"` + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors +``` +The part `sky;;;cloud;;;day` is replaced with `sky,cloud,day` without shuffling or dropping. When shuffling and dropping are enabled, it is processed as a whole (as one tag). For example, it becomes `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (shuffled) or `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (dropped). + +#### Example of caption, enable_wildcard notation: `enable_wildcard = true` + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background +``` +`simple` or `white` is randomly selected, and it becomes `simple background` or `white background`. + +```txt +1girl, hatsune miku, vocaloid, {{retro style}} +``` +If you want to include `{` or `}` in the tag string, double them like `{{` or `}}` (in this example, the actual caption used for training is `{retro style}`). 
+ +#### Example of caption, `keep_tokens_separator` notation: `keep_tokens_separator = "|||"` + +```txt +1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general +``` +It becomes `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` or `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` etc. + + +#### キャプション記述例、secondary_separator 記法:`secondary_separator = ";;;"` の場合 + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors +``` +`sky;;;cloud;;;day` の部分はシャッフル、drop されず `sky,cloud,day` に置換されます。シャッフル、drop が有効な場合、まとめて(一つのタグとして)処理されます。つまり `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (シャッフル)や `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (drop されたケース)などになります。 + +#### キャプション記述例、ワイルドカード記法: `enable_wildcard = true` の場合 + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background +``` +ランダムに `simple` または `white` が選ばれ、`simple background` または `white background` になります。 + +```txt +1girl, hatsune miku, vocaloid, {{retro style}} +``` +タグ文字列に `{` や `}` そのものを含めたい場合は `{{` や `}}` のように二つ重ねてください(この例では実際に学習に用いられるキャプションは `{retro style}` になります)。 + +#### キャプション記述例、`keep_tokens_separator` 記法: `keep_tokens_separator = "|||"` の場合 + +```txt +1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general +``` +`1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` や `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` などになります。 + + ### Feb 24, 2024 / 2024/2/24: v0.8.4 - The log output has been improved. PR [#905](https://github.com/kohya-ss/sd-scripts/pull/905) Thanks to shirayu! @@ -304,64 +407,6 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. 
See [docum - 複数 GPU での学習時に `network_multiplier` を指定するとクラッシュする不具合が修正されました。 PR [#1084](https://github.com/kohya-ss/sd-scripts/pull/1084) fireicewolf 氏に感謝します。 - ControlNet-LLLite の学習がエラーになる不具合を修正しました。 -### Jan 23, 2024 / 2024/1/23: v0.8.2 - -- [Experimental] The `--fp8_base` option is added to the training scripts for LoRA etc. The base model (U-Net, and Text Encoder when training modules for Text Encoder) can be trained with fp8. PR [#1057](https://github.com/kohya-ss/sd-scripts/pull/1057) Thanks to KohakuBlueleaf! - - Please specify `--fp8_base` in `train_network.py` or `sdxl_train_network.py`. - - PyTorch 2.1 or later is required. - - If you use xformers with PyTorch 2.1, please see [xformers repository](https://github.com/facebookresearch/xformers) and install the appropriate version according to your CUDA version. - - The sample image generation during training consumes a lot of memory. It is recommended to turn it off. - -- [Experimental] The network multiplier can be specified for each dataset in the training scripts for LoRA etc. - - This is an experimental option and may be removed or changed in the future. - - For example, if you train with state A as `1.0` and state B as `-1.0`, you may be able to generate by switching between state A and B depending on the LoRA application rate. - - Also, if you prepare five states and train them as `0.2`, `0.4`, `0.6`, `0.8`, and `1.0`, you may be able to generate by switching the states smoothly depending on the application rate. - - Please specify `network_multiplier` in `[[datasets]]` in `.toml` file. -- Some options are added to `networks/extract_lora_from_models.py` to reduce the memory usage. - - `--load_precision` option can be used to specify the precision when loading the model. If the model is saved in fp16, you can reduce the memory usage by specifying `--load_precision fp16` without losing precision. - - `--load_original_model_to` option can be used to specify the device to load the original model. 
`--load_tuned_model_to` option can be used to specify the device to load the derived model. The default is `cpu` for both options, but you can specify `cuda` etc. You can reduce the memory usage by loading one of them to GPU. This option is available only for SDXL. - -- The gradient synchronization in LoRA training with multi-GPU is improved. PR [#1064](https://github.com/kohya-ss/sd-scripts/pull/1064) Thanks to KohakuBlueleaf! -- The code for Intel IPEX support is improved. PR [#1060](https://github.com/kohya-ss/sd-scripts/pull/1060) Thanks to akx! -- Fixed a bug in multi-GPU Textual Inversion training. - -- (実験的) LoRA等の学習スクリプトで、ベースモデル(U-Net、および Text Encoder のモジュール学習時は Text Encoder も)の重みを fp8 にして学習するオプションが追加されました。 PR [#1057](https://github.com/kohya-ss/sd-scripts/pull/1057) KohakuBlueleaf 氏に感謝します。 - - `train_network.py` または `sdxl_train_network.py` で `--fp8_base` を指定してください。 - - PyTorch 2.1 以降が必要です。 - - PyTorch 2.1 で xformers を使用する場合は、[xformers のリポジトリ](https://github.com/facebookresearch/xformers) を参照し、CUDA バージョンに応じて適切なバージョンをインストールしてください。 - - 学習中のサンプル画像生成はメモリを大量に消費するため、オフにすることをお勧めします。 -- (実験的) LoRA 等の学習で、データセットごとに異なるネットワーク適用率を指定できるようになりました。 - - 実験的オプションのため、将来的に削除または仕様変更される可能性があります。 - - たとえば状態 A を `1.0`、状態 B を `-1.0` として学習すると、LoRA の適用率に応じて状態 A と B を切り替えつつ生成できるかもしれません。 - - また、五段階の状態を用意し、それぞれ `0.2`、`0.4`、`0.6`、`0.8`、`1.0` として学習すると、適用率でなめらかに状態を切り替えて生成できるかもしれません。 - - `.toml` ファイルで `[[datasets]]` に `network_multiplier` を指定してください。 -- `networks/extract_lora_from_models.py` に使用メモリ量を削減するいくつかのオプションを追加しました。 - - `--load_precision` で読み込み時の精度を指定できます。モデルが fp16 で保存されている場合は `--load_precision fp16` を指定して精度を変えずにメモリ量を削減できます。 - - `--load_original_model_to` で元モデルを読み込むデバイスを、`--load_tuned_model_to` で派生モデルを読み込むデバイスを指定できます。デフォルトは両方とも `cpu` ですがそれぞれ `cuda` 等を指定できます。片方を GPU に読み込むことでメモリ量を削減できます。SDXL の場合のみ有効です。 -- マルチ GPU での LoRA 等の学習時に勾配の同期が改善されました。 PR [#1064](https://github.com/kohya-ss/sd-scripts/pull/1064) KohakuBlueleaf 氏に感謝します。 -- Intel IPEX サポートのコードが改善されました。PR 
[#1060](https://github.com/kohya-ss/sd-scripts/pull/1060) akx 氏に感謝します。 -- マルチ GPU での Textual Inversion 学習の不具合を修正しました。 - -- `.toml` example for network multiplier / ネットワーク適用率の `.toml` の記述例 - -```toml -[general] -[[datasets]] -resolution = 512 -batch_size = 8 -network_multiplier = 1.0 - -... subset settings ... - -[[datasets]] -resolution = 512 -batch_size = 8 -network_multiplier = -1.0 - -... subset settings ... -``` - - Please read [Releases](https://github.com/kohya-ss/sd-scripts/releases) for recent updates. 最近の更新情報は [Release](https://github.com/kohya-ss/sd-scripts/releases) をご覧ください。 diff --git a/library/config_util.py b/library/config_util.py index fc4b3617..eb652ecf 100644 --- a/library/config_util.py +++ b/library/config_util.py @@ -60,6 +60,8 @@ class BaseSubsetParams: caption_separator: str = (",",) keep_tokens: int = 0 keep_tokens_separator: str = (None,) + secondary_separator: Optional[str] = None + enable_wildcard: bool = False color_aug: bool = False flip_aug: bool = False face_crop_aug_range: Optional[Tuple[float, float]] = None @@ -181,6 +183,8 @@ class ConfigSanitizer: "shuffle_caption": bool, "keep_tokens": int, "keep_tokens_separator": str, + "secondary_separator": str, + "enable_wildcard": bool, "token_warmup_min": int, "token_warmup_step": Any(float, int), "caption_prefix": str, @@ -504,6 +508,8 @@ def generate_dataset_group_by_blueprint(dataset_group_blueprint: DatasetGroupBlu shuffle_caption: {subset.shuffle_caption} keep_tokens: {subset.keep_tokens} keep_tokens_separator: {subset.keep_tokens_separator} + secondary_separator: {subset.secondary_separator} + enable_wildcard: {subset.enable_wildcard} caption_dropout_rate: {subset.caption_dropout_rate} caption_dropout_every_n_epoches: {subset.caption_dropout_every_n_epochs} caption_tag_dropout_rate: {subset.caption_tag_dropout_rate} diff --git a/library/train_util.py b/library/train_util.py index d2b69edb..b71e4edc 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -364,6 +364,8 @@ 
class BaseSubset: caption_separator: str, keep_tokens: int, keep_tokens_separator: str, + secondary_separator: Optional[str], + enable_wildcard: bool, color_aug: bool, flip_aug: bool, face_crop_aug_range: Optional[Tuple[float, float]], @@ -382,6 +384,8 @@ class BaseSubset: self.caption_separator = caption_separator self.keep_tokens = keep_tokens self.keep_tokens_separator = keep_tokens_separator + self.secondary_separator = secondary_separator + self.enable_wildcard = enable_wildcard self.color_aug = color_aug self.flip_aug = flip_aug self.face_crop_aug_range = face_crop_aug_range @@ -410,6 +414,8 @@ class DreamBoothSubset(BaseSubset): caption_separator: str, keep_tokens, keep_tokens_separator, + secondary_separator, + enable_wildcard, color_aug, flip_aug, face_crop_aug_range, @@ -431,6 +437,8 @@ class DreamBoothSubset(BaseSubset): caption_separator, keep_tokens, keep_tokens_separator, + secondary_separator, + enable_wildcard, color_aug, flip_aug, face_crop_aug_range, @@ -466,6 +474,8 @@ class FineTuningSubset(BaseSubset): caption_separator, keep_tokens, keep_tokens_separator, + secondary_separator, + enable_wildcard, color_aug, flip_aug, face_crop_aug_range, @@ -487,6 +497,8 @@ class FineTuningSubset(BaseSubset): caption_separator, keep_tokens, keep_tokens_separator, + secondary_separator, + enable_wildcard, color_aug, flip_aug, face_crop_aug_range, @@ -519,6 +531,8 @@ class ControlNetSubset(BaseSubset): caption_separator, keep_tokens, keep_tokens_separator, + secondary_separator, + enable_wildcard, color_aug, flip_aug, face_crop_aug_range, @@ -540,6 +554,8 @@ class ControlNetSubset(BaseSubset): caption_separator, keep_tokens, keep_tokens_separator, + secondary_separator, + enable_wildcard, color_aug, flip_aug, face_crop_aug_range, @@ -675,15 +691,41 @@ class BaseDataset(torch.utils.data.Dataset): if is_drop_out: caption = "" else: + # process wildcards + if subset.enable_wildcard: + # wildcard is like '{aaa|bbb|ccc...}' + # escape the curly braces like {{ or }} + 
replacer1 = "⦅" + replacer2 = "⦆" + while replacer1 in caption or replacer2 in caption: + replacer1 += "⦅" + replacer2 += "⦆" + + caption = caption.replace("{{", replacer1).replace("}}", replacer2) + + # replace the wildcard + def replace_wildcard(match): + return random.choice(match.group(1).split("|")) + + caption = re.sub(r"\{([^}]+)\}", replace_wildcard, caption) + + # unescape the curly braces + caption = caption.replace(replacer1, "{").replace(replacer2, "}") + if subset.shuffle_caption or subset.token_warmup_step > 0 or subset.caption_tag_dropout_rate > 0: fixed_tokens = [] flex_tokens = [] + fixed_suffix_tokens = [] if ( hasattr(subset, "keep_tokens_separator") and subset.keep_tokens_separator and subset.keep_tokens_separator in caption ): fixed_part, flex_part = caption.split(subset.keep_tokens_separator, 1) + if subset.keep_tokens_separator in flex_part: + flex_part, fixed_suffix_part = flex_part.split(subset.keep_tokens_separator, 1) + fixed_suffix_tokens = [t.strip() for t in fixed_suffix_part.split(subset.caption_separator) if t.strip()] + fixed_tokens = [t.strip() for t in fixed_part.split(subset.caption_separator) if t.strip()] flex_tokens = [t.strip() for t in flex_part.split(subset.caption_separator) if t.strip()] else: @@ -718,7 +760,11 @@ class BaseDataset(torch.utils.data.Dataset): flex_tokens = dropout_tags(flex_tokens) - caption = ", ".join(fixed_tokens + flex_tokens) + caption = ", ".join(fixed_tokens + flex_tokens + fixed_suffix_tokens) + + # process secondary separator + if subset.secondary_separator: + caption = caption.replace(subset.secondary_separator, subset.caption_separator) # textual inversion対応 for str_from, str_to in self.replacements.items(): @@ -1774,6 +1820,8 @@ class ControlNetDataset(BaseDataset): subset.caption_separator, subset.keep_tokens, subset.keep_tokens_separator, + subset.secondary_separator, + subset.enable_wildcard, subset.color_aug, subset.flip_aug, subset.face_crop_aug_range, @@ -3284,6 +3332,18 @@ def 
add_dataset_arguments( help="A custom separator to divide the caption into fixed and flexible parts. Tokens before this separator will not be shuffled. If not specified, '--keep_tokens' will be used to determine the fixed number of tokens." + " / captionを固定部分と可変部分に分けるためのカスタム区切り文字。この区切り文字より前のトークンはシャッフルされない。指定しない場合、'--keep_tokens'が固定部分のトークン数として使用される。", ) + parser.add_argument( + "--secondary_separator", + type=str, + default=None, + help="a secondary separator for caption. This separator is replaced to caption_separator after dropping/shuffling caption" + + " / captionのセカンダリ区切り文字。この区切り文字はcaptionのドロップやシャッフル後にcaption_separatorに置き換えられる", + ) + parser.add_argument( + "--enable_wildcard", + action="store_true", + help="enable wildcard for caption (e.g. '{image|picture|rendition}') / captionのワイルドカードを有効にする(例:'{image|picture|rendition}')", + ) parser.add_argument( "--caption_prefix", type=str, diff --git a/train_network.py b/train_network.py index e0fa6945..e5b26d8a 100644 --- a/train_network.py +++ b/train_network.py @@ -564,6 +564,11 @@ class NetworkTrainer: "random_crop": bool(subset.random_crop), "shuffle_caption": bool(subset.shuffle_caption), "keep_tokens": subset.keep_tokens, + "keep_tokens_separator": subset.keep_tokens_separator, + "secondary_separator": subset.secondary_separator, + "enable_wildcard": bool(subset.enable_wildcard), + "caption_prefix": subset.caption_prefix, + "caption_suffix": subset.caption_suffix, } image_dir_or_metadata_file = None From f2c727fc8cadf0971c24fdb42c8684032e7e6f80 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Mon, 26 Feb 2024 23:19:58 +0900 Subject: [PATCH 13/69] add minimal impl for masked loss --- library/config_util.py | 38 +++++++++++++++++++++++++------------- library/train_util.py | 3 +++ train_network.py | 18 +++++++++++++++++- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/library/config_util.py b/library/config_util.py index eb652ecf..edc6a538 100644 --- a/library/config_util.py +++ b/library/config_util.py 
@@ -41,12 +41,17 @@ from .train_util import ( DatasetGroup, ) from .utils import setup_logging + setup_logging() import logging + logger = logging.getLogger(__name__) + def add_config_arguments(parser: argparse.ArgumentParser): - parser.add_argument("--dataset_config", type=Path, default=None, help="config file for detail settings / 詳細な設定用の設定ファイル") + parser.add_argument( + "--dataset_config", type=Path, default=None, help="config file for detail settings / 詳細な設定用の設定ファイル" + ) # TODO: inherit Params class in Subset, Dataset @@ -248,9 +253,10 @@ class ConfigSanitizer: } def __init__(self, support_dreambooth: bool, support_finetuning: bool, support_controlnet: bool, support_dropout: bool) -> None: - assert ( - support_dreambooth or support_finetuning or support_controlnet - ), "Neither DreamBooth mode nor fine tuning mode specified. Please specify one mode or more. / DreamBooth モードか fine tuning モードのどちらも指定されていません。1つ以上指定してください。" + assert support_dreambooth or support_finetuning or support_controlnet, ( + "Neither DreamBooth mode nor fine tuning mode nor controlnet mode specified. Please specify one mode or more." + + " / DreamBooth モードか fine tuning モードか controlnet モードのどれも指定されていません。1つ以上指定してください。" + ) self.db_subset_schema = self.__merge_dict( self.SUBSET_ASCENDABLE_SCHEMA, @@ -362,7 +368,9 @@ class ConfigSanitizer: return self.argparse_config_validator(argparse_namespace) except MultipleInvalid: # XXX: this should be a bug - logger.error("Invalid cmdline parsed arguments. This should be a bug. / コマンドラインのパース結果が正しくないようです。プログラムのバグの可能性が高いです。") + logger.error( + "Invalid cmdline parsed arguments. This should be a bug. 
/ コマンドラインのパース結果が正しくないようです。プログラムのバグの可能性が高いです。" + ) raise # NOTE: value would be overwritten by latter dict if there is already the same key @@ -547,11 +555,11 @@ def generate_dataset_group_by_blueprint(dataset_group_blueprint: DatasetGroupBlu " ", ) - logger.info(f'{info}') + logger.info(f"{info}") # make buckets first because it determines the length of dataset # and set the same seed for all datasets - seed = random.randint(0, 2**31) # actual seed is seed + epoch_no + seed = random.randint(0, 2**31) # actual seed is seed + epoch_no for i, dataset in enumerate(datasets): logger.info(f"[Dataset {i}]") dataset.make_buckets() @@ -638,13 +646,17 @@ def load_user_config(file: str) -> dict: with open(file, "r") as f: config = json.load(f) except Exception: - logger.error(f"Error on parsing JSON config file. Please check the format. / JSON 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}") + logger.error( + f"Error on parsing JSON config file. Please check the format. / JSON 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}" + ) raise elif file.name.lower().endswith(".toml"): try: config = toml.load(file) except Exception: - logger.error(f"Error on parsing TOML config file. Please check the format. / TOML 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}") + logger.error( + f"Error on parsing TOML config file. Please check the format. 
/ TOML 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}" + ) raise else: raise ValueError(f"not supported config file format / 対応していない設定ファイルの形式です: {file}") @@ -671,13 +683,13 @@ if __name__ == "__main__": train_util.prepare_dataset_args(argparse_namespace, config_args.support_finetuning) logger.info("[argparse_namespace]") - logger.info(f'{vars(argparse_namespace)}') + logger.info(f"{vars(argparse_namespace)}") user_config = load_user_config(config_args.dataset_config) logger.info("") logger.info("[user_config]") - logger.info(f'{user_config}') + logger.info(f"{user_config}") sanitizer = ConfigSanitizer( config_args.support_dreambooth, config_args.support_finetuning, config_args.support_controlnet, config_args.support_dropout @@ -686,10 +698,10 @@ if __name__ == "__main__": logger.info("") logger.info("[sanitized_user_config]") - logger.info(f'{sanitized_user_config}') + logger.info(f"{sanitized_user_config}") blueprint = BlueprintGenerator(sanitizer).generate(user_config, argparse_namespace) logger.info("") logger.info("[blueprint]") - logger.info(f'{blueprint}') + logger.info(f"{blueprint}") diff --git a/library/train_util.py b/library/train_util.py index b71e4edc..7fe5bc56 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1810,6 +1810,9 @@ class ControlNetDataset(BaseDataset): db_subsets = [] for subset in subsets: + assert ( + not subset.random_crop + ), "random_crop is not supported in ControlNetDataset / random_cropはControlNetDatasetではサポートされていません" db_subset = DreamBoothSubset( subset.image_dir, False, diff --git a/train_network.py b/train_network.py index e5b26d8a..e3ce7bd3 100644 --- a/train_network.py +++ b/train_network.py @@ -13,6 +13,7 @@ from tqdm import tqdm import torch from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from torch.nn.parallel import DistributedDataParallel as DDP @@ -157,7 +158,7 @@ class NetworkTrainer: # データセットを準備する if args.dataset_class is None: - blueprint_generator = 
BlueprintGenerator(ConfigSanitizer(True, True, False, True)) + blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, True, args.masked_loss, True)) if use_user_config: logger.info(f"Loading dataset config from {args.dataset_config}") user_config = config_util.load_user_config(args.dataset_config) @@ -834,6 +835,16 @@ class NetworkTrainer: target = noise loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none") + + if args.masked_loss: + # mask image is -1 to 1. we need to convert it to 0 to 1 + mask_image = batch["conditioning_images"].to(dtype=weight_dtype)[:, 0].unsqueeze(1) # use R channel + + # resize to the same size as the loss + mask_image = torch.nn.functional.interpolate(mask_image, size=loss.shape[2:], mode="area") + mask_image = mask_image / 2 + 0.5 + loss = loss * mask_image + loss = loss.mean([1, 2, 3]) loss_weights = batch["loss_weights"] # 各sampleごとのweight @@ -1050,6 +1061,11 @@ def setup_parser() -> argparse.ArgumentParser: action="store_true", help="do not use fp16/bf16 VAE in mixed precision (use float VAE) / mixed precisionでも fp16/bf16 VAEを使わずfloat VAEを使う", ) + parser.add_argument( + "--masked_loss", + action="store_true", + help="apply mask for caclulating loss. conditioning_data_dir is required for dataset. / 損失計算時にマスクを適用する。datasetにはconditioning_data_dirが必要", + ) return parser From 175193623b39027ffcfe0c0ae250dbce564ed6ef Mon Sep 17 00:00:00 2001 From: Kohya S Date: Mon, 26 Feb 2024 23:29:41 +0900 Subject: [PATCH 14/69] update readme --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index e1b6a26c..9cc79cc0 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,13 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum ## Change History +### Masked loss + +`train_network.py` and `sdxl_train_network.py` now support the masked loss. `--masked_loss` option is added. + +ControlNet dataset is used to specify the mask. 
The mask images should be the RGB images. The pixel value 255 in R channel is treated as the mask (the loss is calculated only for the pixels with the mask), and 0 is treated as the non-mask. See details for the dataset specification in the [LLLite documentation](./docs/train_lllite_README.md#preparing-the-dataset). + + ### Working in progress - `train_network.py` and `sdxl_train_network.py` are modified to record some dataset settings in the metadata of the trained model (`caption_prefix`, `caption_suffix`, `keep_tokens_separator`, `secondary_separator`, `enable_wildcard`). From 4a5546d40e6de5789be78dd16373d2b820b8754e Mon Sep 17 00:00:00 2001 From: Kohya S Date: Mon, 26 Feb 2024 23:39:56 +0900 Subject: [PATCH 15/69] fix typo --- train_network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_network.py b/train_network.py index e3ce7bd3..f5617986 100644 --- a/train_network.py +++ b/train_network.py @@ -1064,7 +1064,7 @@ def setup_parser() -> argparse.ArgumentParser: parser.add_argument( "--masked_loss", action="store_true", - help="apply mask for caclulating loss. conditioning_data_dir is required for dataset. / 損失計算時にマスクを適用する。datasetにはconditioning_data_dirが必要", + help="apply mask for calculating loss. conditioning_data_dir is required for dataset. 
/ 損失計算時にマスクを適用する。datasetにはconditioning_data_dirが必要", ) return parser From e3ccf8fbf73a0f728fc167a20b1e0648a3604f41 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Tue, 27 Feb 2024 21:30:46 +0900 Subject: [PATCH 16/69] make deepspeed_utils --- fine_tune.py | 35 +++++----- library/deepspeed_utils.py | 139 +++++++++++++++++++++++++++++++++++++ library/train_util.py | 110 ++--------------------------- sdxl_train.py | 66 ++++++++---------- train_db.py | 37 +++++----- train_network.py | 51 +++++++------- 6 files changed, 238 insertions(+), 200 deletions(-) create mode 100644 library/deepspeed_utils.py diff --git a/fine_tune.py b/fine_tune.py index c5e97d26..b018a933 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -10,7 +10,9 @@ import toml from tqdm import tqdm import torch +from library import deepspeed_utils from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from accelerate.utils import set_seed @@ -42,6 +44,7 @@ from library.custom_train_functions import ( def train(args): train_util.verify_training_args(args) train_util.prepare_dataset_args(args, True) + deepspeed_utils.prepare_deepspeed_args(args) setup_logging(args, reset=True) cache_latents = args.cache_latents @@ -219,7 +222,7 @@ def train(args): batch_size=1, shuffle=True, collate_fn=collator, - num_workers=n_workers if not args.deepspeed else 1, # To avoid RuntimeError: DataLoader worker exited unexpectedly with exit code 1. + num_workers=n_workers, persistent_workers=args.persistent_data_loader_workers, ) @@ -231,7 +234,7 @@ def train(args): accelerator.print( f"override steps. 
steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}" ) - + # データセット側にも学習ステップを送信 train_dataset_group.set_max_train_steps(args.max_train_steps) @@ -248,21 +251,16 @@ def train(args): text_encoder.to(weight_dtype) if args.deepspeed: - training_models_dict = {} - training_models_dict["unet"] = unet - if args.train_text_encoder: training_models_dict["text_encoder"] = text_encoder - - ds_model = train_util.prepare_deepspeed_model(args, **training_models_dict) - ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) - - training_models = [] - unet = ds_model.models["unet"] - training_models.append(unet) if args.train_text_encoder: - text_encoder = ds_model.models["text_encoder"] - training_models.append(text_encoder) - - else: # acceleratorがなんかよろしくやってくれるらしい + ds_model = deepspeed_utils.prepare_deepspeed_model(args, unet=unet, text_encoder=text_encoder) + else: + ds_model = deepspeed_utils.prepare_deepspeed_model(args, unet=unet) + ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + ds_model, optimizer, train_dataloader, lr_scheduler + ) + training_models = [ds_model] + else: + # acceleratorがなんかよろしくやってくれるらしい if args.train_text_encoder: unet, text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( unet, text_encoder, optimizer, train_dataloader, lr_scheduler @@ -327,13 +325,13 @@ def train(args): for step, batch in enumerate(train_dataloader): current_step.value = global_step - with accelerator.accumulate(training_models[0]): # 複数モデルに対応していない模様だがとりあえずこうしておく + with accelerator.accumulate(*training_models): with torch.no_grad(): if "latents" in batch and batch["latents"] is not None: latents = batch["latents"].to(accelerator.device) # .to(dtype=weight_dtype) else: # latentに変換 - latents = vae.encode(batch["images"].to(dtype=weight_dtype)).latent_dist.sample() + latents = 
vae.encode(batch["images"].to(dtype=vae_dtype)).latent_dist.sample().to(weight_dtype) latents = latents * 0.18215 b_size = latents.shape[0] @@ -493,6 +491,7 @@ def setup_parser() -> argparse.ArgumentParser: train_util.add_sd_models_arguments(parser) train_util.add_dataset_arguments(parser, False, True, True) train_util.add_training_arguments(parser, False) + deepspeed_utils.add_deepspeed_arguments(parser) train_util.add_sd_saving_arguments(parser) train_util.add_optimizer_arguments(parser) config_util.add_config_arguments(parser) diff --git a/library/deepspeed_utils.py b/library/deepspeed_utils.py new file mode 100644 index 00000000..99a7b2b3 --- /dev/null +++ b/library/deepspeed_utils.py @@ -0,0 +1,139 @@ +import os +import argparse +import torch +from accelerate import DeepSpeedPlugin, Accelerator + +from .utils import setup_logging + +setup_logging() +import logging + +logger = logging.getLogger(__name__) + + +def add_deepspeed_arguments(parser: argparse.ArgumentParser): + # DeepSpeed Arguments. https://huggingface.co/docs/accelerate/usage_guides/deepspeed + parser.add_argument("--deepspeed", action="store_true", help="enable deepspeed training") + parser.add_argument("--zero_stage", type=int, default=2, choices=[0, 1, 2, 3], help="Possible options are 0,1,2,3.") + parser.add_argument( + "--offload_optimizer_device", + type=str, + default=None, + choices=[None, "cpu", "nvme"], + help="Possible options are none|cpu|nvme. Only applicable with ZeRO Stages 2 and 3.", + ) + parser.add_argument( + "--offload_optimizer_nvme_path", + type=str, + default=None, + help="Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3.", + ) + parser.add_argument( + "--offload_param_device", + type=str, + default=None, + choices=[None, "cpu", "nvme"], + help="Possible options are none|cpu|nvme. Only applicable with ZeRO Stage 3.", + ) + parser.add_argument( + "--offload_param_nvme_path", + type=str, + default=None, + help="Possible options are /nvme|/local_nvme. 
Only applicable with ZeRO Stage 3.", + ) + parser.add_argument( + "--zero3_init_flag", + action="store_true", + help="Flag to indicate whether to enable `deepspeed.zero.Init` for constructing massive models." + "Only applicable with ZeRO Stage-3.", + ) + parser.add_argument( + "--zero3_save_16bit_model", + action="store_true", + help="Flag to indicate whether to save 16-bit model. Only applicable with ZeRO Stage-3.", + ) + parser.add_argument( + "--fp16_master_weights_and_gradients", + action="store_true", + help="fp16_master_and_gradients requires optimizer to support keeping fp16 master and gradients while keeping the optimizer states in fp32.", + ) + + +def prepare_deepspeed_args(args: argparse.Namespace): + if not args.deepspeed: + return + + # To avoid RuntimeError: DataLoader worker exited unexpectedly with exit code 1. + args.max_data_loader_n_workers = 1 + + +def prepare_deepspeed_plugin(args: argparse.Namespace): + if not args.deepspeed: + return None + + try: + import deepspeed + except ImportError as e: + logger.error( + "deepspeed is not installed. please install deepspeed in your environment with following command. 
DS_BUILD_OPS=0 pip install deepspeed" + ) + exit(1) + + deepspeed_plugin = DeepSpeedPlugin( + zero_stage=args.zero_stage, + gradient_accumulation_steps=args.gradient_accumulation_steps, + gradient_clipping=args.max_grad_norm, + offload_optimizer_device=args.offload_optimizer_device, + offload_optimizer_nvme_path=args.offload_optimizer_nvme_path, + offload_param_device=args.offload_param_device, + offload_param_nvme_path=args.offload_param_nvme_path, + zero3_init_flag=args.zero3_init_flag, + zero3_save_16bit_model=args.zero3_save_16bit_model, + ) + deepspeed_plugin.deepspeed_config["train_micro_batch_size_per_gpu"] = args.train_batch_size + deepspeed_plugin.deepspeed_config["train_batch_size"] = ( + args.train_batch_size * args.gradient_accumulation_steps * int(os.environ["WORLD_SIZE"]) + ) + deepspeed_plugin.set_mixed_precision(args.mixed_precision) + if args.mixed_precision.lower() == "fp16": + deepspeed_plugin.deepspeed_config["fp16"]["initial_scale_power"] = 0 # preventing overflow. + if args.full_fp16 or args.fp16_master_weights_and_gradients: + if args.offload_optimizer_device == "cpu" and args.zero_stage == 2: + deepspeed_plugin.deepspeed_config["fp16"]["fp16_master_weights_and_grads"] = True + logger.info("[DeepSpeed] full fp16 enable.") + else: + logger.info( + "[DeepSpeed]full fp16, fp16_master_weights_and_grads currently only supported using ZeRO-Offload with DeepSpeedCPUAdam on ZeRO-2 stage." + ) + + if args.offload_optimizer_device is not None: + logger.info("[DeepSpeed] start to manually build cpu_adam.") + deepspeed.ops.op_builder.CPUAdamBuilder().load() + logger.info("[DeepSpeed] building cpu_adam done.") + + return deepspeed_plugin + + +# Accelerate library does not support multiple models for deepspeed. So, we need to wrap multiple models into a single model. 
+def prepare_deepspeed_model(args: argparse.Namespace, **models): + # remove None from models + models = {k: v for k, v in models.items() if v is not None} + + class DeepSpeedWrapper(torch.nn.Module): + def __init__(self, **kw_models) -> None: + super().__init__() + self.models = torch.nn.ModuleDict() + + for key, model in kw_models.items(): + if isinstance(model, list): + model = torch.nn.ModuleList(model) + assert isinstance( + model, torch.nn.Module + ), f"model must be an instance of torch.nn.Module, but got {key} is {type(model)}" + self.models.update(torch.nn.ModuleDict({key: model})) + + def get_models(self): + return self.models + + ds_model = DeepSpeedWrapper(**models) + return ds_model diff --git a/library/train_util.py b/library/train_util.py index 3781dcde..38e1b458 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -21,7 +21,6 @@ from typing import ( Union, ) from accelerate import Accelerator, InitProcessGroupKwargs, DistributedDataParallelKwargs -from accelerate import DeepSpeedPlugin import glob import math import os @@ -70,6 +69,7 @@ from library.lpw_stable_diffusion import StableDiffusionLongPromptWeightingPipel import library.model_util as model_util import library.huggingface_util as huggingface_util import library.sai_model_spec as sai_model_spec +import library.deepspeed_utils as deepspeed_utils from library.utils import setup_logging setup_logging() @@ -3243,52 +3243,6 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: "--prior_loss_weight", type=float, default=1.0, help="loss weight for regularization images / 正則化画像のlossの重み" ) - # DeepSpeed Arguments. https://huggingface.co/docs/accelerate/usage_guides/deepspeed - parser.add_argument("--deepspeed", action="store_true", help="enable deepspeed training") - parser.add_argument( - "--zero_stage", - type=int, default=2, - choices=[0, 1, 2, 3], - help="Possible options are 0,1,2,3." 
- ) - parser.add_argument( - "--offload_optimizer_device", - type=str, default=None, - choices=[None, "cpu", "nvme"], - help="Possible options are none|cpu|nvme. Only applicable with ZeRO Stages 2 and 3." - ) - parser.add_argument( - "--offload_optimizer_nvme_path", - type=str, default=None, - help="Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3." - ) - parser.add_argument( - "--offload_param_device", - type=str, default=None, - choices=[None, "cpu", "nvme"], - help="Possible options are none|cpu|nvme. Only applicable with ZeRO Stage 3." - ) - parser.add_argument( - "--offload_param_nvme_path", - type=str, default=None, - help="Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3." - ) - parser.add_argument( - "--zero3_init_flag", - action="store_true", - help="Flag to indicate whether to enable `deepspeed.zero.Init` for constructing massive models." - "Only applicable with ZeRO Stage-3." - ) - parser.add_argument( - "--zero3_save_16bit_model", - action="store_true", - help="Flag to indicate whether to save 16-bit model. Only applicable with ZeRO Stage-3." - ) - parser.add_argument( - "--fp16_master_weights_and_gradients", - action="store_true", - help="fp16_master_and_gradients requires optimizer to support keeping fp16 master and gradients while keeping the optimizer states in fp32." 
- ) def verify_training_args(args: argparse.Namespace): r""" @@ -4090,6 +4044,10 @@ def load_tokenizer(args: argparse.Namespace): def prepare_accelerator(args: argparse.Namespace): + """ + this function also prepares deepspeed plugin + """ + if args.logging_dir is None: logging_dir = None else: @@ -4135,7 +4093,7 @@ def prepare_accelerator(args: argparse.Namespace): ), ) kwargs_handlers = list(filter(lambda x: x is not None, kwargs_handlers)) - deepspeed_plugin = prepare_deepspeed_plugin(args) + deepspeed_plugin = deepspeed_utils.prepare_deepspeed_plugin(args) accelerator = Accelerator( gradient_accumulation_steps=args.gradient_accumulation_steps, @@ -4149,62 +4107,6 @@ def prepare_accelerator(args: argparse.Namespace): print("accelerator device:", accelerator.device) return accelerator -def prepare_deepspeed_plugin(args: argparse.Namespace): - if args.deepspeed is None: return None - try: - import deepspeed - except ImportError as e: - print("deepspeed is not installed. please install deepspeed in your environment with following command. 
DS_BUILD_OPS=0 pip install deepspeed") - exit(1) - - deepspeed_plugin = DeepSpeedPlugin( - zero_stage=args.zero_stage, - gradient_accumulation_steps=args.gradient_accumulation_steps, gradient_clipping=args.max_grad_norm, - offload_optimizer_device=args.offload_optimizer_device, offload_optimizer_nvme_path=args.offload_optimizer_nvme_path, - offload_param_device=args.offload_param_device, offload_param_nvme_path=args.offload_param_nvme_path, - zero3_init_flag=args.zero3_init_flag, zero3_save_16bit_model=args.zero3_save_16bit_model, - ) - deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu'] = args.train_batch_size - deepspeed_plugin.deepspeed_config['train_batch_size'] = \ - args.train_batch_size * args.gradient_accumulation_steps * int(os.environ['WORLD_SIZE']) - deepspeed_plugin.set_mixed_precision(args.mixed_precision) - if args.mixed_precision.lower() == "fp16": - deepspeed_plugin.deepspeed_config['fp16']['initial_scale_power'] = 0 # preventing overflow. - if args.full_fp16 or args.fp16_master_weights_and_gradients: - if args.offload_optimizer_device == "cpu" and args.zero_stage == 2: - deepspeed_plugin.deepspeed_config['fp16']['fp16_master_weights_and_grads'] = True - print("[DeepSpeed] full fp16 enable.") - else: - print("[DeepSpeed]full fp16, fp16_master_weights_and_grads currently only supported using ZeRO-Offload with DeepSpeedCPUAdam on ZeRO-2 stage.") - - if args.offload_optimizer_device is not None: - print('[DeepSpeed] start to manually build cpu_adam.') - deepspeed.ops.op_builder.CPUAdamBuilder().load() - print('[DeepSpeed] building cpu_adam done.') - - return deepspeed_plugin - -def prepare_deepspeed_model(args: argparse.Namespace, **models): - class DeepSpeedWrapper(torch.nn.Module): - def __init__(self, **kw_models) -> None: - super().__init__() - self.models = torch.nn.ModuleDict() - - for key, model in kw_models.items(): - if isinstance(model, list): - model = torch.nn.ModuleList(model) - assert isinstance(model, torch.nn.Module), 
f"model must be an instance of torch.nn.Module, but got {key} is {type(model)}" - self.models.update( - torch.nn.ModuleDict( - {key: model} - ) - ) - - def get_models(self): - return self.models - - ds_model = DeepSpeedWrapper(**models) - return ds_model def prepare_dtype(args: argparse.Namespace): weight_dtype = torch.float32 diff --git a/sdxl_train.py b/sdxl_train.py index 5e5e9f29..0feb4e36 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -11,11 +11,12 @@ from tqdm import tqdm import torch from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from accelerate.utils import set_seed from diffusers import DDPMScheduler -from library import sdxl_model_util +from library import deepspeed_utils, sdxl_model_util import library.train_util as train_util @@ -97,6 +98,7 @@ def train(args): train_util.verify_training_args(args) train_util.prepare_dataset_args(args, True) sdxl_train_util.verify_sdxl_training_args(args) + deepspeed_utils.prepare_deepspeed_args(args) setup_logging(args, reset=True) assert ( @@ -361,7 +363,7 @@ def train(args): batch_size=1, shuffle=True, collate_fn=collator, - num_workers=n_workers if not args.deepspeed else 1, # To avoid RuntimeError: DataLoader worker exited unexpectedly with exit code 1. 
+ num_workers=n_workers, persistent_workers=args.persistent_data_loader_workers, ) @@ -398,41 +400,31 @@ def train(args): text_encoder1.to(weight_dtype) text_encoder2.to(weight_dtype) - if args.deepspeed: - training_models_dict = {} - if train_unet: - training_models_dict["unet"] = unet - if train_text_encoder1: - text_encoder1.text_model.encoder.layers[-1].requires_grad_(False) - text_encoder1.text_model.final_layer_norm.requires_grad_(False) - training_models_dict["text_encoder1"] = text_encoder1 - if train_text_encoder2: - training_models_dict["text_encoder2"] = text_encoder2 - ds_model = train_util.prepare_deepspeed_model(args, **training_models_dict) - ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) - - training_models = [] # override training_models - if train_unet: - unet = ds_model.models["unet"] - training_models.append(unet) - if train_text_encoder1: - text_encoder1 = ds_model.models["text_encoder1"] - training_models.append(text_encoder1) - if train_text_encoder2: - text_encoder2 = ds_model.models["text_encoder2"] - training_models.append(text_encoder2) + # freeze last layer and final_layer_norm in te1 since we use the output of the penultimate layer + if train_text_encoder1: + text_encoder1.text_model.encoder.layers[-1].requires_grad_(False) + text_encoder1.text_model.final_layer_norm.requires_grad_(False) - else: # acceleratorがなんかよろしくやってくれるらしい + if args.deepspeed: + ds_model = deepspeed_utils.prepare_deepspeed_model( + args, + unet=unet if train_unet else None, + text_encoder1=text_encoder1 if train_text_encoder1 else None, + text_encoder2=text_encoder2 if train_text_encoder2 else None, + ) + ds_model = accelerator.prepare(ds_model) + training_models = [ds_model] + + else: + # acceleratorがなんかよろしくやってくれるらしい if train_unet: unet = accelerator.prepare(unet) if train_text_encoder1: - # freeze last layer and final_layer_norm in te1 since we use the output of the penultimate layer - 
text_encoder1.text_model.encoder.layers[-1].requires_grad_(False) - text_encoder1.text_model.final_layer_norm.requires_grad_(False) text_encoder1 = accelerator.prepare(text_encoder1) if train_text_encoder2: text_encoder2 = accelerator.prepare(text_encoder2) - optimizer, train_dataloader, lr_scheduler = accelerator.prepare(optimizer, train_dataloader, lr_scheduler) + + optimizer, train_dataloader, lr_scheduler = accelerator.prepare(optimizer, train_dataloader, lr_scheduler) # TextEncoderの出力をキャッシュするときにはCPUへ移動する if args.cache_text_encoder_outputs: @@ -446,8 +438,9 @@ def train(args): text_encoder2.to(accelerator.device) # 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする - if args.full_fp16 and not args.deepspeed: + if args.full_fp16: # During deepseed training, accelerate not handles fp16/bf16|mixed precision directly via scaler. Let deepspeed engine do. + # -> But we think it's ok to patch accelerator even if deepspeed is enabled. train_util.patch_accelerator_for_fp16_training(accelerator) # resumeする @@ -508,10 +501,10 @@ def train(args): for step, batch in enumerate(train_dataloader): current_step.value = global_step with accelerator.accumulate(*training_models): - with torch.no_grad(): # why this block differ within train_network.py? 
- if "latents" in batch and batch["latents"] is not None: - latents = batch["latents"].to(accelerator.device).to(dtype=weight_dtype) - else: + if "latents" in batch and batch["latents"] is not None: + latents = batch["latents"].to(accelerator.device).to(dtype=weight_dtype) + else: + with torch.no_grad(): # latentに変換 latents = vae.encode(batch["images"].to(vae_dtype)).latent_dist.sample().to(weight_dtype) @@ -519,7 +512,7 @@ def train(args): if torch.any(torch.isnan(latents)): accelerator.print("NaN found in latents, replacing with zeros") latents = torch.nan_to_num(latents, 0, out=latents) - latents = latents * sdxl_model_util.VAE_SCALE_FACTOR + latents = latents * sdxl_model_util.VAE_SCALE_FACTOR if "text_encoder_outputs1_list" not in batch or batch["text_encoder_outputs1_list"] is None: input_ids1 = batch["input_ids"] @@ -768,6 +761,7 @@ def setup_parser() -> argparse.ArgumentParser: train_util.add_sd_models_arguments(parser) train_util.add_dataset_arguments(parser, True, True, True) train_util.add_training_arguments(parser, False) + deepspeed_utils.add_deepspeed_arguments(parser) train_util.add_sd_saving_arguments(parser) train_util.add_optimizer_arguments(parser) config_util.add_config_arguments(parser) diff --git a/train_db.py b/train_db.py index 66a83d1d..ea1cfeb8 100644 --- a/train_db.py +++ b/train_db.py @@ -11,7 +11,9 @@ import toml from tqdm import tqdm import torch +from library import deepspeed_utils from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from accelerate.utils import set_seed @@ -46,6 +48,7 @@ logger = logging.getLogger(__name__) def train(args): train_util.verify_training_args(args) train_util.prepare_dataset_args(args, False) + deepspeed_utils.prepare_deepspeed_args(args) setup_logging(args, reset=True) cache_latents = args.cache_latents @@ -187,7 +190,7 @@ def train(args): batch_size=1, shuffle=True, collate_fn=collator, - num_workers=n_workers if not args.deepspeed else 1, # To avoid RuntimeError: DataLoader 
worker exited unexpectedly with exit code 1. + num_workers=n_workers, persistent_workers=args.persistent_data_loader_workers, ) @@ -220,30 +223,27 @@ def train(args): # acceleratorがなんかよろしくやってくれるらしい if args.deepspeed: - training_models_dict = {} - training_models_dict["unet"] = unet - if train_text_encoder: training_models_dict["text_encoder"] = text_encoder + if args.train_text_encoder: + ds_model = deepspeed_utils.prepare_deepspeed_model(args, unet=unet, text_encoder=text_encoder) + else: + ds_model = deepspeed_utils.prepare_deepspeed_model(args, unet=unet) + ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + ds_model, optimizer, train_dataloader, lr_scheduler + ) + training_models = [ds_model] - ds_model = train_util.prepare_deepspeed_model(args, **training_models_dict) - ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) - - training_models = [] - unet = ds_model.models["unet"] - training_models.append(unet) - if train_text_encoder: - text_encoder = ds_model.models["text_encoder"] - training_models.append(text_encoder) - else: if train_text_encoder: unet, text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( unet, text_encoder, optimizer, train_dataloader, lr_scheduler ) + training_models = [unet, text_encoder] else: unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(unet, optimizer, train_dataloader, lr_scheduler) + training_models = [unet] - if not train_text_encoder: - text_encoder.to(accelerator.device, dtype=weight_dtype) # to avoid 'cpu' vs 'cuda' error + if not train_text_encoder: + text_encoder.to(accelerator.device, dtype=weight_dtype) # to avoid 'cpu' vs 'cuda' error # 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする if args.full_fp16: @@ -312,8 +312,10 @@ def train(args): if not args.gradient_checkpointing: text_encoder.train(False) text_encoder.requires_grad_(False) + if len(training_models) == 
2: + training_models = training_models[0] # remove text_encoder from training_models - with accelerator.accumulate(unet): + with accelerator.accumulate(*training_models): with torch.no_grad(): # latentに変換 if cache_latents: @@ -480,6 +482,7 @@ def setup_parser() -> argparse.ArgumentParser: train_util.add_sd_models_arguments(parser) train_util.add_dataset_arguments(parser, True, False, True) train_util.add_training_arguments(parser, True) + deepspeed_utils.add_deepspeed_arguments(parser) train_util.add_sd_saving_arguments(parser) train_util.add_optimizer_arguments(parser) config_util.add_config_arguments(parser) diff --git a/train_network.py b/train_network.py index af1b7f63..a6ce169a 100644 --- a/train_network.py +++ b/train_network.py @@ -13,13 +13,14 @@ from tqdm import tqdm import torch from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from torch.nn.parallel import DistributedDataParallel as DDP from accelerate.utils import set_seed from diffusers import DDPMScheduler -from library import model_util +from library import deepspeed_utils, model_util import library.train_util as train_util from library.train_util import ( @@ -141,6 +142,7 @@ class NetworkTrainer: training_started_at = time.time() train_util.verify_training_args(args) train_util.prepare_dataset_args(args, True) + deepspeed_utils.prepare_deepspeed_args(args) setup_logging(args, reset=True) cache_latents = args.cache_latents @@ -357,7 +359,7 @@ class NetworkTrainer: batch_size=1, shuffle=True, collate_fn=collator, - num_workers=n_workers if not args.deepspeed else 1, # To avoid RuntimeError: DataLoader worker exited unexpectedly with exit code 1. 
+ num_workers=n_workers, persistent_workers=args.persistent_data_loader_workers, ) @@ -414,22 +416,17 @@ class NetworkTrainer: # acceleratorがなんかよろしくやってくれるらしい / accelerator will do something good if args.deepspeed: - training_models_dict = {} - if train_unet: training_models_dict["unet"] = unet - if train_text_encoder: training_models_dict["text_encoder"] = text_encoders - training_models_dict["network"] = network - - ds_model = train_util.prepare_deepspeed_model(args, **training_models_dict) - ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(ds_model, optimizer, train_dataloader, lr_scheduler) - - if train_unet: unet = ds_model.models["unet"] - if train_text_encoder: - text_encoder = ds_model.models["text_encoder"] - if len(ds_model.models["text_encoder"]) > 1: - text_encoders = text_encoder - else: - text_encoders = [text_encoder] - + ds_model = deepspeed_utils.prepare_deepspeed_model( + args, + unet=unet if train_unet else None, + text_encoder1=text_encoders[0] if train_text_encoder else None, + text_encoder2=text_encoders[1] if train_text_encoder and len(text_encoders) > 1 else None, + network=network, + ) + ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + ds_model, optimizer, train_dataloader, lr_scheduler + ) + training_model = ds_model else: if train_unet: unet = accelerator.prepare(unet) @@ -444,7 +441,10 @@ class NetworkTrainer: else: pass # if text_encoder is not trained, no need to prepare. 
and device and dtype are already set - network, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(network, optimizer, train_dataloader, lr_scheduler) + network, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + network, optimizer, train_dataloader, lr_scheduler + ) + training_model = network if args.gradient_checkpointing: # according to TI example in Diffusers, train is required @@ -777,13 +777,13 @@ class NetworkTrainer: for step, batch in enumerate(train_dataloader): current_step.value = global_step - with accelerator.accumulate(network): + with accelerator.accumulate(training_model): on_step_start(text_encoder, unet) - with torch.no_grad(): - if "latents" in batch and batch["latents"] is not None: - latents = batch["latents"].to(accelerator.device) - else: + if "latents" in batch and batch["latents"] is not None: + latents = batch["latents"].to(accelerator.device) + else: + with torch.no_grad(): # latentに変換 latents = vae.encode(batch["images"].to(dtype=vae_dtype)).latent_dist.sample() @@ -791,7 +791,7 @@ class NetworkTrainer: if torch.any(torch.isnan(latents)): accelerator.print("NaN found in latents, replacing with zeros") latents = torch.nan_to_num(latents, 0, out=latents) - latents = latents * self.vae_scale_factor + latents = latents * self.vae_scale_factor # get multiplier for each sample if network_has_multiplier: @@ -976,6 +976,7 @@ def setup_parser() -> argparse.ArgumentParser: train_util.add_sd_models_arguments(parser) train_util.add_dataset_arguments(parser, True, True, True) train_util.add_training_arguments(parser, True) + deepspeed_utils.add_deepspeed_arguments(parser) train_util.add_optimizer_arguments(parser) config_util.add_config_arguments(parser) custom_train_functions.add_custom_train_arguments(parser) From a9b64ffba8efbb0991a094e38b1f5d5c56680caf Mon Sep 17 00:00:00 2001 From: Kohya S Date: Tue, 27 Feb 2024 21:43:55 +0900 Subject: [PATCH 17/69] support masked loss in sdxl_train ref #589 --- README.md | 4 +++- 
sdxl_train.py | 20 +++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9cc79cc0..354983c3 100644 --- a/README.md +++ b/README.md @@ -251,7 +251,9 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum ### Masked loss -`train_network.py` and `sdxl_train_network.py` now support the masked loss. `--masked_loss` option is added. +`train_network.py`, `sdxl_train_network.py` and `sdxl_train.py` now support the masked loss. `--masked_loss` option is added. + +NOTE: `train_network.py` and `sdxl_train.py` are not tested yet. ControlNet dataset is used to specify the mask. The mask images should be the RGB images. The pixel value 255 in R channel is treated as the mask (the loss is calculated only for the pixels with the mask), and 0 is treated as the non-mask. See details for the dataset specification in the [LLLite documentation](./docs/train_lllite_README.md#preparing-the-dataset). diff --git a/sdxl_train.py b/sdxl_train.py index e0df263d..448a160f 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -11,6 +11,7 @@ from tqdm import tqdm import torch from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from accelerate.utils import set_seed @@ -124,7 +125,7 @@ def train(args): # データセットを準備する if args.dataset_class is None: - blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, True, False, True)) + blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, True, args.masked_loss, True)) if args.dataset_config is not None: logger.info(f"Load dataset config from {args.dataset_config}") user_config = config_util.load_user_config(args.dataset_config) @@ -579,6 +580,16 @@ def train(args): ): # do not mean over batch dimension for snr weight or scale v-pred loss loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none") + + if args.masked_loss: + # mask image is -1 to 1. 
we need to convert it to 0 to 1 + mask_image = batch["conditioning_images"].to(dtype=weight_dtype)[:, 0].unsqueeze(1) # use R channel + + # resize to the same size as the loss + mask_image = torch.nn.functional.interpolate(mask_image, size=loss.shape[2:], mode="area") + mask_image = mask_image / 2 + 0.5 + loss = loss * mask_image + loss = loss.mean([1, 2, 3]) if args.min_snr_gamma: @@ -780,6 +791,13 @@ def setup_parser() -> argparse.ArgumentParser: + f"U-Netの各ブロックの学習率、カンマ区切り、{UNET_NUM_BLOCKS_FOR_BLOCK_LR}個の値", ) + # TODO common masked_loss argument + parser.add_argument( + "--masked_loss", + action="store_true", + help="apply mask for calculating loss. conditioning_data_dir is required for dataset. / 損失計算時にマスクを適用する。datasetにはconditioning_data_dirが必要", + ) + return parser From 14c9372a38c2d50d8206846fe0aa4406152258de Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 3 Mar 2024 21:47:37 +0900 Subject: [PATCH 18/69] add doc about Colab/rich issue --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index e1b6a26c..7df6fafc 100644 --- a/README.md +++ b/README.md @@ -251,6 +251,7 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum ### Working in progress +- Colab seems to stop with log output. Try specifying `--console_log_simple` option in the training script to disable rich logging. - `train_network.py` and `sdxl_train_network.py` are modified to record some dataset settings in the metadata of the trained model (`caption_prefix`, `caption_suffix`, `keep_tokens_separator`, `secondary_separator`, `enable_wildcard`). - Some features are added to the dataset subset settings. - `secondary_separator` is added to specify the tag separator that is not the target of shuffling or dropping. @@ -260,6 +261,8 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - The existing features `caption_prefix` and `caption_suffix` can be used together. 
`caption_prefix` and `caption_suffix` are processed first, and then `enable_wildcard`, `keep_tokens_separator`, shuffling and dropping, and `secondary_separator` are processed in order. - The examples are [shown below](#example-of-dataset-settings--データセット設定の記述例). + +- Colab での動作時、ログ出力で停止してしまうようです。学習スクリプトに `--console_log_simple` オプションを指定し、rich のロギングを無効してお試しください。 - `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 - データセットのサブセット設定にいくつかの機能を追加しました。 - シャッフルの対象とならないタグ分割識別子の指定 `secondary_separator` を追加しました。`secondary_separator=";;;"` のように指定します。`secondary_separator` で区切ることで、その部分はシャッフル、drop 時にまとめて扱われます。詳しくは記述例をご覧ください。 From 124ec45876a9f07820b42fda0d7ca9019de773d5 Mon Sep 17 00:00:00 2001 From: Horizon1704 <92718180+Horizon1704@users.noreply.github.com> Date: Sun, 10 Mar 2024 22:53:05 +0800 Subject: [PATCH 19/69] Add "encoding='utf-8'" --- library/train_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/train_util.py b/library/train_util.py index d2b69edb..5f23dd13 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3474,7 +3474,7 @@ def read_config_from_file(args: argparse.Namespace, parser: argparse.ArgumentPar exit(1) logger.info(f"Loading settings from {config_path}...") - with open(config_path, "r") as f: + with open(config_path, "r", encoding='utf-8') as f: config_dict = toml.load(f) # combine all sections into one From 095b8035e63f7c79a232114d8f0e1ec27f431ebc Mon Sep 17 00:00:00 2001 From: gesen2egee <79357052+gesen2egee@users.noreply.github.com> Date: Sun, 10 Mar 2024 23:33:38 +0800 Subject: [PATCH 20/69] save state on train end --- fine_tune.py | 2 +- library/train_util.py | 5 +++++ sdxl_train.py | 2 +- sdxl_train_control_net_lllite.py | 2 +- train_controlnet.py | 2 +- train_db.py | 2 +- train_network.py | 2 +- train_textual_inversion.py | 2 +- train_textual_inversion_XTI.py | 2 +- 9 
files changed, 13 insertions(+), 8 deletions(-) diff --git a/fine_tune.py b/fine_tune.py index 875a9195..46f12828 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -457,7 +457,7 @@ def train(args): accelerator.end_training() - if args.save_state and is_main_process: + if is_main_process and (args.save_state or args.save_state_on_train_end): train_util.save_state_on_train_end(args, accelerator) del accelerator # この後メモリを使うのでこれは消す diff --git a/library/train_util.py b/library/train_util.py index d2b69edb..b3ca15f5 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -2890,6 +2890,11 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: action="store_true", help="save training state additionally (including optimizer states etc.) / optimizerなど学習状態も含めたstateを追加で保存する", ) + parser.add_argument( + "--save_state_on_train_end", + action="store_true", + help="save training state additionally (including optimizer states etc.) on train end / optimizerなど学習状態も含めたstateを追加で保存する", + ) parser.add_argument("--resume", type=str, default=None, help="saved state to resume training / 学習再開するモデルのstate") parser.add_argument("--train_batch_size", type=int, default=1, help="batch size for training / 学習時のバッチサイズ") diff --git a/sdxl_train.py b/sdxl_train.py index e0df263d..107bb945 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -712,7 +712,7 @@ def train(args): accelerator.end_training() - if args.save_state: # and is_main_process: + if args.save_state or args.save_state_on_train_end: train_util.save_state_on_train_end(args, accelerator) del accelerator # この後メモリを使うのでこれは消す diff --git a/sdxl_train_control_net_lllite.py b/sdxl_train_control_net_lllite.py index 1e5f9234..e99b4e35 100644 --- a/sdxl_train_control_net_lllite.py +++ b/sdxl_train_control_net_lllite.py @@ -549,7 +549,7 @@ def train(args): accelerator.end_training() - if is_main_process and args.save_state: + if is_main_process and (args.save_state or args.save_state_on_train_end): 
train_util.save_state_on_train_end(args, accelerator) if is_main_process: diff --git a/train_controlnet.py b/train_controlnet.py index dc73a91c..e44f0885 100644 --- a/train_controlnet.py +++ b/train_controlnet.py @@ -565,7 +565,7 @@ def train(args): accelerator.end_training() - if is_main_process and args.save_state: + if is_main_process and (args.save_state or args.save_state_on_train_end): train_util.save_state_on_train_end(args, accelerator) # del accelerator # この後メモリを使うのでこれは消す→printで使うので消さずにおく diff --git a/train_db.py b/train_db.py index 8d36097a..41a9a7b9 100644 --- a/train_db.py +++ b/train_db.py @@ -444,7 +444,7 @@ def train(args): accelerator.end_training() - if args.save_state and is_main_process: + if is_main_process and (args.save_state or args.save_state_on_train_end): train_util.save_state_on_train_end(args, accelerator) del accelerator # この後メモリを使うのでこれは消す diff --git a/train_network.py b/train_network.py index e0fa6945..4707d5ae 100644 --- a/train_network.py +++ b/train_network.py @@ -935,7 +935,7 @@ class NetworkTrainer: accelerator.end_training() - if is_main_process and args.save_state: + if is_main_process and args.save_state or args.save_state_on_train_end: train_util.save_state_on_train_end(args, accelerator) if is_main_process: diff --git a/train_textual_inversion.py b/train_textual_inversion.py index df1d8485..0266bc14 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -732,7 +732,7 @@ class TextualInversionTrainer: accelerator.end_training() - if args.save_state and is_main_process: + if is_main_process and (args.save_state or args.save_state_on_train_end): train_util.save_state_on_train_end(args, accelerator) if is_main_process: diff --git a/train_textual_inversion_XTI.py b/train_textual_inversion_XTI.py index 695fad2a..ad7c267e 100644 --- a/train_textual_inversion_XTI.py +++ b/train_textual_inversion_XTI.py @@ -586,7 +586,7 @@ def train(args): accelerator.end_training() - if args.save_state and is_main_process: + if 
is_main_process and (args.save_state or args.save_state_on_train_end): train_util.save_state_on_train_end(args, accelerator) updated_embs = text_encoder.get_input_embeddings().weight[token_ids_XTI].data.detach().clone() From d282c450026dcfd5f1fd5856f5087ebaed47be46 Mon Sep 17 00:00:00 2001 From: gesen2egee <79357052+gesen2egee@users.noreply.github.com> Date: Mon, 11 Mar 2024 23:56:09 +0800 Subject: [PATCH 21/69] Update train_network.py --- train_network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_network.py b/train_network.py index 4707d5ae..3db583f1 100644 --- a/train_network.py +++ b/train_network.py @@ -935,7 +935,7 @@ class NetworkTrainer: accelerator.end_training() - if is_main_process and args.save_state or args.save_state_on_train_end: + if is_main_process and (args.save_state or args.save_state_on_train_end): train_util.save_state_on_train_end(args, accelerator) if is_main_process: From 948029fe61d9142f88374d6701223bf9f7ee5d47 Mon Sep 17 00:00:00 2001 From: kblueleaf Date: Tue, 12 Mar 2024 19:11:45 +0800 Subject: [PATCH 22/69] random ip_noise_gamma strength --- library/train_util.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/library/train_util.py b/library/train_util.py index b71e4edc..aa2d9b90 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3100,6 +3100,13 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: help="enable input perturbation noise. used for regularization. recommended value: around 0.1 (from arxiv.org/abs/2301.11706) " + "/ input perturbation noiseを有効にする。正則化に使用される。推奨値: 0.1程度 (arxiv.org/abs/2301.11706 より)", ) + parser.add_argument( + "--ip_noise_gamma_random_strength", + type=bool, + default=False, + help="Use random strength between 0~ip_noise_gamma for input perturbation noise." 
+ + "/ input perturbation noiseにおいて、0からip_noise_gammaの間でランダムな強度を使用します。", + ) # parser.add_argument( # "--perlin_noise", # type=int, @@ -4673,7 +4680,11 @@ def get_noise_noisy_latents_and_timesteps(args, noise_scheduler, latents): # Add noise to the latents according to the noise magnitude at each timestep # (this is the forward diffusion process) if args.ip_noise_gamma: - noisy_latents = noise_scheduler.add_noise(latents, noise + args.ip_noise_gamma * torch.randn_like(latents), timesteps) + if args.ip_noise_gamma_random_strength: + strength = torch.rand(1, device=latents.device) * args.ip_noise_gamma + else: + strength = args.ip_noise_gamma + noisy_latents = noise_scheduler.add_noise(latents, noise + strength * torch.randn_like(latents), timesteps) else: noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps) From 86399407b2bb5a93d691846acfa88e7ba38ae70d Mon Sep 17 00:00:00 2001 From: kblueleaf Date: Tue, 12 Mar 2024 19:14:01 +0800 Subject: [PATCH 23/69] random noise_offset strength --- library/train_util.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/library/train_util.py b/library/train_util.py index aa2d9b90..5282b524 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3087,6 +3087,12 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: default=None, help="enable noise offset with this value (if enabled, around 0.1 is recommended) / Noise offsetを有効にしてこの値を設定する(有効にする場合は0.1程度を推奨)", ) + parser.add_argument( + "--noise_offset_random_strength", + type=bool, + default=False, + help="use random strength between 0~noise_offset for noise offset. 
/ noise offsetにおいて、0からnoise_offsetの間でランダムな強度を使用します。", + ) parser.add_argument( "--multires_noise_iterations", type=int, @@ -4663,7 +4669,11 @@ def get_noise_noisy_latents_and_timesteps(args, noise_scheduler, latents): # Sample noise that we'll add to the latents noise = torch.randn_like(latents, device=latents.device) if args.noise_offset: - noise = custom_train_functions.apply_noise_offset(latents, noise, args.noise_offset, args.adaptive_noise_scale) + if args.noise_offset_random_strength: + noise_offset = torch.rand(1, device=latents.device) * args.noise_offset + else: + noise_offset = args.noise_offset + noise = custom_train_functions.apply_noise_offset(latents, noise, noise_offset, args.adaptive_noise_scale) if args.multires_noise_iterations: noise = custom_train_functions.pyramid_noise_like( noise, latents.device, args.multires_noise_iterations, args.multires_noise_discount From 53954a1e2e05648bae6eb479720402968029cd3d Mon Sep 17 00:00:00 2001 From: kblueleaf Date: Tue, 12 Mar 2024 19:24:27 +0800 Subject: [PATCH 24/69] use correct settings for parser --- library/train_util.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index 5282b524..73a76867 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3089,8 +3089,7 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: ) parser.add_argument( "--noise_offset_random_strength", - type=bool, - default=False, + action="store_true", help="use random strength between 0~noise_offset for noise offset. / noise offsetにおいて、0からnoise_offsetの間でランダムな強度を使用します。", ) parser.add_argument( @@ -3108,8 +3107,7 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: ) parser.add_argument( "--ip_noise_gamma_random_strength", - type=bool, - default=False, + action="store_true", help="Use random strength between 0~ip_noise_gamma for input perturbation noise." 
+ "/ input perturbation noiseにおいて、0からip_noise_gammaの間でランダムな強度を使用します。", ) From 443f02942cfefd6e1899849f563580508d118ce0 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Fri, 15 Mar 2024 21:35:14 +0900 Subject: [PATCH 25/69] fix doc --- README.md | 106 ------------------------------------------------------ 1 file changed, 106 deletions(-) diff --git a/README.md b/README.md index cc5aca50..927d7a42 100644 --- a/README.md +++ b/README.md @@ -355,112 +355,6 @@ It becomes `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` や `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` などになります。 -### Working in progress - -- Colab seems to stop with log output. Try specifying `--console_log_simple` option in the training script to disable rich logging. -- `train_network.py` and `sdxl_train_network.py` are modified to record some dataset settings in the metadata of the trained model (`caption_prefix`, `caption_suffix`, `keep_tokens_separator`, `secondary_separator`, `enable_wildcard`). -- Some features are added to the dataset subset settings. - - `secondary_separator` is added to specify the tag separator that is not the target of shuffling or dropping. - - Specify `secondary_separator=";;;"`. When you specify `secondary_separator`, the part is not shuffled or dropped. See the example below. - - `enable_wildcard` is added. When set to `true`, the wildcard notation `{aaa|bbb|ccc}` can be used. See the example below. - - `keep_tokens_separator` is updated to be used twice in the caption. When you specify `keep_tokens_separator="|||"`, the part divided by the second `|||` is not shuffled or dropped and remains at the end. - - The existing features `caption_prefix` and `caption_suffix` can be used together. 
`caption_prefix` and `caption_suffix` are processed first, and then `enable_wildcard`, `keep_tokens_separator`, shuffling and dropping, and `secondary_separator` are processed in order. - - The examples are [shown below](#example-of-dataset-settings--データセット設定の記述例). - - -- Colab での動作時、ログ出力で停止してしまうようです。学習スクリプトに `--console_log_simple` オプションを指定し、rich のロギングを無効してお試しください。 -- `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 -- データセットのサブセット設定にいくつかの機能を追加しました。 - - シャッフルの対象とならないタグ分割識別子の指定 `secondary_separator` を追加しました。`secondary_separator=";;;"` のように指定します。`secondary_separator` で区切ることで、その部分はシャッフル、drop 時にまとめて扱われます。詳しくは記述例をご覧ください。 - - `enable_wildcard` を追加しました。`true` にするとワイルドカード記法 `{aaa|bbb|ccc}` が使えます。詳しくは記述例をご覧ください。 - - `keep_tokens_separator` をキャプション内に 2 つ使えるようにしました。たとえば `keep_tokens_separator="|||"` と指定したとき、`1girl, hatsune miku, vocaloid ||| stage, mic ||| best quality, rating: general` とキャプションを指定すると、二番目の `|||` で分割された部分はシャッフル、drop されず末尾に残ります。 - - 既存の機能 `caption_prefix` と `caption_suffix` とあわせて使えます。`caption_prefix` と `caption_suffix` は一番最初に処理され、その後、ワイルドカード、`keep_tokens_separator`、シャッフルおよび drop、`secondary_separator` の順に処理されます。 - -#### Example of dataset settings / データセット設定の記述例: - -```toml -[general] -flip_aug = true -color_aug = false -resolution = [1024, 1024] - -[[datasets]] -batch_size = 6 -enable_bucket = true -bucket_no_upscale = true -caption_extension = ".txt" -keep_tokens_separator= "|||" -shuffle_caption = true -caption_tag_dropout_rate = 0.1 -secondary_separator = ";;;" # subset 側に書くこともできます / can be written in the subset side -enable_wildcard = true # 同上 / same as above - - [[datasets.subsets]] - image_dir = "/path/to/image_dir" - num_repeats = 1 - - # ||| の前後はカンマは不要です(自動的に追加されます) / No comma is required before and after ||| (it is added automatically) - caption_prefix = "1girl, hatsune miku, vocaloid |||" - - # ||| の後はシャッフル、drop 
されず残ります / After |||, it is not shuffled or dropped and remains - # 単純に文字列として連結されるので、カンマなどは自分で入れる必要があります / It is simply concatenated as a string, so you need to put commas yourself - caption_suffix = ", anime screencap ||| masterpiece, rating: general" -``` - -#### Example of caption, secondary_separator notation: `secondary_separator = ";;;"` - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors -``` -The part `sky;;;cloud;;;day` is replaced with `sky,cloud,day` without shuffling or dropping. When shuffling and dropping are enabled, it is processed as a whole (as one tag). For example, it becomes `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (shuffled) or `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (dropped). - -#### Example of caption, enable_wildcard notation: `enable_wildcard = true` - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background -``` -`simple` or `white` is randomly selected, and it becomes `simple background` or `white background`. - -```txt -1girl, hatsune miku, vocaloid, {{retro style}} -``` -If you want to include `{` or `}` in the tag string, double them like `{{` or `}}` (in this example, the actual caption used for training is `{retro style}`). - -#### Example of caption, `keep_tokens_separator` notation: `keep_tokens_separator = "|||"` - -```txt -1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general -``` -It becomes `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` or `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` etc. 
- - -#### キャプション記述例、secondary_separator 記法:`secondary_separator = ";;;"` の場合 - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors -``` -`sky;;;cloud;;;day` の部分はシャッフル、drop されず `sky,cloud,day` に置換されます。シャッフル、drop が有効な場合、まとめて(一つのタグとして)処理されます。つまり `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (シャッフル)や `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (drop されたケース)などになります。 - -#### キャプション記述例、ワイルドカード記法: `enable_wildcard = true` の場合 - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background -``` -ランダムに `simple` または `white` が選ばれ、`simple background` または `white background` になります。 - -```txt -1girl, hatsune miku, vocaloid, {{retro style}} -``` -タグ文字列に `{` や `}` そのものを含めたい場合は `{{` や `}}` のように二つ重ねてください(この例では実際に学習に用いられるキャプションは `{retro style}` になります)。 - -#### キャプション記述例、`keep_tokens_separator` 記法: `keep_tokens_separator = "|||"` の場合 - -```txt -1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general -``` -`1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` や `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` などになります。 - - ### Mar 15, 2024 / 2024/3/15: v0.8.5 - Fixed a bug that the value of timestep embedding during SDXL training was incorrect. 
From 7081a0cf0f1ca1a543edf7cab10c4c7d497348ca Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 17 Mar 2024 18:09:15 +0900 Subject: [PATCH 26/69] extension of src image could be different than target image --- library/train_util.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index 7fe5bc56..0f8cf9ee 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1863,7 +1863,7 @@ class ControlNetDataset(BaseDataset): # assert all conditioning data exists missing_imgs = [] - cond_imgs_with_img = set() + cond_imgs_with_pair = set() for image_key, info in self.dreambooth_dataset_delegate.image_data.items(): db_subset = self.dreambooth_dataset_delegate.image_to_subset[image_key] subset = None @@ -1877,23 +1877,29 @@ class ControlNetDataset(BaseDataset): logger.warning(f"not directory: {subset.conditioning_data_dir}") continue - img_basename = os.path.basename(info.absolute_path) - ctrl_img_path = os.path.join(subset.conditioning_data_dir, img_basename) - if not os.path.exists(ctrl_img_path): + img_basename = os.path.splitext(os.path.basename(info.absolute_path))[0] + ctrl_img_path = glob_images(subset.conditioning_data_dir, img_basename) + if len(ctrl_img_path) < 1: missing_imgs.append(img_basename) + continue + ctrl_img_path = ctrl_img_path[0] + ctrl_img_path = os.path.abspath(ctrl_img_path) # normalize path info.cond_img_path = ctrl_img_path - cond_imgs_with_img.add(ctrl_img_path) + cond_imgs_with_pair.add(os.path.splitext(ctrl_img_path)[0]) # remove extension because Windows is case insensitive extra_imgs = [] for subset in subsets: conditioning_img_paths = glob_images(subset.conditioning_data_dir, "*") - extra_imgs.extend( - [cond_img_path for cond_img_path in conditioning_img_paths if cond_img_path not in cond_imgs_with_img] - ) + conditioning_img_paths = [os.path.abspath(p) for p in conditioning_img_paths] # normalize path + extra_imgs.extend([p for p in 
conditioning_img_paths if os.path.splitext(p)[0] not in cond_imgs_with_pair]) - assert len(missing_imgs) == 0, f"missing conditioning data for {len(missing_imgs)} images: {missing_imgs}" - assert len(extra_imgs) == 0, f"extra conditioning data for {len(extra_imgs)} images: {extra_imgs}" + assert ( + len(missing_imgs) == 0 + ), f"missing conditioning data for {len(missing_imgs)} images / 制御用画像が見つかりませんでした: {missing_imgs}" + assert ( + len(extra_imgs) == 0 + ), f"extra conditioning data for {len(extra_imgs)} images / 余分な制御用画像があります: {extra_imgs}" self.conditioning_image_transforms = IMAGE_TRANSFORMS From 3419c3de0d0ff8cba1d74444ece23608614f3c5b Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 17 Mar 2024 19:30:20 +0900 Subject: [PATCH 27/69] common masked loss func, apply to all training script --- docs/train_lllite_README-ja.md | 8 ++++++-- docs/train_lllite_README.md | 4 +++- library/config_util.py | 5 ++++- library/custom_train_functions.py | 24 ++++++++++++++++++++---- library/train_util.py | 16 ++++++++++++++++ sdxl_train.py | 21 ++++----------------- train_db.py | 7 ++++++- train_network.py | 17 +++-------------- train_textual_inversion.py | 7 ++++++- train_textual_inversion_XTI.py | 7 ++++++- 10 files changed, 74 insertions(+), 42 deletions(-) diff --git a/docs/train_lllite_README-ja.md b/docs/train_lllite_README-ja.md index dbdc1fea..1f6a78d5 100644 --- a/docs/train_lllite_README-ja.md +++ b/docs/train_lllite_README-ja.md @@ -21,9 +21,13 @@ ComfyUIのカスタムノードを用意しています。: https://github.com/k ## モデルの学習 ### データセットの準備 -通常のdatasetに加え、`conditioning_data_dir` で指定したディレクトリにconditioning imageを格納してください。conditioning imageは学習用画像と同じbasenameを持つ必要があります。また、conditioning imageは学習用画像と同じサイズに自動的にリサイズされます。conditioning imageにはキャプションファイルは不要です。 +DreamBooth 方式の dataset で、`conditioning_data_dir` で指定したディレクトリにconditioning imageを格納してください。 -たとえば DreamBooth 方式でキャプションファイルを用いる場合の設定ファイルは以下のようになります。 +(finetuning 方式の dataset はサポートしていません。) + +conditioning 
imageは学習用画像と同じbasenameを持つ必要があります。また、conditioning imageは学習用画像と同じサイズに自動的にリサイズされます。conditioning imageにはキャプションファイルは不要です。 + +たとえば、キャプションにフォルダ名ではなくキャプションファイルを用いる場合の設定ファイルは以下のようになります。 ```toml [[datasets.subsets]] diff --git a/docs/train_lllite_README.md b/docs/train_lllite_README.md index 04dc12da..a05f87f5 100644 --- a/docs/train_lllite_README.md +++ b/docs/train_lllite_README.md @@ -26,7 +26,9 @@ Due to the limitations of the inference environment, only CrossAttention (attn1 ### Preparing the dataset -In addition to the normal dataset, please store the conditioning image in the directory specified by `conditioning_data_dir`. The conditioning image must have the same basename as the training image. The conditioning image will be automatically resized to the same size as the training image. The conditioning image does not require a caption file. +In addition to the normal DreamBooth method dataset, please store the conditioning image in the directory specified by `conditioning_data_dir`. The conditioning image must have the same basename as the training image. The conditioning image will be automatically resized to the same size as the training image. The conditioning image does not require a caption file. + +(We do not support the finetuning method dataset.) 
```toml [[datasets.subsets]] diff --git a/library/config_util.py b/library/config_util.py index edc6a538..26daeb47 100644 --- a/library/config_util.py +++ b/library/config_util.py @@ -323,7 +323,10 @@ class ConfigSanitizer: self.dataset_schema = validate_flex_dataset elif support_dreambooth: - self.dataset_schema = self.db_dataset_schema + if support_controlnet: + self.dataset_schema = self.cn_dataset_schema + else: + self.dataset_schema = self.db_dataset_schema elif support_finetuning: self.dataset_schema = self.ft_dataset_schema elif support_controlnet: diff --git a/library/custom_train_functions.py b/library/custom_train_functions.py index a5647462..406e0e36 100644 --- a/library/custom_train_functions.py +++ b/library/custom_train_functions.py @@ -3,11 +3,14 @@ import argparse import random import re from typing import List, Optional, Union -from .utils import setup_logging +from .utils import setup_logging + setup_logging() -import logging +import logging + logger = logging.getLogger(__name__) + def prepare_scheduler_for_custom_training(noise_scheduler, device): if hasattr(noise_scheduler, "all_snr"): return @@ -64,7 +67,7 @@ def apply_snr_weight(loss, timesteps, noise_scheduler, gamma, v_prediction=False snr = torch.stack([noise_scheduler.all_snr[t] for t in timesteps]) min_snr_gamma = torch.minimum(snr, torch.full_like(snr, gamma)) if v_prediction: - snr_weight = torch.div(min_snr_gamma, snr+1).float().to(loss.device) + snr_weight = torch.div(min_snr_gamma, snr + 1).float().to(loss.device) else: snr_weight = torch.div(min_snr_gamma, snr).float().to(loss.device) loss = loss * snr_weight @@ -92,13 +95,15 @@ def add_v_prediction_like_loss(loss, timesteps, noise_scheduler, v_pred_like_los loss = loss + loss / scale * v_pred_like_loss return loss + def apply_debiased_estimation(loss, timesteps, noise_scheduler): snr_t = torch.stack([noise_scheduler.all_snr[t] for t in timesteps]) # batch_size snr_t = torch.minimum(snr_t, torch.ones_like(snr_t) * 1000) # if 
timestep is 0, snr_t is inf, so limit it to 1000 - weight = 1/torch.sqrt(snr_t) + weight = 1 / torch.sqrt(snr_t) loss = weight * loss return loss + # TODO train_utilと分散しているのでどちらかに寄せる @@ -474,6 +479,17 @@ def apply_noise_offset(latents, noise, noise_offset, adaptive_noise_scale): return noise +def apply_masked_loss(loss, batch): + # mask image is -1 to 1. we need to convert it to 0 to 1 + mask_image = batch["conditioning_images"].to(dtype=loss.dtype)[:, 0].unsqueeze(1) # use R channel + + # resize to the same size as the loss + mask_image = torch.nn.functional.interpolate(mask_image, size=loss.shape[2:], mode="area") + mask_image = mask_image / 2 + 0.5 + loss = loss * mask_image + return loss + + """ ########################################## # Perlin Noise diff --git a/library/train_util.py b/library/train_util.py index 0f8cf9ee..1d9f8bf8 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -3028,6 +3028,7 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: "--full_bf16", action="store_true", help="bf16 training including gradients / 勾配も含めてbf16で学習する" ) # TODO move to SDXL training, because it is not supported by SD1/2 parser.add_argument("--fp8_base", action="store_true", help="use fp8 for base model / base modelにfp8を使う") + parser.add_argument( "--ddp_timeout", type=int, @@ -3090,6 +3091,7 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: default=None, help="specify WandB API key to log in before starting training (optional). 
/ WandB APIキーを指定して学習開始前にログインする(オプション)", ) + parser.add_argument( "--noise_offset", type=float, @@ -3252,6 +3254,20 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: ) +def add_masked_loss_arguments(parser: argparse.ArgumentParser): + parser.add_argument( + "--conditioning_data_dir", + type=str, + default=None, + help="conditioning data directory / 条件付けデータのディレクトリ", + ) + parser.add_argument( + "--masked_loss", + action="store_true", + help="apply mask for calculating loss. conditioning_data_dir is required for dataset. / 損失計算時にマスクを適用する。datasetにはconditioning_data_dirが必要", + ) + + def verify_training_args(args: argparse.Namespace): r""" Verify training arguments. Also reflect highvram option to global variable diff --git a/sdxl_train.py b/sdxl_train.py index 448a160f..f8aa4608 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -40,6 +40,7 @@ from library.custom_train_functions import ( scale_v_prediction_loss_like_noise_prediction, add_v_prediction_like_loss, apply_debiased_estimation, + apply_masked_loss, ) from library.sdxl_original_unet import SdxlUNet2DConditionModel @@ -577,19 +578,12 @@ def train(args): or args.scale_v_pred_loss_like_noise_pred or args.v_pred_like_loss or args.debiased_estimation_loss + or args.masked_loss ): # do not mean over batch dimension for snr weight or scale v-pred loss loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none") - if args.masked_loss: - # mask image is -1 to 1. 
we need to convert it to 0 to 1 - mask_image = batch["conditioning_images"].to(dtype=weight_dtype)[:, 0].unsqueeze(1) # use R channel - - # resize to the same size as the loss - mask_image = torch.nn.functional.interpolate(mask_image, size=loss.shape[2:], mode="area") - mask_image = mask_image / 2 + 0.5 - loss = loss * mask_image - + loss = apply_masked_loss(loss, batch) loss = loss.mean([1, 2, 3]) if args.min_snr_gamma: @@ -755,6 +749,7 @@ def setup_parser() -> argparse.ArgumentParser: train_util.add_sd_models_arguments(parser) train_util.add_dataset_arguments(parser, True, True, True) train_util.add_training_arguments(parser, False) + train_util.add_masked_loss_arguments(parser) train_util.add_sd_saving_arguments(parser) train_util.add_optimizer_arguments(parser) config_util.add_config_arguments(parser) @@ -790,14 +785,6 @@ def setup_parser() -> argparse.ArgumentParser: help=f"learning rates for each block of U-Net, comma-separated, {UNET_NUM_BLOCKS_FOR_BLOCK_LR} values / " + f"U-Netの各ブロックの学習率、カンマ区切り、{UNET_NUM_BLOCKS_FOR_BLOCK_LR}個の値", ) - - # TODO common masked_loss argument - parser.add_argument( - "--masked_loss", - action="store_true", - help="apply mask for calculating loss. conditioning_data_dir is required for dataset. 
/ 損失計算時にマスクを適用する。datasetにはconditioning_data_dirが必要", - ) - return parser diff --git a/train_db.py b/train_db.py index 8d36097a..213df151 100644 --- a/train_db.py +++ b/train_db.py @@ -12,6 +12,7 @@ from tqdm import tqdm import torch from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from accelerate.utils import set_seed @@ -32,6 +33,7 @@ from library.custom_train_functions import ( apply_noise_offset, scale_v_prediction_loss_like_noise_prediction, apply_debiased_estimation, + apply_masked_loss, ) from library.utils import setup_logging, add_logging_arguments @@ -57,7 +59,7 @@ def train(args): # データセットを準備する if args.dataset_class is None: - blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, False, False, True)) + blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, False, args.masked_loss, True)) if args.dataset_config is not None: logger.info(f"Load dataset config from {args.dataset_config}") user_config = config_util.load_user_config(args.dataset_config) @@ -339,6 +341,8 @@ def train(args): target = noise loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none") + if args.masked_loss: + loss = apply_masked_loss(loss, batch) loss = loss.mean([1, 2, 3]) loss_weights = batch["loss_weights"] # 各sampleごとのweight @@ -464,6 +468,7 @@ def setup_parser() -> argparse.ArgumentParser: train_util.add_sd_models_arguments(parser) train_util.add_dataset_arguments(parser, True, False, True) train_util.add_training_arguments(parser, True) + train_util.add_masked_loss_arguments(parser) train_util.add_sd_saving_arguments(parser) train_util.add_optimizer_arguments(parser) config_util.add_config_arguments(parser) diff --git a/train_network.py b/train_network.py index f5617986..05522070 100644 --- a/train_network.py +++ b/train_network.py @@ -40,6 +40,7 @@ from library.custom_train_functions import ( scale_v_prediction_loss_like_noise_prediction, add_v_prediction_like_loss, apply_debiased_estimation, + 
apply_masked_loss, ) from library.utils import setup_logging, add_logging_arguments @@ -835,16 +836,8 @@ class NetworkTrainer: target = noise loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none") - if args.masked_loss: - # mask image is -1 to 1. we need to convert it to 0 to 1 - mask_image = batch["conditioning_images"].to(dtype=weight_dtype)[:, 0].unsqueeze(1) # use R channel - - # resize to the same size as the loss - mask_image = torch.nn.functional.interpolate(mask_image, size=loss.shape[2:], mode="area") - mask_image = mask_image / 2 + 0.5 - loss = loss * mask_image - + loss = apply_masked_loss(loss, batch) loss = loss.mean([1, 2, 3]) loss_weights = batch["loss_weights"] # 各sampleごとのweight @@ -968,6 +961,7 @@ def setup_parser() -> argparse.ArgumentParser: train_util.add_sd_models_arguments(parser) train_util.add_dataset_arguments(parser, True, True, True) train_util.add_training_arguments(parser, True) + train_util.add_masked_loss_arguments(parser) train_util.add_optimizer_arguments(parser) config_util.add_config_arguments(parser) custom_train_functions.add_custom_train_arguments(parser) @@ -1061,11 +1055,6 @@ def setup_parser() -> argparse.ArgumentParser: action="store_true", help="do not use fp16/bf16 VAE in mixed precision (use float VAE) / mixed precisionでも fp16/bf16 VAEを使わずfloat VAEを使う", ) - parser.add_argument( - "--masked_loss", - action="store_true", - help="apply mask for calculating loss. conditioning_data_dir is required for dataset. 
/ 損失計算時にマスクを適用する。datasetにはconditioning_data_dirが必要", - ) return parser diff --git a/train_textual_inversion.py b/train_textual_inversion.py index df1d8485..7697b967 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -8,6 +8,7 @@ from tqdm import tqdm import torch from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from accelerate.utils import set_seed @@ -29,6 +30,7 @@ from library.custom_train_functions import ( scale_v_prediction_loss_like_noise_prediction, add_v_prediction_like_loss, apply_debiased_estimation, + apply_masked_loss, ) from library.utils import setup_logging, add_logging_arguments @@ -268,7 +270,7 @@ class TextualInversionTrainer: # データセットを準備する if args.dataset_class is None: - blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, True, False, False)) + blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, True, args.masked_loss, False)) if args.dataset_config is not None: accelerator.print(f"Load dataset config from {args.dataset_config}") user_config = config_util.load_user_config(args.dataset_config) @@ -586,6 +588,8 @@ class TextualInversionTrainer: target = noise loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none") + if args.masked_loss: + loss = apply_masked_loss(loss, batch) loss = loss.mean([1, 2, 3]) loss_weights = batch["loss_weights"] # 各sampleごとのweight @@ -749,6 +753,7 @@ def setup_parser() -> argparse.ArgumentParser: train_util.add_sd_models_arguments(parser) train_util.add_dataset_arguments(parser, True, True, False) train_util.add_training_arguments(parser, True) + train_util.add_masked_loss_arguments(parser) train_util.add_optimizer_arguments(parser) config_util.add_config_arguments(parser) custom_train_functions.add_custom_train_arguments(parser, False) diff --git a/train_textual_inversion_XTI.py b/train_textual_inversion_XTI.py index 695fad2a..72b79da4 100644 --- a/train_textual_inversion_XTI.py +++ 
b/train_textual_inversion_XTI.py @@ -9,6 +9,7 @@ from tqdm import tqdm import torch from library.device_utils import init_ipex, clean_memory_on_device + init_ipex() from accelerate.utils import set_seed @@ -31,6 +32,7 @@ from library.custom_train_functions import ( apply_noise_offset, scale_v_prediction_loss_like_noise_prediction, apply_debiased_estimation, + apply_masked_loss, ) import library.original_unet as original_unet from XTI_hijack import unet_forward_XTI, downblock_forward_XTI, upblock_forward_XTI @@ -200,7 +202,7 @@ def train(args): logger.info(f"create embeddings for {args.num_vectors_per_token} tokens, for {args.token_string}") # データセットを準備する - blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, True, False, False)) + blueprint_generator = BlueprintGenerator(ConfigSanitizer(True, True, args.masked_loss, False)) if args.dataset_config is not None: logger.info(f"Load dataset config from {args.dataset_config}") user_config = config_util.load_user_config(args.dataset_config) @@ -471,6 +473,8 @@ def train(args): target = noise loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float(), reduction="none") + if args.masked_loss: + loss = apply_masked_loss(loss, batch) loss = loss.mean([1, 2, 3]) loss_weights = batch["loss_weights"] # 各sampleごとのweight @@ -662,6 +666,7 @@ def setup_parser() -> argparse.ArgumentParser: train_util.add_sd_models_arguments(parser) train_util.add_dataset_arguments(parser, True, True, False) train_util.add_training_arguments(parser, True) + train_util.add_masked_loss_arguments(parser) train_util.add_optimizer_arguments(parser) config_util.add_config_arguments(parser) custom_train_functions.add_custom_train_arguments(parser, False) From a7dff592d34a5dd9d306de822db70f0028676cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=92=E9=BE=8D=E8=81=96=E8=80=85=40bdsqlsz?= Date: Mon, 18 Mar 2024 22:29:05 +0800 Subject: [PATCH 28/69] Update tag_images_by_wd14_tagger.py add WDV3 --- finetune/tag_images_by_wd14_tagger.py | 
21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/finetune/tag_images_by_wd14_tagger.py b/finetune/tag_images_by_wd14_tagger.py index b56d921a..e63ec3eb 100644 --- a/finetune/tag_images_by_wd14_tagger.py +++ b/finetune/tag_images_by_wd14_tagger.py @@ -86,23 +86,26 @@ def main(args): logger.info(f"downloading wd14 tagger model from hf_hub. id: {args.repo_id}") files = FILES if args.onnx: + files = ["selected_tags.csv"] files += FILES_ONNX + else: + for file in SUB_DIR_FILES: + hf_hub_download( + args.repo_id, + file, + subfolder=SUB_DIR, + cache_dir=os.path.join(args.model_dir, SUB_DIR), + force_download=True, + force_filename=file, + ) for file in files: hf_hub_download(args.repo_id, file, cache_dir=args.model_dir, force_download=True, force_filename=file) - for file in SUB_DIR_FILES: - hf_hub_download( - args.repo_id, - file, - subfolder=SUB_DIR, - cache_dir=os.path.join(args.model_dir, SUB_DIR), - force_download=True, - force_filename=file, - ) else: logger.info("using existing wd14 tagger model") # 画像を読み込む if args.onnx: + import torch import onnx import onnxruntime as ort From 5410a8c79b23c594bb340050b4a81e30d95cd7be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=92=E9=BE=8D=E8=81=96=E8=80=85=40bdsqlsz?= Date: Mon, 18 Mar 2024 22:31:00 +0800 Subject: [PATCH 29/69] Update requirements.txt --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 279de350..326b65b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,9 +22,9 @@ huggingface-hub==0.20.1 # for WD14 captioning (tensorflow) # tensorflow==2.10.1 # for WD14 captioning (onnx) -# onnx==1.14.1 -# onnxruntime-gpu==1.16.0 -# onnxruntime==1.16.0 +# onnx==1.15.1 +# onnxruntime-gpu==1.17.1 +# onnxruntime==1.17.1 # this is for onnx: # protobuf==3.20.3 # open clip for SDXL From a71c35ccd9c813821fcbd3f0e00d71fb5e6d91d6 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=E9=9D=92=E9=BE=8D=E8=81=96=E8=80=85=40bdsqlsz?= Date: Mon, 18 Mar 2024 22:31:59 +0800 Subject: [PATCH 30/69] Update requirements.txt --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements.txt b/requirements.txt index 326b65b3..6898eccf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,6 +25,9 @@ huggingface-hub==0.20.1 # onnx==1.15.1 # onnxruntime-gpu==1.17.1 # onnxruntime==1.17.1 +# for cuda 12.1(default 11.8) +# onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ + # this is for onnx: # protobuf==3.20.3 # open clip for SDXL From 6c51c971d135a346d2f9081760f138b1c6515e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=92=E9=BE=8D=E8=81=96=E8=80=85=40bdsqlsz?= Date: Wed, 20 Mar 2024 09:35:21 +0800 Subject: [PATCH 31/69] fix typo --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6898eccf..805f0501 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ huggingface-hub==0.20.1 # for WD14 captioning (tensorflow) # tensorflow==2.10.1 # for WD14 captioning (onnx) -# onnx==1.15.1 +# onnx==1.15.0 # onnxruntime-gpu==1.17.1 # onnxruntime==1.17.1 # for cuda 12.1(default 11.8) From 80dbbf5e4875f56ff1e0d8aacea4e73b96a14b63 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Wed, 20 Mar 2024 16:14:57 +0900 Subject: [PATCH 32/69] tagger now stores model under repo_id subdir --- README.md | 9 ++++- finetune/tag_images_by_wd14_tagger.py | 55 ++++++++++++++++++--------- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index f0cad611..d0320403 100644 --- a/README.md +++ b/README.md @@ -260,7 +260,9 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - `keep_tokens_separator` is updated to be used twice in the caption. 
When you specify `keep_tokens_separator="|||"`, the part divided by the second `|||` is not shuffled or dropped and remains at the end. - The existing features `caption_prefix` and `caption_suffix` can be used together. `caption_prefix` and `caption_suffix` are processed first, and then `enable_wildcard`, `keep_tokens_separator`, shuffling and dropping, and `secondary_separator` are processed in order. - The examples are [shown below](#example-of-dataset-settings--データセット設定の記述例). - +- The support for v3 repositories is added to `tag_image_by_wd14_tagger.py` (`--onnx` option only). PR [#1192](https://github.com/kohya-ss/sd-scripts/pull/1192) Thanks to sdbds! + - Onnx may need to be updated. Onnx is not installed by default, so please install or update it with `pip install onnx==1.15.0 onnxruntime-gpu==1.17.1` etc. Please also check the comments in `requirements.txt`. +- The model is now saved in the subdirectory as `--repo_id` in `tag_image_by_wd14_tagger.py` . This caches multiple repo_id models. Please delete unnecessary files under `--model_dir`. - Colab での動作時、ログ出力で停止してしまうようです。学習スクリプトに `--console_log_simple` オプションを指定し、rich のロギングを無効してお試しください。 - `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 @@ -269,6 +271,11 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. 
See [docum - `enable_wildcard` を追加しました。`true` にするとワイルドカード記法 `{aaa|bbb|ccc}` が使えます。詳しくは記述例をご覧ください。 - `keep_tokens_separator` をキャプション内に 2 つ使えるようにしました。たとえば `keep_tokens_separator="|||"` と指定したとき、`1girl, hatsune miku, vocaloid ||| stage, mic ||| best quality, rating: general` とキャプションを指定すると、二番目の `|||` で分割された部分はシャッフル、drop されず末尾に残ります。 - 既存の機能 `caption_prefix` と `caption_suffix` とあわせて使えます。`caption_prefix` と `caption_suffix` は一番最初に処理され、その後、ワイルドカード、`keep_tokens_separator`、シャッフルおよび drop、`secondary_separator` の順に処理されます。 +- `tag_image_by_wd14_tagger.py` で v3 のリポジトリがサポートされました(`--onnx` 指定時のみ有効)。 PR [#1192](https://github.com/kohya-ss/sd-scripts/pull/1192) sdbds 氏に感謝します。 + - Onnx のバージョンアップが必要になるかもしれません。デフォルトでは Onnx はインストールされていませんので、`pip install onnx==1.15.0 onnxruntime-gpu==1.17.1` 等でインストール、アップデートしてください。`requirements.txt` のコメントもあわせてご確認ください。 +- `tag_image_by_wd14_tagger.py` で、モデルを`--repo_id` のサブディレクトリに保存するようにしました。これにより複数のモデルファイルがキャッシュされます。`--model_dir` 直下の不要なファイルは削除願います。 + + #### Example of dataset settings / データセット設定の記述例: diff --git a/finetune/tag_images_by_wd14_tagger.py b/finetune/tag_images_by_wd14_tagger.py index e63ec3eb..401c6d1e 100644 --- a/finetune/tag_images_by_wd14_tagger.py +++ b/finetune/tag_images_by_wd14_tagger.py @@ -12,8 +12,10 @@ from tqdm import tqdm import library.train_util as train_util from library.utils import setup_logging + setup_logging() import logging + logger = logging.getLogger(__name__) # from wd14 tagger @@ -79,10 +81,15 @@ def collate_fn_remove_corrupted(batch): def main(args): + # model location is model_dir + repo_id + # repo id may be like "user/repo" or "user/repo/branch", so we need to remove slash + model_location = os.path.join(args.model_dir, args.repo_id.replace("/", "_")) + # hf_hub_downloadをそのまま使うとsymlink関係で問題があるらしいので、キャッシュディレクトリとforce_filenameを指定してなんとかする # depreacatedの警告が出るけどなくなったらその時 # https://github.com/toriato/stable-diffusion-webui-wd14-tagger/issues/22 - if not os.path.exists(args.model_dir) or args.force_download: + if not 
os.path.exists(model_location) or args.force_download: + os.makedirs(args.model_dir, exist_ok=True) logger.info(f"downloading wd14 tagger model from hf_hub. id: {args.repo_id}") files = FILES if args.onnx: @@ -94,12 +101,12 @@ def main(args): args.repo_id, file, subfolder=SUB_DIR, - cache_dir=os.path.join(args.model_dir, SUB_DIR), + cache_dir=os.path.join(model_location, SUB_DIR), force_download=True, force_filename=file, ) for file in files: - hf_hub_download(args.repo_id, file, cache_dir=args.model_dir, force_download=True, force_filename=file) + hf_hub_download(args.repo_id, file, cache_dir=model_location, force_download=True, force_filename=file) else: logger.info("using existing wd14 tagger model") @@ -109,7 +116,7 @@ def main(args): import onnx import onnxruntime as ort - onnx_path = f"{args.model_dir}/model.onnx" + onnx_path = f"{model_location}/model.onnx" logger.info("Running wd14 tagger with onnx") logger.info(f"loading onnx model: {onnx_path}") @@ -126,7 +133,7 @@ def main(args): except: batch_size = model.graph.input[0].type.tensor_type.shape.dim[0].dim_param - if args.batch_size != batch_size and type(batch_size) != str: + if args.batch_size != batch_size and type(batch_size) != str and batch_size > 0: # some rebatch model may use 'N' as dynamic axes logger.warning( f"Batch size {args.batch_size} doesn't match onnx model batch size {batch_size}, use model batch size {batch_size}" @@ -137,19 +144,19 @@ def main(args): ort_sess = ort.InferenceSession( onnx_path, - providers=["CUDAExecutionProvider"] - if "CUDAExecutionProvider" in ort.get_available_providers() - else ["CPUExecutionProvider"], + providers=( + ["CUDAExecutionProvider"] if "CUDAExecutionProvider" in ort.get_available_providers() else ["CPUExecutionProvider"] + ), ) else: from tensorflow.keras.models import load_model - model = load_model(f"{args.model_dir}") + model = load_model(f"{model_location}") # label_names = pd.read_csv("2022_0000_0899_6549/selected_tags.csv") # 
依存ライブラリを増やしたくないので自力で読むよ - with open(os.path.join(args.model_dir, CSV_FILE), "r", encoding="utf-8") as f: + with open(os.path.join(model_location, CSV_FILE), "r", encoding="utf-8") as f: reader = csv.reader(f) l = [row for row in reader] header = l[0] # tag_id,name,category,count @@ -175,8 +182,8 @@ def main(args): imgs = np.array([im for _, im in path_imgs]) if args.onnx: - if len(imgs) < args.batch_size: - imgs = np.concatenate([imgs, np.zeros((args.batch_size - len(imgs), IMAGE_SIZE, IMAGE_SIZE, 3))], axis=0) + # if len(imgs) < args.batch_size: + # imgs = np.concatenate([imgs, np.zeros((args.batch_size - len(imgs), IMAGE_SIZE, IMAGE_SIZE, 3))], axis=0) probs = ort_sess.run(None, {input_name: imgs})[0] # onnx output numpy probs = probs[: len(path_imgs)] else: @@ -317,7 +324,9 @@ def setup_parser() -> argparse.ArgumentParser: help="directory to store wd14 tagger model / wd14 taggerのモデルを格納するディレクトリ", ) parser.add_argument( - "--force_download", action="store_true", help="force downloading wd14 tagger models / wd14 taggerのモデルを再ダウンロードします" + "--force_download", + action="store_true", + help="force downloading wd14 tagger models / wd14 taggerのモデルを再ダウンロードします", ) parser.add_argument("--batch_size", type=int, default=1, help="batch size in inference / 推論時のバッチサイズ") parser.add_argument( @@ -332,8 +341,12 @@ def setup_parser() -> argparse.ArgumentParser: default=None, help="extension of caption file (for backward compatibility) / 出力されるキャプションファイルの拡張子(スペルミスしていたのを残してあります)", ) - parser.add_argument("--caption_extension", type=str, default=".txt", help="extension of caption file / 出力されるキャプションファイルの拡張子") - parser.add_argument("--thresh", type=float, default=0.35, help="threshold of confidence to add a tag / タグを追加するか判定する閾値") + parser.add_argument( + "--caption_extension", type=str, default=".txt", help="extension of caption file / 出力されるキャプションファイルの拡張子" + ) + parser.add_argument( + "--thresh", type=float, default=0.35, help="threshold of confidence to add a tag / タグを追加するか判定する閾値" + ) 
parser.add_argument( "--general_threshold", type=float, @@ -346,7 +359,9 @@ def setup_parser() -> argparse.ArgumentParser: default=None, help="threshold of confidence to add a tag for character category, same as --thres if omitted / characterカテゴリのタグを追加するための確信度の閾値、省略時は --thresh と同じ", ) - parser.add_argument("--recursive", action="store_true", help="search for images in subfolders recursively / サブフォルダを再帰的に検索する") + parser.add_argument( + "--recursive", action="store_true", help="search for images in subfolders recursively / サブフォルダを再帰的に検索する" + ) parser.add_argument( "--remove_underscore", action="store_true", @@ -359,9 +374,13 @@ def setup_parser() -> argparse.ArgumentParser: default="", help="comma-separated list of undesired tags to remove from the output / 出力から除外したいタグのカンマ区切りのリスト", ) - parser.add_argument("--frequency_tags", action="store_true", help="Show frequency of tags for images / 画像ごとのタグの出現頻度を表示する") + parser.add_argument( + "--frequency_tags", action="store_true", help="Show frequency of tags for images / 画像ごとのタグの出現頻度を表示する" + ) parser.add_argument("--onnx", action="store_true", help="use onnx model for inference / onnxモデルを推論に使用する") - parser.add_argument("--append_tags", action="store_true", help="Append captions instead of overwriting / 上書きではなくキャプションを追記する") + parser.add_argument( + "--append_tags", action="store_true", help="Append captions instead of overwriting / 上書きではなくキャプションを追記する" + ) parser.add_argument( "--caption_separator", type=str, From 46331a9e8ef695ea0b5a19686202d011109a56b6 Mon Sep 17 00:00:00 2001 From: Victor Espinoza-Guerra Date: Wed, 20 Mar 2024 00:31:01 -0700 Subject: [PATCH 33/69] English Translation of config_README-ja.md (#1175) * Add files via upload Creating template to work on. * Update config_README-en.md Total Conversion from Japanese to English. 
* Update config_README-en.md * Update config_README-en.md * Update config_README-en.md --- docs/config_README-en.md | 279 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 docs/config_README-en.md diff --git a/docs/config_README-en.md b/docs/config_README-en.md new file mode 100644 index 00000000..a0727934 --- /dev/null +++ b/docs/config_README-en.md @@ -0,0 +1,279 @@ +Original Source by kohya-ss + +A.I Translation by Model: NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO, editing by Darkstorm2150 + +# Config Readme + +This README is about the configuration files that can be passed with the `--dataset_config` option. + +## Overview + +By passing a configuration file, users can make detailed settings. + +* Multiple datasets can be configured + * For example, by setting `resolution` for each dataset, they can be mixed and trained. + * In training methods that support both the DreamBooth approach and the fine-tuning approach, datasets of the DreamBooth method and the fine-tuning method can be mixed. +* Settings can be changed for each subset + * A subset is a partition of the dataset by image directory or metadata. Several subsets make up a dataset. + * Options such as `keep_tokens` and `flip_aug` can be set for each subset. On the other hand, options such as `resolution` and `batch_size` can be set for each dataset, and their values are common among subsets belonging to the same dataset. More details will be provided later. + +The configuration file format can be JSON or TOML. Considering the ease of writing, it is recommended to use [TOML](https://toml.io/ja/v1.0.0-rc.2). The following explanation assumes the use of TOML. + + +Here is an example of a configuration file written in TOML. 
+ +```toml +[general] +shuffle_caption = true +caption_extension = '.txt' +keep_tokens = 1 + +# This is a DreamBooth-style dataset +[[datasets]] +resolution = 512 +batch_size = 4 +keep_tokens = 2 + + [[datasets.subsets]] + image_dir = 'C:\hoge' + class_tokens = 'hoge girl' + # This subset uses keep_tokens = 2 (the value of the parent datasets) + + [[datasets.subsets]] + image_dir = 'C:\fuga' + class_tokens = 'fuga boy' + keep_tokens = 3 + + [[datasets.subsets]] + is_reg = true + image_dir = 'C:\reg' + class_tokens = 'human' + keep_tokens = 1 + +# This is a fine-tuning dataset +[[datasets]] +resolution = [768, 768] +batch_size = 2 + + [[datasets.subsets]] + image_dir = 'C:\piyo' + metadata_file = 'C:\piyo\piyo_md.json' + # This subset uses keep_tokens = 1 (the value of [general]) +``` + +In this example, three directories are trained as a DreamBooth-style dataset at 512x512 (batch size 4), and one directory is trained as a fine-tuning dataset at 768x768 (batch size 2). + +## Settings for datasets and subsets + +Settings for datasets and subsets are divided into several registration locations. + +* `[general]` + * This is where options that apply to all datasets or all subsets are specified. + * If there are options with the same name in the dataset-specific or subset-specific settings, the dataset-specific or subset-specific settings take precedence. +* `[[datasets]]` + * `datasets` is where settings for datasets are registered. This is where options that apply individually to each dataset are specified. + * If there are subset-specific settings, the subset-specific settings take precedence. +* `[[datasets.subsets]]` + * `datasets.subsets` is where settings for subsets are registered. This is where options that apply individually to each subset are specified. + +Here is an image showing the correspondence between image directories and registration locations in the previous example. 
+ +``` +C:\ +├─ hoge -> [[datasets.subsets]] No.1 ┐ ┐ +├─ fuga -> [[datasets.subsets]] No.2 |-> [[datasets]] No.1 |-> [general] +├─ reg -> [[datasets.subsets]] No.3 ┘ | +└─ piyo -> [[datasets.subsets]] No.4 --> [[datasets]] No.2 ┘ +``` + +The image directory corresponds to each `[[datasets.subsets]]`. Then, multiple `[[datasets.subsets]]` are combined to form one `[[datasets]]`. All `[[datasets]]` and `[[datasets.subsets]]` belong to `[general]`. + +The available options for each registration location may differ, but if the same option is specified, the value in the lower registration location will take precedence. You can check how the `keep_tokens` option is handled in the previous example for better understanding. + +Additionally, the available options may vary depending on the method that the learning approach supports. + +* Options specific to the DreamBooth method +* Options specific to the fine-tuning method +* Options available when using the caption dropout technique + +When using both the DreamBooth method and the fine-tuning method, they can be used together with a learning approach that supports both. +When using them together, a point to note is that the method is determined based on the dataset, so it is not possible to mix DreamBooth method subsets and fine-tuning method subsets within the same dataset. +In other words, if you want to use both methods together, you need to set up subsets of different methods belonging to different datasets. + +In terms of program behavior, if the `metadata_file` option exists, it is determined to be a subset of fine-tuning. Therefore, for subsets belonging to the same dataset, as long as they are either "all have the `metadata_file` option" or "all have no `metadata_file` option," there is no problem. + +Below, the available options will be explained. For options with the same name as the command-line argument, the explanation will be omitted in principle. Please refer to other READMEs. 
+ +### Common options for all learning methods + +These are options that can be specified regardless of the learning method. + +#### Dataset-specific options + +These are options related to the configuration of the dataset. They cannot be specified in `datasets.subsets`. + + +| Option Name | Example Setting | `[general]` | `[[datasets]]` | +| ---- | ---- | ---- | ---- | +| `batch_size` | `1` | o | o | +| `bucket_no_upscale` | `true` | o | o | +| `bucket_reso_steps` | `64` | o | o | +| `enable_bucket` | `true` | o | o | +| `max_bucket_reso` | `1024` | o | o | +| `min_bucket_reso` | `128` | o | o | +| `resolution` | `256`, `[512, 512]` | o | o | + +* `batch_size` + * This corresponds to the command-line argument `--train_batch_size`. + +These settings are fixed per dataset. That means that subsets belonging to the same dataset will share these settings. For example, if you want to prepare datasets with different resolutions, you can define them as separate datasets as shown in the example above, and set different resolutions for each. + +#### Options for Subsets + +These options are related to subset configuration. + +| Option Name | Example | `[general]` | `[[datasets]]` | `[[datasets.subsets]]` | +| ---- | ---- | ---- | ---- | ---- | +| `color_aug` | `false` | o | o | o | +| `face_crop_aug_range` | `[1.0, 3.0]` | o | o | o | +| `flip_aug` | `true` | o | o | o | +| `keep_tokens` | `2` | o | o | o | +| `num_repeats` | `10` | o | o | o | +| `random_crop` | `false` | o | o | o | +| `shuffle_caption` | `true` | o | o | o | +| `caption_prefix` | `"masterpiece, best quality, "` | o | o | o | +| `caption_suffix` | `", from side"` | o | o | o | + +* `num_repeats` + * Specifies the number of repeats for images in a subset. This is equivalent to `--dataset_repeats` in fine-tuning but can be specified for any training method. +* `caption_prefix`, `caption_suffix` + * Specifies the prefix and suffix strings to be added to the captions.
Shuffling is performed with these strings included. Be cautious when using `keep_tokens`. + +### DreamBooth-specific options + +DreamBooth-specific options only exist as subset-specific options. + +#### Subset-specific options + +Options related to the configuration of DreamBooth subsets. + +| Option Name | Example Setting | `[general]` | `[[datasets]]` | `[[datasets.subsets]]` | +| ---- | ---- | ---- | ---- | ---- | +| `image_dir` | `'C:\hoge'` | - | - | o (required) | +| `caption_extension` | `".txt"` | o | o | o | +| `class_tokens` | `"sks girl"` | - | - | o | +| `is_reg` | `false` | - | - | o | + +First, note that the image files must be placed directly under the directory specified by `image_dir`. Unlike the previous DreamBooth method, where images had to be placed in subdirectories, this is not compatible with that specification. Also, even if you name the folder something like "5_cat", the number of repeats of the image and the class name will not be reflected. If you want to set these individually, you will need to explicitly specify them using `num_repeats` and `class_tokens`. + +* `image_dir` + * Specifies the path to the image directory. This is a required option. + * Images must be placed directly under the directory. +* `class_tokens` + * Sets the class tokens. + * Only used during training when a corresponding caption file does not exist. The determination of whether or not to use it is made on a per-image basis. If `class_tokens` is not specified and a caption file is not found, an error will occur. +* `is_reg` + * Specifies whether the subset images are for regularization. If not specified, it is set to `false`, meaning that the images are not for regularization. + +### Fine-tuning method specific options + +The options for the fine-tuning method only exist for subset-specific options. + +#### Subset-specific options + +These options are related to the configuration of the fine-tuning method's subsets.
+ +| Option name | Example setting | `[general]` | `[[datasets]]` | `[[datasets.subsets]]` | +| ---- | ---- | ---- | ---- | ---- | +| `image_dir` | `'C:\hoge'` | - | - | o | +| `metadata_file` | `'C:\piyo\piyo_md.json'` | - | - | o (required) | + +* `image_dir` + * Specify the path to the image directory. Unlike the DreamBooth method, specifying it is not mandatory, but it is recommended to do so. + * It can be omitted only when the `--full_path` option was added to the command line when generating the metadata file. + * The images must be placed directly under the directory. +* `metadata_file` + * Specify the path to the metadata file used for the subset. This is a required option. + * It is equivalent to the command-line argument `--in_json`. + * Due to the specification that a metadata file must be specified for each subset, it is recommended to avoid creating a metadata file with images from different directories as a single metadata file. It is strongly recommended to prepare a separate metadata file for each image directory and register them as separate subsets. + +### Options available when caption dropout method can be used + +The options available when the caption dropout method can be used exist only for subsets. Regardless of whether it's the DreamBooth method or fine-tuning method, if it supports caption dropout, it can be specified. + +#### Subset-specific options + +Options related to the configuration of subsets for which caption dropout can be used. + +| Option Name | `[general]` | `[[datasets]]` | `[[datasets.subsets]]` | +| ---- | ---- | ---- | ---- | +| `caption_dropout_every_n_epochs` | o | o | o | +| `caption_dropout_rate` | o | o | o | +| `caption_tag_dropout_rate` | o | o | o | + +## Behavior when there are duplicate subsets + +In the case of the DreamBooth dataset, if there are multiple subsets with the same `image_dir`, they are considered to be duplicate subsets.
For the fine-tuning dataset, if there are multiple subsets with the same `metadata_file`, they are considered to be duplicate subsets. If duplicate subsets exist in the dataset, subsequent subsets will be ignored. + +However, if they belong to different datasets, they are not considered duplicates. For example, if you have subsets with the same `image_dir` in different datasets, they will not be considered duplicates. This is useful when you want to train with the same image but with different resolutions. + +```toml +# If datasets exist separately, they are not considered duplicates and are both used for training. + +[[datasets]] +resolution = 512 + + [[datasets.subsets]] + image_dir = 'C:\hoge' + +[[datasets]] +resolution = 768 + + [[datasets.subsets]] + image_dir = 'C:\hoge' +``` + +## Command Line Argument and Configuration File + +There are options in the configuration file that have overlapping roles with command line argument options. + +The following command line argument options are ignored if a configuration file is passed: + +* `--train_data_dir` +* `--reg_data_dir` +* `--in_json` + +For the following command line argument options, if a value is specified both on the command line and in the configuration file, the value in the configuration file takes priority over the command line value. In most cases, they have the same names as the corresponding options in the configuration file.
+ +| Command Line Argument Option | Prioritized Configuration File Option | +| ------------------------------- | ------------------------------------- | +| `--bucket_no_upscale` | | +| `--bucket_reso_steps` | | +| `--caption_dropout_every_n_epochs` | | +| `--caption_dropout_rate` | | +| `--caption_extension` | | +| `--caption_tag_dropout_rate` | | +| `--color_aug` | | +| `--dataset_repeats` | `num_repeats` | +| `--enable_bucket` | | +| `--face_crop_aug_range` | | +| `--flip_aug` | | +| `--keep_tokens` | | +| `--min_bucket_reso` | | +| `--random_crop` | | +| `--resolution` | | +| `--shuffle_caption` | | +| `--train_batch_size` | `batch_size` | + +## Error Guide + +Currently, we are using an external library to check if the configuration file is written correctly, but the development has not been completed, and there is a problem that the error message is not clear. In the future, we plan to improve this problem. + +As a temporary measure, we will list common errors and their solutions. If you encounter an error even though it should be correct or if the error content is not understandable, please contact us as it may be a bug. + +* `voluptuous.error.MultipleInvalid: required key not provided @ ...`: This error occurs when a required option is not provided. It is highly likely that you forgot to specify the option or misspelled the option name. + * The error location is indicated by `...` in the error message. For example, if you encounter an error like `voluptuous.error.MultipleInvalid: required key not provided @ data['datasets'][0]['subsets'][0]['image_dir']`, it means that the `image_dir` option does not exist in the 0th `subsets` of the 0th `datasets` setting. +* `voluptuous.error.MultipleInvalid: expected int for dictionary value @ ...`: This error occurs when the specified value format is incorrect. It is highly likely that the value format is incorrect. The `int` part changes depending on the target option. 
The example configurations in this README may be helpful. +* `voluptuous.error.MultipleInvalid: extra keys not allowed @ ...`: This error occurs when there is an option name that is not supported. It is highly likely that you misspelled the option name or mistakenly included it. + + From 5f6196e4c71763250da316cc0f4ce15db1696017 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Wed, 20 Mar 2024 16:35:23 +0900 Subject: [PATCH 34/69] update readme --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d0320403..f8601c1b 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ Most of the documents are written in Japanese. * [Training guide - common](./docs/train_README-ja.md) : data preparation, options etc... * [Chinese version](./docs/train_README-zh.md) * [Dataset config](./docs/config_README-ja.md) + * [English version](./docs/config_README-en.md) * [DreamBooth training guide](./docs/train_db_README-ja.md) * [Step by Step fine-tuning guide](./docs/fine_tune_README_ja.md): * [training LoRA](./docs/train_network_README-ja.md) @@ -263,6 +264,8 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - The support for v3 repositories is added to `tag_image_by_wd14_tagger.py` (`--onnx` option only). PR [#1192](https://github.com/kohya-ss/sd-scripts/pull/1192) Thanks to sdbds! - Onnx may need to be updated. Onnx is not installed by default, so please install or update it with `pip install onnx==1.15.0 onnxruntime-gpu==1.17.1` etc. Please also check the comments in `requirements.txt`. - The model is now saved in the subdirectory as `--repo_id` in `tag_image_by_wd14_tagger.py` . This caches multiple repo_id models. Please delete unnecessary files under `--model_dir`. +- The options `--noise_offset_random_strength` and `--ip_noise_gamma_random_strength` are added to each training script. 
These options can be used to vary the noise offset and ip noise gamma in the range of 0 to the specified value. PR [#1177](https://github.com/kohya-ss/sd-scripts/pull/1177) Thanks to KohakuBlueleaf! +- The [English version of the dataset settings documentation](./docs/config_README-en.md) is added. PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) Thanks to darkstorm2150! - Colab での動作時、ログ出力で停止してしまうようです。学習スクリプトに `--console_log_simple` オプションを指定し、rich のロギングを無効してお試しください。 - `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 @@ -274,7 +277,8 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - `tag_image_by_wd14_tagger.py` で v3 のリポジトリがサポートされました(`--onnx` 指定時のみ有効)。 PR [#1192](https://github.com/kohya-ss/sd-scripts/pull/1192) sdbds 氏に感謝します。 - Onnx のバージョンアップが必要になるかもしれません。デフォルトでは Onnx はインストールされていませんので、`pip install onnx==1.15.0 onnxruntime-gpu==1.17.1` 等でインストール、アップデートしてください。`requirements.txt` のコメントもあわせてご確認ください。 - `tag_image_by_wd14_tagger.py` で、モデルを`--repo_id` のサブディレクトリに保存するようにしました。これにより複数のモデルファイルがキャッシュされます。`--model_dir` 直下の不要なファイルは削除願います。 - +- 各学習スクリプトに、noise offset、ip noise gammaを、それぞれ 0~指定した値の範囲で変動させるオプション `--noise_offset_random_strength` および `--ip_noise_gamma_random_strength` が追加されました。 PR [#1177](https://github.com/kohya-ss/sd-scripts/pull/1177) KohakuBlueleaf 氏に感謝します。 +- データセット設定の[英語版ドキュメント](./docs/config_README-en.md) が追加されました。PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) darkstorm2150 氏に感謝します。 #### Example of dataset settings / データセット設定の記述例: From 3b0db0f17f46148abe345c5cdce76ff707bdccd3 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Wed, 20 Mar 2024 17:45:35 +0900 Subject: [PATCH 35/69] update readme --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f8601c1b..81c176a7 100644 --- a/README.md +++ b/README.md @@ -266,6 
+266,8 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - The model is now saved in the subdirectory as `--repo_id` in `tag_image_by_wd14_tagger.py` . This caches multiple repo_id models. Please delete unnecessary files under `--model_dir`. - The options `--noise_offset_random_strength` and `--ip_noise_gamma_random_strength` are added to each training script. These options can be used to vary the noise offset and ip noise gamma in the range of 0 to the specified value. PR [#1177](https://github.com/kohya-ss/sd-scripts/pull/1177) Thanks to KohakuBlueleaf! - The [English version of the dataset settings documentation](./docs/config_README-en.md) is added. PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) Thanks to darkstorm2150! +- The `.toml` file for the dataset config is now read in UTF-8 encoding. PR [#1167](https://github.com/kohya-ss/sd-scripts/pull/1167) Thanks to Horizon1704! + - Colab での動作時、ログ出力で停止してしまうようです。学習スクリプトに `--console_log_simple` オプションを指定し、rich のロギングを無効してお試しください。 - `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 @@ -279,7 +281,7 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. 
See [docum - `tag_image_by_wd14_tagger.py` で、モデルを`--repo_id` のサブディレクトリに保存するようにしました。これにより複数のモデルファイルがキャッシュされます。`--model_dir` 直下の不要なファイルは削除願います。 - 各学習スクリプトに、noise offset、ip noise gammaを、それぞれ 0~指定した値の範囲で変動させるオプション `--noise_offset_random_strength` および `--ip_noise_gamma_random_strength` が追加されました。 PR [#1177](https://github.com/kohya-ss/sd-scripts/pull/1177) KohakuBlueleaf 氏に感謝します。 - データセット設定の[英語版ドキュメント](./docs/config_README-en.md) が追加されました。PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) darkstorm2150 氏に感謝します。 - +- データセット設定の `.toml` ファイルが UTF-8 encoding で読み込まれるようになりました。PR [#1167](https://github.com/kohya-ss/sd-scripts/pull/1167) Horizon1704 氏に感謝します。 #### Example of dataset settings / データセット設定の記述例: From 855add067b06464eaa47ed55840da0f17d675762 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Wed, 20 Mar 2024 18:14:05 +0900 Subject: [PATCH 36/69] update option help and readme --- README.md | 7 +++++-- library/train_util.py | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 81c176a7..804bad84 100644 --- a/README.md +++ b/README.md @@ -253,6 +253,7 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum ### Working in progress - Colab seems to stop with log output. Try specifying `--console_log_simple` option in the training script to disable rich logging. +- The `.toml` file for the dataset config is now read in UTF-8 encoding. PR [#1167](https://github.com/kohya-ss/sd-scripts/pull/1167) Thanks to Horizon1704! - `train_network.py` and `sdxl_train_network.py` are modified to record some dataset settings in the metadata of the trained model (`caption_prefix`, `caption_suffix`, `keep_tokens_separator`, `secondary_separator`, `enable_wildcard`). - Some features are added to the dataset subset settings. - `secondary_separator` is added to specify the tag separator that is not the target of shuffling or dropping. 
@@ -266,10 +267,11 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - The model is now saved in the subdirectory as `--repo_id` in `tag_image_by_wd14_tagger.py` . This caches multiple repo_id models. Please delete unnecessary files under `--model_dir`. - The options `--noise_offset_random_strength` and `--ip_noise_gamma_random_strength` are added to each training script. These options can be used to vary the noise offset and ip noise gamma in the range of 0 to the specified value. PR [#1177](https://github.com/kohya-ss/sd-scripts/pull/1177) Thanks to KohakuBlueleaf! - The [English version of the dataset settings documentation](./docs/config_README-en.md) is added. PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) Thanks to darkstorm2150! -- The `.toml` file for the dataset config is now read in UTF-8 encoding. PR [#1167](https://github.com/kohya-ss/sd-scripts/pull/1167) Thanks to Horizon1704! +- The options `--save_state_on_train_end` are added to each training script. PR [#1168](https://github.com/kohya-ss/sd-scripts/pull/1168) Thanks to gesen2egee! - Colab での動作時、ログ出力で停止してしまうようです。学習スクリプトに `--console_log_simple` オプションを指定し、rich のロギングを無効してお試しください。 +- データセット設定の `.toml` ファイルが UTF-8 encoding で読み込まれるようになりました。PR [#1167](https://github.com/kohya-ss/sd-scripts/pull/1167) Horizon1704 氏に感謝します。 - `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 - データセットのサブセット設定にいくつかの機能を追加しました。 - シャッフルの対象とならないタグ分割識別子の指定 `secondary_separator` を追加しました。`secondary_separator=";;;"` のように指定します。`secondary_separator` で区切ることで、その部分はシャッフル、drop 時にまとめて扱われます。詳しくは記述例をご覧ください。 @@ -281,7 +283,8 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. 
See [docum - `tag_image_by_wd14_tagger.py` で、モデルを`--repo_id` のサブディレクトリに保存するようにしました。これにより複数のモデルファイルがキャッシュされます。`--model_dir` 直下の不要なファイルは削除願います。 - 各学習スクリプトに、noise offset、ip noise gammaを、それぞれ 0~指定した値の範囲で変動させるオプション `--noise_offset_random_strength` および `--ip_noise_gamma_random_strength` が追加されました。 PR [#1177](https://github.com/kohya-ss/sd-scripts/pull/1177) KohakuBlueleaf 氏に感謝します。 - データセット設定の[英語版ドキュメント](./docs/config_README-en.md) が追加されました。PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) darkstorm2150 氏に感謝します。 -- データセット設定の `.toml` ファイルが UTF-8 encoding で読み込まれるようになりました。PR [#1167](https://github.com/kohya-ss/sd-scripts/pull/1167) Horizon1704 氏に感謝します。 +- 各学習スクリプトに、学習終了時に state を保存する `--save_state_on_train_end` オプションが追加されました。 PR [#1168](https://github.com/kohya-ss/sd-scripts/pull/1168) gesen2egee 氏に感謝します。 + #### Example of dataset settings / データセット設定の記述例: diff --git a/library/train_util.py b/library/train_util.py index 23961505..a13985ee 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -2936,13 +2936,13 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: parser.add_argument( "--save_state", action="store_true", - help="save training state additionally (including optimizer states etc.) / optimizerなど学習状態も含めたstateを追加で保存する", + help="save training state additionally (including optimizer states etc.) when saving model / optimizerなど学習状態も含めたstateをモデル保存時に追加で保存する", ) parser.add_argument( "--save_state_on_train_end", action="store_true", - help="save training state additionally (including optimizer states etc.) on train end / optimizerなど学習状態も含めたstateを追加で保存する", - ) + help="save training state (including optimizer states etc.) 
on train end / optimizerなど学習状態も含めたstateを学習完了時に保存する", + ) parser.add_argument("--resume", type=str, default=None, help="saved state to resume training / 学習再開するモデルのstate") parser.add_argument("--train_batch_size", type=int, default=1, help="batch size for training / 学習時のバッチサイズ") @@ -3550,7 +3550,7 @@ def read_config_from_file(args: argparse.Namespace, parser: argparse.ArgumentPar exit(1) logger.info(f"Loading settings from {config_path}...") - with open(config_path, "r", encoding='utf-8') as f: + with open(config_path, "r", encoding="utf-8") as f: config_dict = toml.load(f) # combine all sections into one From d9456020d7547743c809a7c93f9a487276a66c74 Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Wed, 20 Mar 2024 20:52:59 +0900 Subject: [PATCH 37/69] Fix most of ZeRO stage uses optimizer partitioning - we have to prepare optimizer and ds_model at the same time. - pull/1139#issuecomment-1986790007 Signed-off-by: BootsofLagrangian --- sdxl_train.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sdxl_train.py b/sdxl_train.py index 613fe30b..2cb80b6b 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -412,7 +412,10 @@ def train(args): text_encoder1=text_encoder1 if train_text_encoder1 else None, text_encoder2=text_encoder2 if train_text_encoder2 else None, ) - ds_model = accelerator.prepare(ds_model) + # most of ZeRO stage uses optimizer partitioning, so we have to prepare optimizer and ds_model at the same time. 
# pull/1139#issuecomment-1986790007 + ds_model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + ds_model, optimizer, train_dataloader, lr_scheduler + ) training_models = [ds_model] else: @@ -423,8 +426,7 @@ def train(args): text_encoder1 = accelerator.prepare(text_encoder1) if train_text_encoder2: text_encoder2 = accelerator.prepare(text_encoder2) - - optimizer, train_dataloader, lr_scheduler = accelerator.prepare(optimizer, train_dataloader, lr_scheduler) + optimizer, train_dataloader, lr_scheduler = accelerator.prepare(optimizer, train_dataloader, lr_scheduler) # TextEncoderの出力をキャッシュするときにはCPUへ移動する if args.cache_text_encoder_outputs: From d17c0f508416d734360393804732bfa420fe1c27 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Thu, 21 Mar 2024 08:31:29 +0900 Subject: [PATCH 38/69] update dataset config doc --- README.md | 88 +--------------------------------------- docs/config_README-en.md | 73 +++++++++++++++++++++++++++++++++ docs/config_README-ja.md | 75 +++++++++++++++++++++++++++++++++- 3 files changed, 148 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index 804bad84..dae31132 100644 --- a/README.md +++ b/README.md @@ -261,7 +261,7 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - `enable_wildcard` is added. When set to `true`, the wildcard notation `{aaa|bbb|ccc}` can be used. See the example below. - `keep_tokens_separator` is updated to be used twice in the caption. When you specify `keep_tokens_separator="|||"`, the part divided by the second `|||` is not shuffled or dropped and remains at the end. - The existing features `caption_prefix` and `caption_suffix` can be used together. `caption_prefix` and `caption_suffix` are processed first, and then `enable_wildcard`, `keep_tokens_separator`, shuffling and dropping, and `secondary_separator` are processed in order. - - The examples are [shown below](#example-of-dataset-settings--データセット設定の記述例). 
+ - See [Dataset config](./docs/config_README-en.md) for details. - The support for v3 repositories is added to `tag_image_by_wd14_tagger.py` (`--onnx` option only). PR [#1192](https://github.com/kohya-ss/sd-scripts/pull/1192) Thanks to sdbds! - Onnx may need to be updated. Onnx is not installed by default, so please install or update it with `pip install onnx==1.15.0 onnxruntime-gpu==1.17.1` etc. Please also check the comments in `requirements.txt`. - The model is now saved in the subdirectory as `--repo_id` in `tag_image_by_wd14_tagger.py` . This caches multiple repo_id models. Please delete unnecessary files under `--model_dir`. @@ -278,6 +278,7 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - `enable_wildcard` を追加しました。`true` にするとワイルドカード記法 `{aaa|bbb|ccc}` が使えます。詳しくは記述例をご覧ください。 - `keep_tokens_separator` をキャプション内に 2 つ使えるようにしました。たとえば `keep_tokens_separator="|||"` と指定したとき、`1girl, hatsune miku, vocaloid ||| stage, mic ||| best quality, rating: general` とキャプションを指定すると、二番目の `|||` で分割された部分はシャッフル、drop されず末尾に残ります。 - 既存の機能 `caption_prefix` と `caption_suffix` とあわせて使えます。`caption_prefix` と `caption_suffix` は一番最初に処理され、その後、ワイルドカード、`keep_tokens_separator`、シャッフルおよび drop、`secondary_separator` の順に処理されます。 + - 詳細は [データセット設定](./docs/config_README-ja.md) をご覧ください。 - `tag_image_by_wd14_tagger.py` で v3 のリポジトリがサポートされました(`--onnx` 指定時のみ有効)。 PR [#1192](https://github.com/kohya-ss/sd-scripts/pull/1192) sdbds 氏に感謝します。 - Onnx のバージョンアップが必要になるかもしれません。デフォルトでは Onnx はインストールされていませんので、`pip install onnx==1.15.0 onnxruntime-gpu==1.17.1` 等でインストール、アップデートしてください。`requirements.txt` のコメントもあわせてご確認ください。 - `tag_image_by_wd14_tagger.py` で、モデルを`--repo_id` のサブディレクトリに保存するようにしました。これにより複数のモデルファイルがキャッシュされます。`--model_dir` 直下の不要なファイルは削除願います。 @@ -286,91 +287,6 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. 
See [docum - 各学習スクリプトに、学習終了時に state を保存する `--save_state_on_train_end` オプションが追加されました。 PR [#1168](https://github.com/kohya-ss/sd-scripts/pull/1168) gesen2egee 氏に感謝します。 -#### Example of dataset settings / データセット設定の記述例: - -```toml -[general] -flip_aug = true -color_aug = false -resolution = [1024, 1024] - -[[datasets]] -batch_size = 6 -enable_bucket = true -bucket_no_upscale = true -caption_extension = ".txt" -keep_tokens_separator= "|||" -shuffle_caption = true -caption_tag_dropout_rate = 0.1 -secondary_separator = ";;;" # subset 側に書くこともできます / can be written in the subset side -enable_wildcard = true # 同上 / same as above - - [[datasets.subsets]] - image_dir = "/path/to/image_dir" - num_repeats = 1 - - # ||| の前後はカンマは不要です(自動的に追加されます) / No comma is required before and after ||| (it is added automatically) - caption_prefix = "1girl, hatsune miku, vocaloid |||" - - # ||| の後はシャッフル、drop されず残ります / After |||, it is not shuffled or dropped and remains - # 単純に文字列として連結されるので、カンマなどは自分で入れる必要があります / It is simply concatenated as a string, so you need to put commas yourself - caption_suffix = ", anime screencap ||| masterpiece, rating: general" -``` - -#### Example of caption, secondary_separator notation: `secondary_separator = ";;;"` - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors -``` -The part `sky;;;cloud;;;day` is replaced with `sky,cloud,day` without shuffling or dropping. When shuffling and dropping are enabled, it is processed as a whole (as one tag). For example, it becomes `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (shuffled) or `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (dropped). - -#### Example of caption, enable_wildcard notation: `enable_wildcard = true` - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background -``` -`simple` or `white` is randomly selected, and it becomes `simple background` or `white background`. 
- -```txt -1girl, hatsune miku, vocaloid, {{retro style}} -``` -If you want to include `{` or `}` in the tag string, double them like `{{` or `}}` (in this example, the actual caption used for training is `{retro style}`). - -#### Example of caption, `keep_tokens_separator` notation: `keep_tokens_separator = "|||"` - -```txt -1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general -``` -It becomes `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` or `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` etc. - - -#### キャプション記述例、secondary_separator 記法:`secondary_separator = ";;;"` の場合 - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors -``` -`sky;;;cloud;;;day` の部分はシャッフル、drop されず `sky,cloud,day` に置換されます。シャッフル、drop が有効な場合、まとめて(一つのタグとして)処理されます。つまり `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (シャッフル)や `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (drop されたケース)などになります。 - -#### キャプション記述例、ワイルドカード記法: `enable_wildcard = true` の場合 - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background -``` -ランダムに `simple` または `white` が選ばれ、`simple background` または `white background` になります。 - -```txt -1girl, hatsune miku, vocaloid, {{retro style}} -``` -タグ文字列に `{` や `}` そのものを含めたい場合は `{{` や `}}` のように二つ重ねてください(この例では実際に学習に用いられるキャプションは `{retro style}` になります)。 - -#### キャプション記述例、`keep_tokens_separator` 記法: `keep_tokens_separator = "|||"` の場合 - -```txt -1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general -``` -`1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` や `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` などになります。 - - ### Mar 15, 2024 / 2024/3/15: v0.8.5 - Fixed a bug 
that the value of timestep embedding during SDXL training was incorrect. diff --git a/docs/config_README-en.md b/docs/config_README-en.md index a0727934..bdcaabfc 100644 --- a/docs/config_README-en.md +++ b/docs/config_README-en.md @@ -1,7 +1,10 @@ Original Source by kohya-ss +First version: A.I Translation by Model: NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO, editing by Darkstorm2150 +Some parts are manually added. + # Config Readme This README is about the configuration files that can be passed with the `--dataset_config` option. @@ -143,11 +146,23 @@ These options are related to subset configuration. | `shuffle_caption` | `true` | o | o | o | | `caption_prefix` | `"masterpiece, best quality, "` | o | o | o | | `caption_suffix` | `", from side"` | o | o | o | +| `caption_separator` | (not specified) | o | o | o | +| `keep_tokens_separator` | `“|||”` | o | o | o | +| `secondary_separator` | `“;;;”` | o | o | o | +| `enable_wildcard` | `true` | o | o | o | * `num_repeats` * Specifies the number of repeats for images in a subset. This is equivalent to `--dataset_repeats` in fine-tuning but can be specified for any training method. * `caption_prefix`, `caption_suffix` * Specifies the prefix and suffix strings to be appended to the captions. Shuffling is performed with these strings included. Be cautious when using `keep_tokens`. +* `caption_separator` + * Specifies the string to separate the tags. The default is `,`. This option is usually not necessary to set. +* `keep_tokens_separator` + * Specifies the string to separate the parts to be fixed in the caption. For example, if you specify `aaa, bbb ||| ccc, ddd, eee, fff ||| ggg, hhh`, the parts `aaa, bbb` and `ggg, hhh` will remain, and the rest will be shuffled and dropped. The comma in between is not necessary. As a result, the prompt will be `aaa, bbb, eee, ccc, fff, ggg, hhh` or `aaa, bbb, fff, ccc, eee, ggg, hhh`, etc. +* `secondary_separator` + * Specifies an additional separator. 
The part separated by this separator is treated as one tag and is shuffled and dropped. It is then replaced by `caption_separator`. For example, if you specify `aaa;;;bbb;;;ccc`, it will be replaced by `aaa,bbb,ccc` or dropped together. +* `enable_wildcard` + * Enables wildcard notation. This will be explained later. ### DreamBooth-specific options @@ -276,4 +291,62 @@ As a temporary measure, we will list common errors and their solutions. If you e * `voluptuous.error.MultipleInvalid: expected int for dictionary value @ ...`: This error occurs when the specified value format is incorrect. It is highly likely that the value format is incorrect. The `int` part changes depending on the target option. The example configurations in this README may be helpful. * `voluptuous.error.MultipleInvalid: extra keys not allowed @ ...`: This error occurs when there is an option name that is not supported. It is highly likely that you misspelled the option name or mistakenly included it. +## Miscellaneous + +### Example of configuration file, 設定ファイルの記述例 + +```toml +[general] +flip_aug = true +color_aug = false +resolution = [1024, 1024] + +[[datasets]] +batch_size = 6 +enable_bucket = true +bucket_no_upscale = true +caption_extension = ".txt" +keep_tokens_separator= "|||" +shuffle_caption = true +caption_tag_dropout_rate = 0.1 +secondary_separator = ";;;" # subset 側に書くこともできます / can be written in the subset side +enable_wildcard = true # 同上 / same as above + + [[datasets.subsets]] + image_dir = "/path/to/image_dir" + num_repeats = 1 + + # ||| の前後はカンマは不要です(自動的に追加されます) / No comma is required before and after ||| (it is added automatically) + caption_prefix = "1girl, hatsune miku, vocaloid |||" + + # ||| の後はシャッフル、drop されず残ります / After |||, it is not shuffled or dropped and remains + # 単純に文字列として連結されるので、カンマなどは自分で入れる必要があります / It is simply concatenated as a string, so you need to put commas yourself + caption_suffix = ", anime screencap ||| masterpiece, rating: general" +``` + +### Example 
of caption, secondary_separator notation: `secondary_separator = ";;;"` + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors +``` +The part `sky;;;cloud;;;day` is replaced with `sky,cloud,day` without shuffling or dropping. When shuffling and dropping are enabled, it is processed as a whole (as one tag). For example, it becomes `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (shuffled) or `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (dropped). + +### Example of caption, enable_wildcard notation: `enable_wildcard = true` + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background +``` +`simple` or `white` is randomly selected, and it becomes `simple background` or `white background`. + +```txt +1girl, hatsune miku, vocaloid, {{retro style}} +``` +If you want to include `{` or `}` in the tag string, double them like `{{` or `}}` (in this example, the actual caption used for training is `{retro style}`). + +### Example of caption, `keep_tokens_separator` notation: `keep_tokens_separator = "|||"` + +```txt +1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general +``` +It becomes `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` or `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` etc. diff --git a/docs/config_README-ja.md b/docs/config_README-ja.md index 69a03f6c..47bb5c57 100644 --- a/docs/config_README-ja.md +++ b/docs/config_README-ja.md @@ -1,5 +1,3 @@ -For non-Japanese speakers: this README is provided only in Japanese in the current state. Sorry for inconvenience. We will provide English version in the near future. 
- `--dataset_config` で渡すことができる設定ファイルに関する説明です。 ## 概要 @@ -140,12 +138,28 @@ DreamBooth の手法と fine tuning の手法の両方とも利用可能な学 | `shuffle_caption` | `true` | o | o | o | | `caption_prefix` | `“masterpiece, best quality, ”` | o | o | o | | `caption_suffix` | `“, from side”` | o | o | o | +| `caption_separator` | (通常は設定しません) | o | o | o | +| `keep_tokens_separator` | `“|||”` | o | o | o | +| `secondary_separator` | `“;;;”` | o | o | o | +| `enable_wildcard` | `true` | o | o | o | * `num_repeats` * サブセットの画像の繰り返し回数を指定します。fine tuning における `--dataset_repeats` に相当しますが、`num_repeats` はどの学習方法でも指定可能です。 * `caption_prefix`, `caption_suffix` * キャプションの前、後に付与する文字列を指定します。シャッフルはこれらの文字列を含めた状態で行われます。`keep_tokens` を指定する場合には注意してください。 +* `caption_separator` + * タグを区切る文字列を指定します。デフォルトは `,` です。このオプションは通常は設定する必要はありません。 + +* `keep_tokens_separator` + * キャプションで固定したい部分を区切る文字列を指定します。たとえば `aaa, bbb ||| ccc, ddd, eee, fff ||| ggg, hhh` のように指定すると、`aaa, bbb` と `ggg, hhh` の部分はシャッフル、drop されず残ります。間のカンマは不要です。結果としてプロンプトは `aaa, bbb, eee, ccc, fff, ggg, hhh` や `aaa, bbb, fff, ccc, eee, ggg, hhh` などになります。 + +* `secondary_separator` + * 追加の区切り文字を指定します。この区切り文字で区切られた部分は一つのタグとして扱われ、シャッフル、drop されます。その後、`caption_separator` に置き換えられます。たとえば `aaa;;;bbb;;;ccc` のように指定すると、`aaa,bbb,ccc` に置き換えられるか、まとめて drop されます。 + +* `enable_wildcard` + * ワイルドカード記法を有効にします。ワイルドカード記法については後述します。 + ### DreamBooth 方式専用のオプション DreamBooth 方式のオプションは、サブセット向けオプションのみ存在します。 @@ -280,4 +294,61 @@ resolution = 768 * `voluptuous.error.MultipleInvalid: expected int for dictionary value @ ...`: 指定する値の形式が不正というエラーです。値の形式が間違っている可能性が高いです。`int` の部分は対象となるオプションによって変わります。この README に載っているオプションの「設定例」が役立つかもしれません。 * `voluptuous.error.MultipleInvalid: extra keys not allowed @ ...`: 対応していないオプション名が存在している場合に発生するエラーです。オプション名を間違って記述しているか、誤って紛れ込んでいる可能性が高いです。 +## その他 +### Example of configuration file, 設定ファイルの記述例 + +```toml +[general] +flip_aug = true +color_aug = false +resolution = [1024, 1024] + +[[datasets]] +batch_size = 6 +enable_bucket = true +bucket_no_upscale = true 
+caption_extension = ".txt" +keep_tokens_separator= "|||" +shuffle_caption = true +caption_tag_dropout_rate = 0.1 +secondary_separator = ";;;" # subset 側に書くこともできます / can be written in the subset side +enable_wildcard = true # 同上 / same as above + + [[datasets.subsets]] + image_dir = "/path/to/image_dir" + num_repeats = 1 + + # ||| の前後はカンマは不要です(自動的に追加されます) / No comma is required before and after ||| (it is added automatically) + caption_prefix = "1girl, hatsune miku, vocaloid |||" + + # ||| の後はシャッフル、drop されず残ります / After |||, it is not shuffled or dropped and remains + # 単純に文字列として連結されるので、カンマなどは自分で入れる必要があります / It is simply concatenated as a string, so you need to put commas yourself + caption_suffix = ", anime screencap ||| masterpiece, rating: general" +``` + +### キャプション記述例、secondary_separator 記法:`secondary_separator = ";;;"` の場合 + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors +``` +`sky;;;cloud;;;day` の部分はシャッフル、drop されず `sky,cloud,day` に置換されます。シャッフル、drop が有効な場合、まとめて(一つのタグとして)処理されます。つまり `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (シャッフル)や `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (drop されたケース)などになります。 + +### キャプション記述例、ワイルドカード記法: `enable_wildcard = true` の場合 + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background +``` +ランダムに `simple` または `white` が選ばれ、`simple background` または `white background` になります。 + +```txt +1girl, hatsune miku, vocaloid, {{retro style}} +``` +タグ文字列に `{` や `}` そのものを含めたい場合は `{{` や `}}` のように二つ重ねてください(この例では実際に学習に用いられるキャプションは `{retro style}` になります)。 + +### キャプション記述例、`keep_tokens_separator` 記法: `keep_tokens_separator = "|||"` の場合 + +```txt +1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general +``` +`1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` や `1girl, hatsune miku, vocaloid, white shirt, smile, stage, 
microphone, best quality, rating: general` などになります。 From 594c7f70500e402586654e73501e7d8fc74592b8 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sat, 23 Mar 2024 16:11:31 +0900 Subject: [PATCH 39/69] format by black --- finetune/merge_captions_to_metadata.py | 120 ++++++++++++++----------- finetune/merge_dd_tags_to_metadata.py | 110 +++++++++++++---------- 2 files changed, 134 insertions(+), 96 deletions(-) diff --git a/finetune/merge_captions_to_metadata.py b/finetune/merge_captions_to_metadata.py index 60765b86..89f71747 100644 --- a/finetune/merge_captions_to_metadata.py +++ b/finetune/merge_captions_to_metadata.py @@ -6,75 +6,95 @@ from tqdm import tqdm import library.train_util as train_util import os from library.utils import setup_logging + setup_logging() import logging + logger = logging.getLogger(__name__) + def main(args): - assert not args.recursive or (args.recursive and args.full_path), "recursive requires full_path / recursiveはfull_pathと同時に指定してください" + assert not args.recursive or ( + args.recursive and args.full_path + ), "recursive requires full_path / recursiveはfull_pathと同時に指定してください" - train_data_dir_path = Path(args.train_data_dir) - image_paths: List[Path] = train_util.glob_images_pathlib(train_data_dir_path, args.recursive) - logger.info(f"found {len(image_paths)} images.") + train_data_dir_path = Path(args.train_data_dir) + image_paths: List[Path] = train_util.glob_images_pathlib(train_data_dir_path, args.recursive) + logger.info(f"found {len(image_paths)} images.") - if args.in_json is None and Path(args.out_json).is_file(): - args.in_json = args.out_json + if args.in_json is None and Path(args.out_json).is_file(): + args.in_json = args.out_json - if args.in_json is not None: - logger.info(f"loading existing metadata: {args.in_json}") - metadata = json.loads(Path(args.in_json).read_text(encoding='utf-8')) - logger.warning("captions for existing images will be overwritten / 既存の画像のキャプションは上書きされます") - else: - logger.info("new metadata will be 
created / 新しいメタデータファイルが作成されます") - metadata = {} + if args.in_json is not None: + logger.info(f"loading existing metadata: {args.in_json}") + metadata = json.loads(Path(args.in_json).read_text(encoding="utf-8")) + logger.warning("captions for existing images will be overwritten / 既存の画像のキャプションは上書きされます") + else: + logger.info("new metadata will be created / 新しいメタデータファイルが作成されます") + metadata = {} - logger.info("merge caption texts to metadata json.") - for image_path in tqdm(image_paths): - caption_path = image_path.with_suffix(args.caption_extension) - caption = caption_path.read_text(encoding='utf-8').strip() + logger.info("merge caption texts to metadata json.") + for image_path in tqdm(image_paths): + caption_path = image_path.with_suffix(args.caption_extension) + caption = caption_path.read_text(encoding="utf-8").strip() - if not os.path.exists(caption_path): - caption_path = os.path.join(image_path, args.caption_extension) + if not os.path.exists(caption_path): + caption_path = os.path.join(image_path, args.caption_extension) - image_key = str(image_path) if args.full_path else image_path.stem - if image_key not in metadata: - metadata[image_key] = {} + image_key = str(image_path) if args.full_path else image_path.stem + if image_key not in metadata: + metadata[image_key] = {} - metadata[image_key]['caption'] = caption - if args.debug: - logger.info(f"{image_key} {caption}") + metadata[image_key]["caption"] = caption + if args.debug: + logger.info(f"{image_key} {caption}") - # metadataを書き出して終わり - logger.info(f"writing metadata: {args.out_json}") - Path(args.out_json).write_text(json.dumps(metadata, indent=2), encoding='utf-8') - logger.info("done!") + # metadataを書き出して終わり + logger.info(f"writing metadata: {args.out_json}") + Path(args.out_json).write_text(json.dumps(metadata, indent=2), encoding="utf-8") + logger.info("done!") def setup_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser() - parser.add_argument("train_data_dir", type=str, 
help="directory for train images / 学習画像データのディレクトリ") - parser.add_argument("out_json", type=str, help="metadata file to output / メタデータファイル書き出し先") - parser.add_argument("--in_json", type=str, - help="metadata file to input (if omitted and out_json exists, existing out_json is read) / 読み込むメタデータファイル(省略時、out_jsonが存在すればそれを読み込む)") - parser.add_argument("--caption_extention", type=str, default=None, - help="extension of caption file (for backward compatibility) / 読み込むキャプションファイルの拡張子(スペルミスしていたのを残してあります)") - parser.add_argument("--caption_extension", type=str, default=".caption", help="extension of caption file / 読み込むキャプションファイルの拡張子") - parser.add_argument("--full_path", action="store_true", - help="use full path as image-key in metadata (supports multiple directories) / メタデータで画像キーをフルパスにする(複数の学習画像ディレクトリに対応)") - parser.add_argument("--recursive", action="store_true", - help="recursively look for training tags in all child folders of train_data_dir / train_data_dirのすべての子フォルダにある学習タグを再帰的に探す") - parser.add_argument("--debug", action="store_true", help="debug mode") + parser = argparse.ArgumentParser() + parser.add_argument("train_data_dir", type=str, help="directory for train images / 学習画像データのディレクトリ") + parser.add_argument("out_json", type=str, help="metadata file to output / メタデータファイル書き出し先") + parser.add_argument( + "--in_json", + type=str, + help="metadata file to input (if omitted and out_json exists, existing out_json is read) / 読み込むメタデータファイル(省略時、out_jsonが存在すればそれを読み込む)", + ) + parser.add_argument( + "--caption_extention", + type=str, + default=None, + help="extension of caption file (for backward compatibility) / 読み込むキャプションファイルの拡張子(スペルミスしていたのを残してあります)", + ) + parser.add_argument( + "--caption_extension", type=str, default=".caption", help="extension of caption file / 読み込むキャプションファイルの拡張子" + ) + parser.add_argument( + "--full_path", + action="store_true", + help="use full path as image-key in metadata (supports multiple directories) / メタデータで画像キーをフルパスにする(複数の学習画像ディレクトリに対応)", + ) + 
parser.add_argument( + "--recursive", + action="store_true", + help="recursively look for training tags in all child folders of train_data_dir / train_data_dirのすべての子フォルダにある学習タグを再帰的に探す", + ) + parser.add_argument("--debug", action="store_true", help="debug mode") - return parser + return parser -if __name__ == '__main__': - parser = setup_parser() +if __name__ == "__main__": + parser = setup_parser() - args = parser.parse_args() + args = parser.parse_args() - # スペルミスしていたオプションを復元する - if args.caption_extention is not None: - args.caption_extension = args.caption_extention + # スペルミスしていたオプションを復元する + if args.caption_extention is not None: + args.caption_extension = args.caption_extention - main(args) + main(args) diff --git a/finetune/merge_dd_tags_to_metadata.py b/finetune/merge_dd_tags_to_metadata.py index 9ef8f14b..ce22d990 100644 --- a/finetune/merge_dd_tags_to_metadata.py +++ b/finetune/merge_dd_tags_to_metadata.py @@ -6,70 +6,88 @@ from tqdm import tqdm import library.train_util as train_util import os from library.utils import setup_logging + setup_logging() import logging + logger = logging.getLogger(__name__) + def main(args): - assert not args.recursive or (args.recursive and args.full_path), "recursive requires full_path / recursiveはfull_pathと同時に指定してください" + assert not args.recursive or ( + args.recursive and args.full_path + ), "recursive requires full_path / recursiveはfull_pathと同時に指定してください" - train_data_dir_path = Path(args.train_data_dir) - image_paths: List[Path] = train_util.glob_images_pathlib(train_data_dir_path, args.recursive) - logger.info(f"found {len(image_paths)} images.") + train_data_dir_path = Path(args.train_data_dir) + image_paths: List[Path] = train_util.glob_images_pathlib(train_data_dir_path, args.recursive) + logger.info(f"found {len(image_paths)} images.") - if args.in_json is None and Path(args.out_json).is_file(): - args.in_json = args.out_json + if args.in_json is None and Path(args.out_json).is_file(): + args.in_json = args.out_json - 
if args.in_json is not None: - logger.info(f"loading existing metadata: {args.in_json}") - metadata = json.loads(Path(args.in_json).read_text(encoding='utf-8')) - logger.warning("tags data for existing images will be overwritten / 既存の画像のタグは上書きされます") - else: - logger.info("new metadata will be created / 新しいメタデータファイルが作成されます") - metadata = {} + if args.in_json is not None: + logger.info(f"loading existing metadata: {args.in_json}") + metadata = json.loads(Path(args.in_json).read_text(encoding="utf-8")) + logger.warning("tags data for existing images will be overwritten / 既存の画像のタグは上書きされます") + else: + logger.info("new metadata will be created / 新しいメタデータファイルが作成されます") + metadata = {} - logger.info("merge tags to metadata json.") - for image_path in tqdm(image_paths): - tags_path = image_path.with_suffix(args.caption_extension) - tags = tags_path.read_text(encoding='utf-8').strip() + logger.info("merge tags to metadata json.") + for image_path in tqdm(image_paths): + tags_path = image_path.with_suffix(args.caption_extension) + tags = tags_path.read_text(encoding="utf-8").strip() - if not os.path.exists(tags_path): - tags_path = os.path.join(image_path, args.caption_extension) + if not os.path.exists(tags_path): + tags_path = os.path.join(image_path, args.caption_extension) - image_key = str(image_path) if args.full_path else image_path.stem - if image_key not in metadata: - metadata[image_key] = {} + image_key = str(image_path) if args.full_path else image_path.stem + if image_key not in metadata: + metadata[image_key] = {} - metadata[image_key]['tags'] = tags - if args.debug: - logger.info(f"{image_key} {tags}") + metadata[image_key]["tags"] = tags + if args.debug: + logger.info(f"{image_key} {tags}") - # metadataを書き出して終わり - logger.info(f"writing metadata: {args.out_json}") - Path(args.out_json).write_text(json.dumps(metadata, indent=2), encoding='utf-8') + # metadataを書き出して終わり + logger.info(f"writing metadata: {args.out_json}") + 
Path(args.out_json).write_text(json.dumps(metadata, indent=2), encoding="utf-8") - logger.info("done!") + logger.info("done!") def setup_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser() - parser.add_argument("train_data_dir", type=str, help="directory for train images / 学習画像データのディレクトリ") - parser.add_argument("out_json", type=str, help="metadata file to output / メタデータファイル書き出し先") - parser.add_argument("--in_json", type=str, - help="metadata file to input (if omitted and out_json exists, existing out_json is read) / 読み込むメタデータファイル(省略時、out_jsonが存在すればそれを読み込む)") - parser.add_argument("--full_path", action="store_true", - help="use full path as image-key in metadata (supports multiple directories) / メタデータで画像キーをフルパスにする(複数の学習画像ディレクトリに対応)") - parser.add_argument("--recursive", action="store_true", - help="recursively look for training tags in all child folders of train_data_dir / train_data_dirのすべての子フォルダにある学習タグを再帰的に探す") - parser.add_argument("--caption_extension", type=str, default=".txt", - help="extension of caption (tag) file / 読み込むキャプション(タグ)ファイルの拡張子") - parser.add_argument("--debug", action="store_true", help="debug mode, print tags") + parser = argparse.ArgumentParser() + parser.add_argument("train_data_dir", type=str, help="directory for train images / 学習画像データのディレクトリ") + parser.add_argument("out_json", type=str, help="metadata file to output / メタデータファイル書き出し先") + parser.add_argument( + "--in_json", + type=str, + help="metadata file to input (if omitted and out_json exists, existing out_json is read) / 読み込むメタデータファイル(省略時、out_jsonが存在すればそれを読み込む)", + ) + parser.add_argument( + "--full_path", + action="store_true", + help="use full path as image-key in metadata (supports multiple directories) / メタデータで画像キーをフルパスにする(複数の学習画像ディレクトリに対応)", + ) + parser.add_argument( + "--recursive", + action="store_true", + help="recursively look for training tags in all child folders of train_data_dir / train_data_dirのすべての子フォルダにある学習タグを再帰的に探す", + ) + parser.add_argument( + 
"--caption_extension", + type=str, + default=".txt", + help="extension of caption (tag) file / 読み込むキャプション(タグ)ファイルの拡張子", + ) + parser.add_argument("--debug", action="store_true", help="debug mode, print tags") - return parser + return parser -if __name__ == '__main__': - parser = setup_parser() +if __name__ == "__main__": + parser = setup_parser() - args = parser.parse_args() - main(args) + args = parser.parse_args() + main(args) From f4a4c11cd30a885d1d5ddb86bee609305c5398f3 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sat, 23 Mar 2024 18:51:37 +0900 Subject: [PATCH 40/69] support multiline captions ref #1155 --- README.md | 8 ++++---- docs/config_README-en.md | 30 +++++++++++++++++++++++++++++- docs/config_README-ja.md | 32 ++++++++++++++++++++++++++++++-- library/train_util.py | 38 +++++++++++++++++++++++++++++++------- 4 files changed, 94 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index dae31132..dd000d12 100644 --- a/README.md +++ b/README.md @@ -257,8 +257,8 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - `train_network.py` and `sdxl_train_network.py` are modified to record some dataset settings in the metadata of the trained model (`caption_prefix`, `caption_suffix`, `keep_tokens_separator`, `secondary_separator`, `enable_wildcard`). - Some features are added to the dataset subset settings. - `secondary_separator` is added to specify the tag separator that is not the target of shuffling or dropping. - - Specify `secondary_separator=";;;"`. When you specify `secondary_separator`, the part is not shuffled or dropped. See the example below. - - `enable_wildcard` is added. When set to `true`, the wildcard notation `{aaa|bbb|ccc}` can be used. See the example below. + - Specify `secondary_separator=";;;"`. When you specify `secondary_separator`, the part is not shuffled or dropped. + - `enable_wildcard` is added. When set to `true`, the wildcard notation `{aaa|bbb|ccc}` can be used. 
The multi-line caption is also enabled. - `keep_tokens_separator` is updated to be used twice in the caption. When you specify `keep_tokens_separator="|||"`, the part divided by the second `|||` is not shuffled or dropped and remains at the end. - The existing features `caption_prefix` and `caption_suffix` can be used together. `caption_prefix` and `caption_suffix` are processed first, and then `enable_wildcard`, `keep_tokens_separator`, shuffling and dropping, and `secondary_separator` are processed in order. - See [Dataset config](./docs/config_README-en.md) for details. @@ -274,8 +274,8 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - データセット設定の `.toml` ファイルが UTF-8 encoding で読み込まれるようになりました。PR [#1167](https://github.com/kohya-ss/sd-scripts/pull/1167) Horizon1704 氏に感謝します。 - `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 - データセットのサブセット設定にいくつかの機能を追加しました。 - - シャッフルの対象とならないタグ分割識別子の指定 `secondary_separator` を追加しました。`secondary_separator=";;;"` のように指定します。`secondary_separator` で区切ることで、その部分はシャッフル、drop 時にまとめて扱われます。詳しくは記述例をご覧ください。 - - `enable_wildcard` を追加しました。`true` にするとワイルドカード記法 `{aaa|bbb|ccc}` が使えます。詳しくは記述例をご覧ください。 + - シャッフルの対象とならないタグ分割識別子の指定 `secondary_separator` を追加しました。`secondary_separator=";;;"` のように指定します。`secondary_separator` で区切ることで、その部分はシャッフル、drop 時にまとめて扱われます。 + - `enable_wildcard` を追加しました。`true` にするとワイルドカード記法 `{aaa|bbb|ccc}` が使えます。また複数行キャプションも有効になります。 - `keep_tokens_separator` をキャプション内に 2 つ使えるようにしました。たとえば `keep_tokens_separator="|||"` と指定したとき、`1girl, hatsune miku, vocaloid ||| stage, mic ||| best quality, rating: general` とキャプションを指定すると、二番目の `|||` で分割された部分はシャッフル、drop されず末尾に残ります。 - 既存の機能 `caption_prefix` と `caption_suffix` とあわせて使えます。`caption_prefix` と `caption_suffix` は一番最初に処理され、その後、ワイルドカード、`keep_tokens_separator`、シャッフルおよび drop、`secondary_separator` の順に処理されます。 - 詳細は 
[データセット設定](./docs/config_README-ja.md) をご覧ください。 diff --git a/docs/config_README-en.md b/docs/config_README-en.md index bdcaabfc..e99fde21 100644 --- a/docs/config_README-en.md +++ b/docs/config_README-en.md @@ -293,7 +293,35 @@ As a temporary measure, we will list common errors and their solutions. If you e ## Miscellaneous -### Example of configuration file, 設定ファイルの記述例 +### Multi-line captions + +By setting `enable_wildcard = true`, multiple-line captions are also enabled. If the caption file consists of multiple lines, one line is randomly selected as the caption. + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, microphone, stage +a girl with a microphone standing on a stage +detailed digital art of a girl with a microphone on a stage +``` + +It can be combined with wildcard notation. + +In metadata files, you can also specify multiple-line captions. In the `.json` metadata file, use `\n` to represent a line break. If the caption file consists of multiple lines, `merge_captions_to_metadata.py` will create a metadata file in this format. + +The tags in the metadata (`tags`) are added to each line of the caption. + +```json +{ + "/path/to/image.png": { + "caption": "a cartoon of a frog with the word frog on it\ntest multiline caption1\ntest multiline caption2", + "tags": "open mouth, simple background, standing, no humans, animal, black background, frog, animal costume, animal focus" + }, + ... +} +``` + +In this case, the actual caption will be `a cartoon of a frog with the word frog on it, open mouth, simple background ...`, `test multiline caption1, open mouth, simple background ...`, `test multiline caption2, open mouth, simple background ...`, etc. + +### Example of configuration file : `secondary_separator`, wildcard notation, `keep_tokens_separator`, etc. 
```toml [general] diff --git a/docs/config_README-ja.md b/docs/config_README-ja.md index 47bb5c57..b57ae86a 100644 --- a/docs/config_README-ja.md +++ b/docs/config_README-ja.md @@ -158,7 +158,7 @@ DreamBooth の手法と fine tuning の手法の両方とも利用可能な学 * 追加の区切り文字を指定します。この区切り文字で区切られた部分は一つのタグとして扱われ、シャッフル、drop されます。その後、`caption_separator` に置き換えられます。たとえば `aaa;;;bbb;;;ccc` のように指定すると、`aaa,bbb,ccc` に置き換えられるか、まとめて drop されます。 * `enable_wildcard` - * ワイルドカード記法を有効にします。ワイルドカード記法については後述します。 + * ワイルドカード記法および複数行キャプションを有効にします。ワイルドカード記法、複数行キャプションについては後述します。 ### DreamBooth 方式専用のオプション @@ -296,7 +296,35 @@ resolution = 768 ## その他 -### Example of configuration file, 設定ファイルの記述例 +### 複数行キャプション + +`enable_wildcard = true` を設定することで、複数行キャプションも同時に有効になります。キャプションファイルが複数の行からなる場合、ランダムに一つの行が選ばれてキャプションとして利用されます。 + +```txt +1girl, hatsune miku, vocaloid, upper body, looking at viewer, microphone, stage +a girl with a microphone standing on a stage +detailed digital art of a girl with a microphone on a stage +``` + +ワイルドカード記法と組み合わせることも可能です。 + +メタデータファイルでも同様に複数行キャプションを指定することができます。メタデータの .json 内には、`\n` を使って改行を表現してください。キャプションファイルが複数行からなる場合、`merge_captions_to_metadata.py` を使うと、この形式でメタデータファイルが作成されます。 + +メタデータのタグ (`tags`) は、キャプションの各行に追加されます。 + +```json +{ + "/path/to/image.png": { + "caption": "a cartoon of a frog with the word frog on it\ntest multiline caption1\ntest multiline caption2", + "tags": "open mouth, simple background, standing, no humans, animal, black background, frog, animal costume, animal focus" + }, + ... 
+} +``` + +この場合、実際のキャプションは `a cartoon of a frog with the word frog on it, open mouth, simple background ...` または `test multiline caption1, open mouth, simple background ...`、 `test multiline caption2, open mouth, simple background ...` 等になります。 + +### 設定ファイルの記述例:追加の区切り文字、ワイルドカード記法、`keep_tokens_separator` 等 ```toml [general] diff --git a/library/train_util.py b/library/train_util.py index a13985ee..d076cf84 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -693,6 +693,10 @@ class BaseDataset(torch.utils.data.Dataset): else: # process wildcards if subset.enable_wildcard: + # if caption is multiline, random choice one line + if "\n" in caption: + caption = random.choice(caption.split("\n")) + # wildcard is like '{aaa|bbb|ccc...}' # escape the curly braces like {{ or }} replacer1 = "⦅" @@ -711,6 +715,9 @@ class BaseDataset(torch.utils.data.Dataset): # unescape the curly braces caption = caption.replace(replacer1, "{").replace(replacer2, "}") + else: + # if caption is multiline, use the first line + caption = caption.split("\n")[0] if subset.shuffle_caption or subset.token_warmup_step > 0 or subset.caption_tag_dropout_rate > 0: fixed_tokens = [] @@ -1446,7 +1453,7 @@ class DreamBoothDataset(BaseDataset): self.bucket_reso_steps = None # この情報は使われない self.bucket_no_upscale = False - def read_caption(img_path, caption_extension): + def read_caption(img_path, caption_extension, enable_wildcard): # captionの候補ファイル名を作る base_name = os.path.splitext(img_path)[0] base_name_face_det = base_name @@ -1465,7 +1472,10 @@ class DreamBoothDataset(BaseDataset): logger.error(f"illegal char in file (not UTF-8) / ファイルにUTF-8以外の文字があります: {cap_path}") raise e assert len(lines) > 0, f"caption file is empty / キャプションファイルが空です: {cap_path}" - caption = lines[0].strip() + if enable_wildcard: + caption = "\n".join([line.strip() for line in lines if line.strip() != ""]) # 空行を除く、改行で連結 + else: + caption = lines[0].strip() break return caption @@ -1481,7 +1491,7 @@ class 
DreamBoothDataset(BaseDataset): captions = [] missing_captions = [] for img_path in img_paths: - cap_for_img = read_caption(img_path, subset.caption_extension) + cap_for_img = read_caption(img_path, subset.caption_extension, subset.enable_wildcard) if cap_for_img is None and subset.class_tokens is None: logger.warning( f"neither caption file nor class tokens are found. use empty caption for {img_path} / キャプションファイルもclass tokenも見つかりませんでした。空のキャプションを使用します: {img_path}" @@ -1657,10 +1667,24 @@ class FineTuningDataset(BaseDataset): caption = img_md.get("caption") tags = img_md.get("tags") if caption is None: - caption = tags - elif tags is not None and len(tags) > 0: - caption = caption + ", " + tags - tags_list.append(tags) + caption = tags # could be multiline + tags = None + + if subset.enable_wildcard: + # tags must be single line + if tags is not None: + tags = tags.replace("\n", subset.caption_separator) + + # add tags to each line of caption + if caption is not None and tags is not None: + caption = "\n".join( + [f"{line}{subset.caption_separator}{tags}" for line in caption.split("\n") if line.strip() != ""] + ) + else: + # use as is + if tags is not None and len(tags) > 0: + caption = caption + subset.caption_separator + tags + tags_list.append(tags) if caption is None: caption = "" From 0c7baea88cfa98c5fab2898551c426f2d4fac4c6 Mon Sep 17 00:00:00 2001 From: feffy380 <114889020+feffy380@users.noreply.github.com> Date: Sat, 23 Mar 2024 17:28:02 +0100 Subject: [PATCH 41/69] register reg images with correct subset --- library/train_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index d076cf84..b69fb095 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1554,7 +1554,7 @@ class DreamBoothDataset(BaseDataset): for img_path, caption in zip(img_paths, captions): info = ImageInfo(img_path, subset.num_repeats, caption, subset.is_reg, img_path) if subset.is_reg: - 
reg_infos.append(info) + reg_infos.append((info, subset)) else: self.register_image(info, subset) @@ -1575,7 +1575,7 @@ class DreamBoothDataset(BaseDataset): n = 0 first_loop = True while n < num_train_images: - for info in reg_infos: + for info, subset in reg_infos: if first_loop: self.register_image(info, subset) n += info.num_repeats From 79d1c12ab056e7114257d7079f2f8846e329320e Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 24 Mar 2024 11:06:37 +0900 Subject: [PATCH 42/69] disable sample_every_n_xxx if value less than 1 ref #1202 --- library/train_util.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/library/train_util.py b/library/train_util.py index d076cf84..8fbf3283 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1473,7 +1473,7 @@ class DreamBoothDataset(BaseDataset): raise e assert len(lines) > 0, f"caption file is empty / キャプションファイルが空です: {cap_path}" if enable_wildcard: - caption = "\n".join([line.strip() for line in lines if line.strip() != ""]) # 空行を除く、改行で連結 + caption = "\n".join([line.strip() for line in lines if line.strip() != ""]) # 空行を除く、改行で連結 else: caption = lines[0].strip() break @@ -3338,6 +3338,18 @@ def verify_training_args(args: argparse.Namespace): + " / zero_terminal_snrが有効ですが、v_parameterizationが有効ではありません。学習結果は想定外になる可能性があります" ) + if args.sample_every_n_epochs is not None and args.sample_every_n_epochs <= 0: + logger.warning( + "sample_every_n_epochs is less than or equal to 0, so it will be disabled / sample_every_n_epochsに0以下の値が指定されたため無効になります" + ) + args.sample_every_n_epochs = None + + if args.sample_every_n_steps is not None and args.sample_every_n_steps <= 0: + logger.warning( + "sample_every_n_steps is less than or equal to 0, so it will be disabled / sample_every_n_stepsに0以下の値が指定されたため無効になります" + ) + args.sample_every_n_steps = None + def add_dataset_arguments( parser: argparse.ArgumentParser, support_dreambooth: bool, support_caption: bool, support_caption_dropout: bool From 
691f04322a48566caf62dd67c2834ca2748c064f Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 24 Mar 2024 11:10:26 +0900 Subject: [PATCH 43/69] update readme --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dd000d12..25226dff 100644 --- a/README.md +++ b/README.md @@ -266,8 +266,9 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - Onnx may need to be updated. Onnx is not installed by default, so please install or update it with `pip install onnx==1.15.0 onnxruntime-gpu==1.17.1` etc. Please also check the comments in `requirements.txt`. - The model is now saved in the subdirectory as `--repo_id` in `tag_image_by_wd14_tagger.py` . This caches multiple repo_id models. Please delete unnecessary files under `--model_dir`. - The options `--noise_offset_random_strength` and `--ip_noise_gamma_random_strength` are added to each training script. These options can be used to vary the noise offset and ip noise gamma in the range of 0 to the specified value. PR [#1177](https://github.com/kohya-ss/sd-scripts/pull/1177) Thanks to KohakuBlueleaf! -- The [English version of the dataset settings documentation](./docs/config_README-en.md) is added. PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) Thanks to darkstorm2150! - The options `--save_state_on_train_end` are added to each training script. PR [#1168](https://github.com/kohya-ss/sd-scripts/pull/1168) Thanks to gesen2egee! +- The options `--sample_every_n_epochs` and `--sample_every_n_steps` in each training script now display a warning and ignore them when a number less than or equal to `0` is specified. Thanks to S-Del for raising the issue. +- The [English version of the dataset settings documentation](./docs/config_README-en.md) is added. PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) Thanks to darkstorm2150! 
- Colab での動作時、ログ出力で停止してしまうようです。学習スクリプトに `--console_log_simple` オプションを指定し、rich のロギングを無効してお試しください。 @@ -283,8 +284,9 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - Onnx のバージョンアップが必要になるかもしれません。デフォルトでは Onnx はインストールされていませんので、`pip install onnx==1.15.0 onnxruntime-gpu==1.17.1` 等でインストール、アップデートしてください。`requirements.txt` のコメントもあわせてご確認ください。 - `tag_image_by_wd14_tagger.py` で、モデルを`--repo_id` のサブディレクトリに保存するようにしました。これにより複数のモデルファイルがキャッシュされます。`--model_dir` 直下の不要なファイルは削除願います。 - 各学習スクリプトに、noise offset、ip noise gammaを、それぞれ 0~指定した値の範囲で変動させるオプション `--noise_offset_random_strength` および `--ip_noise_gamma_random_strength` が追加されました。 PR [#1177](https://github.com/kohya-ss/sd-scripts/pull/1177) KohakuBlueleaf 氏に感謝します。 -- データセット設定の[英語版ドキュメント](./docs/config_README-en.md) が追加されました。PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) darkstorm2150 氏に感謝します。 - 各学習スクリプトに、学習終了時に state を保存する `--save_state_on_train_end` オプションが追加されました。 PR [#1168](https://github.com/kohya-ss/sd-scripts/pull/1168) gesen2egee 氏に感謝します。 +- 各学習スクリプトで `--sample_every_n_epochs` および `--sample_every_n_steps` オプションに `0` 以下の数値を指定した時、警告を表示するとともにそれらを無視するよう変更しました。問題提起していただいた S-Del 氏に感謝します。 +- データセット設定の[英語版ドキュメント](./docs/config_README-en.md) が追加されました。PR [#1175](https://github.com/kohya-ss/sd-scripts/pull/1175) darkstorm2150 氏に感謝します。 ### Mar 15, 2024 / 2024/3/15: v0.8.5 From 381c44955e72f04b57c74aa9b3d9a43c839c631f Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 24 Mar 2024 11:27:18 +0900 Subject: [PATCH 44/69] update readme and typing hint --- README.md | 2 ++ library/train_util.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 25226dff..a19f7968 100644 --- a/README.md +++ b/README.md @@ -254,6 +254,7 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - Colab seems to stop with log output. Try specifying `--console_log_simple` option in the training script to disable rich logging. 
- The `.toml` file for the dataset config is now read in UTF-8 encoding. PR [#1167](https://github.com/kohya-ss/sd-scripts/pull/1167) Thanks to Horizon1704! +- Fixed a bug that the last subset settings are applied to all images when multiple subsets of regularization images are specified in the dataset settings. The settings for each subset are correctly applied to each image. PR [#1205](https://github.com/kohya-ss/sd-scripts/pull/1205) Thanks to feffy380! - `train_network.py` and `sdxl_train_network.py` are modified to record some dataset settings in the metadata of the trained model (`caption_prefix`, `caption_suffix`, `keep_tokens_separator`, `secondary_separator`, `enable_wildcard`). - Some features are added to the dataset subset settings. - `secondary_separator` is added to specify the tag separator that is not the target of shuffling or dropping. @@ -273,6 +274,7 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - Colab での動作時、ログ出力で停止してしまうようです。学習スクリプトに `--console_log_simple` オプションを指定し、rich のロギングを無効してお試しください。 - データセット設定の `.toml` ファイルが UTF-8 encoding で読み込まれるようになりました。PR [#1167](https://github.com/kohya-ss/sd-scripts/pull/1167) Horizon1704 氏に感謝します。 +- データセット設定で、正則化画像のサブセットを複数指定した時、最後のサブセットの各種設定がすべてのサブセットの画像に適用される不具合が修正されました。それぞれのサブセットの設定が、それぞれの画像に正しく適用されます。PR [#1205](https://github.com/kohya-ss/sd-scripts/pull/1205) feffy380 氏に感謝します。 - `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 - データセットのサブセット設定にいくつかの機能を追加しました。 - シャッフルの対象とならないタグ分割識別子の指定 `secondary_separator` を追加しました。`secondary_separator=";;;"` のように指定します。`secondary_separator` で区切ることで、その部分はシャッフル、drop 時にまとめて扱われます。 diff --git a/library/train_util.py b/library/train_util.py index ce6e0924..99aeea90 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1525,7 +1525,7 @@ class DreamBoothDataset(BaseDataset): logger.info("prepare 
images.") num_train_images = 0 num_reg_images = 0 - reg_infos: List[ImageInfo] = [] + reg_infos: List[Tuple[ImageInfo, DreamBoothSubset]] = [] for subset in subsets: if subset.num_repeats < 1: logger.warning( From ae97c8bfd18e4b51bdeae0a72753c8e9ceeff29d Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Sun, 24 Mar 2024 14:40:18 +0800 Subject: [PATCH 45/69] [Experimental] Add cache mechanism for dataset groups to avoid long waiting time for initilization (#1178) * support meta cached dataset * add cache meta scripts * random ip_noise_gamma strength * random noise_offset strength * use correct settings for parser * cache path/caption/size only * revert mess up commit * revert mess up commit * Update requirements.txt * Add arguments for meta cache. * remove pickle implementation * Return sizes when enable cache --------- Co-authored-by: Kohya S <52813779+kohya-ss@users.noreply.github.com> --- cache_dataset_meta.py | 103 +++++++++++++++++++++++++++++++++++++++++ library/config_util.py | 4 ++ library/train_util.py | 83 ++++++++++++++++++++++++--------- requirements.txt | 2 + train_network.py | 3 +- 5 files changed, 173 insertions(+), 22 deletions(-) create mode 100644 cache_dataset_meta.py diff --git a/cache_dataset_meta.py b/cache_dataset_meta.py new file mode 100644 index 00000000..7e7d96d1 --- /dev/null +++ b/cache_dataset_meta.py @@ -0,0 +1,103 @@ +import argparse +import random + +from accelerate.utils import set_seed + +import library.train_util as train_util +import library.config_util as config_util +from library.config_util import ( + ConfigSanitizer, + BlueprintGenerator, +) +import library.custom_train_functions as custom_train_functions +from library.utils import setup_logging, add_logging_arguments + +setup_logging() +import logging + +logger = logging.getLogger(__name__) + + +def make_dataset(args): + train_util.prepare_dataset_args(args, True) + setup_logging(args, reset=True) + + 
use_dreambooth_method = args.in_json is None + use_user_config = args.dataset_config is not None + + if args.seed is None: + args.seed = random.randint(0, 2**32) + set_seed(args.seed) + + # データセットを準備する + if args.dataset_class is None: + blueprint_generator = BlueprintGenerator( + ConfigSanitizer(True, True, False, True) + ) + if use_user_config: + logger.info(f"Loading dataset config from {args.dataset_config}") + user_config = config_util.load_user_config(args.dataset_config) + ignored = ["train_data_dir", "reg_data_dir", "in_json"] + if any(getattr(args, attr) is not None for attr in ignored): + logger.warning( + "ignoring the following options because config file is found: {0} / 設定ファイルが利用されるため以下のオプションは無視されます: {0}".format( + ", ".join(ignored) + ) + ) + else: + if use_dreambooth_method: + logger.info("Using DreamBooth method.") + user_config = { + "datasets": [ + { + "subsets": config_util.generate_dreambooth_subsets_config_by_subdirs( + args.train_data_dir, args.reg_data_dir + ) + } + ] + } + else: + logger.info("Training with captions.") + user_config = { + "datasets": [ + { + "subsets": [ + { + "image_dir": args.train_data_dir, + "metadata_file": args.in_json, + } + ] + } + ] + } + + blueprint = blueprint_generator.generate(user_config, args, tokenizer=None) + train_dataset_group = config_util.generate_dataset_group_by_blueprint( + blueprint.dataset_group + ) + else: + # use arbitrary dataset class + train_dataset_group = train_util.load_arbitrary_dataset(args, tokenizer=None) + return train_dataset_group + + +def setup_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + add_logging_arguments(parser) + train_util.add_dataset_arguments(parser, True, True, True) + train_util.add_training_arguments(parser, True) + config_util.add_config_arguments(parser) + custom_train_functions.add_custom_train_arguments(parser) + return parser + + +if __name__ == "__main__": + parser = setup_parser() + + args, unknown = parser.parse_known_args() + args = 
train_util.read_config_from_file(args, parser) + if args.max_token_length is None: + args.max_token_length = 75 + args.cache_meta = True + + dataset_group = make_dataset(args) diff --git a/library/config_util.py b/library/config_util.py index eb652ecf..58ffa5f4 100644 --- a/library/config_util.py +++ b/library/config_util.py @@ -111,6 +111,8 @@ class DreamBoothDatasetParams(BaseDatasetParams): bucket_reso_steps: int = 64 bucket_no_upscale: bool = False prior_loss_weight: float = 1.0 + cache_meta: bool = False + use_cached_meta: bool = False @dataclass @@ -228,6 +230,8 @@ class ConfigSanitizer: "min_bucket_reso": int, "resolution": functools.partial(__validate_and_convert_scalar_or_twodim.__func__, int), "network_multiplier": float, + "cache_meta": bool, + "use_cached_meta": bool, } # options handled by argparse but not handled by user config diff --git a/library/train_util.py b/library/train_util.py index 99aeea90..58c0cc14 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -63,6 +63,7 @@ from library.original_unet import UNet2DConditionModel from huggingface_hub import hf_hub_download import numpy as np from PIL import Image +import imagesize import cv2 import safetensors.torch from library.lpw_stable_diffusion import StableDiffusionLongPromptWeightingPipeline @@ -1080,8 +1081,7 @@ class BaseDataset(torch.utils.data.Dataset): ) def get_image_size(self, image_path): - image = Image.open(image_path) - return image.size + return imagesize.get(image_path) def load_image_with_face_info(self, subset: BaseSubset, image_path: str): img = load_image(image_path) @@ -1425,6 +1425,8 @@ class DreamBoothDataset(BaseDataset): bucket_no_upscale: bool, prior_loss_weight: float, debug_dataset: bool, + cache_meta: bool, + use_cached_meta: bool, ) -> None: super().__init__(tokenizer, max_token_length, resolution, network_multiplier, debug_dataset) @@ -1484,26 +1486,43 @@ class DreamBoothDataset(BaseDataset): logger.warning(f"not directory: {subset.image_dir}") return 
[], [] - img_paths = glob_images(subset.image_dir, "*") + sizes = None + if use_cached_meta: + logger.info(f"using cached metadata: {subset.image_dir}/dataset.txt") + # [img_path, caption, resolution] + with open(f"{subset.image_dir}/dataset.txt", "r", encoding="utf-8") as f: + metas = f.readlines() + metas = [x.strip().split("<|##|>") for x in metas] + sizes = [tuple(int(res) for res in x[2].split(" ")) for x in metas] + + if use_cached_meta: + img_paths = [x[0] for x in metas] + else: + img_paths = glob_images(subset.image_dir, "*") + sizes = [None]*len(img_paths) logger.info(f"found directory {subset.image_dir} contains {len(img_paths)} image files") - # 画像ファイルごとにプロンプトを読み込み、もしあればそちらを使う - captions = [] - missing_captions = [] - for img_path in img_paths: - cap_for_img = read_caption(img_path, subset.caption_extension, subset.enable_wildcard) - if cap_for_img is None and subset.class_tokens is None: - logger.warning( - f"neither caption file nor class tokens are found. use empty caption for {img_path} / キャプションファイルもclass tokenも見つかりませんでした。空のキャプションを使用します: {img_path}" - ) - captions.append("") - missing_captions.append(img_path) - else: - if cap_for_img is None: - captions.append(subset.class_tokens) + if use_cached_meta: + captions = [x[1] for x in metas] + missing_captions = [x[0] for x in metas if x[1] == ""] + else: + # 画像ファイルごとにプロンプトを読み込み、もしあればそちらを使う + captions = [] + missing_captions = [] + for img_path in img_paths: + cap_for_img = read_caption(img_path, subset.caption_extension, subset.enable_wildcard) + if cap_for_img is None and subset.class_tokens is None: + logger.warning( + f"neither caption file nor class tokens are found. 
use empty caption for {img_path} / キャプションファイルもclass tokenも見つかりませんでした。空のキャプションを使用します: {img_path}" + ) + captions.append("") missing_captions.append(img_path) else: - captions.append(cap_for_img) + if cap_for_img is None: + captions.append(subset.class_tokens) + missing_captions.append(img_path) + else: + captions.append(cap_for_img) self.set_tag_frequency(os.path.basename(subset.image_dir), captions) # タグ頻度を記録 @@ -1520,7 +1539,21 @@ class DreamBoothDataset(BaseDataset): logger.warning(missing_caption + f"... and {remaining_missing_captions} more") break logger.warning(missing_caption) - return img_paths, captions + + if cache_meta: + logger.info(f"cache metadata for {subset.image_dir}") + if sizes is None or sizes[0] is None: + sizes = [self.get_image_size(img_path) for img_path in img_paths] + # [img_path, caption, resolution] + data = [ + (img_path, caption, " ".join(str(x) for x in size)) + for img_path, caption, size in zip(img_paths, captions, sizes) + ] + with open(f"{subset.image_dir}/dataset.txt", "w", encoding="utf-8") as f: + f.write("\n".join(["<|##|>".join(x) for x in data])) + logger.info(f"cache metadata done for {subset.image_dir}") + + return img_paths, captions, sizes logger.info("prepare images.") num_train_images = 0 @@ -1539,7 +1572,7 @@ class DreamBoothDataset(BaseDataset): ) continue - img_paths, captions = load_dreambooth_dir(subset) + img_paths, captions, sizes = load_dreambooth_dir(subset) if len(img_paths) < 1: logger.warning( f"ignore subset with image_dir='{subset.image_dir}': no images found / 画像が見つからないためサブセットを無視します" @@ -1551,8 +1584,10 @@ class DreamBoothDataset(BaseDataset): else: num_train_images += subset.num_repeats * len(img_paths) - for img_path, caption in zip(img_paths, captions): + for img_path, caption, size in zip(img_paths, captions, sizes): info = ImageInfo(img_path, subset.num_repeats, caption, subset.is_reg, img_path) + if size is not None: + info.image_size = size if subset.is_reg: reg_infos.append((info, subset)) else: 
@@ -3355,6 +3390,12 @@ def add_dataset_arguments( parser: argparse.ArgumentParser, support_dreambooth: bool, support_caption: bool, support_caption_dropout: bool ): # dataset common + parser.add_argument( + "--cache_meta", action="store_true" + ) + parser.add_argument( + "--use_cached_meta", action="store_true" + ) parser.add_argument( "--train_data_dir", type=str, default=None, help="directory for train images / 学習画像データのディレクトリ" ) diff --git a/requirements.txt b/requirements.txt index 805f0501..c7aeb689 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,8 @@ easygui==0.98.3 toml==0.10.2 voluptuous==0.13.1 huggingface-hub==0.20.1 +# for Image utils +imagesize==1.4.1 # for BLIP captioning # requests==2.28.2 # timm==0.6.12 diff --git a/train_network.py b/train_network.py index 9e573d9f..b42daba7 100644 --- a/train_network.py +++ b/train_network.py @@ -6,6 +6,7 @@ import sys import random import time import json +import pickle from multiprocessing import Value import toml @@ -23,7 +24,7 @@ from library import model_util import library.train_util as train_util from library.train_util import ( - DreamBoothDataset, + DreamBoothDataset, DatasetGroup ) import library.config_util as config_util from library.config_util import ( From 025347214d761d63c5475fec83e11856f3cdbe9d Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 24 Mar 2024 18:09:32 +0900 Subject: [PATCH 46/69] refactor metadata caching for DreamBooth dataset --- cache_dataset_meta.py | 103 --------------------------------------- docs/config_README-en.md | 4 ++ docs/config_README-ja.md | 4 ++ library/config_util.py | 39 +++++++++------ library/train_util.py | 86 ++++++++++++++++++-------------- train_network.py | 8 +-- 6 files changed, 85 insertions(+), 159 deletions(-) delete mode 100644 cache_dataset_meta.py diff --git a/cache_dataset_meta.py b/cache_dataset_meta.py deleted file mode 100644 index 7e7d96d1..00000000 --- a/cache_dataset_meta.py +++ /dev/null @@ -1,103 +0,0 @@ -import argparse -import 
random - -from accelerate.utils import set_seed - -import library.train_util as train_util -import library.config_util as config_util -from library.config_util import ( - ConfigSanitizer, - BlueprintGenerator, -) -import library.custom_train_functions as custom_train_functions -from library.utils import setup_logging, add_logging_arguments - -setup_logging() -import logging - -logger = logging.getLogger(__name__) - - -def make_dataset(args): - train_util.prepare_dataset_args(args, True) - setup_logging(args, reset=True) - - use_dreambooth_method = args.in_json is None - use_user_config = args.dataset_config is not None - - if args.seed is None: - args.seed = random.randint(0, 2**32) - set_seed(args.seed) - - # データセットを準備する - if args.dataset_class is None: - blueprint_generator = BlueprintGenerator( - ConfigSanitizer(True, True, False, True) - ) - if use_user_config: - logger.info(f"Loading dataset config from {args.dataset_config}") - user_config = config_util.load_user_config(args.dataset_config) - ignored = ["train_data_dir", "reg_data_dir", "in_json"] - if any(getattr(args, attr) is not None for attr in ignored): - logger.warning( - "ignoring the following options because config file is found: {0} / 設定ファイルが利用されるため以下のオプションは無視されます: {0}".format( - ", ".join(ignored) - ) - ) - else: - if use_dreambooth_method: - logger.info("Using DreamBooth method.") - user_config = { - "datasets": [ - { - "subsets": config_util.generate_dreambooth_subsets_config_by_subdirs( - args.train_data_dir, args.reg_data_dir - ) - } - ] - } - else: - logger.info("Training with captions.") - user_config = { - "datasets": [ - { - "subsets": [ - { - "image_dir": args.train_data_dir, - "metadata_file": args.in_json, - } - ] - } - ] - } - - blueprint = blueprint_generator.generate(user_config, args, tokenizer=None) - train_dataset_group = config_util.generate_dataset_group_by_blueprint( - blueprint.dataset_group - ) - else: - # use arbitrary dataset class - train_dataset_group = 
train_util.load_arbitrary_dataset(args, tokenizer=None) - return train_dataset_group - - -def setup_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser() - add_logging_arguments(parser) - train_util.add_dataset_arguments(parser, True, True, True) - train_util.add_training_arguments(parser, True) - config_util.add_config_arguments(parser) - custom_train_functions.add_custom_train_arguments(parser) - return parser - - -if __name__ == "__main__": - parser = setup_parser() - - args, unknown = parser.parse_known_args() - args = train_util.read_config_from_file(args, parser) - if args.max_token_length is None: - args.max_token_length = 75 - args.cache_meta = True - - dataset_group = make_dataset(args) diff --git a/docs/config_README-en.md b/docs/config_README-en.md index e99fde21..83bea329 100644 --- a/docs/config_README-en.md +++ b/docs/config_README-en.md @@ -177,6 +177,7 @@ Options related to the configuration of DreamBooth subsets. | `image_dir` | `'C:\hoge'` | - | - | o (required) | | `caption_extension` | `".txt"` | o | o | o | | `class_tokens` | `"sks girl"` | - | - | o | +| `cache_info` | `false` | o | o | o | | `is_reg` | `false` | - | - | o | Firstly, note that for `image_dir`, the path to the image files must be specified as being directly in the directory. Unlike the previous DreamBooth method, where images had to be placed in subdirectories, this is not compatible with that specification. Also, even if you name the folder something like "5_cat", the number of repeats of the image and the class name will not be reflected. If you want to set these individually, you will need to explicitly specify them using `num_repeats` and `class_tokens`. @@ -187,6 +188,9 @@ Firstly, note that for `image_dir`, the path to the image files must be specifie * `class_tokens` * Sets the class tokens. * Only used during training when a corresponding caption file does not exist. The determination of whether or not to use it is made on a per-image basis. 
If `class_tokens` is not specified and a caption file is not found, an error will occur. +* `cache_info` + * Specifies whether to cache the image size and caption. If not specified, it is set to `false`. The cache is saved in `metadata_cache.json` in `image_dir`. + * Caching speeds up the loading of the dataset after the first time. It is effective when dealing with thousands of images or more. * `is_reg` * Specifies whether the subset images are for normalization. If not specified, it is set to `false`, meaning that the images are not for normalization. diff --git a/docs/config_README-ja.md b/docs/config_README-ja.md index b57ae86a..cc74c341 100644 --- a/docs/config_README-ja.md +++ b/docs/config_README-ja.md @@ -173,6 +173,7 @@ DreamBooth 方式のサブセットの設定に関わるオプションです。 | `image_dir` | `‘C:\hoge’` | - | - | o(必須) | | `caption_extension` | `".txt"` | o | o | o | | `class_tokens` | `“sks girl”` | - | - | o | +| `cache_info` | `false` | o | o | o | | `is_reg` | `false` | - | - | o | まず注意点として、 `image_dir` には画像ファイルが直下に置かれているパスを指定する必要があります。従来の DreamBooth の手法ではサブディレクトリに画像を置く必要がありましたが、そちらとは仕様に互換性がありません。また、`5_cat` のようなフォルダ名にしても、画像の繰り返し回数とクラス名は反映されません。これらを個別に設定したい場合、`num_repeats` と `class_tokens` で明示的に指定する必要があることに注意してください。 @@ -183,6 +184,9 @@ DreamBooth 方式のサブセットの設定に関わるオプションです。 * `class_tokens` * クラストークンを設定します。 * 画像に対応する caption ファイルが存在しない場合にのみ学習時に利用されます。利用するかどうかの判定は画像ごとに行います。`class_tokens` を指定しなかった場合に caption ファイルも見つからなかった場合にはエラーになります。 +* `cache_info` + * 画像サイズ、キャプションをキャッシュするかどうかを指定します。指定しなかった場合は `false` になります。キャッシュは `image_dir` に `metadata_cache.json` というファイル名で保存されます。 + * キャッシュを行うと、二回目以降のデータセット読み込みが高速化されます。数千枚以上の画像を扱う場合には有効です。 * `is_reg` * サブセットの画像が正規化用かどうかを指定します。指定しなかった場合は `false` として、つまり正規化画像ではないとして扱います。 diff --git a/library/config_util.py b/library/config_util.py index 58ffa5f4..e52b7fc0 100644 --- a/library/config_util.py +++ b/library/config_util.py @@ -41,12 +41,17 @@ from .train_util import ( DatasetGroup, ) from .utils import setup_logging + setup_logging() import logging 
+ logger = logging.getLogger(__name__) + def add_config_arguments(parser: argparse.ArgumentParser): - parser.add_argument("--dataset_config", type=Path, default=None, help="config file for detail settings / 詳細な設定用の設定ファイル") + parser.add_argument( + "--dataset_config", type=Path, default=None, help="config file for detail settings / 詳細な設定用の設定ファイル" + ) # TODO: inherit Params class in Subset, Dataset @@ -80,6 +85,7 @@ class DreamBoothSubsetParams(BaseSubsetParams): is_reg: bool = False class_tokens: Optional[str] = None caption_extension: str = ".caption" + cache_info: bool = False @dataclass @@ -91,6 +97,7 @@ class FineTuningSubsetParams(BaseSubsetParams): class ControlNetSubsetParams(BaseSubsetParams): conditioning_data_dir: str = None caption_extension: str = ".caption" + cache_info: bool = False @dataclass @@ -111,8 +118,6 @@ class DreamBoothDatasetParams(BaseDatasetParams): bucket_reso_steps: int = 64 bucket_no_upscale: bool = False prior_loss_weight: float = 1.0 - cache_meta: bool = False - use_cached_meta: bool = False @dataclass @@ -202,6 +207,7 @@ class ConfigSanitizer: DB_SUBSET_ASCENDABLE_SCHEMA = { "caption_extension": str, "class_tokens": str, + "cache_info": bool, } DB_SUBSET_DISTINCT_SCHEMA = { Required("image_dir"): str, @@ -214,6 +220,7 @@ class ConfigSanitizer: } CN_SUBSET_ASCENDABLE_SCHEMA = { "caption_extension": str, + "cache_info": bool, } CN_SUBSET_DISTINCT_SCHEMA = { Required("image_dir"): str, @@ -230,8 +237,6 @@ class ConfigSanitizer: "min_bucket_reso": int, "resolution": functools.partial(__validate_and_convert_scalar_or_twodim.__func__, int), "network_multiplier": float, - "cache_meta": bool, - "use_cached_meta": bool, } # options handled by argparse but not handled by user config @@ -366,7 +371,9 @@ class ConfigSanitizer: return self.argparse_config_validator(argparse_namespace) except MultipleInvalid: # XXX: this should be a bug - logger.error("Invalid cmdline parsed arguments. This should be a bug. 
/ コマンドラインのパース結果が正しくないようです。プログラムのバグの可能性が高いです。") + logger.error( + "Invalid cmdline parsed arguments. This should be a bug. / コマンドラインのパース結果が正しくないようです。プログラムのバグの可能性が高いです。" + ) raise # NOTE: value would be overwritten by latter dict if there is already the same key @@ -551,11 +558,11 @@ def generate_dataset_group_by_blueprint(dataset_group_blueprint: DatasetGroupBlu " ", ) - logger.info(f'{info}') + logger.info(f"{info}") # make buckets first because it determines the length of dataset # and set the same seed for all datasets - seed = random.randint(0, 2**31) # actual seed is seed + epoch_no + seed = random.randint(0, 2**31) # actual seed is seed + epoch_no for i, dataset in enumerate(datasets): logger.info(f"[Dataset {i}]") dataset.make_buckets() @@ -642,13 +649,17 @@ def load_user_config(file: str) -> dict: with open(file, "r") as f: config = json.load(f) except Exception: - logger.error(f"Error on parsing JSON config file. Please check the format. / JSON 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}") + logger.error( + f"Error on parsing JSON config file. Please check the format. / JSON 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}" + ) raise elif file.name.lower().endswith(".toml"): try: config = toml.load(file) except Exception: - logger.error(f"Error on parsing TOML config file. Please check the format. / TOML 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}") + logger.error( + f"Error on parsing TOML config file. Please check the format. 
/ TOML 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}" + ) raise else: raise ValueError(f"not supported config file format / 対応していない設定ファイルの形式です: {file}") @@ -675,13 +686,13 @@ if __name__ == "__main__": train_util.prepare_dataset_args(argparse_namespace, config_args.support_finetuning) logger.info("[argparse_namespace]") - logger.info(f'{vars(argparse_namespace)}') + logger.info(f"{vars(argparse_namespace)}") user_config = load_user_config(config_args.dataset_config) logger.info("") logger.info("[user_config]") - logger.info(f'{user_config}') + logger.info(f"{user_config}") sanitizer = ConfigSanitizer( config_args.support_dreambooth, config_args.support_finetuning, config_args.support_controlnet, config_args.support_dropout @@ -690,10 +701,10 @@ if __name__ == "__main__": logger.info("") logger.info("[sanitized_user_config]") - logger.info(f'{sanitized_user_config}') + logger.info(f"{sanitized_user_config}") blueprint = BlueprintGenerator(sanitizer).generate(user_config, argparse_namespace) logger.info("") logger.info("[blueprint]") - logger.info(f'{blueprint}') + logger.info(f"{blueprint}") diff --git a/library/train_util.py b/library/train_util.py index 58c0cc14..743a1147 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -410,6 +410,7 @@ class DreamBoothSubset(BaseSubset): is_reg: bool, class_tokens: Optional[str], caption_extension: str, + cache_info: bool, num_repeats, shuffle_caption, caption_separator: str, @@ -458,6 +459,7 @@ class DreamBoothSubset(BaseSubset): self.caption_extension = caption_extension if self.caption_extension and not self.caption_extension.startswith("."): self.caption_extension = "." 
+ self.caption_extension + self.cache_info = cache_info def __eq__(self, other) -> bool: if not isinstance(other, DreamBoothSubset): @@ -527,6 +529,7 @@ class ControlNetSubset(BaseSubset): image_dir: str, conditioning_data_dir: str, caption_extension: str, + cache_info: bool, num_repeats, shuffle_caption, caption_separator, @@ -574,6 +577,7 @@ class ControlNetSubset(BaseSubset): self.caption_extension = caption_extension if self.caption_extension and not self.caption_extension.startswith("."): self.caption_extension = "." + self.caption_extension + self.cache_info = cache_info def __eq__(self, other) -> bool: if not isinstance(other, ControlNetSubset): @@ -1410,6 +1414,8 @@ class BaseDataset(torch.utils.data.Dataset): class DreamBoothDataset(BaseDataset): + IMAGE_INFO_CACHE_FILE = "metadata_cache.json" + def __init__( self, subsets: Sequence[DreamBoothSubset], @@ -1425,8 +1431,6 @@ class DreamBoothDataset(BaseDataset): bucket_no_upscale: bool, prior_loss_weight: float, debug_dataset: bool, - cache_meta: bool, - use_cached_meta: bool, ) -> None: super().__init__(tokenizer, max_token_length, resolution, network_multiplier, debug_dataset) @@ -1486,25 +1490,36 @@ class DreamBoothDataset(BaseDataset): logger.warning(f"not directory: {subset.image_dir}") return [], [] - sizes = None - if use_cached_meta: - logger.info(f"using cached metadata: {subset.image_dir}/dataset.txt") - # [img_path, caption, resolution] - with open(f"{subset.image_dir}/dataset.txt", "r", encoding="utf-8") as f: - metas = f.readlines() - metas = [x.strip().split("<|##|>") for x in metas] - sizes = [tuple(int(res) for res in x[2].split(" ")) for x in metas] - - if use_cached_meta: - img_paths = [x[0] for x in metas] + info_cache_file = os.path.join(subset.image_dir, self.IMAGE_INFO_CACHE_FILE) + use_cached_info_for_subset = subset.cache_info + if use_cached_info_for_subset: + logger.info( + f"using cached image info for this subset / このサブセットで、キャッシュされた画像情報を使います: {info_cache_file}" + ) + if not 
os.path.isfile(info_cache_file): + logger.warning( + f"image info file not found. You can ignore this warning if this is the first time to use this subset" + + " / キャッシュファイルが見つかりませんでした。初回実行時はこの警告を無視してください: {metadata_file}" + ) + use_cached_info_for_subset = False + + if use_cached_info_for_subset: + # json: {`img_path`:{"caption": "caption...", "resolution": [width, height]}, ...} + with open(info_cache_file, "r", encoding="utf-8") as f: + metas = json.load(f) + img_paths = list(metas.keys()) + sizes = [meta["resolution"] for meta in metas.values()] + + # we may need to check image size and existence of image files, but it takes time, so user should check it before training else: img_paths = glob_images(subset.image_dir, "*") - sizes = [None]*len(img_paths) + sizes = [None] * len(img_paths) + logger.info(f"found directory {subset.image_dir} contains {len(img_paths)} image files") - if use_cached_meta: - captions = [x[1] for x in metas] - missing_captions = [x[0] for x in metas if x[1] == ""] + if use_cached_info_for_subset: + captions = [meta["caption"] for meta in metas.values()] + missing_captions = [img_path for img_path, caption in zip(img_paths, captions) if caption is None or caption == ""] else: # 画像ファイルごとにプロンプトを読み込み、もしあればそちらを使う captions = [] @@ -1540,19 +1555,17 @@ class DreamBoothDataset(BaseDataset): break logger.warning(missing_caption) - if cache_meta: - logger.info(f"cache metadata for {subset.image_dir}") - if sizes is None or sizes[0] is None: - sizes = [self.get_image_size(img_path) for img_path in img_paths] - # [img_path, caption, resolution] - data = [ - (img_path, caption, " ".join(str(x) for x in size)) - for img_path, caption, size in zip(img_paths, captions, sizes) - ] - with open(f"{subset.image_dir}/dataset.txt", "w", encoding="utf-8") as f: - f.write("\n".join(["<|##|>".join(x) for x in data])) - logger.info(f"cache metadata done for {subset.image_dir}") + if not use_cached_info_for_subset and subset.cache_info: + logger.info(f"cache image 
info for / 画像情報をキャッシュします : {info_cache_file}") + sizes = [self.get_image_size(img_path) for img_path in tqdm(img_paths, desc="get image size")] + matas = {} + for img_path, caption, size in zip(img_paths, captions, sizes): + matas[img_path] = {"caption": caption, "resolution": list(size)} + with open(info_cache_file, "w", encoding="utf-8") as f: + json.dump(matas, f, ensure_ascii=False, indent=2) + logger.info(f"cache image info done for / 画像情報を出力しました : {info_cache_file}") + # if sizes are not set, image size will be read in make_buckets return img_paths, captions, sizes logger.info("prepare images.") @@ -1873,7 +1886,8 @@ class ControlNetDataset(BaseDataset): subset.image_dir, False, None, - subset.caption_extension, + subset.caption_extension, + subset.cache_info, subset.num_repeats, subset.shuffle_caption, subset.caption_separator, @@ -3390,15 +3404,15 @@ def add_dataset_arguments( parser: argparse.ArgumentParser, support_dreambooth: bool, support_caption: bool, support_caption_dropout: bool ): # dataset common - parser.add_argument( - "--cache_meta", action="store_true" - ) - parser.add_argument( - "--use_cached_meta", action="store_true" - ) parser.add_argument( "--train_data_dir", type=str, default=None, help="directory for train images / 学習画像データのディレクトリ" ) + parser.add_argument( + "--cache_info", + action="store_true", + help="cache meta information (caption and image size) for faster dataset loading. 
only available for DreamBooth" + + " / メタ情報(キャプションとサイズ)をキャッシュしてデータセット読み込みを高速化する。DreamBooth方式のみ有効", + ) parser.add_argument( "--shuffle_caption", action="store_true", help="shuffle separated caption / 区切られたcaptionの各要素をshuffleする" ) diff --git a/train_network.py b/train_network.py index b42daba7..7ae9283c 100644 --- a/train_network.py +++ b/train_network.py @@ -6,7 +6,6 @@ import sys import random import time import json -import pickle from multiprocessing import Value import toml @@ -14,18 +13,15 @@ from tqdm import tqdm import torch from library.device_utils import init_ipex, clean_memory_on_device -init_ipex() -from torch.nn.parallel import DistributedDataParallel as DDP +init_ipex() from accelerate.utils import set_seed from diffusers import DDPMScheduler from library import model_util import library.train_util as train_util -from library.train_util import ( - DreamBoothDataset, DatasetGroup -) +from library.train_util import DreamBoothDataset import library.config_util as config_util from library.config_util import ( ConfigSanitizer, From 1648ade6da549c7def2e21f236453e7938c499cd Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 24 Mar 2024 20:55:48 +0900 Subject: [PATCH 47/69] format by black --- library/config_util.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/library/config_util.py b/library/config_util.py index eb652ecf..ff4de092 100644 --- a/library/config_util.py +++ b/library/config_util.py @@ -41,12 +41,17 @@ from .train_util import ( DatasetGroup, ) from .utils import setup_logging + setup_logging() import logging + logger = logging.getLogger(__name__) + def add_config_arguments(parser: argparse.ArgumentParser): - parser.add_argument("--dataset_config", type=Path, default=None, help="config file for detail settings / 詳細な設定用の設定ファイル") + parser.add_argument( + "--dataset_config", type=Path, default=None, help="config file for detail settings / 詳細な設定用の設定ファイル" + ) # TODO: inherit Params class in Subset, 
Dataset @@ -362,7 +367,9 @@ class ConfigSanitizer: return self.argparse_config_validator(argparse_namespace) except MultipleInvalid: # XXX: this should be a bug - logger.error("Invalid cmdline parsed arguments. This should be a bug. / コマンドラインのパース結果が正しくないようです。プログラムのバグの可能性が高いです。") + logger.error( + "Invalid cmdline parsed arguments. This should be a bug. / コマンドラインのパース結果が正しくないようです。プログラムのバグの可能性が高いです。" + ) raise # NOTE: value would be overwritten by latter dict if there is already the same key @@ -547,11 +554,11 @@ def generate_dataset_group_by_blueprint(dataset_group_blueprint: DatasetGroupBlu " ", ) - logger.info(f'{info}') + logger.info(f"{info}") # make buckets first because it determines the length of dataset # and set the same seed for all datasets - seed = random.randint(0, 2**31) # actual seed is seed + epoch_no + seed = random.randint(0, 2**31) # actual seed is seed + epoch_no for i, dataset in enumerate(datasets): logger.info(f"[Dataset {i}]") dataset.make_buckets() @@ -638,13 +645,17 @@ def load_user_config(file: str) -> dict: with open(file, "r") as f: config = json.load(f) except Exception: - logger.error(f"Error on parsing JSON config file. Please check the format. / JSON 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}") + logger.error( + f"Error on parsing JSON config file. Please check the format. / JSON 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}" + ) raise elif file.name.lower().endswith(".toml"): try: config = toml.load(file) except Exception: - logger.error(f"Error on parsing TOML config file. Please check the format. / TOML 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}") + logger.error( + f"Error on parsing TOML config file. Please check the format. 
/ TOML 形式の設定ファイルの読み込みに失敗しました。文法が正しいか確認してください。: {file}" + ) raise else: raise ValueError(f"not supported config file format / 対応していない設定ファイルの形式です: {file}") @@ -671,13 +682,13 @@ if __name__ == "__main__": train_util.prepare_dataset_args(argparse_namespace, config_args.support_finetuning) logger.info("[argparse_namespace]") - logger.info(f'{vars(argparse_namespace)}') + logger.info(f"{vars(argparse_namespace)}") user_config = load_user_config(config_args.dataset_config) logger.info("") logger.info("[user_config]") - logger.info(f'{user_config}') + logger.info(f"{user_config}") sanitizer = ConfigSanitizer( config_args.support_dreambooth, config_args.support_finetuning, config_args.support_controlnet, config_args.support_dropout @@ -686,10 +697,10 @@ if __name__ == "__main__": logger.info("") logger.info("[sanitized_user_config]") - logger.info(f'{sanitized_user_config}') + logger.info(f"{sanitized_user_config}") blueprint = BlueprintGenerator(sanitizer).generate(user_config, argparse_namespace) logger.info("") logger.info("[blueprint]") - logger.info(f'{blueprint}') + logger.info(f"{blueprint}") From 9bbb28c3619a9ff86a51bdc7ea83584976840663 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sun, 24 Mar 2024 22:06:37 +0900 Subject: [PATCH 48/69] update PyTorch version and reorganize dependencies --- README-ja.md | 55 ++---------- README.md | 149 ++++---------------------------- docs/train_SDXL-en.md | 84 ++++++++++++++++++ requirements.txt | 8 +- sdxl_minimal_inference.py | 30 +++++-- sdxl_train_textual_inversion.py | 1 - 6 files changed, 136 insertions(+), 191 deletions(-) create mode 100644 docs/train_SDXL-en.md diff --git a/README-ja.md b/README-ja.md index 29c33a65..1d83c44f 100644 --- a/README-ja.md +++ b/README-ja.md @@ -1,7 +1,3 @@ -SDXLがサポートされました。sdxlブランチはmainブランチにマージされました。リポジトリを更新したときにはUpgradeの手順を実行してください。また accelerate のバージョンが上がっていますので、accelerate config を再度実行してください。 - -SDXL学習については[こちら](./README.md#sdxl-training)をご覧ください(英語です)。 - ## リポジトリについて Stable 
Diffusionの学習、画像生成、その他のスクリプトを入れたリポジトリです。 @@ -21,6 +17,7 @@ GUIやPowerShellスクリプトなど、より使いやすくする機能が[bma * [学習について、共通編](./docs/train_README-ja.md) : データ整備やオプションなど * [データセット設定](./docs/config_README-ja.md) +* [SDXL学習](./docs/train_SDXL-en.md) (英語版) * [DreamBoothの学習について](./docs/train_db_README-ja.md) * [fine-tuningのガイド](./docs/fine_tune_README_ja.md): * [LoRAの学習について](./docs/train_network_README-ja.md) @@ -44,9 +41,7 @@ PowerShellを使う場合、venvを使えるようにするためには以下の ## Windows環境でのインストール -スクリプトはPyTorch 2.0.1でテストしています。PyTorch 1.12.1でも動作すると思われます。 - -以下の例ではPyTorchは2.0.1/CUDA 11.8版をインストールします。CUDA 11.6版やPyTorch 1.12.1を使う場合は適宜書き換えください。 +スクリプトはPyTorch 2.1.1でテストしています。PyTorch 2.0.1、1.12.1でも動作すると思われます。 (なお、python -m venv~の行で「python」とだけ表示された場合、py -m venv~のようにpythonをpyに変更してください。) @@ -59,21 +54,21 @@ cd sd-scripts python -m venv venv .\venv\Scripts\activate -pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 +pip install torch==2.1.1 torchvision==0.16.1 --index-url https://download.pytorch.org/whl/cu118 pip install --upgrade -r requirements.txt -pip install xformers==0.0.20 +pip install xformers==0.0.23 --index-url https://download.pytorch.org/whl/cu118 accelerate config ``` コマンドプロンプトでも同一です。 -(注:``python -m venv venv`` のほうが ``python -m venv --system-site-packages venv`` より安全そうなため書き換えました。globalなpythonにパッケージがインストールしてあると、後者だといろいろと問題が起きます。) +注:`bitsandbytes==0.43.0`、`prodigyopt==1.0`、`lion-pytorch==0.0.6` は `requirements.txt` に含まれるようになりました。他のバージョンを使う場合は適宜インストールしてください。 + +この例では PyTorch および xformers は2.1.1/CUDA 11.8版をインストールします。CUDA 12.1版やPyTorch 1.12.1を使う場合は適宜書き換えください。たとえば CUDA 12.1版の場合は `pip install torch==2.1.1 torchvision==0.16.1 --index-url https://download.pytorch.org/whl/cu121` および `pip install xformers==0.0.23 --index-url https://download.pytorch.org/whl/cu121` としてください。 accelerate configの質問には以下のように答えてください。(bf16で学習する場合、最後の質問にはbf16と答えてください。) -※0.15.0から日本語環境では選択のためにカーソルキーを押すと落ちます(……)。数字キーの0、1、2……で選択できますので、そちらを使ってください。 - ```txt - This machine - No
distributed training @@ -87,41 +82,6 @@ accelerate configの質問には以下のように答えてください。(bf1 ※場合によって ``ValueError: fp16 mixed precision requires a GPU`` というエラーが出ることがあるようです。この場合、6番目の質問( ``What GPU(s) (by id) should be used for training on this machine as a comma-separated list? [all]:``)に「0」と答えてください。(id `0`のGPUが使われます。) -### オプション:`bitsandbytes`(8bit optimizer)を使う - -`bitsandbytes`はオプションになりました。Linuxでは通常通りpipでインストールできます(0.41.1または以降のバージョンを推奨)。 - -Windowsでは0.35.0または0.41.1を推奨します。 - -- `bitsandbytes` 0.35.0: 安定しているとみられるバージョンです。AdamW8bitは使用できますが、他のいくつかの8bit optimizer、学習時の`full_bf16`オプションは使用できません。 -- `bitsandbytes` 0.41.1: Lion8bit、PagedAdamW8bit、PagedLion8bitをサポートします。`full_bf16`が使用できます。 - -注:`bitsandbytes` 0.35.0から0.41.0までのバージョンには問題があるようです。 https://github.com/TimDettmers/bitsandbytes/issues/659 - -以下の手順に従い、`bitsandbytes`をインストールしてください。 - -### 0.35.0を使う場合 - -PowerShellの例です。コマンドプロンプトではcpの代わりにcopyを使ってください。 - -```powershell -cd sd-scripts -.\venv\Scripts\activate -pip install bitsandbytes==0.35.0 - -cp .\bitsandbytes_windows\*.dll .\venv\Lib\site-packages\bitsandbytes\ -cp .\bitsandbytes_windows\cextension.py .\venv\Lib\site-packages\bitsandbytes\cextension.py -cp .\bitsandbytes_windows\main.py .\venv\Lib\site-packages\bitsandbytes\cuda_setup\main.py -``` - -### 0.41.1を使う場合 - -jllllll氏の配布されている[こちら](https://github.com/jllllll/bitsandbytes-windows-webui) または他の場所から、Windows用のwhlファイルをインストールしてください。 - -```powershell -python -m pip install bitsandbytes==0.41.1 --prefer-binary --extra-index-url=https://jllllll.github.io/bitsandbytes-windows-webui -``` - ## アップグレード 新しいリリースがあった場合、以下のコマンドで更新できます。 @@ -151,4 +111,3 @@ Conv2d 3x3への拡大は [cloneofsimo氏](https://github.com/cloneofsimo/lora) [BLIP](https://github.com/salesforce/BLIP): BSD-3-Clause - diff --git a/README.md b/README.md index a19f7968..ef26acab 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -__SDXL is now supported. The sdxl branch has been merged into the main branch. If you update the repository, please follow the upgrade instructions. 
Also, the version of accelerate has been updated, so please run accelerate config again.__ The documentation for SDXL training is [here](./README.md#sdxl-training). - This repository contains training, generation and utility scripts for Stable Diffusion. [__Change History__](#change-history) is moved to the bottom of the page. @@ -20,9 +18,9 @@ This repository contains the scripts for: ## About requirements.txt -These files do not contain requirements for PyTorch. Because the versions of them depend on your environment. Please install PyTorch at first (see installation guide below.) +The file does not contain requirements for PyTorch. Because the version of PyTorch depends on the environment, it is not included in the file. Please install PyTorch first according to the environment. See installation instructions below. -The scripts are tested with Pytorch 2.0.1. 1.12.1 is not tested but should work. +The scripts are tested with PyTorch 2.1.1. 2.0.1 and 1.12.1 are not tested but should work. ## Links to usage documentation @@ -32,12 +30,13 @@ Most of the documents are written in Japanese. * [Training guide - common](./docs/train_README-ja.md) : data preparation, options etc...
* [Chinese version](./docs/train_README-zh.md) +* [SDXL training](./docs/train_SDXL-en.md) (English version) * [Dataset config](./docs/config_README-ja.md) * [English version](./docs/config_README-en.md) * [DreamBooth training guide](./docs/train_db_README-ja.md) * [Step by Step fine-tuning guide](./docs/fine_tune_README_ja.md): -* [training LoRA](./docs/train_network_README-ja.md) -* [training Textual Inversion](./docs/train_ti_README-ja.md) +* [Training LoRA](./docs/train_network_README-ja.md) +* [Training Textual Inversion](./docs/train_ti_README-ja.md) * [Image generation](./docs/gen_img_README-ja.md) * note.com [Model conversion](https://note.com/kohya_ss/n/n374f316fe4ad) @@ -65,14 +64,18 @@ cd sd-scripts python -m venv venv .\venv\Scripts\activate -pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 +pip install torch==2.1.1 torchvision==0.16.1 --index-url https://download.pytorch.org/whl/cu118 pip install --upgrade -r requirements.txt -pip install xformers==0.0.20 +pip install xformers==0.0.23 --index-url https://download.pytorch.org/whl/cu118 accelerate config ``` -__Note:__ Now bitsandbytes is optional. Please install any version of bitsandbytes as needed. Installation instructions are in the following section. +If `python -m venv` shows only `python`, change `python` to `py`. + +__Note:__ Now `bitsandbytes==0.43.0`, `prodigyopt==1.0` and `lion-pytorch==0.0.6` are included in the requirements.txt. If you'd like to use another version, please install it manually. + +This installation is for CUDA 11.8. If you use a different version of CUDA, please install the appropriate version of PyTorch and xformers. For example, if you use CUDA 12, please run `pip install torch==2.1.1 torchvision==0.16.1 --index-url https://download.pytorch.org/whl/cu121` and `pip install xformers==0.0.23 --index-url https://download.pytorch.org/whl/cu121`.
+ +### 学習中のサンプル画像生成 + +プロンプトファイルは例えば以下のようになります。 + +``` +# prompt 1 +masterpiece, best quality, (1girl), in white shirts, upper body, looking at viewer, simple background --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 768 --h 768 --d 1 --l 7.5 --s 28 + +# prompt 2 +masterpiece, best quality, 1boy, in business suit, standing at street, looking back --n (low quality, worst quality), bad anatomy,bad composition, poor, low effort --w 576 --h 832 --d 2 --l 5.5 --s 40 +``` + + `#` で始まる行はコメントになります。`--n` のように「ハイフン二個+英小文字」の形でオプションを指定できます。以下が使用可能できます。 + + * `--n` Negative prompt up to the next option. + * `--w` Specifies the width of the generated image. + * `--h` Specifies the height of the generated image. + * `--d` Specifies the seed of the generated image. + * `--l` Specifies the CFG scale of the generated image. + * `--s` Specifies the number of steps in the generation. + + `( )` や `[ ]` などの重みづけも動作します。 diff --git a/README.md b/README.md index 5282c1f6..1ca699be 100644 --- a/README.md +++ b/README.md @@ -137,15 +137,16 @@ The majority of scripts is licensed under ASL 2.0 (including codes from Diffuser ## Change History -### Mar XX, 2024 / 2024/3/XX: v0.8.6 +### Apr 7, 2024 / 2024-04-07: v0.8.6 #### Highlights - The dependent libraries are updated. Please see [Upgrade](#upgrade) and update the libraries. - Especially `imagesize` is newly added, so if you cannot update the libraries immediately, please install with `pip install imagesize==1.4.1` separately. - `bitsandbytes==0.43.0`, `prodigyopt==1.0`, `lion-pytorch==0.0.6` are included in the requirements.txt. + - `bitsandbytes` no longer requires complex procedures as it now officially supports Windows. - Also, the PyTorch version is updated to 2.1.2 (PyTorch does not need to be updated immediately). In the upgrade procedure, PyTorch is not updated, so please manually install or update torch, torchvision, xformers if necessary (see [Upgrade PyTorch](#upgrade-pytorch)). 
-- When logging to wandb is enabled, the entire command line is exposed. Therefore, it is recommended to write the API key of wandb and the token of HuggingFace in the configuration file (`.toml`). Thanks to bghira for raising the issue. +- When logging to wandb is enabled, the entire command line is exposed. Therefore, it is recommended to write the wandb API key and HuggingFace token in the configuration file (`.toml`). Thanks to bghira for raising the issue. - A warning is displayed at the start of training if such information is included in the command line. - Also, if there is an absolute path, the path may be exposed, so it is recommended to specify a relative path or write it in the configuration file. In such cases, an INFO log is displayed. - See [#1123](https://github.com/kohya-ss/sd-scripts/pull/1123) and PR [#1240](https://github.com/kohya-ss/sd-scripts/pull/1240) for details. @@ -223,6 +224,7 @@ See PR [#1228](https://github.com/kohya-ss/sd-scripts/pull/1228/) for details. - 依存ライブラリが更新されました。[アップグレード](./README-ja.md#アップグレード) を参照しライブラリを更新してください。 - 特に `imagesize` が新しく追加されていますので、すぐにライブラリの更新ができない場合は `pip install imagesize==1.4.1` で個別にインストールしてください。 - `bitsandbytes==0.43.0`、`prodigyopt==1.0`、`lion-pytorch==0.0.6` が requirements.txt に含まれるようになりました。 + - `bitsandbytes` が公式に Windows をサポートしたため複雑な手順が不要になりました。 - また PyTorch のバージョンを 2.1.2 に更新しました。PyTorch はすぐに更新する必要はありません。更新時は、アップグレードの手順では PyTorch が更新されませんので、torch、torchvision、xformers を手動でインストールしてください。 - wandb へのログ出力が有効の場合、コマンドライン全体が公開されます。そのため、コマンドラインに wandb の API キーや HuggingFace のトークンなどが含まれる場合、設定ファイル(`.toml`)への記載をお勧めします。問題提起していただいた bghira 氏に感謝します。 - このような場合には学習開始時に警告が表示されます。 @@ -315,27 +317,14 @@ The LoRA supported by `train_network.py` has been named to avoid confusion. The In addition to 1., LoRA for Conv2d layers with 3x3 kernel -LoRA-LierLa is the default LoRA type for `train_network.py` (without `conv_dim` network arg).
LoRA-LierLa can be used with [our extension](https://github.com/kohya-ss/sd-webui-additional-networks) for AUTOMATIC1111's Web UI, or with the built-in LoRA feature of the Web UI. +LoRA-LierLa is the default LoRA type for `train_network.py` (without `conv_dim` network arg). + -### LoRAの名称について - -`train_network.py` がサポートするLoRAについて、混乱を避けるため名前を付けました。ドキュメントは更新済みです。以下は当リポジトリ内の独自の名称です。 - -1. __LoRA-LierLa__ : (LoRA for __Li__ n __e__ a __r__ __La__ yers、リエラと読みます) - - Linear 層およびカーネルサイズ 1x1 の Conv2d 層に適用されるLoRA - -2. __LoRA-C3Lier__ : (LoRA for __C__ olutional layers with __3__ x3 Kernel and __Li__ n __e__ a __r__ layers、セリアと読みます) - - 1.に加え、カーネルサイズ 3x3 の Conv2d 層に適用されるLoRA - -LoRA-LierLa は[Web UI向け拡張](https://github.com/kohya-ss/sd-webui-additional-networks)、またはAUTOMATIC1111氏のWeb UIのLoRA機能で使用することができます。 - -LoRA-C3Lierを使いWeb UIで生成するには拡張を使用してください。 - -## Sample image generation during training +### Sample image generation during training A prompt file might look like this, for example ``` @@ -356,26 +345,3 @@ masterpiece, best quality, 1boy, in business suit, standing at street, looking b * `--s` Specifies the number of steps in the generation. The prompt weighting such as `( )` and `[ ]` are working. - -## サンプル画像生成 -プロンプトファイルは例えば以下のようになります。 - -``` -# prompt 1 -masterpiece, best quality, (1girl), in white shirts, upper body, looking at viewer, simple background --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 768 --h 768 --d 1 --l 7.5 --s 28 - -# prompt 2 -masterpiece, best quality, 1boy, in business suit, standing at street, looking back --n (low quality, worst quality), bad anatomy,bad composition, poor, low effort --w 576 --h 832 --d 2 --l 5.5 --s 40 -``` - - `#` で始まる行はコメントになります。`--n` のように「ハイフン二個+英小文字」の形でオプションを指定できます。以下が使用可能できます。 - - * `--n` Negative prompt up to the next option. - * `--w` Specifies the width of the generated image. - * `--h` Specifies the height of the generated image. - * `--d` Specifies the seed of the generated image. 
- * `--l` Specifies the CFG scale of the generated image. - * `--s` Specifies the number of steps in the generation. - - `( )` や `[ ]` などの重みづけも動作します。 -