From d42431d73a2eafb60446295ef52c6628133d2ad5 Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Tue, 28 Mar 2023 00:49:09 +0900 Subject: [PATCH 01/11] Added feature to upload to huggingface --- library/train_util.py | 13 +++++++-- library/utils.py | 64 +++++++++++++++++++++++++++++++++++++++++++ train_network.py | 3 ++ 3 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 library/utils.py diff --git a/library/train_util.py b/library/train_util.py index 59dbc44c..179f23e4 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -58,6 +58,7 @@ from torch import einsum import safetensors.torch from library.lpw_stable_diffusion import StableDiffusionLongPromptWeightingPipeline import library.model_util as model_util +import library.utils as utils # Tokenizer: checkpointから読み込むのではなくあらかじめ提供されているものを使う TOKENIZER_PATH = "openai/clip-vit-large-patch14" @@ -1441,7 +1442,6 @@ def glob_images_pathlib(dir_path, recursive): # endregion - # region モジュール入れ替え部 """ 高速化のためのモジュール入れ替え @@ -1896,6 +1896,12 @@ def add_optimizer_arguments(parser: argparse.ArgumentParser): def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: bool): parser.add_argument("--output_dir", type=str, default=None, help="directory to output trained model / 学習後のモデル出力先ディレクトリ") parser.add_argument("--output_name", type=str, default=None, help="base name of trained model file / 学習後のモデルの拡張子を除くファイル名") + parser.add_argument("--huggingface_repo_id", type=str, default=None, help="huggingface repo name to upload model / huggingfaceにアップロードするモデルのリポジトリ名") + parser.add_argument("--huggingface_repo_type", type=str, default=None, help="huggingface repo type to upload model / huggingfaceにアップロードするモデルのリポジトリの種類") + parser.add_argument("--huggingface_path_in_repo", type=str, default=None, help="huggingface model path to upload model / huggingfaceにアップロードするモデルのパス") + parser.add_argument("--huggingface_token", type=str, default=None, help="huggingface token to upload model / huggingfaceにアップロードするモデルのトークン") + parser.add_argument("--huggingface_repo_visibility", type=str, default=None, help="huggingface model visibility / huggingfaceにアップロードするモデルの公開設定") + parser.add_argument("--save_state_to_huggingface", action="store_true", help="save state to huggingface / huggingfaceにstateを保存する") parser.add_argument( "--save_precision", type=str, @@ -2803,7 +2809,10 @@ def save_sd_model_on_epoch_end( def save_state_on_epoch_end(args: argparse.Namespace, accelerator, model_name, epoch_no): print("saving state.") - accelerator.save_state(os.path.join(args.output_dir, EPOCH_STATE_NAME.format(model_name, epoch_no))) + state_dir = os.path.join(args.output_dir, EPOCH_STATE_NAME.format(model_name, epoch_no)) + accelerator.save_state(state_dir) + if args.save_state_to_huggingface: + utils.huggingface_upload(state_dir, args, "/" + EPOCH_STATE_NAME.format(model_name, epoch_no)) last_n_epochs = args.save_last_n_epochs_state if args.save_last_n_epochs_state else args.save_last_n_epochs if last_n_epochs is not None: diff --git a/library/utils.py b/library/utils.py new file mode 100644 index 00000000..68c51fc3 --- /dev/null +++ b/library/utils.py @@ -0,0 +1,64 @@ +import argparse +import os +from pathlib import Path +import threading +from typing import * + +from huggingface_hub import HfApi + + +def fire_in_thread(f): + def wrapped(*args, **kwargs): + threading.Thread(target=f, args=args, kwargs=kwargs).start() + return wrapped + + +def huggingface_exists_repo( + repo_id: str, repo_type: str, revision: str = "main", hf_token: str = None +): + api = HfApi() + try: + api.repo_info( + repo_id=repo_id, token=hf_token, revision=revision, repo_type=repo_type + ) + return True + except: + return False + + +@fire_in_thread +def huggingface_upload( + src: Union[str, Path, bytes, BinaryIO], + args: argparse.Namespace, + dest_suffix: str = "", +): + repo_id = args.huggingface_repo_id + repo_type = args.huggingface_repo_type + hf_token = args.huggingface_token + path_in_repo = args.huggingface_path_in_repo + dest_suffix + private = args.huggingface_repo_visibility == "private" + api = HfApi() + if not huggingface_exists_repo( + repo_id=repo_id, repo_type=repo_type, hf_token=hf_token + ): + api.create_repo( + token=hf_token, repo_id=repo_id, repo_type=repo_type, private=private + ) + + is_folder = (type(src) == str and os.path.isdir(src)) or ( + isinstance(src, Path) and src.is_dir() + ) + if is_folder: + api.upload_folder( + repo_id=repo_id, + repo_type=repo_type, + folder_path=src, + path_in_repo=path_in_repo, + ) + else: + api.upload_file( + repo_id=repo_id, + repo_type=repo_type, + path_or_fileobj=src, + path_in_repo=path_in_repo, + ) diff --git a/train_network.py b/train_network.py index 2b824018..b641e65c 100644 --- a/train_network.py +++ b/train_network.py @@ -24,6 +24,7 @@ from library.config_util import ( ConfigSanitizer, BlueprintGenerator, ) +import library.utils as utils import library.custom_train_functions as custom_train_functions from library.custom_train_functions import apply_snr_weight @@ -626,6 +627,7 @@ def train(args): metadata["ss_training_finished_at"] = str(time.time()) print(f"saving checkpoint: {ckpt_file}") unwrap_model(network).save_weights(ckpt_file, save_dtype, minimum_metadata if args.no_metadata else metadata) + utils.huggingface_upload(ckpt_file, args, "/" + ckpt_name) def remove_old_func(old_epoch_no): old_ckpt_name = train_util.EPOCH_FILE_NAME.format(model_name, old_epoch_no) + "." + args.save_model_as @@ -665,6 +667,7 @@ def train(args): print(f"save trained model to {ckpt_file}") network.save_weights(ckpt_file, save_dtype, minimum_metadata if args.no_metadata else metadata) + utils.huggingface_upload(ckpt_file, args, "/" + ckpt_name) print("model saved.") From 054fb3308c895c5ac137d190c6bfbbb72c6f8497 Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Tue, 28 Mar 2023 08:06:33 +0900 Subject: [PATCH 02/11] use access token --- library/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/library/utils.py b/library/utils.py index 68c51fc3..4b9e5ff5 100644 --- a/library/utils.py +++ b/library/utils.py @@ -39,7 +39,7 @@ def huggingface_upload( private = args.huggingface_repo_visibility == "private" api = HfApi() if not huggingface_exists_repo( - repo_id=repo_id, repo_type=repo_type, hf_token=hf_token + repo_id=repo_id, repo_type=repo_type, token=hf_token ): api.create_repo( token=hf_token, repo_id=repo_id, repo_type=repo_type, private=private @@ -54,6 +54,7 @@ def huggingface_upload( repo_type=repo_type, folder_path=src, path_in_repo=path_in_repo, + token=hf_token, ) else: api.upload_file( @@ -61,4 +62,5 @@ def huggingface_upload( repo_type=repo_type, path_or_fileobj=src, path_in_repo=path_in_repo, + token=hf_token, ) From 45381b188ca3129ef064b345e97101346f06c2d7 Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Tue, 28 Mar 2023 08:59:23 +0900 Subject: [PATCH 03/11] small fix --- library/utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/library/utils.py b/library/utils.py index 4b9e5ff5..3c3727d2 100644 --- a/library/utils.py +++ b/library/utils.py @@ -14,12 +14,12 @@ def fire_in_thread(f): def huggingface_exists_repo( - repo_id: str, repo_type: str, revision: str = "main", hf_token: str = None + repo_id: str, repo_type: str, revision: str = "main", token: str = None ): api = HfApi() try: api.repo_info( - repo_id=repo_id, token=hf_token, revision=revision, repo_type=repo_type + repo_id=repo_id, token=token, revision=revision, repo_type=repo_type ) return True except: @@ -34,15 +34,15 @@ def huggingface_upload( ): repo_id = args.huggingface_repo_id repo_type = args.huggingface_repo_type - hf_token = args.huggingface_token + token = args.huggingface_token path_in_repo = args.huggingface_path_in_repo + dest_suffix private = args.huggingface_repo_visibility == "private" api = HfApi() if not huggingface_exists_repo( - repo_id=repo_id, repo_type=repo_type, token=hf_token + repo_id=repo_id, repo_type=repo_type, token=token ): api.create_repo( - token=hf_token, repo_id=repo_id, repo_type=repo_type, private=private + token=token, repo_id=repo_id, repo_type=repo_type, private=private ) is_folder = (type(src) == str and os.path.isdir(src)) or ( @@ -54,7 +54,7 @@ def huggingface_upload( repo_type=repo_type, folder_path=src, path_in_repo=path_in_repo, - token=hf_token, + token=token, ) else: api.upload_file( @@ -62,5 +62,5 @@ def huggingface_upload( repo_type=repo_type, path_or_fileobj=src, path_in_repo=path_in_repo, - token=hf_token, + token=token, ) From a7d302e196fa566809a169077a1a6412df42ed9b Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Thu, 30 Mar 2023 16:49:03 +0900 Subject: [PATCH 04/11] write a random seed to metadata --- train_network.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/train_network.py b/train_network.py index b641e65c..c951b150 100644 --- a/train_network.py +++ b/train_network.py @@ -57,8 +57,9 @@ def train(args): use_dreambooth_method = args.in_json is None use_user_config = args.dataset_config is not None - if args.seed is not None: - set_seed(args.seed) + if args.seed is None: + args.seed = random.randint(0, 2**32) + set_seed(args.seed) tokenizer = train_util.load_tokenizer(args) From b5ff4e816f7b69f0ab0e8081a9b099fd5bb1a8f0 Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Thu, 30 Mar 2023 23:36:42 +0900 Subject: [PATCH 05/11] resume from huggingface repository --- fine_tune.py | 4 +-- library/huggingface_util.py | 71 +++++++++++++++++++++++++++++++++++++ library/train_util.py | 61 +++++++++++++++++++++++++++++-- library/utils.py | 60 +------------------------------ requirements.txt | 2 +- train_db.py | 4 +-- train_network.py | 10 +++--- train_textual_inversion.py | 4 +-- 8 files changed, 139 insertions(+), 77 deletions(-) create mode 100644 library/huggingface_util.py diff --git a/fine_tune.py b/fine_tune.py index 637a729a..289fbeb8 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -231,9 +231,7 @@ def train(args): train_util.patch_accelerator_for_fp16_training(accelerator) # resumeする - if args.resume is not None: - print(f"resume training from state: {args.resume}") - accelerator.load_state(args.resume) + train_util.resume(accelerator, args) # epoch数を計算する num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) diff --git a/library/huggingface_util.py b/library/huggingface_util.py new file mode 100644 index 00000000..353189c0 --- /dev/null +++ b/library/huggingface_util.py @@ -0,0 +1,71 @@ +from typing import * +from huggingface_hub import HfApi +from pathlib import Path +import argparse +import os + +from library.utils import fire_in_thread + + +def exists_repo( + repo_id: str, repo_type: str, revision: str = "main", token: str = None +): + api = HfApi( + token=token, + ) + try: + api.repo_info(repo_id=repo_id, revision=revision, repo_type=repo_type) + return True + except: + return False + + +@fire_in_thread +def upload( + src: Union[str, Path, bytes, BinaryIO], + args: argparse.Namespace, + dest_suffix: str = "", +): + repo_id = args.huggingface_repo_id + repo_type = args.huggingface_repo_type + token = args.huggingface_token + path_in_repo = args.huggingface_path_in_repo + dest_suffix + private = args.huggingface_repo_visibility == "private" + api = HfApi(token=token) + if not exists_repo(repo_id=repo_id, repo_type=repo_type, token=token): + api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private) + + is_folder = (type(src) == str and os.path.isdir(src)) or ( + isinstance(src, Path) and src.is_dir() + ) + if is_folder: + api.upload_folder( + repo_id=repo_id, + repo_type=repo_type, + folder_path=src, + path_in_repo=path_in_repo, + ) + else: + api.upload_file( + repo_id=repo_id, + repo_type=repo_type, + path_or_fileobj=src, + path_in_repo=path_in_repo, + ) + + +def list_dir( + repo_id: str, + subfolder: str, + repo_type: str, + revision: str = "main", + token: str = None, +): + api = HfApi( + token=token, + ) + repo_info = api.repo_info(repo_id=repo_id, revision=revision, repo_type=repo_type) + file_list = [ + file for file in repo_info.siblings if file.rfilename.startswith(subfolder) + ] + return file_list diff --git a/library/train_util.py b/library/train_util.py index 179f23e4..e4e91ee2 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -2,6 +2,7 @@ import argparse import ast +import asyncio import importlib import json import pathlib @@ -49,6 +50,7 @@ from diffusers import ( KDPM2DiscreteScheduler, KDPM2AncestralDiscreteScheduler, ) +from huggingface_hub import hf_hub_download import albumentations as albu import numpy as np from PIL import Image @@ -58,7 +60,7 @@ from torch import einsum import safetensors.torch from library.lpw_stable_diffusion import StableDiffusionLongPromptWeightingPipeline import library.model_util as model_util -import library.utils as utils +import library.huggingface_util as huggingface_util # Tokenizer: checkpointから読み込むのではなくあらかじめ提供されているものを使う TOKENIZER_PATH = "openai/clip-vit-large-patch14" @@ -1902,6 +1904,11 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: parser.add_argument("--huggingface_token", type=str, default=None, help="huggingface token to upload model / huggingfaceにアップロードするモデルのトークン") parser.add_argument("--huggingface_repo_visibility", type=str, default=None, help="huggingface model visibility / huggingfaceにアップロードするモデルの公開設定") parser.add_argument("--save_state_to_huggingface", action="store_true", help="save state to huggingface / huggingfaceにstateを保存する") + parser.add_argument( + "--resume_from_huggingface", + action="store_true", + help="resume from huggingface (ex: --resume {repo_id}/{path_in_repo}:{revision}:{repo_type}) / huggingfaceから学習を再開する(例: --resume {repo_id}/{path_in_repo}:{revision}:{repo_type})", + ) parser.add_argument( "--save_precision", type=str, @@ -2266,6 +2273,56 @@ def read_config_from_file(args: argparse.Namespace, parser: argparse.ArgumentPar # region utils +def resume(accelerator, args): + if args.resume: + print(f"resume training from state: {args.resume}") + if args.resume_from_huggingface: + repo_id = args.resume.split("/")[0] + "/" + args.resume.split("/")[1] + path_in_repo = "/".join(args.resume.split("/")[2:]) + revision = None + repo_type = None + if ":" in path_in_repo: + divided = path_in_repo.split(":") + if len(divided) == 2: + path_in_repo, revision = divided + repo_type = "model" + else: + path_in_repo, revision, repo_type = divided + print( + f"Downloading state from huggingface: {repo_id}/{path_in_repo}@{revision}" + ) + + list_files = huggingface_util.list_dir( + repo_id=repo_id, + subfolder=path_in_repo, + revision=revision, + token=args.huggingface_token, + repo_type=repo_type, + ) + + async def download(filename) -> str: + def task(): + return hf_hub_download( + repo_id=repo_id, + filename=filename, + revision=revision, + repo_type=repo_type, + token=args.huggingface_token, + ) + + return await asyncio.get_event_loop().run_in_executor(None, task) + + loop = asyncio.get_event_loop() + results = loop.run_until_complete( + asyncio.gather( + *[download(filename=filename.rfilename) for filename in list_files] + ) + ) + dirname = os.path.dirname(results[0]) + accelerator.load_state(dirname) + else: + accelerator.load_state(args.resume) + def get_optimizer(args, trainable_params): # "Optimizer to use: AdamW, AdamW8bit, Lion, SGDNesterov, SGDNesterov8bit, DAdaptation, Adafactor" @@ -2812,7 +2869,7 @@ def save_state_on_epoch_end(args: argparse.Namespace, accelerator, model_name, e state_dir = os.path.join(args.output_dir, EPOCH_STATE_NAME.format(model_name, epoch_no)) accelerator.save_state(state_dir) if args.save_state_to_huggingface: - utils.huggingface_upload(state_dir, args, "/" + EPOCH_STATE_NAME.format(model_name, epoch_no)) + huggingface_util.upload(state_dir, args, "/" + EPOCH_STATE_NAME.format(model_name, epoch_no)) last_n_epochs = args.save_last_n_epochs_state if args.save_last_n_epochs_state else args.save_last_n_epochs if last_n_epochs is not None: diff --git a/library/utils.py b/library/utils.py index 3c3727d2..a6b05917 100644 --- a/library/utils.py +++ b/library/utils.py @@ -1,66 +1,8 @@ -import argparse -import os -from pathlib import Path import threading from typing import * -from huggingface_hub import HfApi - def fire_in_thread(f): def wrapped(*args, **kwargs): threading.Thread(target=f, args=args, kwargs=kwargs).start() - return wrapped - - -def huggingface_exists_repo( - repo_id: str, repo_type: str, revision: str = "main", token: str = None -): - api = HfApi() - try: - api.repo_info( - repo_id=repo_id, token=token, revision=revision, repo_type=repo_type - ) - return True - except: - return False - - -@fire_in_thread -def huggingface_upload( - src: Union[str, Path, bytes, BinaryIO], - args: argparse.Namespace, - dest_suffix: str = "", -): - repo_id = args.huggingface_repo_id - repo_type = args.huggingface_repo_type - token = args.huggingface_token - path_in_repo = args.huggingface_path_in_repo + dest_suffix - private = args.huggingface_repo_visibility == "private" - api = HfApi() - if not huggingface_exists_repo( - repo_id=repo_id, repo_type=repo_type, token=token - ): - api.create_repo( - token=token, repo_id=repo_id, repo_type=repo_type, private=private - ) - - is_folder = (type(src) == str and os.path.isdir(src)) or ( - isinstance(src, Path) and src.is_dir() - ) - if is_folder: - api.upload_folder( - repo_id=repo_id, - repo_type=repo_type, - folder_path=src, - path_in_repo=path_in_repo, - token=token, - ) - else: - api.upload_file( - repo_id=repo_id, - repo_type=repo_type, - path_or_fileobj=src, - path_in_repo=path_in_repo, - token=token, - ) + return wrapped \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index eea1c663..d3164894 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,6 @@ fairscale==0.4.13 # for WD14 captioning # tensorflow<2.11 tensorflow==2.10.1 -huggingface-hub==0.12.0 +huggingface-hub==0.13.3 # for kohya_ss library . diff --git a/train_db.py b/train_db.py index b3eead94..0b7f2d37 100644 --- a/train_db.py +++ b/train_db.py @@ -202,9 +202,7 @@ def train(args): train_util.patch_accelerator_for_fp16_training(accelerator) # resumeする - if args.resume is not None: - print(f"resume training from state: {args.resume}") - accelerator.load_state(args.resume) + train_util.resume(accelerator, args) # epoch数を計算する num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) diff --git a/train_network.py b/train_network.py index c951b150..8cfe1ab8 100644 --- a/train_network.py +++ b/train_network.py @@ -24,7 +24,7 @@ from library.config_util import ( ConfigSanitizer, BlueprintGenerator, ) -import library.utils as utils +import library.huggingface_util as huggingface_util import library.custom_train_functions as custom_train_functions from library.custom_train_functions import apply_snr_weight @@ -285,9 +285,7 @@ def train(args): train_util.patch_accelerator_for_fp16_training(accelerator) # resumeする - if args.resume is not None: - print(f"resume training from state: {args.resume}") - accelerator.load_state(args.resume) + train_util.resume(accelerator, args) # epoch数を計算する num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) @@ -628,7 +626,7 @@ def train(args): metadata["ss_training_finished_at"] = str(time.time()) print(f"saving checkpoint: {ckpt_file}") unwrap_model(network).save_weights(ckpt_file, save_dtype, minimum_metadata if args.no_metadata else metadata) - utils.huggingface_upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) def remove_old_func(old_epoch_no): old_ckpt_name = train_util.EPOCH_FILE_NAME.format(model_name, old_epoch_no) + "." + args.save_model_as @@ -668,7 +666,7 @@ def train(args): print(f"save trained model to {ckpt_file}") network.save_weights(ckpt_file, save_dtype, minimum_metadata if args.no_metadata else metadata) - utils.huggingface_upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) print("model saved.") diff --git a/train_textual_inversion.py b/train_textual_inversion.py index f279370a..c5bacf3b 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -304,9 +304,7 @@ def train(args): text_encoder.to(weight_dtype) # resumeする - if args.resume is not None: - print(f"resume training from state: {args.resume}") - accelerator.load_state(args.resume) + train_util.resume(accelerator, args) # epoch数を計算する num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) From b5c7937f8d01ba86f9ff59c1ffdb08992df29306 Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Sat, 1 Apr 2023 23:08:27 +0900 Subject: [PATCH 06/11] don't run when not needed --- train_network.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/train_network.py b/train_network.py index 8cfe1ab8..85b01def 100644 --- a/train_network.py +++ b/train_network.py @@ -626,7 +626,8 @@ def train(args): metadata["ss_training_finished_at"] = str(time.time()) print(f"saving checkpoint: {ckpt_file}") unwrap_model(network).save_weights(ckpt_file, save_dtype, minimum_metadata if args.no_metadata else metadata) - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + if args.huggingface_repo_id is not None: + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) def remove_old_func(old_epoch_no): old_ckpt_name = train_util.EPOCH_FILE_NAME.format(model_name, old_epoch_no) + "." + args.save_model_as @@ -666,7 +667,8 @@ def train(args): print(f"save trained model to {ckpt_file}") network.save_weights(ckpt_file, save_dtype, minimum_metadata if args.no_metadata else metadata) - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + if args.huggingface_repo_id is not None: + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) print("model saved.") From 3cc4939dd38d52a077b97e53260631b4da755628 Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Sat, 1 Apr 2023 23:16:02 +0900 Subject: [PATCH 07/11] Implement huggingface upload for all scripts --- library/train_util.py | 2 ++ train_textual_inversion.py | 3 +++ train_textual_inversion_XTI.py | 3 +++ 3 files changed, 8 insertions(+) diff --git a/library/train_util.py b/library/train_util.py index e4e91ee2..4b9e3ec4 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -2830,6 +2830,8 @@ def save_sd_model_on_epoch_end( model_util.save_stable_diffusion_checkpoint( args.v2, ckpt_file, text_encoder, unet, src_path, epoch_no, global_step, save_dtype, vae ) + if args.huggingface_repo_id is not None: + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) def remove_sd(old_epoch_no): _, old_ckpt_name = get_epoch_ckpt_name(args, use_safetensors, old_epoch_no) diff --git a/train_textual_inversion.py b/train_textual_inversion.py index c5bacf3b..c4b04554 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -13,6 +13,7 @@ import diffusers from diffusers import DDPMScheduler import library.train_util as train_util +import library.huggingface_util as huggingface_util import library.config_util as config_util from library.config_util import ( ConfigSanitizer, @@ -450,6 +451,8 @@ def train(args): ckpt_file = os.path.join(args.output_dir, ckpt_name) print(f"saving checkpoint: {ckpt_file}") save_weights(ckpt_file, updated_embs, save_dtype) + if args.huggingface_repo_id is not None: + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) def remove_old_func(old_epoch_no): old_ckpt_name = train_util.EPOCH_FILE_NAME.format(model_name, old_epoch_no) + "." + args.save_model_as diff --git a/train_textual_inversion_XTI.py b/train_textual_inversion_XTI.py index 74e9bc2e..58c79142 100644 --- a/train_textual_inversion_XTI.py +++ b/train_textual_inversion_XTI.py @@ -13,6 +13,7 @@ import diffusers from diffusers import DDPMScheduler import library.train_util as train_util +import library.huggingface_util as huggingface_util import library.config_util as config_util from library.config_util import ( ConfigSanitizer, @@ -493,6 +494,8 @@ def train(args): ckpt_file = os.path.join(args.output_dir, ckpt_name) print(f"saving checkpoint: {ckpt_file}") save_weights(ckpt_file, updated_embs, save_dtype) + if args.huggingface_repo_id is not None: + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) def remove_old_func(old_epoch_no): old_ckpt_name = train_util.EPOCH_FILE_NAME.format(model_name, old_epoch_no) + "." + args.save_model_as From c4a11e5a5a988c5263a1e62bbacb741bc079f374 Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Sat, 1 Apr 2023 23:17:48 +0900 Subject: [PATCH 08/11] fix help --- library/train_util.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/library/train_util.py b/library/train_util.py index 4b9e3ec4..768d5b32 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1898,11 +1898,11 @@ def add_optimizer_arguments(parser: argparse.ArgumentParser): def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: bool): parser.add_argument("--output_dir", type=str, default=None, help="directory to output trained model / 学習後のモデル出力先ディレクトリ") parser.add_argument("--output_name", type=str, default=None, help="base name of trained model file / 学習後のモデルの拡張子を除くファイル名") - parser.add_argument("--huggingface_repo_id", type=str, default=None, help="huggingface repo name to upload model / huggingfaceにアップロードするモデルのリポジトリ名") - parser.add_argument("--huggingface_repo_type", type=str, default=None, help="huggingface repo type to upload model / huggingfaceにアップロードするモデルのリポジトリの種類") - parser.add_argument("--huggingface_path_in_repo", type=str, default=None, help="huggingface model path to upload model / huggingfaceにアップロードするモデルのパス") - parser.add_argument("--huggingface_token", type=str, default=None, help="huggingface token to upload model / huggingfaceにアップロードするモデルのトークン") - parser.add_argument("--huggingface_repo_visibility", type=str, default=None, help="huggingface model visibility / huggingfaceにアップロードするモデルの公開設定") + parser.add_argument("--huggingface_repo_id", type=str, default=None, help="huggingface repo name to upload / huggingfaceにアップロードするリポジトリ名") + parser.add_argument("--huggingface_repo_type", type=str, default=None, help="huggingface repo type to upload / huggingfaceにアップロードするリポジトリの種類") + parser.add_argument("--huggingface_path_in_repo", type=str, default=None, help="huggingface model path to upload files / huggingfaceにアップロードするファイルのパス") + parser.add_argument("--huggingface_token", type=str, default=None, help="huggingface token / huggingfaceのトークン") + parser.add_argument("--huggingface_repo_visibility", type=str, default=None, help="huggingface repository visibility / huggingfaceにアップロードするリポジトリの公開設定") parser.add_argument("--save_state_to_huggingface", action="store_true", help="save state to huggingface / huggingfaceにstateを保存する") parser.add_argument( "--resume_from_huggingface", From 8bfa50e2831f0f99fc8dc575cf1f3ff5e103e868 Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Sun, 2 Apr 2023 00:10:19 +0900 Subject: [PATCH 09/11] small fix --- library/train_util.py | 6 ++++++ train_textual_inversion.py | 2 ++ train_textual_inversion_XTI.py | 2 ++ 3 files changed, 10 insertions(+) diff --git a/library/train_util.py b/library/train_util.py index 768d5b32..c6d49974 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -2851,6 +2851,8 @@ def save_sd_model_on_epoch_end( model_util.save_diffusers_checkpoint( args.v2, out_dir, text_encoder, unet, src_path, vae=vae, use_safetensors=use_safetensors ) + if args.huggingface_repo_id is not None: + huggingface_util.upload(out_dir, args, "/" + model_name) def remove_du(old_epoch_no): out_dir_old = os.path.join(args.output_dir, EPOCH_DIFFUSERS_DIR_NAME.format(model_name, old_epoch_no)) @@ -2906,6 +2908,8 @@ def save_sd_model_on_train_end( model_util.save_stable_diffusion_checkpoint( args.v2, ckpt_file, text_encoder, unet, src_path, epoch, global_step, save_dtype, vae ) + if args.huggingface_repo_id is not None: + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) else: out_dir = os.path.join(args.output_dir, model_name) os.makedirs(out_dir, exist_ok=True) @@ -2914,6 +2918,8 @@ def save_sd_model_on_train_end( model_util.save_diffusers_checkpoint( args.v2, out_dir, text_encoder, unet, src_path, vae=vae, use_safetensors=use_safetensors ) + if args.huggingface_repo_id is not None: + huggingface_util.upload(out_dir, args, "/" + model_name) def save_state_on_train_end(args: argparse.Namespace, accelerator): diff --git a/train_textual_inversion.py b/train_textual_inversion.py index c4b04554..3fb17f2e 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -493,6 +493,8 @@ def train(args): print(f"save trained model to {ckpt_file}") save_weights(ckpt_file, updated_embs, save_dtype) + if args.huggingface_repo_id is not None: + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) print("model saved.") diff --git a/train_textual_inversion_XTI.py b/train_textual_inversion_XTI.py index 58c79142..b0bc4c3a 100644 --- a/train_textual_inversion_XTI.py +++ b/train_textual_inversion_XTI.py @@ -537,6 +537,8 @@ def train(args): print(f"save trained model to {ckpt_file}") save_weights(ckpt_file, updated_embs, save_dtype) + if args.huggingface_repo_id is not None: + huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) print("model saved.") From 16ba1cec693f58b007128ebe9343a2ef7a01b1a3 Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Sun, 2 Apr 2023 17:45:26 +0900 Subject: [PATCH 10/11] change async uploading to optional --- library/huggingface_util.py | 37 ++++++++++++++++++++-------------- library/train_util.py | 15 +++++++++----- train_network.py | 4 ++-- train_textual_inversion.py | 4 ++-- train_textual_inversion_XTI.py | 4 ++-- 5 files changed, 38 insertions(+), 26 deletions(-) diff --git a/library/huggingface_util.py b/library/huggingface_util.py index 353189c0..4431a208 100644 --- a/library/huggingface_util.py +++ b/library/huggingface_util.py @@ -20,11 +20,11 @@ def exists_repo( return False -@fire_in_thread def upload( - src: Union[str, Path, bytes, BinaryIO], args: argparse.Namespace, + src: Union[str, Path, bytes, BinaryIO], dest_suffix: str = "", + force_sync_upload: bool = False, ): repo_id = args.huggingface_repo_id repo_type = args.huggingface_repo_type @@ -38,20 +38,27 @@ def upload( is_folder = (type(src) == str and os.path.isdir(src)) or ( isinstance(src, Path) and src.is_dir() ) - if is_folder: - api.upload_folder( - repo_id=repo_id, - repo_type=repo_type, - folder_path=src, - path_in_repo=path_in_repo, - ) + + def uploader(): + if is_folder: + api.upload_folder( + repo_id=repo_id, + repo_type=repo_type, + folder_path=src, + path_in_repo=path_in_repo, + ) + else: + api.upload_file( + repo_id=repo_id, + repo_type=repo_type, + path_or_fileobj=src, + path_in_repo=path_in_repo, + ) + + if args.async_upload and not force_sync_upload: + fire_in_thread(uploader) else: - api.upload_file( - repo_id=repo_id, - repo_type=repo_type, - path_or_fileobj=src, - path_in_repo=path_in_repo, - ) + uploader() def list_dir( diff --git a/library/train_util.py b/library/train_util.py index c6d49974..425159c2 100644 --- a/library/train_util.py +++ b/library/train_util.py @@ -1909,6 +1909,11 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth: action="store_true", help="resume from huggingface (ex: --resume {repo_id}/{path_in_repo}:{revision}:{repo_type}) / huggingfaceから学習を再開する(例: --resume {repo_id}/{path_in_repo}:{revision}:{repo_type})", ) + parser.add_argument( + "--async_upload", + action="store_true", + help="upload to huggingface asynchronously / huggingfaceに非同期でアップロードする", + ) parser.add_argument( "--save_precision", type=str, @@ -2831,7 +2836,7 @@ def save_sd_model_on_epoch_end( args.v2, ckpt_file, text_encoder, unet, src_path, epoch_no, global_step, save_dtype, vae ) if args.huggingface_repo_id is not None: - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(args, ckpt_file, "/" + ckpt_name) def remove_sd(old_epoch_no): _, old_ckpt_name = get_epoch_ckpt_name(args, use_safetensors, old_epoch_no) @@ -2852,7 +2857,7 @@ def save_sd_model_on_epoch_end( args.v2, out_dir, text_encoder, unet, src_path, vae=vae, use_safetensors=use_safetensors ) if args.huggingface_repo_id is not None: - huggingface_util.upload(out_dir, args, "/" + model_name) + huggingface_util.upload(args, out_dir, "/" + model_name) def remove_du(old_epoch_no): out_dir_old = os.path.join(args.output_dir, EPOCH_DIFFUSERS_DIR_NAME.format(model_name, old_epoch_no)) @@ -2873,7 +2878,7 @@ def save_state_on_epoch_end(args: argparse.Namespace, accelerator, model_name, e state_dir = os.path.join(args.output_dir, EPOCH_STATE_NAME.format(model_name, epoch_no)) accelerator.save_state(state_dir) if args.save_state_to_huggingface: - huggingface_util.upload(state_dir, args, "/" + EPOCH_STATE_NAME.format(model_name, epoch_no)) + huggingface_util.upload(args, state_dir, "/" + EPOCH_STATE_NAME.format(model_name, epoch_no)) last_n_epochs = args.save_last_n_epochs_state if args.save_last_n_epochs_state else args.save_last_n_epochs if last_n_epochs is not None: @@ -2909,7 +2914,7 @@ def save_sd_model_on_train_end( args.v2, ckpt_file, text_encoder, unet, src_path, epoch, global_step, save_dtype, vae ) if args.huggingface_repo_id is not None: - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(args, ckpt_file, "/" + ckpt_name, force_sync_upload=True) else: out_dir = os.path.join(args.output_dir, model_name) os.makedirs(out_dir, exist_ok=True) @@ -2919,7 +2924,7 @@ def save_sd_model_on_train_end( args.v2, out_dir, text_encoder, unet, src_path, vae=vae, use_safetensors=use_safetensors ) if args.huggingface_repo_id is not None: - huggingface_util.upload(out_dir, args, "/" + model_name) + huggingface_util.upload(args, out_dir, "/" + model_name, force_sync_upload=True) def save_state_on_train_end(args: argparse.Namespace, accelerator): diff --git a/train_network.py b/train_network.py index 85b01def..dc890b99 100644 --- a/train_network.py +++ b/train_network.py @@ -627,7 +627,7 @@ def train(args): print(f"saving checkpoint: {ckpt_file}") unwrap_model(network).save_weights(ckpt_file, save_dtype, minimum_metadata if args.no_metadata else metadata) if args.huggingface_repo_id is not None: - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(args, ckpt_file, "/" + ckpt_name) def remove_old_func(old_epoch_no): old_ckpt_name = train_util.EPOCH_FILE_NAME.format(model_name, old_epoch_no) + "." + args.save_model_as @@ -668,7 +668,7 @@ def train(args): print(f"save trained model to {ckpt_file}") network.save_weights(ckpt_file, save_dtype, minimum_metadata if args.no_metadata else metadata) if args.huggingface_repo_id is not None: - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(args, ckpt_file, "/" + ckpt_name, force_sync_upload=True) print("model saved.") diff --git a/train_textual_inversion.py b/train_textual_inversion.py index 3fb17f2e..e7d052ee 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -452,7 +452,7 @@ def train(args): print(f"saving checkpoint: {ckpt_file}") save_weights(ckpt_file, updated_embs, save_dtype) if args.huggingface_repo_id is not None: - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(args, ckpt_file, "/" + ckpt_name) def remove_old_func(old_epoch_no): old_ckpt_name = train_util.EPOCH_FILE_NAME.format(model_name, old_epoch_no) + "." + args.save_model_as @@ -494,7 +494,7 @@ def train(args): print(f"save trained model to {ckpt_file}") save_weights(ckpt_file, updated_embs, save_dtype) if args.huggingface_repo_id is not None: - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(args, ckpt_file, "/" + ckpt_name, force_sync_upload=True) print("model saved.") diff --git a/train_textual_inversion_XTI.py b/train_textual_inversion_XTI.py index b0bc4c3a..7e393bcd 100644 --- a/train_textual_inversion_XTI.py +++ b/train_textual_inversion_XTI.py @@ -495,7 +495,7 @@ def train(args): print(f"saving checkpoint: {ckpt_file}") save_weights(ckpt_file, updated_embs, save_dtype) if args.huggingface_repo_id is not None: - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(args, ckpt_file, "/" + ckpt_name) def remove_old_func(old_epoch_no): old_ckpt_name = train_util.EPOCH_FILE_NAME.format(model_name, old_epoch_no) + "." + args.save_model_as @@ -538,7 +538,7 @@ def train(args): print(f"save trained model to {ckpt_file}") save_weights(ckpt_file, updated_embs, save_dtype) if args.huggingface_repo_id is not None: - huggingface_util.upload(ckpt_file, args, "/" + ckpt_name) + huggingface_util.upload(args, ckpt_file, "/" + ckpt_name, force_sync_upload=True) print("model saved.") From 3f339cda6f760dae4aa4519600bdec79e765762b Mon Sep 17 00:00:00 2001 From: ddPn08 Date: Sun, 2 Apr 2023 23:21:17 +0900 Subject: [PATCH 11/11] small fix --- library/utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/library/utils.py b/library/utils.py index a6b05917..7d801a67 100644 --- a/library/utils.py +++ b/library/utils.py @@ -2,7 +2,5 @@ import threading from typing import * -def fire_in_thread(f): - def wrapped(*args, **kwargs): - threading.Thread(target=f, args=args, kwargs=kwargs).start() - return wrapped \ No newline at end of file +def fire_in_thread(f, *args, **kwargs): + threading.Thread(target=f, args=args, kwargs=kwargs).start() \ No newline at end of file