mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-06 13:47:06 +00:00
fix full_fp16 compatible and train_step
This commit is contained in:
16
fine_tune.py
16
fine_tune.py
@@ -221,18 +221,10 @@ def train(args):
|
||||
|
||||
# 学習ステップ数を計算する
|
||||
if args.max_train_epochs is not None:
|
||||
if args.deepspeed:
|
||||
args.max_train_steps = args.max_train_epochs * math.ceil(
|
||||
len(train_dataloader) / args.gradient_accumulation_steps
|
||||
)
|
||||
accelerator.print(
|
||||
f"[DeepSpeed] override steps not dividing by {accelerator.num_processes}. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}"
|
||||
)
|
||||
else:
|
||||
args.max_train_steps = args.max_train_epochs * math.ceil(
|
||||
len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps
|
||||
)
|
||||
accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}")
|
||||
args.max_train_steps = args.max_train_epochs * math.ceil(
|
||||
len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps
|
||||
)
|
||||
accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}")
|
||||
|
||||
# データセット側にも学習ステップを送信
|
||||
train_dataset_group.set_max_train_steps(args.max_train_steps)
|
||||
|
||||
@@ -3166,6 +3166,11 @@ def add_training_arguments(parser: argparse.ArgumentParser, support_dreambooth:
|
||||
action="store_true",
|
||||
help="Flag to indicate whether to save 16-bit model. Only applicable with ZeRO Stage-3."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fp16_master_weights_and_gradients",
|
||||
action="store_true",
|
||||
help="fp16_master_and_gradients requires optimizer to support keeping fp16 master and gradients while keeping the optimizer states in fp32."
|
||||
)
|
||||
|
||||
def verify_training_args(args: argparse.Namespace):
|
||||
if args.v_parameterization and not args.v2:
|
||||
@@ -3966,6 +3971,8 @@ def prepare_accelerator(args: argparse.Namespace):
|
||||
deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu'] = args.train_batch_size
|
||||
deepspeed_plugin.deepspeed_config['train_batch_size'] = \
|
||||
args.train_batch_size * args.gradient_accumulation_steps * int(os.environ['WORLD_SIZE'])
|
||||
if args.full_fp16 or args.fp16_master_weights_and_gradients:
|
||||
deepspeed_plugin.deepspeed_config['fp16_master_weights_and_gradients'] = True
|
||||
|
||||
accelerator = Accelerator(
|
||||
gradient_accumulation_steps=args.gradient_accumulation_steps,
|
||||
|
||||
@@ -437,7 +437,8 @@ def train(args):
|
||||
text_encoder2.to(accelerator.device)
|
||||
|
||||
# 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする
|
||||
if args.full_fp16:
|
||||
if args.full_fp16 and not args.deepspeed:
|
||||
# During DeepSpeed training, accelerate does not handle fp16/bf16 mixed precision directly via its grad scaler; the DeepSpeed engine handles it instead, so the patch is skipped.
|
||||
train_util.patch_accelerator_for_fp16_training(accelerator)
|
||||
|
||||
# resumeする
|
||||
|
||||
96
test_pip_requirements.txt
Normal file
96
test_pip_requirements.txt
Normal file
@@ -0,0 +1,96 @@
|
||||
absl-py==2.1.0
|
||||
accelerate==0.25.0
|
||||
aiohttp==3.9.3
|
||||
aiosignal==1.3.1
|
||||
altair==4.2.2
|
||||
annotated-types @ file:///home/conda/feedstock_root/build_artifacts/annotated-types_1696634205638/work
|
||||
async-timeout==4.0.3
|
||||
attrs==23.2.0
|
||||
bitsandbytes==0.42.0
|
||||
Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1695989787169/work
|
||||
cachetools==5.3.2
|
||||
certifi==2022.12.7
|
||||
charset-normalizer==2.1.1
|
||||
cmake==3.25.0
|
||||
deepspeed==0.13.1
|
||||
diffusers==0.25.0
|
||||
easygui==0.98.3
|
||||
einops==0.6.1
|
||||
entrypoints==0.4
|
||||
filelock==3.9.0
|
||||
frozenlist==1.4.1
|
||||
fsspec==2024.2.0
|
||||
ftfy==6.1.1
|
||||
gmpy2 @ file:///home/conda/feedstock_root/build_artifacts/gmpy2_1666808654411/work
|
||||
google-auth==2.27.0
|
||||
google-auth-oauthlib==0.4.6
|
||||
grpcio==1.60.1
|
||||
hjson==3.1.0
|
||||
huggingface-hub==0.20.1
|
||||
idna==3.4
|
||||
importlib-metadata==7.0.1
|
||||
Jinja2==3.1.2
|
||||
jsonschema==4.21.1
|
||||
jsonschema-specifications==2023.12.1
|
||||
-e git+https://github.com/kohya-ss/sd-scripts@cd19df49cd512e13ac90db115c424d19c0e8868a#egg=library
|
||||
lightning-utilities==0.10.1
|
||||
lit==15.0.7
|
||||
Markdown==3.5.2
|
||||
MarkupSafe==2.1.3
|
||||
mpmath==1.3.0
|
||||
multidict==6.0.5
|
||||
networkx==3.2.1
|
||||
ninja==1.11.1.1
|
||||
numpy==1.26.3
|
||||
oauthlib==3.2.2
|
||||
open-clip-torch==2.20.0
|
||||
opencv-python==4.7.0.68
|
||||
packaging==23.2
|
||||
pandas==2.2.0
|
||||
pillow==10.2.0
|
||||
protobuf==3.19.6
|
||||
psutil==5.9.8
|
||||
py-cpuinfo @ file:///home/conda/feedstock_root/build_artifacts/py-cpuinfo_1666774466606/work
|
||||
pyasn1==0.5.1
|
||||
pyasn1-modules==0.3.0
|
||||
pydantic @ file:///home/conda/feedstock_root/build_artifacts/pydantic_1706543943340/work
|
||||
pydantic_core @ file:///home/conda/feedstock_root/build_artifacts/pydantic-core_1705674688239/work
|
||||
pynvml==11.5.0
|
||||
PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work
|
||||
python-dateutil==2.8.2
|
||||
pytorch-lightning==1.9.0
|
||||
pytz==2024.1
|
||||
PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1695373428874/work
|
||||
referencing==0.33.0
|
||||
regex==2023.12.25
|
||||
requests==2.28.1
|
||||
requests-oauthlib==1.3.1
|
||||
rpds-py==0.17.1
|
||||
rsa==4.9
|
||||
safetensors==0.4.2
|
||||
scipy==1.12.0
|
||||
sentencepiece==0.1.99
|
||||
six==1.16.0
|
||||
sympy==1.12
|
||||
tensorboard==2.10.1
|
||||
tensorboard-data-server==0.6.1
|
||||
tensorboard-plugin-wit==1.8.1
|
||||
timm==0.9.12
|
||||
tokenizers==0.15.1
|
||||
toml==0.10.2
|
||||
toolz==0.12.1
|
||||
torch==2.0.1+cu118
|
||||
torchaudio==2.2.0
|
||||
torchmetrics==1.3.0.post0
|
||||
torchvision==0.15.2+cu118
|
||||
tqdm==4.66.1
|
||||
transformers==4.36.2
|
||||
triton==2.0.0
|
||||
typing_extensions==4.8.0
|
||||
tzdata==2023.4
|
||||
urllib3==1.26.13
|
||||
voluptuous==0.13.1
|
||||
wcwidth==0.2.13
|
||||
Werkzeug==3.0.1
|
||||
yarl==1.9.4
|
||||
zipp==3.17.0
|
||||
16
train_db.py
16
train_db.py
@@ -190,18 +190,10 @@ def train(args):
|
||||
|
||||
# 学習ステップ数を計算する
|
||||
if args.max_train_epochs is not None:
|
||||
if args.deepspeed:
|
||||
args.max_train_steps = args.max_train_epochs * math.ceil(
|
||||
len(train_dataloader) / args.gradient_accumulation_steps
|
||||
)
|
||||
accelerator.print(
|
||||
f"[DeepSpeed] override steps not dividing by {accelerator.num_processes}. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}"
|
||||
)
|
||||
else:
|
||||
args.max_train_steps = args.max_train_epochs * math.ceil(
|
||||
len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps
|
||||
)
|
||||
accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}")
|
||||
args.max_train_steps = args.max_train_epochs * math.ceil(
|
||||
len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps
|
||||
)
|
||||
accelerator.print(f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}")
|
||||
|
||||
# データセット側にも学習ステップを送信
|
||||
train_dataset_group.set_max_train_steps(args.max_train_steps)
|
||||
|
||||
@@ -359,20 +359,12 @@ class NetworkTrainer:
|
||||
|
||||
# 学習ステップ数を計算する
|
||||
if args.max_train_epochs is not None:
|
||||
if args.deepspeed:
|
||||
args.max_train_steps = args.max_train_epochs * math.ceil(
|
||||
len(train_dataloader) / args.gradient_accumulation_steps
|
||||
)
|
||||
accelerator.print(
|
||||
f"[DeepSpeed] override steps not dividing by {accelerator.num_processes}. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}"
|
||||
)
|
||||
else:
|
||||
args.max_train_steps = args.max_train_epochs * math.ceil(
|
||||
len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps
|
||||
)
|
||||
accelerator.print(
|
||||
f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}"
|
||||
)
|
||||
args.max_train_steps = args.max_train_epochs * math.ceil(
|
||||
len(train_dataloader) / accelerator.num_processes / args.gradient_accumulation_steps
|
||||
)
|
||||
accelerator.print(
|
||||
f"override steps. steps for {args.max_train_epochs} epochs is / 指定エポックまでのステップ数: {args.max_train_steps}"
|
||||
)
|
||||
|
||||
# データセット側にも学習ステップを送信
|
||||
train_dataset_group.set_max_train_steps(args.max_train_steps)
|
||||
@@ -479,7 +471,8 @@ class NetworkTrainer:
|
||||
vae.to(accelerator.device, dtype=vae_dtype)
|
||||
|
||||
# 実験的機能:勾配も含めたfp16学習を行う PyTorchにパッチを当ててfp16でのgrad scaleを有効にする
|
||||
if args.full_fp16:
|
||||
if args.full_fp16 and not args.deepspeed:
|
||||
# During DeepSpeed training, accelerate does not handle fp16/bf16 mixed precision directly via its grad scaler; the DeepSpeed engine handles it instead, so the patch is skipped.
|
||||
train_util.patch_accelerator_for_fp16_training(accelerator)
|
||||
|
||||
# resumeする
|
||||
|
||||
Reference in New Issue
Block a user