From 7d2a9268b9d8d3c9b78068aaa2f9d43eb8b6101b Mon Sep 17 00:00:00 2001 From: BootsofLagrangian Date: Mon, 5 Feb 2024 22:42:06 +0900 Subject: [PATCH] apply offloading method runable for all trainer --- fine_tune.py | 5 +++++ train_db.py | 5 +++++ train_network.py | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/fine_tune.py b/fine_tune.py index f901ee64..85febeaa 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -251,6 +251,11 @@ def train(args): if args.deepspeed: # wrapping model + import deepspeed + if args.offload_optimizer_device is not None: + accelerator.print('[DeepSpeed] start to manually build cpu_adam.') + deepspeed.ops.op_builder.CPUAdamBuilder().load() + accelerator.print('[DeepSpeed] building cpu_adam done.') class DeepSpeedModel(torch.nn.Module): def __init__(self, unet, text_encoder) -> None: super().__init__() diff --git a/train_db.py b/train_db.py index fa7f6a8d..e2661886 100644 --- a/train_db.py +++ b/train_db.py @@ -224,6 +224,11 @@ def train(args): # acceleratorがなんかよろしくやってくれるらしい if args.deepspeed: # wrapping model + import deepspeed + if args.offload_optimizer_device is not None: + accelerator.print('[DeepSpeed] start to manually build cpu_adam.') + deepspeed.ops.op_builder.CPUAdamBuilder().load() + accelerator.print('[DeepSpeed] building cpu_adam done.') class DeepSpeedModel(torch.nn.Module): def __init__(self, unet, text_encoder) -> None: super().__init__() diff --git a/train_network.py b/train_network.py index bbda427a..050a6511 100644 --- a/train_network.py +++ b/train_network.py @@ -419,6 +419,11 @@ class NetworkTrainer: # acceleratorがなんかよろしくやってくれるらしい / accelerator will do something good if args.deepspeed: # wrapping model + import deepspeed + if args.offload_optimizer_device is not None: + accelerator.print('[DeepSpeed] start to manually build cpu_adam.') + deepspeed.ops.op_builder.CPUAdamBuilder().load() + accelerator.print('[DeepSpeed] building cpu_adam done.') class DeepSpeedModel(torch.nn.Module): def __init__(self, unet, text_encoder, network) -> None: super().__init__()