Speed up the model move by using the earlier function to move modelCS and modelFS to the CPU
parent f7af259576
commit c10e773401
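In short, this commit stops calling the polling-heavy wait_model_move_to helper (now commented out) and instead moves modelCS and modelFS off the GPU with a new, much smaller move_to_cpu helper. Both rely on the same trick: torch.cuda.memory_allocated() drops once a model's weights actually leave the GPU, so a loop can poll it to confirm the transfer completed. A minimal, self-contained sketch of that idea (the name wait_until_on_cpu and its defaults are mine, not from the commit):

import time
import torch

def wait_until_on_cpu(model, device=0, poll_seconds=0.5):
    """Move `model` to the CPU, then poll allocated VRAM until it has dropped."""
    if not torch.cuda.is_available():
        model.to("cpu")
        return
    start_mem = torch.cuda.memory_allocated(device) / 1e6  # MB in use before the move
    model.to("cpu")
    # A synchronous .to("cpu") usually frees the GPU copies right away; the loop
    # covers allocators/streams that release the memory slightly later.
    while start_mem > 0 and torch.cuda.memory_allocated(device) / 1e6 >= start_mem:
        time.sleep(poll_seconds)

In the hunks below, the commit's move_to_cpu(thread_data.modelFS) and move_to_cpu(thread_data.modelCS) play this role, with a fixed 1-second poll and no logging.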
@@ -219,29 +219,36 @@ def unload_models():
     gc()

-def wait_model_move_to(model, target_device): # Send to target_device and wait until complete.
-    if thread_data.device == target_device: return
-    start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
-    if start_mem <= 0: return
-    model_name = model.__class__.__name__
-    print(f'Device {thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
-    start_time = time.time()
-    model.to(target_device)
-    time_step = start_time
-    WARNING_TIMEOUT = 1.5 # seconds - Show activity in console after timeout.
-    last_mem = start_mem
-    is_transfering = True
-    while is_transfering:
-        time.sleep(0.5) # 500ms
-        mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
-        is_transfering = bool(mem > 0 and mem < last_mem) # still stuff loaded, but less than last time.
-        last_mem = mem
-        if not is_transfering:
-            break;
-        if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity.
-            print(f'Device {thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
-            time_step = time.time()
-    print(f'Device {thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')
+# def wait_model_move_to(model, target_device): # Send to target_device and wait until complete.
+#     if thread_data.device == target_device: return
+#     start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
+#     if start_mem <= 0: return
+#     model_name = model.__class__.__name__
+#     print(f'Device {thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
+#     start_time = time.time()
+#     model.to(target_device)
+#     time_step = start_time
+#     WARNING_TIMEOUT = 1.5 # seconds - Show activity in console after timeout.
+#     last_mem = start_mem
+#     is_transfering = True
+#     while is_transfering:
+#         time.sleep(0.5) # 500ms
+#         mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
+#         is_transfering = bool(mem > 0 and mem < last_mem) # still stuff loaded, but less than last time.
+#         last_mem = mem
+#         if not is_transfering:
+#             break;
+#         if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity.
+#             print(f'Device {thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
+#             time_step = time.time()
+#     print(f'Device {thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')
+
+def move_to_cpu(model):
+    if thread_data.device != "cpu":
+        mem = torch.cuda.memory_allocated() / 1e6
+        model.to("cpu")
+        while torch.cuda.memory_allocated() / 1e6 >= mem:
+            time.sleep(1)

 def load_model_gfpgan():
     if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.')
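Compared with wait_model_move_to, the new helper trades per-device accounting, progress logging, and the old start_mem <= 0 early-return for six lines and a fixed 1-second poll. A quick, self-contained smoke test of the new helper outside easydiffusion could look like this (everything here is a stand-in; thread_data is faked only so the device check passes):

import time
import types
import torch

# Fake the easydiffusion runtime state that the helper reads.
thread_data = types.SimpleNamespace(device="cuda" if torch.cuda.is_available() else "cpu")

def move_to_cpu(model):  # same shape as the helper added in this commit
    if thread_data.device != "cpu":
        mem = torch.cuda.memory_allocated() / 1e6
        # Note: if `mem` were 0 here, the `>=` loop below could never exit, since
        # memory_allocated() is never negative; the old wait_model_move_to guarded
        # this case with `if start_mem <= 0: return`.
        model.to("cpu")
        while torch.cuda.memory_allocated() / 1e6 >= mem:
            time.sleep(1)

net = torch.nn.Linear(256, 256).to(thread_data.device)
if torch.cuda.is_available():
    print(f"allocated before: {torch.cuda.memory_allocated() / 1e6:.2f} MB")
move_to_cpu(net)
print("weights now on:", next(net.parameters()).device)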
@@ -484,7 +491,8 @@ def do_mk_img(req: Request):
         mask = mask.half()

     # Send to CPU and wait until complete.
-    wait_model_move_to(thread_data.modelFS, 'cpu')
+    # wait_model_move_to(thread_data.modelFS, 'cpu')
+    move_to_cpu(thread_data.modelFS)

     assert 0. <= req.prompt_strength <= 1., 'can only work with strength in [0.0, 1.0]'
     t_enc = int(req.prompt_strength * req.num_inference_steps)
@@ -560,10 +568,6 @@ def do_mk_img(req: Request):
                 img_data[i] = x_sample
             del x_samples, x_samples_ddim, x_sample

-            if thread_data.reduced_memory:
-                # Send to CPU and wait until complete.
-                wait_model_move_to(thread_data.modelFS, 'cpu')
-
             print("saving images")
             for i in range(batch_size):
                 img = Image.fromarray(img_data[i])
@@ -617,6 +621,7 @@ def do_mk_img(req: Request):

     # if thread_data.reduced_memory:
     #     unload_filters()
+    move_to_cpu(thread_data.modelFS)
     del img_data
     gc()
     if thread_data.device != 'cpu':
@@ -656,7 +661,9 @@ def _txt2img(opt_W, opt_H, opt_n_samples, opt_ddim_steps, opt_scale, start_code,
     shape = [opt_n_samples, opt_C, opt_H // opt_f, opt_W // opt_f]

     # Send to CPU and wait until complete.
-    wait_model_move_to(thread_data.modelCS, 'cpu')
+    # wait_model_move_to(thread_data.modelCS, 'cpu')
+
+    move_to_cpu(thread_data.modelCS)

     if sampler_name == 'ddim':
         thread_data.model.make_schedule(ddim_num_steps=opt_ddim_steps, ddim_eta=opt_ddim_eta, verbose=False)