Update to the latest commit on basujindal's SD fork; more VRAM garbage collection; speed up the live preview by displaying only every 5th step

This commit is contained in:
cmdr2
2022-09-22 22:14:25 +05:30
parent f98225cdb6
commit 7b520942dc
3 changed files with 56 additions and 27 deletions

View File

@ -193,6 +193,15 @@ def mk_img(req: Request):
gc()
if device != "cpu":
modelFS.to("cpu")
modelCS.to("cpu")
model.model1.to("cpu")
model.model2.to("cpu")
gc()
yield json.dumps({
"status": 'failed',
"detail": str(e)
@ -312,11 +321,7 @@ def do_mk_img(req: Request):
if device != "cpu" and precision == "autocast":
mask = mask.half()
if device != "cpu":
mem = torch.cuda.memory_allocated() / 1e6
modelFS.to("cpu")
while torch.cuda.memory_allocated() / 1e6 >= mem:
time.sleep(1)
move_fs_to_cpu()
assert 0. <= opt_strength <= 1., 'can only work with strength in [0.0, 1.0]'
t_enc = int(opt_strength * opt_ddim_steps)
@ -365,7 +370,7 @@ def do_mk_img(req: Request):
if req.stream_progress_updates:
progress = {"step": i, "total_steps": opt_ddim_steps}
if req.stream_image_progress:
if req.stream_image_progress and i % 5 == 0:
partial_images = []
for i in range(batch_size):
@ -484,12 +489,8 @@ def do_mk_img(req: Request):
seeds += str(opt_seed) + ","
opt_seed += 1
move_fs_to_cpu()
gc()
if device != "cpu":
mem = torch.cuda.memory_allocated() / 1e6
modelFS.to("cpu")
while torch.cuda.memory_allocated() / 1e6 >= mem:
time.sleep(1)
del x_samples, x_samples_ddim, x_sample
print("memory_final = ", torch.cuda.memory_allocated() / 1e6)
@ -575,6 +576,13 @@ def _img2img(init_latent, t_enc, batch_size, opt_scale, c, uc, opt_ddim_steps, o
else:
return samples_ddim
def move_fs_to_cpu():
    """Offload modelFS to system RAM and wait until the GPU actually frees VRAM.

    No-op when running on CPU. After calling ``.to("cpu")`` the function polls
    ``torch.cuda.memory_allocated()`` once per second until reported usage has
    dropped below the level recorded before the move — presumably because the
    allocator releases the tensors asynchronously (NOTE(review): confirm; if
    usage never drops this loop will not terminate).
    """
    if device == "cpu":
        return
    vram_before_mb = torch.cuda.memory_allocated() / 1e6
    modelFS.to("cpu")
    # Poll until the CUDA allocator reports less memory than before the move.
    while torch.cuda.memory_allocated() / 1e6 >= vram_before_mb:
        time.sleep(1)
def gc():
if device == 'cpu':
return