Mirror of https://github.com/easydiffusion/easydiffusion.git (synced 2025-02-18 11:20:58 +01:00)

Commit 7b520942dc (parent f98225cdb6)

Update to the latest commit on basujindal's SD fork; more VRAM garbage-collection; speed up live preview by displaying only every 5th step
@@ -15,7 +15,7 @@
 
 @call git reset --hard
 @call git pull
-@call git checkout d154155d4c0b43e13ec1f00eb72b7ff9d522fcf9
+@call git checkout f6cfebffa752ee11a7b07497b8529d5971de916c
 
 @call git apply ..\ui\sd_internal\ddim_callback.patch
 
@@ -32,7 +32,7 @@
 )
 
 @cd stable-diffusion
-@call git checkout d154155d4c0b43e13ec1f00eb72b7ff9d522fcf9
+@call git checkout f6cfebffa752ee11a7b07497b8529d5971de916c
 
 @call git apply ..\ui\sd_internal\ddim_callback.patch
 
ui\sd_internal\ddim_callback.patch:

@@ -1,7 +1,16 @@
 diff --git a/optimizedSD/ddpm.py b/optimizedSD/ddpm.py
-index dcf7901..4028a70 100644
+index b967b55..75ddd8b 100644
 --- a/optimizedSD/ddpm.py
 +++ b/optimizedSD/ddpm.py
+@@ -22,7 +22,7 @@ from ldm.util import exists, default, instantiate_from_config
+ from ldm.modules.diffusionmodules.util import make_beta_schedule
+ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like
+ from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like
+-from samplers import CompVisDenoiser, get_ancestral_step, to_d, append_dims,linear_multistep_coeff
++from .samplers import CompVisDenoiser, get_ancestral_step, to_d, append_dims,linear_multistep_coeff
+ 
+ def disabled_train(self):
+     """Overwrite model.train with this function to make sure train/eval mode
 @@ -485,6 +485,7 @@ class UNet(DDPM):
  log_every_t=100,
  unconditional_guidance_scale=1.,
@@ -25,11 +34,11 @@ index dcf7901..4028a70 100644
 + callback=callback, img_callback=img_callback,
 + streaming_callbacks=streaming_callbacks)
 
-# elif sampler == "euler":
-# cvd = CompVisDenoiser(self.alphas_cumprod)
-@@ -536,11 +540,15 @@ class UNet(DDPM):
-# samples = self.heun_sampling(noise, sig, conditioning, unconditional_conditioning=unconditional_conditioning,
-# unconditional_guidance_scale=unconditional_guidance_scale)
+elif sampler == "euler":
+self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=False)
+@@ -555,11 +559,15 @@ class UNet(DDPM):
+samples = self.lms_sampling(self.alphas_cumprod,x_latent, S, conditioning, unconditional_conditioning=unconditional_conditioning,
+unconditional_guidance_scale=unconditional_guidance_scale)
 
 + if streaming_callbacks: # this line needs to be right after the sampling() call
 + yield from samples
@@ -44,7 +53,7 @@ index dcf7901..4028a70 100644
 
  @torch.no_grad()
  def plms_sampling(self, cond,b, img,
-@@ -548,7 +556,8 @@ class UNet(DDPM):
+@@ -567,7 +575,8 @@ class UNet(DDPM):
  callback=None, quantize_denoised=False,
  mask=None, x0=None, img_callback=None, log_every_t=100,
  temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
@@ -54,13 +63,13 @@ index dcf7901..4028a70 100644
 
  device = self.betas.device
  timesteps = self.ddim_timesteps
-@@ -580,10 +589,22 @@ class UNet(DDPM):
+@@ -599,10 +608,21 @@ class UNet(DDPM):
  old_eps.append(e_t)
  if len(old_eps) >= 4:
  old_eps.pop(0)
 - if callback: callback(i)
 - if img_callback: img_callback(pred_x0, i)
+-
 - return img
 + if callback:
 + if streaming_callbacks:
@@ -80,7 +89,7 @@ index dcf7901..4028a70 100644
 
  @torch.no_grad()
  def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
-@@ -687,7 +708,9 @@ class UNet(DDPM):
+@@ -706,7 +726,9 @@ class UNet(DDPM):
 
  @torch.no_grad()
  def ddim_sampling(self, x_latent, cond, t_start, unconditional_guidance_scale=1.0, unconditional_conditioning=None,
@@ -91,11 +100,10 @@ index dcf7901..4028a70 100644
 
  timesteps = self.ddim_timesteps
  timesteps = timesteps[:t_start]
-@@ -710,11 +733,25 @@ class UNet(DDPM):
+@@ -730,10 +752,24 @@ class UNet(DDPM):
- x_dec = self.p_sample_ddim(x_dec, cond, ts, index=index, use_original_steps=use_original_steps,
  unconditional_guidance_scale=unconditional_guidance_scale,
  unconditional_conditioning=unconditional_conditioning)
-+
 + if callback:
 + if streaming_callbacks:
 + yield from callback(i)
@@ -106,7 +114,7 @@ index dcf7901..4028a70 100644
 + yield from img_callback(x_dec, i)
 + else:
 + img_callback(x_dec, i)
++
  if mask is not None:
 - return x0 * mask + (1. - mask) * x_dec
 + x_dec = x0 * mask + (1. - mask) * x_dec
@@ -119,3 +127,16 @@ index dcf7901..4028a70 100644
 
 
  @torch.no_grad()
+diff --git a/optimizedSD/openaimodelSplit.py b/optimizedSD/openaimodelSplit.py
+index abc3098..7a32ffe 100644
+--- a/optimizedSD/openaimodelSplit.py
++++ b/optimizedSD/openaimodelSplit.py
+@@ -13,7 +13,7 @@ from ldm.modules.diffusionmodules.util import (
+ normalization,
+ timestep_embedding,
+ )
+-from splitAttention import SpatialTransformer
++from .splitAttention import SpatialTransformer
+ 
+ 
+ class AttentionPool2d(nn.Module):
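Taken together, the updated ddim_callback.patch turns the samplers' per-step hooks into generator-style calls: when streaming_callbacks is set, the sampling loops `yield from` the callbacks instead of calling them, so progress (and the finished result) can be streamed out of the sampler while it runs. A minimal, self-contained sketch of that pattern, using hypothetical names rather than the real UNet methods:

# Sketch of the streaming-callback pattern applied by the patch (hypothetical
# names; the real code lives in optimizedSD/ddpm.py's sampling loops).
def sampling_loop(steps, img_callback=None, streaming_callbacks=False):
    x = "latent"                      # stand-in for the latent tensor
    for i in range(steps):
        x = f"denoised at step {i}"   # stand-in for one denoising step
        if img_callback:
            if streaming_callbacks:
                # generator mode: whatever the callback yields (e.g. JSON
                # progress chunks) is passed straight through to the caller
                yield from img_callback(x, i)
            else:
                img_callback(x, i)    # plain callback mode
    yield x                           # the result is yielded too, since this is now a generator

def report_progress(x, i):
    yield {"step": i, "preview": x}

for update in sampling_loop(3, img_callback=report_progress, streaming_callbacks=True):
    print(update)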
@@ -193,6 +193,15 @@ def mk_img(req: Request):
 
         gc()
 
+        if device != "cpu":
+            modelFS.to("cpu")
+            modelCS.to("cpu")
+
+            model.model1.to("cpu")
+            model.model2.to("cpu")
+
+            gc()
+
         yield json.dumps({
             "status": 'failed',
             "detail": str(e)
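This is the first of the new VRAM-cleanup paths: if a render fails, the split model parts are pushed back to system RAM and garbage-collected before the error is reported, instead of staying resident on the GPU. A rough, hedged sketch of the same idea as a standalone helper (render_with_cleanup and its arguments are hypothetical; the module's gc() helper is approximated with torch.cuda.empty_cache()):

import json
import torch

def render_with_cleanup(run_render, model, modelCS, modelFS, device="cuda"):
    # Run a streaming render; on failure, move the model parts to CPU and
    # release cached VRAM before yielding the error payload.
    try:
        yield from run_render()
    except Exception as e:
        if device != "cpu":
            modelFS.to("cpu")
            modelCS.to("cpu")
            model.model1.to("cpu")
            model.model2.to("cpu")
            torch.cuda.empty_cache()  # stand-in for the module's gc() helper
        yield json.dumps({"status": "failed", "detail": str(e)})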
@@ -312,11 +321,7 @@ def do_mk_img(req: Request):
         if device != "cpu" and precision == "autocast":
             mask = mask.half()
 
-        if device != "cpu":
-            mem = torch.cuda.memory_allocated() / 1e6
-            modelFS.to("cpu")
-            while torch.cuda.memory_allocated() / 1e6 >= mem:
-                time.sleep(1)
+        move_fs_to_cpu()
 
     assert 0. <= opt_strength <= 1., 'can only work with strength in [0.0, 1.0]'
     t_enc = int(opt_strength * opt_ddim_steps)
@@ -365,7 +370,7 @@ def do_mk_img(req: Request):
         if req.stream_progress_updates:
             progress = {"step": i, "total_steps": opt_ddim_steps}
 
-            if req.stream_image_progress:
+            if req.stream_image_progress and i % 5 == 0:
                 partial_images = []
 
                 for i in range(batch_size):
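The `and i % 5 == 0` guard is the "speed up live preview" part of the commit: a progress JSON message still goes out on every step, but the much more expensive decode-to-image path now only runs on every 5th step. A small sketch of the throttle (helper names are illustrative, not the actual do_mk_img code):

def progress_updates(total_steps, stream_image_progress, decode_preview):
    # Yield per-step progress; attach a preview image only every 5th step,
    # since decoding latents to a viewable image is the slow part.
    for i in range(total_steps):
        progress = {"step": i, "total_steps": total_steps}
        if stream_image_progress and i % 5 == 0:
            progress["output"] = decode_preview(i)
        yield progress

for p in progress_updates(10, True, lambda i: f"preview_{i}.png"):
    print(p)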
@@ -484,12 +489,8 @@ def do_mk_img(req: Request):
             seeds += str(opt_seed) + ","
             opt_seed += 1
 
+        move_fs_to_cpu()
         gc()
-        if device != "cpu":
-            mem = torch.cuda.memory_allocated() / 1e6
-            modelFS.to("cpu")
-            while torch.cuda.memory_allocated() / 1e6 >= mem:
-                time.sleep(1)
         del x_samples, x_samples_ddim, x_sample
         print("memory_final = ", torch.cuda.memory_allocated() / 1e6)
 
@@ -575,6 +576,13 @@ def _img2img(init_latent, t_enc, batch_size, opt_scale, c, uc, opt_ddim_steps, o
     else:
         return samples_ddim
 
+def move_fs_to_cpu():
+    if device != "cpu":
+        mem = torch.cuda.memory_allocated() / 1e6
+        modelFS.to("cpu")
+        while torch.cuda.memory_allocated() / 1e6 >= mem:
+            time.sleep(1)
+
 def gc():
     if device == 'cpu':
         return
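The inline cleanup that previously appeared twice in do_mk_img is now centralized in move_fs_to_cpu(): move modelFS off the GPU, then poll torch.cuda.memory_allocated() until the allocator actually reports the memory as released, since .to("cpu") can return before the VRAM becomes reusable. A hedged sketch of the same polling idea as a generic helper (wait_for_vram_release is a made-up name):

import time
import torch

def wait_for_vram_release(module):
    # Move a module to CPU and block until CUDA's allocated-memory counter
    # drops below its previous value, mirroring the move_fs_to_cpu() pattern.
    # (Assumes the module was actually holding VRAM before the call.)
    if not torch.cuda.is_available():
        return
    before = torch.cuda.memory_allocated() / 1e6  # MB
    module.to("cpu")
    while torch.cuda.memory_allocated() / 1e6 >= before:
        time.sleep(1)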