mirror of
https://github.com/easydiffusion/easydiffusion.git
synced 2024-11-26 02:05:09 +01:00
Update to the latest commit on basujindal's SD fork; More VRAM garbage-collection; Speed up live preview by displaying only every 5th step
This commit is contained in:
parent
f98225cdb6
commit
7b520942dc
@ -15,7 +15,7 @@
|
||||
|
||||
@call git reset --hard
|
||||
@call git pull
|
||||
@call git checkout d154155d4c0b43e13ec1f00eb72b7ff9d522fcf9
|
||||
@call git checkout f6cfebffa752ee11a7b07497b8529d5971de916c
|
||||
|
||||
@call git apply ..\ui\sd_internal\ddim_callback.patch
|
||||
|
||||
@ -32,7 +32,7 @@
|
||||
)
|
||||
|
||||
@cd stable-diffusion
|
||||
@call git checkout d154155d4c0b43e13ec1f00eb72b7ff9d522fcf9
|
||||
@call git checkout f6cfebffa752ee11a7b07497b8529d5971de916c
|
||||
|
||||
@call git apply ..\ui\sd_internal\ddim_callback.patch
|
||||
|
||||
|
@ -1,7 +1,16 @@
|
||||
diff --git a/optimizedSD/ddpm.py b/optimizedSD/ddpm.py
|
||||
index dcf7901..4028a70 100644
|
||||
index b967b55..75ddd8b 100644
|
||||
--- a/optimizedSD/ddpm.py
|
||||
+++ b/optimizedSD/ddpm.py
|
||||
@@ -22,7 +22,7 @@ from ldm.util import exists, default, instantiate_from_config
|
||||
from ldm.modules.diffusionmodules.util import make_beta_schedule
|
||||
from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like
|
||||
from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like
|
||||
-from samplers import CompVisDenoiser, get_ancestral_step, to_d, append_dims,linear_multistep_coeff
|
||||
+from .samplers import CompVisDenoiser, get_ancestral_step, to_d, append_dims,linear_multistep_coeff
|
||||
|
||||
def disabled_train(self):
|
||||
"""Overwrite model.train with this function to make sure train/eval mode
|
||||
@@ -485,6 +485,7 @@ class UNet(DDPM):
|
||||
log_every_t=100,
|
||||
unconditional_guidance_scale=1.,
|
||||
@ -25,11 +34,11 @@ index dcf7901..4028a70 100644
|
||||
+ callback=callback, img_callback=img_callback,
|
||||
+ streaming_callbacks=streaming_callbacks)
|
||||
|
||||
# elif sampler == "euler":
|
||||
# cvd = CompVisDenoiser(self.alphas_cumprod)
|
||||
@@ -536,11 +540,15 @@ class UNet(DDPM):
|
||||
# samples = self.heun_sampling(noise, sig, conditioning, unconditional_conditioning=unconditional_conditioning,
|
||||
# unconditional_guidance_scale=unconditional_guidance_scale)
|
||||
elif sampler == "euler":
|
||||
self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=False)
|
||||
@@ -555,11 +559,15 @@ class UNet(DDPM):
|
||||
samples = self.lms_sampling(self.alphas_cumprod,x_latent, S, conditioning, unconditional_conditioning=unconditional_conditioning,
|
||||
unconditional_guidance_scale=unconditional_guidance_scale)
|
||||
|
||||
+ if streaming_callbacks: # this line needs to be right after the sampling() call
|
||||
+ yield from samples
|
||||
@ -44,7 +53,7 @@ index dcf7901..4028a70 100644
|
||||
|
||||
@torch.no_grad()
|
||||
def plms_sampling(self, cond,b, img,
|
||||
@@ -548,7 +556,8 @@ class UNet(DDPM):
|
||||
@@ -567,7 +575,8 @@ class UNet(DDPM):
|
||||
callback=None, quantize_denoised=False,
|
||||
mask=None, x0=None, img_callback=None, log_every_t=100,
|
||||
temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
|
||||
@ -54,13 +63,13 @@ index dcf7901..4028a70 100644
|
||||
|
||||
device = self.betas.device
|
||||
timesteps = self.ddim_timesteps
|
||||
@@ -580,10 +589,22 @@ class UNet(DDPM):
|
||||
@@ -599,10 +608,21 @@ class UNet(DDPM):
|
||||
old_eps.append(e_t)
|
||||
if len(old_eps) >= 4:
|
||||
old_eps.pop(0)
|
||||
- if callback: callback(i)
|
||||
- if img_callback: img_callback(pred_x0, i)
|
||||
|
||||
-
|
||||
- return img
|
||||
+ if callback:
|
||||
+ if streaming_callbacks:
|
||||
@ -80,7 +89,7 @@ index dcf7901..4028a70 100644
|
||||
|
||||
@torch.no_grad()
|
||||
def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
|
||||
@@ -687,7 +708,9 @@ class UNet(DDPM):
|
||||
@@ -706,7 +726,9 @@ class UNet(DDPM):
|
||||
|
||||
@torch.no_grad()
|
||||
def ddim_sampling(self, x_latent, cond, t_start, unconditional_guidance_scale=1.0, unconditional_conditioning=None,
|
||||
@ -91,11 +100,10 @@ index dcf7901..4028a70 100644
|
||||
|
||||
timesteps = self.ddim_timesteps
|
||||
timesteps = timesteps[:t_start]
|
||||
@@ -710,11 +733,25 @@ class UNet(DDPM):
|
||||
x_dec = self.p_sample_ddim(x_dec, cond, ts, index=index, use_original_steps=use_original_steps,
|
||||
@@ -730,10 +752,24 @@ class UNet(DDPM):
|
||||
unconditional_guidance_scale=unconditional_guidance_scale,
|
||||
unconditional_conditioning=unconditional_conditioning)
|
||||
+
|
||||
|
||||
+ if callback:
|
||||
+ if streaming_callbacks:
|
||||
+ yield from callback(i)
|
||||
@ -106,7 +114,7 @@ index dcf7901..4028a70 100644
|
||||
+ yield from img_callback(x_dec, i)
|
||||
+ else:
|
||||
+ img_callback(x_dec, i)
|
||||
|
||||
+
|
||||
if mask is not None:
|
||||
- return x0 * mask + (1. - mask) * x_dec
|
||||
+ x_dec = x0 * mask + (1. - mask) * x_dec
|
||||
@ -119,3 +127,16 @@ index dcf7901..4028a70 100644
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
diff --git a/optimizedSD/openaimodelSplit.py b/optimizedSD/openaimodelSplit.py
|
||||
index abc3098..7a32ffe 100644
|
||||
--- a/optimizedSD/openaimodelSplit.py
|
||||
+++ b/optimizedSD/openaimodelSplit.py
|
||||
@@ -13,7 +13,7 @@ from ldm.modules.diffusionmodules.util import (
|
||||
normalization,
|
||||
timestep_embedding,
|
||||
)
|
||||
-from splitAttention import SpatialTransformer
|
||||
+from .splitAttention import SpatialTransformer
|
||||
|
||||
|
||||
class AttentionPool2d(nn.Module):
|
||||
|
@ -193,6 +193,15 @@ def mk_img(req: Request):
|
||||
|
||||
gc()
|
||||
|
||||
if device != "cpu":
|
||||
modelFS.to("cpu")
|
||||
modelCS.to("cpu")
|
||||
|
||||
model.model1.to("cpu")
|
||||
model.model2.to("cpu")
|
||||
|
||||
gc()
|
||||
|
||||
yield json.dumps({
|
||||
"status": 'failed',
|
||||
"detail": str(e)
|
||||
@ -312,11 +321,7 @@ def do_mk_img(req: Request):
|
||||
if device != "cpu" and precision == "autocast":
|
||||
mask = mask.half()
|
||||
|
||||
if device != "cpu":
|
||||
mem = torch.cuda.memory_allocated() / 1e6
|
||||
modelFS.to("cpu")
|
||||
while torch.cuda.memory_allocated() / 1e6 >= mem:
|
||||
time.sleep(1)
|
||||
move_fs_to_cpu()
|
||||
|
||||
assert 0. <= opt_strength <= 1., 'can only work with strength in [0.0, 1.0]'
|
||||
t_enc = int(opt_strength * opt_ddim_steps)
|
||||
@ -365,7 +370,7 @@ def do_mk_img(req: Request):
|
||||
if req.stream_progress_updates:
|
||||
progress = {"step": i, "total_steps": opt_ddim_steps}
|
||||
|
||||
if req.stream_image_progress:
|
||||
if req.stream_image_progress and i % 5 == 0:
|
||||
partial_images = []
|
||||
|
||||
for i in range(batch_size):
|
||||
@ -484,12 +489,8 @@ def do_mk_img(req: Request):
|
||||
seeds += str(opt_seed) + ","
|
||||
opt_seed += 1
|
||||
|
||||
move_fs_to_cpu()
|
||||
gc()
|
||||
if device != "cpu":
|
||||
mem = torch.cuda.memory_allocated() / 1e6
|
||||
modelFS.to("cpu")
|
||||
while torch.cuda.memory_allocated() / 1e6 >= mem:
|
||||
time.sleep(1)
|
||||
del x_samples, x_samples_ddim, x_sample
|
||||
print("memory_final = ", torch.cuda.memory_allocated() / 1e6)
|
||||
|
||||
@ -575,6 +576,13 @@ def _img2img(init_latent, t_enc, batch_size, opt_scale, c, uc, opt_ddim_steps, o
|
||||
else:
|
||||
return samples_ddim
|
||||
|
||||
def move_fs_to_cpu():
|
||||
if device != "cpu":
|
||||
mem = torch.cuda.memory_allocated() / 1e6
|
||||
modelFS.to("cpu")
|
||||
while torch.cuda.memory_allocated() / 1e6 >= mem:
|
||||
time.sleep(1)
|
||||
|
||||
def gc():
|
||||
if device == 'cpu':
|
||||
return
|
||||
|
Loading…
Reference in New Issue
Block a user