Initial version that works with the lstein fork. Known limitations: CPU mode, streaming updates (live and progress bar), and Turbo Mode do not work yet, and the model is kept in VRAM instead of RAM.

2025-08-12 09:19:24 +02:00 · 2022-09-29 20:27:46 +05:30
parent 196649c0e9
commit 50cce36d94
7 changed files with 248 additions and 494 deletions
--- a/scripts/on_sd_start.bat
+++ b/scripts/on_sd_start.bat
@ -15,16 +15,17 @@
    @call git reset --hard
    @call git pull
-    @call git checkout f6cfebffa752ee11a7b07497b8529d5971de916c
+    @call git checkout d87bd29a6862996d8a0980c1343b6f0d4eb718b4
-    @call git apply ..\ui\sd_internal\ddim_callback.patch
+    @REM @call git apply ..\ui\sd_internal\ddim_callback.patch
-    @call git apply ..\ui\sd_internal\env_yaml.patch
+    @REM @call git apply ..\ui\sd_internal\env_yaml.patch
    @call git apply ..\ui\sd_internal\custom_sd.patch
    @cd ..
 ) else (
    @echo. & echo "Downloading Stable Diffusion.." & echo.
-    @call git clone https://github.com/basujindal/stable-diffusion.git && (
+    @call git clone https://github.com/invoke-ai/InvokeAI.git stable-diffusion && (
        @echo sd_git_cloned >> scripts\install_status.txt
    ) || (
        @echo "Error downloading Stable Diffusion. Sorry about that, please try to:" & echo "  1. Run this installer again." & echo "  2. If that doesn't fix it, please try the common troubleshooting steps at https://github.com/cmdr2/stable-diffusion-ui/blob/main/Troubleshooting.md" & echo "  3. If those steps don't help, please copy *all* the error messages in this window, and ask the community at https://discord.com/invite/u9yhsFmEkB" & echo "  4. If that doesn't solve the problem, please file an issue at https://github.com/cmdr2/stable-diffusion-ui/issues" & echo "Thanks!"
@ -33,10 +34,11 @@
    )
    @cd stable-diffusion
-    @call git checkout f6cfebffa752ee11a7b07497b8529d5971de916c
+    @call git checkout d87bd29a6862996d8a0980c1343b6f0d4eb718b4
-    @call git apply ..\ui\sd_internal\ddim_callback.patch
+    @REM @call git apply ..\ui\sd_internal\ddim_callback.patch
-    @call git apply ..\ui\sd_internal\env_yaml.patch
+    @REM @call git apply ..\ui\sd_internal\env_yaml.patch
    @call git apply ..\ui\sd_internal\custom_sd.patch
    @cd ..
 )
@ -81,58 +83,6 @@
 set PATH=C:\Windows\System32;%PATH%
@>nul grep -c "conda_sd_gfpgan_deps_installed" ..\scripts\install_status.txt
@if "%ERRORLEVEL%" EQU "0" (
    @echo "Packages necessary for GFPGAN (Face Correction) were already installed"
 ) else (
    @echo. & echo "Downloading packages necessary for GFPGAN (Face Correction).." & echo.
    @set PYTHONNOUSERSITE=1
    @call pip install -e git+https://github.com/TencentARC/GFPGAN#egg=GFPGAN || (
        @echo. & echo "Error installing the packages necessary for GFPGAN (Face Correction). Sorry about that, please try to:" & echo "  1. Run this installer again." & echo "  2. If that doesn't fix it, please try the common troubleshooting steps at https://github.com/cmdr2/stable-diffusion-ui/blob/main/Troubleshooting.md" & echo "  3. If those steps don't help, please copy *all* the error messages in this window, and ask the community at https://discord.com/invite/u9yhsFmEkB" & echo "  4. If that doesn't solve the problem, please file an issue at https://github.com/cmdr2/stable-diffusion-ui/issues" & echo "Thanks!" & echo.
        pause
        exit /b
    )
    @call pip install basicsr==1.4.2 || (
        @echo. & echo "Error installing the basicsr package necessary for GFPGAN (Face Correction). Sorry about that, please try to:" & echo "  1. Run this installer again." & echo "  2. If that doesn't fix it, please try the common troubleshooting steps at https://github.com/cmdr2/stable-diffusion-ui/blob/main/Troubleshooting.md" & echo "  3. If those steps don't help, please copy *all* the error messages in this window, and ask the community at https://discord.com/invite/u9yhsFmEkB" & echo "  4. If that doesn't solve the problem, please file an issue at https://github.com/cmdr2/stable-diffusion-ui/issues" & echo "Thanks!" & echo.
        pause
        exit /b
    )
    for /f "tokens=*" %%a in ('python -c "from gfpgan import GFPGANer; print(42)"') do if "%%a" NEQ "42" (
        @echo. & echo "Dependency test failed! Error installing the packages necessary for GFPGAN (Face Correction). Sorry about that, please try to:" & echo "  1. Run this installer again." & echo "  2. If that doesn't fix it, please try the common troubleshooting steps at https://github.com/cmdr2/stable-diffusion-ui/blob/main/Troubleshooting.md" & echo "  3. If those steps don't help, please copy *all* the error messages in this window, and ask the community at https://discord.com/invite/u9yhsFmEkB" & echo "  4. If that doesn't solve the problem, please file an issue at https://github.com/cmdr2/stable-diffusion-ui/issues" & echo "Thanks!" & echo.
        pause
        exit /b
    )
    @echo conda_sd_gfpgan_deps_installed >> ..\scripts\install_status.txt
 )
@>nul grep -c "conda_sd_esrgan_deps_installed" ..\scripts\install_status.txt
@if "%ERRORLEVEL%" EQU "0" (
    @echo "Packages necessary for ESRGAN (Resolution Upscaling) were already installed"
 ) else (
    @echo. & echo "Downloading packages necessary for ESRGAN (Resolution Upscaling).." & echo.
    @set PYTHONNOUSERSITE=1
    @call pip install -e git+https://github.com/xinntao/Real-ESRGAN#egg=realesrgan || (
        @echo. & echo "Error installing the packages necessary for ESRGAN (Resolution Upscaling). Sorry about that, please try to:" & echo "  1. Run this installer again." & echo "  2. If that doesn't fix it, please try the common troubleshooting steps at https://github.com/cmdr2/stable-diffusion-ui/blob/main/Troubleshooting.md" & echo "  3. If those steps don't help, please copy *all* the error messages in this window, and ask the community at https://discord.com/invite/u9yhsFmEkB" & echo "  4. If that doesn't solve the problem, please file an issue at https://github.com/cmdr2/stable-diffusion-ui/issues" & echo "Thanks!" & echo.
        pause
        exit /b
    )
    for /f "tokens=*" %%a in ('python -c "from basicsr.archs.rrdbnet_arch import RRDBNet; from realesrgan import RealESRGANer; print(42)"') do if "%%a" NEQ "42" (
        @echo. & echo "Dependency test failed! Error installing the packages necessary for ESRGAN (Resolution Upscaling). Sorry about that, please try to:" & echo "  1. Run this installer again." & echo "  2. If that doesn't fix it, please try the common troubleshooting steps at https://github.com/cmdr2/stable-diffusion-ui/blob/main/Troubleshooting.md" & echo "  3. If those steps don't help, please copy *all* the error messages in this window, and ask the community at https://discord.com/invite/u9yhsFmEkB" & echo "  4. If that doesn't solve the problem, please file an issue at https://github.com/cmdr2/stable-diffusion-ui/issues" & echo "Thanks!" & echo.
        pause
        exit /b
    )
    @echo conda_sd_esrgan_deps_installed >> ..\scripts\install_status.txt
 )
@>nul grep -c "conda_sd_ui_deps_installed" ..\scripts\install_status.txt
@if "%ERRORLEVEL%" EQU "0" (
    echo "Packages necessary for Stable Diffusion UI were already installed"
--- a/ui/index.html
+++ b/ui/index.html
@ -15,7 +15,7 @@
 <div id="container">
    <div id="top-nav">
        <div id="logo">
-            <h1>Stable Diffusion UI <small>v2.195 <span id="updateBranchLabel"></span></small></h1>
+            <h1>Stable Diffusion UI <small>v2.2 <span id="updateBranchLabel"></span></small></h1>
        </div>
        <ul id="top-nav-items">
            <li class="dropdown">
--- a/ui/media/main.js
+++ b/ui/media/main.js
@ -571,8 +571,10 @@ async function checkTasks() {
        // setStatus('request', 'done', 'success')
    } else {
        if (task.outputMsg.innerText.toLowerCase().indexOf('error') === -1) {
            task.outputMsg.innerText = 'Task ended after ' + time + ' seconds'
        }
    }
    if (randomSeedField.checked) {
        seedField.value = task.seed
--- a/ui/sd_internal/init.py
+++ b/ui/sd_internal/init.py
@ -23,6 +23,7 @@ class Request:
    use_face_correction: str = None # or "GFPGANv1.3"
    use_upscale: str = None # or "RealESRGAN_x4plus" or "RealESRGAN_x4plus_anime_6B"
    show_only_filtered_image: bool = False
    output_format: str = "jpeg" # "png", "jpeg"
    stream_progress_updates: bool = False
    stream_image_progress: bool = False
@ -42,6 +43,7 @@ class Request:
            "sampler": self.sampler,
            "use_face_correction": self.use_face_correction,
            "use_upscale": self.use_upscale,
            "output_format": self.output_format,
        }
    def to_string(self):
@ -63,6 +65,7 @@ class Request:
    use_face_correction: {self.use_face_correction}
    use_upscale: {self.use_upscale}
    show_only_filtered_image: {self.show_only_filtered_image}
    output_format: {self.output_format}
    stream_progress_updates: {self.stream_progress_updates}
    stream_image_progress: {self.stream_image_progress}'''
--- a/ui/sd_internal/custom_sd.patch
+++ b/ui/sd_internal/custom_sd.patch
@ -0,0 +1,46 @@
 diff --git a/ldm/dream/conditioning.py b/ldm/dream/conditioning.py
 index dfa1089..e4908ad 100644
 --- a/ldm/dream/conditioning.py
 +++ b/ldm/dream/conditioning.py
@@ -12,8 +12,8 @@ log_tokenization()              print out colour-coded tokens and warn if trunca
 import re
 import torch
 -def get_uc_and_c(prompt, model, log_tokens=False, skip_normalize=False):
 -    uc = model.get_learned_conditioning([''])
 +def get_uc_and_c(prompt, model, log_tokens=False, skip_normalize=False, negative_prompt=''):
 +    uc = model.get_learned_conditioning([negative_prompt])
     # get weighted sub-prompts
     weighted_subprompts = split_weighted_subprompts(
 diff --git a/ldm/generate.py b/ldm/generate.py
 index 8f67403..d88ce2d 100644
 --- a/ldm/generate.py
 +++ b/ldm/generate.py
@@ -205,6 +205,7 @@ class Generate:
             init_mask      =    None,
             fit            =    False,
             strength       =    None,
 +            init_img_is_path =  True,
             # these are specific to GFPGAN/ESRGAN
             gfpgan_strength=    0,
             save_original  =    False,
@@ -303,11 +304,15 @@ class Generate:
             uc, c = get_uc_and_c(
                 prompt, model=self.model,
                 skip_normalize=skip_normalize,
 -                log_tokens=self.log_tokenization
 +                log_tokens=self.log_tokenization,
 +                negative_prompt=(args['negative_prompt'] if 'negative_prompt' in args else '')
             )
 -            (init_image,mask_image) = self._make_images(init_img,init_mask, width, height, fit)
 -            
 +            if init_img_is_path:
 +                (init_image,mask_image) = self._make_images(init_img,init_mask, width, height, fit)
 +            else:
 +                (init_image,mask_image) = (init_img, init_mask)
 +
             if (init_image is not None) and (mask_image is not None):
                 generator = self._make_inpaint()
             elif init_image is not None:
--- a/ui/sd_internal/runtime.py
+++ b/ui/sd_internal/runtime.py
@ -1,64 +1,47 @@
-import json
+import sys
-import os, re
+import os
-import traceback
+import uuid
 import re
 import torch
 import traceback
 import numpy as np
 from omegaconf import OmegaConf
-from PIL import Image, ImageOps
+from pytorch_lightning import logging
 from tqdm import tqdm, trange
 from itertools import islice
 from einops import rearrange
-import time
+from PIL import Image, ImageOps, ImageChops
-from pytorch_lightning import seed_everything
+from ldm.generate import Generate
-from torch import autocast
+import transformers
 from contextlib import nullcontext
 from einops import rearrange, repeat
 from ldm.util import instantiate_from_config
 from optimizedSD.optimUtils import split_weighted_subprompts
 from transformers import logging
 from gfpgan import GFPGANer
 from basicsr.archs.rrdbnet_arch import RRDBNet
 from realesrgan import RealESRGANer
-import uuid
+transformers.logging.set_verbosity_error()
 logging.set_verbosity_error()
 # consts
 config_yaml = "optimizedSD/v1-inference.yaml"
 filename_regex = re.compile('[^a-zA-Z0-9]')
 # api stuff
 from . import Request, Response, Image as ResponseImage
 import base64
 import json
 from io import BytesIO
-#from colorama import Fore
+
 filename_regex = re.compile('[^a-zA-Z0-9]')
 generator = None
 gfpgan_file = None
 real_esrgan_file = None
 model_gfpgan = None
 model_real_esrgan = None
 device = None
 precision = 'autocast'
 has_valid_gpu = False
 force_full_precision = False
 # local
 stop_processing = False
 temp_images = {}
 ckpt_file = None
 gfpgan_file = None
 real_esrgan_file = None
 model = None
 modelCS = None
 modelFS = None
 model_gfpgan = None
 model_real_esrgan = None
 model_is_half = False
 model_fs_is_half = False
 device = None
 unet_bs = 1
 precision = 'autocast'
 sampler_plms = None
 sampler_ddim = None
 has_valid_gpu = False
 force_full_precision = False
 try:
    gpu = torch.cuda.current_device()
    gpu_name = torch.cuda.get_device_name(gpu)
@ -79,68 +62,45 @@ except:
    print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!')
    pass
-def load_model_ckpt(ckpt_to_use, device_to_use='cuda', turbo=False, unet_bs_to_use=1, precision_to_use='autocast', half_model_fs=False):
+def load_model_ckpt(ckpt_to_use, device_to_use='cuda', precision_to_use='autocast'):
-    global ckpt_file, model, modelCS, modelFS, model_is_half, device, unet_bs, precision, model_fs_is_half
+    global generator
    ckpt_file = ckpt_to_use
    device = device_to_use if has_valid_gpu else 'cpu'
    precision = precision_to_use if not force_full_precision else 'full'
    unet_bs = unet_bs_to_use
-    if device == 'cpu':
+    try:
-        precision = 'full'
+        config = 'configs/models.yaml'
        model = 'stable-diffusion-1.4'
-    sd = load_model_from_config(f"{ckpt_file}.ckpt")
+        models = OmegaConf.load(config)
-    li, lo = [], []
+        width = models[model].width
-    for key, value in sd.items():
+        height = models[model].height
-        sp = key.split(".")
+        config = models[model].config
-        if (sp[0]) == "model":
+        weights = ckpt_to_use + '.ckpt'
-            if "input_blocks" in sp:
+    except (FileNotFoundError, IOError, KeyError) as e:
-                li.append(key)
+        print(f'{e}. Aborting.')
-            elif "middle_block" in sp:
+        sys.exit(-1)
                li.append(key)
            elif "time_embed" in sp:
                li.append(key)
            else:
                lo.append(key)
    for key in li:
        sd["model1." + key[6:]] = sd.pop(key)
    for key in lo:
        sd["model2." + key[6:]] = sd.pop(key)
-    config = OmegaConf.load(f"{config_yaml}")
+    generator = Generate(
        width=width,
        height=height,
        sampler_name='ddim',
        weights=weights,
        full_precision=(precision == 'full'),
        config=config,
        grid=False,
        # this is solely for recreating the prompt
        seamless=False,
        embedding_path=None,
        device_type=device,
        ignore_ctrl_c=True,
    )
-    model = instantiate_from_config(config.modelUNet)
+    # gets rid of annoying messages about random seed
-    _, _ = model.load_state_dict(sd, strict=False)
+    logging.getLogger('pytorch_lightning').setLevel(logging.ERROR)
    model.eval()
    model.cdevice = device
    model.unet_bs = unet_bs
    model.turbo = turbo
-    modelCS = instantiate_from_config(config.modelCondStage)
+    # preload the model
-    _, _ = modelCS.load_state_dict(sd, strict=False)
+    generator.load_model()
    modelCS.eval()
    modelCS.cond_stage_model.device = device
    modelFS = instantiate_from_config(config.modelFirstStage)
    _, _ = modelFS.load_state_dict(sd, strict=False)
    modelFS.eval()
    del sd
    if device != "cpu" and precision == "autocast":
        model.half()
        modelCS.half()
        model_is_half = True
    else:
        model_is_half = False
    if half_model_fs:
        modelFS.half()
        model_fs_is_half = True
    else:
        model_fs_is_half = False
    print('loaded ', ckpt_file, 'to', device, 'precision', precision)
 def load_model_gfpgan(gfpgan_to_use):
    global gfpgan_file, model_gfpgan
@ -179,7 +139,7 @@ def load_model_real_esrgan(real_esrgan_to_use):
        model_real_esrgan.device = torch.device('cpu')
        model_real_esrgan.model.to('cpu')
    else:
-        model_real_esrgan = RealESRGANer(scale=2, model_path=model_path, model=model_to_use, pre_pad=0, half=model_is_half)
+        model_real_esrgan = RealESRGANer(scale=2, model_path=model_path, model=model_to_use, pre_pad=0, half=(precision != 'full'))
    model_real_esrgan.model.name = real_esrgan_to_use
@ -193,14 +153,14 @@ def mk_img(req: Request):
        gc()
-        if device != "cpu":
+        # if device != "cpu":
-            modelFS.to("cpu")
+        #     modelFS.to("cpu")
-            modelCS.to("cpu")
+        #     modelCS.to("cpu")
-            model.model1.to("cpu")
+        #     model.model1.to("cpu")
-            model.model2.to("cpu")
+        #     model.model2.to("cpu")
-        gc()
+        # gc()
        yield json.dumps({
            "status": 'failed',
@ -208,292 +168,164 @@ def mk_img(req: Request):
        })
 def do_mk_img(req: Request):
    global model, modelCS, modelFS, device
    global model_gfpgan, model_real_esrgan
    global stop_processing
    stop_processing = False
    res = Response()
    res.request = req
    res.images = []
    temp_images.clear()
    model.turbo = req.turbo
    if req.use_cpu:
        if device != 'cpu':
            device = 'cpu'
            if model_is_half:
                del model, modelCS, modelFS
                load_model_ckpt(ckpt_file, device)
            load_model_gfpgan(gfpgan_file)
            load_model_real_esrgan(real_esrgan_file)
    else:
        if has_valid_gpu:
            prev_device = device
            device = 'cuda'
            if (precision == 'autocast' and (req.use_full_precision or not model_is_half)) or \
                (precision == 'full' and not req.use_full_precision and not force_full_precision) or \
                (req.init_image is None and model_fs_is_half) or \
                (req.init_image is not None and not model_fs_is_half and not force_full_precision):
                del model, modelCS, modelFS
                load_model_ckpt(ckpt_file, device, req.turbo, unet_bs, ('full' if req.use_full_precision else 'autocast'), half_model_fs=(req.init_image is not None and not req.use_full_precision))
                if prev_device != device:
                    load_model_gfpgan(gfpgan_file)
                    load_model_real_esrgan(real_esrgan_file)
    if req.use_face_correction != gfpgan_file:
        load_model_gfpgan(req.use_face_correction)
    if req.use_upscale != real_esrgan_file:
        load_model_real_esrgan(req.use_upscale)
-    model.cdevice = device
+    init_image = None
-    modelCS.cond_stage_model.device = device
+    init_mask = None
-    opt_prompt = req.prompt
+    if req.init_image is not None:
-    opt_seed = req.seed
+        image = base64_str_to_img(req.init_image)
    opt_n_samples = req.num_outputs
    opt_n_iter = 1
    opt_scale = req.guidance_scale
    opt_C = 4
    opt_H = req.height
    opt_W = req.width
    opt_f = 8
    opt_ddim_steps = req.num_inference_steps
    opt_ddim_eta = 0.0
    opt_strength = req.prompt_strength
    opt_save_to_disk_path = req.save_to_disk_path
    opt_init_img = req.init_image
    opt_use_face_correction = req.use_face_correction
    opt_use_upscale = req.use_upscale
    opt_show_only_filtered = req.show_only_filtered_image
    opt_format = 'png'
    opt_sampler_name = req.sampler
-    print(req.to_string(), '\n    device', device)
+        w, h = image.size
        print(f"loaded input image of size ({w}, {h}) from base64")
        if req.width is not None and req.height is not None:
            h, w = req.height, req.width
-    print('\n\n    Using precision:', precision)
+        w, h = map(lambda x: x - x % 64, (w, h))  # resize to integer multiple of 64
        image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
        init_image = generator._create_init_image(image)
-    seed_everything(opt_seed)
+        if generator._has_transparency(image) and req.mask is None:      # if image has a transparent area and no mask was provided, then try to generate mask
            print('>> Initial image has transparent areas. Will inpaint in these regions.')
            if generator._check_for_erasure(image):
                print(
                    '>> WARNING: Colors underneath the transparent region seem to have been erased.\n',
                    '>>          Inpainting will be suboptimal. Please preserve the colors when making\n',
                    '>>          a transparency mask, or provide mask explicitly using --init_mask (-M).'
                )
            init_mask = generator._create_init_mask(image)                   # this returns a torch tensor
-    batch_size = opt_n_samples
+        if device != "cpu" and precision != "full":
    prompt = opt_prompt
    assert prompt is not None
    data = [batch_size * [prompt]]
    if precision == "autocast" and device != "cpu":
        precision_scope = autocast
    else:
        precision_scope = nullcontext
    mask = None
    if req.init_image is None:
        handler = _txt2img
        init_latent = None
        t_enc = None
    else:
        handler = _img2img
        init_image = load_img(req.init_image, opt_W, opt_H)
        init_image = init_image.to(device)
        if device != "cpu" and precision == "autocast":
            init_image = init_image.half()
        modelFS.to(device)
        init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
        init_latent = modelFS.get_first_stage_encoding(modelFS.encode_first_stage(init_image))  # move to latent space
        if req.mask is not None:
-            mask = load_mask(req.mask, opt_W, opt_H, init_latent.shape[2], init_latent.shape[3], True).to(device)
+            image = base64_str_to_img(req.mask)
            mask = mask[0][0].unsqueeze(0).repeat(4, 1, 1).unsqueeze(0)
            mask = repeat(mask, '1 ... -> b ...', b=batch_size)
-            if device != "cpu" and precision == "autocast":
+            image = ImageChops.invert(image)
                mask = mask.half()
-        move_fs_to_cpu()
+            w, h = image.size
            print(f"loaded input image of size ({w}, {h}) from base64")
            if req.width is not None and req.height is not None:
                h, w = req.height, req.width
-        assert 0. <= opt_strength <= 1., 'can only work with strength in [0.0, 1.0]'
+            w, h = map(lambda x: x - x % 64, (w, h))  # resize to integer multiple of 64
-        t_enc = int(opt_strength * opt_ddim_steps)
+            image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
        print(f"target t_enc is {t_enc} steps")
-    if opt_save_to_disk_path is not None:
+            init_mask = generator._create_init_mask(image)
-        session_out_path = os.path.join(opt_save_to_disk_path, req.session_id)
+
        if init_mask is not None:
            req.sampler = 'plms' # hack to force the underlying implementation to initialize DDIM properly
    result = generator.prompt2image(
        req.prompt,
        iterations     =    req.num_outputs,
        steps          =    req.num_inference_steps,
        seed           =    req.seed,
        cfg_scale      =    req.guidance_scale,
        ddim_eta       =    0.0,
        skip_normalize =    False,
        image_callback =    None,
        step_callback  =    None,
        width          =    req.width,
        height         =    req.height,
        sampler_name   =    req.sampler,
        seamless       =    False,
        log_tokenization=  False,
        with_variations =   None,
        variation_amount =  0.0,
        # these are specific to img2img and inpaint
        init_img       =    init_image,
        init_mask      =    init_mask,
        fit            =    False,
        strength       =    req.prompt_strength,
        init_img_is_path = False,
        # these are specific to GFPGAN/ESRGAN
        gfpgan_strength=    0,
        save_original  =    False,
        upscale        =    None,
        negative_prompt=    req.negative_prompt,
    )
    has_filters =   (req.use_face_correction is not None and req.use_face_correction.startswith('GFPGAN')) or \
                    (req.use_upscale is not None and req.use_upscale.startswith('RealESRGAN'))
    print('has filter', has_filters)
    return_orig_img = not has_filters or not req.show_only_filtered_image
    res = Response()
    res.request = req
    res.images = []
    if req.save_to_disk_path is not None:
        session_out_path = os.path.join(req.save_to_disk_path, req.session_id)
        os.makedirs(session_out_path, exist_ok=True)
    else:
        session_out_path = None
-    seeds = ""
+    for img, seed in result:
-    with torch.no_grad():
+        if req.save_to_disk_path is not None:
-        for n in trange(opt_n_iter, desc="Sampling"):
+            prompt_flattened = filename_regex.sub('_', req.prompt)
            for prompts in tqdm(data, desc="data"):
                with precision_scope("cuda"):
                    modelCS.to(device)
                    uc = None
                    if opt_scale != 1.0:
                        uc = modelCS.get_learned_conditioning(batch_size * [req.negative_prompt])
                    if isinstance(prompts, tuple):
                        prompts = list(prompts)
                    subprompts, weights = split_weighted_subprompts(prompts[0])
                    if len(subprompts) > 1:
                        c = torch.zeros_like(uc)
                        totalWeight = sum(weights)
                        # normalize each "sub prompt" and add it
                        for i in range(len(subprompts)):
                            weight = weights[i]
                            # if not skip_normalize:
                            weight = weight / totalWeight
                            c = torch.add(c, modelCS.get_learned_conditioning(subprompts[i]), alpha=weight)
                    else:
                        c = modelCS.get_learned_conditioning(prompts)
                    modelFS.to(device)
                    partial_x_samples = None
                    def img_callback(x_samples, i):
                        nonlocal partial_x_samples
                        partial_x_samples = x_samples
                        if req.stream_progress_updates:
                            n_steps = opt_ddim_steps if req.init_image is None else t_enc
                            progress = {"step": i, "total_steps": n_steps}
                            if req.stream_image_progress and i % 5 == 0:
                                partial_images = []
                                for i in range(batch_size):
                                    x_samples_ddim = modelFS.decode_first_stage(x_samples[i].unsqueeze(0))
                                    x_sample = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
                                    x_sample = 255.0 * rearrange(x_sample[0].cpu().numpy(), "c h w -> h w c")
                                    x_sample = x_sample.astype(np.uint8)
                                    img = Image.fromarray(x_sample)
                                    buf = BytesIO()
                                    img.save(buf, format='JPEG')
                                    buf.seek(0)
                                    del img, x_sample, x_samples_ddim
                                    # don't delete x_samples, it is used in the code that called this callback
                                    temp_images[str(req.session_id) + '/' + str(i)] = buf
                                    partial_images.append({'path': f'/image/tmp/{req.session_id}/{i}'})
                                progress['output'] = partial_images
                            yield json.dumps(progress)
                        if stop_processing:
                            raise UserInitiatedStop("User requested that we stop processing")
                    # run the handler
                    try:
                        if handler == _txt2img:
                            x_samples = _txt2img(opt_W, opt_H, opt_n_samples, opt_ddim_steps, opt_scale, None, opt_C, opt_f, opt_ddim_eta, c, uc, opt_seed, img_callback, mask, opt_sampler_name)
                        else:
                            x_samples = _img2img(init_latent, t_enc, batch_size, opt_scale, c, uc, opt_ddim_steps, opt_ddim_eta, opt_seed, img_callback, mask)
                        yield from x_samples
                        x_samples = partial_x_samples
                    except UserInitiatedStop:
                        if partial_x_samples is None:
                            continue
                        x_samples = partial_x_samples
                    print("saving images")
                    for i in range(batch_size):
                        x_samples_ddim = modelFS.decode_first_stage(x_samples[i].unsqueeze(0))
                        x_sample = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
                        x_sample = 255.0 * rearrange(x_sample[0].cpu().numpy(), "c h w -> h w c")
                        x_sample = x_sample.astype(np.uint8)
                        img = Image.fromarray(x_sample)
                        has_filters =   (opt_use_face_correction is not None and opt_use_face_correction.startswith('GFPGAN')) or \
                                        (opt_use_upscale is not None and opt_use_upscale.startswith('RealESRGAN'))
                        return_orig_img = not has_filters or not opt_show_only_filtered
                        if stop_processing:
                            return_orig_img = True
                        if opt_save_to_disk_path is not None:
                            prompt_flattened = filename_regex.sub('_', prompts[0])
            prompt_flattened = prompt_flattened[:50]
            img_id = str(uuid.uuid4())[-8:]
            file_path = f"{prompt_flattened}_{img_id}"
-                            img_out_path = os.path.join(session_out_path, f"{file_path}.{opt_format}")
+            img_out_path = os.path.join(session_out_path, f"{file_path}.{req.output_format}")
            meta_out_path = os.path.join(session_out_path, f"{file_path}.txt")
            if return_orig_img:
                save_image(img, img_out_path)
-                            save_metadata(meta_out_path, prompts, opt_seed, opt_W, opt_H, opt_ddim_steps, opt_scale, opt_strength, opt_use_face_correction, opt_use_upscale, opt_sampler_name, req.negative_prompt)
+            save_metadata(meta_out_path, req.prompt, seed, req.width, req.height, req.num_inference_steps, req.guidance_scale, req.prompt_strength, req.use_face_correction, req.use_upscale, req.sampler, req.negative_prompt)
        if return_orig_img:
            img_data = img_to_base64_str(img)
-                            res_image_orig = ResponseImage(data=img_data, seed=opt_seed)
+            res_image_orig = ResponseImage(data=img_data, seed=seed)
            res.images.append(res_image_orig)
-                            if opt_save_to_disk_path is not None:
+            if req.save_to_disk_path is not None:
                res_image_orig.path_abs = img_out_path
                        del img
        if has_filters and not stop_processing:
            print('Applying filters..')
            gc()
            filters_applied = []
-                            if opt_use_face_correction:
+            np_img = img.convert('RGB')
-                                _, _, output = model_gfpgan.enhance(x_sample[:,:,::-1], has_aligned=False, only_center_face=False, paste_back=True)
+            np_img = np.array(np_img, dtype=np.uint8)
                                x_sample = output[:,:,::-1]
                                filters_applied.append(opt_use_face_correction)
-                            if opt_use_upscale:
+            if req.use_face_correction:
-                                output, _ = model_real_esrgan.enhance(x_sample[:,:,::-1])
+                _, _, np_img = model_gfpgan.enhance(np_img, has_aligned=False, only_center_face=False, paste_back=True)
-                                x_sample = output[:,:,::-1]
+                filters_applied.append(req.use_face_correction)
                                filters_applied.append(opt_use_upscale)
-                            filtered_image = Image.fromarray(x_sample)
+            if req.use_upscale:
                np_img, _ = model_real_esrgan.enhance(np_img)
                filters_applied.append(req.use_upscale)
            filtered_image = Image.fromarray(np_img)
            filtered_img_data = img_to_base64_str(filtered_image)
-                            res_image_filtered = ResponseImage(data=filtered_img_data, seed=opt_seed)
+            res_image_filtered = ResponseImage(data=filtered_img_data, seed=seed)
            res.images.append(res_image_filtered)
            filters_applied = "_".join(filters_applied)
-                            if opt_save_to_disk_path is not None:
+            if req.save_to_disk_path is not None:
-                                filtered_img_out_path = os.path.join(session_out_path, f"{file_path}_{filters_applied}.{opt_format}")
+                filtered_img_out_path = os.path.join(session_out_path, f"{file_path}_{filters_applied}.{req.output_format}")
                save_image(filtered_image, filtered_img_out_path)
                res_image_filtered.path_abs = filtered_img_out_path
            del filtered_image
-                        seeds += str(opt_seed) + ","
+        del img
                        opt_seed += 1
                    move_fs_to_cpu()
                    gc()
                    del x_samples, x_samples_ddim, x_sample
                    print("memory_final = ", torch.cuda.memory_allocated() / 1e6)
    print('Task completed')
@ -505,8 +337,8 @@ def save_image(img, img_out_path):
    except:
        print('could not save the file', traceback.format_exc())
-def save_metadata(meta_out_path, prompts, opt_seed, opt_W, opt_H, opt_ddim_steps, opt_scale, opt_prompt_strength, opt_correct_face, opt_upscale, sampler_name, negative_prompt):
+def save_metadata(meta_out_path, prompt, seed, width, height, num_inference_steps, guidance_scale, prompt_strength, use_correct_face, use_upscale, sampler_name, negative_prompt):
-    metadata = f"{prompts[0]}\nWidth: {opt_W}\nHeight: {opt_H}\nSeed: {opt_seed}\nSteps: {opt_ddim_steps}\nGuidance Scale: {opt_scale}\nPrompt Strength: {opt_prompt_strength}\nUse Face Correction: {opt_correct_face}\nUse Upscaling: {opt_upscale}\nSampler: {sampler_name}\nNegative Prompt: {negative_prompt}"
+    metadata = f"{prompt}\nWidth: {width}\nHeight: {height}\nSeed: {seed}\nSteps: {num_inference_steps}\nGuidance Scale: {guidance_scale}\nPrompt Strength: {prompt_strength}\nUse Face Correction: {use_correct_face}\nUse Upscaling: {use_upscale}\nSampler: {sampler_name}\nNegative Prompt: {negative_prompt}"
    try:
        with open(meta_out_path, 'w') as f:
@ -514,68 +346,6 @@ def save_metadata(meta_out_path, prompts, opt_seed, opt_W, opt_H, opt_ddim_steps
    except:
        print('could not save the file', traceback.format_exc())
 def _txt2img(opt_W, opt_H, opt_n_samples, opt_ddim_steps, opt_scale, start_code, opt_C, opt_f, opt_ddim_eta, c, uc, opt_seed, img_callback, mask, sampler_name):
    """Yield whatever ``model.sample`` produces for a text-to-image request.

    The requested latent shape is ``[opt_n_samples, opt_C, opt_H // opt_f,
    opt_W // opt_f]``. When the module-level ``device`` is not ``"cpu"``,
    ``modelCS`` is moved to the CPU first and the function waits until the
    allocated CUDA memory reading drops below the value sampled before the
    move. For the ``'ddim'`` sampler, ``model.make_schedule`` is called
    before sampling.

    Being a generator, none of this runs until the first item is requested.
    """
    latent_shape = [opt_n_samples, opt_C, opt_H // opt_f, opt_W // opt_f]

    if device != "cpu":
        allocated_before = torch.cuda.memory_allocated() / 1e6
        modelCS.to("cpu")
        # Poll once per second until the allocation reading falls below the
        # pre-move baseline.
        # NOTE(review): if the move frees nothing, this never exits - confirm
        # callers guarantee modelCS is on the GPU at this point.
        while True:
            if torch.cuda.memory_allocated() / 1e6 < allocated_before:
                break
            time.sleep(1)

    if sampler_name == 'ddim':
        model.make_schedule(
            ddim_num_steps=opt_ddim_steps,
            ddim_eta=opt_ddim_eta,
            verbose=False,
        )

    sample_kwargs = {
        "S": opt_ddim_steps,
        "conditioning": c,
        "seed": opt_seed,
        "shape": latent_shape,
        "verbose": False,
        "unconditional_guidance_scale": opt_scale,
        "unconditional_conditioning": uc,
        "eta": opt_ddim_eta,
        "x_T": start_code,
        "img_callback": img_callback,
        "mask": mask,
        "sampler": sampler_name,
    }
    yield from model.sample(**sample_kwargs)
 def _img2img(init_latent, t_enc, batch_size, opt_scale, c, uc, opt_ddim_steps, opt_ddim_eta, opt_seed, img_callback, mask):
    """Yield whatever ``model.sample`` produces for an image-to-image request.

    ``init_latent`` is first passed through ``model.stochastic_encode``
    together with a tensor holding ``t_enc`` repeated ``batch_size`` times
    (moved to the module-level ``device``), the seed, eta and step count.
    The encoded latent is then handed to ``model.sample`` with the
    ``'ddim'`` sampler. ``x_T`` is ``init_latent`` only when a mask is
    supplied, otherwise ``None``.

    Being a generator, none of this runs until the first item is requested.
    """
    # encode (scaled latent)
    timesteps = torch.tensor([t_enc] * batch_size).to(device)
    encoded_latent = model.stochastic_encode(
        init_latent,
        timesteps,
        opt_seed,
        opt_ddim_eta,
        opt_ddim_steps,
    )

    if mask is None:
        masked_start = None
    else:
        masked_start = init_latent

    # decode it
    decode_kwargs = {
        "unconditional_guidance_scale": opt_scale,
        "unconditional_conditioning": uc,
        "img_callback": img_callback,
        "mask": mask,
        "x_T": masked_start,
        "sampler": 'ddim',
    }
    yield from model.sample(t_enc, c, encoded_latent, **decode_kwargs)
 def move_fs_to_cpu():
    """Move ``modelFS`` to the CPU and wait for CUDA memory to be released.

    Does nothing when the module-level ``device`` is ``"cpu"``. Otherwise the
    allocated CUDA memory is sampled, ``modelFS`` is moved, and the function
    sleeps in one-second steps until the reading drops below that sample.
    """
    if device == "cpu":
        return

    allocated_before = torch.cuda.memory_allocated() / 1e6
    modelFS.to("cpu")
    # NOTE(review): if the move frees nothing, this loop never exits - confirm
    # callers only invoke this while modelFS is resident on the GPU.
    while True:
        if torch.cuda.memory_allocated() / 1e6 < allocated_before:
            break
        time.sleep(1)
 def gc():
    if device == 'cpu':
        return
@ -583,25 +353,6 @@ def gc():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
 # internal
 def chunk(it, size):
    """Return an iterator over consecutive ``size``-length tuples of ``it``.

    The last tuple may be shorter than ``size``. Iteration ends as soon as an
    empty tuple would be produced, so an empty input (or ``size == 0``)
    yields nothing.
    """
    # Obtain the iterator eagerly so a non-iterable argument fails at call time.
    source = iter(it)

    def _batches():
        while True:
            batch = tuple(islice(source, size))
            if not batch:
                return
            yield batch

    return _batches()
 def load_model_from_config(ckpt, verbose=False):
    """Load the checkpoint at ``ckpt`` onto the CPU and return its state dict.

    Prints the checkpoint path, and the ``global_step`` entry when the
    checkpoint has one. ``verbose`` is accepted but not used.

    Raises ``KeyError`` if the checkpoint has no ``"state_dict"`` entry.
    """
    print(f"Loading model from {ckpt}")
    # NOTE(review): torch.load unpickles the file - only use trusted checkpoints.
    checkpoint = torch.load(ckpt, map_location="cpu")

    if "global_step" in checkpoint:
        step = checkpoint["global_step"]
        print(f"Global Step: {step}")

    return checkpoint["state_dict"]
 # utils
 class UserInitiatedStop(Exception):
    """Exception type named for a stop requested by the user."""
 def load_img(img_str, w0, h0):
    image = base64_str_to_img(img_str).convert("RGB")
    w, h = image.size
--- a/ui/server.py
+++ b/ui/server.py
@ -58,6 +58,7 @@ class ImageRequest(BaseModel):
    use_face_correction: str = None # or "GFPGANv1.3"
    use_upscale: str = None # or "RealESRGAN_x4plus" or "RealESRGAN_x4plus_anime_6B"
    show_only_filtered_image: bool = False
    output_format: str = "jpeg" # "png", "jpeg"
    stream_progress_updates: bool = False
    stream_image_progress: bool = False
@ -123,6 +124,7 @@ def image(req : ImageRequest):
    r.use_upscale: str = req.use_upscale
    r.use_face_correction = req.use_face_correction
    r.show_only_filtered_image = req.show_only_filtered_image
    r.output_format = req.output_format
    r.stream_progress_updates = True # the underlying implementation only supports streaming
    r.stream_image_progress = req.stream_image_progress