Start on multiple GPUs by default (top 75th percentile by free_mem); UI selection of 'cpu', 'auto', or a list of specific GPUs, now linked to the backend; dynamically start/stop render threads for the devices, without requiring a full program restart

cmdr2 2022-11-14 11:23:22 +05:30
parent a19ba40672
commit ea03fd22db
8 changed files with 339 additions and 179 deletions
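For reference, the new scheduling is driven by a `render_devices` key in config.json. The three accepted forms, taken from the validation messages in the new device_manager module below (the rest of config.json is omitted):

    {"render_devices": "auto"}                  # pick GPUs automatically (also the default when the key is absent)
    {"render_devices": "cpu"}                   # render on the CPU only
    {"render_devices": ["cuda:0", "cuda:1"]}    # pin rendering to specific GPUs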

Changed file 1 of 8: the main HTML page (likely ui/index.html; file paths were not captured in this view, so the paths given here are inferred from content)

@@ -7,7 +7,7 @@
 <link rel="icon" type="image/png" href="/media/images/favicon-32x32.png" sizes="32x32">
 <link rel="stylesheet" href="/media/css/fonts.css?v=1">
 <link rel="stylesheet" href="/media/css/themes.css?v=2">
-<link rel="stylesheet" href="/media/css/main.css?v=10">
+<link rel="stylesheet" href="/media/css/main.css?v=11">
 <link rel="stylesheet" href="/media/css/auto-save.css?v=5">
 <link rel="stylesheet" href="/media/css/modifier-thumbnails.css?v=4">
 <link rel="stylesheet" href="/media/css/fontawesome-all.min.css?v=1">
@@ -19,7 +19,7 @@
 <div id="container">
 <div id="top-nav">
 <div id="logo">
-<h1>Stable Diffusion UI <small>v2.3.14 <span id="updateBranchLabel"></span></small></h1>
+<h1>Stable Diffusion UI <small>v2.4 <span id="updateBranchLabel"></span></small></h1>
 </div>
 <div id="server-status">
 <div id="server-status-color"></div>
@@ -35,6 +35,9 @@
 <span id="tab-about" class="tab">
 <span><i class="fa fa-comments icon"></i> Help & Community</span>
 </span>
+<!-- <span id="tab-system-info" class="tab">
+<span><i class="fa fa-microchip icon"></i> System Info</span>
+</span> -->
 </div>
 </div>
@@ -245,7 +248,7 @@
 <div class="tab-content-inner">
 <div class="float-container">
 <div class="float-child">
 <h1>Help</h1>
 <ul id="help-links">
 <li><span class="help-section">Using the software</span>
 <ul>
@@ -270,7 +273,7 @@
 </div>
 <div class="float-child">
 <h1>Community</h1>
 <ul id="community-links">
 <li><a href="https://discord.com/invite/u9yhsFmEkB" target="_blank"><i class="fa-brands fa-discord fa-fw"></i> Discord user community</a></li>
 <li><a href="https://www.reddit.com/r/StableDiffusionUI/" target="_blank"><i class="fa-brands fa-reddit fa-fw"></i> Reddit community</a></li>
@@ -280,6 +283,18 @@
 </div>
 </div>
 </div>
+<!-- <div id="tab-content-system-info" class="tab-content">
+<div id="system-info" class="tab-content-inner">
+<h1>System Info</h1>
+<table>
+<tr><td><label>Processor:</label></td><td id="system-info-cpu" class="value">Dingus</td></tr>
+<tr><td><label>RAM:</label></td><td id="system-info-ram" class="value">Dingus Another</td></tr>
+<tr><td><label>Compatible Graphics Cards (all):</label></td><td id="system-info-all-gpus" class="value">Dingus</td></tr>
+<tr><td></td><td>&nbsp;</td></tr>
+<tr><td><label>What's being used for rendering 🔥:</label></td><td id="system-info-active-gpus" class="value">Dingus<br/>Intel Graphics SOmething<br/>Another thing</td></tr>
+</table>
+</div>
+</div> -->
 </div>
@@ -317,13 +332,13 @@
 </div>
 </body>
-<script src="media/js/parameters.js?v=4"></script>
+<script src="media/js/parameters.js?v=5"></script>
 <script src="media/js/plugins.js?v=1"></script>
 <script src="media/js/utils.js?v=6"></script>
 <script src="media/js/inpainting-editor.js?v=1"></script>
 <script src="media/js/image-modifiers.js?v=6"></script>
 <script src="media/js/auto-save.js?v=7"></script>
-<script src="media/js/main.js?v=13"></script>
+<script src="media/js/main.js?v=14"></script>
 <script src="media/js/themes.js?v=4"></script>
 <script src="media/js/dnd.js?v=8"></script>
 <script>

Changed file 2 of 8: the main stylesheet (likely ui/media/css/main.css, matching the v=10 to v=11 bump above)

@@ -123,7 +123,7 @@ label {
     padding: 16px;
     display: flex;
     flex-direction: column;
-    flex: 0 0 350pt;
+    flex: 0 0 370pt;
 }
 #editor label {
     font-weight: normal;

@@ -887,3 +887,9 @@ input::file-selector-button {
     margin-bottom: 15px;
     box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.15), 0 6px 20px 0 rgba(0, 0, 0, 0.15);
 }
+#system-info .value {
+    text-align: left;
+}
+#system-info label {
+    float: right;
+}

Changed file 3 of 8: the main UI script (likely ui/media/js/main.js, matching the v=13 to v=14 bump above)

@@ -27,6 +27,7 @@ let maskImageSelector = document.querySelector("#mask")
 let maskImagePreview = document.querySelector("#mask_preview")
 let turboField = document.querySelector('#turbo')
 let useCPUField = document.querySelector('#use_cpu')
+let autoPickGPUsField = document.querySelector('#auto_pick_gpus')
 let useGPUsField = document.querySelector('#use_gpus')
 let useFullPrecisionField = document.querySelector('#use_full_precision')
 let saveToDiskField = document.querySelector('#save_to_disk')
@@ -810,14 +811,15 @@ function getCurrentUserRequest() {
 }
 function getCurrentRenderDeviceSelection() {
-    if (useCPUField.checked) {
+    let selectedGPUs = $('#use_gpus').val()
+    if (useCPUField.checked && !autoPickGPUsField.checked) {
         return 'cpu'
     }
-    let selectedGPUs = $(useGPUsField).val()
-    if (selectedGPUs.length == 0) {
-        selectedGPUs = ['auto']
+    if (autoPickGPUsField.checked || selectedGPUs.length == 0) {
+        return 'auto'
     }
     return selectedGPUs.join(',')
 }
@@ -1136,10 +1138,27 @@ updatePromptStrength()
 useCPUField.addEventListener('click', function() {
     let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
+    let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
     if (this.checked) {
         gpuSettingEntry.style.display = 'none'
+        autoPickGPUSettingEntry.style.display = 'none'
+        autoPickGPUsField.setAttribute('data-old-value', autoPickGPUsField.checked)
+        autoPickGPUsField.checked = false
     } else if (useGPUsField.options.length >= MIN_GPUS_TO_SHOW_SELECTION) {
         gpuSettingEntry.style.display = ''
+        autoPickGPUSettingEntry.style.display = ''
+        autoPickGPUsField.checked = (autoPickGPUsField.getAttribute('data-old-value') === 'true')
+    }
+})
+
+useGPUsField.addEventListener('click', function() {
+    let selectedGPUs = $('#use_gpus').val()
+    autoPickGPUsField.checked = (selectedGPUs.length === 0)
+})
+
+autoPickGPUsField.addEventListener('click', function() {
+    if (this.checked) {
+        $('#use_gpus').val([])
     }
 })
@@ -1360,6 +1379,8 @@ async function getDevices() {
     if (allDeviceIds.length < MIN_GPUS_TO_SHOW_SELECTION) {
         let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
         gpuSettingEntry.style.display = 'none'
+        let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
+        autoPickGPUSettingEntry.style.display = 'none'
         if (allDeviceIds.length === 0) {
             useCPUField.checked = true
@@ -1367,14 +1388,18 @@ async function getDevices() {
         }
     }
-    useGPUsField.innerHTML = ''
+    autoPickGPUsField.checked = (res['config'] === 'auto')
+
+    useGPUsField.innerHTML = ''
     allDeviceIds.forEach(device => {
         let deviceName = res['all'][device]
-        let selected = (activeDeviceIds.includes(device) ? 'selected' : '')
-        let deviceOption = `<option value="${device}" ${selected}>${deviceName}</option>`
+        let deviceOption = `<option value="${device}">${deviceName}</option>`
         useGPUsField.insertAdjacentHTML('beforeend', deviceOption)
     })
+
+    if (!autoPickGPUsField.checked) {
+        $('#use_gpus').val(activeDeviceIds)
+    }
 }
 } catch (e) {
     console.log('error fetching devices', e)
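For reference, getDevices() above consumes a devices payload of roughly this shape (a sketch assembled from get_devices() in task_manager.py and the devices['config'] field added in server.py below; the device names are illustrative):

    {
        "all":    { "cuda:0": "NVIDIA GeForce RTX 3060", "cpu": "Intel(R) Core(TM) i7" },
        "active": { "cuda:0": "NVIDIA GeForce RTX 3060" },
        "config": "auto"
    }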

Changed file 4 of 8: the settings parameters script (likely ui/media/js/parameters.js, matching the v=4 to v=5 bump above)

@@ -73,6 +73,12 @@ var PARAMETERS = [
         note: "warning: this will be *very* slow",
         default: false,
     },
+    {
+        id: "auto_pick_gpus",
+        type: ParameterType.checkbox,
+        label: "Automatically pick the GPUs",
+        default: false,
+    },
     {
         id: "use_gpus",
         type: ParameterType.select_multiple,

Changed file 5 of 8: a new backend module (likely ui/sd_internal/device_manager.py, per the `from sd_internal import device_manager` imports below)

@@ -0,0 +1,156 @@
+import os
+import torch
+import traceback
+import re
+
+COMPARABLE_GPU_PERCENTILE = 0.75 # if a GPU's free_mem is within this % of the GPU with the most free_mem, it will be picked
+
+def get_device_delta(render_devices, active_devices):
+    '''
+    render_devices: 'cpu', or 'auto' or ['cuda:N'...]
+    active_devices: ['cpu', 'cuda:N'...]
+    '''
+    if render_devices is not None:
+        if render_devices in ('cpu', 'auto'):
+            render_devices = [render_devices]
+        elif isinstance(render_devices, list) and len(render_devices) > 0:
+            render_devices = list(filter(lambda x: x.startswith('cuda:'), render_devices))
+            if len(render_devices) == 0:
+                raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')
+
+            render_devices = list(filter(lambda x: is_device_compatible(x), render_devices))
+            if len(render_devices) == 0:
+                raise Exception('Sorry, none of the render_devices configured in config.json are compatible with Stable Diffusion')
+        else:
+            raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')
+    else:
+        render_devices = ['auto']
+
+    if 'auto' in render_devices:
+        render_devices = auto_pick_devices(active_devices)
+        if 'cpu' in render_devices:
+            print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
+
+    active_devices = set(active_devices)
+    render_devices = set(render_devices)
+
+    devices_to_start = render_devices - active_devices
+    devices_to_stop = active_devices - render_devices
+
+    return devices_to_start, devices_to_stop
+
+def auto_pick_devices(currently_active_devices):
+    if not torch.cuda.is_available(): return ['cpu']
+
+    device_count = torch.cuda.device_count()
+    if device_count == 1:
+        return ['cuda:0'] if is_device_compatible('cuda:0') else ['cpu']
+
+    print('Autoselecting GPU. Using most free memory.')
+    devices = []
+    for device in range(device_count):
+        device = f'cuda:{device}'
+        if not is_device_compatible(device):
+            continue
+
+        mem_free, mem_total = torch.cuda.mem_get_info(device)
+        mem_free /= float(10**9)
+        mem_total /= float(10**9)
+        device_name = torch.cuda.get_device_name(device)
+        print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
+        devices.append({'device': device, 'device_name': device_name, 'mem_free': mem_free})
+
+    devices.sort(key=lambda x: x['mem_free'], reverse=True)
+    max_free_mem = devices[0]['mem_free']
+    free_mem_threshold = COMPARABLE_GPU_PERCENTILE * max_free_mem
+
+    # Auto-pick algorithm:
+    # 1. Pick the top 75 percentile of the GPUs, sorted by free_mem.
+    # 2. Also include already-running devices (GPU-only), otherwise their free_mem will
+    #    always be very low (since their VRAM contains the model).
+    #    These already-running devices probably aren't terrible, since they were picked in the past.
+    #    Worst case, the user can restart the program and that'll get rid of them.
+    devices = list(filter((lambda x: x['mem_free'] > free_mem_threshold or x['device'] in currently_active_devices), devices))
+    devices = list(map(lambda x: x['device'], devices)) # keep just the 'cuda:N' ids; the dicts are unhashable and would break the set math in get_device_delta()
+    return devices
+
+def device_init(thread_data, device):
+    '''
+    This function assumes the 'device' has already been verified to be compatible.
+    `get_device_delta()` has already filtered out incompatible devices.
+    '''
+    validate_device_id(device, log_prefix='device_init')
+
+    if device == 'cpu':
+        thread_data.device = 'cpu'
+        thread_data.device_name = get_processor_name()
+        print('Render device CPU available as', thread_data.device_name)
+        return
+
+    thread_data.device_name = torch.cuda.get_device_name(device)
+    thread_data.device = device
+
+    # Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
+    device_name = thread_data.device_name.lower()
+    thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
+    if thread_data.force_full_precision:
+        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
+        # Apply force_full_precision now before models are loaded.
+        thread_data.precision = 'full'
+
+    print(f'Setting {device} as active')
+    torch.cuda.device(device)
+
+    return
+
+def validate_device_id(device, log_prefix=''):
+    def is_valid():
+        if not isinstance(device, str):
+            return False
+        if device == 'cpu':
+            return True
+        if not device.startswith('cuda:') or not device[5:].isnumeric():
+            return False
+        return True
+
+    if not is_valid():
+        raise EnvironmentError(f"{log_prefix}: device id should be 'cpu', or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
+
+def is_device_compatible(device):
+    '''
+    Returns True/False, and prints any compatibility errors
+    '''
+    validate_device_id(device, log_prefix='is_device_compatible')
+
+    if device == 'cpu': return True
+    # Memory check
+    try:
+        _, mem_total = torch.cuda.mem_get_info(device)
+        mem_total /= float(10**9)
+        if mem_total < 3.0:
+            print(f'GPU {device} with less than 3 GB of VRAM is not compatible with Stable Diffusion')
+            return False
+    except RuntimeError as e:
+        print(str(e))
+        return False
+    return True
+
+def get_processor_name():
+    try:
+        import platform, subprocess
+        if platform.system() == "Windows":
+            return platform.processor()
+        elif platform.system() == "Darwin":
+            os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
+            command = "sysctl -n machdep.cpu.brand_string"
+            return subprocess.check_output(command).strip()
+        elif platform.system() == "Linux":
+            command = "cat /proc/cpuinfo"
+            all_info = subprocess.check_output(command, shell=True).decode().strip()
+            for line in all_info.split("\n"):
+                if "model name" in line:
+                    return re.sub(".*model name.*:", "", line, 1).strip()
+    except:
+        print(traceback.format_exc())
+    return "cpu"

Changed file 6 of 8: the render runtime (likely ui/sd_internal/runtime.py)

@@ -37,6 +37,7 @@ config_yaml = "optimizedSD/v1-inference.yaml"
 filename_regex = re.compile('[^a-zA-Z0-9]')
 # api stuff
+from sd_internal import device_manager
 from . import Request, Response, Image as ResponseImage
 import base64
 from io import BytesIO
@@ -45,73 +46,7 @@ from io import BytesIO
 from threading import local as LocalThreadVars
 thread_data = LocalThreadVars()
-def get_processor_name():
-    try:
-        import platform, subprocess
-        if platform.system() == "Windows":
-            return platform.processor()
-        elif platform.system() == "Darwin":
-            os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
-            command = "sysctl -n machdep.cpu.brand_string"
-            return subprocess.check_output(command).strip()
-        elif platform.system() == "Linux":
-            command = "cat /proc/cpuinfo"
-            all_info = subprocess.check_output(command, shell=True).decode().strip()
-            for line in all_info.split("\n"):
-                if "model name" in line:
-                    return re.sub(".*model name.*:", "", line, 1).strip()
-    except:
-        print(traceback.format_exc())
-    return "cpu"
-
-def validate_device_id(device, allow_auto=False, log_prefix=''):
-    device_names = ['cpu', 'auto'] if allow_auto else ['cpu']
-    if not isinstance(device, str) or (device not in device_names and (len(device) <= len('cuda:') or device[:5] != 'cuda:' or not device[5:].isnumeric())):
-        raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
-
-'''
-Returns True/False, and prints any compatibility errors
-'''
-def is_device_compatible(device):
-    validate_device_id(device, allow_auto=False, log_prefix='is_device_compatible')
-    if device == 'cpu': return True
-    # Memory check
-    try:
-        mem_free, mem_total = torch.cuda.mem_get_info(device)
-        mem_total /= float(10**9)
-        if mem_total < 3.0:
-            print('GPUs with less than 3 GB of VRAM are not compatible with Stable Diffusion')
-            return False
-    except RuntimeError as e:
-        print(str(e))
-        return False
-    return True
-
-def device_select(device):
-    validate_device_id(device, allow_auto=False, log_prefix='device_select')
-    if device == 'cpu': return True
-    if not torch.cuda.is_available(): return False
-    if not is_device_compatible(device):
-        return False
-
-    thread_data.device_name = torch.cuda.get_device_name(device)
-    thread_data.device = device
-
-    # Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
-    device_name = thread_data.device_name.lower()
-    thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
-    if thread_data.force_full_precision:
-        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
-        # Apply force_full_precision now before models are loaded.
-        thread_data.precision = 'full'
-    return True
-
-def device_init(device_selection):
-    validate_device_id(device_selection, allow_auto=True, log_prefix='device_init')
-
+def thread_init(device):
     # Thread bound properties
     thread_data.stop_processing = False
     thread_data.temp_images = {}
@@ -140,50 +75,7 @@ def device_init(device_selection):
     thread_data.force_full_precision = False
     thread_data.reduced_memory = True
-    if device_selection == 'cpu':
-        thread_data.device = 'cpu'
-        thread_data.device_name = get_processor_name()
-        print('Render device CPU available as', thread_data.device_name)
-        return True
-    if not torch.cuda.is_available():
-        if device_selection == 'auto':
-            print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
-            thread_data.device = 'cpu'
-            thread_data.device_name = get_processor_name()
-            return True
-        else:
-            raise EnvironmentError(f'Could not find a compatible GPU for the requested device_selection: {device_selection}!')
-    if device_selection == 'auto':
-        device_count = torch.cuda.device_count()
-        if device_count == 1 and device_select('cuda:0'):
-            torch.cuda.device('cuda:0')
-            return True
-        print('Autoselecting GPU. Using most free memory.')
-        max_mem_free = 0
-        best_device = None
-        for device in range(device_count):
-            device = f'cuda:{device}'
-            mem_free, mem_total = torch.cuda.mem_get_info(device)
-            mem_free /= float(10**9)
-            mem_total /= float(10**9)
-            device_name = torch.cuda.get_device_name(device)
-            print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
-            if max_mem_free < mem_free:
-                max_mem_free = mem_free
-                best_device = device
-        if best_device and device_select(best_device):
-            print(f'Setting {device} as active')
-            torch.cuda.device(device)
-            return True
-    if device_selection != 'auto' and device_select(device_selection):
-        print(f'Setting {device_selection} as active')
-        torch.cuda.device(device_selection)
-        return True
-    return False
+    device_manager.device_init(thread_data, device)
 def load_model_ckpt():
     if not thread_data.ckpt_file: raise ValueError(f'Thread ckpt_file is undefined.')
@@ -296,6 +188,8 @@ def unload_filters():
         del thread_data.model_real_esrgan
         thread_data.model_real_esrgan = None
+
+    gc()
 def unload_models():
     if thread_data.model is not None:
         print('Unloading models...')
@@ -313,6 +207,8 @@ def unload_models():
         thread_data.modelCS = None
         thread_data.modelFS = None
+
+    gc()
 def wait_model_move_to(model, target_device): # Send to target_device and wait until complete.
     if thread_data.device == target_device: return
     start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
@@ -518,7 +414,6 @@ def do_mk_img(req: Request):
     if needs_model_reload:
         unload_models()
         unload_filters()
-        gc()
         load_model_ckpt()
     if thread_data.turbo != req.turbo:

Changed file 7 of 8: the task manager (likely ui/sd_internal/task_manager.py)

@@ -14,7 +14,7 @@ import queue, threading, time, weakref
 from typing import Any, Generator, Hashable, Optional, Union
 from pydantic import BaseModel
-from sd_internal import Request, Response, runtime
+from sd_internal import Request, Response, runtime, device_manager
 THREAD_NAME_PREFIX = 'Runtime-Render/'
 ERR_LOCK_FAILED = ' failed to acquire lock within timeout.'
@@ -253,11 +253,7 @@ def thread_render(device):
     global current_state, current_state_error, current_model_path, current_vae_path
     from . import runtime
     try:
-        if not runtime.device_init(device):
-            weak_thread_data[threading.current_thread()] = {
-                'error': f'Could not start on the selected device: {device}'
-            }
-            return
+        runtime.thread_init(device)
     except Exception as e:
         print(traceback.format_exc())
         weak_thread_data[threading.current_thread()] = {
@@ -266,13 +262,19 @@ def thread_render(device):
         return
     weak_thread_data[threading.current_thread()] = {
         'device': runtime.thread_data.device,
-        'device_name': runtime.thread_data.device_name
+        'device_name': runtime.thread_data.device_name,
+        'alive': True
     }
     if runtime.thread_data.device != 'cpu' or is_alive() == 1:
         preload_model()
         current_state = ServerStates.Online
     while True:
         task_cache.clean()
+        if not weak_thread_data[threading.current_thread()]['alive']:
+            print(f'Shutting down thread for device {runtime.thread_data.device}')
+            runtime.unload_models()
+            runtime.unload_filters()
+            return
         if isinstance(current_state_error, SystemExit):
             current_state = ServerStates.Unavailable
             return
@@ -371,12 +373,12 @@ def get_devices():
     gpu_count = torch.cuda.device_count()
     for device in range(gpu_count):
         device = f'cuda:{device}'
-        if not runtime.is_device_compatible(device):
+        if not device_manager.is_device_compatible(device):
             continue
         devices['all'].update({device: torch.cuda.get_device_name(device)})
-    devices['all'].update({'cpu': runtime.get_processor_name()})
+    devices['all'].update({'cpu': device_manager.get_processor_name()})
     # list the activated devices
     if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('get_devices' + ERR_LOCK_FAILED)
@@ -411,13 +413,13 @@ def is_alive(device=None):
     finally:
         manager_lock.release()
-def start_render_thread(device='auto'):
-    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_threads' + ERR_LOCK_FAILED)
+def start_render_thread(device):
+    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_thread' + ERR_LOCK_FAILED)
     print('Start new Rendering Thread on device', device)
     try:
         rthread = threading.Thread(target=thread_render, kwargs={'device': device})
         rthread.daemon = True
-        rthread.name = THREAD_NAME_PREFIX + str(device)
+        rthread.name = THREAD_NAME_PREFIX + device
         rthread.start()
         render_threads.append(rthread)
     finally:
@@ -425,6 +427,7 @@ def start_render_thread(device='auto'):
     timeout = DEVICE_START_TIMEOUT
     while not rthread.is_alive() or not rthread in weak_thread_data or not 'device' in weak_thread_data[rthread]:
         if rthread in weak_thread_data and 'error' in weak_thread_data[rthread]:
+            print(rthread, device, 'error:', weak_thread_data[rthread]['error'])
             return False
         if timeout <= 0:
             return False
@@ -432,6 +435,59 @@ def start_render_thread(device='auto'):
         time.sleep(1)
     return True
+def stop_render_thread(device):
+    try:
+        device_manager.validate_device_id(device, log_prefix='stop_render_thread')
+    except:
+        print(traceback.format_exc())
+        return False
+
+    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('stop_render_thread' + ERR_LOCK_FAILED)
+    print('Stopping Rendering Thread on device', device)
+
+    try:
+        thread_to_remove = None
+        for rthread in render_threads:
+            weak_data = weak_thread_data.get(rthread)
+            if weak_data is None or not 'device' in weak_data or weak_data['device'] is None:
+                continue
+            thread_device = weak_data['device']
+            if thread_device == device:
+                weak_data['alive'] = False
+                thread_to_remove = rthread
+                break
+        if thread_to_remove is not None:
+            render_threads.remove(rthread)
+            return True
+    finally:
+        manager_lock.release()
+
+    return False
+
+def update_render_threads(render_devices, active_devices):
+    devices_to_start, devices_to_stop = device_manager.get_device_delta(render_devices, active_devices)
+    print('devices_to_start', devices_to_start)
+    print('devices_to_stop', devices_to_stop)
+
+    for device in devices_to_stop:
+        if is_alive(device) <= 0:
+            print(device, 'is not alive')
+            continue
+        if not stop_render_thread(device):
+            print(device, 'could not stop render thread')
+
+    for device in devices_to_start:
+        if is_alive(device) >= 1:
+            print(device, 'already registered.')
+            continue
+        if not start_render_thread(device):
+            print(device, 'failed to start.')
+
+    if is_alive() <= 0: # No running devices, probably invalid user config.
+        raise EnvironmentError('ERROR: No active render devices! Please verify the "render_devices" value in config.json')
+
+    print('active devices', get_devices()['active'])
+
 def shutdown_event(): # Signal render thread to close on shutdown
     global current_state_error
     current_state_error = SystemExit('Application shutting down.')
@@ -478,7 +534,6 @@ def render(req : ImageRequest):
     r.stream_image_progress = False
     new_task = RenderTask(r)
-    new_task.render_device = req.render_device
     if task_cache.put(r.session_id, new_task, TASK_TTL):
         # Use twice the normal timeout for adding user requests.
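The start/stop logic above reduces to set arithmetic in get_device_delta(). A sketch with hypothetical device ids, assuming both requested GPUs exist and pass is_device_compatible():

    # config.json asks for cuda:0 and cuda:2, while cuda:0 and cuda:1 are currently active
    to_start, to_stop = device_manager.get_device_delta(
        ['cuda:0', 'cuda:2'],   # render_devices, from config.json
        ['cuda:0', 'cuda:1'])   # active_devices, from get_devices()['active']
    # to_start == {'cuda:2'}, to_stop == {'cuda:1'}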

Changed file 8 of 8: the web server (likely ui/server.py)

@@ -224,7 +224,10 @@ def read_web_data(key:str=None):
             raise HTTPException(status_code=500, detail="Config file is missing or unreadable")
         return JSONResponse(config, headers=NOCACHE_HEADERS)
     elif key == 'devices':
-        return JSONResponse(task_manager.get_devices(), headers=NOCACHE_HEADERS)
+        config = getConfig()
+        devices = task_manager.get_devices()
+        devices['config'] = config.get('render_devices', "auto")
+        return JSONResponse(devices, headers=NOCACHE_HEADERS)
    elif key == 'models':
         return JSONResponse(getModels(), headers=NOCACHE_HEADERS)
     elif key == 'modifiers': return FileResponse(os.path.join(SD_UI_DIR, 'modifiers.json'), headers=NOCACHE_HEADERS)
@@ -272,17 +275,41 @@ def save_model_to_config(ckpt_model_name, vae_model_name):
     setConfig(config)
-@app.post('/render')
-def render(req : task_manager.ImageRequest):
+def save_render_devices_to_config(render_devices):
+    config = getConfig()
+    if 'render_devices' not in config:
+        config['render_devices'] = {}
+
+    config['render_devices'] = render_devices
+    if render_devices is None or len(render_devices) == 0:
+        del config['render_devices']
+
+    setConfig(config)
+
+def update_render_threads_on_request(req : task_manager.ImageRequest):
     if req.use_cpu: # TODO Remove after transition.
         print('WARNING Replace {use_cpu: true} by {render_device: "cpu"}')
         req.render_device = 'cpu'
         del req.use_cpu
-    if req.render_device != 'cpu':
-        req.render_device = 'cuda:0' # temp hack to get beta working
-    if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
+    if req.render_device not in ('cpu', 'auto') and not req.render_device.startswith('cuda:'):
+        raise HTTPException(status_code=400, detail=f'Invalid render device requested: {req.render_device}')
+
+    if req.render_device.startswith('cuda:'):
+        req.render_device = req.render_device.split(',')
+
+    save_render_devices_to_config(req.render_device)
+    del req.render_device
+
+    update_render_threads()
+
+@app.post('/render')
+def render(req : task_manager.ImageRequest):
+    update_render_threads_on_request(req)
+
     if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
-        raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
+        raise HTTPException(status_code=412, detail=f'The "Fix incorrect faces" feature works only on cuda:0. Disable "Fix incorrect faces" (in Image Settings), or use the CUDA_VISIBLE_DEVICES environment variable.')
     try:
         save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
         req.use_stable_diffusion_model = resolve_ckpt_to_use(req.use_stable_diffusion_model)
@@ -359,44 +386,19 @@ class LogSuppressFilter(logging.Filter):
         return True
 logging.getLogger('uvicorn.access').addFilter(LogSuppressFilter())
-config = getConfig()
 # Start the task_manager
 task_manager.default_model_to_load = resolve_ckpt_to_use()
 task_manager.default_vae_to_load = resolve_vae_to_use()
-if 'render_devices' in config: # Start a new thread for each device.
-    if not isinstance(config['render_devices'], list):
-        raise Exception('Invalid render_devices value in config. Should be a list')
-    config['render_devices'] = set(config['render_devices']) # de-duplicate
-    for device in config['render_devices']:
-        if task_manager.is_alive(device) >= 1:
-            print(device, 'already registered.')
-            continue
-        if not task_manager.start_render_thread(device):
-            print(device, 'failed to start.')
-    if task_manager.is_alive() <= 0: # No running devices, probably invalid user config.
-        print('WARNING: No active render devices after loading config. Validate "render_devices" in config.json')
-        print('Loading default render devices to replace invalid render_devices field from config', config['render_devices'])
-if task_manager.is_alive() <= 0: # Either no defaults or no devices after loading config.
-    # Select best GPU device using free memory, if more than one device.
-    if task_manager.start_render_thread('auto'): # Detect best device for renders
-        # if cuda:0 is missing, another cuda device is better. try to start it...
-        if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
-            print('Failed to start GPU:0...')
-    else:
-        print('Failed to start gpu device.')
-    if task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cpu'): # Allow CPU to be used for renders
-        print('Failed to start CPU render device...')
-is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
-if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
-    print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
-    print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
-    print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
-    print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
-print('active devices', task_manager.get_devices()['active'])
+def update_render_threads():
+    config = getConfig()
+    render_devices = config.get('render_devices', "auto")
+    active_devices = task_manager.get_devices()['active'].keys()
+
+    print('requesting for render_devices', render_devices)
+    task_manager.update_render_threads(render_devices, active_devices)
+
+update_render_threads()
 # start the browser ui
 import webbrowser; webbrowser.open('http://localhost:9000')
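End to end, a client can now steer device selection per request via the render_device field on the /render body: the value is validated, persisted to config.json via save_render_devices_to_config(), and the render threads are re-synced before the task is queued. A sketch of the accepted values (the rest of the request body is omitted):

    {"render_device": "auto"}             # let the backend auto-pick GPUs
    {"render_device": "cpu"}              # CPU-only rendering
    {"render_device": "cuda:0,cuda:1"}    # specific GPUs; split on ',' server-side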