Start on multiple GPUs by default (top 75th percentile by free_mem); UI selection of 'cpu', 'auto', or a list of specific GPUs, now wired to the backend; dynamically start/stop render threads per device, without requiring a full program restart
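In brief, the new auto-pick rule (implemented in device_manager.py below) keeps every compatible GPU whose free VRAM is at least 75% of the best GPU's, plus any GPU already running a render thread. A minimal standalone sketch of that rule, assuming a prepared list of {'device', 'mem_free'} entries:

# Sketch only: mirrors auto_pick_devices() in device_manager.py below.
# `gpus` is assumed to be [{'device': 'cuda:N', 'mem_free': <GB>}, ...].
def pick_devices(gpus, currently_active):
    gpus = sorted(gpus, key=lambda g: g['mem_free'], reverse=True)
    threshold = 0.75 * gpus[0]['mem_free']  # COMPARABLE_GPU_PERCENTILE
    return [g['device'] for g in gpus
            if g['mem_free'] > threshold or g['device'] in currently_active]

For example, with free memory of 10, 9, and 2 GB across three GPUs, only the first two are picked, unless the third was already active.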

This commit is contained in:
cmdr2 2022-11-14 11:23:22 +05:30
parent a19ba40672
commit ea03fd22db
8 changed files with 339 additions and 179 deletions

ui/index.html

@ -7,7 +7,7 @@
<link rel="icon" type="image/png" href="/media/images/favicon-32x32.png" sizes="32x32">
<link rel="stylesheet" href="/media/css/fonts.css?v=1">
<link rel="stylesheet" href="/media/css/themes.css?v=2">
<link rel="stylesheet" href="/media/css/main.css?v=10">
<link rel="stylesheet" href="/media/css/main.css?v=11">
<link rel="stylesheet" href="/media/css/auto-save.css?v=5">
<link rel="stylesheet" href="/media/css/modifier-thumbnails.css?v=4">
<link rel="stylesheet" href="/media/css/fontawesome-all.min.css?v=1">
@ -19,7 +19,7 @@
<div id="container">
<div id="top-nav">
<div id="logo">
<h1>Stable Diffusion UI <small>v2.3.14 <span id="updateBranchLabel"></span></small></h1>
<h1>Stable Diffusion UI <small>v2.4 <span id="updateBranchLabel"></span></small></h1>
</div>
<div id="server-status">
<div id="server-status-color"></div>
@ -35,6 +35,9 @@
<span id="tab-about" class="tab">
<span><i class="fa fa-comments icon"></i> Help & Community</span>
</span>
<!-- <span id="tab-system-info" class="tab">
<span><i class="fa fa-microchip icon"></i> System Info</span>
</span> -->
</div>
</div>
@ -245,7 +248,7 @@
<div class="tab-content-inner">
<div class="float-container">
<div class="float-child">
<h1>Help</h1>
<ul id="help-links">
<li><span class="help-section">Using the software</span>
<ul>
@ -270,7 +273,7 @@
</div>
<div class="float-child">
<h1>Community</h1>
<ul id="community-links">
<li><a href="https://discord.com/invite/u9yhsFmEkB" target="_blank"><i class="fa-brands fa-discord fa-fw"></i> Discord user community</a></li>
<li><a href="https://www.reddit.com/r/StableDiffusionUI/" target="_blank"><i class="fa-brands fa-reddit fa-fw"></i> Reddit community</a></li>
@ -280,6 +283,18 @@
</div>
</div>
</div>
<!-- <div id="tab-content-system-info" class="tab-content">
<div id="system-info" class="tab-content-inner">
<h1>System Info</h1>
<table>
<tr><td><label>Processor:</label></td><td id="system-info-cpu" class="value">Dingus</td></tr>
<tr><td><label>RAM:</label></td><td id="system-info-ram" class="value">Dingus Another</td></tr>
<tr><td><label>Compatible Graphics Cards (all):</label></td><td id="system-info-all-gpus" class="value">Dingus</td></tr>
<tr><td></td><td>&nbsp;</td></tr>
<tr><td><label>What's being used for rendering 🔥:</label></td><td id="system-info-active-gpus" class="value">Dingus<br/>Intel Graphics SOmething<br/>Another thing</td></tr>
</table>
</div>
</div> -->
</div>
@ -317,13 +332,13 @@
</div>
</body>
<script src="media/js/parameters.js?v=4"></script>
<script src="media/js/parameters.js?v=5"></script>
<script src="media/js/plugins.js?v=1"></script>
<script src="media/js/utils.js?v=6"></script>
<script src="media/js/inpainting-editor.js?v=1"></script>
<script src="media/js/image-modifiers.js?v=6"></script>
<script src="media/js/auto-save.js?v=7"></script>
<script src="media/js/main.js?v=13"></script>
<script src="media/js/main.js?v=14"></script>
<script src="media/js/themes.js?v=4"></script>
<script src="media/js/dnd.js?v=8"></script>
<script>

ui/media/css/main.css

@ -123,7 +123,7 @@ label {
padding: 16px;
display: flex;
flex-direction: column;
flex: 0 0 350pt;
flex: 0 0 370pt;
}
#editor label {
font-weight: normal;
@ -887,3 +887,9 @@ input::file-selector-button {
margin-bottom: 15px;
box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.15), 0 6px 20px 0 rgba(0, 0, 0, 0.15);
}
#system-info .value {
text-align: left;
}
#system-info label {
float: right;
}

ui/media/js/main.js

@ -27,6 +27,7 @@ let maskImageSelector = document.querySelector("#mask")
let maskImagePreview = document.querySelector("#mask_preview")
let turboField = document.querySelector('#turbo')
let useCPUField = document.querySelector('#use_cpu')
let autoPickGPUsField = document.querySelector('#auto_pick_gpus')
let useGPUsField = document.querySelector('#use_gpus')
let useFullPrecisionField = document.querySelector('#use_full_precision')
let saveToDiskField = document.querySelector('#save_to_disk')
@ -810,14 +811,15 @@ function getCurrentUserRequest() {
}
function getCurrentRenderDeviceSelection() {
if (useCPUField.checked) {
let selectedGPUs = $('#use_gpus').val()
if (useCPUField.checked && !autoPickGPUsField.checked) {
return 'cpu'
}
let selectedGPUs = $(useGPUsField).val()
if (selectedGPUs.length == 0) {
selectedGPUs = ['auto']
if (autoPickGPUsField.checked || selectedGPUs.length == 0) {
return 'auto'
}
return selectedGPUs.join(',')
}
@ -1136,10 +1138,27 @@ updatePromptStrength()
useCPUField.addEventListener('click', function() {
let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
if (this.checked) {
gpuSettingEntry.style.display = 'none'
autoPickGPUSettingEntry.style.display = 'none'
autoPickGPUsField.setAttribute('data-old-value', autoPickGPUsField.checked)
autoPickGPUsField.checked = false
} else if (useGPUsField.options.length >= MIN_GPUS_TO_SHOW_SELECTION) {
gpuSettingEntry.style.display = ''
autoPickGPUSettingEntry.style.display = ''
autoPickGPUsField.checked = (autoPickGPUsField.getAttribute('data-old-value') === 'true')
}
})
useGPUsField.addEventListener('click', function() {
let selectedGPUs = $('#use_gpus').val()
autoPickGPUsField.checked = (selectedGPUs.length === 0)
})
autoPickGPUsField.addEventListener('click', function() {
if (this.checked) {
$('#use_gpus').val([])
}
})
@ -1360,6 +1379,8 @@ async function getDevices() {
if (allDeviceIds.length < MIN_GPUS_TO_SHOW_SELECTION) {
let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
gpuSettingEntry.style.display = 'none'
let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
autoPickGPUSettingEntry.style.display = 'none'
if (allDeviceIds.length === 0) {
useCPUField.checked = true
@ -1367,14 +1388,18 @@ async function getDevices() {
}
}
autoPickGPUsField.checked = (res['config'] === 'auto')
useGPUsField.innerHTML = ''
allDeviceIds.forEach(device => {
let deviceName = res['all'][device]
let selected = (activeDeviceIds.includes(device) ? 'selected' : '')
let deviceOption = `<option value="${device}" ${selected}>${deviceName}</option>`
let deviceOption = `<option value="${device}">${deviceName}</option>`
useGPUsField.insertAdjacentHTML('beforeend', deviceOption)
})
if (!autoPickGPUsField.checked) {
$('#use_gpus').val(activeDeviceIds)
}
}
} catch (e) {
console.log('error fetching devices', e)

ui/media/js/parameters.js

@ -73,6 +73,12 @@ var PARAMETERS = [
note: "warning: this will be *very* slow",
default: false,
},
{
id: "auto_pick_gpus",
type: ParameterType.checkbox,
label: "Automatically pick the GPUs",
default: false,
},
{
id: "use_gpus",
type: ParameterType.select_multiple,

ui/sd_internal/device_manager.py

@ -0,0 +1,156 @@
import os
import torch
import traceback
import re
COMPARABLE_GPU_PERCENTILE = 0.75 # if a GPU's free_mem is at least this fraction of the max free_mem (among compatible GPUs), it will be picked
def get_device_delta(render_devices, active_devices):
'''
render_devices: 'cpu', or 'auto' or ['cuda:N'...]
active_devices: ['cpu', 'cuda:N'...]
'''
if render_devices is not None:
if render_devices in ('cpu', 'auto'):
render_devices = [render_devices]
elif isinstance(render_devices, list) and len(render_devices) > 0:
render_devices = list(filter(lambda x: x.startswith('cuda:'), render_devices))
if len(render_devices) == 0:
raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')
render_devices = list(filter(lambda x: is_device_compatible(x), render_devices))
if len(render_devices) == 0:
raise Exception('Sorry, none of the render_devices configured in config.json are compatible with Stable Diffusion')
else:
raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')
else:
render_devices = ['auto']
if 'auto' in render_devices:
render_devices = auto_pick_devices(active_devices)
if 'cpu' in render_devices:
print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
active_devices = set(active_devices)
render_devices = set(render_devices)
devices_to_start = render_devices - active_devices
devices_to_stop = active_devices - render_devices
return devices_to_start, devices_to_stop
def auto_pick_devices(currently_active_devices):
if not torch.cuda.is_available(): return ['cpu']
device_count = torch.cuda.device_count()
if device_count == 1:
return ['cuda:0'] if is_device_compatible('cuda:0') else ['cpu']
print('Autoselecting GPU. Using most free memory.')
devices = []
for device in range(device_count):
device = f'cuda:{device}'
if not is_device_compatible(device):
continue
mem_free, mem_total = torch.cuda.mem_get_info(device)
mem_free /= float(10**9)
mem_total /= float(10**9)
device_name = torch.cuda.get_device_name(device)
print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
devices.append({'device': device, 'device_name': device_name, 'mem_free': mem_free})
devices.sort(key=lambda x:x['mem_free'], reverse=True)
max_free_mem = devices[0]['mem_free']
free_mem_threshold = COMPARABLE_GPU_PERCENTILE * max_free_mem
# Auto-pick algorithm:
# 1. Pick the top 75 percentile of the GPUs, sorted by free_mem.
# 2. Also include already-running devices (GPU-only), otherwise their free_mem will
# always be very low (since their VRAM contains the model).
# These already-running devices probably aren't terrible, since they were picked in the past.
# Worst case, the user can restart the program and that'll get rid of them.
devices = list(filter((lambda x: x['mem_free'] > free_mem_threshold or x['device'] in currently_active_devices), devices))
devices = list(map(lambda x: x['device'], devices)) # return device ids; callers treat these as hashable strings
return devices
def device_init(thread_data, device):
'''
This function assumes the 'device' has already been verified to be compatible.
`get_device_delta()` has already filtered out incompatible devices.
'''
validate_device_id(device, log_prefix='device_init')
if device == 'cpu':
thread_data.device = 'cpu'
thread_data.device_name = get_processor_name()
print('Render device CPU available as', thread_data.device_name)
return
thread_data.device_name = torch.cuda.get_device_name(device)
thread_data.device = device
# Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
device_name = thread_data.device_name.lower()
thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
if thread_data.force_full_precision:
print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
# Apply force_full_precision now before models are loaded.
thread_data.precision = 'full'
print(f'Setting {device} as active')
torch.cuda.device(device)
return
def validate_device_id(device, log_prefix=''):
def is_valid():
if not isinstance(device, str):
return False
if device == 'cpu':
return True
if not device.startswith('cuda:') or not device[5:].isnumeric():
return False
return True
if not is_valid():
raise EnvironmentError(f"{log_prefix}: device id should be 'cpu', or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
def is_device_compatible(device):
'''
Returns True/False, and prints any compatibility errors
'''
validate_device_id(device, log_prefix='is_device_compatible')
if device == 'cpu': return True
# Memory check
try:
_, mem_total = torch.cuda.mem_get_info(device)
mem_total /= float(10**9)
if mem_total < 3.0:
print(f'GPU {device} with less than 3 GB of VRAM is not compatible with Stable Diffusion')
return False
except RuntimeError as e:
print(str(e))
return False
return True
def get_processor_name():
try:
import platform, subprocess
if platform.system() == "Windows":
return platform.processor()
elif platform.system() == "Darwin":
os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
command = "sysctl -n machdep.cpu.brand_string"
return subprocess.check_output(command, shell=True).decode().strip()
elif platform.system() == "Linux":
command = "cat /proc/cpuinfo"
all_info = subprocess.check_output(command, shell=True).decode().strip()
for line in all_info.split("\n"):
if "model name" in line:
return re.sub(".*model name.*:", "", line, 1).strip()
except:
print(traceback.format_exc())
return "cpu"

ui/sd_internal/runtime.py

@ -37,6 +37,7 @@ config_yaml = "optimizedSD/v1-inference.yaml"
filename_regex = re.compile('[^a-zA-Z0-9]')
# api stuff
from sd_internal import device_manager
from . import Request, Response, Image as ResponseImage
import base64
from io import BytesIO
@ -45,73 +46,7 @@ from io import BytesIO
from threading import local as LocalThreadVars
thread_data = LocalThreadVars()
def get_processor_name():
try:
import platform, subprocess
if platform.system() == "Windows":
return platform.processor()
elif platform.system() == "Darwin":
os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
command = "sysctl -n machdep.cpu.brand_string"
return subprocess.check_output(command).strip()
elif platform.system() == "Linux":
command = "cat /proc/cpuinfo"
all_info = subprocess.check_output(command, shell=True).decode().strip()
for line in all_info.split("\n"):
if "model name" in line:
return re.sub(".*model name.*:", "", line, 1).strip()
except:
print(traceback.format_exc())
return "cpu"
def validate_device_id(device, allow_auto=False, log_prefix=''):
device_names = ['cpu', 'auto'] if allow_auto else ['cpu']
if not isinstance(device, str) or (device not in device_names and (len(device) <= len('cuda:') or device[:5] != 'cuda:' or not device[5:].isnumeric())):
raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
'''
Returns True/False, and prints any compatibility errors
'''
def is_device_compatible(device):
validate_device_id(device, allow_auto=False, log_prefix='is_device_compatible')
if device == 'cpu': return True
# Memory check
try:
mem_free, mem_total = torch.cuda.mem_get_info(device)
mem_total /= float(10**9)
if mem_total < 3.0:
print('GPUs with less than 3 GB of VRAM are not compatible with Stable Diffusion')
return False
except RuntimeError as e:
print(str(e))
return False
return True
def device_select(device):
validate_device_id(device, allow_auto=False, log_prefix='device_select')
if device == 'cpu': return True
if not torch.cuda.is_available(): return False
if not is_device_compatible(device):
return False
thread_data.device_name = torch.cuda.get_device_name(device)
thread_data.device = device
# Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
device_name = thread_data.device_name.lower()
thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
if thread_data.force_full_precision:
print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
# Apply force_full_precision now before models are loaded.
thread_data.precision = 'full'
return True
def device_init(device_selection):
validate_device_id(device_selection, allow_auto=True, log_prefix='device_init')
def thread_init(device):
# Thread bound properties
thread_data.stop_processing = False
thread_data.temp_images = {}
@ -140,50 +75,7 @@ def device_init(device_selection):
thread_data.force_full_precision = False
thread_data.reduced_memory = True
if device_selection == 'cpu':
thread_data.device = 'cpu'
thread_data.device_name = get_processor_name()
print('Render device CPU available as', thread_data.device_name)
return True
if not torch.cuda.is_available():
if device_selection == 'auto':
print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
thread_data.device = 'cpu'
thread_data.device_name = get_processor_name()
return True
else:
raise EnvironmentError(f'Could not find a compatible GPU for the requested device_selection: {device_selection}!')
if device_selection == 'auto':
device_count = torch.cuda.device_count()
if device_count == 1 and device_select('cuda:0'):
torch.cuda.device('cuda:0')
return True
print('Autoselecting GPU. Using most free memory.')
max_mem_free = 0
best_device = None
for device in range(device_count):
device = f'cuda:{device}'
mem_free, mem_total = torch.cuda.mem_get_info(device)
mem_free /= float(10**9)
mem_total /= float(10**9)
device_name = torch.cuda.get_device_name(device)
print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
if max_mem_free < mem_free:
max_mem_free = mem_free
best_device = device
if best_device and device_select(best_device):
print(f'Setting {device} as active')
torch.cuda.device(device)
return True
if device_selection != 'auto' and device_select(device_selection):
print(f'Setting {device_selection} as active')
torch.cuda.device(device_selection)
return True
return False
device_manager.device_init(thread_data, device)
def load_model_ckpt():
if not thread_data.ckpt_file: raise ValueError(f'Thread ckpt_file is undefined.')
@ -296,6 +188,8 @@ def unload_filters():
del thread_data.model_real_esrgan
thread_data.model_real_esrgan = None
gc()
def unload_models():
if thread_data.model is not None:
print('Unloading models...')
@ -313,6 +207,8 @@ def unload_models():
thread_data.modelCS = None
thread_data.modelFS = None
gc()
def wait_model_move_to(model, target_device): # Send to target_device and wait until complete.
if thread_data.device == target_device: return
start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
@ -518,7 +414,6 @@ def do_mk_img(req: Request):
if needs_model_reload:
unload_models()
unload_filters()
gc()
load_model_ckpt()
if thread_data.turbo != req.turbo:

ui/sd_internal/task_manager.py

@ -14,7 +14,7 @@ import queue, threading, time, weakref
from typing import Any, Generator, Hashable, Optional, Union
from pydantic import BaseModel
from sd_internal import Request, Response, runtime
from sd_internal import Request, Response, runtime, device_manager
THREAD_NAME_PREFIX = 'Runtime-Render/'
ERR_LOCK_FAILED = ' failed to acquire lock within timeout.'
@ -253,11 +253,7 @@ def thread_render(device):
global current_state, current_state_error, current_model_path, current_vae_path
from . import runtime
try:
if not runtime.device_init(device):
weak_thread_data[threading.current_thread()] = {
'error': f'Could not start on the selected device: {device}'
}
return
runtime.thread_init(device)
except Exception as e:
print(traceback.format_exc())
weak_thread_data[threading.current_thread()] = {
@ -266,13 +262,19 @@ def thread_render(device):
return
weak_thread_data[threading.current_thread()] = {
'device': runtime.thread_data.device,
'device_name': runtime.thread_data.device_name
'device_name': runtime.thread_data.device_name,
'alive': True
}
if runtime.thread_data.device != 'cpu' or is_alive() == 1:
preload_model()
current_state = ServerStates.Online
while True:
task_cache.clean()
if not weak_thread_data[threading.current_thread()]['alive']:
print(f'Shutting down thread for device {runtime.thread_data.device}')
runtime.unload_models()
runtime.unload_filters()
return
if isinstance(current_state_error, SystemExit):
current_state = ServerStates.Unavailable
return
@ -371,12 +373,12 @@ def get_devices():
gpu_count = torch.cuda.device_count()
for device in range(gpu_count):
device = f'cuda:{device}'
if not runtime.is_device_compatible(device):
if not device_manager.is_device_compatible(device):
continue
devices['all'].update({device: torch.cuda.get_device_name(device)})
devices['all'].update({'cpu': runtime.get_processor_name()})
devices['all'].update({'cpu': device_manager.get_processor_name()})
# list the activated devices
if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('get_devices' + ERR_LOCK_FAILED)
@ -411,13 +413,13 @@ def is_alive(device=None):
finally:
manager_lock.release()
def start_render_thread(device='auto'):
if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_threads' + ERR_LOCK_FAILED)
def start_render_thread(device):
if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_thread' + ERR_LOCK_FAILED)
print('Start new Rendering Thread on device', device)
try:
rthread = threading.Thread(target=thread_render, kwargs={'device': device})
rthread.daemon = True
rthread.name = THREAD_NAME_PREFIX + str(device)
rthread.name = THREAD_NAME_PREFIX + device
rthread.start()
render_threads.append(rthread)
finally:
@ -425,6 +427,7 @@ def start_render_thread(device='auto'):
timeout = DEVICE_START_TIMEOUT
while not rthread.is_alive() or not rthread in weak_thread_data or not 'device' in weak_thread_data[rthread]:
if rthread in weak_thread_data and 'error' in weak_thread_data[rthread]:
print(rthread, device, 'error:', weak_thread_data[rthread]['error'])
return False
if timeout <= 0:
return False
@ -432,6 +435,59 @@ def start_render_thread(device='auto'):
time.sleep(1)
return True
def stop_render_thread(device):
try:
device_manager.validate_device_id(device, log_prefix='stop_render_thread')
except:
print(traceback.format_exc())
return False
if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('stop_render_thread' + ERR_LOCK_FAILED)
print('Stopping Rendering Thread on device', device)
try:
thread_to_remove = None
for rthread in render_threads:
weak_data = weak_thread_data.get(rthread)
if weak_data is None or not 'device' in weak_data or weak_data['device'] is None:
continue
thread_device = weak_data['device']
if thread_device == device:
weak_data['alive'] = False
thread_to_remove = rthread
break
if thread_to_remove is not None:
render_threads.remove(thread_to_remove)
return True
finally:
manager_lock.release()
return False
def update_render_threads(render_devices, active_devices):
devices_to_start, devices_to_stop = device_manager.get_device_delta(render_devices, active_devices)
print('devices_to_start', devices_to_start)
print('devices_to_stop', devices_to_stop)
for device in devices_to_stop:
if is_alive(device) <= 0:
print(device, 'is not alive')
continue
if not stop_render_thread(device):
print(device, 'could not stop render thread')
for device in devices_to_start:
if is_alive(device) >= 1:
print(device, 'already registered.')
continue
if not start_render_thread(device):
print(device, 'failed to start.')
if is_alive() <= 0: # No running devices, probably invalid user config.
raise EnvironmentError('ERROR: No active render devices! Please verify the "render_devices" value in config.json')
print('active devices', get_devices()['active'])
def shutdown_event(): # Signal render thread to close on shutdown
global current_state_error
current_state_error = SystemExit('Application shutting down.')
@ -478,7 +534,6 @@ def render(req : ImageRequest):
r.stream_image_progress = False
new_task = RenderTask(r)
new_task.render_device = req.render_device
if task_cache.put(r.session_id, new_task, TASK_TTL):
# Use twice the normal timeout for adding user requests.
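The stop path above is cooperative: stop_render_thread() flips the thread's 'alive' flag in weak_thread_data, and the render loop in thread_render() notices the flag, unloads its models, and returns. The same pattern in a minimal, self-contained sketch:

import threading, time

alive = {'cuda:0': True}  # stands in for weak_thread_data[thread]['alive']

def render_loop(device):
    while alive[device]:     # checked once per loop, like thread_render()
        time.sleep(0.1)      # ...poll the task queue, render, etc...
    print(f'Shutting down thread for device {device}')  # then unload models

t = threading.Thread(target=render_loop, args=('cuda:0',), daemon=True)
t.start()
time.sleep(0.3)              # let it run a few iterations
alive['cuda:0'] = False      # what stop_render_thread() effectively does
t.join()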

ui/server.py

@ -224,7 +224,10 @@ def read_web_data(key:str=None):
raise HTTPException(status_code=500, detail="Config file is missing or unreadable")
return JSONResponse(config, headers=NOCACHE_HEADERS)
elif key == 'devices':
return JSONResponse(task_manager.get_devices(), headers=NOCACHE_HEADERS)
config = getConfig()
devices = task_manager.get_devices()
devices['config'] = config.get('render_devices', "auto")
return JSONResponse(devices, headers=NOCACHE_HEADERS)
elif key == 'models':
return JSONResponse(getModels(), headers=NOCACHE_HEADERS)
elif key == 'modifiers': return FileResponse(os.path.join(SD_UI_DIR, 'modifiers.json'), headers=NOCACHE_HEADERS)
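For reference, the payload the UI's getDevices() now receives from this endpoint looks roughly like the following (device names are illustrative, not from the source):

# Illustrative /get/devices response after this change:
{
    'all':    {'cuda:0': 'NVIDIA GeForce RTX 3060', 'cpu': 'Intel(R) Core(TM) i7'},
    'active': {'cuda:0': 'NVIDIA GeForce RTX 3060'},
    'config': 'auto',   # or 'cpu', or ['cuda:0', 'cuda:1'] -- mirrors config.json
}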
@ -272,17 +275,41 @@ def save_model_to_config(ckpt_model_name, vae_model_name):
setConfig(config)
@app.post('/render')
def render(req : task_manager.ImageRequest):
def save_render_devices_to_config(render_devices):
config = getConfig()
if 'render_devices' not in config:
config['render_devices'] = {}
config['render_devices'] = render_devices
if render_devices is None or len(render_devices) == 0:
del config['render_devices']
setConfig(config)
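The persisted render_devices entry ends up in one of the three shapes that device_manager.get_device_delta() accepts:

# Valid "render_devices" values in config.json (per the validation messages above):
{"render_devices": "auto"}                 # auto-pick GPUs by free memory
{"render_devices": "cpu"}                  # render on the CPU only
{"render_devices": ["cuda:0", "cuda:1"]}   # an explicit list of GPUs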
def update_render_threads_on_request(req : task_manager.ImageRequest):
if req.use_cpu: # TODO Remove after transition.
print('WARNING Replace {use_cpu: true} by {render_device: "cpu"}')
req.render_device = 'cpu'
del req.use_cpu
if req.render_device != 'cpu':
req.render_device = 'cuda:0' # temp hack to get beta working
if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
if req.render_device not in ('cpu', 'auto') and not req.render_device.startswith('cuda:'):
raise HTTPException(status_code=400, detail=f'Invalid render device requested: {req.render_device}')
if req.render_device.startswith('cuda:'):
req.render_device = req.render_device.split(',')
save_render_devices_to_config(req.render_device)
del req.render_device
update_render_threads()
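End to end: the UI joins the selected GPUs into a single comma-separated string (see getCurrentRenderDeviceSelection() in main.js above), and this handler splits it back before saving. A small sketch with a hypothetical normalize() helper:

def normalize(render_device):
    # mirrors update_render_threads_on_request(): 'cpu' and 'auto' pass through,
    # a comma-joined GPU string becomes a list
    if render_device.startswith('cuda:'):
        return render_device.split(',')
    return render_device

assert normalize('cuda:0,cuda:1') == ['cuda:0', 'cuda:1']
assert normalize('auto') == 'auto'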
@app.post('/render')
def render(req : task_manager.ImageRequest):
update_render_threads_on_request(req)
if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
raise HTTPException(status_code=412, detail=f'The "Fix incorrect faces" feature works only on cuda:0. Disable "Fix incorrect faces" (in Image Settings), or use the CUDA_VISIBLE_DEVICES environment variable.')
try:
save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
req.use_stable_diffusion_model = resolve_ckpt_to_use(req.use_stable_diffusion_model)
@ -359,44 +386,19 @@ class LogSuppressFilter(logging.Filter):
return True
logging.getLogger('uvicorn.access').addFilter(LogSuppressFilter())
config = getConfig()
# Start the task_manager
task_manager.default_model_to_load = resolve_ckpt_to_use()
task_manager.default_vae_to_load = resolve_vae_to_use()
if 'render_devices' in config: # Start a new thread for each device.
if not isinstance(config['render_devices'], list):
raise Exception('Invalid render_devices value in config. Should be a list')
config['render_devices'] = set(config['render_devices']) # de-duplicate
for device in config['render_devices']:
if task_manager.is_alive(device) >= 1:
print(device, 'already registered.')
continue
if not task_manager.start_render_thread(device):
print(device, 'failed to start.')
if task_manager.is_alive() <= 0: # No running devices, probably invalid user config.
print('WARNING: No active render devices after loading config. Validate "render_devices" in config.json')
print('Loading default render devices to replace invalid render_devices field from config', config['render_devices'])
if task_manager.is_alive() <= 0: # Either no defaults or no devices after loading config.
# Select best GPU device using free memory, if more than one device.
if task_manager.start_render_thread('auto'): # Detect best device for renders
# if cuda:0 is missing, another cuda device is better. try to start it...
if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
print('Failed to start GPU:0...')
else:
print('Failed to start gpu device.')
if task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cpu'): # Allow CPU to be used for renders
print('Failed to start CPU render device...')
def update_render_threads():
config = getConfig()
render_devices = config.get('render_devices', "auto")
active_devices = task_manager.get_devices()['active'].keys()
is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
print('requesting for render_devices', render_devices)
task_manager.update_render_threads(render_devices, active_devices)
print('active devices', task_manager.get_devices()['active'])
update_render_threads()
# start the browser ui
import webbrowser; webbrowser.open('http://localhost:9000')