Start on multiple GPUs by default (top 75th percentile by free_mem); UI selection of 'cpu', 'auto', or a list of specific GPUs, now linked to the backend; dynamically start/stop render threads for the devices, without requiring a full program restart

cmdr2 2022-11-14 11:23:22 +05:30
parent a19ba40672
commit ea03fd22db
8 changed files with 339 additions and 179 deletions
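For reference, the new scheduling is driven by a `render_devices` key in config.json. The three accepted forms, taken from the validation messages in the new device_manager module below (the rest of config.json is omitted):

    {"render_devices": "auto"}                  # pick GPUs automatically (also the default when the key is absent)
    {"render_devices": "cpu"}                   # render on the CPU only
    {"render_devices": ["cuda:0", "cuda:1"]}    # pin rendering to specific GPUs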

Changed file 1 of 8: the main HTML page (likely ui/index.html; file paths were not captured in this view, so the paths given here are inferred from content)

@@ -7,7 +7,7 @@
 <link rel="icon" type="image/png" href="/media/images/favicon-32x32.png" sizes="32x32">
 <link rel="stylesheet" href="/media/css/fonts.css?v=1">
 <link rel="stylesheet" href="/media/css/themes.css?v=2">
-<link rel="stylesheet" href="/media/css/main.css?v=10">
+<link rel="stylesheet" href="/media/css/main.css?v=11">
 <link rel="stylesheet" href="/media/css/auto-save.css?v=5">
 <link rel="stylesheet" href="/media/css/modifier-thumbnails.css?v=4">
 <link rel="stylesheet" href="/media/css/fontawesome-all.min.css?v=1">
@@ -19,7 +19,7 @@
 <div id="container">
 <div id="top-nav">
 <div id="logo">
-<h1>Stable Diffusion UI <small>v2.3.14 <span id="updateBranchLabel"></span></small></h1>
+<h1>Stable Diffusion UI <small>v2.4 <span id="updateBranchLabel"></span></small></h1>
 </div>
 <div id="server-status">
 <div id="server-status-color"></div>
@@ -35,6 +35,9 @@
 <span id="tab-about" class="tab">
 <span><i class="fa fa-comments icon"></i> Help & Community</span>
 </span>
+<!-- <span id="tab-system-info" class="tab">
+<span><i class="fa fa-microchip icon"></i> System Info</span>
+</span> -->
 </div>
 </div>
@@ -245,7 +248,7 @@
 <div class="tab-content-inner">
 <div class="float-container">
 <div class="float-child">
 <h1>Help</h1>
 <ul id="help-links">
 <li><span class="help-section">Using the software</span>
 <ul>
@@ -270,7 +273,7 @@
 </div>
 <div class="float-child">
 <h1>Community</h1>
 <ul id="community-links">
 <li><a href="https://discord.com/invite/u9yhsFmEkB" target="_blank"><i class="fa-brands fa-discord fa-fw"></i> Discord user community</a></li>
 <li><a href="https://www.reddit.com/r/StableDiffusionUI/" target="_blank"><i class="fa-brands fa-reddit fa-fw"></i> Reddit community</a></li>
@@ -280,6 +283,18 @@
 </div>
 </div>
 </div>
+<!-- <div id="tab-content-system-info" class="tab-content">
+<div id="system-info" class="tab-content-inner">
+<h1>System Info</h1>
+<table>
+<tr><td><label>Processor:</label></td><td id="system-info-cpu" class="value">Dingus</td></tr>
+<tr><td><label>RAM:</label></td><td id="system-info-ram" class="value">Dingus Another</td></tr>
+<tr><td><label>Compatible Graphics Cards (all):</label></td><td id="system-info-all-gpus" class="value">Dingus</td></tr>
+<tr><td></td><td>&nbsp;</td></tr>
+<tr><td><label>What's being used for rendering 🔥:</label></td><td id="system-info-active-gpus" class="value">Dingus<br/>Intel Graphics SOmething<br/>Another thing</td></tr>
+</table>
+</div>
+</div> -->
 </div>
@@ -317,13 +332,13 @@
 </div>
 </body>
-<script src="media/js/parameters.js?v=4"></script>
+<script src="media/js/parameters.js?v=5"></script>
 <script src="media/js/plugins.js?v=1"></script>
 <script src="media/js/utils.js?v=6"></script>
 <script src="media/js/inpainting-editor.js?v=1"></script>
 <script src="media/js/image-modifiers.js?v=6"></script>
 <script src="media/js/auto-save.js?v=7"></script>
-<script src="media/js/main.js?v=13"></script>
+<script src="media/js/main.js?v=14"></script>
 <script src="media/js/themes.js?v=4"></script>
 <script src="media/js/dnd.js?v=8"></script>
 <script>

Changed file 2 of 8: the main stylesheet (likely ui/media/css/main.css, matching the v=10 to v=11 bump above)

@@ -123,7 +123,7 @@ label {
     padding: 16px;
     display: flex;
     flex-direction: column;
-    flex: 0 0 350pt;
+    flex: 0 0 370pt;
 }
 #editor label {
     font-weight: normal;

@@ -887,3 +887,9 @@ input::file-selector-button {
     margin-bottom: 15px;
     box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.15), 0 6px 20px 0 rgba(0, 0, 0, 0.15);
 }
+#system-info .value {
+    text-align: left;
+}
+#system-info label {
+    float: right;
+}

Changed file 3 of 8: the main UI script (likely ui/media/js/main.js, matching the v=13 to v=14 bump above)

@@ -27,6 +27,7 @@ let maskImageSelector = document.querySelector("#mask")
 let maskImagePreview = document.querySelector("#mask_preview")
 let turboField = document.querySelector('#turbo')
 let useCPUField = document.querySelector('#use_cpu')
+let autoPickGPUsField = document.querySelector('#auto_pick_gpus')
 let useGPUsField = document.querySelector('#use_gpus')
 let useFullPrecisionField = document.querySelector('#use_full_precision')
 let saveToDiskField = document.querySelector('#save_to_disk')
@@ -810,14 +811,15 @@ function getCurrentUserRequest() {
 }
 function getCurrentRenderDeviceSelection() {
-    if (useCPUField.checked) {
+    let selectedGPUs = $('#use_gpus').val()
+    if (useCPUField.checked && !autoPickGPUsField.checked) {
         return 'cpu'
     }
-    let selectedGPUs = $(useGPUsField).val()
-    if (selectedGPUs.length == 0) {
-        selectedGPUs = ['auto']
+    if (autoPickGPUsField.checked || selectedGPUs.length == 0) {
+        return 'auto'
     }
     return selectedGPUs.join(',')
 }
@@ -1136,10 +1138,27 @@ updatePromptStrength()
 useCPUField.addEventListener('click', function() {
     let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
+    let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
     if (this.checked) {
         gpuSettingEntry.style.display = 'none'
+        autoPickGPUSettingEntry.style.display = 'none'
+        autoPickGPUsField.setAttribute('data-old-value', autoPickGPUsField.checked)
+        autoPickGPUsField.checked = false
     } else if (useGPUsField.options.length >= MIN_GPUS_TO_SHOW_SELECTION) {
         gpuSettingEntry.style.display = ''
+        autoPickGPUSettingEntry.style.display = ''
+        autoPickGPUsField.checked = (autoPickGPUsField.getAttribute('data-old-value') === 'true')
+    }
+})
+
+useGPUsField.addEventListener('click', function() {
+    let selectedGPUs = $('#use_gpus').val()
+    autoPickGPUsField.checked = (selectedGPUs.length === 0)
+})
+
+autoPickGPUsField.addEventListener('click', function() {
+    if (this.checked) {
+        $('#use_gpus').val([])
     }
 })
@@ -1360,6 +1379,8 @@ async function getDevices() {
     if (allDeviceIds.length < MIN_GPUS_TO_SHOW_SELECTION) {
         let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
         gpuSettingEntry.style.display = 'none'
+        let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
+        autoPickGPUSettingEntry.style.display = 'none'
         if (allDeviceIds.length === 0) {
             useCPUField.checked = true
@@ -1367,14 +1388,18 @@ async function getDevices() {
         }
     }
-    useGPUsField.innerHTML = ''
+    autoPickGPUsField.checked = (res['config'] === 'auto')
+
+    useGPUsField.innerHTML = ''
     allDeviceIds.forEach(device => {
         let deviceName = res['all'][device]
-        let selected = (activeDeviceIds.includes(device) ? 'selected' : '')
-        let deviceOption = `<option value="${device}" ${selected}>${deviceName}</option>`
+        let deviceOption = `<option value="${device}">${deviceName}</option>`
         useGPUsField.insertAdjacentHTML('beforeend', deviceOption)
     })
+
+    if (!autoPickGPUsField.checked) {
+        $('#use_gpus').val(activeDeviceIds)
+    }
 }
 } catch (e) {
     console.log('error fetching devices', e)
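For reference, getDevices() above consumes a devices payload of roughly this shape (a sketch assembled from get_devices() in task_manager.py and the devices['config'] field added in server.py below; the device names are illustrative):

    {
        "all":    { "cuda:0": "NVIDIA GeForce RTX 3060", "cpu": "Intel(R) Core(TM) i7" },
        "active": { "cuda:0": "NVIDIA GeForce RTX 3060" },
        "config": "auto"
    }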

Changed file 4 of 8: the settings parameters script (likely ui/media/js/parameters.js, matching the v=4 to v=5 bump above)

@@ -73,6 +73,12 @@ var PARAMETERS = [
         note: "warning: this will be *very* slow",
         default: false,
     },
+    {
+        id: "auto_pick_gpus",
+        type: ParameterType.checkbox,
+        label: "Automatically pick the GPUs",
+        default: false,
+    },
     {
         id: "use_gpus",
         type: ParameterType.select_multiple,

Changed file 5 of 8: a new backend module (likely ui/sd_internal/device_manager.py, per the `from sd_internal import device_manager` imports below)

@@ -0,0 +1,156 @@
+import os
+import torch
+import traceback
+import re
+
+COMPARABLE_GPU_PERCENTILE = 0.75 # if a GPU's free_mem is within this % of the GPU with the most free_mem, it will be picked
+
+def get_device_delta(render_devices, active_devices):
+    '''
+    render_devices: 'cpu', or 'auto' or ['cuda:N'...]
+    active_devices: ['cpu', 'cuda:N'...]
+    '''
+    if render_devices is not None:
+        if render_devices in ('cpu', 'auto'):
+            render_devices = [render_devices]
+        elif isinstance(render_devices, list) and len(render_devices) > 0:
+            render_devices = list(filter(lambda x: x.startswith('cuda:'), render_devices))
+            if len(render_devices) == 0:
+                raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')
+
+            render_devices = list(filter(lambda x: is_device_compatible(x), render_devices))
+            if len(render_devices) == 0:
+                raise Exception('Sorry, none of the render_devices configured in config.json are compatible with Stable Diffusion')
+        else:
+            raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')
+    else:
+        render_devices = ['auto']
+
+    if 'auto' in render_devices:
+        render_devices = auto_pick_devices(active_devices)
+        if 'cpu' in render_devices:
+            print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
+
+    active_devices = set(active_devices)
+    render_devices = set(render_devices)
+
+    devices_to_start = render_devices - active_devices
+    devices_to_stop = active_devices - render_devices
+
+    return devices_to_start, devices_to_stop
+
+def auto_pick_devices(currently_active_devices):
+    if not torch.cuda.is_available(): return ['cpu']
+
+    device_count = torch.cuda.device_count()
+    if device_count == 1:
+        return ['cuda:0'] if is_device_compatible('cuda:0') else ['cpu']
+
+    print('Autoselecting GPU. Using most free memory.')
+    devices = []
+    for device in range(device_count):
+        device = f'cuda:{device}'
+        if not is_device_compatible(device):
+            continue
+
+        mem_free, mem_total = torch.cuda.mem_get_info(device)
+        mem_free /= float(10**9)
+        mem_total /= float(10**9)
+        device_name = torch.cuda.get_device_name(device)
+        print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
+        devices.append({'device': device, 'device_name': device_name, 'mem_free': mem_free})
+
+    devices.sort(key=lambda x: x['mem_free'], reverse=True)
+    max_free_mem = devices[0]['mem_free']
+    free_mem_threshold = COMPARABLE_GPU_PERCENTILE * max_free_mem
+
+    # Auto-pick algorithm:
+    # 1. Pick the top 75 percentile of the GPUs, sorted by free_mem.
+    # 2. Also include already-running devices (GPU-only), otherwise their free_mem will
+    #    always be very low (since their VRAM contains the model).
+    #    These already-running devices probably aren't terrible, since they were picked in the past.
+    #    Worst case, the user can restart the program and that'll get rid of them.
+    devices = list(filter((lambda x: x['mem_free'] > free_mem_threshold or x['device'] in currently_active_devices), devices))
+    devices = list(map(lambda x: x['device'], devices)) # keep just the 'cuda:N' ids; the dicts are unhashable and would break the set math in get_device_delta()
+    return devices
+
+def device_init(thread_data, device):
+    '''
+    This function assumes the 'device' has already been verified to be compatible.
+    `get_device_delta()` has already filtered out incompatible devices.
+    '''
+    validate_device_id(device, log_prefix='device_init')
+
+    if device == 'cpu':
+        thread_data.device = 'cpu'
+        thread_data.device_name = get_processor_name()
+        print('Render device CPU available as', thread_data.device_name)
+        return
+
+    thread_data.device_name = torch.cuda.get_device_name(device)
+    thread_data.device = device
+
+    # Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
+    device_name = thread_data.device_name.lower()
+    thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
+    if thread_data.force_full_precision:
+        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
+        # Apply force_full_precision now before models are loaded.
+        thread_data.precision = 'full'
+
+    print(f'Setting {device} as active')
+    torch.cuda.device(device)
+
+    return
+
+def validate_device_id(device, log_prefix=''):
+    def is_valid():
+        if not isinstance(device, str):
+            return False
+        if device == 'cpu':
+            return True
+        if not device.startswith('cuda:') or not device[5:].isnumeric():
+            return False
+        return True
+
+    if not is_valid():
+        raise EnvironmentError(f"{log_prefix}: device id should be 'cpu', or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
+
+def is_device_compatible(device):
+    '''
+    Returns True/False, and prints any compatibility errors
+    '''
+    validate_device_id(device, log_prefix='is_device_compatible')
+
+    if device == 'cpu': return True
+    # Memory check
+    try:
+        _, mem_total = torch.cuda.mem_get_info(device)
+        mem_total /= float(10**9)
+        if mem_total < 3.0:
+            print(f'GPU {device} with less than 3 GB of VRAM is not compatible with Stable Diffusion')
+            return False
+    except RuntimeError as e:
+        print(str(e))
+        return False
+    return True
+
+def get_processor_name():
+    try:
+        import platform, subprocess
+        if platform.system() == "Windows":
+            return platform.processor()
+        elif platform.system() == "Darwin":
+            os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
+            command = "sysctl -n machdep.cpu.brand_string"
+            return subprocess.check_output(command).strip()
+        elif platform.system() == "Linux":
+            command = "cat /proc/cpuinfo"
+            all_info = subprocess.check_output(command, shell=True).decode().strip()
+            for line in all_info.split("\n"):
+                if "model name" in line:
+                    return re.sub(".*model name.*:", "", line, 1).strip()
+    except:
+        print(traceback.format_exc())
+    return "cpu"

Changed file 6 of 8: the render runtime (likely ui/sd_internal/runtime.py)

@@ -37,6 +37,7 @@ config_yaml = "optimizedSD/v1-inference.yaml"
 filename_regex = re.compile('[^a-zA-Z0-9]')
 # api stuff
+from sd_internal import device_manager
 from . import Request, Response, Image as ResponseImage
 import base64
 from io import BytesIO
@@ -45,73 +46,7 @@ from io import BytesIO
 from threading import local as LocalThreadVars
 thread_data = LocalThreadVars()
-def get_processor_name():
-    try:
-        import platform, subprocess
-        if platform.system() == "Windows":
-            return platform.processor()
-        elif platform.system() == "Darwin":
-            os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
-            command = "sysctl -n machdep.cpu.brand_string"
-            return subprocess.check_output(command).strip()
-        elif platform.system() == "Linux":
-            command = "cat /proc/cpuinfo"
-            all_info = subprocess.check_output(command, shell=True).decode().strip()
-            for line in all_info.split("\n"):
-                if "model name" in line:
-                    return re.sub(".*model name.*:", "", line, 1).strip()
-    except:
-        print(traceback.format_exc())
-    return "cpu"
-
-def validate_device_id(device, allow_auto=False, log_prefix=''):
-    device_names = ['cpu', 'auto'] if allow_auto else ['cpu']
-    if not isinstance(device, str) or (device not in device_names and (len(device) <= len('cuda:') or device[:5] != 'cuda:' or not device[5:].isnumeric())):
-        raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
-
-'''
-Returns True/False, and prints any compatibility errors
-'''
-def is_device_compatible(device):
-    validate_device_id(device, allow_auto=False, log_prefix='is_device_compatible')
-    if device == 'cpu': return True
-    # Memory check
-    try:
-        mem_free, mem_total = torch.cuda.mem_get_info(device)
-        mem_total /= float(10**9)
-        if mem_total < 3.0:
-            print('GPUs with less than 3 GB of VRAM are not compatible with Stable Diffusion')
-            return False
-    except RuntimeError as e:
-        print(str(e))
-        return False
-    return True
-
-def device_select(device):
-    validate_device_id(device, allow_auto=False, log_prefix='device_select')
-    if device == 'cpu': return True
-    if not torch.cuda.is_available(): return False
-    if not is_device_compatible(device):
-        return False
-
-    thread_data.device_name = torch.cuda.get_device_name(device)
-    thread_data.device = device
-
-    # Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
-    device_name = thread_data.device_name.lower()
-    thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
-    if thread_data.force_full_precision:
-        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
-        # Apply force_full_precision now before models are loaded.
-        thread_data.precision = 'full'
-    return True
-
-def device_init(device_selection):
-    validate_device_id(device_selection, allow_auto=True, log_prefix='device_init')
-
+def thread_init(device):
     # Thread bound properties
     thread_data.stop_processing = False
     thread_data.temp_images = {}
@@ -140,50 +75,7 @@ def device_init(device_selection):
     thread_data.force_full_precision = False
     thread_data.reduced_memory = True
-    if device_selection == 'cpu':
-        thread_data.device = 'cpu'
-        thread_data.device_name = get_processor_name()
-        print('Render device CPU available as', thread_data.device_name)
-        return True
-    if not torch.cuda.is_available():
-        if device_selection == 'auto':
-            print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
-            thread_data.device = 'cpu'
-            thread_data.device_name = get_processor_name()
-            return True
-        else:
-            raise EnvironmentError(f'Could not find a compatible GPU for the requested device_selection: {device_selection}!')
-    if device_selection == 'auto':
-        device_count = torch.cuda.device_count()
-        if device_count == 1 and device_select('cuda:0'):
-            torch.cuda.device('cuda:0')
-            return True
-        print('Autoselecting GPU. Using most free memory.')
-        max_mem_free = 0
-        best_device = None
-        for device in range(device_count):
-            device = f'cuda:{device}'
-            mem_free, mem_total = torch.cuda.mem_get_info(device)
-            mem_free /= float(10**9)
-            mem_total /= float(10**9)
-            device_name = torch.cuda.get_device_name(device)
-            print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
-            if max_mem_free < mem_free:
-                max_mem_free = mem_free
-                best_device = device
-        if best_device and device_select(best_device):
-            print(f'Setting {device} as active')
-            torch.cuda.device(device)
-            return True
-    if device_selection != 'auto' and device_select(device_selection):
-        print(f'Setting {device_selection} as active')
-        torch.cuda.device(device_selection)
-        return True
-    return False
+    device_manager.device_init(thread_data, device)
 def load_model_ckpt():
     if not thread_data.ckpt_file: raise ValueError(f'Thread ckpt_file is undefined.')
@@ -296,6 +188,8 @@ def unload_filters():
         del thread_data.model_real_esrgan
         thread_data.model_real_esrgan = None
+
+    gc()
 def unload_models():
     if thread_data.model is not None:
         print('Unloading models...')
@@ -313,6 +207,8 @@ def unload_models():
         thread_data.modelCS = None
         thread_data.modelFS = None
+
+    gc()
 def wait_model_move_to(model, target_device): # Send to target_device and wait until complete.
     if thread_data.device == target_device: return
     start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
@@ -518,7 +414,6 @@ def do_mk_img(req: Request):
     if needs_model_reload:
         unload_models()
         unload_filters()
-        gc()
         load_model_ckpt()
     if thread_data.turbo != req.turbo:

Changed file 7 of 8: the task manager (likely ui/sd_internal/task_manager.py)

@@ -14,7 +14,7 @@ import queue, threading, time, weakref
 from typing import Any, Generator, Hashable, Optional, Union
 from pydantic import BaseModel
-from sd_internal import Request, Response, runtime
+from sd_internal import Request, Response, runtime, device_manager
 THREAD_NAME_PREFIX = 'Runtime-Render/'
 ERR_LOCK_FAILED = ' failed to acquire lock within timeout.'
@@ -253,11 +253,7 @@ def thread_render(device):
     global current_state, current_state_error, current_model_path, current_vae_path
     from . import runtime
     try:
-        if not runtime.device_init(device):
-            weak_thread_data[threading.current_thread()] = {
-                'error': f'Could not start on the selected device: {device}'
-            }
-            return
+        runtime.thread_init(device)
     except Exception as e:
         print(traceback.format_exc())
         weak_thread_data[threading.current_thread()] = {
@@ -266,13 +262,19 @@ def thread_render(device):
         return
     weak_thread_data[threading.current_thread()] = {
         'device': runtime.thread_data.device,
-        'device_name': runtime.thread_data.device_name
+        'device_name': runtime.thread_data.device_name,
+        'alive': True
     }
     if runtime.thread_data.device != 'cpu' or is_alive() == 1:
         preload_model()
         current_state = ServerStates.Online
     while True:
         task_cache.clean()
+        if not weak_thread_data[threading.current_thread()]['alive']:
+            print(f'Shutting down thread for device {runtime.thread_data.device}')
+            runtime.unload_models()
+            runtime.unload_filters()
+            return
         if isinstance(current_state_error, SystemExit):
             current_state = ServerStates.Unavailable
             return
@@ -371,12 +373,12 @@ def get_devices():
     gpu_count = torch.cuda.device_count()
     for device in range(gpu_count):
         device = f'cuda:{device}'
-        if not runtime.is_device_compatible(device):
+        if not device_manager.is_device_compatible(device):
             continue
         devices['all'].update({device: torch.cuda.get_device_name(device)})
-    devices['all'].update({'cpu': runtime.get_processor_name()})
+    devices['all'].update({'cpu': device_manager.get_processor_name()})
     # list the activated devices
     if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('get_devices' + ERR_LOCK_FAILED)
@@ -411,13 +413,13 @@ def is_alive(device=None):
     finally:
         manager_lock.release()
-def start_render_thread(device='auto'):
-    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_threads' + ERR_LOCK_FAILED)
+def start_render_thread(device):
+    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_thread' + ERR_LOCK_FAILED)
     print('Start new Rendering Thread on device', device)
     try:
         rthread = threading.Thread(target=thread_render, kwargs={'device': device})
         rthread.daemon = True
-        rthread.name = THREAD_NAME_PREFIX + str(device)
+        rthread.name = THREAD_NAME_PREFIX + device
         rthread.start()
         render_threads.append(rthread)
     finally:
@@ -425,6 +427,7 @@ def start_render_thread(device='auto'):
     timeout = DEVICE_START_TIMEOUT
     while not rthread.is_alive() or not rthread in weak_thread_data or not 'device' in weak_thread_data[rthread]:
         if rthread in weak_thread_data and 'error' in weak_thread_data[rthread]:
+            print(rthread, device, 'error:', weak_thread_data[rthread]['error'])
             return False
         if timeout <= 0:
             return False
@@ -432,6 +435,59 @@ def start_render_thread(device='auto'):
         time.sleep(1)
     return True
+def stop_render_thread(device):
+    try:
+        device_manager.validate_device_id(device, log_prefix='stop_render_thread')
+    except:
+        print(traceback.format_exc())
+        return False
+
+    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('stop_render_thread' + ERR_LOCK_FAILED)
+    print('Stopping Rendering Thread on device', device)
+
+    try:
+        thread_to_remove = None
+        for rthread in render_threads:
+            weak_data = weak_thread_data.get(rthread)
+            if weak_data is None or not 'device' in weak_data or weak_data['device'] is None:
+                continue
+            thread_device = weak_data['device']
+            if thread_device == device:
+                weak_data['alive'] = False
+                thread_to_remove = rthread
+                break
+        if thread_to_remove is not None:
+            render_threads.remove(rthread)
+            return True
+    finally:
+        manager_lock.release()
+
+    return False
+
+def update_render_threads(render_devices, active_devices):
+    devices_to_start, devices_to_stop = device_manager.get_device_delta(render_devices, active_devices)
+    print('devices_to_start', devices_to_start)
+    print('devices_to_stop', devices_to_stop)
+
+    for device in devices_to_stop:
+        if is_alive(device) <= 0:
+            print(device, 'is not alive')
+            continue
+        if not stop_render_thread(device):
+            print(device, 'could not stop render thread')
+
+    for device in devices_to_start:
+        if is_alive(device) >= 1:
+            print(device, 'already registered.')
+            continue
+        if not start_render_thread(device):
+            print(device, 'failed to start.')
+
+    if is_alive() <= 0: # No running devices, probably invalid user config.
+        raise EnvironmentError('ERROR: No active render devices! Please verify the "render_devices" value in config.json')
+
+    print('active devices', get_devices()['active'])
+
 def shutdown_event(): # Signal render thread to close on shutdown
     global current_state_error
     current_state_error = SystemExit('Application shutting down.')
@@ -478,7 +534,6 @@ def render(req : ImageRequest):
     r.stream_image_progress = False
     new_task = RenderTask(r)
-    new_task.render_device = req.render_device
     if task_cache.put(r.session_id, new_task, TASK_TTL):
         # Use twice the normal timeout for adding user requests.
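The start/stop logic above reduces to set arithmetic in get_device_delta(). A sketch with hypothetical device ids, assuming both requested GPUs exist and pass is_device_compatible():

    # config.json asks for cuda:0 and cuda:2, while cuda:0 and cuda:1 are currently active
    to_start, to_stop = device_manager.get_device_delta(
        ['cuda:0', 'cuda:2'],   # render_devices, from config.json
        ['cuda:0', 'cuda:1'])   # active_devices, from get_devices()['active']
    # to_start == {'cuda:2'}, to_stop == {'cuda:1'}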

Changed file 8 of 8: the web server (likely ui/server.py)

@@ -224,7 +224,10 @@ def read_web_data(key:str=None):
             raise HTTPException(status_code=500, detail="Config file is missing or unreadable")
         return JSONResponse(config, headers=NOCACHE_HEADERS)
     elif key == 'devices':
-        return JSONResponse(task_manager.get_devices(), headers=NOCACHE_HEADERS)
+        config = getConfig()
+        devices = task_manager.get_devices()
+        devices['config'] = config.get('render_devices', "auto")
+        return JSONResponse(devices, headers=NOCACHE_HEADERS)
    elif key == 'models':
         return JSONResponse(getModels(), headers=NOCACHE_HEADERS)
     elif key == 'modifiers': return FileResponse(os.path.join(SD_UI_DIR, 'modifiers.json'), headers=NOCACHE_HEADERS)
@@ -272,17 +275,41 @@ def save_model_to_config(ckpt_model_name, vae_model_name):
     setConfig(config)
-@app.post('/render')
-def render(req : task_manager.ImageRequest):
+def save_render_devices_to_config(render_devices):
+    config = getConfig()
+    if 'render_devices' not in config:
+        config['render_devices'] = {}
+
+    config['render_devices'] = render_devices
+    if render_devices is None or len(render_devices) == 0:
+        del config['render_devices']
+
+    setConfig(config)
+
+def update_render_threads_on_request(req : task_manager.ImageRequest):
     if req.use_cpu: # TODO Remove after transition.
         print('WARNING Replace {use_cpu: true} by {render_device: "cpu"}')
         req.render_device = 'cpu'
         del req.use_cpu
-    if req.render_device != 'cpu':
-        req.render_device = 'cuda:0' # temp hack to get beta working
-    if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
+    if req.render_device not in ('cpu', 'auto') and not req.render_device.startswith('cuda:'):
+        raise HTTPException(status_code=400, detail=f'Invalid render device requested: {req.render_device}')
+
+    if req.render_device.startswith('cuda:'):
+        req.render_device = req.render_device.split(',')
+
+    save_render_devices_to_config(req.render_device)
+    del req.render_device
+
+    update_render_threads()
+
+@app.post('/render')
+def render(req : task_manager.ImageRequest):
+    update_render_threads_on_request(req)
+
     if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
-        raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
+        raise HTTPException(status_code=412, detail=f'The "Fix incorrect faces" feature works only on cuda:0. Disable "Fix incorrect faces" (in Image Settings), or use the CUDA_VISIBLE_DEVICES environment variable.')
     try:
         save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
         req.use_stable_diffusion_model = resolve_ckpt_to_use(req.use_stable_diffusion_model)
@@ -359,44 +386,19 @@ class LogSuppressFilter(logging.Filter):
         return True
 logging.getLogger('uvicorn.access').addFilter(LogSuppressFilter())
-config = getConfig()
 # Start the task_manager
 task_manager.default_model_to_load = resolve_ckpt_to_use()
 task_manager.default_vae_to_load = resolve_vae_to_use()
-if 'render_devices' in config: # Start a new thread for each device.
-    if not isinstance(config['render_devices'], list):
-        raise Exception('Invalid render_devices value in config. Should be a list')
-    config['render_devices'] = set(config['render_devices']) # de-duplicate
-    for device in config['render_devices']:
-        if task_manager.is_alive(device) >= 1:
-            print(device, 'already registered.')
-            continue
-        if not task_manager.start_render_thread(device):
-            print(device, 'failed to start.')
-    if task_manager.is_alive() <= 0: # No running devices, probably invalid user config.
-        print('WARNING: No active render devices after loading config. Validate "render_devices" in config.json')
-        print('Loading default render devices to replace invalid render_devices field from config', config['render_devices'])
-if task_manager.is_alive() <= 0: # Either no defaults or no devices after loading config.
-    # Select best GPU device using free memory, if more than one device.
-    if task_manager.start_render_thread('auto'): # Detect best device for renders
-        # if cuda:0 is missing, another cuda device is better. try to start it...
-        if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
-            print('Failed to start GPU:0...')
-    else:
-        print('Failed to start gpu device.')
-    if task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cpu'): # Allow CPU to be used for renders
-        print('Failed to start CPU render device...')
-is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
-if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
-    print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
-    print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
-    print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
-    print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
-print('active devices', task_manager.get_devices()['active'])
+def update_render_threads():
+    config = getConfig()
+    render_devices = config.get('render_devices', "auto")
+    active_devices = task_manager.get_devices()['active'].keys()
+
+    print('requesting for render_devices', render_devices)
+    task_manager.update_render_threads(render_devices, active_devices)
+
+update_render_threads()
 # start the browser ui
 import webbrowser; webbrowser.open('http://localhost:9000')
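End to end, a client can now steer device selection per request via the render_device field on the /render body: the value is validated, persisted to config.json via save_render_devices_to_config(), and the render threads are re-synced before the task is queued. A sketch of the accepted values (the rest of the request body is omitted):

    {"render_device": "auto"}             # let the backend auto-pick GPUs
    {"render_device": "cpu"}              # CPU-only rendering
    {"render_device": "cuda:0,cuda:1"}    # specific GPUs; split on ',' server-side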