Mirror of https://github.com/easydiffusion/easydiffusion.git
Start on multiple GPUs by default (top 75th percentile by free_mem); UI selection of 'cpu', 'auto', or a list of specific GPUs, now linked to the backend; dynamically start/stop render threads for the devices without requiring a full program restart.
Commit: ea03fd22db (parent: a19ba40672)
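The new device selection accepts three forms for `render_devices` in config.json, as validated in `device_manager.get_device_delta()` below: {"render_devices": "cpu"}, {"render_devices": "auto"}, or {"render_devices": ["cuda:0", "cuda:1"]}. The sketch below shows roughly how the backend turns that setting into render threads to start and stop; the import paths and the bare `config` dict are simplified assumptions, only the function names come from this commit.

# Rough sketch (not part of this commit): resolve the configured render devices
# and compute which render threads to start or stop, mirroring
# server.update_render_threads() -> task_manager.update_render_threads()
# -> device_manager.get_device_delta(). Import paths are assumed.
from sd_internal import device_manager, task_manager

config = {'render_devices': ['cuda:0', 'cuda:1']}   # or 'cpu', or 'auto'
render_devices = config.get('render_devices', 'auto')
active_devices = task_manager.get_devices()['active'].keys()

devices_to_start, devices_to_stop = device_manager.get_device_delta(render_devices, active_devices)
for device in devices_to_stop:
    task_manager.stop_render_thread(device)
for device in devices_to_start:
    task_manager.start_render_thread(device)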
@@ -7,7 +7,7 @@
<link rel="icon" type="image/png" href="/media/images/favicon-32x32.png" sizes="32x32">
<link rel="stylesheet" href="/media/css/fonts.css?v=1">
<link rel="stylesheet" href="/media/css/themes.css?v=2">
<link rel="stylesheet" href="/media/css/main.css?v=10">
<link rel="stylesheet" href="/media/css/main.css?v=11">
<link rel="stylesheet" href="/media/css/auto-save.css?v=5">
<link rel="stylesheet" href="/media/css/modifier-thumbnails.css?v=4">
<link rel="stylesheet" href="/media/css/fontawesome-all.min.css?v=1">
@@ -19,7 +19,7 @@
<div id="container">
<div id="top-nav">
<div id="logo">
<h1>Stable Diffusion UI <small>v2.3.14 <span id="updateBranchLabel"></span></small></h1>
<h1>Stable Diffusion UI <small>v2.4 <span id="updateBranchLabel"></span></small></h1>
</div>
<div id="server-status">
<div id="server-status-color">●</div>
@@ -35,6 +35,9 @@
<span id="tab-about" class="tab">
<span><i class="fa fa-comments icon"></i> Help & Community</span>
</span>
<!-- <span id="tab-system-info" class="tab">
<span><i class="fa fa-microchip icon"></i> System Info</span>
</span> -->
</div>
</div>
@@ -245,7 +248,7 @@
<div class="tab-content-inner">
<div class="float-container">
<div class="float-child">
<h1>Help</h1>
<h1>Help</h1>
<ul id="help-links">
<li><span class="help-section">Using the software</span>
<ul>
@@ -270,7 +273,7 @@
</div>

<div class="float-child">
<h1>Community</h1>
<h1>Community</h1>
<ul id="community-links">
<li><a href="https://discord.com/invite/u9yhsFmEkB" target="_blank"><i class="fa-brands fa-discord fa-fw"></i> Discord user community</a></li>
<li><a href="https://www.reddit.com/r/StableDiffusionUI/" target="_blank"><i class="fa-brands fa-reddit fa-fw"></i> Reddit community</a></li>
@@ -280,6 +283,18 @@
</div>
</div>
</div>
<!-- <div id="tab-content-system-info" class="tab-content">
<div id="system-info" class="tab-content-inner">
<h1>System Info</h1>
<table>
<tr><td><label>Processor:</label></td><td id="system-info-cpu" class="value">Dingus</td></tr>
<tr><td><label>RAM:</label></td><td id="system-info-ram" class="value">Dingus Another</td></tr>
<tr><td><label>Compatible Graphics Cards (all):</label></td><td id="system-info-all-gpus" class="value">Dingus</td></tr>
<tr><td></td><td> </td></tr>
<tr><td><label>What's being used for rendering 🔥:</label></td><td id="system-info-active-gpus" class="value">Dingus<br/>Intel Graphics SOmething<br/>Another thing</td></tr>
</table>
</div>
</div> -->
</div>
@@ -317,13 +332,13 @@
</div>
</body>

<script src="media/js/parameters.js?v=4"></script>
<script src="media/js/parameters.js?v=5"></script>
<script src="media/js/plugins.js?v=1"></script>
<script src="media/js/utils.js?v=6"></script>
<script src="media/js/inpainting-editor.js?v=1"></script>
<script src="media/js/image-modifiers.js?v=6"></script>
<script src="media/js/auto-save.js?v=7"></script>
<script src="media/js/main.js?v=13"></script>
<script src="media/js/main.js?v=14"></script>
<script src="media/js/themes.js?v=4"></script>
<script src="media/js/dnd.js?v=8"></script>
<script>
@@ -123,7 +123,7 @@ label {
padding: 16px;
display: flex;
flex-direction: column;
flex: 0 0 350pt;
flex: 0 0 370pt;
}
#editor label {
font-weight: normal;
@@ -887,3 +887,9 @@ input::file-selector-button {
margin-bottom: 15px;
box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.15), 0 6px 20px 0 rgba(0, 0, 0, 0.15);
}
#system-info .value {
text-align: left;
}
#system-info label {
float: right;
}
@@ -27,6 +27,7 @@ let maskImageSelector = document.querySelector("#mask")
let maskImagePreview = document.querySelector("#mask_preview")
let turboField = document.querySelector('#turbo')
let useCPUField = document.querySelector('#use_cpu')
let autoPickGPUsField = document.querySelector('#auto_pick_gpus')
let useGPUsField = document.querySelector('#use_gpus')
let useFullPrecisionField = document.querySelector('#use_full_precision')
let saveToDiskField = document.querySelector('#save_to_disk')
@@ -810,14 +811,15 @@ function getCurrentUserRequest() {
}

function getCurrentRenderDeviceSelection() {
if (useCPUField.checked) {
let selectedGPUs = $('#use_gpus').val()

if (useCPUField.checked && !autoPickGPUsField.checked) {
return 'cpu'
}

let selectedGPUs = $(useGPUsField).val()
if (selectedGPUs.length == 0) {
selectedGPUs = ['auto']
if (autoPickGPUsField.checked || selectedGPUs.length == 0) {
return 'auto'
}

return selectedGPUs.join(',')
}
@@ -1136,10 +1138,27 @@ updatePromptStrength()

useCPUField.addEventListener('click', function() {
let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
if (this.checked) {
gpuSettingEntry.style.display = 'none'
autoPickGPUSettingEntry.style.display = 'none'
autoPickGPUsField.setAttribute('data-old-value', autoPickGPUsField.checked)
autoPickGPUsField.checked = false
} else if (useGPUsField.options.length >= MIN_GPUS_TO_SHOW_SELECTION) {
gpuSettingEntry.style.display = ''
autoPickGPUSettingEntry.style.display = ''
autoPickGPUsField.checked = (autoPickGPUsField.getAttribute('data-old-value') === 'true')
}
})

useGPUsField.addEventListener('click', function() {
let selectedGPUs = $('#use_gpus').val()
autoPickGPUsField.checked = (selectedGPUs.length === 0)
})

autoPickGPUsField.addEventListener('click', function() {
if (this.checked) {
$('#use_gpus').val([])
}
})
@@ -1360,6 +1379,8 @@ async function getDevices() {
if (allDeviceIds.length < MIN_GPUS_TO_SHOW_SELECTION) {
let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
gpuSettingEntry.style.display = 'none'
let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
autoPickGPUSettingEntry.style.display = 'none'

if (allDeviceIds.length === 0) {
useCPUField.checked = true
@@ -1367,14 +1388,18 @@ async function getDevices() {
}
}

useGPUsField.innerHTML = ''
autoPickGPUsField.checked = (res['config'] === 'auto')

useGPUsField.innerHTML = ''
allDeviceIds.forEach(device => {
let deviceName = res['all'][device]
let selected = (activeDeviceIds.includes(device) ? 'selected' : '')
let deviceOption = `<option value="${device}" ${selected}>${deviceName}</option>`
let deviceOption = `<option value="${device}">${deviceName}</option>`
useGPUsField.insertAdjacentHTML('beforeend', deviceOption)
})

if (!autoPickGPUsField.checked) {
$('#use_gpus').val(activeDeviceIds)
}
}
} catch (e) {
console.log('error fetching devices', e)
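The getDevices() handler above reads res['all'], the active device ids, and res['config'] from the devices endpoint. A Python sketch of the payload shape it appears to expect is shown below; the shape is inferred from this diff (see task_manager.get_devices() and the 'devices' key handling in ui/server.py further down), and the device names are placeholders.

# Inferred shape of the 'devices' payload consumed by the UI above (illustrative only).
devices_response = {
    'all': {                 # every compatible render device -> display name
        'cuda:0': 'Example GPU 0',
        'cuda:1': 'Example GPU 1',
        'cpu': 'Example CPU',
    },
    'active': {              # devices that currently have a render thread
        'cuda:0': 'Example GPU 0',
    },
    'config': 'auto',        # raw 'render_devices' value from config.json ('cpu', 'auto', or a list)
}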
@@ -73,6 +73,12 @@ var PARAMETERS = [
note: "warning: this will be *very* slow",
default: false,
},
{
id: "auto_pick_gpus",
type: ParameterType.checkbox,
label: "Automatically pick the GPUs",
default: false,
},
{
id: "use_gpus",
type: ParameterType.select_multiple,
ui/sd_internal/device_manager.py (new file, 156 lines)
@@ -0,0 +1,156 @@
import os
import torch
import traceback
import re

COMPARABLE_GPU_PERCENTILE = 0.75 # if a GPU's free_mem is within this % of the GPU with the most free_mem, it will be picked

def get_device_delta(render_devices, active_devices):
    '''
    render_devices: 'cpu', or 'auto' or ['cuda:N'...]
    active_devices: ['cpu', 'cuda:N'...]
    '''

    if render_devices is not None:
        if render_devices in ('cpu', 'auto'):
            render_devices = [render_devices]
        elif isinstance(render_devices, list) and len(render_devices) > 0:
            render_devices = list(filter(lambda x: x.startswith('cuda:'), render_devices))
            if len(render_devices) == 0:
                raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')

            render_devices = list(filter(lambda x: is_device_compatible(x), render_devices))
            if len(render_devices) == 0:
                raise Exception('Sorry, none of the render_devices configured in config.json are compatible with Stable Diffusion')
        else:
            raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')
    else:
        render_devices = ['auto']

    if 'auto' in render_devices:
        render_devices = auto_pick_devices(active_devices)
        if 'cpu' in render_devices:
            print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')

    active_devices = set(active_devices)
    render_devices = set(render_devices)

    devices_to_start = render_devices - active_devices
    devices_to_stop = active_devices - render_devices

    return devices_to_start, devices_to_stop

def auto_pick_devices(currently_active_devices):
    if not torch.cuda.is_available(): return ['cpu']

    device_count = torch.cuda.device_count()
    if device_count == 1:
        return ['cuda:0'] if is_device_compatible('cuda:0') else ['cpu']

    print('Autoselecting GPU. Using most free memory.')
    devices = []
    for device in range(device_count):
        device = f'cuda:{device}'
        if not is_device_compatible(device):
            continue

        mem_free, mem_total = torch.cuda.mem_get_info(device)
        mem_free /= float(10**9)
        mem_total /= float(10**9)
        device_name = torch.cuda.get_device_name(device)
        print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
        devices.append({'device': device, 'device_name': device_name, 'mem_free': mem_free})

    devices.sort(key=lambda x:x['mem_free'], reverse=True)
    max_free_mem = devices[0]['mem_free']
    free_mem_threshold = COMPARABLE_GPU_PERCENTILE * max_free_mem

    # Auto-pick algorithm:
    # 1. Pick the top 75 percentile of the GPUs, sorted by free_mem.
    # 2. Also include already-running devices (GPU-only), otherwise their free_mem will
    #    always be very low (since their VRAM contains the model).
    #    These already-running devices probably aren't terrible, since they were picked in the past.
    #    Worst case, the user can restart the program and that'll get rid of them.
    devices = list(filter((lambda x: x['mem_free'] > free_mem_threshold or x['device'] in currently_active_devices), devices))
    return devices

def device_init(thread_data, device):
    '''
    This function assumes the 'device' has already been verified to be compatible.
    `get_device_delta()` has already filtered out incompatible devices.
    '''

    validate_device_id(device, log_prefix='device_init')

    if device == 'cpu':
        thread_data.device = 'cpu'
        thread_data.device_name = get_processor_name()
        print('Render device CPU available as', thread_data.device_name)
        return

    thread_data.device_name = torch.cuda.get_device_name(device)
    thread_data.device = device

    # Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
    device_name = thread_data.device_name.lower()
    thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
    if thread_data.force_full_precision:
        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
        # Apply force_full_precision now before models are loaded.
        thread_data.precision = 'full'

    print(f'Setting {device} as active')
    torch.cuda.device(device)

    return

def validate_device_id(device, log_prefix=''):
    def is_valid():
        if not isinstance(device, str):
            return False
        if device == 'cpu':
            return True
        if not device.startswith('cuda:') or not device[5:].isnumeric():
            return False
        return True

    if not is_valid():
        raise EnvironmentError(f"{log_prefix}: device id should be 'cpu', or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")

def is_device_compatible(device):
    '''
    Returns True/False, and prints any compatibility errors
    '''
    validate_device_id(device, log_prefix='is_device_compatible')

    if device == 'cpu': return True
    # Memory check
    try:
        _, mem_total = torch.cuda.mem_get_info(device)
        mem_total /= float(10**9)
        if mem_total < 3.0:
            print(f'GPU {device} with less than 3 GB of VRAM is not compatible with Stable Diffusion')
            return False
    except RuntimeError as e:
        print(str(e))
        return False
    return True

def get_processor_name():
    try:
        import platform, subprocess
        if platform.system() == "Windows":
            return platform.processor()
        elif platform.system() == "Darwin":
            os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
            command = "sysctl -n machdep.cpu.brand_string"
            return subprocess.check_output(command).strip()
        elif platform.system() == "Linux":
            command = "cat /proc/cpuinfo"
            all_info = subprocess.check_output(command, shell=True).decode().strip()
            for line in all_info.split("\n"):
                if "model name" in line:
                    return re.sub(".*model name.*:", "", line, 1).strip()
    except:
        print(traceback.format_exc())
    return "cpu"
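To make the auto-pick threshold above concrete, here is a small worked example with made-up numbers: with GPUs reporting 7.5, 6.0 and 4.0 GB free, the threshold is 0.75 × 7.5 = 5.625 GB, so the first two are picked and the third survives the filter only if it already has a render thread (and therefore a loaded model depressing its free memory).

# Worked example of the filter in auto_pick_devices() above (numbers are hypothetical).
COMPARABLE_GPU_PERCENTILE = 0.75

devices = [
    {'device': 'cuda:0', 'mem_free': 7.5},   # GB free
    {'device': 'cuda:1', 'mem_free': 6.0},
    {'device': 'cuda:2', 'mem_free': 4.0},
]
currently_active_devices = ['cuda:2']        # already running, so its free memory looks low

devices.sort(key=lambda x: x['mem_free'], reverse=True)
free_mem_threshold = COMPARABLE_GPU_PERCENTILE * devices[0]['mem_free']   # 5.625

picked = [x['device'] for x in devices
          if x['mem_free'] > free_mem_threshold or x['device'] in currently_active_devices]
print(picked)   # ['cuda:0', 'cuda:1', 'cuda:2']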
@@ -37,6 +37,7 @@ config_yaml = "optimizedSD/v1-inference.yaml"
filename_regex = re.compile('[^a-zA-Z0-9]')

# api stuff
from sd_internal import device_manager
from . import Request, Response, Image as ResponseImage
import base64
from io import BytesIO
@@ -45,73 +46,7 @@ from io import BytesIO
from threading import local as LocalThreadVars
thread_data = LocalThreadVars()

def get_processor_name():
    try:
        import platform, subprocess
        if platform.system() == "Windows":
            return platform.processor()
        elif platform.system() == "Darwin":
            os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
            command = "sysctl -n machdep.cpu.brand_string"
            return subprocess.check_output(command).strip()
        elif platform.system() == "Linux":
            command = "cat /proc/cpuinfo"
            all_info = subprocess.check_output(command, shell=True).decode().strip()
            for line in all_info.split("\n"):
                if "model name" in line:
                    return re.sub(".*model name.*:", "", line, 1).strip()
    except:
        print(traceback.format_exc())
    return "cpu"

def validate_device_id(device, allow_auto=False, log_prefix=''):
    device_names = ['cpu', 'auto'] if allow_auto else ['cpu']
    if not isinstance(device, str) or (device not in device_names and (len(device) <= len('cuda:') or device[:5] != 'cuda:' or not device[5:].isnumeric())):
        raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")

'''
Returns True/False, and prints any compatibility errors
'''
def is_device_compatible(device):
    validate_device_id(device, allow_auto=False, log_prefix='is_device_compatible')

    if device == 'cpu': return True
    # Memory check
    try:
        mem_free, mem_total = torch.cuda.mem_get_info(device)
        mem_total /= float(10**9)
        if mem_total < 3.0:
            print('GPUs with less than 3 GB of VRAM are not compatible with Stable Diffusion')
            return False
    except RuntimeError as e:
        print(str(e))
        return False
    return True

def device_select(device):
    validate_device_id(device, allow_auto=False, log_prefix='device_select')

    if device == 'cpu': return True
    if not torch.cuda.is_available(): return False
    if not is_device_compatible(device):
        return False

    thread_data.device_name = torch.cuda.get_device_name(device)
    thread_data.device = device

    # Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
    device_name = thread_data.device_name.lower()
    thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
    if thread_data.force_full_precision:
        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
        # Apply force_full_precision now before models are loaded.
        thread_data.precision = 'full'

    return True

def device_init(device_selection):
    validate_device_id(device_selection, allow_auto=True, log_prefix='device_init')

def thread_init(device):
    # Thread bound properties
    thread_data.stop_processing = False
    thread_data.temp_images = {}
@@ -140,50 +75,7 @@ def device_init(device_selection):
    thread_data.force_full_precision = False
    thread_data.reduced_memory = True

    if device_selection == 'cpu':
        thread_data.device = 'cpu'
        thread_data.device_name = get_processor_name()
        print('Render device CPU available as', thread_data.device_name)
        return True
    if not torch.cuda.is_available():
        if device_selection == 'auto':
            print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
            thread_data.device = 'cpu'
            thread_data.device_name = get_processor_name()
            return True
        else:
            raise EnvironmentError(f'Could not find a compatible GPU for the requested device_selection: {device_selection}!')

    if device_selection == 'auto':
        device_count = torch.cuda.device_count()
        if device_count == 1 and device_select('cuda:0'):
            torch.cuda.device('cuda:0')
            return True

        print('Autoselecting GPU. Using most free memory.')
        max_mem_free = 0
        best_device = None
        for device in range(device_count):
            device = f'cuda:{device}'
            mem_free, mem_total = torch.cuda.mem_get_info(device)
            mem_free /= float(10**9)
            mem_total /= float(10**9)
            device_name = torch.cuda.get_device_name(device)
            print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
            if max_mem_free < mem_free:
                max_mem_free = mem_free
                best_device = device
        if best_device and device_select(best_device):
            print(f'Setting {device} as active')
            torch.cuda.device(device)
            return True

    if device_selection != 'auto' and device_select(device_selection):
        print(f'Setting {device_selection} as active')
        torch.cuda.device(device_selection)
        return True

    return False
    device_manager.device_init(thread_data, device)

def load_model_ckpt():
    if not thread_data.ckpt_file: raise ValueError(f'Thread ckpt_file is undefined.')
@@ -296,6 +188,8 @@ def unload_filters():
        del thread_data.model_real_esrgan
    thread_data.model_real_esrgan = None

    gc()

def unload_models():
    if thread_data.model is not None:
        print('Unloading models...')
@@ -313,6 +207,8 @@ def unload_models():
    thread_data.modelCS = None
    thread_data.modelFS = None

    gc()

def wait_model_move_to(model, target_device): # Send to target_device and wait until complete.
    if thread_data.device == target_device: return
    start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
@@ -518,7 +414,6 @@ def do_mk_img(req: Request):
    if needs_model_reload:
        unload_models()
        unload_filters()
        gc()
        load_model_ckpt()

    if thread_data.turbo != req.turbo:
@@ -14,7 +14,7 @@ import queue, threading, time, weakref
from typing import Any, Generator, Hashable, Optional, Union

from pydantic import BaseModel
from sd_internal import Request, Response, runtime
from sd_internal import Request, Response, runtime, device_manager

THREAD_NAME_PREFIX = 'Runtime-Render/'
ERR_LOCK_FAILED = ' failed to acquire lock within timeout.'
@@ -253,11 +253,7 @@ def thread_render(device):
    global current_state, current_state_error, current_model_path, current_vae_path
    from . import runtime
    try:
        if not runtime.device_init(device):
            weak_thread_data[threading.current_thread()] = {
                'error': f'Could not start on the selected device: {device}'
            }
            return
        runtime.thread_init(device)
    except Exception as e:
        print(traceback.format_exc())
        weak_thread_data[threading.current_thread()] = {
@@ -266,13 +262,19 @@
        return
    weak_thread_data[threading.current_thread()] = {
        'device': runtime.thread_data.device,
        'device_name': runtime.thread_data.device_name
        'device_name': runtime.thread_data.device_name,
        'alive': True
    }
    if runtime.thread_data.device != 'cpu' or is_alive() == 1:
        preload_model()
        current_state = ServerStates.Online
    while True:
        task_cache.clean()
        if not weak_thread_data[threading.current_thread()]['alive']:
            print(f'Shutting down thread for device {runtime.thread_data.device}')
            runtime.unload_models()
            runtime.unload_filters()
            return
        if isinstance(current_state_error, SystemExit):
            current_state = ServerStates.Unavailable
            return
@@ -371,12 +373,12 @@ def get_devices():
    gpu_count = torch.cuda.device_count()
    for device in range(gpu_count):
        device = f'cuda:{device}'
        if not runtime.is_device_compatible(device):
        if not device_manager.is_device_compatible(device):
            continue

        devices['all'].update({device: torch.cuda.get_device_name(device)})

    devices['all'].update({'cpu': runtime.get_processor_name()})
    devices['all'].update({'cpu': device_manager.get_processor_name()})

    # list the activated devices
    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('get_devices' + ERR_LOCK_FAILED)
@@ -411,13 +413,13 @@ def is_alive(device=None):
    finally:
        manager_lock.release()

def start_render_thread(device='auto'):
    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_threads' + ERR_LOCK_FAILED)
def start_render_thread(device):
    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_thread' + ERR_LOCK_FAILED)
    print('Start new Rendering Thread on device', device)
    try:
        rthread = threading.Thread(target=thread_render, kwargs={'device': device})
        rthread.daemon = True
        rthread.name = THREAD_NAME_PREFIX + str(device)
        rthread.name = THREAD_NAME_PREFIX + device
        rthread.start()
        render_threads.append(rthread)
    finally:
@@ -425,6 +427,7 @@ def start_render_thread(device='auto'):
    timeout = DEVICE_START_TIMEOUT
    while not rthread.is_alive() or not rthread in weak_thread_data or not 'device' in weak_thread_data[rthread]:
        if rthread in weak_thread_data and 'error' in weak_thread_data[rthread]:
            print(rthread, device, 'error:', weak_thread_data[rthread]['error'])
            return False
        if timeout <= 0:
            return False
@@ -432,6 +435,59 @@ def start_render_thread(device='auto'):
        time.sleep(1)
    return True

def stop_render_thread(device):
    try:
        device_manager.validate_device_id(device, log_prefix='stop_render_thread')
    except:
        print(traceback.format_exec())
        return False

    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('stop_render_thread' + ERR_LOCK_FAILED)
    print('Stopping Rendering Thread on device', device)

    try:
        thread_to_remove = None
        for rthread in render_threads:
            weak_data = weak_thread_data.get(rthread)
            if weak_data is None or not 'device' in weak_data or weak_data['device'] is None:
                continue
            thread_device = weak_data['device']
            if thread_device == device:
                weak_data['alive'] = False
                thread_to_remove = rthread
                break
        if thread_to_remove is not None:
            render_threads.remove(rthread)
            return True
    finally:
        manager_lock.release()

    return False

def update_render_threads(render_devices, active_devices):
    devices_to_start, devices_to_stop = device_manager.get_device_delta(render_devices, active_devices)
    print('devices_to_start', devices_to_start)
    print('devices_to_stop', devices_to_stop)

    for device in devices_to_stop:
        if is_alive(device) <= 0:
            print(device, 'is not alive')
            continue
        if not stop_render_thread(device):
            print(device, 'could not stop render thread')

    for device in devices_to_start:
        if is_alive(device) >= 1:
            print(device, 'already registered.')
            continue
        if not start_render_thread(device):
            print(device, 'failed to start.')

    if is_alive() <= 0: # No running devices, probably invalid user config.
        raise EnvironmentError('ERROR: No active render devices! Please verify the "render_devices" value in config.json')

    print('active devices', get_devices()['active'])

def shutdown_event(): # Signal render thread to close on shutdown
    global current_state_error
    current_state_error = SystemExit('Application shutting down.')
@@ -478,7 +534,6 @@ def render(req : ImageRequest):
    r.stream_image_progress = False

    new_task = RenderTask(r)
    new_task.render_device = req.render_device

    if task_cache.put(r.session_id, new_task, TASK_TTL):
        # Use twice the normal timeout for adding user requests.
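The start/stop bookkeeping in update_render_threads() above acts only on the difference between the requested and currently active device sets, so changing the selection from one GPU to two starts a thread for the new device and leaves the existing one running. A small illustration with assumed values:

# Illustration of the set arithmetic in device_manager.get_device_delta(),
# as consumed by update_render_threads() above. Values are assumed.
active_devices = {'cuda:0'}                    # threads currently running
render_devices = {'cuda:0', 'cuda:1'}          # newly requested selection

devices_to_start = render_devices - active_devices   # {'cuda:1'} -> start_render_thread('cuda:1')
devices_to_stop = active_devices - render_devices    # set()      -> nothing to stop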
ui/server.py (80 lines changed)
@@ -224,7 +224,10 @@ def read_web_data(key:str=None):
            raise HTTPException(status_code=500, detail="Config file is missing or unreadable")
        return JSONResponse(config, headers=NOCACHE_HEADERS)
    elif key == 'devices':
        return JSONResponse(task_manager.get_devices(), headers=NOCACHE_HEADERS)
        config = getConfig()
        devices = task_manager.get_devices()
        devices['config'] = config.get('render_devices', "auto")
        return JSONResponse(devices, headers=NOCACHE_HEADERS)
    elif key == 'models':
        return JSONResponse(getModels(), headers=NOCACHE_HEADERS)
    elif key == 'modifiers': return FileResponse(os.path.join(SD_UI_DIR, 'modifiers.json'), headers=NOCACHE_HEADERS)
@@ -272,17 +275,41 @@ def save_model_to_config(ckpt_model_name, vae_model_name):

    setConfig(config)

@app.post('/render')
def render(req : task_manager.ImageRequest):
def save_render_devices_to_config(render_devices):
    config = getConfig()
    if 'render_devices' not in config:
        config['render_devices'] = {}

    config['render_devices'] = render_devices
    if render_devices is None or len(render_devices) == 0:
        del config['render_devices']

    setConfig(config)

def update_render_threads_on_request(req : task_manager.ImageRequest):
    if req.use_cpu: # TODO Remove after transition.
        print('WARNING Replace {use_cpu: true} by {render_device: "cpu"}')
        req.render_device = 'cpu'
        del req.use_cpu
    if req.render_device != 'cpu':
        req.render_device = 'cuda:0' # temp hack to get beta working
    if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden

    if req.render_device not in ('cpu', 'auto') and not req.render_device.startswith('cuda:'):
        raise HTTPException(status_code=400, detail=f'Invalid render device requested: {req.render_device}')

    if req.render_device.startswith('cuda:'):
        req.render_device = req.render_device.split(',')

    save_render_devices_to_config(req.render_device)
    del req.render_device

    update_render_threads()

@app.post('/render')
def render(req : task_manager.ImageRequest):
    update_render_threads_on_request(req)

    if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
        raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
        raise HTTPException(status_code=412, detail=f'The "Fix incorrect faces" feature works only on cuda:0. Disable "Fix incorrect faces" (in Image Settings), or use the CUDA_VISIBLE_DEVICES environment variable.')

    try:
        save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
        req.use_stable_diffusion_model = resolve_ckpt_to_use(req.use_stable_diffusion_model)
@@ -359,44 +386,19 @@ class LogSuppressFilter(logging.Filter):
        return True
logging.getLogger('uvicorn.access').addFilter(LogSuppressFilter())

config = getConfig()

# Start the task_manager
task_manager.default_model_to_load = resolve_ckpt_to_use()
task_manager.default_vae_to_load = resolve_vae_to_use()
if 'render_devices' in config: # Start a new thread for each device.
    if not isinstance(config['render_devices'], list):
        raise Exception('Invalid render_devices value in config. Should be a list')
    config['render_devices'] = set(config['render_devices']) # de-duplicate
    for device in config['render_devices']:
        if task_manager.is_alive(device) >= 1:
            print(device, 'already registered.')
            continue
        if not task_manager.start_render_thread(device):
            print(device, 'failed to start.')
    if task_manager.is_alive() <= 0: # No running devices, probably invalid user config.
        print('WARNING: No active render devices after loading config. Validate "render_devices" in config.json')
        print('Loading default render devices to replace invalid render_devices field from config', config['render_devices'])

if task_manager.is_alive() <= 0: # Either no defaults or no devices after loading config.
    # Select best GPU device using free memory, if more than one device.
    if task_manager.start_render_thread('auto'): # Detect best device for renders
        # if cuda:0 is missing, another cuda device is better. try to start it...
        if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
            print('Failed to start GPU:0...')
    else:
        print('Failed to start gpu device.')
    if task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cpu'): # Allow CPU to be used for renders
        print('Failed to start CPU render device...')
def update_render_threads():
    config = getConfig()
    render_devices = config.get('render_devices', "auto")
    active_devices = task_manager.get_devices()['active'].keys()

is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
    print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
    print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
    print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
    print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
    print('requesting for render_devices', render_devices)
    task_manager.update_render_threads(render_devices, active_devices)

print('active devices', task_manager.get_devices()['active'])
update_render_threads()

# start the browser ui
import webbrowser; webbrowser.open('http://localhost:9000')
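For reference, save_render_devices_to_config() above either stores the selection or removes the key entirely, so an empty selection falls back to the "auto" default the next time update_render_threads() reads the config. A small sketch of the resulting config dict (illustrative only; other config keys are omitted):

# Sketch of what save_render_devices_to_config() leaves in the config (illustrative only).
def save_render_devices_sketch(config, render_devices):
    config['render_devices'] = render_devices
    if render_devices is None or len(render_devices) == 0:
        del config['render_devices']   # empty selection -> fall back to the "auto" default
    return config

print(save_render_devices_sketch({}, ['cuda:0', 'cuda:1']))   # {'render_devices': ['cuda:0', 'cuda:1']}
print(save_render_devices_sketch({}, []))                     # {} -> later read as "auto"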