Merge branch 'beta' into beta

Commit 257cd34101 by cmdr2, 2022-11-14 19:06:35 +05:30 (committed via GitHub)
GPG signature: no known key found for this signature in database (GPG Key ID: 4AEE18F83AFDEB23)
9 changed files with 375 additions and 220 deletions

View File

@@ -7,7 +7,7 @@
 <link rel="icon" type="image/png" href="/media/images/favicon-32x32.png" sizes="32x32">
 <link rel="stylesheet" href="/media/css/fonts.css?v=1">
 <link rel="stylesheet" href="/media/css/themes.css?v=2">
-<link rel="stylesheet" href="/media/css/main.css?v=10">
+<link rel="stylesheet" href="/media/css/main.css?v=11">
 <link rel="stylesheet" href="/media/css/auto-save.css?v=5">
 <link rel="stylesheet" href="/media/css/modifier-thumbnails.css?v=4">
 <link rel="stylesheet" href="/media/css/fontawesome-all.min.css?v=1">
@@ -19,7 +19,7 @@
 <div id="container">
 <div id="top-nav">
 <div id="logo">
-<h1>Stable Diffusion UI <small>v2.3.14 <span id="updateBranchLabel"></span></small></h1>
+<h1>Stable Diffusion UI <small>v2.4 <span id="updateBranchLabel"></span></small></h1>
 </div>
 <div id="server-status">
 <div id="server-status-color"></div>
@@ -35,6 +35,9 @@
 <span id="tab-about" class="tab">
 <span><i class="fa fa-comments icon"></i> Help & Community</span>
 </span>
+<!-- <span id="tab-system-info" class="tab">
+<span><i class="fa fa-microchip icon"></i> System Info</span>
+</span> -->
 </div>
 </div>
@@ -280,6 +283,18 @@
 </div>
 </div>
 </div>
+<!-- <div id="tab-content-system-info" class="tab-content">
+<div id="system-info" class="tab-content-inner">
+<h1>System Info</h1>
+<table>
+<tr><td><label>Processor:</label></td><td id="system-info-cpu" class="value">Dingus</td></tr>
+<tr><td><label>RAM:</label></td><td id="system-info-ram" class="value">Dingus Another</td></tr>
+<tr><td><label>Compatible Graphics Cards (all):</label></td><td id="system-info-all-gpus" class="value">Dingus</td></tr>
+<tr><td></td><td>&nbsp;</td></tr>
+<tr><td><label>What's being used for rendering 🔥:</label></td><td id="system-info-active-gpus" class="value">Dingus<br/>Intel Graphics SOmething<br/>Another thing</td></tr>
+</table>
+</div>
+</div> -->
 </div>
@@ -317,13 +332,13 @@
 </div>
 </body>
-<script src="media/js/parameters.js?v=4"></script>
+<script src="media/js/parameters.js?v=6"></script>
 <script src="media/js/plugins.js?v=1"></script>
 <script src="media/js/utils.js?v=6"></script>
 <script src="media/js/inpainting-editor.js?v=1"></script>
 <script src="media/js/image-modifiers.js?v=6"></script>
-<script src="media/js/auto-save.js?v=7"></script>
-<script src="media/js/main.js?v=13"></script>
+<script src="media/js/auto-save.js?v=8"></script>
+<script src="media/js/main.js?v=16"></script>
 <script src="media/js/themes.js?v=4"></script>
 <script src="media/js/dnd.js?v=8"></script>
 <script>

View File

@@ -123,7 +123,7 @@ label {
 padding: 16px;
 display: flex;
 flex-direction: column;
-flex: 0 0 350pt;
+flex: 0 0 370pt;
 }
 #editor label {
 font-weight: normal;
@@ -891,3 +891,9 @@ input::file-selector-button {
 i.active {
 background: var(--accent-color);
 }
+#system-info .value {
+text-align: left;
+}
+#system-info label {
+float: right;
+}

View File

@@ -283,6 +283,9 @@ function tryLoadOldSettings() {
 var localStorageValue = localStorage.getItem(localStorageKey);
 if (localStorageValue !== null) {
 var setting = SETTINGS[individual_settings_map[localStorageKey]]
+if (setting == null || setting == undefined) {
+return
+}
 if (setting.element.type == "checkbox" && (typeof localStorageValue === "string" || localStorageValue instanceof String)) {
 localStorageValue = localStorageValue == "true"
 }

View File

@@ -27,6 +27,7 @@ let maskImageSelector = document.querySelector("#mask")
 let maskImagePreview = document.querySelector("#mask_preview")
 let turboField = document.querySelector('#turbo')
 let useCPUField = document.querySelector('#use_cpu')
+let autoPickGPUsField = document.querySelector('#auto_pick_gpus')
 let useGPUsField = document.querySelector('#use_gpus')
 let useFullPrecisionField = document.querySelector('#use_full_precision')
 let saveToDiskField = document.querySelector('#save_to_disk')
@@ -87,7 +88,6 @@ maskResetButton.style.fontWeight = 'normal'
 maskResetButton.style.fontSize = '10pt'
 let serverState = {'status': 'Offline', 'time': Date.now()}
-let lastPromptUsed = ''
 let bellPending = false
 let taskQueue = []
@@ -428,7 +428,6 @@ async function doMakeImage(task) {
 let res = undefined
 try {
-const lastTask = serverState.task
 let renderRequest = undefined
 do {
 res = await fetch('/render', {
@@ -633,7 +632,6 @@ async function doMakeImage(task) {
 return false
 }
-lastPromptUsed = reqBody['prompt']
 showImages(reqBody, stepUpdate, outputContainer, false)
 } catch (e) {
 console.log('request error', e)
@@ -810,14 +808,15 @@ function getCurrentUserRequest() {
 }
 function getCurrentRenderDeviceSelection() {
-if (useCPUField.checked) {
+let selectedGPUs = $('#use_gpus').val()
+if (useCPUField.checked && !autoPickGPUsField.checked) {
 return 'cpu'
 }
-let selectedGPUs = $(useGPUsField).val()
-if (selectedGPUs.length == 0) {
-selectedGPUs = ['auto']
+if (autoPickGPUsField.checked || selectedGPUs.length == 0) {
+return 'auto'
 }
 return selectedGPUs.join(',')
 }
@@ -1136,11 +1135,37 @@ updatePromptStrength()
 useCPUField.addEventListener('click', function() {
 let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
+let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
 if (this.checked) {
 gpuSettingEntry.style.display = 'none'
+autoPickGPUSettingEntry.style.display = 'none'
+autoPickGPUsField.setAttribute('data-old-value', autoPickGPUsField.checked)
+autoPickGPUsField.checked = false
 } else if (useGPUsField.options.length >= MIN_GPUS_TO_SHOW_SELECTION) {
 gpuSettingEntry.style.display = ''
+autoPickGPUSettingEntry.style.display = ''
+let oldVal = autoPickGPUsField.getAttribute('data-old-value')
+if (oldVal === null || oldVal === undefined) { // the UI started with CPU selected by default
+autoPickGPUsField.checked = true
+} else {
+autoPickGPUsField.checked = (oldVal === 'true')
 }
+gpuSettingEntry.style.display = (autoPickGPUsField.checked ? 'none' : '')
+}
+})
+useGPUsField.addEventListener('click', function() {
+let selectedGPUs = $('#use_gpus').val()
+autoPickGPUsField.checked = (selectedGPUs.length === 0)
+})
+autoPickGPUsField.addEventListener('click', function() {
+if (this.checked) {
+$('#use_gpus').val([])
+}
+let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
+gpuSettingEntry.style.display = (this.checked ? 'none' : '')
 })
@@ -1357,24 +1382,33 @@ async function getDevices() {
 useCPUField.checked = true
 }
-if (allDeviceIds.length < MIN_GPUS_TO_SHOW_SELECTION) {
+if (allDeviceIds.length < MIN_GPUS_TO_SHOW_SELECTION || useCPUField.checked) {
 let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
 gpuSettingEntry.style.display = 'none'
+let autoPickGPUSettingEntry = getParameterSettingsEntry('auto_pick_gpus')
+autoPickGPUSettingEntry.style.display = 'none'
+}
 if (allDeviceIds.length === 0) {
 useCPUField.checked = true
 useCPUField.disabled = true // no compatible GPUs, so make the CPU mandatory
 }
-}
+autoPickGPUsField.checked = (res['config'] === 'auto')
 useGPUsField.innerHTML = ''
 allDeviceIds.forEach(device => {
 let deviceName = res['all'][device]
-let selected = (activeDeviceIds.includes(device) ? 'selected' : '')
-let deviceOption = `<option value="${device}" ${selected}>${deviceName}</option>`
+let deviceOption = `<option value="${device}">${deviceName}</option>`
 useGPUsField.insertAdjacentHTML('beforeend', deviceOption)
 })
+if (autoPickGPUsField.checked) {
+let gpuSettingEntry = getParameterSettingsEntry('use_gpus')
+gpuSettingEntry.style.display = 'none'
+} else {
+$('#use_gpus').val(activeDeviceIds)
+}
 }
 } catch (e) {
 console.log('error fetching devices', e)

View File

@@ -73,11 +73,17 @@ var PARAMETERS = [
 note: "warning: this will be *very* slow",
 default: false,
 },
+{
+id: "auto_pick_gpus",
+type: ParameterType.checkbox,
+label: "Automatically pick the GPUs",
+default: false,
+},
 {
 id: "use_gpus",
 type: ParameterType.select_multiple,
 label: "GPUs to use",
-note: "select multiple GPUs to process in parallel",
+note: "to process in parallel",
 default: false,
 },
 {

View File

@@ -0,0 +1,157 @@
+import os
+import torch
+import traceback
+import re
+
+COMPARABLE_GPU_PERCENTILE = 0.65 # if a GPU's free_mem is within this % of the GPU with the most free_mem, it will be picked
+
+def get_device_delta(render_devices, active_devices):
+    '''
+    render_devices: 'cpu', or 'auto' or ['cuda:N'...]
+    active_devices: ['cpu', 'cuda:N'...]
+    '''
+    if render_devices is not None:
+        if render_devices in ('cpu', 'auto'):
+            render_devices = [render_devices]
+        elif isinstance(render_devices, list) and len(render_devices) > 0:
+            render_devices = list(filter(lambda x: x.startswith('cuda:'), render_devices))
+            if len(render_devices) == 0:
+                raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')
+
+            render_devices = list(filter(lambda x: is_device_compatible(x), render_devices))
+            if len(render_devices) == 0:
+                raise Exception('Sorry, none of the render_devices configured in config.json are compatible with Stable Diffusion')
+        else:
+            raise Exception('Invalid render_devices value in config.json. Valid: {"render_devices": ["cuda:0", "cuda:1"...]}, or {"render_devices": "cpu"} or {"render_devices": "auto"}')
+    else:
+        render_devices = ['auto']
+
+    if 'auto' in render_devices:
+        render_devices = auto_pick_devices(active_devices)
+        if 'cpu' in render_devices:
+            print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
+
+    active_devices = set(active_devices)
+    render_devices = set(render_devices)
+
+    devices_to_start = render_devices - active_devices
+    devices_to_stop = active_devices - render_devices
+
+    return devices_to_start, devices_to_stop
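The delta is plain set arithmetic: whatever is requested but not yet running gets started, whatever is running but no longer requested gets stopped. A minimal sketch of that step in isolation (the device IDs are hypothetical):

    # Hypothetical: config asks for cuda:0 and cuda:2; threads run on cuda:1 and cuda:2.
    render_devices = ['cuda:0', 'cuda:2']
    active_devices = ['cuda:1', 'cuda:2']

    devices_to_start = set(render_devices) - set(active_devices)  # {'cuda:0'}
    devices_to_stop = set(active_devices) - set(render_devices)   # {'cuda:1'}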
+def auto_pick_devices(currently_active_devices):
+    if not torch.cuda.is_available(): return ['cpu']
+
+    device_count = torch.cuda.device_count()
+    if device_count == 1:
+        return ['cuda:0'] if is_device_compatible('cuda:0') else ['cpu']
+
+    print('Autoselecting GPU. Using most free memory.')
+    devices = []
+    for device in range(device_count):
+        device = f'cuda:{device}'
+        if not is_device_compatible(device):
+            continue
+
+        mem_free, mem_total = torch.cuda.mem_get_info(device)
+        mem_free /= float(10**9)
+        mem_total /= float(10**9)
+        device_name = torch.cuda.get_device_name(device)
+        print(f'{device} detected: {device_name} - Memory (free/total): {round(mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
+        devices.append({'device': device, 'device_name': device_name, 'mem_free': mem_free})
+
+    devices.sort(key=lambda x: x['mem_free'], reverse=True)
+    max_free_mem = devices[0]['mem_free']
+    free_mem_threshold = COMPARABLE_GPU_PERCENTILE * max_free_mem
+
+    # Auto-pick algorithm:
+    # 1. Pick the GPUs whose free memory is within COMPARABLE_GPU_PERCENTILE (65%)
+    #    of the free memory on the GPU with the most free memory.
+    # 2. Also include already-running devices (GPU-only), otherwise their free_mem will
+    #    always be very low (since their VRAM contains the model).
+    #    These already-running devices probably aren't terrible, since they were picked in the past.
+    #    Worst case, the user can restart the program and that'll get rid of them.
+    devices = list(filter((lambda x: x['mem_free'] > free_mem_threshold or x['device'] in currently_active_devices), devices))
+    devices = list(map(lambda x: x['device'], devices))
+    return devices
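A worked example of that threshold, with made-up free-memory readings: if three compatible GPUs report 20 GB, 15 GB and 10 GB free, the cutoff is 0.65 × 20 = 13 GB, so the third GPU is dropped unless it is already active:

    COMPARABLE_GPU_PERCENTILE = 0.65
    devices = [  # hypothetical free VRAM, in GB, already sorted descending
        {'device': 'cuda:0', 'mem_free': 20.0},
        {'device': 'cuda:1', 'mem_free': 15.0},
        {'device': 'cuda:2', 'mem_free': 10.0},
    ]
    threshold = COMPARABLE_GPU_PERCENTILE * devices[0]['mem_free']  # 13.0
    picked = [d['device'] for d in devices if d['mem_free'] > threshold]
    print(picked)  # ['cuda:0', 'cuda:1']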
+def device_init(thread_data, device):
+    '''
+    This function assumes the 'device' has already been verified to be compatible.
+    `get_device_delta()` has already filtered out incompatible devices.
+    '''
+    validate_device_id(device, log_prefix='device_init')
+
+    if device == 'cpu':
+        thread_data.device = 'cpu'
+        thread_data.device_name = get_processor_name()
+        print('Render device CPU available as', thread_data.device_name)
+        return
+
+    thread_data.device_name = torch.cuda.get_device_name(device)
+    thread_data.device = device
+
+    # Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
+    device_name = thread_data.device_name.lower()
+    thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
+    if thread_data.force_full_precision:
+        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
+        # Apply force_full_precision now before models are loaded.
+        thread_data.precision = 'full'
+
+    print(f'Setting {device} as active')
+    torch.cuda.device(device)
+
+    return
+
+def validate_device_id(device, log_prefix=''):
+    def is_valid():
+        if not isinstance(device, str):
+            return False
+        if device == 'cpu':
+            return True
+        if not device.startswith('cuda:') or not device[5:].isnumeric():
+            return False
+        return True
+
+    if not is_valid():
+        raise EnvironmentError(f"{log_prefix}: device id should be 'cpu', or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
+def is_device_compatible(device):
+    '''
+    Returns True/False, and prints any compatibility errors
+    '''
+    validate_device_id(device, log_prefix='is_device_compatible')
+
+    if device == 'cpu': return True
+    # Memory check
+    try:
+        _, mem_total = torch.cuda.mem_get_info(device)
+        mem_total /= float(10**9)
+        if mem_total < 3.0:
+            print(f'GPU {device} with less than 3 GB of VRAM is not compatible with Stable Diffusion')
+            return False
+    except RuntimeError as e:
+        print(str(e))
+        return False
+    return True
+
+def get_processor_name():
+    try:
+        import platform, subprocess
+        if platform.system() == "Windows":
+            return platform.processor()
+        elif platform.system() == "Darwin":
+            os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
+            command = "sysctl -n machdep.cpu.brand_string"
+            return subprocess.check_output(command.split()).decode().strip()
+        elif platform.system() == "Linux":
+            command = "cat /proc/cpuinfo"
+            all_info = subprocess.check_output(command, shell=True).decode().strip()
+            for line in all_info.split("\n"):
+                if "model name" in line:
+                    return re.sub(".*model name.*:", "", line, 1).strip()
+    except:
+        print(traceback.format_exc())
+    return "cpu"

View File

@@ -35,8 +35,10 @@ logging.set_verbosity_error()
 # consts
 config_yaml = "optimizedSD/v1-inference.yaml"
 filename_regex = re.compile('[^a-zA-Z0-9]')
+force_gfpgan_to_cuda0 = True # workaround: gfpgan currently works only on cuda:0

 # api stuff
+from sd_internal import device_manager
 from . import Request, Response, Image as ResponseImage
 import base64
 from io import BytesIO
@@ -45,73 +47,7 @@ from io import BytesIO
 from threading import local as LocalThreadVars
 thread_data = LocalThreadVars()

-def get_processor_name():
-    try:
-        import platform, subprocess
-        if platform.system() == "Windows":
-            return platform.processor()
-        elif platform.system() == "Darwin":
-            os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
-            command = "sysctl -n machdep.cpu.brand_string"
-            return subprocess.check_output(command).strip()
-        elif platform.system() == "Linux":
-            command = "cat /proc/cpuinfo"
-            all_info = subprocess.check_output(command, shell=True).decode().strip()
-            for line in all_info.split("\n"):
-                if "model name" in line:
-                    return re.sub(".*model name.*:", "", line, 1).strip()
-    except:
-        print(traceback.format_exc())
-    return "cpu"
-
-def validate_device_id(device, allow_auto=False, log_prefix=''):
-    device_names = ['cpu', 'auto'] if allow_auto else ['cpu']
-    if not isinstance(device, str) or (device not in device_names and (len(device) <= len('cuda:') or device[:5] != 'cuda:' or not device[5:].isnumeric())):
-        raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
-
-'''
-Returns True/False, and prints any compatibility errors
-'''
-def is_device_compatible(device):
-    validate_device_id(device, allow_auto=False, log_prefix='is_device_compatible')
-    if device == 'cpu': return True
-    # Memory check
-    try:
-        mem_free, mem_total = torch.cuda.mem_get_info(device)
-        mem_total /= float(10**9)
-        if mem_total < 3.0:
-            print('GPUs with less than 3 GB of VRAM are not compatible with Stable Diffusion')
-            return False
-    except RuntimeError as e:
-        print(str(e))
-        return False
-    return True
-
-def device_select(device):
-    validate_device_id(device, allow_auto=False, log_prefix='device_select')
-    if device == 'cpu': return True
-    if not torch.cuda.is_available(): return False
-    if not is_device_compatible(device):
-        return False
-
-    thread_data.device_name = torch.cuda.get_device_name(device)
-    thread_data.device = device
-
-    # Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
-    device_name = thread_data.device_name.lower()
-    thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
-    if thread_data.force_full_precision:
-        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
-        # Apply force_full_precision now before models are loaded.
-        thread_data.precision = 'full'
-
-    return True
-
-def device_init(device_selection):
-    validate_device_id(device_selection, allow_auto=True, log_prefix='device_init')
-
+def thread_init(device):
     # Thread bound properties
     thread_data.stop_processing = False
     thread_data.temp_images = {}
@@ -140,50 +76,7 @@ def device_init(device_selection):
     thread_data.force_full_precision = False
     thread_data.reduced_memory = True

-    if device_selection == 'cpu':
-        thread_data.device = 'cpu'
-        thread_data.device_name = get_processor_name()
-        print('Render device CPU available as', thread_data.device_name)
-        return True
-    if not torch.cuda.is_available():
-        if device_selection == 'auto':
-            print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
-            thread_data.device = 'cpu'
-            thread_data.device_name = get_processor_name()
-            return True
-        else:
-            raise EnvironmentError(f'Could not find a compatible GPU for the requested device_selection: {device_selection}!')
-    if device_selection == 'auto':
-        device_count = torch.cuda.device_count()
-        if device_count == 1 and device_select('cuda:0'):
-            torch.cuda.device('cuda:0')
-            return True
-        print('Autoselecting GPU. Using most free memory.')
-        max_mem_free = 0
-        best_device = None
-        for device in range(device_count):
-            device = f'cuda:{device}'
-            mem_free, mem_total = torch.cuda.mem_get_info(device)
-            mem_free /= float(10**9)
-            mem_total /= float(10**9)
-            device_name = torch.cuda.get_device_name(device)
-            print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
-            if max_mem_free < mem_free:
-                max_mem_free = mem_free
-                best_device = device
-        if best_device and device_select(best_device):
-            print(f'Setting {device} as active')
-            torch.cuda.device(device)
-            return True
-    if device_selection != 'auto' and device_select(device_selection):
-        print(f'Setting {device_selection} as active')
-        torch.cuda.device(device_selection)
-        return True
-    return False
+    device_manager.device_init(thread_data, device)

 def load_model_ckpt():
     if not thread_data.ckpt_file: raise ValueError(f'Thread ckpt_file is undefined.')
@@ -296,6 +189,8 @@ def unload_filters():
         del thread_data.model_real_esrgan
         thread_data.model_real_esrgan = None

+    gc()
+
 def unload_models():
     if thread_data.model is not None:
         print('Unloading models...')
@@ -313,6 +208,8 @@ def unload_models():
     thread_data.modelCS = None
     thread_data.modelFS = None

+    gc()
+
 def wait_model_move_to(model, target_device): # Send to target_device and wait until complete.
     if thread_data.device == target_device: return
     start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
@@ -339,19 +236,13 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait until complete.
 def load_model_gfpgan():
     if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.')
-        #print('load_model_gfpgan called without setting gfpgan_file')
-        #return
-    if thread_data.device != 'cuda:0':
-        #TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
-        raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device("cuda:0")}. Cannot run GFPGANer.')
     model_path = thread_data.gfpgan_file + ".pth"
-    thread_data.model_gfpgan = GFPGANer(device=torch.device(thread_data.device), model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None)
+    device = 'cuda:0' if force_gfpgan_to_cuda0 else thread_data.device
+    thread_data.model_gfpgan = GFPGANer(device=torch.device(device), model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None)
     print('loaded', thread_data.gfpgan_file, 'to', thread_data.model_gfpgan.device, 'precision', thread_data.precision)

 def load_model_real_esrgan():
     if thread_data.real_esrgan_file is None: raise ValueError(f'Thread real_esrgan_file is undefined.')
-        #print('load_model_real_esrgan called without setting real_esrgan_file')
-        #return
     model_path = thread_data.real_esrgan_file + ".pth"

     RealESRGAN_models = {
@@ -396,11 +287,11 @@ def get_base_path(disk_path, session_id, prompt, img_id, ext, suffix=None):
 def apply_filters(filter_name, image_data, model_path=None):
     print(f'Applying filter {filter_name}...')
     gc() # Free space before loading new data.
-    if isinstance(image_data, torch.Tensor):
-        print(image_data)
-        image_data.to(thread_data.device)

     if filter_name == 'gfpgan':
+        if isinstance(image_data, torch.Tensor):
+            image_data.to('cuda:0' if force_gfpgan_to_cuda0 else thread_data.device)
+
         if model_path is not None and model_path != thread_data.gfpgan_file:
             thread_data.gfpgan_file = model_path
             load_model_gfpgan()
@@ -412,6 +303,9 @@ def apply_filters(filter_name, image_data, model_path=None):
         image_data = output[:,:,::-1]

     if filter_name == 'real_esrgan':
+        if isinstance(image_data, torch.Tensor):
+            image_data.to(thread_data.device)
+
         if model_path is not None and model_path != thread_data.real_esrgan_file:
             thread_data.real_esrgan_file = model_path
             load_model_real_esrgan()
@@ -518,7 +412,6 @@ def do_mk_img(req: Request):
     if needs_model_reload:
         unload_models()
         unload_filters()
-        gc()
         load_model_ckpt()

     if thread_data.turbo != req.turbo:
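The net effect of the force_gfpgan_to_cuda0 workaround above: face correction always loads and runs on the first GPU, regardless of which device the render thread owns, while upscaling follows the thread's own device. A condensed sketch of that routing (the helper name is illustrative, not part of the module):

    def filter_device(filter_name, thread_device, force_gfpgan_to_cuda0=True):
        if filter_name == 'gfpgan' and force_gfpgan_to_cuda0:
            return 'cuda:0'   # GFPGANer/facexlib currently only work on the first GPU
        return thread_device  # real_esrgan runs wherever the render thread runs

    print(filter_device('gfpgan', 'cuda:1'))       # cuda:0
    print(filter_device('real_esrgan', 'cuda:1'))  # cuda:1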

View File

@@ -14,7 +14,7 @@ import queue, threading, time, weakref
 from typing import Any, Generator, Hashable, Optional, Union
 from pydantic import BaseModel

-from sd_internal import Request, Response, runtime
+from sd_internal import Request, Response, runtime, device_manager

 THREAD_NAME_PREFIX = 'Runtime-Render/'
 ERR_LOCK_FAILED = ' failed to acquire lock within timeout.'
@@ -72,7 +72,7 @@ class ImageRequest(BaseModel):
     save_to_disk_path: str = None
     turbo: bool = True
     use_cpu: bool = False ##TODO Remove after UI and plugins transition.
-    render_device: str = None
+    render_device: str = 'auto'
     use_full_precision: bool = False
     use_face_correction: str = None # or "GFPGANv1.3"
     use_upscale: str = None # or "RealESRGAN_x4plus" or "RealESRGAN_x4plus_anime_6B"
@@ -218,17 +218,10 @@ def thread_get_next_task():
     task = None
     try: # Select a render task.
         for queued_task in tasks_queue:
-            if queued_task.request.use_face_correction: # TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
-                if is_alive('cuda:0') <= 0: # Allows GFPGANer only on cuda:0.
-                    queued_task.error = Exception('cuda:0 is not available with the current config. Remove GFPGANer filter to run task.')
-                    task = queued_task
-                    break
-                if queued_task.render_device == 'cpu':
-                    queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.')
-                    task = queued_task
-                    break
-                if runtime.thread_data.device != 'cuda:0':
-                    continue # Wait for cuda:0
+            if queued_task.request.use_face_correction and runtime.thread_data.device == 'cpu' and is_alive() == 1:
+                queued_task.error = Exception('The CPU cannot be used to run this task currently. Please remove "Fix incorrect faces" from Image Settings and try again.')
+                task = queued_task
+                break
             if queued_task.render_device and runtime.thread_data.device != queued_task.render_device:
                 # Is asking for a specific render device.
                 if is_alive(queued_task.render_device) > 0:
@@ -253,11 +246,7 @@ def thread_render(device):
     global current_state, current_state_error, current_model_path, current_vae_path
     from . import runtime
     try:
-        if not runtime.device_init(device):
-            weak_thread_data[threading.current_thread()] = {
-                'error': f'Could not start on the selected device: {device}'
-            }
-            return
+        runtime.thread_init(device)
     except Exception as e:
         print(traceback.format_exc())
         weak_thread_data[threading.current_thread()] = {
@@ -266,24 +255,24 @@ def thread_render(device):
         return
     weak_thread_data[threading.current_thread()] = {
         'device': runtime.thread_data.device,
-        'device_name': runtime.thread_data.device_name
+        'device_name': runtime.thread_data.device_name,
+        'alive': True
     }
     if runtime.thread_data.device != 'cpu' or is_alive() == 1:
         preload_model()
         current_state = ServerStates.Online
     while True:
         task_cache.clean()
+        if not weak_thread_data[threading.current_thread()]['alive']:
+            print(f'Shutting down thread for device {runtime.thread_data.device}')
+            runtime.unload_models()
+            runtime.unload_filters()
+            return
         if isinstance(current_state_error, SystemExit):
             current_state = ServerStates.Unavailable
             return
         task = thread_get_next_task()
         if task is None:
-            if runtime.thread_data.device == 'cpu' and is_alive() > 1 and hasattr(runtime.thread_data, 'lastActive') and time.time() - runtime.thread_data.lastActive > CPU_UNLOAD_TIMEOUT:
-                # GPUs present and CPU is idle. Unload resources.
-                runtime.unload_models()
-                runtime.unload_filters()
-                del runtime.thread_data.lastActive
-                print('unloaded models from CPU because it was idle for too long')
             time.sleep(1)
             continue
         if task.error is not None:
@@ -371,12 +360,12 @@ def get_devices():
     gpu_count = torch.cuda.device_count()
     for device in range(gpu_count):
         device = f'cuda:{device}'
-        if not runtime.is_device_compatible(device):
+        if not device_manager.is_device_compatible(device):
             continue
         devices['all'].update({device: torch.cuda.get_device_name(device)})

-    devices['all'].update({'cpu': runtime.get_processor_name()})
+    devices['all'].update({'cpu': device_manager.get_processor_name()})

     # list the activated devices
     if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('get_devices' + ERR_LOCK_FAILED)
@@ -411,13 +400,13 @@ def is_alive(device=None):
     finally:
         manager_lock.release()

-def start_render_thread(device='auto'):
-    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_threads' + ERR_LOCK_FAILED)
+def start_render_thread(device):
+    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_thread' + ERR_LOCK_FAILED)
     print('Start new Rendering Thread on device', device)
     try:
         rthread = threading.Thread(target=thread_render, kwargs={'device': device})
         rthread.daemon = True
-        rthread.name = THREAD_NAME_PREFIX + str(device)
+        rthread.name = THREAD_NAME_PREFIX + device
         rthread.start()
         render_threads.append(rthread)
     finally:
@@ -425,6 +414,7 @@ def start_render_thread(device='auto'):
     timeout = DEVICE_START_TIMEOUT
     while not rthread.is_alive() or not rthread in weak_thread_data or not 'device' in weak_thread_data[rthread]:
         if rthread in weak_thread_data and 'error' in weak_thread_data[rthread]:
+            print(rthread, device, 'error:', weak_thread_data[rthread]['error'])
             return False
         if timeout <= 0:
             return False
@@ -432,6 +422,59 @@ def start_render_thread(device='auto'):
         time.sleep(1)
     return True
+def stop_render_thread(device):
+    try:
+        device_manager.validate_device_id(device, log_prefix='stop_render_thread')
+    except:
+        print(traceback.format_exc())
+        return False
+
+    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('stop_render_thread' + ERR_LOCK_FAILED)
+    print('Stopping Rendering Thread on device', device)
+
+    try:
+        thread_to_remove = None
+        for rthread in render_threads:
+            weak_data = weak_thread_data.get(rthread)
+            if weak_data is None or not 'device' in weak_data or weak_data['device'] is None:
+                continue
+            thread_device = weak_data['device']
+            if thread_device == device:
+                weak_data['alive'] = False
+                thread_to_remove = rthread
+                break
+        if thread_to_remove is not None:
+            render_threads.remove(thread_to_remove)
+            return True
+    finally:
+        manager_lock.release()
+
+    return False
+def update_render_threads(render_devices, active_devices):
+    devices_to_start, devices_to_stop = device_manager.get_device_delta(render_devices, active_devices)
+    print('devices_to_start', devices_to_start)
+    print('devices_to_stop', devices_to_stop)
+
+    for device in devices_to_stop:
+        if is_alive(device) <= 0:
+            print(device, 'is not alive')
+            continue
+        if not stop_render_thread(device):
+            print(device, 'could not stop render thread')
+
+    for device in devices_to_start:
+        if is_alive(device) >= 1:
+            print(device, 'already registered.')
+            continue
+        if not start_render_thread(device):
+            print(device, 'failed to start.')
+
+    if is_alive() <= 0: # No running devices, probably invalid user config.
+        raise EnvironmentError('ERROR: No active render devices! Please verify the "render_devices" value in config.json')
+
+    print('active devices', get_devices()['active'])
+
 def shutdown_event(): # Signal render thread to close on shutdown
     global current_state_error
     current_state_error = SystemExit('Application shutting down.')
@@ -478,7 +521,6 @@ def render(req : ImageRequest):
     r.stream_image_progress = False

     new_task = RenderTask(r)
-    new_task.render_device = req.render_device

     if task_cache.put(r.session_id, new_task, TASK_TTL):
         # Use twice the normal timeout for adding user requests.
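Worth noting: stop_render_thread() does not kill the thread. It flips the shared 'alive' flag and lets the render loop notice on its next iteration, unload its models, and return. A minimal standalone sketch of that cooperative-shutdown pattern (the names are illustrative, not this module's API):

    import threading, time

    def worker(state):
        while state['alive']:      # checked once per loop, as in thread_render()
            time.sleep(1)          # ... poll the queue, render, etc.
        print('unloading models and exiting')

    state = {'alive': True}
    t = threading.Thread(target=worker, args=(state,), daemon=True)
    t.start()
    state['alive'] = False         # what stop_render_thread() effectively does
    t.join(timeout=3)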

View File

@@ -224,7 +224,10 @@ def read_web_data(key:str=None):
         raise HTTPException(status_code=500, detail="Config file is missing or unreadable")
         return JSONResponse(config, headers=NOCACHE_HEADERS)
     elif key == 'devices':
-        return JSONResponse(task_manager.get_devices(), headers=NOCACHE_HEADERS)
+        config = getConfig()
+        devices = task_manager.get_devices()
+        devices['config'] = config.get('render_devices', "auto")
+        return JSONResponse(devices, headers=NOCACHE_HEADERS)
     elif key == 'models':
         return JSONResponse(getModels(), headers=NOCACHE_HEADERS)
     elif key == 'modifiers': return FileResponse(os.path.join(SD_UI_DIR, 'modifiers.json'), headers=NOCACHE_HEADERS)
@@ -272,17 +275,38 @@ def save_model_to_config(ckpt_model_name, vae_model_name):
     setConfig(config)

-@app.post('/render')
-def render(req : task_manager.ImageRequest):
+def save_render_devices_to_config(render_devices):
+    config = getConfig()
+    if 'render_devices' not in config:
+        config['render_devices'] = {}
+    config['render_devices'] = render_devices
+    if render_devices is None or len(render_devices) == 0:
+        del config['render_devices']
+    setConfig(config)
+
+def update_render_threads_on_request(req : task_manager.ImageRequest):
     if req.use_cpu: # TODO Remove after transition.
         print('WARNING Replace {use_cpu: true} by {render_device: "cpu"}')
         req.render_device = 'cpu'
         del req.use_cpu
-    if req.render_device != 'cpu':
-        req.render_device = 'cuda:0' # temp hack to get beta working
-    if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
-    if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
-        raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
+
+    if req.render_device not in ('cpu', 'auto') and not req.render_device.startswith('cuda:'):
+        raise HTTPException(status_code=400, detail=f'Invalid render device requested: {req.render_device}')
+
+    if req.render_device.startswith('cuda:'):
+        req.render_device = req.render_device.split(',')
+
+    save_render_devices_to_config(req.render_device)
+    del req.render_device
+
+    update_render_threads()
+
+@app.post('/render')
+def render(req : task_manager.ImageRequest):
+    update_render_threads_on_request(req)
     try:
         save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
         req.use_stable_diffusion_model = resolve_ckpt_to_use(req.use_stable_diffusion_model)
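So a request carrying render_device 'cuda:0,cuda:1' is split into a list, persisted to config.json, and the thread pool is then reconciled against the new config. A sketch of just the normalization step (hypothetical helper and values):

    def normalize_render_device(render_device):
        # Mirrors update_render_threads_on_request(): 'cpu' and 'auto'
        # pass through; comma-joined CUDA ids become a list.
        if render_device in ('cpu', 'auto'):
            return render_device
        if render_device.startswith('cuda:'):
            return render_device.split(',')
        raise ValueError(f'Invalid render device requested: {render_device}')

    print(normalize_render_device('auto'))           # auto
    print(normalize_render_device('cuda:0,cuda:1'))  # ['cuda:0', 'cuda:1']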
@@ -359,44 +383,19 @@ class LogSuppressFilter(logging.Filter):
         return True
 logging.getLogger('uvicorn.access').addFilter(LogSuppressFilter())

-config = getConfig()
-
 # Start the task_manager
 task_manager.default_model_to_load = resolve_ckpt_to_use()
 task_manager.default_vae_to_load = resolve_vae_to_use()
-if 'render_devices' in config: # Start a new thread for each device.
-    if not isinstance(config['render_devices'], list):
-        raise Exception('Invalid render_devices value in config. Should be a list')
-    config['render_devices'] = set(config['render_devices']) # de-duplicate
-    for device in config['render_devices']:
-        if task_manager.is_alive(device) >= 1:
-            print(device, 'already registered.')
-            continue
-        if not task_manager.start_render_thread(device):
-            print(device, 'failed to start.')
-    if task_manager.is_alive() <= 0: # No running devices, probably invalid user config.
-        print('WARNING: No active render devices after loading config. Validate "render_devices" in config.json')
-        print('Loading default render devices to replace invalid render_devices field from config', config['render_devices'])
-
-if task_manager.is_alive() <= 0: # Either no defaults or no devices after loading config.
-    # Select best GPU device using free memory, if more than one device.
-    if task_manager.start_render_thread('auto'): # Detect best device for renders
-        # if cuda:0 is missing, another cuda device is better. try to start it...
-        if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
-            print('Failed to start GPU:0...')
-    else:
-        print('Failed to start gpu device.')
-    if task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cpu'): # Allow CPU to be used for renders
-        print('Failed to start CPU render device...')
-
-is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
-if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
-    print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
-    print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
-    print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
-    print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
-print('active devices', task_manager.get_devices()['active'])
+def update_render_threads():
+    config = getConfig()
+    render_devices = config.get('render_devices', "auto")
+    active_devices = task_manager.get_devices()['active'].keys()
+
+    print('requesting for render_devices', render_devices)
+    task_manager.update_render_threads(render_devices, active_devices)
+
+update_render_threads()

 # start the browser ui
 import webbrowser; webbrowser.open('http://localhost:9000')