Improved detection of a missing cuda:0 device and added a console warning explaining how to fix it.

This commit is contained in:
Marc-Andre Ferland 2022-10-17 02:27:30 -04:00
parent 554b67a2f0
commit c92129ac63
3 changed files with 31 additions and 9 deletions

View File

@ -130,7 +130,7 @@ def device_init(device_selection=None):
device_selection = device_selection.lower() device_selection = device_selection.lower()
if device_selection.startswith('gpu:'): if device_selection.startswith('gpu:'):
device_selection = int(device_selection[4:]) device_selection = int(device_selection[4:])
if device_selection != 'cuda' and device_selection != 'current' and device_selection != 'gpu': if device_selection != 'current' and device_selection != 'gpu':
if device_select(device_selection): if device_select(device_selection):
if isinstance(device_selection, int): if isinstance(device_selection, int):
print(f'Setting GPU:{device_selection} as active') print(f'Setting GPU:{device_selection} as active')

View File

@ -299,16 +299,19 @@ def thread_render(device):
print(f'Session {task.request.session_id} task {id(task)} completed.') print(f'Session {task.request.session_id} task {id(task)} completed.')
current_state = ServerStates.Online current_state = ServerStates.Online
def is_first_cuda_device(device):
    """Report whether ``device`` is the first CUDA device.

    Thin wrapper that forwards ``device`` unchanged to
    ``runtime.is_first_cuda_device`` and returns that call's result as-is.

    Args:
        device: Device identifier to check. The accepted forms (name string,
            integer index, ...) are decided by ``runtime`` -- TODO confirm
            against ``runtime.is_first_cuda_device``.

    Returns:
        Whatever ``runtime.is_first_cuda_device(device)`` returns
        (presumably a bool -- verify against ``runtime``).
    """
    # The import is performed inside the function body, not at module level.
    from . import runtime # When calling runtime from outside thread_render DO NOT USE thread specific attributes or functions.
    return runtime.is_first_cuda_device(device)
def is_alive(name=None): def is_alive(name=None):
from . import runtime # When calling runtime from here DO NOT USE thread specific attributes or functions.
if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('is_alive' + ERR_LOCK_FAILED) if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('is_alive' + ERR_LOCK_FAILED)
nbr_alive = 0 nbr_alive = 0
try: try:
for rthread in render_threads: for rthread in render_threads:
thread_name = rthread.name[len(THREAD_NAME_PREFIX):].lower() thread_name = rthread.name[len(THREAD_NAME_PREFIX):].lower()
if name is not None: if name is not None:
if runtime.is_first_cuda_device(name): if is_first_cuda_device(name):
if not runtime.is_first_cuda_device(thread_name): if not is_first_cuda_device(thread_name):
continue continue
elif thread_name != name: elif thread_name != name:
continue continue

View File

@ -78,16 +78,27 @@ def setConfig(config):
print(traceback.format_exc()) print(traceback.format_exc())
if 'render_devices' in config: if 'render_devices' in config:
gpu_devices = filter(lambda dev: dev.startswith('GPU:'), config['render_devices']) gpu_devices = filter(lambda dev: dev.lower().startswith('gpu') or dev.lower().startswith('cuda'), config['render_devices'])
else: else:
gpu_devices = [] gpu_devices = []
has_first_cuda_device = False
for device in gpu_devices:
if not task_manager.is_first_cuda_device(device): continue
has_first_cuda_device = True
break
if len(gpu_devices) > 0 and not has_first_cuda_device:
print('WARNING: GFPGANer only works on CPU or GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
try: # config.bat try: # config.bat
config_bat = [ config_bat = [
f"@set update_branch={config['update_branch']}" f"@set update_branch={config['update_branch']}"
] ]
if len(gpu_devices) > 0: if len(gpu_devices) > 0 and not has_first_cuda_device:
config_sh.append(f"@set CUDA_VISIBLE_DEVICES={','.join(gpu_devices)}") config_sh.append('::Set the devices visible inside SD-UI here')
config_bat.append(f"::@set CUDA_VISIBLE_DEVICES={','.join(gpu_devices)}") # Needs better detection for edge cases, add as a comment for now.
print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
config_bat_path = os.path.join(CONFIG_DIR, 'config.bat') config_bat_path = os.path.join(CONFIG_DIR, 'config.bat')
with open(config_bat_path, 'w') as f: with open(config_bat_path, 'w') as f:
f.write(f.write('\r\n'.join(config_bat))) f.write(f.write('\r\n'.join(config_bat)))
@ -99,8 +110,10 @@ def setConfig(config):
'#!/bin/bash' '#!/bin/bash'
f"export update_branch={config['update_branch']}" f"export update_branch={config['update_branch']}"
] ]
if len(gpu_devices) > 0: if len(gpu_devices) > 0 and not has_first_cuda_device:
config_sh.append(f"CUDA_VISIBLE_DEVICES={','.join(gpu_devices)}") config_sh.append('#Set the devices visible inside SD-UI here')
config_sh.append(f"#CUDA_VISIBLE_DEVICES={','.join(gpu_devices)}") # Needs better detection for edge cases, add as a comment for now.
print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
config_sh_path = os.path.join(CONFIG_DIR, 'config.sh') config_sh_path = os.path.join(CONFIG_DIR, 'config.sh')
with open(config_sh_path, 'w') as f: with open(config_sh_path, 'w') as f:
f.write('\n'.join(config_sh)) f.write('\n'.join(config_sh))
@ -345,5 +358,11 @@ if task_manager.is_alive() <= 0: # No running devices, apply defaults.
if task_manager.is_alive('cpu') <= 0 and allow_cpu: if task_manager.is_alive('cpu') <= 0 and allow_cpu:
task_manager.start_render_thread('cpu') task_manager.start_render_thread('cpu')
if task_manager.is_alive(0) <= 0: # Missing cuda:0, warn the user.
print('WARNING: GFPGANer only works on CPU or GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
# start the browser ui # start the browser ui
import webbrowser; webbrowser.open('http://localhost:9000') import webbrowser; webbrowser.open('http://localhost:9000')